diff --git a/src/include/io.h b/src/include/io.h index 33fac4b..fa87bd1 100644 --- a/src/include/io.h +++ b/src/include/io.h @@ -16,6 +16,9 @@ along with this program. If not, see . */ +#ifndef HAVE_IO +#define HAVE_IO + #include struct linefile { @@ -27,3 +30,5 @@ void ungetline(struct linefile *file, char *line); char *getline(struct linefile *file); struct linefile *newlinefile(FILE *file); void freelinefile(struct linefile *file); + +#endif diff --git a/src/include/mdutil.h b/src/include/mdutil.h new file mode 100644 index 0000000..9c4b768 --- /dev/null +++ b/src/include/mdutil.h @@ -0,0 +1,42 @@ +/* + ncdg - A program to help generate natechoe.dev + Copyright (C) 2022 Nate Choe (natechoe9@gmail.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + +#ifndef HAVE_MDUTIL +#define HAVE_MDUTIL + +enum linetype { + EMPTY, + PLAIN, + SPACECODE, + HR, + SETEXT1, + /* === */ + SETEXT2 + /* --- */ +}; + +enum nodetype { + PARAGRAPH, + CODE, + NONE +}; + +enum linetype identifyline(char *line); +char *realcontent(char *line, enum linetype type); + +#endif diff --git a/src/include/util.h b/src/include/util.h new file mode 100644 index 0000000..d58ef5f --- /dev/null +++ b/src/include/util.h @@ -0,0 +1,35 @@ +/* + ncdg - A program to help generate natechoe.dev + Copyright (C) 2022 Nate Choe (natechoe9@gmail.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ +#ifndef HAVE_UTIL +#define HAVE_UTIL + +#include + +struct string { + size_t len; + size_t alloc; + char *data; +}; + +struct string *newstring(); +void freestring(struct string *str); +int appendcharstring(struct string *str, char c); +int appendstrstring(struct string *str, char *s); +void resetstring(struct string *str); + +#endif diff --git a/src/mdutil.c b/src/mdutil.c new file mode 100644 index 0000000..e6bd7f5 --- /dev/null +++ b/src/mdutil.c @@ -0,0 +1,86 @@ +/* + ncdg - A program to help generate natechoe.dev + Copyright (C) 2022 Nate Choe (natechoe9@gmail.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +#include +#include +#include + +#include + +static char *truncate(char *str); + +enum linetype identifyline(char *line) { + int i; + for (i = 0; i < 4; ++i) { + if (!isspace(line[i])) + goto notcode; + } + return SPACECODE; +notcode: + line = truncate(line); + if (line[0] == '\0') + return EMPTY; + { + int hrcount; + if (strchr("-*_=", line[0]) == NULL) + goto nothr; + /* A delimiting line can only contain '-', '*', '_', and ' '. */ + hrcount = 0; + for (i = 0; line[i]; ++i) { + if (!isspace(line[i]) && line[i] != line[0]) + goto nothr; + /* You can't mix delimiter characters, and you can't + * have anything other than a delimiter character or + * white space. 
*/ + if (line[i] == line[0]) + ++hrcount; + } + if (hrcount >= 3) { + switch (line[0]) { + case '=': + return SETEXT1; + case '-': + return SETEXT2; + default: + return HR; + } + } + /* There has to be at least 3 delimiter characters */ + } +nothr: + return PLAIN; +} +/* TODO: Finish this */ + +static char *truncate(char *str) { + while (isspace(str[0])) + ++str; + return str; +} + +char *realcontent(char *line, enum linetype type) { + switch (type) { + case EMPTY: case HR: case SETEXT1: case SETEXT2: + return NULL; + case PLAIN: + return line; + case SPACECODE: + return line + 4; + } + return NULL; +} diff --git a/src/template.c b/src/template.c index f58fea8..575d67b 100644 --- a/src/template.c +++ b/src/template.c @@ -21,506 +21,132 @@ #include #include +#include +#include #include -enum paratype { - NORMAL, - EMPTY, - H1, H2, H3, H4, H5, H6, - BLOCKQUOTE, - CODESPACE, CODEBACK, - UL, OL, - HL +struct parsestate { + enum nodetype type; + struct string *para; }; -enum inlinetype { - ITALIC, - BOLD, - CODE -}; - -static const struct { - char c; - char *escape; -} escapes[] = { - {'&', "&"}, - {';', ";"}, - {'<', "<"}, - {'>', ">"}, -}; - -static int parsepara(struct linefile *infile, FILE *outfile); -static enum paratype identifypara(char *line, char **contentret); - -static char *untrail(char *line); -static size_t reallen(char *line); -static int islinebreak(char *line); - -static int paraeasycase(struct linefile *infile, FILE *outfile, - char *line, char *buff, - char *tag, enum paratype type); -static int parahardcase(struct linefile *infile, FILE *outfile, - char *line, char *buff, - char *vars, char *linetag, char *tag, enum paratype type); -static int paracodecase(struct linefile *infile, FILE *outfile, - char *line, char *buff, - char *vars, enum paratype type); -static long strsearch(char *data, long start, size_t datalen, char c, int reps); -/* strsearch finds instances in data with reps repetitions of c. returns the - * last instance in the first group. 
For example: - * - * c = '.', reps = 2, data = " ...", returns 2 - * c = '.', reps = 2, data = ".. ...", returns 4 - * c = '.', reps = 1, data = " ...", returns 3 - * */ - -static long writelinked(char *data, long i, size_t len, char *tag, - FILE *outfile); - -static int writeescape(char c, FILE *outfile); -static int writedata(char *data, size_t len, FILE *outfile); -static int writesimple(char *data, size_t len, FILE *outfile); - -static const char *escapedchars = "!\"#%&'()*,./:;?@[\\]^{|}~"; +static int parseline(char *line, struct parsestate *currstate, FILE *out); +static int endpara(struct parsestate *state, FILE *out); int parsetemplate(FILE *infile, FILE *outfile) { struct linefile *realin; + struct parsestate currstate; + int code; + + currstate.type = NONE; + currstate.para = newstring(); + realin = newlinefile(infile); - while (parsepara(realin, outfile) == 0) ; + for (;;) { + char *currline; + currline = getline(realin); + if (currline == NULL) { + code = 0; + break; + } + if (parseline(currline, &currstate, outfile)) { + code = 1; + break; + } + } + endpara(&currstate, outfile); freelinefile(realin); - return 0; + return code; } -static int parsepara(struct linefile *infile, FILE *outfile) { - for (;;) { - char *line, *buff; - /* line exists for the explicit purpose of being freed later */ - enum paratype type; +static int parseline(char *line, struct parsestate *currstate, FILE *out) { + enum linetype type; - line = getline(infile); - if (line == NULL) + type = identifyline(line); + fflush(stdout); + + switch (type) { + case EMPTY: + endpara(currstate, out); + currstate->type = NONE; + return 0; + case SETEXT1: + if (currstate->type != PARAGRAPH) return 1; - type = identifypara(line, &buff); - - buff = untrail(buff); - - if (buff[0] == '\0') { - free(line); - continue; - } - - switch (type) { -#define EASY_CASE(enumtype, tag) \ - case enumtype: \ - paraeasycase(infile, outfile, line, buff, \ - tag, enumtype); \ - return 0; -#define 
HARD_CASE(enumtype, tag, linetag, vars) \ - case enumtype: \ - parahardcase(infile, outfile, line, buff, \ - vars, linetag, tag, enumtype); \ - return 0; -#define CODE_CASE(enumtype, vars) \ - case enumtype: \ - paracodecase(infile, outfile, line, buff, \ - vars, enumtype); \ - return 0; - EASY_CASE(H1, "h1"); - EASY_CASE(H2, "h2"); - EASY_CASE(H3, "h3"); - EASY_CASE(H4, "h4"); - EASY_CASE(H5, "h5"); - EASY_CASE(H6, "h6"); - HARD_CASE(NORMAL, "p", NULL, NULL); - HARD_CASE(BLOCKQUOTE, "blockquote", NULL, NULL); - HARD_CASE(UL, "ul", "li", NULL); - HARD_CASE(OL, "ol", "li", NULL); - CODE_CASE(CODESPACE, "class='block'"); - CODE_CASE(CODEBACK, "class='block'"); - case HL: - fputs("
", outfile); - free(line); - return 0; - case EMPTY: - free(line); - continue; - } - } -} - -static int isbreak(char *line) { - int count, i; - char whitechar; - count = 0; - whitechar = '\0'; - for (i = 0; line[i] != '\0'; ++i) { - if (line[i] == line[0]) - ++count; - else if (line[i] == ' ' || line[i] == '\t') { - if (whitechar == '\0') - whitechar = line[i]; - if (whitechar != line[i]) - return 0; + currstate->type = NONE; + fputs("

", out); + fwrite(currstate->para->data, 1, currstate->para->len, out); + fputs("

", out); + resetstring(currstate->para); + return 0; + case SETEXT2: + if (currstate->type != PARAGRAPH) + goto hr; + currstate->type = NONE; + fputs("

", out); + fwrite(currstate->para->data, 1, currstate->para->len, out); + fputs("

", out); + resetstring(currstate->para); + return 0; + case HR: hr: + endpara(currstate, out); + currstate->type = NONE; + fputs("
", out); + return 0; + case PLAIN: + if (currstate->type != PARAGRAPH) { + endpara(currstate, out); + currstate->type = PARAGRAPH; } else - return 0; + appendcharstring(currstate->para, ' '); + appendstrstring(currstate->para, realcontent(line, type)); + return 0; + /* According to the commonmark spec, this markdown: + + Chapter 1 + --- + + * Should NOT compile to this: + +

Chapter 1


+ + * but rather to this + +

Chapter 1

+ + * This means that we need to store the contents of the + * paragraph and only write after obtaining the whole thing + * as to not include the wrong tags. + * */ + case SPACECODE: + if (currstate->type != CODE) { + endpara(currstate, out); + currstate->type = CODE; + fputs("", out); + } + else + fputs("
", out); + fputs(realcontent(line, type), out); + break; } - return count >= 3; return 0; } -static enum paratype identifypara(char *line, char **contentret) { - int i; - for (i = 0; i < 4; ++i) { - if (line[i] == ' ') - continue; - if (line[i] == '\0') - return EMPTY; - goto whitegone; - } - - *contentret = line + i; - return CODESPACE; - -whitegone: - line += i; - /* At this point, line has no extraneous trailing whitespace */ - switch (line[0]) { - case '\0': - return EMPTY; - case '#': - for (i = 0; i < 6 && line[i] == '#'; ++i) ; - *contentret = line + i; - if (line[i] != '\0' && line[i] != ' ') - goto normal; - return H1 + i - 1; - case '>': - *contentret = line + 1; - return BLOCKQUOTE; - case '*': - if (isbreak(line)) - return HL; - *contentret = line + 1; - return UL; - case '-': case '_': - if (isbreak(line)) - return HL; - goto normal; - case '`': - for (i = 0; i < 3; ++i) - if (line[i] != '`') - goto normal; - return CODEBACK; - default: - if (isdigit(line[0])) { - for (i = 0; isdigit(line[i]); ++i) ; - if (line[i] == '.' || line[i] == ')') { - *contentret = line + i + 1; - return OL; - } - } - goto normal; - normal: - *contentret = line; - return NORMAL; - } -} - -static char *untrail(char *line) { - while (isspace(line[0])) - ++line; - return line; -} - -static size_t reallen(char *line) { - size_t fakelen; - fakelen = strlen(line); - if (line[fakelen - 1] == '\\') - --fakelen; - while (isspace(line[fakelen])) - --fakelen; - return fakelen; -} - -static int islinebreak(char *line) { - size_t len; - int i; - len = strlen(line); - if (line[len - 1] == '\\') - return 1; - if (len < 2) +static int endpara(struct parsestate *state, FILE *out) { + switch (state->type) { + case PARAGRAPH: + fputs("

", out); + fwrite(state->para->data, 1, state->para->len, out); + fputs("

", out); + resetstring(state->para); return 0; - for (i = 0; i < 2; ++i) - if (!isspace(line[len - i - 1])) - return 0; + case CODE: + fputs("
", out); + return 0; + case NONE: + return 0; + } return 1; } - -static int paraeasycase(struct linefile *infile, FILE *outfile, - char *line, char *buff, - char *tag, enum paratype type) { - size_t writelen; - - writelen = reallen(buff); - - fprintf(outfile, "<%s>", tag); - for (;;) { - writedata(buff, writelen, outfile); - free(line); - line = getline(infile); - if (line == NULL) - break; - if (identifypara(line, &buff) != type) { - ungetline(infile, line); - line = NULL; - break; - } - else - buff = untrail(buff); - } - fprintf(outfile, "", tag); - - free(line); - return 0; -} - -static int parahardcase(struct linefile *infile, FILE *outfile, - char *line, char *buff, - char *vars, char *linetag, char *tag, enum paratype type) { - size_t writelen; - - if (vars == NULL) - fprintf(outfile, "<%s>", tag); - else - fprintf(outfile, "<%s %s>", tag, vars); - for (;;) { - writelen = reallen(buff); - - if (linetag != NULL) - fprintf(outfile, "<%s>", linetag); - writedata(buff, writelen, outfile); - if (islinebreak(line)) - fputs("
", outfile); - if (linetag != NULL) - fprintf(outfile, "", linetag); - - free(line); - line = getline(infile); - if (line == NULL) - break; - if (identifypara(line, &buff) != type) { - buff = untrail(line); - if (buff[0] == '\0') { - free(line); - line = NULL; - break; - } - } - else - buff = untrail(buff); - fputc(' ', outfile); - } - fprintf(outfile, "", tag); - - free(line); - return 0; -} - -static int paracodecase(struct linefile *infile, FILE *outfile, - char *line, char *buff, - char *vars, enum paratype type) { - int seenfirst; - enum paratype newtype; - - if (type != CODESPACE && type != CODEBACK) - return 1; - - if (vars == NULL) - fputs("", outfile); - else - fprintf(outfile, "", vars); - seenfirst = 0; - newtype = type; - for (;;) { - if ((type == CODEBACK && type != newtype) || - newtype == CODESPACE) { - if (seenfirst) - fputs("
", outfile); - seenfirst = 1; - } - - if (newtype != CODEBACK) - writesimple(buff, -1, outfile); - - free(line); - line = getline(infile); - if (line == NULL) - return 1; - - newtype = identifypara(line, &buff); - if (type == CODEBACK && newtype == CODEBACK) - break; - if (type == CODESPACE && newtype != type) { - ungetline(infile, line); - break; - } - } - fputs("
", outfile); - - if (type == CODEBACK) - free(line); - return 0; -} - -static long strsearch(char *data, long start, size_t datalen, - char c, int reps) { - long i; - - for (i = start; data[i] == c; ++i) ; - - while (i + reps - 1 < datalen) { - int j; - for (j = 0; j < reps; ++j) - if (data[i + j] != c) - goto failure; - goto success; - continue; -failure: - ++i; - } - return -1; - -success: - while (data[i + reps] == c && i + reps < datalen) - ++i; - return i; -} - -static long writelinked(char *data, long i, size_t len, char *tag, - FILE *outfile) { - long linkend, textend; - textend = strsearch(data, i, len, ']', 1); - if (textend < 0) - return -1; - linkend = strsearch(data, textend, len, ')', 1); - if (linkend < 0) - return -1; - if (strcmp(tag, "a") == 0) { - fputs("", outfile); - writesimple(data + i + 1, - textend - i - 1, outfile); - fputs("", outfile); - return linkend; - } - else if (strcmp(tag, "img") == 0) { - fputs("", outfile);
-		writesimple(data + i + 1,
-				textend - i - 1, outfile);
-		fputs("", outfile); - return linkend; - } - return -1; -} - -static int writeescape(char c, FILE *outfile) { - int i; - for (i = 0; i < sizeof escapes / sizeof *escapes; ++i) { - if (escapes[i].c == c) { - fputs(escapes[i].escape, outfile); - return 0; - } - } - fputc(c, outfile); - return 0; -} - -static int writedata(char *data, size_t len, FILE *outfile) { - long i; - long start; - long end; - for (i = 0; i < len; ++i) { - switch (data[i]) { -#define STANDOUT_CHAR(c) \ - case c: \ - if (data[i + 1] == c) { \ - start = i + 2; \ - end = strsearch(data, start, len, \ - c, 2); \ - goto bold; \ - } \ - start = i + 1; \ - end = strsearch(data, start, len, c, 1); \ - goto italic; - STANDOUT_CHAR('*'); - STANDOUT_CHAR('_'); - italic: - if (end < 0) - goto normal; - fputs("", outfile); - writedata(data + start, end - start, outfile); - fputs("", outfile); - i = end; - break; - bold: - if (end < 0) - goto normal; - fputs("", outfile); - writedata(data + start, end - start, outfile); - fputs("", outfile); - i = end + 1; - break; - - case '`': - end = strsearch(data, i, len, '`', 1); - if (end < 0) - goto normal; - fputs("", outfile); - writedata(data + i, end - i, outfile); - fputs("", outfile); - i = end; - break; - case '[': - end = writelinked(data, i, len, "a", outfile); - if (end < 0) - goto normal; - i = end; - break; - case '!': - end = writelinked(data, i + 1, len, "img", outfile); - if (end < 0) - goto normal; - i = end; - break; - case '\\': - if (i == len || - strchr(escapedchars, data[i+1]) == NULL) { - fputc('\\', outfile); - break; - } - ++i; - goto normal; - default: normal: - writeescape(data[i], outfile); - break; - } - } - return 0; -} - -static int writesimple(char *data, size_t len, FILE *outfile) { - long i; - for (i = 0; (len < 0 && data[i] != '\0') || i < len; ++i) { - if (data[i] == '\\') - if (strchr(escapedchars, data[i]) == NULL) - fputc('\\', outfile); - writeescape(data[i], outfile); - } - return 0; -} diff --git a/src/util.c b/src/util.c 
new file mode 100644
index 0000000..51261b6
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,105 @@
+/*
+    ncdg - A program to help generate natechoe.dev
+    Copyright (C) 2022 Nate Choe (natechoe9@gmail.com)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include <util.h>
+
+/* Allocates an empty string with a small initial buffer.
+ * Returns NULL if either allocation fails; nothing is leaked in that case.
+ * The result must be released with freestring(). */
+struct string *newstring() {
+	struct string *ret;
+	ret = malloc(sizeof *ret);
+	if (ret == NULL)
+		return NULL;
+	ret->len = 0;
+	ret->alloc = 20;
+	ret->data = malloc(ret->alloc);
+	if (ret->data == NULL) {
+		/* Don't leak the header when the buffer can't be allocated. */
+		free(ret);
+		return NULL;
+	}
+	return ret;
+}
+
+/* Releases a string created by newstring(). Passing NULL is a no-op, which
+ * lets callers clean up unconditionally after a failed newstring(). */
+void freestring(struct string *str) {
+	if (str == NULL)
+		return;
+	free(str->data);
+	free(str);
+}
+
+/* Appends the single character c, doubling the buffer when it is full.
+ * Returns 0 on success and 1 if the buffer could not be grown, in which case
+ * str is left unchanged. The data is NOT NUL-terminated; use str->len. */
+int appendcharstring(struct string *str, char c) {
+	if (str->len >= str->alloc) {
+		char *newdata;
+		size_t newalloc;
+		if (str->alloc > (size_t) -1 / 2)
+			return 1;
+		newalloc = str->alloc * 2;
+		newdata = realloc(str->data, newalloc);
+		if (newdata == NULL)
+			return 1;
+		str->data = newdata;
+		str->alloc = newalloc;
+	}
+	str->data[str->len++] = c;
+	return 0;
+}
+
+/* Appends the NUL-terminated string s (without its terminator), doubling the
+ * buffer until the new contents fit.
+ * Returns 0 on success and 1 if the required size overflows size_t or the
+ * buffer could not be grown, in which case str is left unchanged. */
+int appendstrstring(struct string *str, char *s) {
+	size_t len;
+	len = strlen(s);
+	if (len > (size_t) -1 - str->len)
+		return 1;
+	if (str->len + len >= str->alloc) {
+		char *newdata;
+		size_t newalloc;
+		newalloc = str->alloc;
+		while (str->len + len >= newalloc) {
+			/* Without this guard the doubling wraps to 0 and
+			 * the loop never terminates. */
+			if (newalloc > (size_t) -1 / 2)
+				return 1;
+			newalloc *= 2;
+		}
+		newdata = realloc(str->data, newalloc);
+		if (newdata == NULL)
+			return 1;
+		str->data = newdata;
+		str->alloc = newalloc;
+	}
+	memcpy(str->data + str->len, s, len);
+	str->len += len;
+	return 0;
+}
+
+/* Empties the string while keeping its buffer for reuse. */
+void resetstring(struct string *str) {
+	str->len = 0;
+}