Rewrote md stuff and made it more compliant

This commit is contained in:
Nate Choe
2022-04-23 01:43:38 -05:00
parent 91e9f2d20c
commit 416edf6c8e
6 changed files with 353 additions and 480 deletions

View File

@@ -16,6 +16,9 @@
along with this program. If not, see <https://www.gnu.org/licenses/>. along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
#ifndef HAVE_IO
#define HAVE_IO
#include <stdio.h> #include <stdio.h>
struct linefile { struct linefile {
@@ -27,3 +30,5 @@ void ungetline(struct linefile *file, char *line);
char *getline(struct linefile *file); char *getline(struct linefile *file);
struct linefile *newlinefile(FILE *file); struct linefile *newlinefile(FILE *file);
void freelinefile(struct linefile *file); void freelinefile(struct linefile *file);
#endif

42
src/include/mdutil.h Normal file
View File

@@ -0,0 +1,42 @@
/*
ncdg - A program to help generate natechoe.dev
Copyright (C) 2022 Nate Choe (natechoe9@gmail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef HAVE_MDUTIL
#define HAVE_MDUTIL
/* Classification of one input line, as produced by identifyline(). */
enum linetype {
	EMPTY,		/* nothing but whitespace */
	PLAIN,		/* ordinary text; the fallback classification */
	SPACECODE,	/* the first four characters are all whitespace */
	HR,		/* a delimiter line that is not a setext underline */
	SETEXT1,	/* === */
	SETEXT2		/* --- */
};
/* Kind of block-level node a parser is currently inside. */
enum nodetype {
	PARAGRAPH,	/* paragraph text is being collected */
	CODE,		/* a code block is open */
	NONE		/* no node is open */
};
/* Classifies a single NUL-terminated line of markdown; see enum linetype
 * for the possible results. The line is only read, never modified. */
enum linetype identifyline(char *line);
/* Returns the text of `line` that belongs in the output for a line of the
 * given type: the line itself for PLAIN, the line past its first four
 * characters for SPACECODE, and NULL for every other type. The result
 * points into `line`; nothing is allocated. */
char *realcontent(char *line, enum linetype type);
#endif

35
src/include/util.h Normal file
View File

@@ -0,0 +1,35 @@
/*
ncdg - A program to help generate natechoe.dev
Copyright (C) 2022 Nate Choe (natechoe9@gmail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef HAVE_UTIL
#define HAVE_UTIL
#include <stddef.h>
/* A growable byte buffer. `data` is not NUL-terminated by the append
 * functions, so always use `len` to find the end of the contents. */
struct string {
	size_t len;	/* bytes of data currently in use */
	size_t alloc;	/* bytes allocated for data */
	char *data;	/* heap buffer of `alloc` bytes */
};
/* Allocates an empty string; returns NULL if allocation fails. */
struct string *newstring();
/* Frees the buffer of `str` and then `str` itself. */
void freestring(struct string *str);
/* Appends one character, growing the buffer if needed. Returns 0 on
 * success and 1 if the buffer could not be grown (str is left unchanged). */
int appendcharstring(struct string *str, char c);
/* Appends the NUL-terminated string `s` (without its terminator). Returns 0
 * on success and 1 if the buffer could not be grown (str is left
 * unchanged). */
int appendstrstring(struct string *str, char *s);
/* Empties `str` by setting its length to 0; the buffer is kept. */
void resetstring(struct string *str);
#endif

86
src/mdutil.c Normal file
View File

@@ -0,0 +1,86 @@
/*
ncdg - A program to help generate natechoe.dev
Copyright (C) 2022 Nate Choe (natechoe9@gmail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <ctype.h>
#include <string.h>
#include <stddef.h>
#include <mdutil.h>
static char *truncate(char *str);
/*
 * Classifies one NUL-terminated line of markdown.
 *
 *   SPACECODE  the first four characters are all whitespace
 *   EMPTY      nothing is left after skipping leading whitespace
 *   SETEXT1    an unbroken run of three or more '=' (trailing space allowed)
 *   SETEXT2    an unbroken run of three or more '-' (trailing space allowed)
 *   HR         three or more of one of '-', '*', '_', optionally separated
 *              by whitespace, that is not a SETEXT2 underline
 *   PLAIN      everything else
 *
 * The line is only read; the pointer passed in is not retained.
 */
enum linetype identifyline(char *line) {
	int i;
	int delimcount;	/* how many delimiter characters were seen */
	int gap;	/* whitespace has been seen after a delimiter */
	int broken;	/* a delimiter followed such whitespace */

	/* <ctype.h> functions need an unsigned char value; a plain char
	 * holding a non-ASCII byte may be negative, which is undefined. */
	for (i = 0; i < 4; ++i) {
		if (!isspace((unsigned char) line[i]))
			break;
	}
	if (i == 4)
		return SPACECODE;

	line = truncate(line);
	if (line[0] == '\0')
		return EMPTY;

	/* A delimiting line can only contain whitespace and one of
	 * '-', '*', '_' or '='. */
	if (strchr("-*_=", line[0]) == NULL)
		return PLAIN;

	delimcount = 0;
	gap = 0;
	broken = 0;
	for (i = 0; line[i] != '\0'; ++i) {
		if (line[i] == line[0]) {
			if (gap)
				broken = 1;
			++delimcount;
		}
		else if (isspace((unsigned char) line[i]))
			gap = 1;
		else {
			/* You can't mix delimiter characters, and you can't
			 * have anything other than a delimiter character or
			 * white space. */
			return PLAIN;
		}
	}

	/* There have to be at least 3 delimiter characters */
	if (delimcount < 3)
		return PLAIN;

	/* A setext underline may not contain interior whitespace:
	 * "- - -" is a thematic break and "= = =" is ordinary text. */
	if (broken)
		return line[0] == '=' ? PLAIN : HR;

	switch (line[0]) {
	case '=':
		return SETEXT1;
	case '-':
		return SETEXT2;
	default:
		return HR;
	}
}
/* TODO: Finish this */
/*
 * Returns a pointer to the first non-whitespace character of `str` (or to
 * its terminating NUL). Despite the name nothing is cut off the end; the
 * string itself is not modified.
 */
static char *truncate(char *str) {
	/* isspace() requires an unsigned char value; a negative plain char
	 * (any non-ASCII byte where char is signed) is undefined behaviour. */
	while (isspace((unsigned char) str[0]))
		++str;
	return str;
}
/*
 * Maps a classified line to the text that should be emitted for it.
 * PLAIN lines are returned whole, SPACECODE lines are returned past their
 * first four characters, and every other type yields NULL. The result
 * points into `line`.
 */
char *realcontent(char *line, enum linetype type) {
	if (type == PLAIN)
		return line;
	if (type == SPACECODE)
		return line + 4;
	return NULL;
}

View File

@@ -21,506 +21,132 @@
#include <string.h> #include <string.h>
#include <io.h> #include <io.h>
#include <util.h>
#include <mdutil.h>
#include <template.h> #include <template.h>
enum paratype { struct parsestate {
NORMAL, enum nodetype type;
EMPTY, struct string *para;
H1, H2, H3, H4, H5, H6,
BLOCKQUOTE,
CODESPACE, CODEBACK,
UL, OL,
HL
}; };
enum inlinetype { static int parseline(char *line, struct parsestate *currstate, FILE *out);
ITALIC, static int endpara(struct parsestate *state, FILE *out);
BOLD,
CODE
};
static const struct {
char c;
char *escape;
} escapes[] = {
{'&', "&amp;"},
{';', "&semi;"},
{'<', "&lt;"},
{'>', "&gt;"},
};
static int parsepara(struct linefile *infile, FILE *outfile);
static enum paratype identifypara(char *line, char **contentret);
static char *untrail(char *line);
static size_t reallen(char *line);
static int islinebreak(char *line);
static int paraeasycase(struct linefile *infile, FILE *outfile,
char *line, char *buff,
char *tag, enum paratype type);
static int parahardcase(struct linefile *infile, FILE *outfile,
char *line, char *buff,
char *vars, char *linetag, char *tag, enum paratype type);
static int paracodecase(struct linefile *infile, FILE *outfile,
char *line, char *buff,
char *vars, enum paratype type);
static long strsearch(char *data, long start, size_t datalen, char c, int reps);
/* strsearch finds instances in data with reps repetitions of c. returns the
* last instance in the first group. For example:
*
* c = '.', reps = 2, data = " ...", returns 2
* c = '.', reps = 2, data = ".. ...", returns 4
* c = '.', reps = 1, data = " ...", returns 3
* */
static long writelinked(char *data, long i, size_t len, char *tag,
FILE *outfile);
static int writeescape(char c, FILE *outfile);
static int writedata(char *data, size_t len, FILE *outfile);
static int writesimple(char *data, size_t len, FILE *outfile);
static const char *escapedchars = "!\"#%&'()*,./:;?@[\\]^{|}~";
int parsetemplate(FILE *infile, FILE *outfile) { int parsetemplate(FILE *infile, FILE *outfile) {
struct linefile *realin; struct linefile *realin;
struct parsestate currstate;
int code;
currstate.type = NONE;
currstate.para = newstring();
realin = newlinefile(infile); realin = newlinefile(infile);
while (parsepara(realin, outfile) == 0) ; for (;;) {
char *currline;
currline = getline(realin);
if (currline == NULL) {
code = 0;
break;
}
if (parseline(currline, &currstate, outfile)) {
code = 1;
break;
}
}
endpara(&currstate, outfile);
freelinefile(realin); freelinefile(realin);
return 0; return code;
} }
static int parsepara(struct linefile *infile, FILE *outfile) { static int parseline(char *line, struct parsestate *currstate, FILE *out) {
for (;;) { enum linetype type;
char *line, *buff;
/* line exists for the explicit purpose of being freed later */
enum paratype type;
line = getline(infile); type = identifyline(line);
if (line == NULL) fflush(stdout);
switch (type) {
case EMPTY:
endpara(currstate, out);
currstate->type = NONE;
return 0;
case SETEXT1:
if (currstate->type != PARAGRAPH)
return 1; return 1;
type = identifypara(line, &buff); currstate->type = NONE;
fputs("<h1>", out);
buff = untrail(buff); fwrite(currstate->para->data, 1, currstate->para->len, out);
fputs("</h1>", out);
if (buff[0] == '\0') { resetstring(currstate->para);
free(line); return 0;
continue; case SETEXT2:
} if (currstate->type != PARAGRAPH)
goto hr;
switch (type) { currstate->type = NONE;
#define EASY_CASE(enumtype, tag) \ fputs("<h2>", out);
case enumtype: \ fwrite(currstate->para->data, 1, currstate->para->len, out);
paraeasycase(infile, outfile, line, buff, \ fputs("</h2>", out);
tag, enumtype); \ resetstring(currstate->para);
return 0; return 0;
#define HARD_CASE(enumtype, tag, linetag, vars) \ case HR: hr:
case enumtype: \ endpara(currstate, out);
parahardcase(infile, outfile, line, buff, \ currstate->type = NONE;
vars, linetag, tag, enumtype); \ fputs("<hr>", out);
return 0; return 0;
#define CODE_CASE(enumtype, vars) \ case PLAIN:
case enumtype: \ if (currstate->type != PARAGRAPH) {
paracodecase(infile, outfile, line, buff, \ endpara(currstate, out);
vars, enumtype); \ currstate->type = PARAGRAPH;
return 0;
EASY_CASE(H1, "h1");
EASY_CASE(H2, "h2");
EASY_CASE(H3, "h3");
EASY_CASE(H4, "h4");
EASY_CASE(H5, "h5");
EASY_CASE(H6, "h6");
HARD_CASE(NORMAL, "p", NULL, NULL);
HARD_CASE(BLOCKQUOTE, "blockquote", NULL, NULL);
HARD_CASE(UL, "ul", "li", NULL);
HARD_CASE(OL, "ol", "li", NULL);
CODE_CASE(CODESPACE, "class='block'");
CODE_CASE(CODEBACK, "class='block'");
case HL:
fputs("<hr />", outfile);
free(line);
return 0;
case EMPTY:
free(line);
continue;
}
}
}
static int isbreak(char *line) {
int count, i;
char whitechar;
count = 0;
whitechar = '\0';
for (i = 0; line[i] != '\0'; ++i) {
if (line[i] == line[0])
++count;
else if (line[i] == ' ' || line[i] == '\t') {
if (whitechar == '\0')
whitechar = line[i];
if (whitechar != line[i])
return 0;
} }
else else
return 0; appendcharstring(currstate->para, ' ');
appendstrstring(currstate->para, realcontent(line, type));
return 0;
/* According to the commonmark spec, this markdown:
Chapter 1
---
* Should NOT compile to this:
<p>Chapter 1</p><hr>
* but rather to this
<h2>Chapter 1</h2>
* This means that we need to store the contents of the
* paragraph and only write after obtaining the whole thing
* as to not include the wrong tags.
* */
case SPACECODE:
if (currstate->type != CODE) {
endpara(currstate, out);
currstate->type = CODE;
fputs("<code class='block'>", out);
}
else
fputs("<br>", out);
fputs(realcontent(line, type), out);
break;
} }
return count >= 3;
return 0; return 0;
} }
static enum paratype identifypara(char *line, char **contentret) { static int endpara(struct parsestate *state, FILE *out) {
int i; switch (state->type) {
for (i = 0; i < 4; ++i) { case PARAGRAPH:
if (line[i] == ' ') fputs("<p>", out);
continue; fwrite(state->para->data, 1, state->para->len, out);
if (line[i] == '\0') fputs("</p>", out);
return EMPTY; resetstring(state->para);
goto whitegone;
}
*contentret = line + i;
return CODESPACE;
whitegone:
line += i;
/* At this point, line has no extraneous trailing whitespace */
switch (line[0]) {
case '\0':
return EMPTY;
case '#':
for (i = 0; i < 6 && line[i] == '#'; ++i) ;
*contentret = line + i;
if (line[i] != '\0' && line[i] != ' ')
goto normal;
return H1 + i - 1;
case '>':
*contentret = line + 1;
return BLOCKQUOTE;
case '*':
if (isbreak(line))
return HL;
*contentret = line + 1;
return UL;
case '-': case '_':
if (isbreak(line))
return HL;
goto normal;
case '`':
for (i = 0; i < 3; ++i)
if (line[i] != '`')
goto normal;
return CODEBACK;
default:
if (isdigit(line[0])) {
for (i = 0; isdigit(line[i]); ++i) ;
if (line[i] == '.' || line[i] == ')') {
*contentret = line + i + 1;
return OL;
}
}
goto normal;
normal:
*contentret = line;
return NORMAL;
}
}
/*
 * Returns a pointer to the first non-whitespace character of `line` (or to
 * its terminating NUL). The string is not modified.
 */
static char *untrail(char *line) {
	/* isspace() requires an unsigned char value; a negative plain char
	 * (any non-ASCII byte where char is signed) is undefined behaviour. */
	while (isspace((unsigned char) line[0]))
		++line;
	return line;
}
/*
 * Returns how many leading bytes of `line` are real content: one trailing
 * backslash (the line-break marker tested by islinebreak()) is dropped
 * first, then any trailing whitespace. An empty line yields 0.
 */
static size_t reallen(char *line) {
	size_t len;

	len = strlen(line);
	/* Guard len > 0 so an empty line never reads line[-1]. */
	if (len > 0 && line[len - 1] == '\\')
		--len;
	/* Look at the last counted character, not the one after it. */
	while (len > 0 && isspace((unsigned char) line[len - 1]))
		--len;
	return len;
}
static int islinebreak(char *line) {
size_t len;
int i;
len = strlen(line);
if (line[len - 1] == '\\')
return 1;
if (len < 2)
return 0; return 0;
for (i = 0; i < 2; ++i) case CODE:
if (!isspace(line[len - i - 1])) fputs("</code>", out);
return 0; return 0;
case NONE:
return 0;
}
return 1; return 1;
} }
/*
 * Writes a run of consecutive lines of the same type as one <tag>...</tag>
 * element.
 *
 * `line` is the heap-allocated line that was just read and `buff` points at
 * its content; this function frees `line` and every further line it
 * consumes. The first line whose type differs from `type` is handed back
 * with ungetline(). Always returns 0.
 */
static int paraeasycase(struct linefile *infile, FILE *outfile,
		char *line, char *buff,
		char *tag, enum paratype type) {
	fprintf(outfile, "<%s>", tag);
	for (;;) {
		/* Measure each line separately: the length of the first
		 * line says nothing about the lines that follow it. */
		writedata(buff, reallen(buff), outfile);
		free(line);
		line = getline(infile);
		if (line == NULL)
			break;
		if (identifypara(line, &buff) != type) {
			/* Not ours: give it back and make sure the
			 * free() below does not touch it. */
			ungetline(infile, line);
			line = NULL;
			break;
		}
		buff = untrail(buff);
	}
	fprintf(outfile, "</%s>", tag);
	free(line);
	return 0;
}
/*
 * Writes a multi-line block as <tag>...</tag> (or <tag vars>...</tag> when
 * vars is not NULL). When linetag is not NULL every input line is wrapped
 * in <linetag>...</linetag> as well.
 *
 * `line` is the heap-allocated line just read and `buff` points at its
 * content; this function frees `line` and every further line it reads.
 * The block ends at end of input or at a line that is not of `type` and is
 * blank once leading whitespace is skipped. Always returns 0.
 */
static int parahardcase(struct linefile *infile, FILE *outfile,
char *line, char *buff,
char *vars, char *linetag, char *tag, enum paratype type) {
size_t writelen;
if (vars == NULL)
fprintf(outfile, "<%s>", tag);
else
fprintf(outfile, "<%s %s>", tag, vars);
for (;;) {
writelen = reallen(buff);
if (linetag != NULL)
fprintf(outfile, "<%s>", linetag);
writedata(buff, writelen, outfile);
/* islinebreak() looks at the whole line, not at the trimmed content. */
if (islinebreak(line))
fputs("<br />", outfile);
if (linetag != NULL)
fprintf(outfile, "</%s>", linetag);
free(line);
line = getline(infile);
if (line == NULL)
break;
if (identifypara(line, &buff) != type) {
/* A line of another type is still absorbed into this block unless it is
 * blank; its content is then the whole line with leading whitespace
 * skipped, not the content identifypara() reported. */
buff = untrail(line);
if (buff[0] == '\0') {
free(line);
line = NULL;
break;
}
}
else
buff = untrail(buff);
/* Lines of one block are joined with a single space. */
fputc(' ', outfile);
}
fprintf(outfile, "</%s>", tag);
free(line);
return 0;
}
/*
 * Writes a code block as <code>...</code> (or <code vars>...</code> when
 * vars is not NULL), separating its lines with <br />.
 *
 * type must be CODESPACE or CODEBACK; anything else returns 1 without
 * writing. A CODESPACE block ends at the first line of another type, which
 * is handed back with ungetline(). A CODEBACK block ends at the next
 * CODEBACK line, which is consumed and freed; CODEBACK lines themselves are
 * never written.
 *
 * Returns 0 when the block was closed and 1 on a bad type or when input
 * ends inside the block.
 */
static int paracodecase(struct linefile *infile, FILE *outfile,
char *line, char *buff,
char *vars, enum paratype type) {
int seenfirst;
enum paratype newtype;
if (type != CODESPACE && type != CODEBACK)
return 1;
if (vars == NULL)
fputs("<code>", outfile);
else
fprintf(outfile, "<code %s>", vars);
seenfirst = 0;
newtype = type;
for (;;) {
/* Emit a separator before every content line except the first one. The
 * opening CODEBACK line has newtype == type and is skipped here. */
if ((type == CODEBACK && type != newtype) ||
newtype == CODESPACE) {
if (seenfirst)
fputs("<br />", outfile);
seenfirst = 1;
}
if (newtype != CODEBACK)
/* NOTE(review): the length argument is a size_t, so -1 arrives as the
 * largest size_t value - confirm writesimple() copes with that. */
writesimple(buff, -1, outfile);
free(line);
line = getline(infile);
if (line == NULL)
/* NOTE(review): this path returns before "</code>" is written. */
return 1;
newtype = identifypara(line, &buff);
if (type == CODEBACK && newtype == CODEBACK)
break;
if (type == CODESPACE && newtype != type) {
ungetline(infile, line);
break;
}
}
fputs("</code>", outfile);
/* Only the closing CODEBACK line is still owned here; in the CODESPACE
 * case the line was handed back with ungetline(). */
if (type == CODEBACK)
free(line);
return 0;
}
/*
 * Searches data[0..datalen) for a run of at least `reps` consecutive `c`
 * characters. Any run of `c` sitting directly at `start` is skipped first.
 *
 * Returns the index at which the last `reps` characters of the first run
 * found begin, or -1 when there is no such run:
 *
 *   c = '.', reps = 2, data = " ...",   returns 2
 *   c = '.', reps = 2, data = ".. ...", returns 4
 *   c = '.', reps = 1, data = " ...",   returns 3
 *
 * No byte at or beyond data[datalen] is read.
 */
static long strsearch(char *data, long start, size_t datalen,
		char c, int reps) {
	long i;
	int j;

	/* Skip the run we are standing on, staying inside the buffer. */
	for (i = start; (size_t) i < datalen && data[i] == c; ++i) ;

	for (; (size_t) (i + reps - 1) < datalen; ++i) {
		for (j = 0; j < reps; ++j)
			if (data[i + j] != c)
				break;
		if (j < reps)
			continue;

		/* Found a match; slide to the end of the run. The bounds
		 * test comes first so data[datalen] is never touched. */
		while ((size_t) (i + reps) < datalen && data[i + reps] == c)
			++i;
		return i;
	}
	return -1;
}
/*
 * Writes a link ("a") or an image ("img") whose opening bracket is at
 * data[i]. The label runs up to the next ']' and the target up to the next
 * ')' after it; the target is taken to start two characters past the ']'
 * (presumably skipping "](" - the '(' itself is not checked).
 *
 * Returns the index of the closing ')' on success, or -1 when a delimiter
 * is missing or `tag` is neither "a" nor "img". Nothing is written in the
 * failure case.
 */
static long writelinked(char *data, long i, size_t len, char *tag,
		FILE *outfile) {
	const char *head, *middle, *tail;
	long labelend, targetend;

	labelend = strsearch(data, i, len, ']', 1);
	if (labelend < 0)
		return -1;
	targetend = strsearch(data, labelend, len, ')', 1);
	if (targetend < 0)
		return -1;

	/* Both elements have the same shape: head, target, middle, label,
	 * tail. Only the fixed pieces differ. */
	if (strcmp(tag, "a") == 0) {
		head = "<a href='";
		middle = "'>";
		tail = "</a>";
	}
	else if (strcmp(tag, "img") == 0) {
		head = "<img src='";
		middle = "' alt='";
		tail = "'>";
	}
	else
		return -1;

	fputs(head, outfile);
	writesimple(data + labelend + 2, targetend - labelend - 2, outfile);
	fputs(middle, outfile);
	writesimple(data + i + 1, labelend - i - 1, outfile);
	fputs(tail, outfile);
	return targetend;
}
/*
 * Writes `c` to outfile, replaced by its entry in the `escapes` table when
 * it has one. Always returns 0.
 */
static int writeescape(char c, FILE *outfile) {
	const size_t count = sizeof escapes / sizeof *escapes;
	size_t idx;

	idx = 0;
	while (idx < count && escapes[idx].c != c)
		++idx;

	if (idx < count)
		fputs(escapes[idx].escape, outfile);
	else
		fputc(c, outfile);
	return 0;
}
/*
 * Writes the first `len` bytes of `data` to outfile as HTML, handling
 * inline markup: emphasis with '*' or '_', backtick code spans, links,
 * images and backslash escapes. Every other character goes through
 * writeescape(). The function calls itself for the inside of emphasis and
 * code spans. Always returns 0.
 */
static int writedata(char *data, size_t len, FILE *outfile) {
long i;
long start;
long end;
for (i = 0; i < len; ++i) {
switch (data[i]) {
/* STANDOUT_CHAR(c) expands to the case for one emphasis character: a
 * doubled character looks for a closing pair and jumps to `bold`, a single
 * one looks for a closing single and jumps to `italic`. Both leave the
 * span in `start` and `end`. */
#define STANDOUT_CHAR(c) \
case c: \
if (data[i + 1] == c) { \
start = i + 2; \
end = strsearch(data, start, len, \
c, 2); \
goto bold; \
} \
start = i + 1; \
end = strsearch(data, start, len, c, 1); \
goto italic;
STANDOUT_CHAR('*');
STANDOUT_CHAR('_');
italic:
/* No closing delimiter: treat the character as ordinary text. */
if (end < 0)
goto normal;
fputs("<i>", outfile);
writedata(data + start, end - start, outfile);
fputs("</i>", outfile);
i = end;
break;
bold:
if (end < 0)
goto normal;
fputs("<b>", outfile);
writedata(data + start, end - start, outfile);
fputs("</b>", outfile);
/* Skip both characters of the closing pair. */
i = end + 1;
break;
case '`':
end = strsearch(data, i, len, '`', 1);
if (end < 0)
goto normal;
fputs("<code>", outfile);
/* NOTE(review): the span passed on starts at data + i, i.e. it includes
 * the opening backtick - confirm that is intended. */
writedata(data + i, end - i, outfile);
fputs("</code>", outfile);
i = end;
break;
case '[':
end = writelinked(data, i, len, "a", outfile);
if (end < 0)
goto normal;
i = end;
break;
case '!':
/* The bracket is assumed to follow the '!' directly; data[i + 1] is not
 * checked here. */
end = writelinked(data, i + 1, len, "img", outfile);
if (end < 0)
goto normal;
i = end;
break;
case '\\':
/* A backslash in front of a character that is not in escapedchars is
 * written literally; otherwise the backslash is dropped and the next
 * character is written as ordinary text.
 * NOTE(review): `i == len` cannot hold inside this loop (the loop
 * condition is i < len); `i + 1 == len` may have been meant. */
if (i == len ||
strchr(escapedchars, data[i+1]) == NULL) {
fputc('\\', outfile);
break;
}
++i;
goto normal;
default: normal:
writeescape(data[i], outfile);
break;
}
}
return 0;
}
/*
 * Writes `data` to outfile with HTML escaping but without interpreting any
 * inline markup. `len` is the number of bytes to write; passing
 * (size_t) -1 (callers write plain -1) means "up to the terminating NUL".
 * Always returns 0.
 */
static int writesimple(char *data, size_t len, FILE *outfile) {
	/* len is unsigned, so a "len < 0" test can never fire; recognise
	 * the -1 sentinel by its converted value instead. */
	const int untilnul = (len == (size_t) -1);
	size_t i;

	for (i = 0; untilnul ? data[i] != '\0' : i < len; ++i) {
		if (data[i] == '\\') {
			/* NOTE(review): this looks up the backslash itself,
			 * not the character after it (writedata() tests
			 * data[i+1]). Left as it was; confirm the intent. */
			if (strchr(escapedchars, data[i]) == NULL)
				fputc('\\', outfile);
		}
		writeescape(data[i], outfile);
	}
	return 0;
}

79
src/util.c Normal file
View File

@@ -0,0 +1,79 @@
/*
ncdg - A program to help generate natechoe.dev
Copyright (C) 2022 Nate Choe (natechoe9@gmail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <string.h>
#include <util.h>
/*
 * Allocates an empty growable string with a 20-byte initial buffer.
 * Returns NULL if either allocation fails; in that case nothing is left
 * allocated. Release the result with freestring().
 */
struct string *newstring() {
	struct string *ret;

	ret = malloc(sizeof *ret);
	if (ret == NULL)
		return NULL;
	ret->len = 0;
	ret->alloc = 20;
	ret->data = malloc(ret->alloc);
	if (ret->data == NULL) {
		/* Don't leak the header when the buffer can't be had. */
		free(ret);
		return NULL;
	}
	return ret;
}
/*
 * Releases a string obtained from newstring(): its buffer, then the struct
 * itself. Passing NULL is allowed and does nothing, mirroring free(), so
 * the result of a failed newstring() can be passed straight in.
 */
void freestring(struct string *str) {
	if (str == NULL)
		return;
	free(str->data);
	free(str);
}
/*
 * Appends the single character `c` to `str`, doubling the buffer first when
 * it is full. Returns 0 on success and 1 if the buffer could not be grown,
 * in which case `str` is unchanged. No NUL terminator is written.
 */
int appendcharstring(struct string *str, char c) {
	if (str->len >= str->alloc) {
		size_t grown = str->alloc * 2;
		char *moved = realloc(str->data, grown);

		if (moved == NULL)
			return 1;
		str->alloc = grown;
		str->data = moved;
	}
	str->data[str->len] = c;
	str->len += 1;
	return 0;
}
/*
 * Appends the NUL-terminated string `s` to `str` (the terminator is not
 * copied). The buffer is doubled until it is strictly larger than the new
 * length. Returns 0 on success and 1 if the buffer could not be grown, in
 * which case `str` is unchanged.
 */
int appendstrstring(struct string *str, char *s) {
	const size_t extra = strlen(s);
	const size_t needed = str->len + extra;

	if (needed >= str->alloc) {
		size_t cap = str->alloc;
		char *moved;

		do {
			cap *= 2;
		} while (needed >= cap);

		moved = realloc(str->data, cap);
		if (moved == NULL)
			return 1;
		str->alloc = cap;
		str->data = moved;
	}
	memcpy(str->data + str->len, s, extra);
	str->len = needed;
	return 0;
}
/*
 * Empties `str` so it can be reused. Only the length is cleared; the buffer
 * and its capacity are kept.
 */
void resetstring(struct string *str) {
	str->len = 0;
}