Rewrote md stuff and made it more compliant

2022-04-23 01:43:38 -05:00
parent 91e9f2d20c
commit 416edf6c8e
6 changed files with 353 additions and 480 deletions
--- a/src/template.c
+++ b/src/template.c
@@ -21,506 +21,132 @@
 #include <string.h>

 #include <io.h>
+#include <util.h>
+#include <mdutil.h>
 #include <template.h>

-enum paratype {
-	NORMAL,
-	EMPTY,
-	H1, H2, H3, H4, H5, H6,
-	BLOCKQUOTE,
-	CODESPACE, CODEBACK,
-	UL, OL,
-	HL
+struct parsestate {
+	enum nodetype type;
+	struct string *para;
 };

-enum inlinetype {
-	ITALIC,
-	BOLD,
-	CODE
-};
-
-static const struct {
-	char c;
-	char *escape;
-} escapes[] = {
-	{'&', "&amp;"},
-	{';', "&semi;"},
-	{'<', "&lt;"},
-	{'>', "&gt;"},
-};
-
-static int parsepara(struct linefile *infile, FILE *outfile);
-static enum paratype identifypara(char *line, char **contentret);
-
-static char *untrail(char *line);
-static size_t reallen(char *line);
-static int islinebreak(char *line);
-
-static int paraeasycase(struct linefile *infile, FILE *outfile,
-		char *line, char *buff,
-		char *tag, enum paratype type);
-static int parahardcase(struct linefile *infile, FILE *outfile,
-		char *line, char *buff,
-		char *vars, char *linetag, char *tag, enum paratype type);
-static int paracodecase(struct linefile *infile, FILE *outfile,
-		char *line, char *buff,
-		char *vars, enum paratype type);
-static long strsearch(char *data, long start, size_t datalen, char c, int reps);
-/* strsearch finds instances in data with reps repetitions of c. returns the
- * last instance in the first group. For example:
- *
- * c = '.', reps = 2, data = " ...", returns 2
- * c = '.', reps = 2, data = ".. ...", returns 4
- * c = '.', reps = 1, data = " ...", returns 3
- * */
-
-static long writelinked(char *data, long i, size_t len, char *tag,
-		FILE *outfile);
-
-static int writeescape(char c, FILE *outfile);
-static int writedata(char *data, size_t len, FILE *outfile);
-static int writesimple(char *data, size_t len, FILE *outfile);
-
-static const char *escapedchars = "!\"#%&'()*,./:;?@[\\]^{|}~";
+static int parseline(char *line, struct parsestate *currstate, FILE *out);
+static int endpara(struct parsestate *state, FILE *out);

 int parsetemplate(FILE *infile, FILE *outfile) {
 	struct linefile *realin;
+	struct parsestate currstate;
+	int code;
+
+	currstate.type = NONE;
+	currstate.para = newstring();
+
 	realin = newlinefile(infile);
-	while (parsepara(realin, outfile) == 0) ;
+	for (;;) {
+		char *currline;
+		currline = getline(realin);
+		if (currline == NULL) {
+			code = 0;
+			break;
+		}
+		if (parseline(currline, &currstate, outfile)) {
+			code = 1;
+			break;
+		}
+	}
+	endpara(&currstate, outfile);
 	freelinefile(realin);

-	return 0;
+	return code;
 }

-static int parsepara(struct linefile *infile, FILE *outfile) {
-	for (;;) {
-		char *line, *buff;
-		/* line exists for the explicit purpose of being freed later */
-		enum paratype type;
+static int parseline(char *line, struct parsestate *currstate, FILE *out) {
+	enum linetype type;

-		line = getline(infile);
-		if (line == NULL)
+	type = identifyline(line);
+	fflush(stdout);
+
+	switch (type) {
+	case EMPTY:
+		endpara(currstate, out);
+		currstate->type = NONE;
+		return 0;
+	case SETEXT1:
+		if (currstate->type != PARAGRAPH)
 			return 1;
-		type = identifypara(line, &buff);
-
-		buff = untrail(buff);
-
-		if (buff[0] == '\0') {
-			free(line);
-			continue;
-		}
-
-		switch (type) {
-#define EASY_CASE(enumtype, tag) \
-		case enumtype: \
-			paraeasycase(infile, outfile, line, buff, \
-					tag, enumtype); \
-			return 0;
-#define HARD_CASE(enumtype, tag, linetag, vars) \
-		case enumtype: \
-			parahardcase(infile, outfile, line, buff, \
-					vars, linetag, tag, enumtype); \
-			return 0;
-#define CODE_CASE(enumtype, vars) \
-		case enumtype: \
-			paracodecase(infile, outfile, line, buff, \
-					vars, enumtype); \
-			return 0;
-		EASY_CASE(H1, "h1");
-		EASY_CASE(H2, "h2");
-		EASY_CASE(H3, "h3");
-		EASY_CASE(H4, "h4");
-		EASY_CASE(H5, "h5");
-		EASY_CASE(H6, "h6");
-		HARD_CASE(NORMAL, "p", NULL, NULL);
-		HARD_CASE(BLOCKQUOTE, "blockquote", NULL, NULL);
-		HARD_CASE(UL, "ul", "li", NULL);
-		HARD_CASE(OL, "ol", "li", NULL);
-		CODE_CASE(CODESPACE, "class='block'");
-		CODE_CASE(CODEBACK, "class='block'");
-		case HL:
-			fputs("<hr />", outfile);
-			free(line);
-			return 0;
-		case EMPTY:
-			free(line);
-			continue;
-		}
-	}
-}
-
-static int isbreak(char *line) {
-	int count, i;
-	char whitechar;
-	count = 0;
-	whitechar = '\0';
-	for (i = 0; line[i] != '\0'; ++i) {
-		if (line[i] == line[0])
-			++count;
-		else if (line[i] == ' ' || line[i] == '\t') {
-			if (whitechar == '\0')
-				whitechar = line[i];
-			if (whitechar != line[i])
-				return 0;
+		currstate->type = NONE;
+		fputs("<h1>", out);
+		fwrite(currstate->para->data, 1, currstate->para->len, out);
+		fputs("</h1>", out);
+		resetstring(currstate->para);
+		return 0;
+	case SETEXT2:
+		if (currstate->type != PARAGRAPH)
+			goto hr;
+		currstate->type = NONE;
+		fputs("<h2>", out);
+		fwrite(currstate->para->data, 1, currstate->para->len, out);
+		fputs("</h2>", out);
+		resetstring(currstate->para);
+		return 0;
+	case HR: hr:
+		endpara(currstate, out);
+		currstate->type = NONE;
+		fputs("<hr>", out);
+		return 0;
+	case PLAIN:
+		if (currstate->type != PARAGRAPH) {
+			endpara(currstate, out);
+			currstate->type = PARAGRAPH;
 		}
 		else
-			return 0;
+			appendcharstring(currstate->para, ' ');
+		appendstrstring(currstate->para, realcontent(line, type));
+		return 0;
+		/* According to the commonmark spec, this markdown:
+
+		Chapter 1
+		---
+
+		 * Should NOT compile to this:
+
+		<p>Chapter 1</p><hr>
+
+		 * but rather to this
+
+		<h2>Chapter 1</h2>
+
+		 * This means that we need to store the contents of the
+		 * paragraph and only write after obtaining the whole thing
+		 * as to not include the wrong tags.
+		 * */
+	case SPACECODE:
+		if (currstate->type != CODE) {
+			endpara(currstate, out);
+			currstate->type = CODE;
+			fputs("<code class='block'>", out);
+		}
+		else
+			fputs("<br>", out);
+		fputs(realcontent(line, type), out);
+		break;
 	}
-	return count >= 3;
 	return 0;
 }

-static enum paratype identifypara(char *line, char **contentret) {
-	int i;
-	for (i = 0; i < 4; ++i) {
-		if (line[i] == ' ')
-			continue;
-		if (line[i] == '\0')
-			return EMPTY;
-		goto whitegone;
-	}
-
-	*contentret = line + i;
-	return CODESPACE;
-
-whitegone:
-	line += i;
-	/* At this point, line has no extraneous trailing whitespace */
-	switch (line[0]) {
-	case '\0':
-		return EMPTY;
-	case '#':
-		for (i = 0; i < 6 && line[i] == '#'; ++i) ;
-		*contentret = line + i;
-		if (line[i] != '\0' && line[i] != ' ')
-			goto normal;
-		return H1 + i - 1;
-	case '>':
-		*contentret = line + 1;
-		return BLOCKQUOTE;
-	case '*':
-		if (isbreak(line))
-			return HL;
-		*contentret = line + 1;
-		return UL;
-	case '-': case '_':
-		if (isbreak(line))
-			return HL;
-		goto normal;
-	case '`':
-		for (i = 0; i < 3; ++i)
-			if (line[i] != '`')
-				goto normal;
-		return CODEBACK;
-	default:
-		if (isdigit(line[0])) {
-			for (i = 0; isdigit(line[i]); ++i) ;
-			if (line[i] == '.' || line[i] == ')') {
-				*contentret = line + i + 1;
-				return OL;
-			}
-		}
-		goto normal;
-	normal:
-		*contentret = line;
-		return NORMAL;
-	}
-}
-
-static char *untrail(char *line) {
-	while (isspace(line[0]))
-		++line;
-	return line;
-}
-
-static size_t reallen(char *line) {
-	size_t fakelen;
-	fakelen = strlen(line);
-	if (line[fakelen - 1] == '\\')
-		--fakelen;
-	while (isspace(line[fakelen]))
-		--fakelen;
-	return fakelen;
-}
-
-static int islinebreak(char *line) {
-	size_t len;
-	int i;
-	len = strlen(line);
-	if (line[len - 1] == '\\')
-		return 1;
-	if (len < 2)
+static int endpara(struct parsestate *state, FILE *out) {
+	switch (state->type) {
+	case PARAGRAPH:
+		fputs("<p>", out);
+		fwrite(state->para->data, 1, state->para->len, out);
+		fputs("</p>", out);
+		resetstring(state->para);
 		return 0;
-	for (i = 0; i < 2; ++i)
-		if (!isspace(line[len - i - 1]))
-			return 0;
+	case CODE:
+		fputs("</code>", out);
+		return 0;
+	case NONE:
+		return 0;
+	}
 	return 1;
 }
-
-static int paraeasycase(struct linefile *infile, FILE *outfile,
-		char *line, char *buff,
-		char *tag, enum paratype type) {
-	size_t writelen;
-
-	writelen = reallen(buff);
-
-	fprintf(outfile, "<%s>", tag);
-	for (;;) {
-		writedata(buff, writelen, outfile);
-		free(line);
-		line = getline(infile);
-		if (line == NULL)
-			break;
-		if (identifypara(line, &buff) != type) {
-			ungetline(infile, line);
-			line = NULL;
-			break;
-		}
-		else
-			buff = untrail(buff);
-	}
-	fprintf(outfile, "</%s>", tag);
-
-	free(line);
-	return 0;
-}
-
-static int parahardcase(struct linefile *infile, FILE *outfile,
-		char *line, char *buff,
-		char *vars, char *linetag, char *tag, enum paratype type) {
-	size_t writelen;
-
-	if (vars == NULL)
-		fprintf(outfile, "<%s>", tag);
-	else
-		fprintf(outfile, "<%s %s>", tag, vars);
-	for (;;) {
-		writelen = reallen(buff);
-
-		if (linetag != NULL)
-			fprintf(outfile, "<%s>", linetag);
-		writedata(buff, writelen, outfile);
-		if (islinebreak(line))
-			fputs("<br />", outfile);
-		if (linetag != NULL)
-			fprintf(outfile, "</%s>", linetag);
-
-		free(line);
-		line = getline(infile);
-		if (line == NULL)
-			break;
-		if (identifypara(line, &buff) != type) {
-			buff = untrail(line);
-			if (buff[0] == '\0') {
-				free(line);
-				line = NULL;
-				break;
-			}
-		}
-		else
-			buff = untrail(buff);
-		fputc(' ', outfile);
-	}
-	fprintf(outfile, "</%s>", tag);
-
-	free(line);
-	return 0;
-}
-
-static int paracodecase(struct linefile *infile, FILE *outfile,
-		char *line, char *buff,
-		char *vars, enum paratype type) {
-	int seenfirst;
-	enum paratype newtype;
-
-	if (type != CODESPACE && type != CODEBACK)
-		return 1;
-
-	if (vars == NULL)
-		fputs("<code>", outfile);
-	else
-		fprintf(outfile, "<code %s>", vars);
-	seenfirst = 0;
-	newtype = type;
-	for (;;) {
-		if ((type == CODEBACK && type != newtype) ||
-		     newtype == CODESPACE) {
-			if (seenfirst)
-				fputs("<br />", outfile);
-			seenfirst = 1;
-		}
-
-		if (newtype != CODEBACK)
-			writesimple(buff, -1, outfile);
-
-		free(line);
-		line = getline(infile);
-		if (line == NULL)
-			return 1;
-
-		newtype = identifypara(line, &buff);
-		if (type == CODEBACK && newtype == CODEBACK)
-			break;
-		if (type == CODESPACE && newtype != type) {
-			ungetline(infile, line);
-			break;
-		}
-	}
-	fputs("</code>", outfile);
-
-	if (type == CODEBACK)
-		free(line);
-	return 0;
-}
-
-static long strsearch(char *data, long start, size_t datalen,
-		char c, int reps) {
-	long i;
-
-	for (i = start; data[i] == c; ++i) ;
-
-	while (i + reps - 1 < datalen) {
-		int j;
-		for (j = 0; j < reps; ++j)
-			if (data[i + j] != c)
-				goto failure;
-		goto success;
-		continue;
-failure:
-		++i;
-	}
-	return -1;
-
-success:
-	while (data[i + reps] == c && i + reps < datalen)
-		++i;
-	return i;
-}
-
-static long writelinked(char *data, long i, size_t len, char *tag,
-		FILE *outfile) {
-	long linkend, textend;
-	textend = strsearch(data, i, len, ']', 1);
-	if (textend < 0)
-		return -1;
-	linkend = strsearch(data, textend, len, ')', 1);
-	if (linkend < 0)
-		return -1;
-	if (strcmp(tag, "a") == 0) {
-		fputs("<a href='", outfile);
-		writesimple(data + textend + 2,
-				linkend - textend - 2, outfile);
-		fputs("'>", outfile);
-		writesimple(data + i + 1,
-				textend - i - 1, outfile);
-		fputs("</a>", outfile);
-		return linkend;
-	}
-	else if (strcmp(tag, "img") == 0) {
-		fputs("<img src='", outfile);
-		writesimple(data + textend + 2,
-				linkend - textend - 2, outfile);
-		fputs("' alt='", outfile);
-		writesimple(data + i + 1,
-				textend - i - 1, outfile);
-		fputs("'>", outfile);
-		return linkend;
-	}
-	return -1;
-}
-
-static int writeescape(char c, FILE *outfile) {
-	int i;
-	for (i = 0; i < sizeof escapes / sizeof *escapes; ++i) {
-		if (escapes[i].c == c) {
-			fputs(escapes[i].escape, outfile);
-			return 0;
-		}
-	}
-	fputc(c, outfile);
-	return 0;
-}
-
-static int writedata(char *data, size_t len, FILE *outfile) {
-	long i;
-	long start;
-	long end;
-	for (i = 0; i < len; ++i) {
-		switch (data[i]) {
-#define STANDOUT_CHAR(c) \
-		case c: \
-			if (data[i + 1] == c) { \
-				start = i + 2; \
-				end = strsearch(data, start, len, \
-						c, 2); \
-				goto bold; \
-			} \
-			start = i + 1; \
-			end = strsearch(data, start, len, c, 1); \
-			goto italic;
-		STANDOUT_CHAR('*');
-		STANDOUT_CHAR('_');
-		italic:
-			if (end < 0)
-				goto normal;
-			fputs("<i>", outfile);
-			writedata(data + start, end - start, outfile);
-			fputs("</i>", outfile);
-			i = end;
-			break;
-		bold:
-			if (end < 0)
-				goto normal;
-			fputs("<b>", outfile);
-			writedata(data + start, end - start, outfile);
-			fputs("</b>", outfile);
-			i = end + 1;
-			break;
-
-		case '`':
-			end = strsearch(data, i, len, '`', 1);
-			if (end < 0)
-				goto normal;
-			fputs("<code>", outfile);
-			writedata(data + i, end - i, outfile);
-			fputs("</code>", outfile);
-			i = end;
-			break;
-		case '[':
-			end = writelinked(data, i, len, "a", outfile);
-			if (end < 0)
-				goto normal;
-			i = end;
-			break;
-		case '!':
-			end = writelinked(data, i + 1, len, "img", outfile);
-			if (end < 0)
-				goto normal;
-			i = end;
-			break;
-		case '\\':
-			if (i == len ||
-				strchr(escapedchars, data[i+1]) == NULL) {
-				fputc('\\', outfile);
-				break;
-			}
-			++i;
-			goto normal;
-		default: normal:
-			writeescape(data[i], outfile);
-			break;
-		}
-	}
-	return 0;
-}
-
-static int writesimple(char *data, size_t len, FILE *outfile) {
-	long i;
-	for (i = 0; (len < 0 && data[i] != '\0') || i < len; ++i) {
-		if (data[i] == '\\')
-			if (strchr(escapedchars, data[i]) == NULL)
-				fputc('\\', outfile);
-		writeescape(data[i], outfile);
-	}
-	return 0;
-}