Rewrote md stuff and made it more compliant

2022-04-23 01:43:38 -05:00
parent 91e9f2d20c
commit 416edf6c8e
6 changed files with 353 additions and 480 deletions
--- a/src/include/io.h
+++ b/src/include/io.h
@@ -16,6 +16,9 @@
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 #ifndef HAVE_IO
 #define HAVE_IO
 #include <stdio.h>
 struct linefile {
@@ -27,3 +30,5 @@ void ungetline(struct linefile *file, char *line);
 char *getline(struct linefile *file);
 struct linefile *newlinefile(FILE *file);
 void freelinefile(struct linefile *file);
 #endif
--- a/src/include/mdutil.h
+++ b/src/include/mdutil.h
@@ -0,0 +1,42 @@
 /*
   ncdg - A program to help generate natechoe.dev
   Copyright (C) 2022  Nate Choe (natechoe9@gmail.com)
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 #ifndef HAVE_MDUTIL
 #define HAVE_MDUTIL
 /* The classification identifyline() gives to a single line of input. */
 enum linetype {
 	EMPTY,
 	/* Only white space, and fewer than four characters of it */
 	PLAIN,
 	/* Anything that fits none of the other categories */
 	SPACECODE,
 	/* The first four characters are all white space */
 	HR,
 	/* Three or more '*' or three or more '_', with nothing else on the
 	 * line except white space */
 	SETEXT1,
 	/* === (three or more '=', with nothing else except white space) */
 	SETEXT2
 	/* --- (three or more '-', with nothing else except white space) */
 };
 /* The kind of block the parser currently has open (see struct parsestate in
  * template.c). */
 enum nodetype {
 	PARAGRAPH,
 	/* Plain text is being collected; it is written out later as a
 	 * paragraph or a setext heading */
 	CODE,
 	/* A <code class='block'> element has been opened */
 	NONE
 	/* No block is open */
 };
 /* Classifies one NUL terminated line as one of the linetype values. */
 enum linetype identifyline(char *line);
 /* Returns the text a line of the given type contributes: line itself for
  * PLAIN, line + 4 for SPACECODE, and NULL for every other type. */
 char *realcontent(char *line, enum linetype type);
 #endif
--- a/src/include/util.h
+++ b/src/include/util.h
@@ -0,0 +1,35 @@
 /*
   ncdg - A program to help generate natechoe.dev
   Copyright (C) 2022  Nate Choe (natechoe9@gmail.com)
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 #ifndef HAVE_UTIL
 #define HAVE_UTIL
 #include <stddef.h>
 /* A growable byte buffer. The functions in util.c never write a terminating
  * NUL into data, so len is the only record of how much of it is in use. */
 struct string {
 	size_t len;
 	/* Number of bytes of data currently in use */
 	size_t alloc;
 	/* Number of bytes allocated for data */
 	char *data;
 	/* The buffer itself, obtained from malloc()/realloc() */
 };
 /* Allocates an empty string. Returns NULL if allocation fails. */
 struct string *newstring();
 /* Frees str->data and then str itself. */
 void freestring(struct string *str);
 /* Appends the single character c to str. Returns 0 on success and 1 if the
  * buffer could not be grown. */
 int appendcharstring(struct string *str, char c);
 /* Appends the NUL terminated string s (without its NUL) to str. Returns 0 on
  * success and 1 if the buffer could not be grown. */
 int appendstrstring(struct string *str, char *s);
 /* Sets the length of str back to 0; the buffer is kept. */
 void resetstring(struct string *str);
 #endif
--- a/src/mdutil.c
+++ b/src/mdutil.c
@@ -0,0 +1,86 @@
 /*
   ncdg - A program to help generate natechoe.dev
   Copyright (C) 2022  Nate Choe (natechoe9@gmail.com)
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 #include <ctype.h>
 #include <string.h>
 #include <stddef.h>
 #include <mdutil.h>
 static char *truncate(char *str);
 /* Classifies one line of markdown.
  *
  * line must be a NUL terminated string. The result is
  *   SPACECODE if the first four characters are all white space,
  *   EMPTY     if nothing is left once leading white space is skipped,
  *   SETEXT1, SETEXT2 or HR if the line is a delimiting line made of '=',
  *             '-' or one of '*' / '_' respectively,
  *   PLAIN     otherwise. */
 enum linetype identifyline(char *line) {
 	int i;
 	int delimcount;
 	/* The <ctype.h> functions are only defined for EOF and values that
 	 * fit in an unsigned char. Where char is signed, every byte above
 	 * 0x7f (i.e. any UTF-8 text) is negative, so each character is
 	 * converted before it is classified. */
 	for (i = 0; i < 4; ++i) {
 		/* The terminating NUL is not white space, so this also stops
 		 * at the end of lines shorter than 4 characters. */
 		if (!isspace((unsigned char) line[i]))
 			break;
 	}
 	if (i == 4)
 		return SPACECODE;
 	line = truncate(line);
 	if (line[0] == '\0')
 		return EMPTY;
 	if (strchr("-*_=", line[0]) == NULL)
 		return PLAIN;
 	/* A delimiting line can only contain one kind of delimiter character
 	 * and white space. You can't mix delimiter characters, and you can't
 	 * have anything else. */
 	delimcount = 0;
 	for (i = 0; line[i] != '\0'; ++i) {
 		if (line[i] == line[0])
 			++delimcount;
 		else if (!isspace((unsigned char) line[i]))
 			return PLAIN;
 	}
 	/* There has to be at least 3 delimiter characters */
 	if (delimcount < 3)
 		return PLAIN;
 	switch (line[0]) {
 	case '=':
 		return SETEXT1;
 	case '-':
 		return SETEXT2;
 	default:
 		return HR;
 	}
 }
 /* TODO: Finish this */
 /* Returns a pointer to the first character of str that is not white space,
  * or to its terminating NUL if there is no such character. str itself is not
  * modified. */
 static char *truncate(char *str) {
 	/* isspace() is only defined for EOF and unsigned char values; a plain
 	 * char holding a byte above 0x7f may be negative, so convert first. */
 	while (isspace((unsigned char) *str))
 		++str;
 	return str;
 }
 /* Maps a classified line to the text it contributes. A PLAIN line is used
  * whole, a SPACECODE line loses its 4 character indent, and every other type
  * carries no text at all, which is reported as NULL. */
 char *realcontent(char *line, enum linetype type) {
 	if (type == PLAIN)
 		return line;
 	if (type == SPACECODE)
 		return line + 4;
 	return NULL;
 }
--- a/src/template.c
+++ b/src/template.c
@@ -21,506 +21,132 @@
 #include <string.h>
 #include <io.h>
 #include <util.h>
 #include <mdutil.h>
 #include <template.h>
-enum paratype {
+struct parsestate {
-	NORMAL,
+	enum nodetype type;
-	EMPTY,
+	struct string *para;
 	H1, H2, H3, H4, H5, H6,
 	BLOCKQUOTE,
 	CODESPACE, CODEBACK,
 	UL, OL,
 	HL
 };
-enum inlinetype {
+static int parseline(char *line, struct parsestate *currstate, FILE *out);
-	ITALIC,
+static int endpara(struct parsestate *state, FILE *out);
 	BOLD,
 	CODE
 };
 /* Characters that are replaced by an HTML entity when written; writeescape()
  * searches this table. */
 static const struct {
 	char c;
 	/* The character to replace */
 	char *escape;
 	/* The text written in its place */
 } escapes[] = {
 	{'&', "&amp;"},
 	{';', "&semi;"},
 	{'<', "&lt;"},
 	{'>', "&gt;"},
 };
 static int parsepara(struct linefile *infile, FILE *outfile);
 static enum paratype identifypara(char *line, char **contentret);
 static char *untrail(char *line);
 static size_t reallen(char *line);
 static int islinebreak(char *line);
 static int paraeasycase(struct linefile *infile, FILE *outfile,
 		char *line, char *buff,
 		char *tag, enum paratype type);
 static int parahardcase(struct linefile *infile, FILE *outfile,
 		char *line, char *buff,
 		char *vars, char *linetag, char *tag, enum paratype type);
 static int paracodecase(struct linefile *infile, FILE *outfile,
 		char *line, char *buff,
 		char *vars, enum paratype type);
 static long strsearch(char *data, long start, size_t datalen, char c, int reps);
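 /* strsearch skips any c characters sitting directly at start, then finds the
 * first run of at least reps consecutive c characters in data. It returns the
 * index at which the last reps characters of that run begin. For example: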
 *
 * c = '.', reps = 2, data = " ...", returns 2
 * c = '.', reps = 2, data = ".. ...", returns 4
 * c = '.', reps = 1, data = " ...", returns 3
 * */
 static long writelinked(char *data, long i, size_t len, char *tag,
 		FILE *outfile);
 static int writeescape(char c, FILE *outfile);
 static int writedata(char *data, size_t len, FILE *outfile);
 static int writesimple(char *data, size_t len, FILE *outfile);
 static const char *escapedchars = "!\"#%&'()*,./:;?@[\\]^{|}~";
 int parsetemplate(FILE *infile, FILE *outfile) {
 	struct linefile *realin;
 	struct parsestate currstate;
 	int code;
 	currstate.type = NONE;
 	currstate.para = newstring();
 	realin = newlinefile(infile);
-	while (parsepara(realin, outfile) == 0) ;
+	for (;;) {
 		char *currline;
 		currline = getline(realin);
 		if (currline == NULL) {
 			code = 0;
 			break;
 		}
 		if (parseline(currline, &currstate, outfile)) {
 			code = 1;
 			break;
 		}
 	}
 	endpara(&currstate, outfile);
 	freelinefile(realin);
-	return 0;
+	return code;
 }
-static int parsepara(struct linefile *infile, FILE *outfile) {
+static int parseline(char *line, struct parsestate *currstate, FILE *out) {
-	for (;;) {
+	enum linetype type;
 		char *line, *buff;
 		/* line exists for the explicit purpose of being freed later */
 		enum paratype type;
-		line = getline(infile);
+	type = identifyline(line);
-		if (line == NULL)
+	fflush(stdout);
 	switch (type) {
 	case EMPTY:
 		endpara(currstate, out);
 		currstate->type = NONE;
 		return 0;
 	case SETEXT1:
 		if (currstate->type != PARAGRAPH)
 			return 1;
-		type = identifypara(line, &buff);
+		currstate->type = NONE;
-
+		fputs("<h1>", out);
-		buff = untrail(buff);
+		fwrite(currstate->para->data, 1, currstate->para->len, out);
-
+		fputs("</h1>", out);
-		if (buff[0] == '\0') {
+		resetstring(currstate->para);
-			free(line);
+		return 0;
-			continue;
+	case SETEXT2:
-		}
+		if (currstate->type != PARAGRAPH)
-
+			goto hr;
-		switch (type) {
+		currstate->type = NONE;
-#define EASY_CASE(enumtype, tag) \
+		fputs("<h2>", out);
-		case enumtype: \
+		fwrite(currstate->para->data, 1, currstate->para->len, out);
-			paraeasycase(infile, outfile, line, buff, \
+		fputs("</h2>", out);
-					tag, enumtype); \
+		resetstring(currstate->para);
-			return 0;
+		return 0;
-#define HARD_CASE(enumtype, tag, linetag, vars) \
+	case HR: hr:
-		case enumtype: \
+		endpara(currstate, out);
-			parahardcase(infile, outfile, line, buff, \
+		currstate->type = NONE;
-					vars, linetag, tag, enumtype); \
+		fputs("<hr>", out);
-			return 0;
+		return 0;
-#define CODE_CASE(enumtype, vars) \
+	case PLAIN:
-		case enumtype: \
+		if (currstate->type != PARAGRAPH) {
-			paracodecase(infile, outfile, line, buff, \
+			endpara(currstate, out);
-					vars, enumtype); \
+			currstate->type = PARAGRAPH;
 			return 0;
 		EASY_CASE(H1, "h1");
 		EASY_CASE(H2, "h2");
 		EASY_CASE(H3, "h3");
 		EASY_CASE(H4, "h4");
 		EASY_CASE(H5, "h5");
 		EASY_CASE(H6, "h6");
 		HARD_CASE(NORMAL, "p", NULL, NULL);
 		HARD_CASE(BLOCKQUOTE, "blockquote", NULL, NULL);
 		HARD_CASE(UL, "ul", "li", NULL);
 		HARD_CASE(OL, "ol", "li", NULL);
 		CODE_CASE(CODESPACE, "class='block'");
 		CODE_CASE(CODEBACK, "class='block'");
 		case HL:
 			fputs("<hr />", outfile);
 			free(line);
 			return 0;
 		case EMPTY:
 			free(line);
 			continue;
 		}
 	}
 }
 static int isbreak(char *line) {
 	int count, i;
 	char whitechar;
 	count = 0;
 	whitechar = '\0';
 	for (i = 0; line[i] != '\0'; ++i) {
 		if (line[i] == line[0])
 			++count;
 		else if (line[i] == ' ' || line[i] == '\t') {
 			if (whitechar == '\0')
 				whitechar = line[i];
 			if (whitechar != line[i])
 				return 0;
 		}
 		else
-			return 0;
+			appendcharstring(currstate->para, ' ');
 		appendstrstring(currstate->para, realcontent(line, type));
 		return 0;
 		/* According to the commonmark spec, this markdown:
 		Chapter 1
 		---
 		 * Should NOT compile to this:
 		<p>Chapter 1</p><hr>
 		 * but rather to this
 		<h2>Chapter 1</h2>
 		 * This means that we need to store the contents of the
 		 * paragraph and only write after obtaining the whole thing
 		 * as to not include the wrong tags.
 		 * */
 	case SPACECODE:
 		if (currstate->type != CODE) {
 			endpara(currstate, out);
 			currstate->type = CODE;
 			fputs("<code class='block'>", out);
 		}
 		else
 			fputs("<br>", out);
 		fputs(realcontent(line, type), out);
 		break;
 	}
 	return count >= 3;
 	return 0;
 }
-static enum paratype identifypara(char *line, char **contentret) {
+static int endpara(struct parsestate *state, FILE *out) {
-	int i;
+	switch (state->type) {
-	for (i = 0; i < 4; ++i) {
+	case PARAGRAPH:
-		if (line[i] == ' ')
+		fputs("<p>", out);
-			continue;
+		fwrite(state->para->data, 1, state->para->len, out);
-		if (line[i] == '\0')
+		fputs("</p>", out);
-			return EMPTY;
+		resetstring(state->para);
 		goto whitegone;
 	}
 	*contentret = line + i;
 	return CODESPACE;
 whitegone:
 	line += i;
 	/* At this point, line has no extraneous trailing whitespace */
 	switch (line[0]) {
 	case '\0':
 		return EMPTY;
 	case '#':
 		for (i = 0; i < 6 && line[i] == '#'; ++i) ;
 		*contentret = line + i;
 		if (line[i] != '\0' && line[i] != ' ')
 			goto normal;
 		return H1 + i - 1;
 	case '>':
 		*contentret = line + 1;
 		return BLOCKQUOTE;
 	case '*':
 		if (isbreak(line))
 			return HL;
 		*contentret = line + 1;
 		return UL;
 	case '-': case '_':
 		if (isbreak(line))
 			return HL;
 		goto normal;
 	case '`':
 		for (i = 0; i < 3; ++i)
 			if (line[i] != '`')
 				goto normal;
 		return CODEBACK;
 	default:
 		if (isdigit(line[0])) {
 			for (i = 0; isdigit(line[i]); ++i) ;
 			if (line[i] == '.' || line[i] == ')') {
 				*contentret = line + i + 1;
 				return OL;
 			}
 		}
 		goto normal;
 	normal:
 		*contentret = line;
 		return NORMAL;
 	}
 }
 /* Returns a pointer to the first character of line that is not white space,
  * or to its terminating NUL if there is no such character. */
 static char *untrail(char *line) {
 	/* isspace() is only defined for EOF and unsigned char values; a plain
 	 * char holding a byte above 0x7f may be negative, so convert first. */
 	while (isspace((unsigned char) *line))
 		++line;
 	return line;
 }
 /* Returns how many leading bytes of line are worth writing. One trailing
  * backslash (which islinebreak() treats as a line break marker) is dropped
  * first, then any white space left at the end. An empty line has length 0. */
 static size_t reallen(char *line) {
 	size_t len;
 	len = strlen(line);
 	/* Checking len first keeps line[len - 1] in bounds for "". */
 	if (len > 0 && line[len - 1] == '\\')
 		--len;
 	/* line[len] is the first byte that is NOT kept, so the byte to test
 	 * is line[len - 1]. Testing line[len] would never strip anything. */
 	while (len > 0 && isspace((unsigned char) line[len - 1]))
 		--len;
 	return len;
 }
 static int islinebreak(char *line) {
 	size_t len;
 	int i;
 	len = strlen(line);
 	if (line[len - 1] == '\\')
 		return 1;
 	if (len < 2)
 		return 0;
-	for (i = 0; i < 2; ++i)
+	case CODE:
-		if (!isspace(line[len - i - 1]))
+		fputs("</code>", out);
-			return 0;
+		return 0;
 	case NONE:
 		return 0;
 	}
 	return 1;
 }
 /* Writes a run of consecutive lines of the same type as one <tag> element.
  *
  * line is the line currently being handled and is passed to free() here;
  * buff points at the content to write for it. Further lines are read from
  * infile for as long as identifypara() reports the same type. The first line
  * of a different type is handed back with ungetline() instead of being
  * freed. Always returns 0. */
 static int paraeasycase(struct linefile *infile, FILE *outfile,
 		char *line, char *buff,
 		char *tag, enum paratype type) {
 	size_t writelen;
 	/* NOTE(review): writelen is computed from the first line only and is
 	 * reused for every later line in the loop - confirm this is intended. */
 	writelen = reallen(buff);
 	fprintf(outfile, "<%s>", tag);
 	for (;;) {
 		writedata(buff, writelen, outfile);
 		free(line);
 		line = getline(infile);
 		if (line == NULL)
 			break;
 		if (identifypara(line, &buff) != type) {
 			ungetline(infile, line);
 			/* The line was given back to infile, so the free()
 			 * after the loop must not see it. */
 			line = NULL;
 			break;
 		}
 		else
 			buff = untrail(buff);
 	}
 	fprintf(outfile, "</%s>", tag);
 	free(line);
 	return 0;
 }
 /* Writes a multi-line block as one <tag> element, with vars (if not NULL)
  * written inside the opening tag.
  *
  * Every line is written with writedata(), wrapped in <linetag> when linetag
  * is not NULL, and followed by "<br />" when islinebreak() says so. Lines are
  * separated by a single space. The block ends at end of input, or at a line
  * of a different type that is empty once leading white space is skipped; a
  * non-empty line of a different type is written as part of this block. line
  * is passed to free() here. Always returns 0. */
 static int parahardcase(struct linefile *infile, FILE *outfile,
 		char *line, char *buff,
 		char *vars, char *linetag, char *tag, enum paratype type) {
 	size_t writelen;
 	if (vars == NULL)
 		fprintf(outfile, "<%s>", tag);
 	else
 		fprintf(outfile, "<%s %s>", tag, vars);
 	for (;;) {
 		writelen = reallen(buff);
 		if (linetag != NULL)
 			fprintf(outfile, "<%s>", linetag);
 		writedata(buff, writelen, outfile);
 		if (islinebreak(line))
 			fputs("<br />", outfile);
 		if (linetag != NULL)
 			fprintf(outfile, "</%s>", linetag);
 		free(line);
 		line = getline(infile);
 		if (line == NULL)
 			break;
 		if (identifypara(line, &buff) != type) {
 			/* A line of another type is taken whole, ignoring
 			 * whatever content identifypara() picked out. */
 			buff = untrail(line);
 			if (buff[0] == '\0') {
 				free(line);
 				/* Already freed; keep the free() after the
 				 * loop from seeing it again. */
 				line = NULL;
 				break;
 			}
 		}
 		else
 			buff = untrail(buff);
 		fputc(' ', outfile);
 	}
 	fprintf(outfile, "</%s>", tag);
 	free(line);
 	return 0;
 }
 /* Writes a code block as a <code> element, with vars (if not NULL) written
  * inside the opening tag.
  *
  * type must be CODESPACE or CODEBACK. A CODEBACK block runs until the next
  * line identified as CODEBACK, which is consumed and freed. A CODESPACE block
  * runs until the first line of another type, which is handed back with
  * ungetline(). Lines after the first are separated by "<br />".
  *
  * Returns 0 when the block was closed. Returns 1 if type is not a code type,
  * or if the input ends first; in the latter case "</code>" is not written. */
 static int paracodecase(struct linefile *infile, FILE *outfile,
 		char *line, char *buff,
 		char *vars, enum paratype type) {
 	int seenfirst;
 	enum paratype newtype;
 	if (type != CODESPACE && type != CODEBACK)
 		return 1;
 	if (vars == NULL)
 		fputs("<code>", outfile);
 	else
 		fprintf(outfile, "<code %s>", vars);
 	seenfirst = 0;
 	newtype = type;
 	for (;;) {
 		/* True for every content line: inside a CODEBACK block those
 		 * are the lines that are not themselves CODEBACK. */
 		if ((type == CODEBACK && type != newtype) ||
 		     newtype == CODESPACE) {
 			if (seenfirst)
 				fputs("<br />", outfile);
 			seenfirst = 1;
 		}
 		/* The CODEBACK line that opens the block is not content. */
 		if (newtype != CODEBACK)
 			writesimple(buff, -1, outfile);
 		free(line);
 		line = getline(infile);
 		if (line == NULL)
 			return 1;
 		newtype = identifypara(line, &buff);
 		if (type == CODEBACK && newtype == CODEBACK)
 			break;
 		if (type == CODESPACE && newtype != type) {
 			ungetline(infile, line);
 			break;
 		}
 	}
 	fputs("</code>", outfile);
 	/* In the CODESPACE case the last line went back to infile. */
 	if (type == CODEBACK)
 		free(line);
 	return 0;
 }
 /* Searches data[start] .. data[datalen - 1] for a closing delimiter.
  *
  * Any c characters sitting directly at start are skipped. After that, the
  * first run of at least reps consecutive c characters is located, and the
  * index at which the last reps characters of that run begin is returned.
  * Returns -1 if there is no such run, if start is negative or if reps is
  * less than 1. No byte at or beyond data[datalen] is ever read. */
 static long strsearch(char *data, long start, size_t datalen,
 		char c, int reps) {
 	size_t pos;
 	if (start < 0 || reps < 1)
 		return -1;
 	pos = (size_t) start;
 	/* Every bound is checked before the byte is read, so a slice of a
 	 * longer buffer is never overrun. */
 	while (pos < datalen && data[pos] == c)
 		++pos;
 	while (pos < datalen) {
 		size_t run;
 		if (data[pos] != c) {
 			++pos;
 			continue;
 		}
 		/* pos is the first character of a run; measure the run. */
 		run = 1;
 		while (pos + run < datalen && data[pos + run] == c)
 			++run;
 		if (run >= (size_t) reps)
 			return (long) (pos + run - (size_t) reps);
 		/* Too short to match anywhere inside it; skip it whole. */
 		pos += run;
 	}
 	return -1;
 }
 /* Writes the markdown construct "[text](target)" that starts at data[i] as
  * HTML. tag selects the output: "a" gives <a href='target'>text</a> and
  * "img" gives <img src='target' alt='text'>.
  *
  * Returns the index of the closing ')' on success. Returns -1, without
  * writing anything, if tag is anything else, if data[i] is not '[', or if
  * the "](" and ")" that complete the construct cannot be found within the
  * first len bytes of data. */
 static long writelinked(char *data, long i, size_t len, char *tag,
 		FILE *outfile) {
 	long linkend, textend;
 	int isimage;
 	if (strcmp(tag, "a") == 0)
 		isimage = 0;
 	else if (strcmp(tag, "img") == 0)
 		isimage = 1;
 	else
 		return -1;
 	/* For images the caller passes the position after the '!', which is
 	 * not guaranteed to hold a '[' (or to be inside the data at all). */
 	if (i < 0 || (size_t) i >= len || data[i] != '[')
 		return -1;
 	textend = strsearch(data, i, len, ']', 1);
 	if (textend < 0)
 		return -1;
 	/* The target is taken from 2 characters past the ']', so the one in
 	 * between has to be the opening '('. */
 	if ((size_t) textend + 1 >= len || data[textend + 1] != '(')
 		return -1;
 	linkend = strsearch(data, textend, len, ')', 1);
 	/* A closing ')' nearer than that would make the target length
 	 * negative, which writesimple() would receive as a huge size_t. */
 	if (linkend < textend + 2)
 		return -1;
 	fputs(isimage ? "<img src='" : "<a href='", outfile);
 	writesimple(data + textend + 2, linkend - textend - 2, outfile);
 	fputs(isimage ? "' alt='" : "'>", outfile);
 	writesimple(data + i + 1, textend - i - 1, outfile);
 	fputs(isimage ? "'>" : "</a>", outfile);
 	return linkend;
 }
 /* Writes the character c to outfile. If c has an entry in the escapes table,
  * the replacement text from that entry is written in its place. Always
  * returns 0. */
 static int writeescape(char c, FILE *outfile) {
 	const char *replacement = NULL;
 	size_t idx;
 	for (idx = 0; idx < sizeof escapes / sizeof escapes[0]; ++idx) {
 		if (escapes[idx].c == c) {
 			replacement = escapes[idx].escape;
 			break;
 		}
 	}
 	if (replacement != NULL)
 		fputs(replacement, outfile);
 	else
 		fputc(c, outfile);
 	return 0;
 }
 /* Writes the first len bytes of data to outfile, turning inline markdown
  * into HTML: *x* and _x_ become <i>, **x** and __x__ become <b>, `x` becomes
  * <code>, [text](target) becomes a link and ![text](target) an image. A
  * backslash in front of one of escapedchars writes that character as-is.
  * Everything else goes through writeescape(). Whenever a construct has no
  * closing delimiter, its opening character is written as an ordinary
  * character instead. Always returns 0. */
 static int writedata(char *data, size_t len, FILE *outfile) {
 	long i;
 	long start;
 	long end;
 	for (i = 0; i < len; ++i) {
 		switch (data[i]) {
 /* Expands to the case for one emphasis character. A doubled character jumps
  * to bold with a 2 character closing delimiter, a single one jumps to italic.
  * In both cases start is the first byte of the emphasised text and end is
  * what strsearch() returned for the closing delimiter. */
 #define STANDOUT_CHAR(c) \
 		case c: \
 			if (data[i + 1] == c) { \
 				start = i + 2; \
 				end = strsearch(data, start, len, \
 						c, 2); \
 				goto bold; \
 			} \
 			start = i + 1; \
 			end = strsearch(data, start, len, c, 1); \
 			goto italic;
 		STANDOUT_CHAR('*');
 		STANDOUT_CHAR('_');
 		italic:
 			if (end < 0)
 				goto normal;
 			fputs("<i>", outfile);
 			/* Recurse so that emphasis can be nested. */
 			writedata(data + start, end - start, outfile);
 			fputs("</i>", outfile);
 			i = end;
 			break;
 		bold:
 			if (end < 0)
 				goto normal;
 			fputs("<b>", outfile);
 			writedata(data + start, end - start, outfile);
 			fputs("</b>", outfile);
 			/* The closing delimiter is 2 characters long. */
 			i = end + 1;
 			break;
 		case '`':
 			end = strsearch(data, i, len, '`', 1);
 			if (end < 0)
 				goto normal;
 			fputs("<code>", outfile);
 			/* NOTE(review): the span passed on starts at the
 			 * opening backtick itself, not after it - confirm
 			 * this is intended. */
 			writedata(data + i, end - i, outfile);
 			fputs("</code>", outfile);
 			i = end;
 			break;
 		case '[':
 			end = writelinked(data, i, len, "a", outfile);
 			if (end < 0)
 				goto normal;
 			i = end;
 			break;
 		case '!':
 			/* An image is a link with a '!' in front of it. */
 			end = writelinked(data, i + 1, len, "img", outfile);
 			if (end < 0)
 				goto normal;
 			i = end;
 			break;
 		case '\\':
 			/* NOTE(review): i == len cannot be true inside this
 			 * loop; this was presumably meant to detect a
 			 * backslash that is the last character. */
 			if (i == len ||
 				strchr(escapedchars, data[i+1]) == NULL) {
 				fputc('\\', outfile);
 				break;
 			}
 			/* Skip the backslash and write the escaped character
 			 * without interpreting it. */
 			++i;
 			goto normal;
 		default: normal:
 			writeescape(data[i], outfile);
 			break;
 		}
 	}
 	return 0;
 }
 /* Writes data to outfile without interpreting any markdown; every character
  * goes through writeescape().
  *
  * len is the number of bytes to write. Callers may pass -1 instead, meaning
  * "up to the terminating NUL". Always returns 0. */
 static int writesimple(char *data, size_t len, FILE *outfile) {
 	size_t i;
 	int untilnul;
 	/* len is unsigned, so a -1 from the caller arrives as the largest
 	 * size_t value and a "len < 0" test can never be true. Recognise the
 	 * sentinel explicitly, otherwise the loop runs past the NUL. */
 	untilnul = (len == (size_t) -1);
 	for (i = 0; i < len; ++i) {
 		if (untilnul && data[i] == '\0')
 			break;
 		/* NOTE(review): this looks up the backslash itself, which is
 		 * one of escapedchars, so the extra backslash is never
 		 * written; data[i + 1] may have been intended. */
 		if (data[i] == '\\')
 			if (strchr(escapedchars, data[i]) == NULL)
 				fputc('\\', outfile);
 		writeescape(data[i], outfile);
 	}
 	return 0;
 }
--- a/src/util.c
+++ b/src/util.c
@@ -0,0 +1,79 @@
 /*
   ncdg - A program to help generate natechoe.dev
   Copyright (C) 2022  Nate Choe (natechoe9@gmail.com)
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 #include <stdlib.h>
 #include <string.h>
 #include <util.h>
 /* Allocates an empty string with a 20 byte initial buffer.
  *
  * Returns the new string, to be released with freestring(), or NULL if
  * either allocation fails. Nothing is leaked in the failure case. */
 struct string *newstring(void) {
 	struct string *ret;
 	ret = malloc(sizeof *ret);
 	if (ret == NULL)
 		return NULL;
 	ret->len = 0;
 	ret->alloc = 20;
 	ret->data = malloc(ret->alloc);
 	if (ret->data == NULL) {
 		/* The struct itself was allocated; give it back rather than
 		 * losing the only pointer to it. */
 		free(ret);
 		return NULL;
 	}
 	return ret;
 }
 /* Releases str and the buffer it owns. Like free(), this does nothing when
  * str is NULL, so the result of a failed newstring() can be passed in
  * safely. */
 void freestring(struct string *str) {
 	if (str == NULL)
 		return;
 	free(str->data);
 	free(str);
 }
 /* Appends the character c to str, doubling the size of the buffer first if
  * it is full. Returns 0 on success. Returns 1, leaving str untouched, if the
  * buffer could not be grown. */
 int appendcharstring(struct string *str, char c) {
 	if (str->len >= str->alloc) {
 		size_t grown = str->alloc * 2;
 		char *bigger = realloc(str->data, grown);
 		if (bigger == NULL)
 			return 1;
 		str->alloc = grown;
 		str->data = bigger;
 	}
 	str->data[str->len] = c;
 	str->len += 1;
 	return 0;
 }
 /* Appends the NUL terminated string s, without its NUL, to str. The buffer
  * is doubled as often as needed until it is strictly larger than the new
  * length. Returns 0 on success. Returns 1, leaving str untouched, if the
  * buffer could not be grown. */
 int appendstrstring(struct string *str, char *s) {
 	size_t extra = strlen(s);
 	size_t needed = str->len + extra;
 	size_t capacity = str->alloc;
 	while (needed >= capacity)
 		capacity *= 2;
 	/* The capacity only differs if the loop above had to double it. */
 	if (capacity != str->alloc) {
 		char *bigger = realloc(str->data, capacity);
 		if (bigger == NULL)
 			return 1;
 		str->data = bigger;
 		str->alloc = capacity;
 	}
 	memcpy(str->data + str->len, s, extra);
 	str->len = needed;
 	return 0;
 }
 /* Empties str. Only the length is set back to 0; data and alloc are left as
  * they are, so the existing buffer is reused by later appends. */
 void resetstring(struct string *str) {
 	str->len = 0;
 }