Added HTML blocks

2022-04-24 21:34:44 -05:00
parent ad26f3e7be
commit 6319c4be5d
5 changed files with 292 additions and 64 deletions
--- a/src/include/mdutil.h
+++ b/src/include/mdutil.h
@@ -29,24 +29,29 @@ enum linetype {
 	/* === */
 	SETEXT2,
 	/* --- */
-	HEADER
+	HEADER,
 	HTMLCONCRETE,
 	COMMENTLONG,
 	PHP,
 	COMMENTSHORT,
 	CDATA,
 	SKELETON,
 	GENERICTAG
 };
 /* First and last linetype values that denote HTML blocks. identifyline()
  * tests HTMLSTART <= type <= HTMLEND, so the HTML members of the enum have
  * to stay contiguous between HTMLCONCRETE and GENERICTAG. */
 #define HTMLSTART HTMLCONCRETE
 #define HTMLEND GENERICTAG
 /* Classification of one input line, as filled in by identifyline(). Which
  * member of `data` is meaningful depends on `type`. */
 struct linedata {
 	enum linetype type;
 	union {
 		int intensity;
 		/* FENCECODE: number of leading backticks on the line.
 		 * HEADER: the count identifyline() stores for the header
 		 * (presumably the number of leading '#' characters; confirm
 		 * against the full source). */
 		int isfirst;
 		/* HTML block types: set to 1 for the line that opens the
 		 * block and to 0 for the line that terminates it. */
 	} data;
 };
-enum nodetype {
+void identifyline(char *line, struct linedata *prev, struct linedata *ret);
 	PARAGRAPH,
 	CODE,
 	/* Used for code that starts with spaces */
 	CODEBLOCK,
 	/* Used for triple backtick code */
 	NONE
 };
 void identifyline(char *line, enum nodetype prev, struct linedata *ret);
 /* prev is almost never used, but sometimes it is. */
 char *realcontent(char *line, struct linedata *data);
--- a/src/include/util.h
+++ b/src/include/util.h
@@ -20,6 +20,8 @@
 #include <stddef.h>
 #define LEN(arr) (sizeof(arr) / sizeof *(arr))
 struct string {
 	size_t len;
 	size_t alloc;
--- a/src/mdutil.c
+++ b/src/mdutil.c
@@ -17,16 +17,35 @@
 */
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
 #include <stddef.h>
 #include <util.h>
 #include <mdutil.h>
 static char *truncate(char *str);
 static char *after(char *begin, char *str);
 static void identifyend(char *line, enum linetype prev, struct linedata *ret);
-void identifyline(char *line, enum nodetype prev, struct linedata *ret) {
+static char *concretetags[] = { "pre", "script", "style", "textarea" };
 /* Tag names that open a SKELETON block. identifyline() looks for one of
  * these directly after "<" or "</", and requires the name to be followed by
  * the end of the line, a space, ">" or "/>". */
 static char *skeletontags[] = {
 	"address", "article", "aside", "base", "basefont", "blockquote", "body",
 	"caption", "center", "col", "colgroup", "dd", "details", "dialog",
 	"dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer",
 	"form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head",
 	"header", "hr", "html", "iframe", "legend", "li", "link", "main",
 	"menu", "menuitem", "nav", "noframes", "ol", "optgroup", "option", "p",
 	"param", "section", "source", "summary", "table", "tbody", "td",
 	"tfoot", "th", "thead", "title", "tr", "track", "ul",
 };
 void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
 	int i;
-	if (prev != PARAGRAPH) {
+	if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
 		identifyend(truncate(line), prev->type, ret);
 		return;
 	}
 	if (prev->type != PLAIN) {
 		for (i = 0; i < 4; ++i) {
 			if (!isspace(line[i]))
 				goto notspacecode;
@@ -71,13 +90,15 @@ notspacecode:
 		/* There has to be at least 3 delimiter characters */
 	}
 nothr:
-	for (i = 0; i < 3; ++i) {
+	for (i = 0; line[i] == '`'; ++i) ;
-		if (line[i] != '`')
+	if (i >= 3) {
 			goto notfencedcode;
 	}
 		ret->type = FENCECODE;
 		ret->data.intensity = i;
 		/* The last line of a fenced code block must have at least the
 		 * same number of backticks as the first. */
 		return;
-notfencedcode:
+	}
 /* notfencedcode: */
 	if (line[0] == '#') {
 		int pcount;
@@ -85,15 +106,73 @@ notfencedcode:
 		if (line[pcount] != ' ' && line[pcount] != '\0')
 			goto notheader;
 		ret->type = HEADER;
-		ret->intensity = pcount;
+		ret->data.intensity = pcount;
 		return;
 	}
 notheader:
 #define HTMLSTARTCASE(start, rettype) \
 	if (after(start, line) != NULL) { \
 		ret->type = rettype; \
 		ret->data.isfirst = 1; \
 		return; \
 	}
 	HTMLSTARTCASE("<!--", COMMENTLONG);
 	HTMLSTARTCASE("<![CDATA[", CDATA);
 	HTMLSTARTCASE("<?", PHP);
 	HTMLSTARTCASE("<!", COMMENTSHORT);
 	if (line[0] == '<') {
 		char *testline;
 		testline = line + 1;
 		for (i = 0; i < LEN(concretetags); ++i) {
 			char *aftertag;
 			aftertag = after(concretetags[i], testline);
 			if (aftertag == NULL)
 				continue;
 			if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
 				ret->type = HTMLCONCRETE;
 				ret->data.isfirst = 1;
 				return;
 			}
 		}
 		if (testline[0] == '/')
 			++testline;
 		for (i = 0; i < LEN(skeletontags); ++i) {
 			char *aftertag;
 			aftertag = after(skeletontags[i], testline);
 			if (aftertag == NULL)
 				continue;
 			if (aftertag[0] == '\0' ||
 					strchr(" >", aftertag[0]) ||
 					after("/>", aftertag) != NULL) {
 				ret->type = SKELETON;
 				ret->data.isfirst = 1;
 				return;
 			}
 		}
 	}
 	ret->type = PLAIN;
 	return;
 }
-/* TODO: Finish this */
+
 char *realcontent(char *line, struct linedata *data) {
 	switch (data->type) {
 	case EMPTY: case HR: case SETEXT1: case SETEXT2: case FENCECODE:
 	case HTMLCONCRETE: case COMMENTLONG: case PHP: case CDATA:
 	case SKELETON: case COMMENTSHORT: case GENERICTAG:
 		return NULL;
 	case PLAIN:
 		return line;
 	case SPACECODE:
 		return line + 4;
 	case HEADER:
 		return truncate(line + data->data.intensity);
 	}
 	return NULL;
 }
 static char *truncate(char *str) {
 	while (isspace(str[0]))
@@ -101,16 +180,71 @@ static char *truncate(char *str) {
 	return str;
 }
-char *realcontent(char *line, struct linedata *data) {
+static char *after(char *begin, char *str) {
-	switch (data->type) {
+	int i;
-	case EMPTY: case HR: case SETEXT1: case SETEXT2: case FENCECODE:
+	for (i = 0; begin[i]; ++i) {
 		if (begin[i] != str[0])
 			return NULL;
-	case PLAIN:
+		++str;
-		return line;
+	}
-	case SPACECODE:
+	return str;
-		return line + 4;
+}
-	case HEADER:
+
-		return truncate(line + data->intensity);
+static void identifyend(char *line, enum linetype prev, struct linedata *ret) {
 	/* Decides whether `line` terminates the HTML block of kind `prev`.
 	 * identifyline() calls this with truncate(line) while the previous
 	 * line belongs to an HTML block.
 	 *
 	 * On a terminating line, ret->type is set to `prev` and
 	 * ret->data.isfirst to 0. On every other line ret->type is EMPTY and
 	 * ret->data is left untouched. */
 	size_t i;
 	int found;
 	found = 0;
 	switch (prev) {
 	case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
 	case SETEXT1: case SETEXT2: case HEADER:
 		/* Not an HTML block: something has gone terribly wrong. */
 		break;
 	case HTMLCONCRETE:
 		/* Any closing tag from concretetags ends the block */
 		for (i = 0; i < LEN(concretetags) && !found; ++i) {
 			char endtag[30];
 			snprintf(endtag, sizeof endtag, "</%s>",
 					concretetags[i]);
 			found = strstr(line, endtag) != NULL;
 		}
 		break;
 	case COMMENTLONG:
 		found = strstr(line, "-->") != NULL;
 		break;
 	case PHP:
 		found = strstr(line, "?>") != NULL;
 		break;
 	case COMMENTSHORT:
 		found = strchr(line, '>') != NULL;
 		break;
 	case CDATA:
 		found = strstr(line, "]]>") != NULL;
 		break;
 	case SKELETON: case GENERICTAG:
 		/* These two end on a line with nothing left in it */
 		found = line[0] == '\0';
 		break;
 	}
 	if (!found) {
 		ret->type = EMPTY;
 		return;
 	}
 	ret->type = prev;
 	ret->data.isfirst = 0;
 }
--- a/src/template.c
+++ b/src/template.c
@@ -26,21 +26,28 @@
 #include <template.h>
 struct parsestate {
-	enum nodetype type;
+	struct linedata prev;
 	struct string *para;
 	int isfirst;
-	/* Used to insert <br> tags. Currently onlu used for FENCECODE. */
+	int intensity;
 	/* Similar to the intensity field in the linedata struct. Currently
 	 * stores the number of backticks used in FENCECODE.*/
 };
 static int parseline(char *line, struct parsestate *currstate, FILE *out);
 static int endpara(struct parsestate *state, FILE *out);
 static void handlehtmlcase(struct linedata *data, struct parsestate *state,
 		char *line, FILE *out);
 static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
 		char *line, FILE *out);
 int parsetemplate(FILE *infile, FILE *outfile) {
 	struct linefile *realin;
 	struct parsestate currstate;
 	int code;
-	currstate.type = NONE;
+	currstate.prev.type = EMPTY;
 	currstate.para = newstring();
 	realin = newlinefile(infile);
@@ -65,11 +72,14 @@ int parsetemplate(FILE *infile, FILE *outfile) {
 static int parseline(char *line, struct parsestate *currstate, FILE *out) {
 	struct linedata type;
-	identifyline(line, currstate->type, &type);
+	identifyline(line, &currstate->prev, &type);
-	if (currstate->type == CODEBLOCK) {
+	switch (currstate->prev.type) {
-		if (type.type == FENCECODE) {
+	case FENCECODE:
-			currstate->type = NONE;
+		if (type.type == FENCECODE &&
 				type.data.intensity >=
 				currstate->intensity) {
 			currstate->prev.type = EMPTY;
 			fputs("</code>", out);
 			return 0;
 		}
@@ -78,45 +88,66 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
 		fputs(line, out);
 		currstate->isfirst = 0;
 		return 0;
 	case HTMLCONCRETE:
 		handlehtmlmiddle(&type, currstate, line, out);
 		return 0;
 	case COMMENTLONG:
 		handlehtmlmiddle(&type, currstate, line, out);
 		return 0;
 	case PHP:
 		handlehtmlmiddle(&type, currstate, line, out);
 		return 0;
 	case COMMENTSHORT:
 		handlehtmlmiddle(&type, currstate, line, out);
 		return 0;
 	case CDATA:
 		handlehtmlmiddle(&type, currstate, line, out);
 		return 0;
 	case SKELETON:
 		handlehtmlmiddle(&type, currstate, line, out);
 		return 0;
 	case EMPTY: case PLAIN: case SPACECODE: case HR:
 	case SETEXT1: case SETEXT2: case HEADER: case GENERICTAG:
 		break;
 	}
 	switch (type.type) {
 	case EMPTY:
 		endpara(currstate, out);
-		currstate->type = NONE;
+		currstate->prev.type = EMPTY;
-		return 0;
+		break;
 	case SETEXT1:
-		if (currstate->type != PARAGRAPH)
+		if (currstate->prev.type != PLAIN)
 			return 1;
-		currstate->type = NONE;
+		currstate->prev.type = EMPTY;
 		fputs("<h1>", out);
 		fwrite(currstate->para->data, 1, currstate->para->len, out);
 		fputs("</h1>", out);
 		resetstring(currstate->para);
-		return 0;
+		break;
 	case SETEXT2:
-		if (currstate->type != PARAGRAPH)
+		if (currstate->prev.type != PLAIN)
 			goto hr;
-		currstate->type = NONE;
+		currstate->prev.type = EMPTY;
 		fputs("<h2>", out);
 		fwrite(currstate->para->data, 1, currstate->para->len, out);
 		fputs("</h2>", out);
 		resetstring(currstate->para);
-		return 0;
+		break;
 	case HR: hr:
 		endpara(currstate, out);
-		currstate->type = NONE;
+		currstate->prev.type = EMPTY;
 		fputs("<hr>", out);
-		return 0;
+		break;
 	case PLAIN:
-		if (currstate->type != PARAGRAPH) {
+		if (currstate->prev.type != PLAIN) {
 			endpara(currstate, out);
-			currstate->type = PARAGRAPH;
+			currstate->prev.type = PLAIN;
 		}
 		else
 			appendcharstring(currstate->para, ' ');
 		appendstrstring(currstate->para, realcontent(line, &type));
-		return 0;
+		break;
 		/* According to the commonmark spec, this markdown:
 		Chapter 1
@@ -136,13 +167,14 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
 		 * */
 	case FENCECODE:
 		fputs("<code class='block'>", out);
-		currstate->type = CODEBLOCK;
+		currstate->prev.type = FENCECODE;
 		currstate->isfirst = 1;
 		currstate->intensity = type.data.intensity;
 		break;
 	case SPACECODE:
-		if (currstate->type != CODE) {
+		if (currstate->prev.type != SPACECODE) {
 			endpara(currstate, out);
-			currstate->type = CODE;
+			currstate->prev.type = SPACECODE;
 			fputs("<code class='block'>", out);
 		}
 		else
@@ -152,28 +184,78 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
 	case HEADER:
 		endpara(currstate, out);
 		fprintf(out, "<h%d>%s</h%d>",
-				type.intensity,
+				type.data.intensity,
 				realcontent(line, &type),
-				type.intensity);
+				type.data.intensity);
-		currstate->type = NONE;
+		currstate->prev.type = EMPTY;
 		break;
 	case HTMLCONCRETE:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	case COMMENTLONG:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	case PHP:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	case COMMENTSHORT:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	case CDATA:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	case SKELETON:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	case GENERICTAG:
 		handlehtmlcase(&type, currstate, line, out);
 		break;
 	}
 	return 0;
 }
 static int endpara(struct parsestate *state, FILE *out) {
-	switch (state->type) {
+	switch (state->prev.type) {
-	case PARAGRAPH:
+	case EMPTY: case HR:
 	case HTMLCONCRETE: case COMMENTLONG: case PHP: case COMMENTSHORT:
 	case CDATA: case SKELETON: case GENERICTAG:
 		return 0;
 	case PLAIN:
 		fputs("<p>", out);
 		fwrite(state->para->data, 1, state->para->len, out);
 		fputs("</p>", out);
 		resetstring(state->para);
 		return 0;
-	case CODE: case CODEBLOCK:
+	case SPACECODE: case FENCECODE:
 		fputs("</code>", out);
 		return 0;
-	case NONE:
+	case HEADER:
 		fprintf(out, "</h%d>", state->prev.data.intensity);
 		return 0;
 	case SETEXT1:
 		fputs("</h1>", out);
 		break;
 	case SETEXT2:
 		fputs("</h2>", out);
 		break;
 	}
 	return 1;
 }
 /* Handles the line that opens an HTML block: closes whatever was open,
  * copies the line to `out` followed by a newline, and records the block
  * kind as the previous line type. */
 static void handlehtmlcase(struct linedata *data, struct parsestate *state,
 		char *line, FILE *out) {
 	endpara(state, out);
 	fprintf(out, "%s\n", line);
 	/* The line is passed through exactly as it was read */
 	state->prev.type = data->type;
 }
 static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
 		char *line, FILE *out) {
 	if (state->prev.type == data->type && !data->data.isfirst) {
 		state->prev.type = EMPTY;
 		return;
 	}
 	fputs(line, out);
 	fputc('\n', out);
 }
--- a/test.md
+++ b/test.md
@@ -0,0 +1,5 @@
 <h1>
 	This should just be raw HTML
 </h1>
 This should not be