Proper raw html handling

2022-05-11 12:17:37 -05:00
parent cda87f2ac0
commit 9d0d998b71
3 changed files with 33 additions and 69 deletions
--- a/src/include/mdutil.h
+++ b/src/include/mdutil.h
@@ -47,7 +47,7 @@ struct linedata {
 	enum linetype type;
 	union {
 		int intensity;
-		int isfirst;
+		int islast;
 	} data;
 };

--- a/src/markdown.c
+++ b/src/markdown.c
@@ -40,8 +40,6 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out);
 static int endpara(struct parsestate *state, FILE *out);
 static void handlehtmlcase(struct linedata *data, struct parsestate *state,
 		char *line, FILE *out);
-static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
-		char *line, FILE *out);

 int parsemarkdown(FILE *infile, FILE *outfile) {
 	struct linefile *realin;
@@ -88,25 +86,25 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
 		currstate->isfirst = 0;
 		return 0;
 	case HTMLCONCRETE:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case COMMENTLONG:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case PHP:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case COMMENTSHORT:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case CDATA:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case SKELETON:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case GENERICTAG:
-		handlehtmlmiddle(&type, currstate, line, out);
+		handlehtmlcase(&type, currstate, line, out);
 		return 0;
 	case EMPTY: case PLAIN: case SPACECODE: case HR:
 	case SETEXT1: case SETEXT2: case HEADER:
@@ -157,7 +155,7 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {

 		 * Should NOT compile to this:

-		<p>Chapter 1</p><hr>
+		<p>Chapter 1</p><hr />

 		 * but rather to this

@@ -249,14 +247,8 @@ static void handlehtmlcase(struct linedata *data, struct parsestate *state,
 	fputs(line, out);
 	fputc('\n', out);
 	state->prev.type = data->type;
-}
-
-static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
-		char *line, FILE *out) {
-	if (state->prev.type == data->type && !data->data.isfirst) {
+	if (state->prev.type == data->type && data->data.islast) {
 		state->prev.type = EMPTY;
 		return;
 	}
-	fputs(line, out);
-	fputc('\n', out);
 }
--- a/src/mdutil.c
+++ b/src/mdutil.c
@@ -25,7 +25,7 @@

 static char *truncate(char *str);
 static char *after(char *begin, char *str);
-static void identifyend(char *line, enum linetype prev, struct linedata *ret);
+static int isend(char *line, enum linetype prev);

 static char *concretetags[] = { "pre", "script", "style", "textarea" };
 static char *skeletontags[] = {
@@ -42,7 +42,8 @@ static char *skeletontags[] = {
 void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
 	int i;
 	if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
-		identifyend(truncate(line), prev->type, ret);
+		ret->type = prev->type;
+		ret->data.islast = isend(truncate(line), prev->type);
 		return;
 	}
 	if (prev->type != PLAIN) {
@@ -117,7 +118,7 @@ notheader:
 #define HTMLSTARTCASE(start, rettype) \
 	if (after(start, line) != NULL) { \
 		ret->type = rettype; \
-		ret->data.isfirst = 1; \
+		ret->data.islast = isend(line, rettype); \
 		return; \
 	}
 	HTMLSTARTCASE("<!--", COMMENTLONG);
@@ -128,6 +129,8 @@ notheader:
 	if (line[0] == '<') {
 		char *testline;
 		testline = line + 1;
+		if (testline[0] == '/')
+			++testline;
 		for (i = 0; i < LEN(concretetags); ++i) {
 			char *aftertag;
 			aftertag = after(concretetags[i], testline);
@@ -135,35 +138,33 @@ notheader:
 				continue;
 			if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
 				ret->type = HTMLCONCRETE;
-				ret->data.isfirst = 1;
+				ret->data.islast = 0;
 				return;
 			}
 		}
-		if (testline[0] == '/')
-			++testline;
 		for (i = 0; i < LEN(skeletontags); ++i) {
 			char *aftertag;
 			aftertag = after(skeletontags[i], testline);
 			if (aftertag == NULL)
 				continue;
 			if (aftertag[0] == '\0' ||
-					strchr(" >", aftertag[0]) ||
+					strchr(" \t>", aftertag[0]) ||
 					after("/>", aftertag) != NULL) {
 				ret->type = SKELETON;
-				ret->data.isfirst = 1;
+				ret->data.islast = 0;
 				return;
 			}
 		}

 		if (isgenerictag(line)) {
 			ret->type = GENERICTAG;
-			ret->data.isfirst = 1;
+			ret->data.islast = 0;
 			return;
 		}
 	}

 	ret->type = PLAIN;
-	ret->data.isfirst = 1;
+	ret->data.islast = 0;
 	return;
 }

@@ -275,61 +276,32 @@ static char *after(char *begin, char *str) {
 	return str;
 }

-static void identifyend(char *line, enum linetype prev, struct linedata *ret) {
+static int isend(char *line, enum linetype prev) {
 	int i;
-	ret->type = EMPTY;
+	/* Returns nonzero when line closes the raw HTML block of type prev. */
 	switch (prev) {
 	case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
 	case SETEXT1: case SETEXT2: case HEADER:
-		return;
+		return 1;
 	/* In this case, something has gone terribly wrong. */

 	case HTMLCONCRETE:
 		for (i = 0; i < LEN(concretetags); ++i) {
 			char endtag[30];
 			sprintf(endtag, "</%s>", concretetags[i]);
-			if (strstr(line, endtag) != NULL) {
-				ret->type = HTMLCONCRETE;
-				ret->data.isfirst = 0;
-				return;
-			}
+			if (strstr(line, endtag) != NULL) return 1;
 		}
-		return;
+		return 0; /* none of the concrete closing tags is on this line */
 	case COMMENTLONG:
-		if (strstr(line, "-->") != NULL) {
-			ret->type = COMMENTLONG;
-			ret->data.isfirst = 0;
-		}
-		return;
+		return strstr(line, "-->") != NULL;
 	case PHP:
-		if (strstr(line, "?>") != NULL) {
-			ret->type = PHP;
-			ret->data.isfirst = 0;
-		}
-		return;
+		return strstr(line, "?>") != NULL;
 	case COMMENTSHORT:
-		if (strchr(line, '>') != NULL) {
-			ret->type = COMMENTSHORT;
-			ret->data.isfirst = 0;
-		}
-		return;
+		return strchr(line, '>') != NULL;
 	case CDATA:
-		if (strstr(line, "]]>") != NULL) {
-			ret->type = CDATA;
-			ret->data.isfirst = 0;
-		}
-		return;
-	case SKELETON:
-		if (line[0] == '\0') {
-			ret->type = SKELETON;
-			ret->data.isfirst = 0;
-		}
-		return;
-	case GENERICTAG:
-		if (line[0] == '\0') {
-			ret->type = GENERICTAG;
-			ret->data.isfirst = 0;
-		}
-		return;
+		return strstr(line, "]]>") != NULL;
+	case SKELETON: case GENERICTAG:
+		return line[0] == '\0';
 	}
+	return 1;
 }