Proper raw html handling
This commit is contained in:
		| @@ -47,7 +47,7 @@ struct linedata { | ||||
| 	enum linetype type; | ||||
| 	union { | ||||
| 		int intensity; | ||||
| 		int isfirst; | ||||
| 		int islast; | ||||
| 	} data; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -40,8 +40,6 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out); | ||||
| static int endpara(struct parsestate *state, FILE *out); | ||||
| static void handlehtmlcase(struct linedata *data, struct parsestate *state, | ||||
| 		char *line, FILE *out); | ||||
| static void handlehtmlmiddle(struct linedata *data, struct parsestate *state, | ||||
| 		char *line, FILE *out); | ||||
|  | ||||
| int parsemarkdown(FILE *infile, FILE *outfile) { | ||||
| 	struct linefile *realin; | ||||
| @@ -88,25 +86,25 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) { | ||||
| 		currstate->isfirst = 0; | ||||
| 		return 0; | ||||
| 	case HTMLCONCRETE: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case COMMENTLONG: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case PHP: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case COMMENTSHORT: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case CDATA: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case SKELETON: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case GENERICTAG: | ||||
| 		handlehtmlmiddle(&type, currstate, line, out); | ||||
| 		handlehtmlcase(&type, currstate, line, out); | ||||
| 		return 0; | ||||
| 	case EMPTY: case PLAIN: case SPACECODE: case HR: | ||||
| 	case SETEXT1: case SETEXT2: case HEADER: | ||||
| @@ -157,7 +155,7 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) { | ||||
|  | ||||
| 		 * Should NOT compile to this: | ||||
|  | ||||
| 		<p>Chapter 1</p><hr> | ||||
| 		<p>Chapter 1</p><hr /> | ||||
|  | ||||
| 		 * but rather to this | ||||
|  | ||||
| @@ -249,14 +247,8 @@ static void handlehtmlcase(struct linedata *data, struct parsestate *state, | ||||
| 	fputs(line, out); | ||||
| 	fputc('\n', out); | ||||
| 	state->prev.type = data->type; | ||||
| } | ||||
|  | ||||
| static void handlehtmlmiddle(struct linedata *data, struct parsestate *state, | ||||
| 		char *line, FILE *out) { | ||||
| 	if (state->prev.type == data->type && !data->data.isfirst) { | ||||
| 	if (state->prev.type == data->type && data->data.islast) { | ||||
| 		state->prev.type = EMPTY; | ||||
| 		return; | ||||
| 	} | ||||
| 	fputs(line, out); | ||||
| 	fputc('\n', out); | ||||
| } | ||||
|   | ||||
							
								
								
									
										74
									
								
								src/mdutil.c
									
									
									
									
									
								
							
							
						
						
									
										74
									
								
								src/mdutil.c
									
									
									
									
									
								
							| @@ -25,7 +25,7 @@ | ||||
|  | ||||
| static char *truncate(char *str); | ||||
| static char *after(char *begin, char *str); | ||||
| static void identifyend(char *line, enum linetype prev, struct linedata *ret); | ||||
| static int isend(char *line, enum linetype prev); | ||||
|  | ||||
| static char *concretetags[] = { "pre", "script", "style", "textarea" }; | ||||
| static char *skeletontags[] = { | ||||
| @@ -42,7 +42,8 @@ static char *skeletontags[] = { | ||||
| void identifyline(char *line, struct linedata *prev, struct linedata *ret) { | ||||
| 	int i; | ||||
| 	if (HTMLSTART <= prev->type && prev->type <= HTMLEND) { | ||||
| 		identifyend(truncate(line), prev->type, ret); | ||||
| 		ret->type = prev->type; | ||||
| 		ret->data.islast = isend(truncate(line), prev->type); | ||||
| 		return; | ||||
| 	} | ||||
| 	if (prev->type != PLAIN) { | ||||
| @@ -117,7 +118,7 @@ notheader: | ||||
| #define HTMLSTARTCASE(start, rettype) \ | ||||
| 	if (after(start, line) != NULL) { \ | ||||
| 		ret->type = rettype; \ | ||||
| 		ret->data.isfirst = 1; \ | ||||
| 		ret->data.islast = isend(line, rettype); \ | ||||
| 		return; \ | ||||
| 	} | ||||
| 	HTMLSTARTCASE("<!--", COMMENTLONG); | ||||
| @@ -128,6 +129,8 @@ notheader: | ||||
| 	if (line[0] == '<') { | ||||
| 		char *testline; | ||||
| 		testline = line + 1; | ||||
| 		if (testline[0] == '/') | ||||
| 			++testline; | ||||
| 		for (i = 0; i < LEN(concretetags); ++i) { | ||||
| 			char *aftertag; | ||||
| 			aftertag = after(concretetags[i], testline); | ||||
| @@ -135,35 +138,33 @@ notheader: | ||||
| 				continue; | ||||
| 			if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) { | ||||
| 				ret->type = HTMLCONCRETE; | ||||
| 				ret->data.isfirst = 1; | ||||
| 				ret->data.islast = 0; | ||||
| 				return; | ||||
| 			} | ||||
| 		} | ||||
| 		if (testline[0] == '/') | ||||
| 			++testline; | ||||
| 		for (i = 0; i < LEN(skeletontags); ++i) { | ||||
| 			char *aftertag; | ||||
| 			aftertag = after(skeletontags[i], testline); | ||||
| 			if (aftertag == NULL) | ||||
| 				continue; | ||||
| 			if (aftertag[0] == '\0' || | ||||
| 					strchr(" >", aftertag[0]) || | ||||
| 					strchr(" \t>", aftertag[0]) || | ||||
| 					after("/>", aftertag) != NULL) { | ||||
| 				ret->type = SKELETON; | ||||
| 				ret->data.isfirst = 1; | ||||
| 				ret->data.islast = 0; | ||||
| 				return; | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		if (isgenerictag(line)) { | ||||
| 			ret->type = GENERICTAG; | ||||
| 			ret->data.isfirst = 1; | ||||
| 			ret->data.islast = 0; | ||||
| 			return; | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	ret->type = PLAIN; | ||||
| 	ret->data.isfirst = 1; | ||||
| 	ret->data.islast = 0; | ||||
| 	return; | ||||
| } | ||||
|  | ||||
| @@ -275,61 +276,32 @@ static char *after(char *begin, char *str) { | ||||
| 	return str; | ||||
| } | ||||
|  | ||||
| static void identifyend(char *line, enum linetype prev, struct linedata *ret) { | ||||
| static int isend(char *line, enum linetype prev) { | ||||
| 	int i; | ||||
| 	ret->type = EMPTY; | ||||
|  | ||||
| 	switch (prev) { | ||||
| 	case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR: | ||||
| 	case SETEXT1: case SETEXT2: case HEADER: | ||||
| 		return; | ||||
| 		return 1; | ||||
| 	/* In this case, something has gone terribly wrong. */ | ||||
|  | ||||
| 	case HTMLCONCRETE: | ||||
| 		for (i = 0; i < LEN(concretetags); ++i) { | ||||
| 			char endtag[30]; | ||||
| 			sprintf(endtag, "</%s>", concretetags[i]); | ||||
| 			if (strstr(line, endtag) != NULL) { | ||||
| 				ret->type = HTMLCONCRETE; | ||||
| 				ret->data.isfirst = 0; | ||||
| 				return; | ||||
| 			} | ||||
| 			return strstr(line, endtag) != NULL; | ||||
| 		} | ||||
| 		return; | ||||
| 		return 0; | ||||
| 	case COMMENTLONG: | ||||
| 		if (strstr(line, "-->") != NULL) { | ||||
| 			ret->type = COMMENTLONG; | ||||
| 			ret->data.isfirst = 0; | ||||
| 		} | ||||
| 		return; | ||||
| 		return strstr(line, "-->") != NULL; | ||||
| 	case PHP: | ||||
| 		if (strstr(line, "?>") != NULL) { | ||||
| 			ret->type = PHP; | ||||
| 			ret->data.isfirst = 0; | ||||
| 		} | ||||
| 		return; | ||||
| 		return strstr(line, "?>") != NULL; | ||||
| 	case COMMENTSHORT: | ||||
| 		if (strchr(line, '>') != NULL) { | ||||
| 			ret->type = COMMENTSHORT; | ||||
| 			ret->data.isfirst = 0; | ||||
| 		} | ||||
| 		return; | ||||
| 		return strchr(line, '>') != NULL; | ||||
| 	case CDATA: | ||||
| 		if (strstr(line, "]]>") != NULL) { | ||||
| 			ret->type = CDATA; | ||||
| 			ret->data.isfirst = 0; | ||||
| 		} | ||||
| 		return; | ||||
| 	case SKELETON: | ||||
| 		if (line[0] == '\0') { | ||||
| 			ret->type = SKELETON; | ||||
| 			ret->data.isfirst = 0; | ||||
| 		} | ||||
| 		return; | ||||
| 	case GENERICTAG: | ||||
| 		if (line[0] == '\0') { | ||||
| 			ret->type = GENERICTAG; | ||||
| 			ret->data.isfirst = 0; | ||||
| 		} | ||||
| 		return; | ||||
| 		return strstr(line, "]]>") != NULL; | ||||
| 	case SKELETON: case GENERICTAG: | ||||
| 		return line[0] == '\0'; | ||||
| 	} | ||||
| 	return 1; | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user