Proper raw html handling
This commit is contained in:
@@ -47,7 +47,7 @@ struct linedata {
|
|||||||
enum linetype type;
|
enum linetype type;
|
||||||
union {
|
union {
|
||||||
int intensity;
|
int intensity;
|
||||||
int isfirst;
|
int islast;
|
||||||
} data;
|
} data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -40,8 +40,6 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out);
|
|||||||
static int endpara(struct parsestate *state, FILE *out);
|
static int endpara(struct parsestate *state, FILE *out);
|
||||||
static void handlehtmlcase(struct linedata *data, struct parsestate *state,
|
static void handlehtmlcase(struct linedata *data, struct parsestate *state,
|
||||||
char *line, FILE *out);
|
char *line, FILE *out);
|
||||||
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
|
|
||||||
char *line, FILE *out);
|
|
||||||
|
|
||||||
int parsemarkdown(FILE *infile, FILE *outfile) {
|
int parsemarkdown(FILE *infile, FILE *outfile) {
|
||||||
struct linefile *realin;
|
struct linefile *realin;
|
||||||
@@ -88,25 +86,25 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
|
|||||||
currstate->isfirst = 0;
|
currstate->isfirst = 0;
|
||||||
return 0;
|
return 0;
|
||||||
case HTMLCONCRETE:
|
case HTMLCONCRETE:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case COMMENTLONG:
|
case COMMENTLONG:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case PHP:
|
case PHP:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case COMMENTSHORT:
|
case COMMENTSHORT:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case CDATA:
|
case CDATA:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case SKELETON:
|
case SKELETON:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case GENERICTAG:
|
case GENERICTAG:
|
||||||
handlehtmlmiddle(&type, currstate, line, out);
|
handlehtmlcase(&type, currstate, line, out);
|
||||||
return 0;
|
return 0;
|
||||||
case EMPTY: case PLAIN: case SPACECODE: case HR:
|
case EMPTY: case PLAIN: case SPACECODE: case HR:
|
||||||
case SETEXT1: case SETEXT2: case HEADER:
|
case SETEXT1: case SETEXT2: case HEADER:
|
||||||
@@ -157,7 +155,7 @@ static int parseline(char *line, struct parsestate *currstate, FILE *out) {
|
|||||||
|
|
||||||
* Should NOT compile to this:
|
* Should NOT compile to this:
|
||||||
|
|
||||||
<p>Chapter 1</p><hr>
|
<p>Chapter 1</p><hr />
|
||||||
|
|
||||||
* but rather to this
|
* but rather to this
|
||||||
|
|
||||||
@@ -249,14 +247,8 @@ static void handlehtmlcase(struct linedata *data, struct parsestate *state,
|
|||||||
fputs(line, out);
|
fputs(line, out);
|
||||||
fputc('\n', out);
|
fputc('\n', out);
|
||||||
state->prev.type = data->type;
|
state->prev.type = data->type;
|
||||||
}
|
if (state->prev.type == data->type && data->data.islast) {
|
||||||
|
|
||||||
static void handlehtmlmiddle(struct linedata *data, struct parsestate *state,
|
|
||||||
char *line, FILE *out) {
|
|
||||||
if (state->prev.type == data->type && !data->data.isfirst) {
|
|
||||||
state->prev.type = EMPTY;
|
state->prev.type = EMPTY;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fputs(line, out);
|
|
||||||
fputc('\n', out);
|
|
||||||
}
|
}
|
||||||
|
|||||||
74
src/mdutil.c
74
src/mdutil.c
@@ -25,7 +25,7 @@
|
|||||||
|
|
||||||
static char *truncate(char *str);
|
static char *truncate(char *str);
|
||||||
static char *after(char *begin, char *str);
|
static char *after(char *begin, char *str);
|
||||||
static void identifyend(char *line, enum linetype prev, struct linedata *ret);
|
static int isend(char *line, enum linetype prev);
|
||||||
|
|
||||||
static char *concretetags[] = { "pre", "script", "style", "textarea" };
|
static char *concretetags[] = { "pre", "script", "style", "textarea" };
|
||||||
static char *skeletontags[] = {
|
static char *skeletontags[] = {
|
||||||
@@ -42,7 +42,8 @@ static char *skeletontags[] = {
|
|||||||
void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
|
void identifyline(char *line, struct linedata *prev, struct linedata *ret) {
|
||||||
int i;
|
int i;
|
||||||
if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
|
if (HTMLSTART <= prev->type && prev->type <= HTMLEND) {
|
||||||
identifyend(truncate(line), prev->type, ret);
|
ret->type = prev->type;
|
||||||
|
ret->data.islast = isend(truncate(line), prev->type);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (prev->type != PLAIN) {
|
if (prev->type != PLAIN) {
|
||||||
@@ -117,7 +118,7 @@ notheader:
|
|||||||
#define HTMLSTARTCASE(start, rettype) \
|
#define HTMLSTARTCASE(start, rettype) \
|
||||||
if (after(start, line) != NULL) { \
|
if (after(start, line) != NULL) { \
|
||||||
ret->type = rettype; \
|
ret->type = rettype; \
|
||||||
ret->data.isfirst = 1; \
|
ret->data.islast = isend(line, rettype); \
|
||||||
return; \
|
return; \
|
||||||
}
|
}
|
||||||
HTMLSTARTCASE("<!--", COMMENTLONG);
|
HTMLSTARTCASE("<!--", COMMENTLONG);
|
||||||
@@ -128,6 +129,8 @@ notheader:
|
|||||||
if (line[0] == '<') {
|
if (line[0] == '<') {
|
||||||
char *testline;
|
char *testline;
|
||||||
testline = line + 1;
|
testline = line + 1;
|
||||||
|
if (testline[0] == '/')
|
||||||
|
++testline;
|
||||||
for (i = 0; i < LEN(concretetags); ++i) {
|
for (i = 0; i < LEN(concretetags); ++i) {
|
||||||
char *aftertag;
|
char *aftertag;
|
||||||
aftertag = after(concretetags[i], testline);
|
aftertag = after(concretetags[i], testline);
|
||||||
@@ -135,35 +138,33 @@ notheader:
|
|||||||
continue;
|
continue;
|
||||||
if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
|
if (aftertag[0] == '\0' || strchr(" >", aftertag[0])) {
|
||||||
ret->type = HTMLCONCRETE;
|
ret->type = HTMLCONCRETE;
|
||||||
ret->data.isfirst = 1;
|
ret->data.islast = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (testline[0] == '/')
|
|
||||||
++testline;
|
|
||||||
for (i = 0; i < LEN(skeletontags); ++i) {
|
for (i = 0; i < LEN(skeletontags); ++i) {
|
||||||
char *aftertag;
|
char *aftertag;
|
||||||
aftertag = after(skeletontags[i], testline);
|
aftertag = after(skeletontags[i], testline);
|
||||||
if (aftertag == NULL)
|
if (aftertag == NULL)
|
||||||
continue;
|
continue;
|
||||||
if (aftertag[0] == '\0' ||
|
if (aftertag[0] == '\0' ||
|
||||||
strchr(" >", aftertag[0]) ||
|
strchr(" \t>", aftertag[0]) ||
|
||||||
after("/>", aftertag) != NULL) {
|
after("/>", aftertag) != NULL) {
|
||||||
ret->type = SKELETON;
|
ret->type = SKELETON;
|
||||||
ret->data.isfirst = 1;
|
ret->data.islast = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isgenerictag(line)) {
|
if (isgenerictag(line)) {
|
||||||
ret->type = GENERICTAG;
|
ret->type = GENERICTAG;
|
||||||
ret->data.isfirst = 1;
|
ret->data.islast = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret->type = PLAIN;
|
ret->type = PLAIN;
|
||||||
ret->data.isfirst = 1;
|
ret->data.islast = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,61 +276,32 @@ static char *after(char *begin, char *str) {
|
|||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void identifyend(char *line, enum linetype prev, struct linedata *ret) {
|
static int isend(char *line, enum linetype prev) {
|
||||||
int i;
|
int i;
|
||||||
ret->type = EMPTY;
|
|
||||||
switch (prev) {
|
switch (prev) {
|
||||||
case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
|
case EMPTY: case PLAIN: case SPACECODE: case FENCECODE: case HR:
|
||||||
case SETEXT1: case SETEXT2: case HEADER:
|
case SETEXT1: case SETEXT2: case HEADER:
|
||||||
return;
|
return 1;
|
||||||
/* In this case, something has gone terribly wrong. */
|
/* In this case, something has gone terribly wrong. */
|
||||||
|
|
||||||
case HTMLCONCRETE:
|
case HTMLCONCRETE:
|
||||||
for (i = 0; i < LEN(concretetags); ++i) {
|
for (i = 0; i < LEN(concretetags); ++i) {
|
||||||
char endtag[30];
|
char endtag[30];
|
||||||
sprintf(endtag, "</%s>", concretetags[i]);
|
sprintf(endtag, "</%s>", concretetags[i]);
|
||||||
if (strstr(line, endtag) != NULL) {
|
return strstr(line, endtag) != NULL;
|
||||||
ret->type = HTMLCONCRETE;
|
|
||||||
ret->data.isfirst = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return;
|
return 0;
|
||||||
case COMMENTLONG:
|
case COMMENTLONG:
|
||||||
if (strstr(line, "-->") != NULL) {
|
return strstr(line, "-->") != NULL;
|
||||||
ret->type = COMMENTLONG;
|
|
||||||
ret->data.isfirst = 0;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
case PHP:
|
case PHP:
|
||||||
if (strstr(line, "?>") != NULL) {
|
return strstr(line, "?>") != NULL;
|
||||||
ret->type = PHP;
|
|
||||||
ret->data.isfirst = 0;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
case COMMENTSHORT:
|
case COMMENTSHORT:
|
||||||
if (strchr(line, '>') != NULL) {
|
return strchr(line, '>') != NULL;
|
||||||
ret->type = COMMENTSHORT;
|
|
||||||
ret->data.isfirst = 0;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
case CDATA:
|
case CDATA:
|
||||||
if (strstr(line, "]]>") != NULL) {
|
return strstr(line, "]]>") != NULL;
|
||||||
ret->type = CDATA;
|
case SKELETON: case GENERICTAG:
|
||||||
ret->data.isfirst = 0;
|
return line[0] == '\0';
|
||||||
}
|
|
||||||
return;
|
|
||||||
case SKELETON:
|
|
||||||
if (line[0] == '\0') {
|
|
||||||
ret->type = SKELETON;
|
|
||||||
ret->data.isfirst = 0;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
case GENERICTAG:
|
|
||||||
if (line[0] == '\0') {
|
|
||||||
ret->type = GENERICTAG;
|
|
||||||
ret->data.isfirst = 0;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user