diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2014-12-12 22:12:09 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2014-12-12 22:55:11 -0800 | 
| commit | c41bf11bb38ef513fa53f88b2c80afd1504aaeaf (patch) | |
| tree | 87447007f83e40b627037e15108576113dc020c9 | |
| parent | 5c20df20af6be9444f27a8c1bbfa5b027a1fa8d8 (diff) | |
Rewrote HTML renderer using cmark_walk.
This version is shorter, more readable, and more regular.
It should serve as a template for creating new writers.
Performance is the same.  All tests pass.
| -rw-r--r-- | src/html.c | 418 | 
| -rw-r--r-- | src/node.c | 10 | 
2 files changed, 180 insertions, 248 deletions
@@ -11,9 +11,6 @@  // Functions to convert cmark_nodes to HTML strings. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight); -  static void escape_html(strbuf *dest, const unsigned char *source, int length)  {  	if (length < 0) @@ -36,322 +33,251 @@ static inline void cr(strbuf *html)  		strbuf_putc(html, '\n');  } -// Convert the inline children of a node to a plain string. -static void inlines_to_plain_html(strbuf *html, cmark_node* node) +struct render_state { +	strbuf* html; +	cmark_node *plain; +}; + +static int +S_render_node(cmark_node *node, int entering, void *vstate)  { -	cmark_node* cur = node->first_child; +	struct render_state *state = vstate; +	cmark_node *parent; +	cmark_node *grandparent; +	strbuf *html = state->html; +	char start_header[] = "<h0>"; +	char end_header[] = "</h0>"; +	strbuf *info; +	bool tight; -	if (cur == NULL) { -		return; +	if (state->plain == node) { // back at original node +		state->plain = NULL;  	} -	while (true) { -		switch(cur->type) { -		case NODE_TEXT: -		case NODE_INLINE_CODE: -		case NODE_INLINE_HTML: -			escape_html(html, cur->as.literal.data, cur->as.literal.len); +	if (state->plain != NULL) { +		switch(node->type) { +		case CMARK_NODE_TEXT: +		case CMARK_NODE_INLINE_CODE: +		case CMARK_NODE_INLINE_HTML: +			escape_html(html, node->as.literal.data, +				    node->as.literal.len);  			break; -		case NODE_LINEBREAK: -		case NODE_SOFTBREAK: +		case CMARK_NODE_LINEBREAK: +		case CMARK_NODE_SOFTBREAK:  			strbuf_putc(html, ' ');  			break;  		default:  			break;  		} - -		if (cur->first_child) { -			cur = cur->first_child; -			continue; -		} - -	next_sibling: -		if (cur->next) { -			cur = cur->next; -			continue; -		} -		cur = cur->parent; -		if (cur == node) { -			break; -		} -		goto next_sibling; -	} -} - - -// Convert a cmark_node to HTML. 
-static void node_to_html(strbuf *html, cmark_node *node) -{ -	cmark_node *cur; -	char start_header[] = "<h0>"; -	bool tight = false; -	bool visit_children; -	strbuf *info; - -	if (node == NULL) { -		return; +		return 1;  	} -	cur = node; -	while (true) { -		// Only NODE_IMAGE wants to skip its children. -		visit_children = true; - -		switch(cur->type) { -		case NODE_DOCUMENT: -			break; - -		case NODE_PARAGRAPH: -			if (!tight) { -				cr(html); -				strbuf_puts(html, "<p>"); -			} -			break; - -		case NODE_BLOCK_QUOTE: +	switch (node->type) { +	case CMARK_NODE_BLOCK_QUOTE: +		if (entering) {  			cr(html);  			strbuf_puts(html, "<blockquote>\n"); -			// BLOCK_QUOTE doesn't use any of the 'as' structs, -			// so the 'list' member can be used to store the -			// current value of 'tight'. -			cur->as.list.tight = tight; -			tight = false; -			break; - -		case NODE_LIST_ITEM: +		} else {  			cr(html); -			strbuf_puts(html, "<li>"); -			break; +			strbuf_puts(html, "</blockquote>\n"); +		} +		break; -		case NODE_LIST: { -			cmark_list *list = &cur->as.list; -			bool tmp; +	case CMARK_NODE_LIST: { +		cmark_list_type list_type = node->as.list.list_type; +		int start = node->as.list.start; -			// make sure a list starts at the beginning of the line: +		if (entering) {  			cr(html); - -			if (list->list_type == CMARK_BULLET_LIST) { +			if (list_type == CMARK_BULLET_LIST) {  				strbuf_puts(html, "<ul>\n");  			} -			else if (list->start == 1) { +			else if (start == 1) {  				strbuf_puts(html, "<ol>\n");  			}  			else {  				strbuf_printf(html, "<ol start=\"%d\">\n", -					      list->start); +					      start);  			} - -			// Store the current value of 'tight' by swapping. -			tmp = list->tight; -			list->tight = tight; -			tight = tmp; -			break; +		} else { +			strbuf_puts(html, +				    list_type == CMARK_BULLET_LIST ? 
+				    "</ul>\n" : "</ol>\n");  		} +		break; +	} -		case NODE_HEADER: -			cr(html); -			start_header[2] = '0' + cur->as.header.level; -			strbuf_puts(html, start_header); -			break; - -		case NODE_CODE_BLOCK: -			info = &cur->as.code.info; -			cr(html); - -			if (&cur->as.code.fence_length == 0 -			    || strbuf_len(info) == 0) { -				strbuf_puts(html, "<pre><code>"); -			} -			else { -				int first_tag = strbuf_strchr(info, ' ', 0); -				if (first_tag < 0) -					first_tag = strbuf_len(info); - -				strbuf_puts(html, -					    "<pre><code class=\"language-"); -				escape_html(html, info->ptr, first_tag); -				strbuf_puts(html, "\">"); -			} - -			escape_html(html, cur->string_content.ptr, cur->string_content.size); -			break; - -		case NODE_HTML: +	case CMARK_NODE_LIST_ITEM: +		if (entering) {  			cr(html); -			strbuf_put(html, cur->string_content.ptr, cur->string_content.size); -			break; +			strbuf_puts(html, "<li>"); +		} else { +			strbuf_puts(html, "</li>\n"); +		} +		break; -		case NODE_HRULE: +	case CMARK_NODE_HEADER: +		if (entering) {  			cr(html); -			strbuf_puts(html, "<hr />\n"); -			break; - -		case NODE_REFERENCE_DEF: -			break; - -		case NODE_TEXT: -			escape_html(html, cur->as.literal.data, cur->as.literal.len); -			break; - -		case NODE_LINEBREAK: -			strbuf_puts(html, "<br />\n"); -			break; - -		case NODE_SOFTBREAK: +			start_header[2] = '0' + node->as.header.level; +			strbuf_puts(html, start_header); +		} else { +			end_header[3] = '0' + node->as.header.level; +			strbuf_puts(html, end_header);  			strbuf_putc(html, '\n'); -			break; - -		case NODE_INLINE_CODE: -			strbuf_puts(html, "<code>"); -			escape_html(html, cur->as.literal.data, cur->as.literal.len); -			break; - -		case NODE_INLINE_HTML: -			strbuf_put(html, -				   cur->as.literal.data, -				   cur->as.literal.len); -			break; +		} +		break; -		case NODE_LINK: -			strbuf_puts(html, "<a href=\""); -			if (cur->as.link.url) -				escape_href(html, cur->as.link.url, -1); +	case 
CMARK_NODE_CODE_BLOCK: +		info = &node->as.code.info; +		cr(html); -			if (cur->as.link.title) { -				strbuf_puts(html, "\" title=\""); -				escape_html(html, cur->as.link.title, -1); -			} +		if (&node->as.code.fence_length == 0 +		    || strbuf_len(info) == 0) { +			strbuf_puts(html, "<pre><code>"); +		} +		else { +			int first_tag = strbuf_strchr(info, ' ', 0); +			if (first_tag < 0) +				first_tag = strbuf_len(info); +			strbuf_puts(html, "<pre><code class=\"language-"); +			escape_html(html, info->ptr, first_tag);  			strbuf_puts(html, "\">"); -			break; - -		case NODE_IMAGE: -			strbuf_puts(html, "<img src=\""); -			if (cur->as.link.url) -				escape_href(html, cur->as.link.url, -1); - -			strbuf_puts(html, "\" alt=\""); -			inlines_to_plain_html(html, cur); - -			if (cur->as.link.title) { -				strbuf_puts(html, "\" title=\""); -				escape_html(html, cur->as.link.title, -1); -			} - -			strbuf_puts(html, "\" />"); -			visit_children = false; -			break; +		} -		case NODE_STRONG: -			strbuf_puts(html, "<strong>"); -			break; +		escape_html(html, node->string_content.ptr, node->string_content.size); +		strbuf_puts(html, "</code></pre>\n"); +		break; -		case NODE_EMPH: -			strbuf_puts(html, "<em>"); -			break; +	case CMARK_NODE_HTML: +		cr(html); +		strbuf_put(html, node->string_content.ptr, +			   node->string_content.size); +		break; -		default: -			assert(false); -		} +	case CMARK_NODE_HRULE: +		cr(html); +		strbuf_puts(html, "<hr />\n"); +		break; -		if (visit_children && cur->first_child) { -			cur = cur->first_child; -			continue; -		} +	case CMARK_NODE_REFERENCE_DEF: +		break; -	next_sibling: -		tight = finish_node(html, cur, tight); -		if (cur == node) { -			break; -		} -		if (cur->next) { -			cur = cur->next; -			continue; +	case CMARK_NODE_PARAGRAPH: +		parent = cmark_node_parent(node); +		grandparent = cmark_node_parent(parent); +		if (grandparent != NULL && +		    grandparent->type == CMARK_NODE_LIST) { +			tight = grandparent->as.list.tight; +		} else 
{ +			tight = false;  		} -		cur = cur->parent; -		goto next_sibling; -	} -} - -// Returns the restored value of 'tight'. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight) -{ -	char end_header[] = "</h0>\n"; - -	switch (node->type) { -	case NODE_PARAGRAPH:  		if (!tight) { -			strbuf_puts(html, "</p>\n"); +			if (entering) { +				cr(html); +				strbuf_puts(html, "<p>"); +			} else { +				strbuf_puts(html, "</p>\n"); +			}  		}  		break; -	case NODE_BLOCK_QUOTE: { -		cmark_list *list = &node->as.list; -		strbuf_puts(html, "</blockquote>\n"); -		// Restore old 'tight' value. -		tight = list->tight; -		list->tight = false; +	case CMARK_NODE_TEXT: +		escape_html(html, node->as.literal.data, +			    node->as.literal.len);  		break; -	} -	case NODE_LIST_ITEM: -		strbuf_puts(html, "</li>\n"); +	case CMARK_NODE_LINEBREAK: +		strbuf_puts(html, "<br />\n");  		break; -	case NODE_LIST: { -		cmark_list *list = &node->as.list; -		bool tmp; -		strbuf_puts(html, -			    list->list_type == CMARK_BULLET_LIST ? -			    "</ul>\n" : "</ol>\n"); -		// Restore old 'tight' value. 
-		tmp = tight; -		tight = list->tight; -		list->tight = tmp; +	case CMARK_NODE_SOFTBREAK: +		strbuf_putc(html, '\n');  		break; -	} -	case NODE_HEADER: -		end_header[3] = '0' + node->as.header.level; -		strbuf_puts(html, end_header); +	case CMARK_NODE_INLINE_CODE: +		strbuf_puts(html, "<code>"); +		escape_html(html, node->as.literal.data, node->as.literal.len); +		strbuf_puts(html, "</code>");  		break; -	case NODE_CODE_BLOCK: -		strbuf_puts(html, "</code></pre>\n"); +	case CMARK_NODE_INLINE_HTML: +		strbuf_put(html, node->as.literal.data, node->as.literal.len);  		break; -	case NODE_INLINE_CODE: -		strbuf_puts(html, "</code>"); +	case CMARK_NODE_STRONG: +		if (entering) { +			strbuf_puts(html, "<strong>"); +		} else { +			strbuf_puts(html, "</strong>"); +		}  		break; -	case NODE_LINK: -		strbuf_puts(html, "</a>"); +	case CMARK_NODE_EMPH: +		if (entering) { +			strbuf_puts(html, "<em>"); +		} else { +			strbuf_puts(html, "</em>"); +		}  		break; -	case NODE_STRONG: -		strbuf_puts(html, "</strong>"); +	case CMARK_NODE_LINK: +		if (entering) { +			strbuf_puts(html, "<a href=\""); +			if (node->as.link.url) +				escape_href(html, node->as.link.url, -1); + +			if (node->as.link.title) { +				strbuf_puts(html, "\" title=\""); +				escape_html(html, node->as.link.title, -1); +			} + +			strbuf_puts(html, "\">"); +		} else { +			strbuf_puts(html, "</a>"); +		}  		break; -	case NODE_EMPH: -		strbuf_puts(html, "</em>"); +	case CMARK_NODE_IMAGE: +		if (entering) { +			strbuf_puts(html, "<img src=\""); +			if (node->as.link.url) +				escape_href(html, node->as.link.url, -1); + +			strbuf_puts(html, "\" alt=\""); +			state->plain = node; +		} else { +			if (node->as.link.title) { +				strbuf_puts(html, "\" title=\""); +				escape_html(html, node->as.link.title, -1); +			} + +			strbuf_puts(html, "\" />"); +		}  		break;  	default: +		assert(false);  		break;  	} -	return tight; +	// strbuf_putc(html, 'x'); +	return 1;  }  char *cmark_render_html(cmark_node *root)  {  	char 
*result;  	strbuf html = GH_BUF_INIT; -	node_to_html(&html, root); -	result = (char *)strbuf_detach(&html); -	strbuf_free(&html); -	return result; +	struct render_state state = { &html, NULL }; +	if (cmark_walk(root, S_render_node, &state)) { +		result = (char *)strbuf_detach(&html); +		strbuf_free(&html); +		return result; +	} else { +		return NULL; +	}  } @@ -773,6 +773,7 @@ int S_is_leaf_node(cmark_node *current_node)  	switch (cmark_node_get_type(current_node)) {  	case CMARK_NODE_HTML:  	case CMARK_NODE_HRULE: +	case CMARK_NODE_CODE_BLOCK:  	case CMARK_NODE_REFERENCE_DEF:  	case CMARK_NODE_TEXT:  	case CMARK_NODE_SOFTBREAK: @@ -815,8 +816,13 @@ int cmark_walk(cmark_node *root, cmark_node_handler handler, void *state)  				parent = current_node->parent;  			}  			if (next) { -				begin = 1; -				current_node = next; +				// don't go past root: +				if (current_node == root) { +					return 1; +				} else { +					begin = 1; +					current_node = next; +				}  			} else {  				begin = 0;  				depth -= 1;  | 
