diff options
| -rw-r--r-- | src/blocks.c | 95 | ||||
| -rw-r--r-- | src/scanners.c | 360 | ||||
| -rw-r--r-- | src/scanners.re | 26 | 
3 files changed, 321 insertions, 160 deletions
diff --git a/src/blocks.c b/src/blocks.c index a15f819..0222c0f 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -89,6 +89,10 @@ static bool is_blank(cmark_strbuf *s, int offset)  {  	while (offset < s->size) {  		switch (s->ptr[offset]) { +		case '\r': +			if (s->ptr[offset + 1] == '\n') +				offset++; +			return true;  		case '\n':  			return true;  		case ' ': @@ -126,9 +130,10 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)  static void remove_trailing_blank_lines(cmark_strbuf *ln)  {  	int i; +	unsigned char c;  	for (i = ln->size - 1; i >= 0; --i) { -		unsigned char c = ln->ptr[i]; +		c = ln->ptr[i];  		if (c != ' ' && c != '\t' && c != '\r' && c != '\n')  			break; @@ -139,9 +144,20 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)  		return;  	} -	i = cmark_strbuf_strchr(ln, '\n', i); -	if (i >= 0) + +	for(i = 0; i < ln->size; ++i) { +		c = ln->ptr[i]; + +		if (c != '\r' && c != '\n') +			continue; + +		// Don't cut a CRLF in half +		if (c == '\r' && i+1 < ln->size && ln->ptr[i+1] == '\n') +			++i; +  		cmark_strbuf_truncate(ln, i); +		break; +	}  }  // Check to see if a node ends with a blank line, descending @@ -185,7 +201,6 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)  static cmark_node*  finalize(cmark_parser *parser, cmark_node* b)  { -	int firstlinelen;  	int pos;  	cmark_node* item;  	cmark_node* subitem; @@ -204,9 +219,11 @@ finalize(cmark_parser *parser, cmark_node* b)  	           (b->type == NODE_CODE_BLOCK && b->as.code.fenced) ||  	           (b->type == NODE_HEADER && b->as.header.setext)) {  		b->end_line = parser->line_number; -		b->end_column = parser->curline->size - -		                (parser->curline->ptr[parser->curline->size - 1] == '\n' ? -		                 1 : 0); +		b->end_column = parser->curline->size; +		if (b->end_column && parser->curline->ptr[b->end_column-1] == '\n') +			b->end_column -= 1; +		if (b->end_column && parser->curline->ptr[b->end_column-1] == '\r') +			b->end_column -= 1;  	} else {  		b->end_line = parser->line_number - 1;  		b->end_column = parser->last_line_length; @@ -232,19 +249,28 @@ finalize(cmark_parser *parser, cmark_node* b)  		} else {  			// first line of contents becomes info -			firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0); +			for (pos = 0; pos < b->string_content.size; ++pos) { +				if (b->string_content.ptr[pos] == '\r' || +				    b->string_content.ptr[pos] == '\n') +					break; +			} +			assert(pos < b->string_content.size);  			cmark_strbuf tmp = GH_BUF_INIT;  			houdini_unescape_html_f(  			    &tmp,  			    b->string_content.ptr, -			    firstlinelen +			    pos  			);  			cmark_strbuf_trim(&tmp);  			cmark_strbuf_unescape(&tmp);  			b->as.code.info = cmark_chunk_buf_detach(&tmp); -			cmark_strbuf_drop(&b->string_content, firstlinelen + 1); +			if (b->string_content.ptr[pos] == '\r') +				pos += 1; +			if (b->string_content.ptr[pos] == '\n') +				pos += 1; +			cmark_strbuf_drop(&b->string_content, pos);  		}  		b->as.code.literal = cmark_chunk_buf_detach(&b->string_content);  		break; @@ -467,13 +493,22 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,  	const unsigned char *end = buffer + len;  	while (buffer < end) { -		const unsigned char *eol -		    = (const unsigned char *)memchr(buffer, '\n', -		                                    end - buffer); +		const unsigned char *eol;  		size_t line_len; +		for (eol = buffer; eol < end; ++eol) { +			if (*eol == '\r' || *eol == '\n') +				break; +		} +		if (eol >= end) +			eol = NULL; +  		if (eol) { -			line_len = eol + 1 - buffer; +			if (eol < end && *eol == '\r') +				eol++; +			if (eol < end && *eol == '\n') +				eol++; +			line_len = eol - buffer;  		} else if (eof) {  			line_len = end - buffer;  		} else { @@ -533,6 +568,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  	// Add a newline to the end if not present:  	// TODO this breaks abstraction: +	// Note: we assume output is LF-only  	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {  		cmark_strbuf_putc(parser->curline, '\n');  	} @@ -556,7 +592,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  		}  		indent = first_nonspace - offset; -		blank = peek_at(&input, first_nonspace) == '\n'; +		blank = peek_at(&input, first_nonspace) == '\n' || +		        peek_at(&input, first_nonspace) == '\r';  		if (container->type == NODE_BLOCK_QUOTE) {  			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; @@ -657,7 +694,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  			first_nonspace++;  		indent = first_nonspace - offset; -		blank = peek_at(&input, first_nonspace) == '\n'; +		blank = peek_at(&input, first_nonspace) == '\n' || +		        peek_at(&input, first_nonspace) == '\r';  		if (indent >= CODE_INDENT) {  			if (!maybe_lazy && !blank) { @@ -713,8 +751,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  		} else if (container->type == NODE_PARAGRAPH &&  		           (lev = scan_setext_header_line(&input, first_nonspace)) &&  		           // check that there is only one line in the paragraph: -		           cmark_strbuf_strrchr(&container->string_content, '\n', -		                                cmark_strbuf_len(&container->string_content) - 2) < 0) { +		           (cmark_strbuf_strrchr(&container->string_content, '\n', +		                                 cmark_strbuf_len(&container->string_content) - 2) < 0 && +		           cmark_strbuf_strrchr(&container->string_content, '\r', +		                                cmark_strbuf_len(&container->string_content) - 2) < 0)) {  			container->type = NODE_HEADER;  			container->as.header.level = lev; @@ -738,7 +778,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  				i++;  			}  			// i = number of spaces after marker, up to 5 -			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { +			if (i >= 5 || i < 1 || +			    peek_at(&input, offset) == '\n' || +			    peek_at(&input, offset) == '\r') {  				data->padding = matched + 1;  				if (i > 0) {  					offset += 1; @@ -786,7 +828,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  		first_nonspace++;  	indent = first_nonspace - offset; -	blank = peek_at(&input, first_nonspace) == '\n'; +	blank = peek_at(&input, first_nonspace) == '\n' || +	        peek_at(&input, first_nonspace) == '\r';  	if (blank && container->last_child) {  		container->last_child->last_line_blank = true; @@ -854,10 +897,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)  		parser->current = container;  	}  finished: -	parser->last_line_length = parser->curline->size - -	                           (parser->curline->ptr[parser->curline->size - 1] == '\n' ? -	                            1 : 0); -	; +	parser->last_line_length = parser->curline->size; +	if (parser->last_line_length && +	    parser->curline->ptr[parser->last_line_length-1] == '\n') +		parser->last_line_length -= 1; +	if (parser->last_line_length && +	    parser->curline->ptr[parser->last_line_length-1] == '\r') +		parser->last_line_length -= 1; +  	cmark_strbuf_clear(parser->curline);  } diff --git a/src/scanners.c b/src/scanners.c index 42b9275..7f9ed2e 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -13253,7 +13253,7 @@ int _scan_link_url(const unsigned char *p)  	unsigned int yyaccept = 0;  	static const unsigned char yybm[] = {  		  0,  64,  64,  64,  64,  64,  64,  64,  -		 64,  64,   8,  64,  64,  64,  64,  64,  +		 64,  64,   8,  64,  64,   8,  64,  64,   		 64,  64,  64,  64,  64,  64,  64,  64,   		 64,  64,  64,  64,  64,  64,  64,  64,   		 72, 112, 112, 112, 112, 112, 112, 112,  @@ -13286,21 +13286,23 @@ int _scan_link_url(const unsigned char *p)  		112, 112, 112, 112, 112, 112, 112, 112,   	};  	yych = *p; -	if (yych <= '(') { -		if (yych <= 0x1F) { +	if (yych <= '\'') { +		if (yych <= '\f') {  			if (yych == '\n') goto yy1589;  			goto yy1597;  		} else { +			if (yych <= '\r') goto yy1591; +			if (yych <= 0x1F) goto yy1597;  			if (yych <= ' ') goto yy1591; -			if (yych <= '\'') goto yy1593; -			goto yy1596; +			goto yy1593;  		}  	} else { -		if (yych <= '<') { +		if (yych <= ';') { +			if (yych <= '(') goto yy1596;  			if (yych <= ')') goto yy1597; -			if (yych <= ';') goto yy1593; -			goto yy1592; +			goto yy1593;  		} else { +			if (yych <= '<') goto yy1592;  			if (yych == '\\') goto yy1594;  			goto yy1593;  		} @@ -13339,13 +13341,18 @@ yy1592:  	if (yybm[0+yych] & 32) {  		goto yy1605;  	} -	if (yych <= '\'') { -		if (yych <= 0x00) goto yy1588; -		if (yych == '\n') goto yy1588; -		goto yy1612; +	if (yych <= '\r') { +		if (yych <= '\t') { +			if (yych <= 0x00) goto yy1588; +			goto yy1612; +		} else { +			if (yych <= '\n') goto yy1588; +			if (yych <= '\f') goto yy1612; +			goto yy1588; +		}  	} else {  		if (yych <= ')') { -			if (yych <= '(') goto yy1610; +			if (yych == '(') goto yy1610;  			goto yy1612;  		} else {  			if (yych <= '=') goto yy1602; @@ -13459,13 +13466,18 @@ yy1605:  	if (yybm[0+yych] & 32) {  		goto yy1605;  	} -	if (yych <= '\'') { -		if (yych <= 0x00) goto yy1588; -		if (yych == '\n') goto yy1588; -		goto yy1612; +	if (yych <= '\r') { +		if (yych <= '\t') { +			if (yych <= 0x00) goto yy1588; +			goto yy1612; +		} else { +			if (yych <= '\n') goto yy1588; +			if (yych <= '\f') goto yy1612; +			goto yy1588; +		}  	} else {  		if (yych <= ')') { -			if (yych <= '(') goto yy1610; +			if (yych == '(') goto yy1610;  			goto yy1612;  		} else {  			if (yych <= '=') goto yy1602; @@ -13482,44 +13494,53 @@ yy1608:  yy1609:  	++p;  	yych = *p; -	if (yych <= '>') { -		if (yych <= ' ') { +	if (yych <= '=') { +		if (yych <= '\f') {  			if (yych <= 0x00) goto yy1600;  			if (yych == '\n') goto yy1600;  			goto yy1612;  		} else { -			if (yych <= '/') goto yy1605; -			if (yych <= '9') goto yy1612; -			if (yych <= '=') goto yy1605; -			goto yy1622; +			if (yych <= ' ') { +				if (yych <= '\r') goto yy1600; +				goto yy1612; +			} else { +				if (yych <= '/') goto yy1605; +				if (yych <= '9') goto yy1612; +				goto yy1605; +			}  		}  	} else { -		if (yych <= '\\') { +		if (yych <= '[') { +			if (yych <= '>') goto yy1622;  			if (yych <= '@') goto yy1605;  			if (yych <= 'Z') goto yy1612; -			if (yych <= '[') goto yy1605; -			goto yy1623; +			goto yy1605;  		} else { -			if (yych <= '`') goto yy1605; -			if (yych <= 'z') goto yy1612; -			if (yych <= '~') goto yy1605; -			goto yy1612; +			if (yych <= '`') { +				if (yych <= '\\') goto yy1623; +				goto yy1605; +			} else { +				if (yych <= 'z') goto yy1612; +				if (yych <= '~') goto yy1605; +				goto yy1612; +			}  		}  	}  yy1610:  	++p;  	yych = *p; -	if (yych <= ')') { -		if (yych <= '\n') { +	if (yych <= '(') { +		if (yych <= '\f') {  			if (yych <= 0x00) goto yy1600; -			if (yych >= '\n') goto yy1600; +			if (yych == '\n') goto yy1600;  		} else { +			if (yych <= '\r') goto yy1600;  			if (yych <= ' ') goto yy1612;  			if (yych <= '\'') goto yy1610; -			if (yych >= ')') goto yy1605;  		}  	} else {  		if (yych <= '=') { +			if (yych <= ')') goto yy1605;  			if (yych == '<') goto yy1598;  			goto yy1610;  		} else { @@ -13545,11 +13566,12 @@ yy1615:  	if (yybm[0+yych] & 128) {  		goto yy1615;  	} -	if (yych <= '\n') { +	if (yych <= '\f') {  		if (yych <= 0x00) goto yy1600; -		if (yych <= '\t') goto yy1612; -		goto yy1600; +		if (yych == '\n') goto yy1600; +		goto yy1612;  	} else { +		if (yych <= '\r') goto yy1600;  		if (yych != '>') goto yy1612;  	}  	yyaccept = 2; @@ -13570,46 +13592,56 @@ yy1618:  yy1619:  	++p;  	yych = *p; -	if (yych <= '>') { -		if (yych <= ' ') { +	if (yych <= '=') { +		if (yych <= '\f') {  			if (yych <= 0x00) goto yy1600;  			if (yych == '\n') goto yy1600;  			goto yy1612;  		} else { -			if (yych <= '/') goto yy1610; -			if (yych <= '9') goto yy1612; -			if (yych <= '=') goto yy1610; +			if (yych <= ' ') { +				if (yych <= '\r') goto yy1600; +				goto yy1612; +			} else { +				if (yych <= '/') goto yy1610; +				if (yych <= '9') goto yy1612; +				goto yy1610; +			}  		}  	} else { -		if (yych <= '\\') { +		if (yych <= '[') { +			if (yych <= '>') goto yy1620;  			if (yych <= '@') goto yy1610;  			if (yych <= 'Z') goto yy1612; -			if (yych <= '[') goto yy1610; -			goto yy1621; +			goto yy1610;  		} else { -			if (yych <= '`') goto yy1610; -			if (yych <= 'z') goto yy1612; -			if (yych <= '~') goto yy1610; -			goto yy1612; +			if (yych <= '`') { +				if (yych <= '\\') goto yy1621; +				goto yy1610; +			} else { +				if (yych <= 'z') goto yy1612; +				if (yych <= '~') goto yy1610; +				goto yy1612; +			}  		}  	}  yy1620:  	yyaccept = 2;  	marker = ++p;  	yych = *p; -	if (yych <= ')') { -		if (yych <= '\n') { +	if (yych <= '(') { +		if (yych <= '\f') {  			if (yych <= 0x00) goto yy1608; -			if (yych <= '\t') goto yy1612; -			goto yy1608; +			if (yych == '\n') goto yy1608; +			goto yy1612;  		} else { +			if (yych <= '\r') goto yy1608;  			if (yych <= ' ') goto yy1612;  			if (yych <= '\'') goto yy1610; -			if (yych <= '(') goto yy1612; -			goto yy1605; +			goto yy1612;  		}  	} else {  		if (yych <= '=') { +			if (yych <= ')') goto yy1605;  			if (yych == '<') goto yy1598;  			goto yy1610;  		} else { @@ -13621,22 +13653,23 @@ yy1620:  yy1621:  	++p;  	yych = *p; -	if (yych <= '(') { +	if (yych <= '\'') {  		if (yych <= '\n') {  			if (yych <= 0x00) goto yy1600;  			if (yych <= '\t') goto yy1612;  			goto yy1600;  		} else { +			if (yych == '\r') goto yy1600;  			if (yych <= ' ') goto yy1612; -			if (yych <= '\'') goto yy1610; -			goto yy1612; +			goto yy1610;  		}  	} else { -		if (yych <= '>') { +		if (yych <= '=') { +			if (yych <= '(') goto yy1612;  			if (yych <= ')') goto yy1605; -			if (yych <= '=') goto yy1610; -			goto yy1620; +			goto yy1610;  		} else { +			if (yych <= '>') goto yy1620;  			if (yych == '\\') goto yy1619;  			goto yy1610;  		} @@ -13648,13 +13681,18 @@ yy1622:  	if (yybm[0+yych] & 32) {  		goto yy1605;  	} -	if (yych <= '\'') { -		if (yych <= 0x00) goto yy1608; -		if (yych == '\n') goto yy1608; -		goto yy1612; +	if (yych <= '\r') { +		if (yych <= '\t') { +			if (yych <= 0x00) goto yy1608; +			goto yy1612; +		} else { +			if (yych <= '\n') goto yy1608; +			if (yych <= '\f') goto yy1612; +			goto yy1608; +		}  	} else {  		if (yych <= ')') { -			if (yych <= '(') goto yy1610; +			if (yych == '(') goto yy1610;  			goto yy1612;  		} else {  			if (yych <= '=') goto yy1602; @@ -13666,22 +13704,23 @@ yy1623:  	yyaccept = 0;  	marker = ++p;  	yych = *p; -	if (yych <= '(') { +	if (yych <= '\'') {  		if (yych <= '\n') {  			if (yych <= 0x00) goto yy1588;  			if (yych <= '\t') goto yy1612;  			goto yy1588;  		} else { +			if (yych == '\r') goto yy1588;  			if (yych <= ' ') goto yy1612; -			if (yych <= '\'') goto yy1605; -			goto yy1610; +			goto yy1605;  		}  	} else { -		if (yych <= '>') { +		if (yych <= '=') { +			if (yych <= '(') goto yy1610;  			if (yych <= ')') goto yy1612; -			if (yych <= '=') goto yy1605; -			goto yy1622; +			goto yy1605;  		} else { +			if (yych <= '>') goto yy1622;  			if (yych == '\\') goto yy1609;  			goto yy1605;  		} @@ -14006,9 +14045,14 @@ yy1664:  	if (yybm[0+yych] & 128) {  		goto yy1668;  	} -	if (yych == '\n') goto yy1666; -	if (yych == '#') goto yy1670; -	goto yy1663; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1666; +		goto yy1663; +	} else { +		if (yych <= '\r') goto yy1666; +		if (yych == '#') goto yy1670; +		goto yy1663; +	}  yy1665:  	yych = *++p;  	goto yy1663; @@ -14028,8 +14072,12 @@ yy1670:  	if (yybm[0+yych] & 128) {  		goto yy1668;  	} -	if (yych == '\n') goto yy1666; -	if (yych == '#') goto yy1672; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1666; +	} else { +		if (yych <= '\r') goto yy1666; +		if (yych == '#') goto yy1672; +	}  yy1671:  	p = marker;  	goto yy1663; @@ -14038,31 +14086,47 @@ yy1672:  	if (yybm[0+yych] & 128) {  		goto yy1668;  	} -	if (yych == '\n') goto yy1666; -	if (yych != '#') goto yy1671; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1666; +		goto yy1671; +	} else { +		if (yych <= '\r') goto yy1666; +		if (yych != '#') goto yy1671; +	}  	yych = *++p;  	if (yybm[0+yych] & 128) {  		goto yy1668;  	} -	if (yych == '\n') goto yy1666; -	if (yych != '#') goto yy1671; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1666; +		goto yy1671; +	} else { +		if (yych <= '\r') goto yy1666; +		if (yych != '#') goto yy1671; +	}  	yych = *++p;  	if (yybm[0+yych] & 128) {  		goto yy1668;  	} -	if (yych == '\n') goto yy1666; -	if (yych != '#') goto yy1671; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1666; +		goto yy1671; +	} else { +		if (yych <= '\r') goto yy1666; +		if (yych != '#') goto yy1671; +	}  	++p;  	if (yybm[0+(yych = *p)] & 128) {  		goto yy1668;  	}  	if (yych == '\n') goto yy1666; +	if (yych == '\r') goto yy1666;  	goto yy1671;  }  } -// Match sexext header line.  Return 1 for level-1 header, +// Match setext header line.  Return 1 for level-1 header,  // 2 for level-2, 0 for no match.  int _scan_setext_header_line(const unsigned char *p)  { @@ -14119,17 +14183,27 @@ yy1679:  	if (yybm[0+yych] & 128) {  		goto yy1693;  	} -	if (yych == '\n') goto yy1691; -	if (yych == ' ') goto yy1689; -	goto yy1678; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1691; +		goto yy1678; +	} else { +		if (yych <= '\r') goto yy1691; +		if (yych == ' ') goto yy1689; +		goto yy1678; +	}  yy1680:  	yych = *(marker = ++p);  	if (yybm[0+yych] & 32) {  		goto yy1682;  	} -	if (yych == '\n') goto yy1685; -	if (yych == '-') goto yy1687; -	goto yy1678; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1685; +		goto yy1678; +	} else { +		if (yych <= '\r') goto yy1685; +		if (yych == '-') goto yy1687; +		goto yy1678; +	}  yy1681:  	yych = *++p;  	goto yy1678; @@ -14140,6 +14214,7 @@ yy1682:  		goto yy1682;  	}  	if (yych == '\n') goto yy1685; +	if (yych == '\r') goto yy1685;  yy1684:  	p = marker;  	goto yy1678; @@ -14152,15 +14227,24 @@ yy1687:  	if (yybm[0+yych] & 32) {  		goto yy1682;  	} -	if (yych == '\n') goto yy1685; -	if (yych == '-') goto yy1687; -	goto yy1684; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1685; +		goto yy1684; +	} else { +		if (yych <= '\r') goto yy1685; +		if (yych == '-') goto yy1687; +		goto yy1684; +	}  yy1689:  	++p;  	yych = *p; -	if (yych == '\n') goto yy1691; -	if (yych == ' ') goto yy1689; -	goto yy1684; +	if (yych <= '\f') { +		if (yych != '\n') goto yy1684; +	} else { +		if (yych <= '\r') goto yy1691; +		if (yych == ' ') goto yy1689; +		goto yy1684; +	}  yy1691:  	++p;  	{ return 1; } @@ -14170,9 +14254,14 @@ yy1693:  	if (yybm[0+yych] & 128) {  		goto yy1693;  	} -	if (yych == '\n') goto yy1691; -	if (yych == ' ') goto yy1689; -	goto yy1684; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1691; +		goto yy1684; +	} else { +		if (yych <= '\r') goto yy1691; +		if (yych == ' ') goto yy1689; +		goto yy1684; +	}  }  } @@ -14278,17 +14367,21 @@ yy1707:  	if (yybm[0+yych] & 16) {  		goto yy1707;  	} -	if (yych <= 0x08) goto yy1704; -	if (yych <= '\t') goto yy1709; -	if (yych <= '\n') goto yy1711; -	goto yy1704; +	if (yych <= '\n') { +		if (yych <= 0x08) goto yy1704; +		if (yych >= '\n') goto yy1711; +	} else { +		if (yych == '\r') goto yy1711; +		goto yy1704; +	}  yy1709:  	++p;  	yych = *p;  	if (yybm[0+yych] & 32) {  		goto yy1709;  	} -	if (yych != '\n') goto yy1704; +	if (yych == '\n') goto yy1711; +	if (yych != '\r') goto yy1704;  yy1711:  	++p;  	{ return (p - start); } @@ -14308,17 +14401,22 @@ yy1717:  	if (yybm[0+yych] & 64) {  		goto yy1717;  	} -	if (yych <= 0x08) goto yy1704; -	if (yych <= '\t') goto yy1719; -	if (yych <= '\n') goto yy1721; -	goto yy1704; +	if (yych <= '\n') { +		if (yych <= 0x08) goto yy1704; +		if (yych >= '\n') goto yy1721; +	} else { +		if (yych == '\r') goto yy1721; +		goto yy1704; +	}  yy1719:  	++p;  	yych = *p; -	if (yych <= '\n') { +	if (yych <= '\f') {  		if (yych <= 0x08) goto yy1704;  		if (yych <= '\t') goto yy1719; +		if (yych >= '\v') goto yy1704;  	} else { +		if (yych <= '\r') goto yy1721;  		if (yych == ' ') goto yy1719;  		goto yy1704;  	} @@ -14341,17 +14439,22 @@ yy1727:  	if (yybm[0+yych] & 128) {  		goto yy1727;  	} -	if (yych <= 0x08) goto yy1704; -	if (yych <= '\t') goto yy1729; -	if (yych <= '\n') goto yy1731; -	goto yy1704; +	if (yych <= '\n') { +		if (yych <= 0x08) goto yy1704; +		if (yych >= '\n') goto yy1731; +	} else { +		if (yych == '\r') goto yy1731; +		goto yy1704; +	}  yy1729:  	++p;  	yych = *p; -	if (yych <= '\n') { +	if (yych <= '\f') {  		if (yych <= 0x08) goto yy1704;  		if (yych <= '\t') goto yy1729; +		if (yych >= '\v') goto yy1704;  	} else { +		if (yych <= '\r') goto yy1731;  		if (yych == ' ') goto yy1729;  		goto yy1704;  	} @@ -14372,7 +14475,7 @@ int _scan_open_code_fence(const unsigned char *p)  	unsigned char yych;  	static const unsigned char yybm[] = {  		  0, 160, 160, 160, 160, 160, 160, 160,  -		160, 160,   0, 160, 160, 160, 160, 160,  +		160, 160,   0, 160, 160,   0, 160, 160,   		160, 160, 160, 160, 160, 160, 160, 160,   		160, 160, 160, 160, 160, 160, 160, 160,   		160, 160, 160, 160, 160, 160, 160, 160,  @@ -14565,16 +14668,22 @@ yy1762:  	if (yybm[0+yych] & 64) {  		goto yy1764;  	} -	if (yych == '\n') goto yy1766; -	if (yych == '~') goto yy1762; -	goto yy1761; +	if (yych <= '\f') { +		if (yych == '\n') goto yy1766; +		goto yy1761; +	} else { +		if (yych <= '\r') goto yy1766; +		if (yych == '~') goto yy1762; +		goto yy1761; +	}  yy1764:  	++p;  	yych = *p;  	if (yybm[0+yych] & 64) {  		goto yy1764;  	} -	if (yych != '\n') goto yy1761; +	if (yych == '\n') goto yy1766; +	if (yych != '\r') goto yy1761;  yy1766:  	++p;  	p = marker; @@ -14592,19 +14701,24 @@ yy1769:  	if (yybm[0+yych] & 128) {  		goto yy1769;  	} -	if (yych <= '\n') { +	if (yych <= '\f') {  		if (yych <= 0x08) goto yy1761; -		if (yych >= '\n') goto yy1773; +		if (yych <= '\t') goto yy1771; +		if (yych <= '\n') goto yy1773; +		goto yy1761;  	} else { +		if (yych <= '\r') goto yy1773;  		if (yych != ' ') goto yy1761;  	}  yy1771:  	++p;  	yych = *p; -	if (yych <= '\n') { +	if (yych <= '\f') {  		if (yych <= 0x08) goto yy1761;  		if (yych <= '\t') goto yy1771; +		if (yych >= '\v') goto yy1761;  	} else { +		if (yych <= '\r') goto yy1773;  		if (yych == ' ') goto yy1771;  		goto yy1761;  	} diff --git a/src/scanners.re b/src/scanners.re index 31cdb4f..9411018 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -140,8 +140,8 @@ int _scan_link_url(const unsigned char *p)    const unsigned char *marker = NULL;    const unsigned char *start = p;  /*!re2c -  [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } -  [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } +  [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } +  [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }    .? { return 0; }  */  } @@ -177,19 +177,19 @@ int _scan_atx_header_start(const unsigned char *p)    const unsigned char *marker = NULL;    const unsigned char *start = p;  /*!re2c -  [#]{1,6} ([ ]+|[\n])  { return (p - start); } +  [#]{1,6} ([ ]+|[\r\n])  { return (p - start); }    .? { return 0; }  */  } -// Match sexext header line.  Return 1 for level-1 header, +// Match setext header line.  Return 1 for level-1 header,  // 2 for level-2, 0 for no match.  int _scan_setext_header_line(const unsigned char *p)  {    const unsigned char *marker = NULL;  /*!re2c -  [=]+ [ ]* [\n] { return 1; } -  [-]+ [ ]* [\n] { return 2; } +  [=]+ [ ]* [\r\n] { return 1; } +  [-]+ [ ]* [\r\n] { return 2; }    .? { return 0; }  */  } @@ -202,9 +202,9 @@ int _scan_hrule(const unsigned char *p)    const unsigned char *marker = NULL;    const unsigned char *start = p;  /*!re2c -  ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } -  ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } -  ([-][ ]*){3,} [ \t]* [\n] { return (p - start); } +  ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); } +  ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); } +  ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); }    .? { return 0; }  */  } @@ -215,8 +215,8 @@ int _scan_open_code_fence(const unsigned char *p)    const unsigned char *marker = NULL;    const unsigned char *start = p;  /*!re2c -  [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } -  [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } +  [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); } +  [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); }    .?                        { return 0; }  */  } @@ -227,8 +227,8 @@ int _scan_close_code_fence(const unsigned char *p)    const unsigned char *marker = NULL;    const unsigned char *start = p;  /*!re2c -  [`]{3,} / [ \t]*[\n] { return (p - start); } -  [~]{3,} / [ \t]*[\n] { return (p - start); } +  [`]{3,} / [ \t]*[\r\n] { return (p - start); } +  [~]{3,} / [ \t]*[\r\n] { return (p - start); }    .? { return 0; }  */  }  | 
