diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2015-01-18 10:41:54 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2015-01-18 10:47:08 -0800 | 
| commit | 14b997d9350b3ee3f6d67fb12b470bf406d4a31b (patch) | |
| tree | b9a1416f18c27458849d48b8769c53100f55d008 | |
| parent | b34e19cd2f32342fafd6ae76de4e537240784f71 (diff) | |
Changed rule for `_` emphasis and strong emphasis.
To prevent intra-word emphasis, we used to check whether an opening
`_` was preceded, or a closing `_` followed, by an ASCII alphanumeric.
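We now do something more elegant: whereas an opening `*` must
be left-flanking, an opening `_` must be left-flanking *and not
right-flanking*.  Symmetrically, a closing `_` must be right-flanking
*and not left-flanking*, and the same rules apply to `__` for strong
emphasis.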
All the original tests passed except some tests with Russian
text with internal `_`, which formerly created emphasis but no
longer do with the new rule.  These tests have been adjusted.
A few new test cases have been added to illustrate the rule.
The C and JS implementations have both been updated.
| -rw-r--r-- | js/lib/inlines.js | 16 | ||||
| -rw-r--r-- | spec.txt | 40 | ||||
| -rw-r--r-- | src/inlines.c | 14 | 
3 files changed, 46 insertions, 24 deletions
diff --git a/js/lib/inlines.js b/js/lib/inlines.js index b9bf805..79d2c90 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -87,8 +87,6 @@ var reFinalSpace = / *$/;  var reInitialSpace = /^ */; -var reAsciiAlnum = /[a-z0-9]/i; -  var reLinkLabel = /^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/;  // Matches a string of non-special characters. @@ -238,6 +236,7 @@ var scanDelims = function(cc) {      var numdelims = 0;      var char_before, char_after, cc_after;      var startpos = this.pos; +    var left_flanking, right_flanking, can_open, can_close;      char_before = this.pos === 0 ? '\n' :          this.subject.charAt(this.pos - 1); @@ -254,17 +253,22 @@ var scanDelims = function(cc) {          char_after = fromCodePoint(cc_after);      } -    var can_open = numdelims > 0 && !(reWhitespaceChar.test(char_after)) && +    left_flanking = numdelims > 0 && +            !(reWhitespaceChar.test(char_after)) &&              !(rePunctuation.test(char_after) &&               !(/\s/.test(char_before)) &&               !(rePunctuation.test(char_before))); -    var can_close = numdelims > 0 && !(reWhitespaceChar.test(char_before)) && +    right_flanking = numdelims > 0 && +            !(reWhitespaceChar.test(char_before)) &&              !(rePunctuation.test(char_before) &&                !(reWhitespaceChar.test(char_after)) &&                !(rePunctuation.test(char_after)));      if (cc === C_UNDERSCORE) { -        can_open = can_open && !((reAsciiAlnum).test(char_before)); -        can_close = can_close && !((reAsciiAlnum).test(char_after)); +        can_open = left_flanking && !right_flanking; +        can_close = right_flanking && !left_flanking; +    } else { +        can_open = left_flanking; +        can_close = right_flanking;      }      this.pos = startpos;      return { numdelims: numdelims, @@ -4547,28 +4547,28 @@ The following rules define emphasis and strong emphasis:  2.  
A single `_` character [can open emphasis] iff      it is part of a [left-flanking delimiter run] -    and is not preceded by an ASCII alphanumeric character. +    and not part of a [right-flanking delimiter run].  3.  A single `*` character [can close emphasis](@can-close-emphasis)      iff it is part of a [right-flanking delimiter run].  4.  A single `_` character [can close emphasis] -    iff it is part of a [right-flanking delimiter run]. -    and it is not followed by an ASCII alphanumeric character. +    iff it is part of a [right-flanking delimiter run] +    and not part of a [left-flanking delimiter run].  5.  A double `**` [can open strong emphasis](@can-open-strong-emphasis)      iff it is part of a [left-flanking delimiter run].  6.  A double `__` [can open strong emphasis]      iff it is part of a [left-flanking delimiter run] -    and is not preceded by an ASCII alphanumeric character. +    and not part of a [right-flanking delimiter run].  7.  A double `**` [can close strong emphasis](@can-close-strong-emphasis)      iff it is part of a [right-flanking delimiter run].  8.  A double `__` [can close strong emphasis]      iff it is part of a [right-flanking delimiter run] -    and is not followed by an ASCII alphanumeric character. +    and not part of a [left-flanking delimiter run].  9.  Emphasis begins with a delimiter that [can open emphasis] and ends      with a delimiter that [can close emphasis], and that uses the same @@ -4701,7 +4701,7 @@ a_"foo"_  <p>a_"foo"_</p>  . -Emphasis with `_` is not allowed inside ASCII words: +Emphasis with `_` is not allowed inside words:  .  foo_bar_ @@ -4715,12 +4715,28 @@ foo_bar_  <p>5_6_78</p>  . -But it is permitted inside non-ASCII words: -  .  пристаням_стремятся_  . -<p>пристаням<em>стремятся</em></p> +<p>пристаням_стремятся_</p> +. + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +. +aa_"bb"_cc +. +<p>aa_"bb"_cc</p> +. 
+ +Here there is no emphasis, because the delimiter runs are +both left- and right-flanking: + +. +"aa"_"bb"_"cc" +. +<p>"aa"_"bb"_"cc"</p>  .  Rule 3: @@ -4810,7 +4826,7 @@ _foo_bar  .  _пристаням_стремятся  . -<p><em>пристаням</em>стремятся</p> +<p>_пристаням_стремятся</p>  .  . @@ -4897,7 +4913,7 @@ foo__bar__  .  пристаням__стремятся__  . -<p>пристаням<strong>стремятся</strong></p> +<p>пристаням__стремятся__</p>  .  . @@ -5000,7 +5016,7 @@ __foo__bar  .  __пристаням__стремятся  . -<p><strong>пристаням</strong>стремятся</p> +<p>__пристаням__стремятся</p>  .  . diff --git a/src/inlines.c b/src/inlines.c index 2487f63..2c12408 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -250,6 +250,7 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)  	int32_t after_char = 0;  	int32_t before_char = 0;  	int len; +	bool left_flanking, right_flanking;  	if (subj->pos == 0) {  		before_char = 10; @@ -277,19 +278,20 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)  	if (len == -1) {  		after_char = 10;  	} -	*can_open = numdelims > 0 && !utf8proc_is_space(after_char) && +	left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) &&  	            !(utf8proc_is_punctuation(after_char) &&  	              !utf8proc_is_space(before_char) &&  	              !utf8proc_is_punctuation(before_char)); -	*can_close = numdelims > 0 && !utf8proc_is_space(before_char) && +	right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) &&  	             !(utf8proc_is_punctuation(before_char) &&  	               !utf8proc_is_space(after_char) &&  	               !utf8proc_is_punctuation(after_char));  	if (c == '_') { -		*can_open = *can_open && !(before_char < 128 && -		                           cmark_isalnum((char)before_char)); -		*can_close = *can_close && !(before_char < 128 && -		                             cmark_isalnum((char)after_char)); +		*can_open = left_flanking && !right_flanking; +		*can_close = 
right_flanking && !left_flanking; +	} else { +		*can_open = left_flanking; +		*can_close = right_flanking;  	}  	return numdelims;  }  | 
