diff options
| author | Jason A. Donenfeld <Jason@zx2c4.com> | 2015-10-09 15:13:35 +0200 | 
|---|---|---|
| committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2015-10-09 15:13:35 +0200 | 
| commit | 525c815cc400bc49881144bcd7e7b717bbc1af5d (patch) | |
| tree | 1d4ed0d11a950c45cc1fceb26cd3aa20ed6c2300 /filters/html-converters | |
| parent | 6edfc1672cdc5eb0dfb0ff5db0ec1de1ec53415e (diff) | |
filters: Simplify converters
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'filters/html-converters')
| -rwxr-xr-x | filters/html-converters/md2html | 283 | ||||
| -rwxr-xr-x | filters/html-converters/resources/markdown.pl | 1727 | ||||
| -rw-r--r-- | filters/html-converters/resources/rst-template.txt | 4 | ||||
| -rwxr-xr-x | filters/html-converters/rst2html | 4 | 
4 files changed, 284 insertions, 1734 deletions
| diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html index 5cab749..138713d 100755 --- a/filters/html-converters/md2html +++ b/filters/html-converters/md2html @@ -1,2 +1,283 @@  #!/bin/sh -exec "$(dirname "$0")/resources/markdown.pl" +cat <<_EOF +<style> +.markdown-body { +    font-size: 14px; +    line-height: 1.6; +    overflow: hidden; +} +.markdown-body>*:first-child { +    margin-top: 0 !important; +} +.markdown-body>*:last-child { +    margin-bottom: 0 !important; +} +.markdown-body a.absent { +    color: #c00; +} +.markdown-body a.anchor { +    display: block; +    padding-left: 30px; +    margin-left: -30px; +    cursor: pointer; +    position: absolute; +    top: 0; +    left: 0; +    bottom: 0; +} +.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { +    margin: 20px 0 10px; +    padding: 0; +    font-weight: bold; +    -webkit-font-smoothing: antialiased; +    cursor: text; +    position: relative; +} +.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { +    display: none; +    color: #000; +} +.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { +    text-decoration: none; +    line-height: 1; +    padding-left: 0; +    margin-left: -22px; +    top: 15%} +.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { +    display: inline-block; +} +.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, 
.markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { +    font-size: inherit; +} +.markdown-body h1 { +    font-size: 28px; +    color: #000; +} +.markdown-body h2 { +    font-size: 24px; +    border-bottom: 1px solid #ccc; +    color: #000; +} +.markdown-body h3 { +    font-size: 18px; +} +.markdown-body h4 { +    font-size: 16px; +} +.markdown-body h5 { +    font-size: 14px; +} +.markdown-body h6 { +    color: #777; +    font-size: 14px; +} +.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { +    margin: 15px 0; +} +.markdown-body hr { +    background: transparent url("/dirty-shade.png") repeat-x 0 0; +    border: 0 none; +    color: #ccc; +    height: 4px; +    padding: 0; +} +.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { +    margin-top: 0; +    padding-top: 0; +} +.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { +    margin-top: 0; +    padding-top: 0; +} +.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { +    margin-top: 0; +} +.markdown-body li p.first { +    display: inline-block; +} +.markdown-body ul, .markdown-body ol { +    padding-left: 30px; +} +.markdown-body ul.no-list, .markdown-body ol.no-list { +    list-style-type: none; +    padding: 0; +} +.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol 
li ul:first-of-type, .markdown-body ol li ol:first-of-type { +    margin-top: 0px; +} +.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { +    margin-bottom: 0; +} +.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { +    margin-bottom: 0; +} +.markdown-body dl { +    padding: 0; +} +.markdown-body dl dt { +    font-size: 14px; +    font-weight: bold; +    font-style: italic; +    padding: 0; +    margin: 15px 0 5px; +} +.markdown-body dl dt:first-child { +    padding: 0; +} +.markdown-body dl dt>:first-child { +    margin-top: 0px; +} +.markdown-body dl dt>:last-child { +    margin-bottom: 0px; +} +.markdown-body dl dd { +    margin: 0 0 15px; +    padding: 0 15px; +} +.markdown-body dl dd>:first-child { +    margin-top: 0px; +} +.markdown-body dl dd>:last-child { +    margin-bottom: 0px; +} +.markdown-body blockquote { +    border-left: 4px solid #DDD; +    padding: 0 15px; +    color: #777; +} +.markdown-body blockquote>:first-child { +    margin-top: 0px; +} +.markdown-body blockquote>:last-child { +    margin-bottom: 0px; +} +.markdown-body table th { +    font-weight: bold; +} +.markdown-body table th, .markdown-body table td { +    border: 1px solid #ccc; +    padding: 6px 13px; +} +.markdown-body table tr { +    border-top: 1px solid #ccc; +    background-color: #fff; +} +.markdown-body table tr:nth-child(2n) { +    background-color: #f8f8f8; +} +.markdown-body img { +    max-width: 100%; +    -moz-box-sizing: border-box; +    box-sizing: border-box; +} +.markdown-body span.frame { +    display: block; +    overflow: hidden; +} +.markdown-body span.frame>span { +    border: 1px solid #ddd; +    display: block; +    float: left; +    overflow: hidden; +    margin: 13px 0 0; +    padding: 7px; +    width: auto; +} +.markdown-body span.frame span img { +    display: block; +    float: left; +} +.markdown-body span.frame span span { +    clear: both; +    color: #333; +    display: block; +    
padding: 5px 0 0; +} +.markdown-body span.align-center { +    display: block; +    overflow: hidden; +    clear: both; +} +.markdown-body span.align-center>span { +    display: block; +    overflow: hidden; +    margin: 13px auto 0; +    text-align: center; +} +.markdown-body span.align-center span img { +    margin: 0 auto; +    text-align: center; +} +.markdown-body span.align-right { +    display: block; +    overflow: hidden; +    clear: both; +} +.markdown-body span.align-right>span { +    display: block; +    overflow: hidden; +    margin: 13px 0 0; +    text-align: right; +} +.markdown-body span.align-right span img { +    margin: 0; +    text-align: right; +} +.markdown-body span.float-left { +    display: block; +    margin-right: 13px; +    overflow: hidden; +    float: left; +} +.markdown-body span.float-left span { +    margin: 13px 0 0; +} +.markdown-body span.float-right { +    display: block; +    margin-left: 13px; +    overflow: hidden; +    float: right; +} +.markdown-body span.float-right>span { +    display: block; +    overflow: hidden; +    margin: 13px auto 0; +    text-align: right; +} +.markdown-body code, .markdown-body tt { +    margin: 0 2px; +    padding: 0px 5px; +    border: 1px solid #eaeaea; +    background-color: #f8f8f8; +    border-radius: 3px; +} +.markdown-body code { +    white-space: nowrap; +} +.markdown-body pre>code { +    margin: 0; +    padding: 0; +    white-space: pre; +    border: none; +    background: transparent; +} +.markdown-body .highlight pre, .markdown-body pre { +    background-color: #f8f8f8; +    border: 1px solid #ccc; +    font-size: 13px; +    line-height: 19px; +    overflow: auto; +    padding: 6px 10px; +    border-radius: 3px; +} +.markdown-body pre code, .markdown-body pre tt { +    margin: 0; +    padding: 0; +    background-color: transparent; +    border: none; +} +</style>    +_EOF +echo "<div class='markdown-body'>" +markdown_py -o html5 +echo "</div>" diff --git 
a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl deleted file mode 100755 index 4c39808..0000000 --- a/filters/html-converters/resources/markdown.pl +++ /dev/null @@ -1,1727 +0,0 @@ -#!/usr/bin/perl - -# -# Markdown -- A text-to-HTML conversion tool for web writers -# -# Copyright (c) 2004 John Gruber -# <http://daringfireball.net/projects/markdown/> -# - - -package Markdown; -require 5.006_000; -use strict; -use warnings; - -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION); -$VERSION = '1.0.1'; -# Tue 14 Dec 2004 - - -# -# Global default settings: -# -my $g_empty_element_suffix = " />";     # Change to ">" for HTML output -my $g_tab_width = 4; - - -# -# Globals: -# - -# Regex to match balanced [brackets]. See Friedl's -# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my $g_nested_brackets; -$g_nested_brackets = qr{ -	(?> 								# Atomic matching -	   [^\[\]]+							# Anything other than brackets -	 | -	   \[ -		 (??{ $g_nested_brackets })		# Recursive set of nested brackets -	   \] -	)* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { -	$g_escape_table{$char} = md5_hex($char); -} - - -# Global hashes, used by various utility routines -my %g_urls; -my %g_titles; -my %g_html_blocks; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; - - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -my $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { -	my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - -	if ( (! 
$g_blosxom_use_meta) or -	     (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) -	     ){ -			$$body_ref  = Markdown($$body_ref); -     } -     1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT};  # Test to see if we're running in MT. -unless ($@) { -    require MT; -    import  MT; -    require MT::Template::Context; -    import  MT::Template::Context; - -	eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0. -	unless ($@) { -		require MT::Plugin; -		import  MT::Plugin; -		my $plugin = new MT::Plugin({ -			name => "Markdown", -			description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", -			doc_link => 'http://daringfireball.net/projects/markdown/' -		}); -		MT->add_plugin( $plugin ); -	} - -	MT::Template::Context->add_container_tag(MarkdownOptions => sub { -		my $ctx	 = shift; -		my $args = shift; -		my $builder = $ctx->stash('builder'); -		my $tokens = $ctx->stash('tokens'); - -		if (defined ($args->{'output'}) ) { -			$ctx->stash('markdown_output', lc $args->{'output'}); -		} - -		defined (my $str = $builder->build($ctx, $tokens) ) -			or return $ctx->error($builder->errstr); -		$str;		# return value -	}); - -	MT->add_text_filter('markdown' => { -		label     => 'Markdown', -		docs      => 'http://daringfireball.net/projects/markdown/', -		on_format => sub { -			my $text = shift; -			my $ctx  = shift; -			my $raw  = 0; -		    if (defined $ctx) { -				my $output = $ctx->stash('markdown_output'); -				if (defined $output  &&  $output =~ m/^html/i) { -					$g_empty_element_suffix = ">"; -					$ctx->stash('markdown_output', ''); -				} -				elsif (defined $output  &&  $output eq 'raw') { -					$raw = 1; -					$ctx->stash('markdown_output', ''); -				} -				else { -					$raw = 0; -					$g_empty_element_suffix = " />"; -				} -			} -			$text = $raw ? 
$text : Markdown($text); -			$text; -		}, -	}); - -	# If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: -	my $smartypants; - -	{ -		no warnings "once"; -		$smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; -	} - -	if ($smartypants) { -		MT->add_text_filter('markdown_with_smartypants' => { -			label     => 'Markdown With SmartyPants', -			docs      => 'http://daringfireball.net/projects/markdown/', -			on_format => sub { -				my $text = shift; -				my $ctx  = shift; -				if (defined $ctx) { -					my $output = $ctx->stash('markdown_output'); -					if (defined $output  &&  $output eq 'html') { -						$g_empty_element_suffix = ">"; -					} -					else { -						$g_empty_element_suffix = " />"; -					} -				} -				$text = Markdown($text); -				$text = $smartypants->($text, '1'); -			}, -		}); -	} -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - -    # We're only using $blosxom::version once; tell Perl not to warn us: -	no warnings 'once'; -    unless ( defined($blosxom::version) ) { -		use warnings; - -		#### Check for command-line switches: ################# -		my %cli_opts; -		use Getopt::Long; -		Getopt::Long::Configure('pass_through'); -		GetOptions(\%cli_opts, -			'version', -			'shortversion', -			'html4tags', -		); -		if ($cli_opts{'version'}) {		# Version info -			print "\nThis is Markdown, version $VERSION.\n"; -			print "Copyright 2004 John Gruber\n"; -			print "http://daringfireball.net/projects/markdown/\n\n"; -			exit 0; -		} -		if ($cli_opts{'shortversion'}) {		# Just the version number string. 
-			print $VERSION; -			exit 0; -		} -		if ($cli_opts{'html4tags'}) {			# Use HTML tag style instead of XHTML -			$g_empty_element_suffix = ">"; -		} - - -		#### Process incoming text: ########################### -		my $text; -		{ -			local $/;               # Slurp the whole file -			$text = <>; -		} -	print <<'EOT'; -<style> -.markdown-body { -    font-size: 14px; -    line-height: 1.6; -    overflow: hidden; -} -.markdown-body>*:first-child { -    margin-top: 0 !important; -} -.markdown-body>*:last-child { -    margin-bottom: 0 !important; -} -.markdown-body a.absent { -    color: #c00; -} -.markdown-body a.anchor { -    display: block; -    padding-left: 30px; -    margin-left: -30px; -    cursor: pointer; -    position: absolute; -    top: 0; -    left: 0; -    bottom: 0; -} -.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { -    margin: 20px 0 10px; -    padding: 0; -    font-weight: bold; -    -webkit-font-smoothing: antialiased; -    cursor: text; -    position: relative; -} -.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { -    display: none; -    color: #000; -} -.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { -    text-decoration: none; -    line-height: 1; -    padding-left: 0; -    margin-left: -22px; -    top: 15%} -.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { -    display: inline-block; -} -.markdown-body h1 tt, 
.markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { -    font-size: inherit; -} -.markdown-body h1 { -    font-size: 28px; -    color: #000; -} -.markdown-body h2 { -    font-size: 24px; -    border-bottom: 1px solid #ccc; -    color: #000; -} -.markdown-body h3 { -    font-size: 18px; -} -.markdown-body h4 { -    font-size: 16px; -} -.markdown-body h5 { -    font-size: 14px; -} -.markdown-body h6 { -    color: #777; -    font-size: 14px; -} -.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { -    margin: 15px 0; -} -.markdown-body hr { -    background: transparent url("/dirty-shade.png") repeat-x 0 0; -    border: 0 none; -    color: #ccc; -    height: 4px; -    padding: 0; -} -.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { -    margin-top: 0; -    padding-top: 0; -} -.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { -    margin-top: 0; -    padding-top: 0; -} -.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { -    margin-top: 0; -} -.markdown-body li p.first { -    display: inline-block; -} -.markdown-body ul, .markdown-body ol { -    padding-left: 30px; -} -.markdown-body ul.no-list, .markdown-body ol.no-list { -    list-style-type: none; -    padding: 0; -} -.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, 
.markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { -    margin-top: 0px; -} -.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { -    margin-bottom: 0; -} -.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { -    margin-bottom: 0; -} -.markdown-body dl { -    padding: 0; -} -.markdown-body dl dt { -    font-size: 14px; -    font-weight: bold; -    font-style: italic; -    padding: 0; -    margin: 15px 0 5px; -} -.markdown-body dl dt:first-child { -    padding: 0; -} -.markdown-body dl dt>:first-child { -    margin-top: 0px; -} -.markdown-body dl dt>:last-child { -    margin-bottom: 0px; -} -.markdown-body dl dd { -    margin: 0 0 15px; -    padding: 0 15px; -} -.markdown-body dl dd>:first-child { -    margin-top: 0px; -} -.markdown-body dl dd>:last-child { -    margin-bottom: 0px; -} -.markdown-body blockquote { -    border-left: 4px solid #DDD; -    padding: 0 15px; -    color: #777; -} -.markdown-body blockquote>:first-child { -    margin-top: 0px; -} -.markdown-body blockquote>:last-child { -    margin-bottom: 0px; -} -.markdown-body table th { -    font-weight: bold; -} -.markdown-body table th, .markdown-body table td { -    border: 1px solid #ccc; -    padding: 6px 13px; -} -.markdown-body table tr { -    border-top: 1px solid #ccc; -    background-color: #fff; -} -.markdown-body table tr:nth-child(2n) { -    background-color: #f8f8f8; -} -.markdown-body img { -    max-width: 100%; -    -moz-box-sizing: border-box; -    box-sizing: border-box; -} -.markdown-body span.frame { -    display: block; -    overflow: hidden; -} -.markdown-body span.frame>span { -    border: 1px solid #ddd; -    display: block; -    float: left; -    overflow: hidden; -    margin: 13px 0 0; -    padding: 7px; -    width: auto; -} -.markdown-body span.frame span img { -    display: block; -    float: left; -} -.markdown-body span.frame span span { -    clear: 
both; -    color: #333; -    display: block; -    padding: 5px 0 0; -} -.markdown-body span.align-center { -    display: block; -    overflow: hidden; -    clear: both; -} -.markdown-body span.align-center>span { -    display: block; -    overflow: hidden; -    margin: 13px auto 0; -    text-align: center; -} -.markdown-body span.align-center span img { -    margin: 0 auto; -    text-align: center; -} -.markdown-body span.align-right { -    display: block; -    overflow: hidden; -    clear: both; -} -.markdown-body span.align-right>span { -    display: block; -    overflow: hidden; -    margin: 13px 0 0; -    text-align: right; -} -.markdown-body span.align-right span img { -    margin: 0; -    text-align: right; -} -.markdown-body span.float-left { -    display: block; -    margin-right: 13px; -    overflow: hidden; -    float: left; -} -.markdown-body span.float-left span { -    margin: 13px 0 0; -} -.markdown-body span.float-right { -    display: block; -    margin-left: 13px; -    overflow: hidden; -    float: right; -} -.markdown-body span.float-right>span { -    display: block; -    overflow: hidden; -    margin: 13px auto 0; -    text-align: right; -} -.markdown-body code, .markdown-body tt { -    margin: 0 2px; -    padding: 0px 5px; -    border: 1px solid #eaeaea; -    background-color: #f8f8f8; -    border-radius: 3px; -} -.markdown-body code { -    white-space: nowrap; -} -.markdown-body pre>code { -    margin: 0; -    padding: 0; -    white-space: pre; -    border: none; -    background: transparent; -} -.markdown-body .highlight pre, .markdown-body pre { -    background-color: #f8f8f8; -    border: 1px solid #ccc; -    font-size: 13px; -    line-height: 19px; -    overflow: auto; -    padding: 6px 10px; -    border-radius: 3px; -} -.markdown-body pre code, .markdown-body pre tt { -    margin: 0; -    padding: 0; -    background-color: transparent; -    border: none; -} -</style> -EOT -	print "<div class='markdown-body'>"; -        print 
Markdown($text); -	print "</div>"; -    } -} - - - -sub Markdown { -# -# Main function. The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialChars(), so that any *'s or _'s in the <a> -# and <img> tags get encoded. -# -	my $text = shift; - -	# Clear the global hashes. If we don't clear these, you get conflicts -	# from other articles when generating a page which contains more than -	# one article (e.g. an index page that shows the N most recent -	# articles): -	%g_urls = (); -	%g_titles = (); -	%g_html_blocks = (); - - -	# Standardize line endings: -	$text =~ s{\r\n}{\n}g; 	# DOS to Unix -	$text =~ s{\r}{\n}g; 	# Mac to Unix - -	# Make sure $text ends with a couple of newlines: -	$text .= "\n\n"; - -	# Convert all tabs to spaces. -	$text = _Detab($text); - -	# Strip any lines consisting only of spaces and tabs. -	# This makes subsequent regexen easier to write, because we can -	# match consecutive blank lines with /\n+/ instead of something -	# contorted like /[ \t]*\n+/ . -	$text =~ s/^[ \t]+$//mg; - -	# Turn block-level HTML blocks into hash entries -	$text = _HashHTMLBlocks($text); - -	# Strip link definitions, store in hashes. -	$text = _StripLinkDefinitions($text); - -	$text = _RunBlockGamut($text); - -	$text = _UnescapeSpecialChars($text); - -	return $text . "\n"; -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# -	my $text = shift; -	my $less_than_tab = $g_tab_width - 1; - -	# Link defs are in the form: ^[id]: url "optional title" -	while ($text =~ s{ -						^[ ]{0,$less_than_tab}\[(.+)\]:	# id = $1 -						  [ \t]* -						  \n?				# maybe *one* newline -						  [ \t]* -						<?(\S+?)>?			# url = $2 -						  [ \t]* -						  \n?				# maybe one newline -						  [ \t]* -						(?: -							(?<=\s)			# lookbehind for whitespace -							["(] -							(.+?)			# title = $3 -							[")] -							[ \t]* -						)?	
# title is optional -						(?:\n+|\Z) -					} -					{}mx) { -		$g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );	# Link IDs are case-insensitive -		if ($3) { -			$g_titles{lc $1} = $3; -			$g_titles{lc $1} =~ s/"/&quot;/g; -		} -	} - -	return $text; -} - - -sub _HashHTMLBlocks { -	my $text = shift; -	my $less_than_tab = $g_tab_width - 1; - -	# Hashify HTML blocks: -	# We only want to do this for block-level HTML tags, such as headers, -	# lists, and tables. That's because we still want to wrap <p>s around -	# "paragraphs" that are wrapped in non-block-level tags, such as anchors, -	# phrase emphasis, and spans. The list of tags we're looking for is -	# hard-coded: -	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; -	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - -	# First, look for nested blocks, e.g.: -	# 	<div> -	# 		<div> -	# 		tags for inner block must be indented. -	# 		</div> -	# 	</div> -	# -	# The outermost tags must start at the left margin for this to match, and -	# the inner nested divs must be indented. -	# We need to do this before the next, more liberal match, because the next -	# match will start at the first `<div>` and stop at the first `</div>`. -	$text =~ s{ -				(						# save in $1 -					^					# start of line  (with /m) -					<($block_tags_a)	# start tag = $2 -					\b					# word break -					(.*\n)*?			# any number of lines, minimally matching -					</\2>				# the matching end tag -					[ \t]*				# trailing spaces/tabs -					(?=\n+|\Z)	# followed by a newline or end of document -				) -			}{ -				my $key = md5_hex($1); -				$g_html_blocks{$key} = $1; -				"\n\n" . $key . "\n\n"; -			}egmx; - - -	# -	# Now match more liberally, simply from `\n<tag>` to `</tag>\n` -	# -	$text =~ s{ -				(						# save in $1 -					^					# start of line  (with /m) -					<($block_tags_b)	# start tag = $2 -					\b					# word break -					(.*\n)*?			
# any number of lines, minimally matching -					.*</\2>				# the matching end tag -					[ \t]*				# trailing spaces/tabs -					(?=\n+|\Z)	# followed by a newline or end of document -				) -			}{ -				my $key = md5_hex($1); -				$g_html_blocks{$key} = $1; -				"\n\n" . $key . "\n\n"; -			}egmx; -	# Special case just for <hr />. It was easier to make a special case than -	# to make the other regex more complicated. -	$text =~ s{ -				(?: -					(?<=\n\n)		# Starting after a blank line -					|				# or -					\A\n?			# the beginning of the doc -				) -				(						# save in $1 -					[ ]{0,$less_than_tab} -					<(hr)				# start tag = $2 -					\b					# word break -					([^<>])*?			# -					/?>					# the matching end tag -					[ \t]* -					(?=\n{2,}|\Z)		# followed by a blank line or end of document -				) -			}{ -				my $key = md5_hex($1); -				$g_html_blocks{$key} = $1; -				"\n\n" . $key . "\n\n"; -			}egx; - -	# Special case for standalone HTML comments: -	$text =~ s{ -				(?: -					(?<=\n\n)		# Starting after a blank line -					|				# or -					\A\n?			# the beginning of the doc -				) -				(						# save in $1 -					[ ]{0,$less_than_tab} -					(?s: -						<! -						(--.*?--\s*)+ -						> -					) -					[ \t]* -					(?=\n{2,}|\Z)		# followed by a blank line or end of document -				) -			}{ -				my $key = md5_hex($1); -				$g_html_blocks{$key} = $1; -				"\n\n" . $key . "\n\n"; -			}egx; - - -	return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# -	my $text = shift; - -	$text = _DoHeaders($text); - -	# Do Horizontal Rules: -	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; -	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; -	$text =~ s{^[ ]{0,2}([ ]? 
_[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - -	$text = _DoLists($text); - -	$text = _DoCodeBlocks($text); - -	$text = _DoBlockQuotes($text); - -	# We already ran _HashHTMLBlocks() before, in Markdown(), but that -	# was to escape raw HTML in the original Markdown source. This time, -	# we're escaping the markup we've just created, so that we don't wrap -	# <p> tags around block-level tags. -	$text = _HashHTMLBlocks($text); - -	$text = _FormParagraphs($text); - -	return $text; -} - - -sub _RunSpanGamut { -# -# These are all the transformations that occur *within* block-level -# tags like paragraphs, headers, and list items. -# -	my $text = shift; - -	$text = _DoCodeSpans($text); - -	$text = _EscapeSpecialChars($text); - -	# Process anchor and image tags. Images must come first, -	# because ![foo][f] looks like an anchor. -	$text = _DoImages($text); -	$text = _DoAnchors($text); - -	# Make links out of things like `<http://example.com/>` -	# Must come after _DoAnchors(), because you can use < and > -	# delimiters in inline links like [this](<url>). -	$text = _DoAutoLinks($text); - -	$text = _EncodeAmpsAndAngles($text); - -	$text = _DoItalicsAndBold($text); - -	# Do hard breaks: -	$text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g; - -	return $text; -} - - -sub _EscapeSpecialChars { -	my $text = shift; -	my $tokens ||= _TokenizeHTML($text); - -	$text = '';   # rebuild $text from the tokens -# 	my $in_pre = 0;	 # Keep track of when we're inside <pre> or <code> tags. -# 	my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; - -	foreach my $cur_token (@$tokens) { -		if ($cur_token->[0] eq "tag") { -			# Within tags, encode * and _ so they don't conflict -			# with their use in Markdown for italics and strong. -			# We're replacing each such character with its -			# corresponding MD5 checksum value; this is likely -			# overkill, but it should prevent us from colliding -			# with the escape values by accident. -			$cur_token->[1] =~  s! 
\* !$g_escape_table{'*'}!gx; -			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx; -			$text .= $cur_token->[1]; -		} else { -			my $t = $cur_token->[1]; -			$t = _EncodeBackslashEscapes($t); -			$text .= $t; -		} -	} -	return $text; -} - - -sub _DoAnchors { -# -# Turn Markdown link shortcuts into XHTML <a> tags. -# -	my $text = shift; - -	# -	# First, handle reference-style links: [link text] [id] -	# -	$text =~ s{ -		(					# wrap whole match in $1 -		  \[ -		    ($g_nested_brackets)	# link text = $2 -		  \] - -		  [ ]?				# one optional space -		  (?:\n[ ]*)?		# one optional newline followed by spaces - -		  \[ -		    (.*?)		# id = $3 -		  \] -		) -	}{ -		my $result; -		my $whole_match = $1; -		my $link_text   = $2; -		my $link_id     = lc $3; - -		if ($link_id eq "") { -			$link_id = lc $link_text;     # for shortcut links like [this][]. -		} - -		if (defined $g_urls{$link_id}) { -			my $url = $g_urls{$link_id}; -			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid -			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. -			$result = "<a href=\"$url\""; -			if ( defined $g_titles{$link_id} ) { -				my $title = $g_titles{$link_id}; -				$title =~ s! \* !$g_escape_table{'*'}!gx; -				$title =~ s!  _ !$g_escape_table{'_'}!gx; -				$result .=  " title=\"$title\""; -			} -			$result .= ">$link_text</a>"; -		} -		else { -			$result = $whole_match; -		} -		$result; -	}xsge; - -	# -	# Next, inline-style links: [link text](url "optional title") -	# -	$text =~ s{ -		(				# wrap whole match in $1 -		  \[ -		    ($g_nested_brackets)	# link text = $2 -		  \] -		  \(			# literal paren -		  	[ \t]* -			<?(.*?)>?	# href = $3 -		  	[ \t]* -			(			# $4 -			  (['"])	# quote char = $5 -			  (.*?)		# Title = $6 -			  \5		# matching quote -			)?			# title is optional -		  \) -		) -	}{ -		my $result; -		my $whole_match = $1; -		my $link_text   = $2; -		my $url	  		= $3; -		my $title		= $6; - -		$url =~ s! 
\* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid -		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. -		$result = "<a href=\"$url\""; - -		if (defined $title) { -			$title =~ s/"/"/g; -			$title =~ s! \* !$g_escape_table{'*'}!gx; -			$title =~ s!  _ !$g_escape_table{'_'}!gx; -			$result .=  " title=\"$title\""; -		} - -		$result .= ">$link_text</a>"; - -		$result; -	}xsge; - -	return $text; -} - - -sub _DoImages { -# -# Turn Markdown image shortcuts into <img> tags. -# -	my $text = shift; - -	# -	# First, handle reference-style labeled images: ![alt text][id] -	# -	$text =~ s{ -		(				# wrap whole match in $1 -		  !\[ -		    (.*?)		# alt text = $2 -		  \] - -		  [ ]?				# one optional space -		  (?:\n[ ]*)?		# one optional newline followed by spaces - -		  \[ -		    (.*?)		# id = $3 -		  \] - -		) -	}{ -		my $result; -		my $whole_match = $1; -		my $alt_text    = $2; -		my $link_id     = lc $3; - -		if ($link_id eq "") { -			$link_id = lc $alt_text;     # for shortcut links like ![this][]. -		} - -		$alt_text =~ s/"/"/g; -		if (defined $g_urls{$link_id}) { -			my $url = $g_urls{$link_id}; -			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid -			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. -			$result = "<img src=\"$url\" alt=\"$alt_text\""; -			if (defined $g_titles{$link_id}) { -				my $title = $g_titles{$link_id}; -				$title =~ s! \* !$g_escape_table{'*'}!gx; -				$title =~ s!  _ !$g_escape_table{'_'}!gx; -				$result .=  " title=\"$title\""; -			} -			$result .= $g_empty_element_suffix; -		} -		else { -			# If there's no such link ID, leave intact: -			$result = $whole_match; -		} - -		$result; -	}xsge; - -	# -	# Next, handle inline images:   -	# Don't forget: encode * and _ - -	$text =~ s{ -		(				# wrap whole match in $1 -		  !\[ -		    (.*?)		# alt text = $2 -		  \] -		  \(			# literal paren -		  	[ \t]* -			<?(\S+?)>?	
# src url = $3 -		  	[ \t]* -			(			# $4 -			  (['"])	# quote char = $5 -			  (.*?)		# title = $6 -			  \5		# matching quote -			  [ \t]* -			)?			# title is optional -		  \) -		) -	}{ -		my $result; -		my $whole_match = $1; -		my $alt_text    = $2; -		my $url	  		= $3; -		my $title		= ''; -		if (defined($6)) { -			$title		= $6; -		} - -		$alt_text =~ s/"/"/g; -		$title    =~ s/"/"/g; -		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid -		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. -		$result = "<img src=\"$url\" alt=\"$alt_text\""; -		if (defined $title) { -			$title =~ s! \* !$g_escape_table{'*'}!gx; -			$title =~ s!  _ !$g_escape_table{'_'}!gx; -			$result .=  " title=\"$title\""; -		} -		$result .= $g_empty_element_suffix; - -		$result; -	}xsge; - -	return $text; -} - - -sub _DoHeaders { -	my $text = shift; - -	# Setext-style headers: -	#	  Header 1 -	#	  ======== -	# -	#	  Header 2 -	#	  -------- -	# -	$text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{ -		"<h1>"  .  _RunSpanGamut($1)  .  "</h1>\n\n"; -	}egmx; - -	$text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ -		"<h2>"  .  _RunSpanGamut($1)  .  "</h2>\n\n"; -	}egmx; - - -	# atx-style headers: -	#	# Header 1 -	#	## Header 2 -	#	## Header 2 with closing hashes ## -	#	... -	#	###### Header 6 -	# -	$text =~ s{ -			^(\#{1,6})	# $1 = string of #'s -			[ \t]* -			(.+?)		# $2 = Header text -			[ \t]* -			\#*			# optional closing #'s (not counted) -			\n+ -		}{ -			my $h_level = length($1); -			"<h$h_level>"  .  _RunSpanGamut($2)  .  "</h$h_level>\n\n"; -		}egmx; - -	return $text; -} - - -sub _DoLists { -# -# Form HTML ordered (numbered) and unordered (bulleted) lists. 
-# -	my $text = shift; -	my $less_than_tab = $g_tab_width - 1; - -	# Re-usable patterns to match list item bullets and number markers: -	my $marker_ul  = qr/[*+-]/; -	my $marker_ol  = qr/\d+[.]/; -	my $marker_any = qr/(?:$marker_ul|$marker_ol)/; - -	# Re-usable pattern to match any entirel ul or ol list: -	my $whole_list = qr{ -		(								# $1 = whole list -		  (								# $2 -			[ ]{0,$less_than_tab} -			(${marker_any})				# $3 = first list item marker -			[ \t]+ -		  ) -		  (?s:.+?) -		  (								# $4 -			  \z -			| -			  \n{2,} -			  (?=\S) -			  (?!						# Negative lookahead for another list item marker -				[ \t]* -				${marker_any}[ \t]+ -			  ) -		  ) -		) -	}mx; - -	# We use a different prefix before nested lists than top-level lists. -	# See extended comment in _ProcessListItems(). -	# -	# Note: There's a bit of duplication here. My original implementation -	# created a scalar regex pattern as the conditional result of the test on -	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx -	# substitution once, using the scalar as the pattern. This worked, -	# everywhere except when running under MT on my hosting account at Pair -	# Networks. There, this caused all rebuilds to be killed by the reaper (or -	# perhaps they crashed, but that seems incredibly unlikely given that the -	# same script on the same server ran fine *except* under MT. I've spent -	# more time trying to figure out why this is happening than I'd like to -	# admit. My only guess, backed up by the fact that this workaround works, -	# is that Perl optimizes the substition when it can figure out that the -	# pattern will never change, and when this optimization isn't on, we run -	# afoul of the reaper. Thus, the slightly redundant code to that uses two -	# static s/// patterns rather than one conditional pattern. - -	if ($g_list_level) { -		$text =~ s{ -				^ -				$whole_list -			}{ -				my $list = $1; -				my $list_type = ($3 =~ m/$marker_ul/) ? 
"ul" : "ol"; -				# Turn double returns into triple returns, so that we can make a -				# paragraph for the last item in a list, if necessary: -				$list =~ s/\n{2,}/\n\n\n/g; -				my $result = _ProcessListItems($list, $marker_any); -				$result = "<$list_type>\n" . $result . "</$list_type>\n"; -				$result; -			}egmx; -	} -	else { -		$text =~ s{ -				(?:(?<=\n\n)|\A\n?) -				$whole_list -			}{ -				my $list = $1; -				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; -				# Turn double returns into triple returns, so that we can make a -				# paragraph for the last item in a list, if necessary: -				$list =~ s/\n{2,}/\n\n\n/g; -				my $result = _ProcessListItems($list, $marker_any); -				$result = "<$list_type>\n" . $result . "</$list_type>\n"; -				$result; -			}egmx; -	} - - -	return $text; -} - - -sub _ProcessListItems { -# -#	Process the contents of a single ordered or unordered list, splitting it -#	into individual list items. -# - -	my $list_str = shift; -	my $marker_any = shift; - - -	# The $g_list_level global keeps track of when we're inside a list. -	# Each time we enter a list, we increment it; when we leave a list, -	# we decrement. If it's zero, we're not in a list anymore. -	# -	# We do this because when we're not inside a list, we want to treat -	# something like this: -	# -	#		I recommend upgrading to version -	#		8. Oops, now this line is treated -	#		as a sub-list. -	# -	# As a single paragraph, despite the fact that the second line starts -	# with a digit-period-space sequence. -	# -	# Whereas when we're inside a list (or sub-list), that line will be -	# treated as the start of a sub-list. What a kludge, huh? This is -	# an aspect of Markdown's syntax that's hard to parse perfectly -	# without resorting to mind-reading. Perhaps the solution is to -	# change the syntax rules such that sub-lists must start with a -	# starting cardinal number; e.g. "1." or "a.". 
- -	$g_list_level++; - -	# trim trailing blank lines: -	$list_str =~ s/\n{2,}\z/\n/; - - -	$list_str =~ s{ -		(\n)?							# leading line = $1 -		(^[ \t]*)						# leading whitespace = $2 -		($marker_any) [ \t]+			# list marker = $3 -		((?s:.+?)						# list item text   = $4 -		(\n{1,2})) -		(?= \n* (\z | \2 ($marker_any) [ \t]+)) -	}{ -		my $item = $4; -		my $leading_line = $1; -		my $leading_space = $2; - -		if ($leading_line or ($item =~ m/\n{2,}/)) { -			$item = _RunBlockGamut(_Outdent($item)); -		} -		else { -			# Recursion for sub-lists: -			$item = _DoLists(_Outdent($item)); -			chomp $item; -			$item = _RunSpanGamut($item); -		} - -		"<li>" . $item . "</li>\n"; -	}egmx; - -	$g_list_level--; -	return $list_str; -} - - - -sub _DoCodeBlocks { -# -#	Process Markdown `<pre><code>` blocks. -# - -	my $text = shift; - -	$text =~ s{ -			(?:\n\n|\A) -			(	            # $1 = the code block -- one or more lines, starting with a space/tab -			  (?: -			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces -			    .*\n+ -			  )+ -			) -			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc -		}{ -			my $codeblock = $1; -			my $result; # return value - -			$codeblock = _EncodeCode(_Outdent($codeblock)); -			$codeblock = _Detab($codeblock); -			$codeblock =~ s/\A\n+//; # trim leading newlines -			$codeblock =~ s/\s+\z//; # trim trailing whitespace - -			$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n"; - -			$result; -		}egmx; - -	return $text; -} - - -sub _DoCodeSpans { -# -# 	*	Backtick quotes are used for <code></code> spans. -# -# 	*	You can use multiple backticks as the delimiters if you want to -# 		include literal backticks in the code span. So, this input: -# -#         Just type ``foo `bar` baz`` at the prompt. 
-# -#     	Will translate to: -# -#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p> -# -#		There's no arbitrary limit to the number of backticks you -#		can use as delimters. If you need three consecutive backticks -#		in your code, use four for delimiters, etc. -# -#	*	You can use spaces to get literal backticks at the edges: -# -#         ... type `` `bar` `` ... -# -#     	Turns to: -# -#         ... type <code>`bar`</code> ... -# - -	my $text = shift; - -	$text =~ s@ -			(`+)		# $1 = Opening run of ` -			(.+?)		# $2 = The code block -			(?<!`) -			\1			# Matching closer -			(?!`) -		@ - 			my $c = "$2"; - 			$c =~ s/^[ \t]*//g; # leading whitespace - 			$c =~ s/[ \t]*$//g; # trailing whitespace - 			$c = _EncodeCode($c); -			"<code>$c</code>"; -		@egsx; - -	return $text; -} - - -sub _EncodeCode { -# -# Encode/escape certain characters inside Markdown code runs. -# The point is that in code, these characters are literals, -# and lose their special Markdown meanings. -# -    local $_ = shift; - -	# Encode all ampersands; HTML entities are not -	# entities within a Markdown code span. -	s/&/&/g; - -	# Encode $'s, but only if we're running under Blosxom. -	# (Blosxom interpolates Perl variables in article bodies.) -	{ -		no warnings 'once'; -    	if (defined($blosxom::version)) { -		s/\$/$/g; -    	} -    } - - -	# Do the angle bracket song and dance: -	s! <  !<!gx; -	s! >  !>!gx; - -	# Now, escape characters that are magic in Markdown: -	s! \* !$g_escape_table{'*'}!gx; -	s! _  !$g_escape_table{'_'}!gx; -	s! {  !$g_escape_table{'{'}!gx; -	s! }  !$g_escape_table{'}'}!gx; -	s! \[ !$g_escape_table{'['}!gx; -	s! \] !$g_escape_table{']'}!gx; -	s! \\ !$g_escape_table{'\\'}!gx; - -	return $_; -} - - -sub _DoItalicsAndBold { -	my $text = shift; - -	# <strong> must go first: -	$text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } -		{<strong>$2</strong>}gsx; - -	$text =~ s{ (\*|_) (?=\S) (.+?) 
(?<=\S) \1 } -		{<em>$2</em>}gsx; - -	return $text; -} - - -sub _DoBlockQuotes { -	my $text = shift; - -	$text =~ s{ -		  (								# Wrap whole match in $1 -			( -			  ^[ \t]*>[ \t]?			# '>' at the start of a line -			    .+\n					# rest of the first line -			  (.+\n)*					# subsequent consecutive lines -			  \n*						# blanks -			)+ -		  ) -		}{ -			my $bq = $1; -			$bq =~ s/^[ \t]*>[ \t]?//gm;	# trim one level of quoting -			$bq =~ s/^[ \t]+$//mg;			# trim whitespace-only lines -			$bq = _RunBlockGamut($bq);		# recurse - -			$bq =~ s/^/  /g; -			# These leading spaces screw with <pre> content, so we need to fix that: -			$bq =~ s{ -					(\s*<pre>.+?</pre>) -				}{ -					my $pre = $1; -					$pre =~ s/^  //mg; -					$pre; -				}egsx; - -			"<blockquote>\n$bq\n</blockquote>\n\n"; -		}egmx; - - -	return $text; -} - - -sub _FormParagraphs { -# -#	Params: -#		$text - string to process with html <p> tags -# -	my $text = shift; - -	# Strip leading and trailing lines: -	$text =~ s/\A\n+//; -	$text =~ s/\n+\z//; - -	my @grafs = split(/\n{2,}/, $text); - -	# -	# Wrap <p> tags. -	# -	foreach (@grafs) { -		unless (defined( $g_html_blocks{$_} )) { -			$_ = _RunSpanGamut($_); -			s/^([ \t]*)/<p>/; -			$_ .= "</p>"; -		} -	} - -	# -	# Unhashify HTML blocks -	# -	foreach (@grafs) { -		if (defined( $g_html_blocks{$_} )) { -			$_ = $g_html_blocks{$_}; -		} -	} - -	return join "\n\n", @grafs; -} - - -sub _EncodeAmpsAndAngles { -# Smart processing for ampersands and angle brackets that need to be encoded. - -	my $text = shift; - -	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: -	#   http://bumppo.net/projects/amputator/ - 	$text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; - -	# Encode naked <'s - 	$text =~ s{<(?![a-z/?\$!])}{<}gi; - -	return $text; -} - - -sub _EncodeBackslashEscapes { -# -#   Parameter:  String. -#   Returns:    The string, with after processing the following backslash -#               escape sequences. -# -    local $_ = shift; - -    s! 
\\\\  !$g_escape_table{'\\'}!gx;		# Must process escaped backslashes first. -    s! \\`   !$g_escape_table{'`'}!gx; -    s! \\\*  !$g_escape_table{'*'}!gx; -    s! \\_   !$g_escape_table{'_'}!gx; -    s! \\\{  !$g_escape_table{'{'}!gx; -    s! \\\}  !$g_escape_table{'}'}!gx; -    s! \\\[  !$g_escape_table{'['}!gx; -    s! \\\]  !$g_escape_table{']'}!gx; -    s! \\\(  !$g_escape_table{'('}!gx; -    s! \\\)  !$g_escape_table{')'}!gx; -    s! \\>   !$g_escape_table{'>'}!gx; -    s! \\\#  !$g_escape_table{'#'}!gx; -    s! \\\+  !$g_escape_table{'+'}!gx; -    s! \\\-  !$g_escape_table{'-'}!gx; -    s! \\\.  !$g_escape_table{'.'}!gx; -    s{ \\!  }{$g_escape_table{'!'}}gx; - -    return $_; -} - - -sub _DoAutoLinks { -	my $text = shift; - -	$text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi; - -	# Email addresses: <address@domain.foo> -	$text =~ s{ -		< -        (?:mailto:)? -		( -			[-.\w]+ -			\@ -			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ -		) -		> -	}{ -		_EncodeEmailAddress( _UnescapeSpecialChars($1) ); -	}egix; - -	return $text; -} - - -sub _EncodeEmailAddress { -# -#	Input: an email address, e.g. "foo@example.com" -# -#	Output: the email address as a mailto link, with each character -#		of the address encoded as either a decimal or hex entity, in -#		the hopes of foiling most address harvesting spam bots. E.g.: -# -#	  <a href="mailto:foo@e -#       xample.com">foo -#       @example.com</a> -# -#	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -#	mailing list: <http://tinyurl.com/yu7ue> -# - -	my $addr = shift; - -	srand; -	my @encode = ( -		sub { '&#' .                 ord(shift)   . ';' }, -		sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, -		sub {                            shift          }, -	); - -	$addr = "mailto:" . $addr; - -	$addr =~ s{(.)}{ -		my $char = $1; -		if ( $char eq '@' ) { -			# this *must* be encoded. I insist. 
-			$char = $encode[int rand 1]->($char); -		} elsif ( $char ne ':' ) { -			# leave ':' alone (to spot mailto: later) -			my $r = rand; -			# roughly 10% raw, 45% hex, 45% dec -			$char = ( -				$r > .9   ?  $encode[2]->($char)  : -				$r < .45  ?  $encode[1]->($char)  : -							 $encode[0]->($char) -			); -		} -		$char; -	}gex; - -	$addr = qq{<a href="$addr">$addr</a>}; -	$addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - -	return $addr; -} - - -sub _UnescapeSpecialChars { -# -# Swap back in all the special characters we've hidden. -# -	my $text = shift; - -	while( my($char, $hash) = each(%g_escape_table) ) { -		$text =~ s/$hash/$char/g; -	} -    return $text; -} - - -sub _TokenizeHTML { -# -#   Parameter:  String containing HTML markup. -#   Returns:    Reference to an array of the tokens comprising the input -#               string. Each token is either a tag (possibly with nested, -#               tags contained therein, such as <a href="<MTFoo>">, or a -#               run of text between tags. Each element of the array is a -#               two-element array; the first is either 'tag' or 'text'; -#               the second is the actual value. -# -# -#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -#       <http://www.bradchoate.com/past/mtregex.php> -# - -    my $str = shift; -    my $pos = 0; -    my $len = length $str; -    my @tokens; - -    my $depth = 6; -    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth); -    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment -                   (?s: <\? .*? 
\?> ) |              # processing instruction -                   $nested_tags/ix;                   # nested tags - -    while ($str =~ m/($match)/g) { -        my $whole_tag = $1; -        my $sec_start = pos $str; -        my $tag_start = $sec_start - length $whole_tag; -        if ($pos < $tag_start) { -            push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; -        } -        push @tokens, ['tag', $whole_tag]; -        $pos = pos $str; -    } -    push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; -    \@tokens; -} - - -sub _Outdent { -# -# Remove one level of line-leading tabs or spaces -# -	my $text = shift; - -	$text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; -	return $text; -} - - -sub _Detab { -# -# Cribbed from a post by Bart Lateur: -# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> -# -	my $text = shift; - -	$text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; -	return $text; -} - - -1; - -__END__ - - -=pod - -=head1 NAME - -B<Markdown> - - -=head1 SYNOPSIS - -B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] -    [ I<file> ... ] - - -=head1 DESCRIPTION - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can  use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like <div> and <table> as well). - -For more information about Markdown's syntax, see: - -    http://daringfireball.net/projects/markdown/ - - -=head1 OPTIONS - -Use "--" to end switch parsing. 
For example, to open a file named "-z", use: - -	Markdown.pl -- -z - -=over 4 - - -=item B<--html4tags> - -Use HTML 4 style for empty element tags, e.g.: - -    <br> - -instead of Markdown's default XHTML style tags, e.g.: - -    <br /> - - -=item B<-v>, B<--version> - -Display Markdown's version number and copyright information. - - -=item B<-s>, B<--shortversion> - -Display the short-form version number. - - -=back - - - -=head1 BUGS - -To file bug reports or feature requests (other than topics listed in the -Caveats section above) please send email to: - -    support@daringfireball.net - -Please include with your report: (1) the example input; (2) the output -you expected; (3) the output Markdown actually produced. - - -=head1 VERSION HISTORY - -See the readme file for detailed release notes for this version. - -1.0.1 - 14 Dec 2004 - -1.0 - 28 Aug 2004 - - -=head1 AUTHOR - -    John Gruber -    http://daringfireball.net - -    PHP port and other contributions by Michel Fortin -    http://michelf.com - - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2003-2004 John Gruber -<http://daringfireball.net/> -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, -  this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright -  notice, this list of conditions and the following disclaimer in the -  documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may -  be used to endorse or promote products derived from this software -  without specific prior written permission. 
- -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -=cut diff --git a/filters/html-converters/resources/rst-template.txt b/filters/html-converters/resources/rst-template.txt deleted file mode 100644 index 43cde42..0000000 --- a/filters/html-converters/resources/rst-template.txt +++ /dev/null @@ -1,4 +0,0 @@ -%(stylesheet)s -%(body_pre_docinfo)s -%(docinfo)s -%(body)s diff --git a/filters/html-converters/rst2html b/filters/html-converters/rst2html index c51f5be..756a4e1 100755 --- a/filters/html-converters/rst2html +++ b/filters/html-converters/rst2html @@ -1,2 +1,2 @@ -#!/bin/sh -rst2html.py --template="$(dirname $0)/resources/rst-template.txt" +#!/bin/bash +rst2html.py --template <(echo -e "%(stylesheet)s\n%(body_pre_docinfo)s\n%(docinfo)s\n%(body)s") | 
