From 525c815cc400bc49881144bcd7e7b717bbc1af5d Mon Sep 17 00:00:00 2001 From: Jason A. Donenfeld Date: Fri, 9 Oct 2015 15:13:35 +0200 Subject: filters: Simplify converters Signed-off-by: Jason A. Donenfeld --- filters/html-converters/resources/markdown.pl | 1727 -------------------- filters/html-converters/resources/rst-template.txt | 4 - 2 files changed, 1731 deletions(-) delete mode 100755 filters/html-converters/resources/markdown.pl delete mode 100644 filters/html-converters/resources/rst-template.txt (limited to 'filters/html-converters/resources') diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl deleted file mode 100755 index 4c39808..0000000 --- a/filters/html-converters/resources/markdown.pl +++ /dev/null @@ -1,1727 +0,0 @@ -#!/usr/bin/perl - -# -# Markdown -- A text-to-HTML conversion tool for web writers -# -# Copyright (c) 2004 John Gruber -# -# - - -package Markdown; -require 5.006_000; -use strict; -use warnings; - -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION); -$VERSION = '1.0.1'; -# Tue 14 Dec 2004 - - -# -# Global default settings: -# -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; - - -# -# Globals: -# - -# Regex to match balanced [brackets]. See Friedl's -# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my $g_nested_brackets; -$g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { - $g_escape_table{$char} = md5_hex($char); -} - - -# Global hashes, used by various utility routines -my %g_urls; -my %g_titles; -my %g_html_blocks; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; - - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -my $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - - if ( (! $g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); - } - 1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require MT; - import MT; - require MT::Template::Context; - import MT::Template::Context; - - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "Markdown", - description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", - doc_link => 'http://daringfireball.net/projects/markdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('markdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('markdown' => { - label => 'Markdown', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('markdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('markdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? $text : Markdown($text); - $text; - }, - }); - - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; - - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; - } - - if ($smartypants) { - MT->add_text_filter('markdown_with_smartypants' => { - label => 'Markdown With SmartyPants', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'version'}) { # Version info - print "\nThis is Markdown, version $VERSION.\n"; - print "Copyright 2004 John Gruber\n"; - print "http://daringfireball.net/projects/markdown/\n\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } - - - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print <<'EOT'; - -EOT - print "
"; - print Markdown($text); - print "
"; - } -} - - - -sub Markdown { -# -# Main function. The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialChars(), so that any *'s or _'s in the -# and tags get encoded. -# - my $text = shift; - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - - - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = _Detab($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip link definitions, store in hashes. - $text = _StripLinkDefinitions($text); - - $text = _RunBlockGamut($text); - - $text = _UnescapeSpecialChars($text); - - return $text . "\n"; -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - ? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - (?:\n+|\Z) - } - {}mx) { - $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; - } - } - - return $text; -} - - -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - - # First, look for nested blocks, e.g.: - #

- #
- # tags for inner block must be indented. - #
- #
- # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `
` and stop at the first `
`. - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_a) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - - - # - # Now match more liberally, simply from `\n` to `\n` - # - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_b) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .* # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - # Special case just for
. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoHeaders($text); - - # Do Horizontal Rules: - $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. - $text = _HashHTMLBlocks($text); - - $text = _FormParagraphs($text); - - return $text; -} - - -sub _RunSpanGamut { -# -# These are all the transformations that occur *within* block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoCodeSpans($text); - - $text = _EscapeSpecialChars($text); - - # Process anchor and image tags. Images must come first, - # because ![foo][f] looks like an anchor. - $text = _DoImages($text); - $text = _DoAnchors($text); - - # Make links out of things like `` - # Must come after _DoAnchors(), because you can use < and > - # delimiters in inline links like [this](). - $text = _DoAutoLinks($text); - - $text = _EncodeAmpsAndAngles($text); - - $text = _DoItalicsAndBold($text); - - # Do hard breaks: - $text =~ s/ {2,}\n/ or tags. -# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; - - foreach my $cur_token (@$tokens) { - if ($cur_token->[0] eq "tag") { - # Within tags, encode * and _ so they don't conflict - # with their use in Markdown for italics and strong. - # We're replacing each such character with its - # corresponding MD5 checksum value; this is likely - # overkill, but it should prevent us from colliding - # with the escape values by accident. - $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; - $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; - $text .= $cur_token->[1]; - } else { - my $t = $cur_token->[1]; - $t = _EncodeBackslashEscapes($t); - $text .= $t; - } - } - return $text; -} - - -sub _DoAnchors { -# -# Turn Markdown link shortcuts into XHTML
tags. -# - my $text = shift; - - # - # First, handle reference-style links: [link text] [id] - # - $text =~ s{ - ( # wrap whole match in $1 - \[ - ($g_nested_brackets) # link text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $link_text; # for shortcut links like [this][]. - } - - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "? # href = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # Title = $6 - \5 # matching quote - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $url = $3; - my $title = $6; - - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = " tags. -# - my $text = shift; - - # - # First, handle reference-style labeled images: ![alt text][id] - # - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $alt_text; # for shortcut links like ![this][]. - } - - $alt_text =~ s/"/"/g; - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "\"$alt_text\"";? # src url = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # title = $6 - \5 # matching quote - [ \t]* - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $url = $3; - my $title = ''; - if (defined($6)) { - $title = $6; - } - - $alt_text =~ s/"/"/g; - $title =~ s/"/"/g; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "\"$alt_text\"";" . _RunSpanGamut($1) . "\n\n"; - }egmx; - - $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ - "

" . _RunSpanGamut($1) . "

\n\n"; - }egmx; - - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - # - $text =~ s{ - ^(\#{1,6}) # $1 = string of #'s - [ \t]* - (.+?) # $2 = Header text - [ \t]* - \#* # optional closing #'s (not counted) - \n+ - }{ - my $h_level = length($1); - "" . _RunSpanGamut($2) . "\n\n"; - }egmx; - - return $text; -} - - -sub _DoLists { -# -# Form HTML ordered (numbered) and unordered (bulleted) lists. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Re-usable patterns to match list item bullets and number markers: - my $marker_ul = qr/[*+-]/; - my $marker_ol = qr/\d+[.]/; - my $marker_any = qr/(?:$marker_ul|$marker_ol)/; - - # Re-usable pattern to match any entirel ul or ol list: - my $whole_list = qr{ - ( # $1 = whole list - ( # $2 - [ ]{0,$less_than_tab} - (${marker_any}) # $3 = first list item marker - [ \t]+ - ) - (?s:.+?) - ( # $4 - \z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ \t]* - ${marker_any}[ \t]+ - ) - ) - ) - }mx; - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _ProcessListItems(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if ($g_list_level) { - $text =~ s{ - ^ - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; - $result; - }egmx; - } - else { - $text =~ s{ - (?:(?<=\n\n)|\A\n?) - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; - $result; - }egmx; - } - - - return $text; -} - - -sub _ProcessListItems { -# -# Process the contents of a single ordered or unordered list, splitting it -# into individual list items. -# - - my $list_str = shift; - my $marker_any = shift; - - - # The $g_list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - - $g_list_level++; - - # trim trailing blank lines: - $list_str =~ s/\n{2,}\z/\n/; - - - $list_str =~ s{ - (\n)? # leading line = $1 - (^[ \t]*) # leading whitespace = $2 - ($marker_any) [ \t]+ # list marker = $3 - ((?s:.+?) # list item text = $4 - (\n{1,2})) - (?= \n* (\z | \2 ($marker_any) [ \t]+)) - }{ - my $item = $4; - my $leading_line = $1; - my $leading_space = $2; - - if ($leading_line or ($item =~ m/\n{2,}/)) { - $item = _RunBlockGamut(_Outdent($item)); - } - else { - # Recursion for sub-lists: - $item = _DoLists(_Outdent($item)); - chomp $item; - $item = _RunSpanGamut($item); - } - - "
  • " . $item . "
  • \n"; - }egmx; - - $g_list_level--; - return $list_str; -} - - - -sub _DoCodeBlocks { -# -# Process Markdown `
    ` blocks.
    -#
    -
    -	my $text = shift;
    -
    -	$text =~ s{
    -			(?:\n\n|\A)
    -			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    -			  (?:
    -			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    -			    .*\n+
    -			  )+
    -			)
    -			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    -		}{
    -			my $codeblock = $1;
    -			my $result; # return value
    -
    -			$codeblock = _EncodeCode(_Outdent($codeblock));
    -			$codeblock = _Detab($codeblock);
    -			$codeblock =~ s/\A\n+//; # trim leading newlines
    -			$codeblock =~ s/\s+\z//; # trim trailing whitespace
    -
    -			$result = "\n\n
    " . $codeblock . "\n
    \n\n"; - - $result; - }egmx; - - return $text; -} - - -sub _DoCodeSpans { -# -# * Backtick quotes are used for spans. -# -# * You can use multiple backticks as the delimiters if you want to -# include literal backticks in the code span. So, this input: -# -# Just type ``foo `bar` baz`` at the prompt. -# -# Will translate to: -# -#

    Just type foo `bar` baz at the prompt.

    -# -# There's no arbitrary limit to the number of backticks you -# can use as delimters. If you need three consecutive backticks -# in your code, use four for delimiters, etc. -# -# * You can use spaces to get literal backticks at the edges: -# -# ... type `` `bar` `` ... -# -# Turns to: -# -# ... type `bar` ... -# - - my $text = shift; - - $text =~ s@ - (`+) # $1 = Opening run of ` - (.+?) # $2 = The code block - (?$c
    "; - @egsx; - - return $text; -} - - -sub _EncodeCode { -# -# Encode/escape certain characters inside Markdown code runs. -# The point is that in code, these characters are literals, -# and lose their special Markdown meanings. -# - local $_ = shift; - - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - s/&/&/g; - - # Encode $'s, but only if we're running under Blosxom. - # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/$/g; - } - } - - - # Do the angle bracket song and dance: - s! < !<!gx; - s! > !>!gx; - - # Now, escape characters that are magic in Markdown: - s! \* !$g_escape_table{'*'}!gx; - s! _ !$g_escape_table{'_'}!gx; - s! { !$g_escape_table{'{'}!gx; - s! } !$g_escape_table{'}'}!gx; - s! \[ !$g_escape_table{'['}!gx; - s! \] !$g_escape_table{']'}!gx; - s! \\ !$g_escape_table{'\\'}!gx; - - return $_; -} - - -sub _DoItalicsAndBold { - my $text = shift; - - # must go first: - $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } - {$2}gsx; - - $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } - {$2}gsx; - - return $text; -} - - -sub _DoBlockQuotes { - my $text = shift; - - $text =~ s{ - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - }{ - my $bq = $1; - $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting - $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines - $bq = _RunBlockGamut($bq); # recurse - - $bq =~ s/^/ /g; - # These leading spaces screw with
     content, so we need to fix that:
    -			$bq =~ s{
    -					(\s*
    .+?
    ) - }{ - my $pre = $1; - $pre =~ s/^ //mg; - $pre; - }egsx; - - "
    \n$bq\n
    \n\n"; - }egmx; - - - return $text; -} - - -sub _FormParagraphs { -# -# Params: -# $text - string to process with html

    tags -# - my $text = shift; - - # Strip leading and trailing lines: - $text =~ s/\A\n+//; - $text =~ s/\n+\z//; - - my @grafs = split(/\n{2,}/, $text); - - # - # Wrap

    tags. - # - foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); - s/^([ \t]*)/

    /; - $_ .= "

    "; - } - } - - # - # Unhashify HTML blocks - # - foreach (@grafs) { - if (defined( $g_html_blocks{$_} )) { - $_ = $g_html_blocks{$_}; - } - } - - return join "\n\n", @grafs; -} - - -sub _EncodeAmpsAndAngles { -# Smart processing for ampersands and angle brackets that need to be encoded. - - my $text = shift; - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; - - # Encode naked <'s - $text =~ s{<(?![a-z/?\$!])}{<}gi; - - return $text; -} - - -sub _EncodeBackslashEscapes { -# -# Parameter: String. -# Returns: The string, with after processing the following backslash -# escape sequences. -# - local $_ = shift; - - s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. - s! \\` !$g_escape_table{'`'}!gx; - s! \\\* !$g_escape_table{'*'}!gx; - s! \\_ !$g_escape_table{'_'}!gx; - s! \\\{ !$g_escape_table{'{'}!gx; - s! \\\} !$g_escape_table{'}'}!gx; - s! \\\[ !$g_escape_table{'['}!gx; - s! \\\] !$g_escape_table{']'}!gx; - s! \\\( !$g_escape_table{'('}!gx; - s! \\\) !$g_escape_table{')'}!gx; - s! \\> !$g_escape_table{'>'}!gx; - s! \\\# !$g_escape_table{'#'}!gx; - s! \\\+ !$g_escape_table{'+'}!gx; - s! \\\- !$g_escape_table{'-'}!gx; - s! \\\. !$g_escape_table{'.'}!gx; - s{ \\! }{$g_escape_table{'!'}}gx; - - return $_; -} - - -sub _DoAutoLinks { - my $text = shift; - - $text =~ s{<((https?|ftp):[^'">\s]+)>}{
    $1}gi; - - # Email addresses: - $text =~ s{ - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - > - }{ - _EncodeEmailAddress( _UnescapeSpecialChars($1) ); - }egix; - - return $text; -} - - -sub _EncodeEmailAddress { -# -# Input: an email address, e.g. "foo@example.com" -# -# Output: the email address as a mailto link, with each character -# of the address encoded as either a decimal or hex entity, in -# the hopes of foiling most address harvesting spam bots. E.g.: -# -# foo -# @example.com -# -# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -# mailing list: -# - - my $addr = shift; - - srand; - my @encode = ( - sub { '&#' . ord(shift) . ';' }, - sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, - sub { shift }, - ); - - $addr = "mailto:" . $addr; - - $addr =~ s{(.)}{ - my $char = $1; - if ( $char eq '@' ) { - # this *must* be encoded. I insist. - $char = $encode[int rand 1]->($char); - } elsif ( $char ne ':' ) { - # leave ':' alone (to spot mailto: later) - my $r = rand; - # roughly 10% raw, 45% hex, 45% dec - $char = ( - $r > .9 ? $encode[2]->($char) : - $r < .45 ? $encode[1]->($char) : - $encode[0]->($char) - ); - } - $char; - }gex; - - $addr = qq{$addr}; - $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - - return $addr; -} - - -sub _UnescapeSpecialChars { -# -# Swap back in all the special characters we've hidden. -# - my $text = shift; - - while( my($char, $hash) = each(%g_escape_table) ) { - $text =~ s/$hash/$char/g; - } - return $text; -} - - -sub _TokenizeHTML { -# -# Parameter: String containing HTML markup. -# Returns: Reference to an array of the tokens comprising the input -# string. Each token is either a tag (possibly with nested, -# tags contained therein, such as , or a -# run of text between tags. Each element of the array is a -# two-element array; the first is either 'tag' or 'text'; -# the second is the actual value. -# -# -# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -# -# - - my $str = shift; - my $pos = 0; - my $len = length $str; - my @tokens; - - my $depth = 6; - my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); - my $match = qr/(?s: ) | # comment - (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags - - while ($str =~ m/($match)/g) { - my $whole_tag = $1; - my $sec_start = pos $str; - my $tag_start = $sec_start - length $whole_tag; - if ($pos < $tag_start) { - push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; - } - push @tokens, ['tag', $whole_tag]; - $pos = pos $str; - } - push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; - \@tokens; -} - - -sub _Outdent { -# -# Remove one level of line-leading tabs or spaces -# - my $text = shift; - - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; - return $text; -} - - -sub _Detab { -# -# Cribbed from a post by Bart Lateur: -# -# - my $text = shift; - - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; - return $text; -} - - -1; - -__END__ - - -=pod - -=head1 NAME - -B - - -=head1 SYNOPSIS - -B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] - [ I ... ] - - -=head1 DESCRIPTION - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like
    and as well). - -For more information about Markdown's syntax, see: - - http://daringfireball.net/projects/markdown/ - - -=head1 OPTIONS - -Use "--" to end switch parsing. For example, to open a file named "-z", use: - - Markdown.pl -- -z - -=over 4 - - -=item B<--html4tags> - -Use HTML 4 style for empty element tags, e.g.: - -
    - -instead of Markdown's default XHTML style tags, e.g.: - -
    - - -=item B<-v>, B<--version> - -Display Markdown's version number and copyright information. - - -=item B<-s>, B<--shortversion> - -Display the short-form version number. - - -=back - - - -=head1 BUGS - -To file bug reports or feature requests (other than topics listed in the -Caveats section above) please send email to: - - support@daringfireball.net - -Please include with your report: (1) the example input; (2) the output -you expected; (3) the output Markdown actually produced. - - -=head1 VERSION HISTORY - -See the readme file for detailed release notes for this version. - -1.0.1 - 14 Dec 2004 - -1.0 - 28 Aug 2004 - - -=head1 AUTHOR - - John Gruber - http://daringfireball.net - - PHP port and other contributions by Michel Fortin - http://michelf.com - - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2003-2004 John Gruber - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -=cut diff --git a/filters/html-converters/resources/rst-template.txt b/filters/html-converters/resources/rst-template.txt deleted file mode 100644 index 43cde42..0000000 --- a/filters/html-converters/resources/rst-template.txt +++ /dev/null @@ -1,4 +0,0 @@ -%(stylesheet)s -%(body_pre_docinfo)s -%(docinfo)s -%(body)s -- cgit v1.2.3-2-gb3c3