module MaRuKu::In::Markdown::SpanLevelParser

Constants

CharSource

Choose!

Close_class
EscapedCharInInlineCode
EscapedCharInQuotes
EscapedCharInText
Punct_class
R_REF_ID

R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)

Rules
SPACE

Public Instance Methods

apply_one_rule(reg, subst, input) click to toggle source

note: input will be destroyed

# File lib/maruku/input/rubypants.rb, line 192
# Applies one substitution rule to a list of span elements.
#
# Each String element is matched against +reg+. On a match, the text before
# the match is emitted, and the replacement items followed by the text after
# the match are put back at the front of +input+, so they are examined again.
# Elements that are not Strings (or do not match) are copied through.
#
# reg   - pattern tried on every String element.
# subst - replacement items; +:one+ stands for the first capture group, any
#         other item is converted with md_entity(item.to_s).
# input - elements to process. NOTE: this array is consumed destructively.
#
# Returns a new array holding the processed elements.
def apply_one_rule(reg, subst, input)
  processed = []
  while (item = input.shift)
    found = item.kind_of?(String) ? reg.match(item) : nil
    unless found
      processed << item
      next
    end

    before = found.pre_match
    after = found.post_match
    processed << before unless before.empty?
    input.unshift(after) unless after.empty?
    # Prepend back-to-front so the replacements keep their listed order.
    subst.reverse_each do |token|
      replacement = token == :one ? found[1] : md_entity(token.to_s)
      input.unshift(replacement)
    end
  end
  processed
end
describe_pos(buffer, buffer_index) click to toggle source
# File lib/maruku/input/charsource.rb, line 154
# Builds a multi-line, human-readable description of a position inside a
# buffer, for use in diagnostics.
#
# The report contains: a window of at most 75 characters around
# +buffer_index+ (newlines shown as "N", tabs as "T", and "EOF" appended when
# the window reaches the end of the buffer), a ruler with a "|" under the
# position, the byte index, the offsets of the window, and finally the whole
# buffer as formatted by add_tabs(buffer, 1, ">").
#
# buffer       - the String being described.
# buffer_index - offset of the position of interest inside +buffer+.
#
# Returns the description as a String.
def describe_pos(buffer, buffer_index)
  len = 75
  num_before_max = buffer_index
  num_after_max = buffer.size - buffer_index

  # Reserve up to half of the window after the position, then let each side
  # take whatever room the other side leaves unused.
  num_after = [len / 2, num_after_max].min
  num_before = [num_before_max, len - num_after].min
  num_after = [num_after_max, len - num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end = [buffer_index + num_after, buffer.size].min
  size = index_end - index_start

  # The slice is a fresh String, so the in-place substitutions below do not
  # touch +buffer+.
  str = buffer[index_start, size]
  str.gsub!("\n", 'N')
  str.gsub!("\t", 'T')
  str += "EOF" if index_end == buffer.size

  pre_s = [buffer_index - index_start, 0].max
  pre_s2 = [len - pre_s, 0].max
  pre = " " * pre_s

  # The "Shown bytes" line reports the start and the (exclusive) end offset
  # of the window, not its length.
  "-" * len + "\n" +
    str + "\n" +
    "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
    pre + "+--- Byte #{buffer_index}\n" +
    "Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n" +
    add_tabs(buffer, 1, ">")
end
educate(elements) click to toggle source
# File lib/maruku/input/rubypants.rb, line 207
# Runs every rule in Rules over the span elements, then tidies the result.
#
# elements - array of span elements (Strings and other objects).
#
# After all rules have been applied through apply_one_rule, empty Strings
# are dropped and runs of adjacent Strings are merged into a single String.
#
# Returns a new array with the cleaned-up elements.
def educate(elements)
  Rules.each do |pattern, replacement|
    elements = apply_one_rule(pattern, replacement, elements)
  end

  # Drop the empty strings.
  elements.reject! { |item| item.kind_of?(String) && item.empty? }

  # Glue consecutive strings together.
  elements.each_with_object([]) do |item, merged|
    if item.kind_of?(String) && merged.last.kind_of?(String)
      merged.last << item
    else
      merged << item
    end
  end
end
extension_meta(src, con, break_on_chars) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 301
        # Reads attribute-list meta-data and pushes the resulting element
        # on +con+.
        #
        # When the source continues with a name followed by ":", the
        # attribute list read afterwards is stored in doc.ald under that name
        # and pushed as md_ald(name, al). Otherwise the attribute list is
        # pushed as md_ial(al).
        #
        # src            - character source positioned on the meta-data.
        # con            - context receiving the new element.
        # break_on_chars - passed through to read_attribute_list.
        def extension_meta(src, con, break_on_chars)
          if (m = src.read_regexp(/([^\s\:\"\']+):/))
            name = m[1]
            al = read_attribute_list(src, con, break_on_chars)
            self.doc.ald[name] = al
            con.push md_ald(name, al)
          else
            al = read_attribute_list(src, con, break_on_chars)
            # NOTE(review): +name+ is nil in this branch, so the list is also
            # stored under the nil key. Kept as found -- confirm the intent.
            self.doc.ald[name] = al
            con.push md_ial(al)
          end
        end
interpret_extension(src, con, break_on_chars) click to toggle source

Start: cursor on the character *after* '{'. End: cursor on '}' or EOF.

# File lib/maruku/input/parse_span_better.rb, line 277
# Interprets the content of an extension block.
#
# Start: cursor on the character *after* '{'.
# End: cursor on '}' or EOF.
#
# A leading ':' is skipped and the rest is read by extension_meta; a leading
# '#' or '.' is handed to extension_meta directly. Anything else is read as
# plain text, reported through maruku_recover, and then extension_meta is
# tried anyway.
#
# src            - character source.
# con            - context receiving the parsed elements.
# break_on_chars - characters that terminate the block.
def interpret_extension(src, con, break_on_chars)
  case src.cur_char
  when ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  when ?#, ?.
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    if stuff =~ /^(\w+\s|[^\w])/
      extension_id = $1.strip
      maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
        "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    else
      maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    end
    extension_meta(src, con, break_on_chars)
  end
end
parse_lines_as_span(lines, parent=nil) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 35
# Parses several lines as one span.
#
# lines  - the lines to parse; they are joined with "\n".
# parent - passed through to parse_span_better.
#
# Returns whatever parse_span_better returns for the joined text.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
parse_span_better(string, parent=nil) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 39
# Parses a string of span-level text.
#
# string - the text to parse; anything that is not a String is reported
#          through error.
# parent - handed to CharSource.new together with the text.
#
# The text is copied and the copy frozen before it is wrapped in a
# CharSource, so the caller's string is not the object being read.
#
# Returns the result of read_span over the whole source.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)

  frozen_copy = (string + "").freeze
  source = CharSource.new(frozen_copy, parent)
  read_span(source, EscapedCharInText, [nil])
end
read_em(src, delim) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 421
# Reads an emphasis span.
#
# src   - character source positioned on the opening delimiter.
# delim - the delimiter string that closes the span.
#
# One character is skipped before and after the content, which is read with
# read_span until +delim+ is met.
#
# Returns md_em applied to the parsed content.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
read_email_el(src,con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 323
# Reads an e-mail address written between angle brackets and pushes it on
# +con+ as md_email(address).
#
# src - character source positioned on the leading '<'.
# con - context receiving the new element.
#
# A "mailto:" prefix, when present, is removed from the address.
def read_email_el(src,con)
  src.ignore_char # leading <
  mail = read_simple(src, [], [?>])
  src.ignore_char # closing >

  address = mail.gsub(/^mailto:/,'')
  con.push_element md_email(address)
end
read_emstrong(src, delim) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 435
# Reads a span that is both emphasised and strong.
#
# src   - character source positioned on the opening delimiter.
# delim - the delimiter string that closes the span.
#
# Three characters are skipped before and after the content, which is read
# with read_span until +delim+ is met.
#
# Returns md_emstrong applied to the parsed content.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
read_footnote_ref(src,con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 462
# Reads a footnote reference and pushes it on +con+.
#
# src - character source positioned on the reference.
# con - context receiving the new element.
#
# The id is obtained with read_ref_id and wrapped with md_foot_ref.
def read_footnote_ref(src,con)
  footnote_id = read_ref_id(src, con)
  con.push_element(md_foot_ref(footnote_id))
end
read_image(src, con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 615
# Reads an image and pushes the resulting element on +con+.
#
# src - character source positioned on the opening "![".
# con - context receiving the new element.
#
# Three forms are handled after the alt text:
# * "(url)" or "(url "title")": pushed as md_im_image(alt_text, url, title);
# * "[ref]": pushed as md_image(alt_text, ref_id); an empty ref falls back
#   to the alt text;
# * nothing: the alt text itself is used as the reference id.
# Reference ids go through sanitize_ref_id.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore one space between the alt text and the target
  if src.cur_char == SPACE && [?[, ?(].include?(src.next_char)
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      error( ("Unclosed link: '"<<closing<<"'")+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    if ref_id.size == 0
      ref_id =  alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id =  sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end
read_inline_code(src, con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 510
        # Reads an inline code span and pushes md_code(code) on +con+.
        #
        # src - character source positioned on the first backtick.
        # con - context receiving the new element.
        #
        # The span is closed by as many backticks as opened it. When more
        # than one backtick is used, one leading and one trailing space are
        # removed from the code.
        def read_inline_code(src, con)
          # Count the number of ticks
          num_ticks = 0
          while src.cur_char == ?`
            num_ticks += 1
            src.ignore_char
          end
          # We will read until this string
          end_string = "`"*num_ticks

          code = read_simple(src, [], [], [end_string])

          src.ignore_chars num_ticks

          # read_simple returns nil when nothing could be read, so +code+ is
          # checked before it is indexed.

          # Ignore at most one space
          if num_ticks > 1 && code && code[0] == SPACE
            code = code[1, code.size-1]
          end

          # drop last space
          if num_ticks > 1 && code && code[-1] == SPACE
            code = code[0,code.size-1]
          end

          con.push_element md_code(code)
        end
read_inline_html(src, con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 467
# Reads a piece of inline HTML and pushes md_html(text) on +con+.
#
# src - character source positioned at the start of the HTML.
# con - context receiving the new element.
#
# The remaining buffer is fed one character at a time to an HTMLHelper until
# the helper reports it is finished; the characters fed are then skipped in
# +src+. Running out of input is reported with maruku_error, and whatever was
# read so far is still pushed. Any exception raised on the way is reported,
# and a single character is then copied verbatim so parsing can go on.
def read_inline_html(src, con)
  helper = HTMLHelper.new
  begin
    # Everything left in the current buffer.
    remaining = src.current_remaining_buffer

    eaten = 0
    while true
      if eaten >= remaining.size
        maruku_error "Malformed HTML starting at #{remaining.inspect}", src, con
        break
      end

      helper.eat_this remaining[eaten].chr
      eaten += 1
      break if helper.is_finished?
    end
    src.ignore_chars(eaten)
    con.push_element md_html(helper.stuff_you_read)
  rescue Exception => e
    # NOTE(review): rescuing Exception is very broad; kept because this is a
    # deliberate best-effort recovery path.
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
read_quoted(src, con) click to toggle source

Tries to read a quoted value. If stream does not start with ' or ", returns nil.

# File lib/maruku/input/parse_span_better.rb, line 365
        # Tries to read a quoted value.
        #
        # src - character source.
        # con - current context (not used here).
        #
        # When the source is positioned on ' or ", the text up to the
        # matching quote character is read with read_simple (honouring
        # EscapedCharInQuotes) and both quotes are consumed.
        #
        # Returns the text read by read_simple, or nil when the source does
        # not start with a quote character.
        def read_quoted(src, con)
          case src.cur_char
          when ?', ?"
            quote_char = src.shift_char # opening quote
            string = read_simple(src, EscapedCharInQuotes, [quote_char])
            src.ignore_char # closing quote
            string
          else
            nil
          end
        end
read_quoted_or_unquoted(src, con, escaped, exit_on_chars) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 354
# Reads a value that may or may not be quoted.
#
# src           - character source.
# con           - current context, passed to read_quoted.
# escaped       - characters that may be escaped in the unquoted form.
# exit_on_chars - characters that end the unquoted form.
#
# Returns the result of read_quoted when the source is positioned on ' or ",
# otherwise the result of read_simple(src, escaped, exit_on_chars).
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  case src.cur_char
  when ?', ?"
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
read_ref_id(src, con) click to toggle source

Reads a bracketed id "[refid]". Consumes also both brackets.

# File lib/maruku/input/parse_span_better.rb, line 449
        # Reads a bracketed id "[refid]".
        #
        # src - character source positioned on the opening bracket.
        # con - current context (not used here).
        #
        # The opening bracket and any whitespace after it are skipped, then
        # R_REF_ID is read from the source.
        #
        # Returns the first capture of R_REF_ID, or nil when it does not
        # match.
        def read_ref_id(src, con)
          src.ignore_char # [
          src.consume_whitespace
          matched = src.read_regexp(R_REF_ID)
          matched ? matched[1] : nil
        end
read_simple(src, escaped, exit_on_chars, exit_on_strings=nil) click to toggle source

Reads a simple string (no formatting) until one of break_on_chars, while escaping the escaped. If the string is empty, it returns nil. Raises on error if the string terminates unexpectedly.

# If eat_delim is true, and if the delim is not the EOF, then the delim
# gets eaten from the stream.
# File lib/maruku/input/parse_span_better.rb, line 384
        # Reads a simple string (no formatting) from +src+.
        #
        # src             - character source.
        # escaped         - characters that lose their backslash when
        #                   written as "\x"; any other backslash is kept.
        # exit_on_chars   - characters that stop the reading (may be nil).
        # exit_on_strings - strings that stop the reading (may be nil).
        #
        # The terminator itself is left in the source. Reaching the end of
        # the input first is reported through maruku_error/maruku_recover
        # and the text read so far is used.
        #
        # Returns the text read, or nil when it is empty.
        def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
          text = "".dup
          while true
            c = src.cur_char
            break if exit_on_chars && exit_on_chars.include?(c)

            break if exit_on_strings &&
              exit_on_strings.any? {|x| src.cur_chars_are x}

            case c
            when nil
              # exit_on_chars is allowed to be nil above, so it must not
              # break the diagnostic either.
              shown = Array(exit_on_chars).map{|x| "".dup << x}
              s= "String finished while reading (break on "+
              "#{shown.inspect})"+
              " already read: #{text.inspect}"
              maruku_error s, src
              maruku_recover "I boldly continue", src
              break
            when ?\\
              d = src.next_char
              if escaped.include? d
                src.ignore_chars(2)
                text << d
              else
                text << src.shift_char
              end
            else
              text << src.shift_char
            end
          end
          text.empty? ? nil : text
        end
read_span(src, escaped, exit_on_chars, exit_on_strings=nil) click to toggle source

This is the main loop for reading span elements

It's long, but not complex or difficult to understand.

# File lib/maruku/input/parse_span_better.rb, line 54
# This is the main loop for reading span elements.
#
# It's long, but not complex or difficult to understand: one character is
# examined per iteration and dispatched to the reader for the construct it
# may start (inline code, HTML/URL/e-mail in angle brackets, escapes, links,
# images, entities, emphasis, "{...}" extensions); everything else is
# copied as plain text.
#
# src             - character source to read from.
# escaped         - characters that lose their backslash when escaped.
# exit_on_chars   - characters that end the span (may be nil). ASCII
#                   letters cannot be used here because of the fast path.
# exit_on_strings - strings that end the span (may be nil).
#
# The terminator is left in the source. Reaching the end of the input is
# reported with maruku_error, unless nil is one of the exit characters.
#
# Returns the elements of the span after merge_ial, removal of one leading
# and one trailing space, and educate.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-Z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    case c = src.cur_char
    when ?\s # it's space (32)
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element  md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b      "
      # 6) Start of <<guillemettes>>

      case d = src.next_char
      when ?< # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1]  ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        # src and con are passed here too, as in the '_' branch below.
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows  =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) the last char was a space
        # or 2) the current string is empty
        if  (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if  follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src,'___')
          elsif follows  =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src,'__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src,'_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      maruku_error( ("Unclosed span (waiting for %s"+
       "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
          src,con)
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space. Note that the emptiness test looks at the string
  # as it was before the space was removed.
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educated = educate(con.elements)

  educated
end
read_strong(src, delim) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 428
# Reads a strong span.
#
# src   - character source positioned on the opening delimiter.
# delim - the delimiter string that closes the span.
#
# Two characters are skipped before and after the content, which is read
# with read_span until +delim+ is met.
#
# Returns md_strong applied to the parsed content.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
read_url(src, break_on) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 332
# Reads a URL from +src+.
#
# src      - character source positioned at the start of the URL.
# break_on - characters that end the URL.
#
# A URL starting with a quote character is reported through error. A URL
# wrapped in angle brackets is returned without them.
#
# Returns the URL, or nil when it is empty.
def read_url(src, break_on)
  if [?',?"].include? src.cur_char
    error 'Invalid char for url', src
  end

  # read_simple yields nil for an empty string
  url = read_simple(src, [], break_on) || ""

  if url[0] == ?< && url[-1] == ?>
    url = url[1, url.size-2]
  end

  url.size == 0 ? nil : url
end
read_url_el(src,con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 315
# Reads a URL written between angle brackets and pushes it on +con+ as
# md_url(url).
#
# src - character source positioned on the leading '<'.
# con - context receiving the new element.
def read_url_el(src,con)
  src.ignore_char # leading <
  url = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_url(url)
end
read_xml_instr_span(src, con) click to toggle source
# File lib/maruku/input/parse_span_better.rb, line 253
# Reads an XML processing instruction "<?target code ?>" and pushes it on
# +con+ as md_xml_instr(target, code).
#
# src - character source positioned on the starting "<?".
# con - context receiving the new element.
#
# The target is empty when no word can be read after "<?". The code is
# everything up to the closing "?>", with surrounding whitespace removed.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  m = src.read_regexp(/(\w+)/)
  target = m ? m[1] : ''

  delim = "?>"

  code = read_simple(src, [], [], [delim])

  src.ignore_chars delim.size

  code = (code || "").strip
  con.push_element md_xml_instr(target, code)
end