module MaRuKu::Strings

These are string utilities.

Constants

Abbreviation

Example:

*[HTML]: Hyper Text Markup Language
AttributeDefinitionList

$1 = id $2 = attribute list

Definition

Example:

^:blah blah
^: blah blah
^   : blah blah
EMailAddress
FootnoteText
HeaderWithAttributes
HeaderWithId
InlineAttributeList
LinkRegex

This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: \n? url "optional title"

MightBeTableHeader

If the line contains a pipe, it could be a table header.

Sep

-------------:

TabSize
TableSeparator

| -------------:| ------------------------------ |

Public Instance Methods

add_tabs(s,n=1,char="\t") click to toggle source
# File lib/maruku/string_utils.rb, line 25
# Prefixes every line of +s+ with +char+ repeated +n+ times.
#
# The input is split on "\n" and the indented lines are joined back with
# "\n"; trailing empty lines are dropped by String#split.
#
# s    - the text to indent.
# n    - how many copies of +char+ to prepend (default 1).
# char - the indentation unit (default: one tab).
#
# Returns the indented text as a new String.
def add_tabs(s,n=1,char="\t")
        indented_lines = s.split("\n").map do |line|
                (char * n) + line
        end
        indented_lines.join("\n")
end
dbg_describe_ary(a, prefix='') click to toggle source
# File lib/maruku/string_utils.rb, line 179
# Debugging aid: prints each element of +a+ on its own line, in the form
# "<prefix> (<1-based position>)# <element.inspect>".
#
# a      - the collection to dump.
# prefix - text printed at the start of every line (default: empty).
#
# Returns +a+ (the result of the iteration).
def dbg_describe_ary(a, prefix='')
        a.each_with_index do |element, index|
                puts "#{prefix} (#{index + 1})# #{element.inspect}"
        end
end
force_linebreak?(l) click to toggle source
# File lib/maruku/string_utils.rb, line 186
# Checks whether line +l+ ends with (at least) two spaces.
# Presumably this is the Markdown "hard line break" marker; verify
# against the callers.
#
# l - the line to test.
#
# Returns the index where the two trailing spaces start (truthy), or nil
# when the line does not end with two spaces.
def force_linebreak?(l)
        l =~ /  $/
end
line_md_type(l) click to toggle source
# File lib/maruku/input/type_detection.rb, line 36
        # Classifies a single source line +l+ and returns a symbol naming its
        # Markdown line type.
        #
        # The checks are tried top to bottom and the first one that matches
        # wins, so their order is significant. Possible results are :text,
        # :code, :empty, :footnote_text, :ref_definition, :abbreviation,
        # :definition, :xml_instr, :raw_html, :ulist, :olist, :header1,
        # :header2, :header3, :hrule, :quote, :metadata, :ald and :ial.
        # :text is also the fallback when nothing else matches.
        #
        # l - the line to classify.
        #
        # Returns a Symbol.
        def line_md_type(l)
                # The order of evaluation is important (:text is a catch-all)
                # Fast path: a line starting with an ASCII letter is plain text.
                return :text   if l =~ /^[a-zA-Z]/
                return :code             if number_of_leading_spaces(l)>=4
                return :empty    if l =~ /^\s*$/
                return :footnote_text    if l =~ FootnoteText
                return :ref_definition   if l =~ LinkRegex || l =~ IncompleteLink
                return :abbreviation     if l =~ Abbreviation
                return :definition       if l =~ Definition
                # I had a bug with emails and urls at the beginning of the
                # line that were mistaken for raw_html
                return :text if l =~ /^[ ]{0,3}#{EMailAddress}/
                return :text if l =~ /^[ ]{0,3}<http:/
                # raw html is like PHP Markdown Extra: at most three spaces before
                return :xml_instr if l =~ %r{^\s*<\?}
                return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
                return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
                # Something is wrong with how we parse lists! :-(
                #return :ulist    if l =~ /^[ ]{0,3}([\*\-\+])\s+.*\w+/
                #return :olist    if l =~ /^[ ]{0,3}\d+\..*\w+/
                return :ulist    if l =~ /^[ ]{0,1}([\*\-\+])\s+.*\w+/
                return :olist    if l =~ /^[ ]{0,1}\d+\..*\w+/
                return :header1  if l =~ /^(=)+/
                # Only hyphens and whitespace; must be tested before :hrule,
                # whose hyphen pattern would also match such lines.
                return :header2  if l =~ /^([-\s])+$/
                return :header3  if l =~ /^(#)+\s*\S+/
                # at least three asterisks on a line, and only whitespace
                return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/
                return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
                return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
                return :quote    if l =~ /^>/
                return :metadata if l =~ /^@/
#               if @@new_meta_data?
                        return :ald   if l =~ AttributeDefinitionList
                        return :ial   if l =~ InlineAttributeList
#               end
#               return :equation_end if l =~ EquationEnd
                return :text # else, it's just text
        end
normalize_key_and_value(k,v) click to toggle source

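Keys are stripped and downcased, and spaces become underscores. Values are stripped; a missing value defaults to true, and "yes"/"true"/"no"/"false" (case-insensitive) become booleans. Conversion of the key to a symbol is left to the caller (see parse_email_headers).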

# File lib/maruku/string_utils.rb, line 69
# Normalizes one header key/value pair.
#
# k - the raw key; it is stripped, downcased and its spaces are replaced
#     by underscores. It is returned as a String, not as a Symbol.
# v - the raw value, or nil/false when absent. A present value is
#     stripped; an absent one defaults to true. The words "yes"/"true"
#     and "no"/"false" (any letter case) are turned into true and false.
#
# Returns the two-element Array [key, value].
def normalize_key_and_value(k,v)
        value = v ? v.strip : true # no value defaults to true

        # check synonyms
        value = case value.to_s.downcase
                when 'yes', 'true' then true
                when 'no', 'false' then false
                else value
                end

        key = k.strip.downcase.tr(' ', '_')
        [key, value]
end
num_leading_hashes(s) click to toggle source

Counts the number of leading '#' in the string

# File lib/maruku/string_utils.rb, line 137
# Counts the '#' characters at the start of +s+.
#
# The count is capped at s.size - 1: the last character of the string is
# never counted, so "###" yields 2 and "#" yields 0.
#
# s - the String to inspect.
#
# Returns an Integer.
def num_leading_hashes(s)
        leading = s[/\A#*/].size
        cap = [s.size - 1, 0].max
        [leading, cap].min
end
number_of_leading_spaces(s) click to toggle source

Returns the number of leading spaces, considering that a tab counts as `TabSize` spaces.

# File lib/maruku/string_utils.rb, line 83
# Measures the indentation at the start of +s+.
#
# Each leading space counts as 1 and each leading tab counts as TabSize;
# scanning stops at the first character that is neither.
#
# s - the String to inspect.
#
# Returns the indentation width as an Integer.
def number_of_leading_spaces(s)
        width = 0
        s.each_char do |ch|
                case ch
                when ' '  then width += 1
                when "\t" then width += TabSize
                else break
                end
        end
        width
end
parse_email_headers(s) click to toggle source

This parses email headers. Returns a hash.

+hash[:data]+ is the message.

Keys are downcased, space becomes underscore, converted to symbols.

My key: true

becomes:

{:my_key => true}
# File lib/maruku/string_utils.rb, line 47
        # Splits an email-style header block off the top of +s+.
        #
        # A header block is one or more "Key: value" lines at the very start
        # of the text, followed by a blank line. When such a block is found,
        # every header becomes an entry of the result (key and value are
        # normalized by normalize_key_and_value and the key is turned into a
        # Symbol) and the text after the block is stored under :data. When
        # there is no header block, the whole of +s+ is stored under :data.
        #
        # s - the text to parse.
        #
        # Returns a Hash that always has a :data entry.
        def parse_email_headers(s)
                keys = {}
                header_block = /\A((\w[\w\s\_\-]+: .*\n)+)\s*\n/.match(s)
                if header_block.nil?
                        keys[:data] = s
                else
                        keys[:data] = header_block.post_match
                        header_block[1].split("\n").each do |l|
                                # Split on the first ':' only, so that values may
                                # themselves contain ':' characters.
                                k, v = l.split(':', 2)
                                k, v = normalize_key_and_value(k, v)
                                keys[k.to_sym] = v
                        end
                end
                keys
        end
sanitize_ref_id(x) click to toggle source

change space to "_" and remove any non-word character

# File lib/maruku/string_utils.rb, line 152
# Turns +x+ into a normalized reference id: surrounding whitespace is
# removed, letters are downcased, spaces become "_" and every remaining
# non-word character (anything not matched by \w) is dropped.
#
# x - the raw id String.
#
# Returns the sanitized id as a new String.
def sanitize_ref_id(x)
        x.strip.downcase.gsub(' ','_').gsub(/[^\w]/,'')
end
spaces_before_first_char(s) click to toggle source

This returns the position of the first real char in a list item

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5
# File lib/maruku/string_utils.rb, line 108
# Returns the offset of the first content character of a list item line.
#
# The list kind is taken from s.md_type:
# * :ulist - skips leading whitespace, one marker character and the
#   whitespace that follows it;
# * :olist - skips leading whitespace, the digits, one character for the
#   dot and the whitespace that follows it.
# For any other type a message is passed to tell_user and 0 is returned.
#
# s - the list item line; it must respond to +md_type+.
#
# Returns an Integer offset.
def spaces_before_first_char(s)
        case s.md_type
        when :ulist
                i = 0
                # skip whitespace if present
                i += 1 while s[i,1] =~ /\s/
                # skip indicator (+, -, *)
                i += 1
                # skip optional whitespace
                i += 1 while s[i,1] =~ /\s/
                return i
        when :olist
                i = 0
                # skip whitespace
                i += 1 while s[i,1] =~ /\s/
                # skip digits
                i += 1 while s[i,1] =~ /\d/
                # skip dot
                i += 1
                # skip whitespace
                i += 1 while s[i,1] =~ /\s/
                return i
        else
                tell_user "BUG (my bad): '#{s}' is not a list"
                0
        end
end
split_lines(s) click to toggle source
# File lib/maruku/string_utils.rb, line 31
# Splits +s+ into lines: every "\r" is removed first, then the text is cut
# at each "\n". Trailing empty lines are dropped by String#split.
#
# s - the text to split.
#
# Returns an Array of Strings.
def split_lines(s)
        without_cr = s.delete("\r")
        without_cr.split("\n")
end
strip_hashes(s) click to toggle source

Strips initial and final hashes

# File lib/maruku/string_utils.rb, line 144
# Strips the leading hashes (as counted by num_leading_hashes) and any
# trailing run of '#' and whitespace from +s+, then strips the
# surrounding whitespace from what is left.
#
# s - the String to clean up.
#
# Returns the stripped text as a new String.
def strip_hashes(s)
        s = s[num_leading_hashes(s), s.size]
        i = s.size-1
        # Walk back over trailing '#' and whitespace; index 0 is never
        # skipped, so at least one character is kept.
        while i > 0 && (s[i,1] =~ /(#|\s)/); i-=1; end
        s[0, i+1].strip
end
strip_indent(s, n) click to toggle source

Removes at most n characters' worth of leading indentation.

# File lib/maruku/string_utils.rb, line 163
# Removes leading indentation from +s+, consuming at most +n+ columns.
#
# A space uses up 1 column and a tab uses up TabSize columns. Scanning
# stops when the budget is exhausted or at the first character that is
# neither a space nor a tab. A tab is removed whole even when fewer than
# TabSize columns remain in the budget.
#
# s - the String to unindent.
# n - the indentation budget, in columns.
#
# Returns the remainder of +s+ as a new String.
def strip_indent(s, n)
        consumed = 0
        budget = n
        while consumed < s.size && budget > 0
                case s[consumed, 1]
                when ' '  then budget -= 1
                when "\t" then budget -= TabSize
                else break
                end
                consumed += 1
        end
        s[consumed, s.size]
end
unquote(s) click to toggle source

removes initial quote

# File lib/maruku/string_utils.rb, line 158
# Removes the quote marker from +s+: a ">" at the beginning of each line,
# together with one optional whitespace character right after it.
#
# s - the quoted text (may span several lines).
#
# Returns the unquoted text as a new String.
def unquote(s)
        s.gsub(/^>\s?/,'')
end