A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax) but because it is not a true lexer, it will make mistakes on some ambiguous cases.
The list of all identifiers recognized as keywords.
Perform Ruby-specific setup.
# File lib/syntax/lang/ruby.rb, line 18
# Reset the Ruby-specific tokenizer state: no pending heredocs, and both
# the "selector" and "allow operator" flags cleared.
def setup
  @heredocs = []
  @selector = @allow_operator = false
end
Step through a single iteration of the tokenization process.
# File lib/syntax/lang/ruby.rb, line 25
# Perform one iteration of tokenization at the current scan position.
#
# Each call recognizes one construct (or one character, as a last resort)
# and reports it through start_group / append / scan_delimited_region /
# scan_heredoc. Three pieces of state are carried between calls:
#
# * @selector       - set after a single "."; the next word is then reported
#                     as :ident even if it looks like a constant or keyword.
# * @allow_operator - true when the previous token could end an expression,
#                     so a following "/" is division rather than a regex.
# * @heredocs       - heredocs announced by "<<" on the current line, each
#                     stored as [float_right, quote_type, delimiter]; one is
#                     consumed at the next line break.
def step
  case
  when bol? && check( /=begin/ )
    start_group( :comment, scan_until( /^=end#{EOL}/ ) )
  when bol? && check( /__END__#{EOL}/ )
    start_group( :comment, scan_until( /\Z/ ) )
  else
    case
    when check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
    when check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
    when check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan( /::/ )
    when check( /:"/ )
      start_group :symbol, scan( /:/ )
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      start_group :symbol, scan( /:/ )
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      start_group :char, matched
      @allow_operator = true
    when check( /(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/ )
      # The (?!\w) lookahead keeps words that merely start with a literal
      # name (nil_value, selfish, ...) out of this branch; they fall through
      # to the ordinary identifier handling below.
      suffix = matched[-1]
      if @selector || suffix == ?? || suffix == ?!
        # After a "." or with a trailing ?/! this is a method name.
        start_group :ident, scan( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
      else
        start_group :constant, scan( /(__FILE__|__LINE__|true|false|nil|self)/ )
      end
      @selector = false
      @allow_operator = true
    when scan( /0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/ )
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan( /../ ), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan( /../ ), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan( /../ ), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan( /../ ), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan( /./ ), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character (or ?/!) is treated as the
        # shift/append operator, not as the start of a heredoc.
        saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
        start_group :punct, scan( /<</ )
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan( /-/ )
        append "-" if float_right
        if ( type = scan( /['"]/ ) )
          append type
          delim = scan_until( /(?=#{type})/ )
          if delim.nil?
            # Unterminated quoted delimiter: emit the rest of the input.
            append scan_until( /\Z/ )
            return
          end
        else
          delim = scan( /\w+/ ) or return
        end
        start_group :constant, delim
        start_group :punct, scan( /#{type}/ ) if type
        # Remember the heredoc; its body is scanned at the next line break.
        @heredocs << [ float_right, type, delim ]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          if @heredocs.empty?
            start_group :normal, scan( /\s+/ )
          else
            scan_heredoc(*@heredocs.shift)
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan( /\s+/ )
        when "#"
          start_group :comment, scan( /#[^\n\r]*/ )
        when /[A-Z]/
          start_group( ( @selector ? :ident : :constant ), scan( /\w+/ ) )
          @allow_operator = true
        when /[a-z_]/
          word = scan( /\w+[?!]?/ )
          if !@selector && KEYWORDS.include?( word )
            start_group :keyword, word
            @allow_operator = false
          else
            # Plain else: the flag must not depend on what start_group
            # happens to return.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          start_group :number, scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            # Division operator. A Regexp is passed, like every other scan
            # call in this method.
            start_group :punct, scan( %r{/} )
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan( /\.{1,3}/ )
          start_group :punct, dots
          # Only a single dot is a method selector; ".." and "..." are ranges.
          @selector = ( dots.length == 1 )
        when /[@]/
          start_group :attribute, scan( /@{1,2}\w*/ )
          @allow_operator = true
        when /[$]/
          start_group :global, scan( /\$/ )
          start_group :global, scan( /\w+|./ ) if check( /./ )
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan( /./ )
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan( /./ )
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end
Generated with the Darkfish Rdoc Generator 2.