Skip to content

Commit

Permalink
Showing 1 changed file with 194 additions and 138 deletions.
332 changes: 194 additions & 138 deletions lib/opal/parser/lexer.rb
Original file line number Diff line number Diff line change
@@ -8,6 +8,8 @@ class Lexer

attr_accessor :lex_state, :strterm, :scanner

attr_accessor :yylval

def initialize(source, file)
@lex_state = :expr_beg
@cond = 0
@@ -81,6 +83,14 @@ def set_arg_state
end

def scan(regexp)
if result = @scanner.scan(regexp)
@yylval += @scanner.matched
end

result
end

def skip_scan(regexp)
@scanner.scan regexp
end

@@ -97,9 +107,11 @@ def matched
end

def next_token
old = self.yylex
loc = [@line, @column]
[old[0], [old[1], loc]]
token = self.yylex
value = self.yylval
location = [@line, @column]

[token, [value, location]]
end

def strterm_expand?(strterm)
@@ -122,15 +134,20 @@ def process_numeric
scanner = @scanner

if scan(/0b?(0|1|_)+/)
return [:tINTEGER, scanner.matched.to_i(2)]
self.yylval = scanner.matched.to_i(2)
return :tINTEGER
elsif scan(/0o?([0-7]|_)+/)
return [:tINTEGER, scanner.matched.to_i(8)]
self.yylval = scanner.matched.to_i(8)
return :tINTEGER
elsif scan(/[\d_]+\.[\d_]+\b|[\d_]+(\.[\d_]+)?[eE][-+]?[\d_]+\b/)
return [:tFLOAT, scanner.matched.gsub(/_/, '').to_f]
self.yylval = scanner.matched.gsub(/_/, '').to_f
return :tFLOAT
elsif scan(/[\d_]+\b/)
return [:tINTEGER, scanner.matched.gsub(/_/, '').to_i]
self.yylval = scanner.matched.gsub(/_/, '').to_i
return :tINTEGER
elsif scan(/0(x|X)(\d|[a-f]|[A-F]|_)+/)
return [:tINTEGER, scanner.matched.to_i(16)]
self.yylval = scanner.matched.to_i(16)
return :tINTEGER
else
raise "Lexing error on numeric type: `#{scanner.peek 5}`"
end
@@ -163,7 +180,7 @@ def next_string_token
end

@lex_state = :expr_end
return :tSTRING_END, scanner.matched
return :tSTRING_END
end
end

@@ -173,7 +190,8 @@ def next_string_token
if words && !str_parse[:done_last_space]#&& space
str_parse[:done_last_space] = true
pushback(1)
return :tSPACE, ' '
self.yylval = ' '
return :tSPACE
end
self.strterm = nil

@@ -182,10 +200,10 @@ def next_string_token
@lex_state = :expr_end

if str_parse[:type] == :regexp
result = scan(/\w+/)
return :tREGEXP_END, result
self.yylval = scan(/\w+/)
return :tREGEXP_END
end
return :tSTRING_END, scanner.matched
return :tSTRING_END
else
str_buffer << scanner.matched
str_parse[:nesting] -= 1
@@ -194,16 +212,16 @@ def next_string_token

elsif ['"', "'"].include? str_parse[:beg]
@lex_state = :expr_end
return :tSTRING_END, scanner.matched
return :tSTRING_END

elsif str_parse[:beg] == '`'
@lex_state = :expr_end
return :tSTRING_END, scanner.matched
return :tSTRING_END

elsif str_parse[:beg] == '/' || str_parse[:type] == :regexp
result = scan(/\w+/)
@lex_state = :expr_end
return :tREGEXP_END, result
self.yylval = scan(/\w+/)
return :tREGEXP_END

else
if str_parse[:scanner]
@@ -212,27 +230,30 @@ def next_string_token
end

@lex_state = :expr_end
return :tSTRING_END, scanner.matched
return :tSTRING_END
end
end

return :tSPACE, ' ' if space
if space
self.yylval = ' '
return :tSPACE
end

if str_parse[:balance] and scan Regexp.new(Regexp.escape(str_parse[:beg]))
str_buffer << scanner.matched
str_parse[:nesting] += 1
elsif check(/#[@$]/)
scan(/#/)
if expand
return :tSTRING_DVAR, scanner.matched
return :tSTRING_DVAR
else
str_buffer << scanner.matched
end

elsif scan(/#\{/)
if expand
# we are into ruby code, so stop parsing content (for now)
return :tSTRING_DBEG, scanner.matched
return :tSTRING_DBEG
else
str_buffer << scanner.matched
end
@@ -250,7 +271,9 @@ def next_string_token

complete_str = str_buffer.join ''
@line += complete_str.count("\n")
return :tSTRING_CONTENT, complete_str

self.yylval = complete_str
return :tSTRING_CONTENT
end

def add_heredoc_content(str_buffer, str_parse)
@@ -402,7 +425,8 @@ def heredoc_identifier
end_of_line = @scanner.scan(/.*\n/)
self.strterm[:scanner] = StringScanner.new(end_of_line) if end_of_line != "\n"

return :tSTRING_BEG, heredoc
self.yylval = heredoc
return :tSTRING_BEG
end
end

@@ -412,17 +436,18 @@ def process_identifier(matched, cmd_start)

if scanner.peek(2) != '::' && scan(/:/)
@lex_state = :expr_beg
return :tLABEL, "#{matched}"
self.yylval = matched
return :tLABEL
end

if matched == 'defined?'
if after_operator?
@lex_state = :expr_end
return :tIDENTIFIER, matched
return :tIDENTIFIER
end

@lex_state = :expr_arg
return :kDEFINED, 'defined?'
return :kDEFINED
end

if matched.end_with? '?', '!'
@@ -446,7 +471,8 @@ def process_identifier(matched, cmd_start)
@lex_state = kw.state

if old_state == :expr_fname
return [kw.id[0], kw.name]
self.yylval = kw.name
return kw.id[0]
end

if @lex_state == :expr_beg
@@ -456,34 +482,36 @@ def process_identifier(matched, cmd_start)
if matched == "do"
if after_operator?
@lex_state = :expr_end
return :tIDENTIFIER, matched
return :tIDENTIFIER
end

if @start_of_lambda
@start_of_lambda = false
@lex_state = :expr_beg
return [:kDO_LAMBDA, scanner.matched]
return :kDO_LAMBDA
elsif cond?
@lex_state = :expr_beg
return :kDO_COND, matched
return :kDO_COND
elsif cmdarg? && @lex_state != :expr_cmdarg
@lex_state = :expr_beg
return :kDO_BLOCK, matched
return :kDO_BLOCK
elsif @lex_state == :expr_endarg
return :kDO_BLOCK, matched
return :kDO_BLOCK
else
@lex_state = :expr_beg
return :kDO, matched
return :kDO
end
else
if old_state == :expr_beg or old_state == :expr_value
return [kw.id[0], matched]
self.yylval = matched
return kw.id[0]
else
if kw.id[0] != kw.id[1]
@lex_state = :expr_beg
end

return [kw.id[1], matched]
self.yylval = matched
return kw.id[1]
end
end
end
@@ -494,10 +522,11 @@ def process_identifier(matched, cmd_start)
@lex_state = :expr_end
end

return [matched =~ /^[A-Z]/ ? :tCONSTANT : :tIDENTIFIER, matched]
return matched =~ /^[A-Z]/ ? :tCONSTANT : :tIDENTIFIER
end

def yylex
@yylval = ''
@space_seen = false
cmd_start = false
c = ''
@@ -507,20 +536,20 @@ def yylex
end

while true
if scan(/\ |\t|\r/)
if skip_scan(/\ |\t|\r/)
@space_seen = true
next

elsif scan(/(\n|#)/)
elsif skip_scan(/(\n|#)/)
c = scanner.matched
if c == '#' then scan(/(.*)/) else @line += 1; end
if c == '#' then skip_scan(/(.*)/) else @line += 1; end

scan(/(\n+)/)
skip_scan(/(\n+)/)
@line += scanner.matched.length if scanner.matched

next if [:expr_beg, :expr_dot].include? @lex_state

if scan(/([\ \t\r\f\v]*)\./)
if skip_scan(/([\ \t\r\f\v]*)\./)
@space_seen = true unless scanner[1].empty?
scanner.pos = scanner.pos - 1

@@ -529,37 +558,40 @@ def yylex

cmd_start = true
@lex_state = :expr_beg
return :tNL, '\\n'
self.yylval = '\\n'
return :tNL

elsif scan(/\;/)
@lex_state = :expr_beg
return :tSEMI, ';'
return :tSEMI

elsif check(/\*/)
if scan(/\*\*\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '**'
self.yylval = '**'
return :tOP_ASGN
elsif scan(/\*\*/)
self.set_arg_state
return :tPOW, '**'
return :tPOW
elsif scan(/\*\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '*'
self.yylval = '*'
return :tOP_ASGN
else
result = scan(/\*/)
scan(/\*/)

if after_operator?
@lex_state = :expr_arg
return :tSTAR2, result
return :tSTAR2
elsif @space_seen && check(/\S/)
@lex_state = :expr_beg
return :tSTAR, result
return :tSTAR
elsif [:expr_beg, :expr_mid].include? @lex_state
@lex_state = :expr_beg
return :tSTAR, result
return :tSTAR
else
@lex_state = :expr_beg
return :tSTAR2, result
return :tSTAR2
end
end

@@ -575,13 +607,13 @@ def yylex
end

if c == '='
return :tNEQ, '!='
return :tNEQ
elsif c == '~'
return :tNMATCH, '!~'
return :tNMATCH
end

scanner.pos = scanner.pos - 1
return :tBANG, '!'
pushback(1)
return :tBANG

elsif scan(/\=/)
if @lex_state == :expr_beg and !@space_seen
@@ -613,45 +645,47 @@ def yylex

if scan(/\=/)
if scan(/\=/)
return :tEQQ, '==='
return :tEQQ
end

return :tEQ, '=='
return :tEQ
end

if scan(/\~/)
return :tMATCH, '=~'
return :tMATCH
elsif scan(/\>/)
return :tASSOC, '=>'
return :tASSOC
end

return :tEQL, '='
return :tEQL

elsif scan(/\"/)
self.strterm = new_strterm(:dquote, '"', '"')
return :tSTRING_BEG, scanner.matched
return :tSTRING_BEG

elsif scan(/\'/)
self.strterm = new_strterm(:squote, "'", "'")
return :tSTRING_BEG, scanner.matched
return :tSTRING_BEG

elsif scan(/\`/)
self.strterm = new_strterm(:xquote, '`', '`')
return :tXSTRING_BEG, scanner.matched
return :tXSTRING_BEG

elsif scan(/\&/)
if scan(/\&/)
@lex_state = :expr_beg

if scan(/\=/)
return :tOP_ASGN, '&&'
self.yylval = '&&'
return :tOP_ASGN
end

return :tANDOP, '&&'
return :tANDOP

elsif scan(/\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '&'
self.yylval = '&'
return :tOP_ASGN
end

if spcarg?
@@ -665,23 +699,25 @@ def yylex
end

self.set_arg_state
return result, '&'
return result

elsif scan(/\|/)
if scan(/\|/)
@lex_state = :expr_beg
if scan(/\=/)
return :tOP_ASGN, '||'
self.yylval = '||'
return :tOP_ASGN
end

return :tOROP, '||'
return :tOROP

elsif scan(/\=/)
return :tOP_ASGN, '|'
self.yylval = '|'
return :tOP_ASGN
end

self.set_arg_state
return :tPIPE, '|'
return :tPIPE

elsif scan(/\%[QqWwixr]/)
str_type = scanner.matched[1, 1]
@@ -697,62 +733,64 @@ def yylex
case str_type
when 'Q'
self.strterm = new_strterm2(:dquote, paren, term)
return :tSTRING_BEG, scanner.matched
return :tSTRING_BEG
when 'q'
self.strterm = new_strterm2(:squote, paren, term)
return :tSTRING_BEG, scanner.matched
return :tSTRING_BEG
when 'W'
self.strterm = new_strterm(:dword, 'W', term)
scan(/\s*/)
return :tWORDS_BEG, scanner.matched
skip_scan(/\s*/)
return :tWORDS_BEG
when 'w', 'i'
self.strterm = new_strterm(:sword, 'w', term)
scan(/\s*/)
return :tAWORDS_BEG, scanner.matched
skip_scan(/\s*/)
return :tAWORDS_BEG
when 'x'
self.strterm = new_strterm2(:xquote, paren, term)
return :tXSTRING_BEG, scanner.matched
return :tXSTRING_BEG
when 'r'
self.strterm = new_strterm2(:regexp, paren, term)
return :tREGEXP_BEG, scanner.matched
return :tREGEXP_BEG
end

elsif scan(/\//)
if beg?
self.strterm = new_strterm(:regexp, '/', '/')
return :tREGEXP_BEG, scanner.matched
return :tREGEXP_BEG
elsif scan(/\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '/'
self.yylval = '/'
return :tOP_ASGN
elsif after_operator?
@lex_state = :expr_arg
elsif arg?
if !check(/\s/) && @space_seen
self.strterm = new_strterm(:regexp, '/', '/')
return :tREGEXP_BEG, scanner.matched
return :tREGEXP_BEG
end
else
@lex_state = :expr_beg
end

return :tDIVIDE, '/'
return :tDIVIDE

elsif scan(/\%/)
if scan(/\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '%'
self.yylval = '%'
return :tOP_ASGN
elsif check(/[^\s]/)
if @lex_state == :expr_beg or (@lex_state == :expr_arg && @space_seen)
start_word = scan(/./)
end_word = { '(' => ')', '[' => ']', '{' => '}' }[start_word] || start_word
self.strterm = new_strterm2(:dquote, start_word, end_word)
return :tSTRING_BEG, scanner.matched
return :tSTRING_BEG
end
end

self.set_arg_state

return :tPERCENT, '%'
return :tPERCENT

elsif scan(/\\/)
if scan(/\r?\n/)
@@ -776,84 +814,84 @@ def yylex
cond_push 0
cmdarg_push 0

return result, scanner.matched
return result

elsif scan(/\)/)
cond_lexpop
cmdarg_lexpop
@lex_state = :expr_end
return :tRPAREN, scanner.matched
return :tRPAREN

elsif scan(/\[/)
result = scanner.matched

if after_operator?
@lex_state = :expr_arg
if scan(/\]=/)
return :tASET, '[]='
return :tASET
elsif scan(/\]/)
return :tAREF, '[]'
return :tAREF
else
raise "Unexpected '[' token"
end
elsif beg? || @space_seen
@lex_state = :expr_beg
cond_push 0
cmdarg_push 0
return :tLBRACK, scanner.matched
return :tLBRACK
else
@lex_state = :expr_beg
cond_push 0
cmdarg_push 0
return :tLBRACK2, scanner.matched
return :tLBRACK2
end

elsif scan(/\]/)
cond_lexpop
cmdarg_lexpop
@lex_state = :expr_end
return :tRBRACK, scanner.matched
return :tRBRACK

elsif scan(/\}/)
cond_lexpop
cmdarg_lexpop
@lex_state = :expr_end

return :tRCURLY, scanner.matched
return :tRCURLY

elsif scan(/\.\.\./)
@lex_state = :expr_beg
return :tDOT3, scanner.matched
return :tDOT3

elsif scan(/\.\./)
@lex_state = :expr_beg
return :tDOT2, scanner.matched
return :tDOT2

elsif scan(/\./)
@lex_state = :expr_dot unless @lex_state == :expr_fname
return :tDOT, scanner.matched
return :tDOT

elsif scan(/\:\:/)
if beg?
@lex_state = :expr_beg
return :tCOLON3, scanner.matched
return :tCOLON3
elsif spcarg?
@lex_state = :expr_beg
return :tCOLON3, scanner.matched
return :tCOLON3
end

@lex_state = :expr_dot
return :tCOLON2, scanner.matched
return :tCOLON2

elsif scan(/\:/)
if end? || check(/\s/)
unless check(/\w/)
@lex_state = :expr_beg
return :tCOLON, ':'
return :tCOLON
end

@lex_state = :expr_fname
return :tSYMBEG, ':'
return :tSYMBEG
end

if scan(/\'/)
@@ -863,33 +901,37 @@ def yylex
end

@lex_state = :expr_fname
return :tSYMBEG, ':'
return :tSYMBEG

elsif scan(/\^\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '^'
self.yylval = '^'
return :tOP_ASGN

elsif scan(/\^/)
self.set_arg_state
return :tCARET, scanner.matched
return :tCARET

elsif check(/\</)
if scan(/\<\<\=/)
@lex_state = :expr_beg
return :tOP_ASGN, '<<'
self.yylval = '<<'
return :tOP_ASGN

elsif scan(/\<\</)
if after_operator?
@lex_state = :expr_arg
return :tLSHFT, '<<'
return :tLSHFT
elsif !after_operator? && !end? && (!arg? || @space_seen)
if token = heredoc_identifier
return token
end

@lex_state = :expr_beg
return :tLSHFT, '<<'
return :tLSHFT
end
@lex_state = :expr_beg
return :tLSHFT, '<<'
return :tLSHFT
elsif scan(/\<\=\>/)
if after_operator?
@lex_state = :expr_arg
@@ -901,37 +943,39 @@ def yylex
@lex_state = :expr_beg
end

return :tCMP, '<=>'
return :tCMP
elsif scan(/\<\=/)
self.set_arg_state
return :tLEQ, '<='
return :tLEQ

elsif scan(/\</)
self.set_arg_state
return :tLT, '<'
return :tLT
end

elsif check(/\>/)
if scan(/\>\>\=/)
return :tOP_ASGN, '>>'
self.yylval = '>>'
return :tOP_ASGN

elsif scan(/\>\>/)
self.set_arg_state
return :tRSHFT, '>>'
return :tRSHFT

elsif scan(/\>\=/)
self.set_arg_state
return :tGEQ, scanner.matched
return :tGEQ

elsif scan(/\>/)
self.set_arg_state
return :tGT, '>'
return :tGT
end

elsif scan(/->/)
# FIXME: # should be :expr_arg, but '(' breaks it...
@lex_state = :expr_end
@start_of_lambda = true
return [:tLAMBDA, scanner.matched]
return :tLAMBDA

elsif scan(/[+-]/)
matched = scanner.matched
@@ -943,84 +987,95 @@ def yylex

if beg?
@lex_state = :expr_mid
return [utype, matched]
self.yylval = matched
return utype
elsif after_operator?
@lex_state = :expr_arg
return [:tIDENTIFIER, matched + '@'] if scan(/@/)
return [sign, matched]
if scan(/@/)
self.yylval = matched + '@'
return :tIDENTIFIER
end

self.yylval = matched
return sign
end

if scan(/\=/)
@lex_state = :expr_beg
return [:tOP_ASGN, matched]
self.yylval = matched
return :tOP_ASGN
end

if spcarg?
@lex_state = :expr_mid
return [utype, matched]
self.yylval = matched
return utype
end

@lex_state = :expr_beg
return [sign, matched]
self.yylval = matched
return sign

elsif scan(/\?/)
if end?
@lex_state = :expr_beg
return :tEH, scanner.matched
return :tEH
end

unless check(/\ |\t|\r|\s/)
@lex_state = :expr_end
return :tSTRING, scan(/./)
self.yylval = scan(/./)
return :tSTRING
end

@lex_state = :expr_beg
return :tEH, scanner.matched
return :tEH

elsif scan(/\~/)
self.set_arg_state
return :tTILDE, '~'
return :tTILDE

elsif check(/\$/)
if scan(/\$([1-9]\d*)/)
@lex_state = :expr_end
return :tNTH_REF, scanner.matched.sub('$', '')
self.yylval = scanner.matched.sub('$', '')
return :tNTH_REF

elsif scan(/(\$_)(\w+)/)
@lex_state = :expr_end
return :tGVAR, scanner.matched
return :tGVAR

elsif scan(/\$[\+\'\`\&!@\"~*$?\/\\:;=.,<>_]/)
@lex_state = :expr_end
return :tGVAR, scanner.matched
return :tGVAR
elsif scan(/\$\w+/)
@lex_state = :expr_end
return :tGVAR, scanner.matched
return :tGVAR
else
raise "Bad gvar name: #{scanner.peek(5).inspect}"
end

elsif scan(/\$\w+/)
@lex_state = :expr_end
return :tGVAR, scanner.matched
return :tGVAR

elsif scan(/\@\@\w*/)
@lex_state = :expr_end
return :tCVAR, scanner.matched
return :tCVAR

elsif scan(/\@\w*/)
@lex_state = :expr_end
return :tIVAR, scanner.matched
return :tIVAR

elsif scan(/\,/)
@lex_state = :expr_beg
return :tCOMMA, scanner.matched
return :tCOMMA

elsif scan(/\{/)
if @start_of_lambda
@start_of_lambda = false
@lex_state = :expr_beg
return [:tLAMBEG, scanner.matched]
return :tLAMBEG

elsif arg? or @lex_state == :expr_end
result = :tLCURLY
@@ -1033,7 +1088,7 @@ def yylex
@lex_state = :expr_beg
cond_push 0
cmdarg_push 0
return result, scanner.matched
return result

elsif check(/[0-9]/)
return process_numeric
@@ -1044,7 +1099,8 @@ def yylex

if scanner.eos?
if @scanner_stack.size == 1 # our main scanner, we cant pop this
return [false, false]
self.yylval = false
return false
else # we were probably parsing a heredoc, so pop that parser and continue
@scanner_stack.pop
@scanner = @scanner_stack.last

0 comments on commit 755e0ba

Please sign in to comment.