Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: dcb15518f75c
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: a5e70693e151
Choose a head ref
  • 13 commits
  • 29 files changed
  • 1 contributor

Commits on Apr 5, 2015

  1. New lexer running plenty of stuff but missing some features like SCRI…

    …PT_LINES and with some bad line position info
    enebo committed Apr 5, 2015
    Copy the full SHA
    fbaf038 View commit details
  2. Copy the full SHA
    61a0356 View commit details
  3. Copy the full SHA
    0849271 View commit details
  4. Copy the full SHA
    2b8fe7e View commit details
  5. Documented and further fixed precise_mbclen since I did not correctly…

    … understand the API :P
    enebo committed Apr 5, 2015
    Copy the full SHA
    20f8190 View commit details
  6. bytelistlexersource was not setting defaultexternalencoding by defaul…

    …t. invalid encodings in parser should raise argument error and not a syntax error
    enebo committed Apr 5, 2015
    Copy the full SHA
    51a04d2 View commit details
  7. Copy the full SHA
    7651e94 View commit details
  8. fix a majority of linenumber positioning bugs. make magic comment lin…

    …e encoding only happen at the right lines
    enebo committed Apr 5, 2015
    Copy the full SHA
    f4db00c View commit details
  9. Copy the full SHA
    e82555e View commit details
  10. $=...go to hell

    enebo committed Apr 5, 2015
    Copy the full SHA
    9732108 View commit details
  11. Copy the full SHA
    4caa087 View commit details
  12. Copy the full SHA
    881743e View commit details
  13. do no intern keywords

    enebo committed Apr 5, 2015
    Copy the full SHA
    a5e7069 View commit details
Showing with 1,302 additions and 2,235 deletions.
  1. +21 −20 core/src/main/java/org/jruby/ext/ripper/HeredocTerm.java
  2. +20 −112 core/src/main/java/org/jruby/ext/ripper/RipperLexer.java
  3. +4 −3 core/src/main/java/org/jruby/ext/ripper/RipperParser.java
  4. +1 −0 core/src/main/java/org/jruby/ext/ripper/RipperParser.y
  5. +1 −0 core/src/main/java/org/jruby/ext/ripper/RipperParserBase.java
  6. +6 −3 core/src/main/java/org/jruby/ext/ripper/RubyRipper.java
  7. +2 −0 core/src/main/java/org/jruby/ext/ripper/StrTerm.java
  8. +46 −43 core/src/main/java/org/jruby/ext/ripper/StringTerm.java
  9. +16 −6 core/src/main/java/org/jruby/{ext/ripper → lexer}/ByteListLexerSource.java
  10. +19 −5 core/src/main/java/org/jruby/{ext/ripper → lexer}/GetsLexerSource.java
  11. +31 −11 core/src/main/java/org/jruby/{ext/ripper → lexer}/LexerSource.java
  12. +130 −0 core/src/main/java/org/jruby/lexer/LexingCommon.java
  13. +0 −268 core/src/main/java/org/jruby/lexer/yacc/ByteArrayLexerSource.java
  14. +92 −59 core/src/main/java/org/jruby/lexer/yacc/HeredocTerm.java
  15. +0 −320 core/src/main/java/org/jruby/lexer/yacc/InputStreamLexerSource.java
  16. +0 −307 core/src/main/java/org/jruby/lexer/yacc/LexerSource.java
  17. +700 −566 core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java
  18. +1 −1 core/src/main/java/org/jruby/lexer/yacc/StrTerm.java
  19. +159 −146 core/src/main/java/org/jruby/lexer/yacc/StringTerm.java
  20. +22 −10 core/src/main/java/org/jruby/parser/Parser.java
  21. +10 −8 core/src/main/java/org/jruby/parser/ParserSupport.java
  22. +10 −15 core/src/main/java/org/jruby/parser/RubyParser.java
  23. +7 −12 core/src/main/java/org/jruby/parser/RubyParser.y
  24. +1 −1 core/src/main/java/org/jruby/runtime/load/ExternalScript.java
  25. +2 −2 core/src/main/java/org/jruby/runtime/load/LibrarySearcher.java
  26. +1 −1 core/src/main/java/org/jruby/runtime/load/LoadServiceResourceInputStream.java
  27. +0 −301 core/src/test/java/org/jruby/lexer/yacc/ByteArrayLexerSourceTest.java
  28. +0 −14 test/jruby/test_globals.rb
  29. +0 −1 test/mri/excludes/TestM17N.rb
41 changes: 21 additions & 20 deletions core/src/main/java/org/jruby/ext/ripper/HeredocTerm.java
Original file line number Diff line number Diff line change
@@ -1,36 +1,39 @@
/*
***** BEGIN LICENSE BLOCK *****
* Version: CPL 1.0/GPL 2.0/LGPL 2.1
* Version: EPL 1.0/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Common Public
* The contents of this file are subject to the Eclipse Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.eclipse.org/legal/cpl-v10.html
* the License at http://www.eclipse.org/legal/epl-v10.html
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* Copyright (C) 2013 The JRuby Team (jruby@jruby.org)
*
* Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
* Copyright (C) 2004-2007 Thomas E Enebo <enebo@acm.org>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the CPL, indicate your
* use your version of this file under the terms of the EPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the CPL, the GPL or the LGPL.
* the terms of any one of the EPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****/
package org.jruby.ext.ripper;

import org.jcodings.Encoding;
import org.jruby.lexer.LexerSource;
import org.jruby.util.ByteList;

import static org.jruby.lexer.LexingCommon.*;

/**
* A lexing unit for scanning a heredoc element.
@@ -93,18 +96,18 @@ protected int restore(RipperLexer lexer) {
lexer.heredoc_restore(this);
lexer.setStrTerm(null);

return RipperLexer.EOF;
return EOF;
}

@Override
public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOException {
ByteList str = null;
ByteList eos = nd_lit;
int len = nd_lit.length() - 1;
boolean indent = (flags & RipperLexer.STR_FUNC_INDENT) != 0;
boolean indent = (flags & STR_FUNC_INDENT) != 0;
int c = lexer.nextc();

if (c == RipperLexer.EOF) return error(lexer, len, str, eos);
if (c == EOF) return error(lexer, len, str, eos);

// Found end marker for this heredoc
if (lexer.was_bol() && lexer.whole_match_p(nd_lit, indent)) {
@@ -113,21 +116,23 @@ public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOExce
return Tokens.tSTRING_END;
}

if ((flags & RipperLexer.STR_FUNC_EXPAND) == 0) {
if ((flags & STR_FUNC_EXPAND) == 0) {
do {
ByteList lbuf = lexer.lex_lastline;
int p = 0;
int pend = lexer.lex_pend;
if (pend > p) {
switch(lexer.p(pend-1)) { // ENEBO: This seems wrong.
switch(lexer.p(pend-1)) {
case '\n':
pend--;
if (pend == p || lexer.p(pend-1) == '\r') {
pend++;
break;
}
break;
case '\r':
pend--;
break;
}
}
if (str != null) {
@@ -138,12 +143,8 @@ public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOExce

if (pend < lexer.lex_pend) str.append('\n');
lexer.lex_goto_eol();
if (lexer.nextc() == -1) {
if (str != null) {
str = null;
return error(lexer, len, str, eos);
}
}
// MRI null checks str in this case but it is unconditionally non-null?
if (lexer.nextc() == -1) return error(lexer, len, null, eos);
} while(!lexer.whole_match_p(eos, indent));
} else {
ByteList tok = new ByteList();
@@ -168,7 +169,7 @@ public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOExce
Encoding enc[] = new Encoding[1];
enc[0] = lexer.getEncoding();

if ((c = new StringTerm(flags, '\0', '\n').parseStringIntoBuffer(lexer, src, tok, enc)) == RipperLexer.EOF) {
if ((c = new StringTerm(flags, '\0', '\n').parseStringIntoBuffer(lexer, src, tok, enc)) == EOF) {
if (lexer.eofp) return error(lexer, len, str, eos);
return restore(lexer);
}
@@ -179,7 +180,7 @@ public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOExce
}
tok.append(lexer.nextc());

if ((c = lexer.nextc()) == RipperLexer.EOF) return error(lexer, len, str, eos);
if ((c = lexer.nextc()) == EOF) return error(lexer, len, str, eos);
} while (!lexer.whole_match_p(eos, indent));
str = tok;
}
132 changes: 20 additions & 112 deletions core/src/main/java/org/jruby/ext/ripper/RipperLexer.java
Original file line number Diff line number Diff line change
@@ -32,42 +32,30 @@
import java.math.BigDecimal;
import java.util.HashMap;
import org.jcodings.Encoding;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.joni.Matcher;
import org.joni.Option;
import org.joni.Regex;
import org.jruby.Ruby;
import org.jruby.RubyRegexp;
import org.jruby.lexer.LexerSource;
import org.jruby.lexer.yacc.StackState;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.SafeDoubleParser;
import org.jruby.util.StringSupport;

import static org.jruby.lexer.LexingCommon.*;
import static org.jruby.lexer.LexingCommon.parseMagicComment;

/**
*
* @author enebo
*/
public class RipperLexer {
public static final Encoding UTF8_ENCODING = UTF8Encoding.INSTANCE;
public static final Encoding USASCII_ENCODING = USASCIIEncoding.INSTANCE;
public static final Encoding ASCII8BIT_ENCODING = ASCIIEncoding.INSTANCE;

private static ByteList END_MARKER = new ByteList(new byte[] {'_', '_', 'E', 'N', 'D', '_', '_'});
private static ByteList BEGIN_DOC_MARKER = new ByteList(new byte[] {'b', 'e', 'g', 'i', 'n'});
private static ByteList END_DOC_MARKER = new ByteList(new byte[] {'e', 'n', 'd'});
private static ByteList CODING = new ByteList(new byte[] {'c', 'o', 'd', 'i', 'n', 'g'});
private static final HashMap<String, Keyword> map;

private static final int SUFFIX_R = 1<<0;
private static final int SUFFIX_I = 1<<1;
private static final int SUFFIX_ALL = 3;

static {
map = new HashMap<String, Keyword>();

map = new HashMap<>();
map.put("end", Keyword.END);
map.put("else", Keyword.ELSE);
map.put("case", Keyword.CASE);
@@ -222,12 +210,12 @@ public enum Keyword {
WHILE ("while", Tokens.kWHILE, Tokens.kWHILE_MOD, LexState.EXPR_BEG),
ALIAS ("alias", Tokens.kALIAS, Tokens.kALIAS, LexState.EXPR_FNAME),
__ENCODING__("__ENCODING__", Tokens.k__ENCODING__, Tokens.k__ENCODING__, LexState.EXPR_END);

public final String name;
public final int id0;
public final int id1;
public final LexState state;

Keyword(String name, int id0, int id1, LexState state) {
this.name = name;
this.id0 = id0;
@@ -277,25 +265,9 @@ public static Keyword getKeyword(String str) {
private StrTerm lex_strterm;
public boolean commandStart;

// Give a name to a value. Enebo: This should be used more.
static final int EOF = -1; // 0 in MRI

// ruby constants for strings (should this be moved somewhere else?)
static final int STR_FUNC_ESCAPE=0x01;
static final int STR_FUNC_EXPAND=0x02;
static final int STR_FUNC_REGEXP=0x04;
static final int STR_FUNC_QWORDS=0x08;
static final int STR_FUNC_SYMBOL=0x10;
// When the heredoc identifier specifies <<-EOF that indents before ident. are ok (the '-').
static final int STR_FUNC_INDENT=0x20;

private static final int str_squote = 0;
private static final int str_dquote = STR_FUNC_EXPAND;
private static final int str_xquote = STR_FUNC_EXPAND;
private static final int str_regexp = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND;
private static final int str_ssym = STR_FUNC_SYMBOL;
private static final int str_dsym = STR_FUNC_SYMBOL | STR_FUNC_EXPAND;

// Count of nested parentheses (1.9 only)
private int parenNest = 0;
// 1.9 only
@@ -441,7 +413,7 @@ public int column() {
}

public int lineno() {
return ruby_sourceline + src.getLineOffset();
return ruby_sourceline + src.getLineOffset() - 1;
}

public void dispatchHeredocEnd() {
@@ -905,6 +877,7 @@ private int hereDocumentIdentifier() throws IOException {
return 0;
}
markerValue = new ByteList();
markerValue.setEncoding(current_enc);
term = '"';
func |= str_dquote;
do {
@@ -927,43 +900,6 @@ private void arg_ambiguous() {
parser.dispatch("on_arg_ambiguous");
}


/* MRI: magic_comment_marker */
/* This impl is a little sucky. We basically double scan the same bytelist twice. Once here
* and once in parseMagicComment.
*/
private int magicCommentMarker(ByteList str, int begin) {
int i = begin;
int len = str.length();

while (i < len) {
switch (str.charAt(i)) {
case '-':
if (i >= 2 && str.charAt(i - 1) == '*' && str.charAt(i - 2) == '-') return i + 1;
i += 2;
break;
case '*':
if (i + 1 >= len) return -1;

if (str.charAt(i + 1) != '-') {
i += 4;
} else if (str.charAt(i - 1) != '-') {
i += 2;
} else {
return i + 2;
}
break;
default:
i += 3;
break;
}
}
return -1;
}

private static final String magicString = "([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*";
private static final Regex magicRegexp = new Regex(magicString.getBytes(), 0, magicString.length(), 0, Encoding.load("ASCII"));

private boolean comment_at_top() {
int p = lex_pbeg;
int pend = lex_p - 1;
@@ -974,35 +910,6 @@ private boolean comment_at_top() {
}
return true;
}

// MRI: parser_magic_comment
protected boolean parseMagicComment(ByteList magicLine) throws IOException {
int length = magicLine.length();
if (length <= 7) return false;
int beg = magicCommentMarker(magicLine, 0);
if (beg < 0) return false;
int end = magicCommentMarker(magicLine, beg);
if (end < 0) return false;

// We only use a regex if -*- ... -*- is found. Not too hot a path?
int realSize = magicLine.getRealSize();
int begin = magicLine.getBegin();
Matcher matcher = magicRegexp.matcher(magicLine.getUnsafeBytes(), begin, begin + realSize);
int result = RubyRegexp.matcherSearch(getRuntime(), matcher, begin, begin + realSize, Option.NONE);
if (result < 0) return false;

// Regexp is guarateed to have three matches
int begs[] = matcher.getRegion().beg;
int ends[] = matcher.getRegion().end;
String name = magicLine.subSequence(begs[1], ends[1]).toString();
if (!name.equalsIgnoreCase("encoding")) return false;

ByteList val = new ByteList(magicLine.getUnsafeBytes(), begs[2], ends[2] - begs[2]);

parser.dispatch("on_magic_comment", parser.getRuntime().newString(name), createStr(val, 0));

return true;
}

protected void set_file_encoding(int str, int send) {
boolean sep = false;
@@ -1181,7 +1088,7 @@ private String printToken(int token) {
case Tokens.tLABEL: return "tLABEL("+ ((Token) value()).getValue() +":),";
case '\n': return "NL";
case EOF: return "EOF";
default: return "'" + (char)token + " [" + (int) token + "',";
default: return "'" + (char)token + " [" + token + "',";
}
}

@@ -1472,17 +1379,20 @@ private int yylex() throws IOException {
dispatchScanEvent(Tokens.tSP);
continue;
}
case '#': /* it's a comment */
if (!parseMagicComment(lexb.makeShared(lex_p, lex_pend - lex_p))) {
if (comment_at_top()) {
set_file_encoding(lex_p, lex_pend);
}
case '#': { /* it's a comment */
ByteList encodingName = parseMagicComment(getRuntime(), lexb.makeShared(lex_p, lex_pend - lex_p));
// FIXME: boolean to mark we already found a magic comment to stop searching. When found or we went too far
if (encodingName != null) {
setEncoding(encodingName);
} else if (comment_at_top()) {
set_file_encoding(lex_p, lex_pend);
}
lex_p = lex_pend;
dispatchScanEvent(Tokens.tCOMMENT);

fallthru = true;
/* fall through */
}
/* fall through */
case '\n':
switch (lex_state) {
case EXPR_BEG:
@@ -2839,8 +2749,6 @@ public void readUTFEscapeRegexpLiteral(ByteList buffer) throws IOException {
buffer.setEncoding(UTF8_ENCODING);
}

private byte[] mbcBuf = new byte[6];

// mri: parser_tokadd_mbchar
// This is different than MRI in that we return a boolean since we only care whether it was added
// or not. The MRI version returns the byte supplied which is never used as a value.
7 changes: 4 additions & 3 deletions core/src/main/java/org/jruby/ext/ripper/RipperParser.java
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@
package org.jruby.ext.ripper;

import org.jruby.RubyArray;
import org.jruby.lexer.LexerSource;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.ext.ripper.RipperLexer.LexState;
@@ -40,7 +41,7 @@ public class RipperParser extends RipperParserBase {
public RipperParser(ThreadContext context, IRubyObject ripper, LexerSource source) {
super(context, ripper, source);
}
// line 44 "-"
// line 45 "-"
// %token constants
public static final int kCLASS = 257;
public static final int kMODULE = 258;
@@ -4645,6 +4646,6 @@ public Object yyparse (RipperLexer yyLex) throws java.io.IOException {
}
};
}
// line 2069 "RipperParser.y"
// line 2070 "RipperParser.y"
}
// line 9136 "-"
// line 9137 "-"
1 change: 1 addition & 0 deletions core/src/main/java/org/jruby/ext/ripper/RipperParser.y
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@
package org.jruby.ext.ripper;

import org.jruby.RubyArray;
import org.jruby.lexer.LexerSource;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.ext.ripper.RipperLexer.LexState;
Loading