Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 15a4f2a1245b
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 43ccddcd344c
Choose a head ref
  • 2 commits
  • 7 files changed
  • 1 contributor

Commits on Jan 27, 2016

  1. First attempt at supporting frozen_string_literal. This needs support…

    … in ripper yet
    
    so how it is organized may need to change.  Also StrNode is only setting frozen in
    createStr in lexer and not being done for all StrNode instances.  This is because
    MRI has a lit node which we do not have.  An audit will need to be done and possibly
    we make the lit type just to simplify this audit.
    
    This is not hooked up to --enable since that is not even in our impl yet.  Also, it
    is not yet hooked up to --disable but I know the path to hooking these fields up to
    the parser.  That will be next change.
    enebo committed Jan 27, 2016
    Copy the full SHA
    158d2c0 View commit details
  2. Some more frozen-literal support. This hooks command line options to …

    …the parser.
    
    There are still some problems to work through, which I think are partially from
    the last commit being incomplete (about our missing lit node type) but almost all
    of the MRI tests are passing now.
    
    Next up will be updating ripper to be 2.3 grammar and probably a little
    refactoring to make this frozen literal stuff fit in better.
    
    The one missing feature within this feature is --debug-frozen-string-literal.
    This extra feature records where the frozen string literal was created.  So
    to implement this we will need to somehow record the position into the string
    so we can report its original location in the freeze error message.  I am not
    sure how important that will be for 2.3 support but it is still outstanding.
    enebo committed Jan 27, 2016
    Copy the full SHA
    43ccddc View commit details
10 changes: 9 additions & 1 deletion core/src/main/java/org/jruby/RubyInstanceConfig.java
Original file line number Diff line number Diff line change
@@ -1454,6 +1454,14 @@ public void setProfilingService( String service ) {
this.profilingService = service;
}

/**
 * Reports the frozen-string-literal flag currently stored on this config.
 *
 * @return the value of the {@code frozenStringLiteral} field
 */
public boolean isFrozenStringLiteral() {
    return this.frozenStringLiteral;
}

/**
 * Stores the frozen-string-literal flag on this config.
 *
 * @param frozenStringLiteral the new value of the flag
 */
public void setFrozenStringLiteral(boolean frozenStringLiteral) {
this.frozenStringLiteral = frozenStringLiteral;
}

public static ClassLoader defaultClassLoader() {
ClassLoader loader = RubyInstanceConfig.class.getClassLoader();

@@ -1554,7 +1562,7 @@ public ClassLoader getCurrentThreadClassLoader() {
private boolean kernelGsubDefined;
private boolean hasScriptArgv = false;
private boolean preferIPv4 = Options.PREFER_IPV4.load();

private boolean frozenStringLiteral = false;
private String jrubyHome;

/**
1 change: 1 addition & 0 deletions core/src/main/java/org/jruby/ast/StrNode.java
Original file line number Diff line number Diff line change
@@ -69,6 +69,7 @@ public StrNode(ISourcePosition position, StrNode head, StrNode tail) {
myValue.append(headBL);
myValue.append(tailBL);

frozen = head.isFrozen() && tail.isFrozen();
value = myValue;
codeRange = StringSupport.codeRangeScan(value.getEncoding(), value);
}
51 changes: 21 additions & 30 deletions core/src/main/java/org/jruby/ext/ripper/RipperLexer.java
Original file line number Diff line number Diff line change
@@ -32,26 +32,20 @@
import java.math.BigDecimal;
import java.util.HashMap;
import org.jcodings.Encoding;
import org.joni.Matcher;
import org.joni.Option;
import org.joni.Regex;
import org.jruby.Ruby;
import org.jruby.RubyRegexp;
import org.jruby.lexer.LexerSource;
import org.jruby.lexer.LexingCommon;
import org.jruby.lexer.yacc.StackState;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.SafeDoubleParser;
import org.jruby.util.StringSupport;

import static org.jruby.lexer.LexingCommon.*;
import static org.jruby.lexer.LexingCommon.parseMagicComment;

/**
*
* @author enebo
*/
public class RipperLexer {
public class RipperLexer extends LexingCommon {
private static final HashMap<String, Keyword> map;

static {
@@ -585,6 +579,23 @@ public void setParser(RipperParserBase parserSupport) {
this.parser = parserSupport;
}

/**
 * Applies the encoding named by a magic comment, but only when
 * {@code comment_at_top()} reports true; otherwise the request is ignored.
 *
 * @param encoding the encoding name taken from the magic comment
 */
@Override
protected void magicCommentEncoding(ByteList encoding) {
    if (comment_at_top()) {
        setEncoding(encoding);
    }
}

/**
 * Hook called by the shared magic-comment parser for compile-option comments
 * (currently {@code frozen_string_literal}). This lexer does nothing with it.
 *
 * @param name the magic comment name
 * @param value the raw value given for that name
 */
@Override
protected void setCompileOptionFlag(String name, ByteList value) {
// NOTE(review): intentionally empty for now -- presumably ripper support is still pending; confirm.

}

/**
 * Hook called by the shared magic-comment parser for the {@code warn_indent}
 * comment. This lexer does nothing with it.
 *
 * @param name the magic comment name
 * @param value the raw value given for that name
 */
@Override
protected void setTokenInfo(String name, ByteList value) {
// NOTE(review): intentionally empty -- warn_indent handling looks unimplemented here; confirm.

}

private void setEncoding(ByteList name) {
Encoding newEncoding = parser.getRuntime().getEncodingService().loadEncoding(name);

@@ -705,22 +716,6 @@ private void determineExpressionState() {
break;
}
}

/**
 * Tests whether a character is an ASCII hexadecimal digit.
 *
 * @param c the character to test
 * @return true if character is a hex value (0-9, a-f or A-F)
 */
static boolean isHexChar(int c) {
    // Compare against the ASCII range explicitly: Character.isDigit also accepts
    // non-ASCII Unicode decimal digits, which are not valid hex digits.
    return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
}

/**
 * Tests whether a character is an octal digit.
 *
 * @param c the character to test
 * @return true if character is an octal value (0-7)
 */
static boolean isOctChar(int c) {
    return c >= '0' && c <= '7';
}

/**
* This is a valid character for an identifier?
@@ -1404,12 +1399,8 @@ private int yylex() throws IOException {
continue;
}
case '#': { /* it's a comment */
ByteList encodingName = parseMagicComment(getRuntime(), lexb.makeShared(lex_p, lex_pend - lex_p));
// FIXME: boolean to mark we already found a magic comment to stop searching. When found or we went too far
if (encodingName != null) {
setEncoding(encodingName);
} else if (comment_at_top()) {
set_file_encoding(lex_p, lex_pend);
if (!parseMagicComment(getRuntime(), lexb.makeShared(lex_p, lex_pend - lex_p))) {
if (comment_at_top()) set_file_encoding(lex_p, lex_pend);
}
lex_p = lex_pend;
dispatchScanEvent(Tokens.tCOMMENT);
39 changes: 26 additions & 13 deletions core/src/main/java/org/jruby/lexer/LexingCommon.java
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
/**
* Code and constants common to both ripper and main parser.
*/
public class LexingCommon {
public abstract class LexingCommon {
// ruby constants for strings (should this be moved somewhere else?)
public static final int STR_FUNC_ESCAPE=0x01;
public static final int STR_FUNC_EXPAND=0x02;
@@ -99,32 +99,45 @@ public static int magicCommentMarker(ByteList str, int begin) {
public static final String magicString = "([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*";
public static final Regex magicRegexp = new Regex(magicString.getBytes(), 0, magicString.length(), 0, Encoding.load("ASCII"));


// MRI: parser_magic_comment
public static ByteList parseMagicComment(Ruby runtime, ByteList magicLine) throws IOException {
public boolean parseMagicComment(Ruby runtime, ByteList magicLine) throws IOException {
int length = magicLine.length();

if (length <= 7) return null;
if (length <= 7) return false;
int beg = magicCommentMarker(magicLine, 0);
if (beg < 0) return null;
int end = magicCommentMarker(magicLine, beg);
if (end < 0) return null;
if (beg >= 0) {
int end = magicCommentMarker(magicLine, beg);
if (end < 0) return false;
length = end - beg - 3; // -3 is to skip past beg
}

// We only use a regex if -*- ... -*- is found.
length = end - beg - 3; // -3 is to skip past beg
int realSize = magicLine.getRealSize();
int begin = magicLine.getBegin();
Matcher matcher = magicRegexp.matcher(magicLine.getUnsafeBytes(), begin, begin + realSize);
int result = RubyRegexp.matcherSearch(runtime, matcher, begin, begin + realSize, Option.NONE);

if (result < 0) return null;
if (result < 0) return false;

// Regexp is guarateed to have three matches
// Regexp is guaranteed to have three matches
int begs[] = matcher.getRegion().beg;
int ends[] = matcher.getRegion().end;
String name = magicLine.subSequence(begs[1], ends[1]).toString();
if (!name.contains("ccoding")) return null;
String name = magicLine.subSequence(begs[1], ends[1]).toString().replace('-', '_');

if ("coding".equals(name) || "encoding".equals(name)) {
magicCommentEncoding(magicLine.makeShared(begs[2], ends[2] - begs[2]));
} else if ("frozen_string_literal".equals(name)) {
setCompileOptionFlag(name, magicLine.makeShared(begs[2], ends[2] - begs[2]));
} else if ("warn_indent".equals(name)) {
setTokenInfo(name, magicLine.makeShared(begs[2], ends[2] - begs[2]));
} else {
return false;
}

return magicLine.makeShared(begs[2], ends[2] - begs[2]);
return true;
}

/**
 * Called by parseMagicComment when the comment name is {@code coding} or {@code encoding}.
 *
 * @param encoding the value portion of the magic comment
 */
protected abstract void magicCommentEncoding(ByteList encoding);
/**
 * Called by parseMagicComment when the comment name is {@code frozen_string_literal}
 * (any '-' in the name has already been replaced with '_').
 *
 * @param name the normalized magic comment name
 * @param value the value portion of the magic comment
 */
protected abstract void setCompileOptionFlag(String name, ByteList value);
/**
 * Called by parseMagicComment when the comment name is {@code warn_indent}
 * (any '-' in the name has already been replaced with '_').
 *
 * @param name the normalized magic comment name
 * @param value the value portion of the magic comment
 */
protected abstract void setTokenInfo(String name, ByteList value);
}
89 changes: 55 additions & 34 deletions core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java
Original file line number Diff line number Diff line change
@@ -61,6 +61,7 @@
import org.jruby.common.IRubyWarnings;
import org.jruby.common.IRubyWarnings.ID;
import org.jruby.lexer.LexerSource;
import org.jruby.lexer.LexingCommon;
import org.jruby.lexer.yacc.SyntaxException.PID;
import org.jruby.parser.ParserSupport;
import org.jruby.parser.RubyParser;
@@ -70,34 +71,10 @@
import org.jruby.util.StringSupport;
import org.jruby.util.cli.Options;

import static org.jruby.lexer.LexingCommon.ASCII8BIT_ENCODING;
import static org.jruby.lexer.LexingCommon.BEGIN_DOC_MARKER;
import static org.jruby.lexer.LexingCommon.CODING;
import static org.jruby.lexer.LexingCommon.END_DOC_MARKER;
import static org.jruby.lexer.LexingCommon.END_MARKER;
import static org.jruby.lexer.LexingCommon.EOF;
import static org.jruby.lexer.LexingCommon.STR_FUNC_INDENT;
import static org.jruby.lexer.LexingCommon.STR_FUNC_QWORDS;
import static org.jruby.lexer.LexingCommon.STR_FUNC_REGEXP;
import static org.jruby.lexer.LexingCommon.SUFFIX_ALL;
import static org.jruby.lexer.LexingCommon.SUFFIX_I;
import static org.jruby.lexer.LexingCommon.SUFFIX_R;
import static org.jruby.lexer.LexingCommon.USASCII_ENCODING;
import static org.jruby.lexer.LexingCommon.UTF8_ENCODING;
import static org.jruby.lexer.LexingCommon.isHexChar;
import static org.jruby.lexer.LexingCommon.isOctChar;
import static org.jruby.lexer.LexingCommon.parseMagicComment;
import static org.jruby.lexer.LexingCommon.str_dquote;
import static org.jruby.lexer.LexingCommon.str_dsym;
import static org.jruby.lexer.LexingCommon.str_regexp;
import static org.jruby.lexer.LexingCommon.str_squote;
import static org.jruby.lexer.LexingCommon.str_ssym;
import static org.jruby.lexer.LexingCommon.str_xquote;

/*
* This is a port of the MRI lexer to Java.
*/
public class RubyLexer {
public class RubyLexer extends LexingCommon {
private static final HashMap<String, Keyword> map;

static {
@@ -287,6 +264,7 @@ public static Keyword getKeyword(String str) {
private LexState last_state;
public ISourcePosition tokline;
private int tokenCR;
private boolean tokenSeen;

public int getTokenCR() {
return tokenCR;
@@ -401,6 +379,7 @@ public final void reset() {
tokp = 0;
ruby_sourceline = src.getLineOffset() - 1;
last_cr_line = -1;
tokenSeen = false;

parser_prepare();
}
@@ -657,6 +636,46 @@ public void setParserSupport(ParserSupport parserSupport) {
this.parserSupport = parserSupport;
}

/**
 * Applies the encoding named by a magic comment, but only when
 * {@code comment_at_top()} reports true; otherwise the request is ignored.
 *
 * @param encoding the encoding name taken from the magic comment
 */
@Override
protected void magicCommentEncoding(ByteList encoding) {
    if (comment_at_top()) {
        setEncoding(encoding);
    }
}

/**
 * Handles a compile-option magic comment by updating the parser configuration.
 * Once any token has been seen the comment is ignored with a warning, and a
 * value that is neither true nor false leaves the configuration untouched.
 *
 * @param name the magic comment name, used in warning text
 * @param value the raw value given for that name
 */
@Override
protected void setCompileOptionFlag(String name, ByteList value) {
    if (!tokenSeen) {
        int truth = asTruth(name, value);

        // MRI keeps a hash of compile options here. Only one option is supported
        // so far, so it is set directly; revisit when a second option is added.
        if (truth >= 0) {
            parserSupport.getConfiguration().setFrozenStringLiteral(truth == 1);
        }
        return;
    }

    warnings.warn(ID.ACCESSOR_MODULE_FUNCTION, "`" + name + "' is ignored after any tokens");
}

// Spellings of the boolean magic-comment values, matched case-insensitively.
// Shared constants: they are only ever read, so one copy per class is enough.
private static final ByteList TRUE = new ByteList(new byte[] {'t', 'r', 'u', 'e'});
private static final ByteList FALSE = new ByteList(new byte[] {'f', 'a', 'l', 's', 'e'});

/**
 * Interprets a magic-comment value as a boolean.
 *
 * @param name the magic comment name, used only in the warning text
 * @param value the raw value to interpret
 * @return 1 if the value is "true", 0 if it is "false" (both ignoring case),
 *         or -1 after issuing a warning for anything else
 */
protected int asTruth(String name, ByteList value) {
    if (value.caseInsensitiveCmp(TRUE) == 0) return 1;
    if (value.caseInsensitiveCmp(FALSE) == 0) return 0;

    warnings.warn(ID.ACCESSOR_MODULE_FUNCTION, "invalid value for " + name + ": " + value);
    return -1;
}

/**
 * Hook called by the shared magic-comment parser for the {@code warn_indent}
 * comment. This lexer does nothing with it.
 *
 * @param name the magic comment name
 * @param value the raw value given for that name
 */
@Override
protected void setTokenInfo(String name, ByteList value) {
// NOTE(review): intentionally empty -- warn_indent handling looks unimplemented here; confirm.

}

private void setEncoding(ByteList name) {
Ruby runtime = parserSupport.getConfiguration().getRuntime();
Encoding newEncoding = runtime.getEncodingService().loadEncoding(name);
@@ -874,7 +893,11 @@ public StrNode createStr(ByteList buffer, int flags) {
}
}

return new StrNode(getPosition(), buffer, codeRange);
StrNode newStr = new StrNode(getPosition(), buffer, codeRange);

if (parserSupport.getConfiguration().isFrozenStringLiteral()) newStr.setFrozen(true);

return newStr;
}

/**
@@ -1276,6 +1299,7 @@ private int yylex() throws IOException {
int c;
boolean spaceSeen = false;
boolean commandState;
boolean tokenSeen = this.tokenSeen;

if (lex_strterm != null) {
int tok = lex_strterm.parseString(this);
@@ -1300,6 +1324,7 @@ private int yylex() throws IOException {

commandState = commandStart;
commandStart = false;
this.tokenSeen = true;

loop: for(;;) {
last_state = lex_state;
@@ -1318,19 +1343,15 @@ private int yylex() throws IOException {
spaceSeen = true;
continue;
case '#': { /* it's a comment */
ByteList encodingName = parseMagicComment(parserSupport.getConfiguration().getRuntime(), lexb.makeShared(lex_p, lex_pend - lex_p));
// FIXME: boolean to mark we already found a magic comment to stop searching. When found or we went too far
if (comment_at_top()) {
if (encodingName != null) {
setEncoding(encodingName);
} else {
set_file_encoding(lex_p, lex_pend);
}
this.tokenSeen = tokenSeen;
if (!parseMagicComment(parserSupport.getConfiguration().getRuntime(), lexb.makeShared(lex_p, lex_pend - lex_p))) {
if (comment_at_top()) set_file_encoding(lex_p, lex_pend);
}
lex_p = lex_pend;
}
/* fall through */
case '\n':
this.tokenSeen = tokenSeen;
switch (lex_state) {
case EXPR_BEG: case EXPR_FNAME: case EXPR_DOT:
case EXPR_CLASS: case EXPR_VALUE:
12 changes: 12 additions & 0 deletions core/src/main/java/org/jruby/parser/ParserConfiguration.java
Original file line number Diff line number Diff line change
@@ -55,6 +55,8 @@ public class ParserConfiguration {
// whether we should save the end-of-file data as DATA
private boolean saveData = false;

private boolean frozenStringLiteral = false;

private Encoding defaultEncoding;
private Ruby runtime;

@@ -80,6 +82,16 @@ public ParserConfiguration(Ruby runtime, int lineNumber,
this(runtime, lineNumber, inlineSource, isFileParse, saveData);

this.isDebug = config.isParserDebug();
this.frozenStringLiteral = config.isFrozenStringLiteral();
}


/**
 * Stores the frozen-string-literal flag on this parser configuration.
 *
 * @param frozenStringLiteral the new value of the flag
 */
public void setFrozenStringLiteral(boolean frozenStringLiteral) {
this.frozenStringLiteral = frozenStringLiteral;
}

/**
 * Reports the frozen-string-literal flag currently stored on this parser configuration.
 *
 * @return the value of the {@code frozenStringLiteral} field
 */
public boolean isFrozenStringLiteral() {
    return this.frozenStringLiteral;
}

public void setDefaultEncoding(Encoding encoding) {
Loading