Skip to content

Commit

Permalink
We don't need no stinking intern(). First attempt at wiping it out from the parser.
Browse files Browse the repository at this point in the history

So far it looks like nothing broke.  This commit carries a little risk: the
ByteList-creating methods use the entire source line as the backing byte
array instead of copying the token's bytes, so this might use more memory.
If we notice an increase, it can be tuned away later with an extra array copy.
enebo committed Jul 10, 2017
1 parent 8275435 commit c201b09
Showing 5 changed files with 25 additions and 33 deletions.
11 changes: 4 additions & 7 deletions core/src/main/java/org/jruby/lexer/LexingCommon.java
Original file line number Diff line number Diff line change
@@ -163,18 +163,15 @@ public ByteList createTokenByteList() {
return new ByteList(lexb.unsafeBytes(), lexb.begin() + tokp, lex_p - tokp, getEncoding(), false);
}

public String createTokenString(int start) {
return createAsEncodedString(lexb.getUnsafeBytes(), lexb.begin() + start, lex_p - start, getEncoding());
/**
 * Creates a ByteList for the token that begins at {@code start} and ends at the
 * current lexer position ({@code lex_p}) within the current line buffer.
 *
 * The final constructor argument is {@code false}, as in the no-arg
 * createTokenByteList(); presumably this means the result shares lexb's backing
 * array rather than copying it (confirm against ByteList's constructor).
 *
 * @param start offset of the token's first byte, relative to {@code lexb.begin()}
 * @return a ByteList spanning {@code [start, lex_p)} tagged with the lexer's encoding
 */
public ByteList createTokenByteList(int start) {
    // The length must be measured from start, not tokp; otherwise the span is wrong
    // whenever start != tokp. This matches createTokenString(int start).
    return new ByteList(lexb.unsafeBytes(), lexb.begin() + start, lex_p - start, getEncoding(), false);
}

public ByteList getIdentifier() {
return identifier;
/**
 * Builds a String for the token that begins at {@code start} and ends at the
 * current lexer position ({@code lex_p}), delegating to createAsEncodedString
 * with the lexer's encoding.
 *
 * @param start offset of the token's first byte, relative to {@code lexb.begin()}
 * @return the result of createAsEncodedString for that byte range
 */
public String createTokenString(int start) {
    final int offset = lexb.begin() + start;
    final int length = lex_p - start;
    return createAsEncodedString(lexb.getUnsafeBytes(), offset, length, getEncoding());
}

public String createAsEncodedString(byte[] bytes, int start, int length, Encoding encoding) {
// We copy because the source is typically an entire source line (it will appear leakish?).
identifier = new ByteList(bytes, start, length, encoding, true);

// FIXME: We should be able to move some faster non-exception cache using Encoding.isDefined
try {
Charset charset = getEncoding().getCharset();
34 changes: 15 additions & 19 deletions core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java
Original file line number Diff line number Diff line change
@@ -271,8 +271,7 @@ public static Keyword getKeyword(String str) {

public int tokenize_ident(int result) {
// FIXME: Get token from newtok index to lex_p?
createTokenString(); // FIXME: only until all is bytelist.
ByteList value = getIdentifier();
ByteList value = createTokenByteList();

if (isLexState(last_state, EXPR_DOT|EXPR_FNAME) && parserSupport.getCurrentScope().isDefined(value) >= 0) {
setState(EXPR_END);
@@ -1077,13 +1076,13 @@ private int yylex() throws IOException {
}
}

private int identifierToken(int result, String value) {
private int identifierToken(int result, ByteList value) {
if (result == RubyParser.tIDENTIFIER && !isLexState(last_state, EXPR_DOT|EXPR_FNAME) &&
parserSupport.getCurrentScope().isDefined(value) >= 0) {
setState(EXPR_END|EXPR_LABEL);
}

yaccValue = getIdentifier();
yaccValue = value;
return result;
}

@@ -1306,8 +1305,7 @@ private int dollar() throws IOException {

last_state = lex_state;
setState(EXPR_END);
createTokenString();
yaccValue = getIdentifier();
yaccValue = createTokenByteList();
return RubyParser.tGVAR;

}
@@ -1343,8 +1341,7 @@ private int dollar() throws IOException {
pushback('-');
return '$';
}
createTokenString().intern();
yaccValue = getIdentifier();
yaccValue = createTokenByteList();
/* xxx shouldn't check if valid option variable */
return RubyParser.tGVAR;

@@ -1368,16 +1365,15 @@ private int dollar() throws IOException {
} while (Character.isDigit(c));
pushback(c);
if (isLexState(last_state, EXPR_FNAME)) {
createTokenString().intern();
yaccValue = getIdentifier();
yaccValue = createTokenByteList();
return RubyParser.tGVAR;
}

int ref;
String refAsString = createTokenString();

try {
ref = Integer.parseInt(refAsString.substring(1).intern());
ref = Integer.parseInt(refAsString.substring(1));
} catch (NumberFormatException e) {
warnings.warn(ID.AMBIGUOUS_ARGUMENT, "`" + refAsString + "' is too big for a number variable, always nil");
ref = 0;
@@ -1389,7 +1385,7 @@ private int dollar() throws IOException {
setState(EXPR_END);

identifier = new ByteList(new byte[] {'$', (byte) c}, USASCII_ENCODING);
return identifierToken(RubyParser.tGVAR, ("$" + (char) c).intern());
return identifierToken(RubyParser.tGVAR, new ByteList(new byte[] {'$', (byte) c}));
default:
if (!isIdentifierChar(c)) {
if (c == EOF || Character.isSpaceChar(c)) {
@@ -1405,7 +1401,7 @@ private int dollar() throws IOException {

tokadd_ident(c);

return identifierToken(RubyParser.tGVAR, createTokenString().intern()); // $blah
return identifierToken(RubyParser.tGVAR, createTokenByteList()); // $blah
}
}

@@ -1494,10 +1490,10 @@ private int identifier(int c, boolean commandState) throws IOException {
int result = 0;

last_state = lex_state;
String tempVal;
ByteList tempVal;
if (lastBangOrPredicate) {
result = RubyParser.tFID;
tempVal = createTokenString();
tempVal = createTokenByteList();
} else {
if (isLexState(lex_state, EXPR_FNAME)) {
if ((c = nextc()) == '=') {
@@ -1515,9 +1511,9 @@ private int identifier(int c, boolean commandState) throws IOException {
pushback(c);
}
}
tempVal = createTokenString();
tempVal = createTokenByteList();

if (result == 0 && Character.isUpperCase(tempVal.charAt(0))) {
if (result == 0 && Character.isUpperCase(StringSupport.preciseCodePoint(getEncoding(), tempVal.unsafeBytes(), tempVal.begin(), tempVal.begin() + 1))) {
result = RubyParser.tCONSTANT;
} else {
result = RubyParser.tIDENTIFIER;
@@ -1528,7 +1524,7 @@ private int identifier(int c, boolean commandState) throws IOException {
if (isLabelSuffix()) {
setState(EXPR_ARG|EXPR_LABELED);
nextc();
yaccValue = getIdentifier();
yaccValue = tempVal;
return RubyParser.tLABEL;
}
}
@@ -1568,7 +1564,7 @@ private int identifier(int c, boolean commandState) throws IOException {
setState(EXPR_END);
}

return identifierToken(result, tempVal.intern());
return identifierToken(result, tempVal);
}

private int leftBracket(boolean spaceSeen) throws IOException {
5 changes: 2 additions & 3 deletions core/src/main/java/org/jruby/parser/ParserSupport.java
Original file line number Diff line number Diff line change
@@ -56,7 +56,6 @@
import org.jruby.runtime.DynamicScope;
import org.jruby.runtime.Signature;
import org.jruby.util.ByteList;
import org.jruby.util.IdUtil;
import org.jruby.util.KeyValuePair;
import org.jruby.util.RegexpOptions;
import org.jruby.util.StringSupport;
@@ -179,7 +178,7 @@ public AssignableNode assignableLabelOrIdentifier(ByteList name, Node value) {

@Deprecated
public AssignableNode assignableLabelOrIdentifier(String name, Node value) {
return currentScope.assign(lexer.getPosition(), name.intern(), makeNullNil(value));
return currentScope.assign(lexer.getPosition(), name, makeNullNil(value));
}

// We know it has to be tLABEL or tIDENTIFIER so none of the other assignable logic is needed
@@ -189,7 +188,7 @@ public AssignableNode assignableKeyword(ByteList name, Node value) {

@Deprecated
public AssignableNode assignableKeyword(String name, Node value) {
return currentScope.assignKeyword(lexer.getPosition(), name.intern(), makeNullNil(value));
return currentScope.assignKeyword(lexer.getPosition(), name, makeNullNil(value));
}

// Only calls via f_kw so we know it has to be tLABEL
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/parser/RubyParser.java
Original file line number Diff line number Diff line change
@@ -4805,7 +4805,7 @@ public Object yyparse (RubyLexer yyLex) throws java.io.IOException {
};
states[501] = new ParserState() {
@Override public Object execute(ParserSupport support, RubyLexer lexer, Object yyVal, Object[] yyVals, int yyTop) {
yyVal = lexer.getIdentifier();
yyVal = ((ByteList)yyVals[0+yyTop]);
return yyVal;
}
};
@@ -4817,7 +4817,7 @@ public Object yyparse (RubyLexer yyLex) throws java.io.IOException {
};
states[503] = new ParserState() {
@Override public Object execute(ParserSupport support, RubyLexer lexer, Object yyVal, Object[] yyVals, int yyTop) {
yyVal = lexer.getIdentifier();
yyVal = ((ByteList)yyVals[0+yyTop]);
return yyVal;
}
};
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/parser/RubyParser.y
Original file line number Diff line number Diff line change
@@ -2187,13 +2187,13 @@ symbol : tSYMBEG sym {
// ByteList:symbol
sym : fname
| tIVAR {
$$ = lexer.getIdentifier();
$$ = $1;
}
| tGVAR {
$$ = $1;
}
| tCVAR {
$$ = lexer.getIdentifier();
$$ = $1;
}

dsym : tSYMBEG xstring_contents tSTRING_END {

0 comments on commit c201b09

Please sign in to comment.