Skip to content

Commit

Permalink
Start rooting symbols used as identifiers, to preserve encoding.
Browse files Browse the repository at this point in the history
The lack of rooting caused a UTF-8 method name in Rails ActionPack
to get lost from the symbol table, and as a result when the list
of methods later turned into a list of symbols, the symbols did
not have proper encoding information.

This is difficult to test, since it will require forcing GC to
run and remove properly-encoded symbols.
headius committed Oct 14, 2015
1 parent d37982c commit a8bc41e
Showing 4 changed files with 134 additions and 54 deletions.
60 changes: 27 additions & 33 deletions core/src/main/java/org/jruby/RubyModule.java
Original file line number Diff line number Diff line change
@@ -91,6 +91,7 @@
import org.jruby.runtime.opto.Invalidator;
import org.jruby.runtime.opto.OptoFactory;
import org.jruby.runtime.profile.MethodEnhancer;
import org.jruby.util.ByteList;
import org.jruby.util.ClassProvider;
import org.jruby.util.IdUtil;
import org.jruby.util.TypeConverter;
@@ -1611,8 +1612,9 @@ public RubyModule defineModuleUnder(String name) {
return getRuntime().defineModuleUnder(name, this);
}

private void addAccessor(ThreadContext context, String internedName, Visibility visibility, boolean readable, boolean writeable) {
assert internedName == internedName.intern() : internedName + " is not interned";
private void addAccessor(ThreadContext context, RubySymbol identifier, Visibility visibility, boolean readable, boolean writeable) {
ByteList idBytes = identifier.getBytes();
String internedIdentifier = identifier.toString();

final Ruby runtime = context.runtime;

@@ -1621,19 +1623,22 @@ private void addAccessor(ThreadContext context, String internedName, Visibility
visibility = PRIVATE;
}

if (!(IdUtil.isLocal(internedName) || IdUtil.isConstant(internedName))) {
throw runtime.newNameError("invalid attribute name", internedName);
if (!(IdUtil.isLocal(internedIdentifier) || IdUtil.isConstant(internedIdentifier))) {
throw runtime.newNameError("invalid attribute name", internedIdentifier);
}

final String variableName = ("@" + internedName).intern();
// FIXME: This only works if identifier's encoding is ASCII-compatible
final String variableName = TypeConverter.checkID(runtime, "@" + internedIdentifier).toString();
if (readable) {
addMethod(internedName, new AttrReaderMethod(methodLocation, visibility, variableName));
callMethod(context, "method_added", runtime.fastNewSymbol(internedName));
addMethod(internedIdentifier, new AttrReaderMethod(methodLocation, visibility, variableName));
callMethod(context, "method_added", identifier);
}
if (writeable) {
internedName = (internedName + "=").intern();
addMethod(internedName, new AttrWriterMethod(methodLocation, visibility, variableName));
callMethod(context, "method_added", runtime.fastNewSymbol(internedName));
// FIXME: This only works if identifier's encoding is ASCII-compatible
identifier = TypeConverter.checkID(runtime, internedIdentifier + "=");
internedIdentifier = identifier.toString();
addMethod(internedIdentifier, new AttrWriterMethod(methodLocation, visibility, variableName));
callMethod(context, "method_added", identifier);
}
}

@@ -1777,16 +1782,11 @@ public IRubyObject newMethod(IRubyObject receiver, final String methodName, bool
@JRubyMethod(name = "define_method", visibility = PRIVATE, reads = VISIBILITY)
public IRubyObject define_method(ThreadContext context, IRubyObject arg0, Block block) {
Ruby runtime = context.runtime;
String name = TypeConverter.convertToIdentifier(arg0);
RubySymbol nameSym = TypeConverter.checkID(arg0);
String name = nameSym.toString();
DynamicMethod newMethod = null;
Visibility visibility = PUBLIC;

// We need our identifier to be retrievable and creatable as a symbol. This side-effect
// populates this name into our symbol table so it will exist later if needed. The
// reason for this hack/side-effect is that symbols store their values as raw bytes. We lose encoding
// info so we need to make an entry so any accesses with raw bytes later gets proper symbol.
RubySymbol nameSym = RubySymbol.newSymbol(runtime, arg0);

if (!block.isGiven()) {
throw getRuntime().newArgumentError("tried to create Proc object without a block");
}
@@ -1823,16 +1823,11 @@ public IRubyObject define_method(ThreadContext context, IRubyObject arg0, Block
@JRubyMethod(name = "define_method", visibility = PRIVATE, reads = VISIBILITY)
public IRubyObject define_method(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
Ruby runtime = context.runtime;
String name = TypeConverter.convertToIdentifier(arg0);
RubySymbol nameSym = TypeConverter.checkID(arg0);
String name = nameSym.toString();
DynamicMethod newMethod = null;
Visibility visibility = PUBLIC;

// We need our identifier to be retrievable and creatable as a symbol. This side-effect
// populates this name into our symbol table so it will exist later if needed. The
// reason for this hack/side-effect is that symbols store their values as raw bytes. We lose encoding
// info so we need to make an entry so any accesses with raw bytes later gets proper symbol.
RubySymbol nameSym = RubySymbol.newSymbol(runtime, arg0);

if (runtime.getProc().isInstance(arg1)) {
// double-testing args.length here, but it avoids duplicating the proc-setup code in two places
RubyProc proc = (RubyProc)arg1;
@@ -2180,15 +2175,15 @@ public IRubyObject initialize(ThreadContext context, Block block) {
}

public void addReadWriteAttribute(ThreadContext context, String name) {
addAccessor(context, name.intern(), PUBLIC, true, true);
addAccessor(context, TypeConverter.checkID(context.runtime, name), PUBLIC, true, true);
}

public void addReadAttribute(ThreadContext context, String name) {
addAccessor(context, name.intern(), PUBLIC, true, false);
addAccessor(context, TypeConverter.checkID(context.runtime, name), PUBLIC, true, false);
}

public void addWriteAttribute(ThreadContext context, String name) {
addAccessor(context, name.intern(), PUBLIC, false, true);
addAccessor(context, TypeConverter.checkID(context.runtime, name), PUBLIC, false, true);
}

/** rb_mod_attr
@@ -2204,7 +2199,7 @@ public IRubyObject attr19(ThreadContext context, IRubyObject[] args) {

if (args.length == 2 && (args[1] == runtime.getTrue() || args[1] == runtime.getFalse())) {
runtime.getWarnings().warn(ID.OBSOLETE_ARGUMENT, "optional boolean argument is obsoleted");
addAccessor(context, args[0].asJavaString().intern(), context.getCurrentVisibility(), args[0].isTrue(), args[1].isTrue());
addAccessor(context, TypeConverter.checkID(args[0]), context.getCurrentVisibility(), args[0].isTrue(), args[1].isTrue());
return runtime.getNil();
}

@@ -2225,7 +2220,7 @@ public IRubyObject attr_reader(ThreadContext context, IRubyObject[] args) {
Visibility visibility = context.getCurrentVisibility();

for (int i = 0; i < args.length; i++) {
addAccessor(context, args[i].asJavaString().intern(), visibility, true, false);
addAccessor(context, TypeConverter.checkID(args[i]), visibility, true, false);
}

return context.nil;
@@ -2240,7 +2235,7 @@ public IRubyObject attr_writer(ThreadContext context, IRubyObject[] args) {
Visibility visibility = context.getCurrentVisibility();

for (int i = 0; i < args.length; i++) {
addAccessor(context, args[i].asJavaString().intern(), visibility, false, true);
addAccessor(context, TypeConverter.checkID(args[i]), visibility, false, true);
}

return context.nil;
@@ -2265,7 +2260,7 @@ public IRubyObject attr_accessor(ThreadContext context, IRubyObject[] args) {
// This is almost always already interned, since it will be called with a symbol in most cases
// but when created from Java code, we might getService an argument that needs to be interned.
// addAccessor has as a precondition that the string MUST be interned
addAccessor(context, args[i].asJavaString().intern(), visibility, true, true);
addAccessor(context, TypeConverter.checkID(args[i]), visibility, true, true);
}

return context.nil;
@@ -2648,8 +2643,7 @@ public RubyModule private_class_method(IRubyObject[] args) {
public RubyModule alias_method(ThreadContext context, IRubyObject newId, IRubyObject oldId) {
String newName = newId.asJavaString();
defineAlias(newName, oldId.asJavaString());
RubySymbol newSym = newId instanceof RubySymbol ? (RubySymbol)newId :
context.runtime.newSymbol(newName);
RubySymbol newSym = TypeConverter.checkID(newId);
if (isSingleton()) {
((MetaClass)this).getAttached().callMethod(context, "singleton_method_added", newSym);
} else {
81 changes: 68 additions & 13 deletions core/src/main/java/org/jruby/RubySymbol.java
Original file line number Diff line number Diff line change
@@ -188,13 +188,31 @@ public static RubySymbol getSymbolLong(Ruby runtime, long id) {
*/

public static RubySymbol newSymbol(Ruby runtime, IRubyObject name) {
if (!(name instanceof RubyString)) return newSymbol(runtime, name.asJavaString());
if (name instanceof RubySymbol) {
return runtime.getSymbolTable().getSymbol(((RubySymbol) name).getBytes(), false);
} else if (name instanceof RubyString) {
return runtime.getSymbolTable().getSymbol(((RubyString) name).getByteList(), false);
} else {
return newSymbol(runtime, name.asJavaString());
}
}

return runtime.getSymbolTable().getSymbol(((RubyString) name).getByteList());
public static RubySymbol newHardSymbol(Ruby runtime, IRubyObject name) {
if (name instanceof RubySymbol) {
return runtime.getSymbolTable().getSymbol(((RubySymbol) name).getBytes(), true);
} else if (name instanceof RubyString) {
return runtime.getSymbolTable().getSymbol(((RubyString) name).getByteList(), true);
} else {
return newSymbol(runtime, name.asJavaString());
}
}

public static RubySymbol newSymbol(Ruby runtime, String name) {
return runtime.getSymbolTable().getSymbol(name);
return runtime.getSymbolTable().getSymbol(name, false);
}

public static RubySymbol newHardSymbol(Ruby runtime, String name) {
return runtime.getSymbolTable().getSymbol(name, true);
}

// FIXME: same bytesequences will fight over encoding of the symbol once cached. I think largely
@@ -208,6 +226,17 @@ public static RubySymbol newSymbol(Ruby runtime, String name, Encoding encoding)
return newSymbol;
}

// FIXME: same bytesequences will fight over encoding of the symbol once cached. I think largely
// this will only happen in some ISO_8859_?? encodings making symbols at the same time so it should
// be pretty rare.
public static RubySymbol newHardSymbol(Ruby runtime, String name, Encoding encoding) {
RubySymbol newSymbol = newHardSymbol(runtime, name);

newSymbol.associateEncoding(encoding);

return newSymbol;
}

/**
* @see org.jruby.compiler.Constantizable
*/
@@ -689,64 +718,90 @@ static class SymbolEntry {
final String name;
final ByteList bytes;
final WeakReference<RubySymbol> symbol;
RubySymbol hardReference; // only read in this
SymbolEntry next;

SymbolEntry(int hash, String name, ByteList bytes, WeakReference<RubySymbol> symbol, SymbolEntry next) {
SymbolEntry(int hash, String name, ByteList bytes, RubySymbol symbol, SymbolEntry next, boolean hard) {
this.hash = hash;
this.name = name;
this.bytes = bytes;
this.symbol = symbol;
this.symbol = new WeakReference<RubySymbol>(symbol);
this.next = next;
if (hard) hardReference = symbol;
}

/**
* Force an existing weak symbol to become a hard symbol, so it never goes away.
*/
public void setHardReference() {
if (hardReference == null) {
hardReference = symbol.get();
}
}
}

public RubySymbol getSymbol(String name) {
return getSymbol(name, false);
}

public RubySymbol getSymbol(String name, boolean hard) {
int hash = javaStringHashCode(name);
RubySymbol symbol = null;

for (SymbolEntry e = getEntryFromTable(symbolTable, hash); e != null; e = e.next) {
if (isSymbolMatch(name, hash, e)) {
if (hard) e.setHardReference();
symbol = e.symbol.get();
break;
}
}

if (symbol == null) symbol = createSymbol(name, symbolBytesFromString(runtime, name), hash);
if (symbol == null) symbol = createSymbol(name, symbolBytesFromString(runtime, name), hash, hard);

return symbol;
}

public RubySymbol getSymbol(ByteList bytes) {
return getSymbol(bytes, false);
}

public RubySymbol getSymbol(ByteList bytes, boolean hard) {
RubySymbol symbol = null;
int hash = javaStringHashCode(bytes);

for (SymbolEntry e = getEntryFromTable(symbolTable, hash); e != null; e = e.next) {
if (isSymbolMatch(bytes, hash, e)) {
if (hard) e.setHardReference();
symbol = e.symbol.get();
break;
}
}

if (symbol == null) {
bytes = bytes.dup();
symbol = createSymbol(bytes.toString(), bytes, hash);
symbol = createSymbol(bytes.toString(), bytes, hash, hard);
}

return symbol;
}

public RubySymbol fastGetSymbol(String internedName) {
return fastGetSymbol(internedName, false);
}

public RubySymbol fastGetSymbol(String internedName, boolean hard) {
RubySymbol symbol = null;

for (SymbolEntry e = getEntryFromTable(symbolTable, internedName.hashCode()); e != null; e = e.next) {
if (isSymbolMatch(internedName, e)) {
if (hard) e.setHardReference();
symbol = e.symbol.get();
break;
}
}

if (symbol == null) {
symbol = fastCreateSymbol(internedName);
symbol = fastCreateSymbol(internedName, hard);
}

return symbol;
@@ -768,7 +823,7 @@ private static boolean isSymbolMatch(String internedName, SymbolEntry entry) {
return internedName == entry.name;
}

private RubySymbol createSymbol(final String name, final ByteList value, final int hash) {
private RubySymbol createSymbol(final String name, final ByteList value, final int hash, boolean hard) {
ReentrantLock lock;
(lock = tableLock).lock();
try {
@@ -800,7 +855,7 @@ private RubySymbol createSymbol(final String name, final ByteList value, final i
if (symbol == null) {
String internedName = name.intern();
symbol = new RubySymbol(runtime, internedName, value);
table[index] = new SymbolEntry(hash, internedName, value, new WeakReference(symbol), table[index]);
table[index] = new SymbolEntry(hash, internedName, value, symbol, table[index], hard);
size++;
// write-volatile
symbolTable = table;
@@ -821,7 +876,7 @@ private void removeDeadEntry(SymbolEntry[] table, int index, SymbolEntry last, S
size--; // reduce current size because we lost one somewhere
}

private RubySymbol fastCreateSymbol(final String internedName) {
private RubySymbol fastCreateSymbol(final String internedName, boolean hard) {
ReentrantLock lock;
(lock = tableLock).lock();
try {
@@ -853,7 +908,7 @@ private RubySymbol fastCreateSymbol(final String internedName) {

if (symbol == null) {
symbol = new RubySymbol(runtime, internedName);
table[index] = new SymbolEntry(hash, internedName, symbol.getBytes(), new WeakReference(symbol), table[index]);
table[index] = new SymbolEntry(hash, internedName, symbol.getBytes(), symbol, table[index], hard);
size++;
// write-volatile
symbolTable = table;
@@ -934,7 +989,7 @@ private SymbolEntry[] rehash() {
for (SymbolEntry p = e; p != lastRun; p = p.next) {
int k = p.hash & sizeMask;
SymbolEntry n = newTable[k];
newTable[k] = new SymbolEntry(p.hash, p.name, p.bytes, p.symbol, n);
newTable[k] = new SymbolEntry(p.hash, p.name, p.bytes, p.symbol.get(), n, p.hardReference != null);
}
}
}
8 changes: 8 additions & 0 deletions core/src/main/java/org/jruby/ir/runtime/IRRuntimeHelpers.java
Original file line number Diff line number Diff line change
@@ -1229,6 +1229,7 @@ public static void defInterpretedClassMethod(ThreadContext context, IRScope meth
} else {
newMethod = new MixedModeIRMethod(method, Visibility.PUBLIC, rubyClass);
}
// FIXME: needs checkID and proper encoding to force hard symbol
rubyClass.addMethod(method.getName(), newMethod);
if (!rubyClass.isRefinement()) {
obj.callMethod(context, "singleton_method_added", context.runtime.fastNewSymbol(method.getName()));
@@ -1239,8 +1240,10 @@ public static void defInterpretedClassMethod(ThreadContext context, IRScope meth
public static void defCompiledClassMethod(ThreadContext context, MethodHandle handle, IRScope method, IRubyObject obj) {
RubyClass rubyClass = checkClassForDef(context, method, obj);

// FIXME: needs checkID and proper encoding to force hard symbol
rubyClass.addMethod(method.getName(), new CompiledIRMethod(handle, method, Visibility.PUBLIC, rubyClass, method.receivesKeywordArgs()));
if (!rubyClass.isRefinement()) {
// FIXME: needs checkID and proper encoding to force hard symbol
obj.callMethod(context, "singleton_method_added", context.runtime.fastNewSymbol(method.getName()));
}
}
@@ -1249,8 +1252,10 @@ public static void defCompiledClassMethod(ThreadContext context, MethodHandle ha
public static void defCompiledClassMethod(ThreadContext context, MethodHandle variable, MethodHandle specific, int specificArity, IRScope method, IRubyObject obj) {
RubyClass rubyClass = checkClassForDef(context, method, obj);

// FIXME: needs checkID and proper encoding to force hard symbol
rubyClass.addMethod(method.getName(), new CompiledIRMethod(variable, specific, specificArity, method, Visibility.PUBLIC, rubyClass, method.receivesKeywordArgs()));
if (!rubyClass.isRefinement()) {
// FIXME: needs checkID and proper encoding to force hard symbol
obj.callMethod(context, "singleton_method_added", context.runtime.fastNewSymbol(method.getName()));
}
}
@@ -1280,6 +1285,7 @@ public static void defInterpretedInstanceMethod(ThreadContext context, IRScope m
newMethod = new MixedModeIRMethod(method, newVisibility, rubyClass);
}

// FIXME: needs checkID and proper encoding to force hard symbol
Helpers.addInstanceMethod(rubyClass, method.getName(), newMethod, currVisibility, context, runtime);
}

@@ -1293,6 +1299,7 @@ public static void defCompiledInstanceMethod(ThreadContext context, MethodHandle

DynamicMethod newMethod = new CompiledIRMethod(handle, method, newVisibility, clazz, method.receivesKeywordArgs());

// FIXME: needs checkID and proper encoding to force hard symbol
Helpers.addInstanceMethod(clazz, method.getName(), newMethod, currVisibility, context, runtime);
}

@@ -1306,6 +1313,7 @@ public static void defCompiledInstanceMethod(ThreadContext context, MethodHandle

DynamicMethod newMethod = new CompiledIRMethod(variable, specific, specificArity, method, newVisibility, clazz, method.receivesKeywordArgs());

// FIXME: needs checkID and proper encoding to force hard symbol
Helpers.addInstanceMethod(clazz, method.getName(), newMethod, currVisibility, context, runtime);
}

39 changes: 31 additions & 8 deletions core/src/main/java/org/jruby/util/TypeConverter.java
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@
import org.jruby.RubyModule;
import org.jruby.RubyNumeric;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ClassIndex;
import org.jruby.RubyNil;
@@ -156,17 +157,27 @@ private static String typeAsString(IRubyObject obj) {
* are stored internally as raw bytes from whatever encoding they were originally sourced from.
* When methods are stored they must also get stored in this same raw fashion so that if we
* use symbols to look up methods or make symbols from these method names they will match up.
*
* For 2.2 compatibility, we also force all incoming identifiers to get anchored as hard-referenced symbols.
*/
public static String convertToIdentifier(IRubyObject obj) {
// Assume Symbol already returns ISO8859-1/raw bytes from asJavaString()
// Assume all other objects cannot participate in providing raw bytes since we cannot
// grab it's string representation without calling a method which properly encodes
// the string.
if (obj instanceof RubyString) {
return new String(ByteList.plain(((RubyString) obj).getByteList()), RubyEncoding.ISO).intern();
public static RubySymbol checkID(IRubyObject obj) {
if (obj instanceof RubySymbol || obj instanceof RubyString) {
return RubySymbol.newHardSymbol(obj.getRuntime(), obj);
}

return obj.asJavaString().intern();
return RubySymbol.newHardSymbol(obj.getRuntime(), obj.asJavaString());
}

/**
* Convert the supplied object into an internal identifier String. Basically, symbols
* are stored internally as raw bytes from whatever encoding they were originally sourced from.
* When methods are stored they must also get stored in this same raw fashion so that if we
* use symbols to look up methods or make symbols from these method names they will match up.
*
* For 2.2 compatibility, we also force all incoming identifiers to get anchored as hard-referenced symbols.
*/
public static RubySymbol checkID(Ruby runtime, String name) {
return RubySymbol.newHardSymbol(runtime, name.intern());
}

/**
@@ -391,4 +402,16 @@ public static IRubyObject convertToTypeWithCheck(IRubyObject obj, RubyClass targ
if (!target.isInstance(val)) throw obj.getRuntime().newTypeError(obj.getMetaClass() + "#" + convertMethod + " should return " + target.getName());
return val;
}

@Deprecated
public static String convertToIdentifier(IRubyObject obj) {
// Assume Symbol already returns ISO8859-1/raw bytes from asJavaString()
// Assume all other objects cannot participate in providing raw bytes since we cannot
// grab it's string representation without calling a method which properly encodes
// the string.
if (obj instanceof RubyString) {
return new String(ByteList.plain(((RubyString) obj).getByteList()), RubyEncoding.ISO).intern();
}
return obj.asJavaString().intern();
}
}

0 comments on commit a8bc41e

Please sign in to comment.