Skip to content

Commit

Permalink
Attempt to start making encoded symbols work as IDs.
Browse files Browse the repository at this point in the history
This is a big patch and represents a prototype/spike of encoded
IDs. Changes include:

* Propagate encoding and UTF-16 string from parser. This was
  a pragmatic choice since *many* paths expect identifier strings
  from the parser to be proper Java strings. In the future we
  would want to propagate the raw bytes.
* Use raw-strings (Java strings with raw bytes as chars) for all
  of constant storage. This fixes many issues with encodings being
  lost or having different paths to constants conflict because
  some use raw and some do not.
* Changes to compiler and IR to support all this.

This is promising work but it represents only the beginning. All
identifiers in the system would need to start propagating encoding
and raw bytes (or a way to get at raw bytes) which may require
large-scale changes to our APIs. In addition, there are some
deficiencies that would require a larger effort:

* If an identifier's symbol is not anchored in memory, the symbol
  will be collected and there will be no way to reliably get back
  to byte[]+encoding from that raw string. Most of the time it
  will be UTF-8 but it could be anything.
* Because of the above deficiency, we would need to start storing
  some form that preserves original bytes and encoding. This
  This would be a drastic API-breaking change throughout JRuby.
headius committed Feb 24, 2016
1 parent 92280ef commit f7f5417
Showing 34 changed files with 564 additions and 399 deletions.
4 changes: 4 additions & 0 deletions core/src/main/java/org/jruby/Ruby.java
Original file line number Diff line number Diff line change
@@ -3536,6 +3536,10 @@ public RubySymbol newSymbol(String name) {
return symbolTable.getSymbol(name);
}

public RubySymbol newSymbolFromRaw(String name) {
return symbolTable.getSymbol(name);
}

public RubySymbol newSymbol(String name, Encoding encoding) {
ByteList byteList = RubyString.encodeBytelist(name, encoding);
return symbolTable.getSymbol(byteList);
19 changes: 10 additions & 9 deletions core/src/main/java/org/jruby/RubyBasicObject.java
Original file line number Diff line number Diff line change
@@ -2018,7 +2018,7 @@ private RubyBoolean respond_to_p19(IRubyObject mname, final boolean includePriva
// MRI (1.9) always passes down a symbol when calling respond_to_missing?
if ( ! (mname instanceof RubySymbol) ) mname = runtime.newSymbol(name);
IRubyObject respond = Helpers.invoke(runtime.getCurrentContext(), this, "respond_to_missing?", mname, runtime.newBoolean(includePrivate));
return runtime.newBoolean( respond.isTrue() );
return runtime.newBoolean(respond.isTrue());
}

/** rb_obj_id
@@ -2427,12 +2427,8 @@ public RubyArray singleton_methods(ThreadContext context, IRubyObject[] args) {
* m.call #=> "Hello, @iv = Fred"
*/
public IRubyObject method(IRubyObject name) {
return getMetaClass().newMethod(this, name.asJavaString(), true, null);
}

public IRubyObject method19(IRubyObject name) {
final RubySymbol symbol = TypeConverter.checkID(name);
return getMetaClass().newMethod(this, symbol.asJavaString(), true, null, true);
return getMetaClass().newMethod(this, symbol.toID(), true, null, true);
}

/** rb_any_to_s
@@ -2783,7 +2779,7 @@ public IRubyObject remove_instance_variable(ThreadContext context, IRubyObject n
if ((value = (IRubyObject)variableTableRemove(validateInstanceVariable(name, name.asJavaString()))) != null) {
return value;
}
throw context.runtime.newNameError("instance variable " + name.asJavaString() + " not defined", this, name);
throw context.runtime.newNameError("instance variable %1$s not defined", this, name);
}

/** rb_obj_instance_variables
@@ -2876,13 +2872,13 @@ protected static int nonFixnumHashCode(IRubyObject hashValue) {
protected String validateInstanceVariable(String name) {
if (IdUtil.isValidInstanceVariableName(name)) return name;

throw getRuntime().newNameError("`" + name + "' is not allowable as an instance variable name", this, name);
throw getRuntime().newNameError("`%1$s' is not allowable as an instance variable name", this, name);
}

protected String validateInstanceVariable(IRubyObject nameObj, String name) {
if (IdUtil.isValidInstanceVariableName(name)) return name;

throw getRuntime().newNameError("`" + name + "' is not allowable as an instance variable name", this, nameObj);
throw getRuntime().newNameError("`%1$s' is not allowable as an instance variable name", this, nameObj);
}

/**
@@ -3077,4 +3073,9 @@ public final Object getNativeHandle() {
@Deprecated
public final void setNativeHandle(Object value) {
}

@Deprecated
public IRubyObject method19(IRubyObject name) {
return method(name);
}
}
9 changes: 7 additions & 2 deletions core/src/main/java/org/jruby/RubyKernel.java
Original file line number Diff line number Diff line change
@@ -1985,8 +1985,8 @@ public static RubyArray singleton_methods19(ThreadContext context, IRubyObject s
}

@JRubyMethod(name = "method", required = 1)
public static IRubyObject method19(IRubyObject self, IRubyObject symbol) {
return ((RubyBasicObject)self).method19(symbol);
public static IRubyObject method(IRubyObject self, IRubyObject symbol) {
return ((RubyBasicObject)self).method(symbol);
}

@JRubyMethod(name = "to_s")
@@ -2066,4 +2066,9 @@ public static IRubyObject methodMissing(ThreadContext context, IRubyObject recv,
return methodMissing(context, recv, name, lastVis, lastCallType, args);
}

@Deprecated
public static IRubyObject method19(IRubyObject self, IRubyObject symbol) {
return ((RubyBasicObject)self).method(symbol);
}

}
90 changes: 55 additions & 35 deletions core/src/main/java/org/jruby/RubyModule.java
Original file line number Diff line number Diff line change
@@ -56,6 +56,8 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

import org.jcodings.specific.ISO8859_11Encoding;
import org.jcodings.specific.ISO8859_1Encoding;
import org.jruby.anno.AnnotationBinder;
import org.jruby.anno.AnnotationHelper;
import org.jruby.anno.JRubyClass;
@@ -1195,7 +1197,8 @@ public void removeMethod(ThreadContext context, String name) {
synchronized (methodsForWrite) {
DynamicMethod method = (DynamicMethod) methodsForWrite.remove(name);
if (method == null) {
throw runtime.newNameError("method '" + name + "' not defined in " + getName(), name);
String decoded = new String(name.getBytes(ISO8859_1Encoding.INSTANCE.getCharset()), runtime.getDefaultExternalEncoding().getCharset());
throw runtime.newNameError("method '" + decoded + "' not defined in " + getName(), decoded);
}

invalidateCoreClasses();
@@ -1642,7 +1645,7 @@ public RubyModule defineModuleUnder(String name) {
}

private void addAccessor(ThreadContext context, RubySymbol identifier, Visibility visibility, boolean readable, boolean writeable) {
String internedIdentifier = identifier.toString();
String internedIdentifier = identifier.toID();

final Ruby runtime = context.runtime;

@@ -1656,15 +1659,15 @@ private void addAccessor(ThreadContext context, RubySymbol identifier, Visibilit
}

// FIXME: This only works if identifier's encoding is ASCII-compatible
final String variableName = TypeConverter.checkID(runtime, '@' + internedIdentifier).toString();
final String variableName = TypeConverter.checkID(runtime, '@' + internedIdentifier).toID();
if (readable) {
addMethod(internedIdentifier, new AttrReaderMethod(methodLocation, visibility, variableName));
callMethod(context, "method_added", identifier);
}
if (writeable) {
// FIXME: This only works if identifier's encoding is ASCII-compatible
identifier = TypeConverter.checkID(runtime, internedIdentifier + '=');
internedIdentifier = identifier.toString();
internedIdentifier = identifier.toID();
addMethod(internedIdentifier, new AttrWriterMethod(methodLocation, visibility, variableName));
callMethod(context, "method_added", identifier);
}
@@ -1819,7 +1822,7 @@ public IRubyObject define_method(ThreadContext context, IRubyObject arg0, Block
public IRubyObject defineMethodFromBlock(ThreadContext context, IRubyObject arg0, Block block, Visibility visibility) {
final Ruby runtime = context.runtime;
RubySymbol nameSym = TypeConverter.checkID(arg0);
String name = nameSym.toString();
String name = nameSym.toID();
DynamicMethod newMethod;

if (!block.isGiven()) {
@@ -1864,7 +1867,7 @@ public IRubyObject define_method(ThreadContext context, IRubyObject arg0, IRubyO
public IRubyObject defineMethodFromCallable(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Visibility visibility) {
final Ruby runtime = context.runtime;
RubySymbol nameSym = TypeConverter.checkID(arg0);
String name = nameSym.toString();
String name = nameSym.toID();
DynamicMethod newMethod;

if (runtime.getProc().isInstance(arg1)) {
@@ -2686,9 +2689,10 @@ public RubyModule private_class_method(IRubyObject[] args) {

@JRubyMethod(name = "alias_method", required = 2, visibility = PRIVATE)
public RubyModule alias_method(ThreadContext context, IRubyObject newId, IRubyObject oldId) {
String newName = newId.asJavaString();
defineAlias(newName, oldId.asJavaString());
RubySymbol newSym = TypeConverter.checkID(newId);
RubySymbol oldSym = TypeConverter.checkID(oldId);
String newName = newSym.toID();
defineAlias(newName, oldSym.toID());
if (isSingleton()) {
((MetaClass)this).getAttached().callMethod(context, "singleton_method_added", newSym);
} else {
@@ -2700,7 +2704,8 @@ public RubyModule alias_method(ThreadContext context, IRubyObject newId, IRubyOb
@JRubyMethod(name = "undef_method", rest = true, visibility = PRIVATE)
public RubyModule undef_method(ThreadContext context, IRubyObject[] args) {
for (int i=0; i<args.length; i++) {
undef(context, args[i].asJavaString());
RubySymbol sym = TypeConverter.checkID(args[i]);
undef(context, sym.toID());
}
return this;
}
@@ -2759,7 +2764,9 @@ public IRubyObject module_exec(ThreadContext context, IRubyObject[] args, Block
@JRubyMethod(name = "remove_method", rest = true, visibility = PRIVATE)
public RubyModule remove_method(ThreadContext context, IRubyObject[] args) {
for(int i=0;i<args.length;i++) {
removeMethod(context, args[i].asJavaString());
RubySymbol nameSym = TypeConverter.checkID(args[i]);
String name = nameSym.toID();
removeMethod(context, name);
}
return this;
}
@@ -3078,44 +3085,54 @@ public RubyBoolean const_defined_p19(ThreadContext context, IRubyObject[] args)
boolean inherit = args.length == 1 || ( ! args[1].isNil() && args[1].isTrue() );

final IRubyObject symbol = args[0];
final String fullName = symbol.asJavaString();
String name = fullName;
RubySymbol nameSym = TypeConverter.checkID(symbol);
String id = nameSym.toID();
String name = nameSym.toString();

int sep = name.indexOf("::");
// symbol form does not allow ::
if (symbol instanceof RubySymbol && sep != -1) {
throw runtime.newNameError("wrong constant name", name);
throw runtime.newNameError("wrong constant name %s", name);
}

RubyModule mod = this;

if (sep == 0) { // ::Foo::Bar
mod = runtime.getObject();
name = name.substring(2);
id = id.substring(2);
}

// Bare ::
if (name.length() == 0) {
throw runtime.newNameError("wrong constant name ", fullName);
throw runtime.newNameError("wrong constant name %s", name);
}

IRubyObject obj;
while ( ( sep = name.indexOf("::") ) != -1 ) {
final String segment = name.substring(0, sep);
if (segment.length() == 0) {
throw runtime.newNameError("wrong constant name " + fullName, name);
// prep name segment
String nameSegment = name.substring(0, sep);
if (nameSegment.length() == 0) {
throw runtime.newNameError("wrong constant name %s", name);
}
obj = mod.getConstantNoConstMissing(validateConstant(segment, symbol), inherit, inherit);
validateConstant(nameSegment, symbol);

// prep id segment
int idSep = id.indexOf("::");
String idSegment = id.substring(0, idSep);
obj = mod.getConstantNoConstMissing(idSegment, inherit, inherit);
if (obj == null) return runtime.getFalse();
if (obj instanceof RubyModule) {
mod = (RubyModule) obj;
} else {
throw runtime.newTypeError(segment + " does not refer to class/module");
throw runtime.newTypeError(nameSegment + " does not refer to class/module");
}
name = name.substring(sep + 2);
id = id.substring(idSep + 2);
}

obj = mod.getConstantSkipAutoload(validateConstant(name, symbol), inherit, inherit);
validateConstant(name, symbol);
obj = mod.getConstantSkipAutoload(id, inherit, inherit);
return runtime.newBoolean(obj != null);
}

@@ -3176,29 +3193,32 @@ public IRubyObject const_get_2_0(ThreadContext context, IRubyObject[] args) {
*/
@JRubyMethod(name = "const_set", required = 2)
public IRubyObject const_set(IRubyObject symbol, IRubyObject value) {
return setConstant(validateConstant(symbol).intern(), value);
RubySymbol name = TypeConverter.checkID(symbol);
validateConstant(name.toString(), name);
return setConstant(name.toID(), value);
}

@JRubyMethod(name = "remove_const", required = 1, visibility = PRIVATE)
public IRubyObject remove_const(ThreadContext context, IRubyObject rubyName) {
String name = validateConstant(rubyName);
String id = TypeConverter.checkID(rubyName).toID();
IRubyObject value;
if ((value = deleteConstant(name)) != null) {
invalidateConstantCache(name);
if ((value = deleteConstant(id)) != null) {
invalidateConstantCache(id);
if (value != UNDEF) {
return value;
}
removeAutoload(name);
removeAutoload(id);
// FIXME: I'm not sure this is right, but the old code returned
// the undef, which definitely isn't right...
return context.runtime.getNil();
}

if (hasConstantInHierarchy(name)) {
if (hasConstantInHierarchy(id)) {
throw cannotRemoveError(name);
}

throw context.runtime.newNameError("constant " + name + " not defined for " + getName(), name);
throw context.runtime.newNameError("constant " + getName() + "::" + name + " not defined", name);
}

private boolean hasConstantInHierarchy(final String name) {
@@ -3219,16 +3239,16 @@ private boolean hasConstantInHierarchy(final String name) {
@JRubyMethod(name = "const_missing", required = 1)
public IRubyObject const_missing(ThreadContext context, IRubyObject rubyName, Block block) {
Ruby runtime = context.runtime;
String shortName = rubyName.asJavaString();
String longName;
RubySymbol symName = TypeConverter.checkID(rubyName);
String message;

if (this != runtime.getObject()) {
longName = getName() + "::" + shortName;
message = "uninitialized constant " + getName() + "::%1$s";
} else {
longName = shortName;
message = "uninitialized constant %1$s";
}

throw runtime.newNameError("uninitialized constant " + longName, this, rubyName);
throw runtime.newNameError(message, this, rubyName);
}

public RubyArray constants(ThreadContext context) {
@@ -3252,7 +3272,7 @@ public RubyArray constantsCommon19(ThreadContext context, boolean replaceModule,
Collection<String> constantNames = constantsCommon(runtime, replaceModule, allConstants, false);

for (String name : constantNames) {
array.add(runtime.newSymbol(name));
array.add(RubySymbol.newHardSymbol(runtime, name));
}
return array;
}
@@ -4009,14 +4029,14 @@ protected final String validateClassVariable(String name) {
if (IdUtil.isValidClassVariableName(name)) {
return name;
}
throw getRuntime().newNameError("`" + name + "' is not allowed as a class variable name", this, name);
throw getRuntime().newNameError("`%1$s' is not allowed as a class variable name", this, name);
}

protected final String validateClassVariable(IRubyObject nameObj, String name) {
if (IdUtil.isValidClassVariableName(name)) {
return name;
}
throw getRuntime().newNameError("`" + name + "' is not allowed as a class variable name", this, nameObj);
throw getRuntime().newNameError("`%1$s' is not allowed as a class variable name", this, nameObj);
}

protected final void ensureClassVariablesSettable() {
@@ -4162,7 +4182,7 @@ protected final String validateConstant(String name, IRubyObject errorName) {
nameString = (RubyString) nameString.inspect();
}

throw getRuntime().newNameError("wrong constant name " + nameString, name);
throw getRuntime().newNameError("wrong constant name %s", this, errorName);
}

protected final void ensureConstantsSettable() {
Loading

0 comments on commit f7f5417

Please sign in to comment.