Skip to content

Commit

Permalink
Merge pull request #4258 from jruby/add-default-encodings-to-encoding…
Browse files Browse the repository at this point in the history
…-manager

[Truffle] Add default encodings to encoding manager
  • Loading branch information
chrisseaton committed Nov 3, 2016
2 parents 2b3e077 + 6e7541d commit 2dcbc66
Show file tree
Hide file tree
Showing 18 changed files with 494 additions and 353 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Expand Up @@ -9,6 +9,7 @@ cache:

before_install:
- unset GEM_PATH GEM_HOME IRBRC JRUBY_OPTS
- rm ~/.m2/settings.xml

jdk: oraclejdk8

Expand Down
4 changes: 2 additions & 2 deletions core/pom.rb
Expand Up @@ -49,8 +49,8 @@
jar 'com.github.jnr:jnr-enxio:0.13', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-x86asm:1.0.2', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-unixsocket:0.14', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-posix:3.0.31', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-constants:0.9.4', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-posix:3.0.32-SNAPSHOT', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-constants:0.9.5-SNAPSHOT', :exclusions => ['com.github.jnr:jnr-ffi']
jar 'com.github.jnr:jnr-ffi:2.1.0'
jar 'com.github.jnr:jffi:${jffi.version}'
jar 'com.github.jnr:jffi:${jffi.version}:native'
Expand Down
4 changes: 2 additions & 2 deletions core/pom.xml
Expand Up @@ -137,7 +137,7 @@ DO NOT MODIFIY - GENERATED CODE
<dependency>
<groupId>com.github.jnr</groupId>
<artifactId>jnr-posix</artifactId>
<version>3.0.31</version>
<version>3.0.32-SNAPSHOT</version>
<exclusions>
<exclusion>
<artifactId>jnr-ffi</artifactId>
Expand All @@ -148,7 +148,7 @@ DO NOT MODIFIY - GENERATED CODE
<dependency>
<groupId>com.github.jnr</groupId>
<artifactId>jnr-constants</artifactId>
<version>0.9.4</version>
<version>0.9.5-SNAPSHOT</version>
<exclusions>
<exclusion>
<artifactId>jnr-ffi</artifactId>
Expand Down
117 changes: 9 additions & 108 deletions core/src/main/java/org/jruby/runtime/encoding/EncodingService.java
Expand Up @@ -3,7 +3,6 @@
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.EncodingDB.Entry;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.ISO8859_16Encoding;
import org.jcodings.util.CaseInsensitiveBytesHash;
Expand All @@ -19,22 +18,21 @@
import java.lang.reflect.Field;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;

import org.jcodings.specific.USASCIIEncoding;
import org.jruby.RubyFixnum;
import org.jruby.RubyString;
import org.jruby.ext.nkf.RubyNKF;
import org.jruby.util.SafePropertyAccessor;
import org.jruby.util.encoding.ISO_8859_16;
import org.jruby.util.io.EncodingUtils;

public final class EncodingService {
private final CaseInsensitiveBytesHash<Entry> encodings;
private final CaseInsensitiveBytesHash<Entry> aliases;

// for fast lookup: encoding entry => org.jruby.RubyEncoding
public final IRubyObject[] encodingList;
private final IRubyObject[] encodingList;
// for fast lookup: org.joni.encoding.Encoding => org.jruby.RubyEncoding
private RubyEncoding[] encodingIndex = new RubyEncoding[4];
// the runtime
Expand Down Expand Up @@ -178,133 +176,36 @@ public RubyEncoding getEncoding(Encoding enc) {
return encodingIndex[enc.getIndex()];
}

/**
 * Callback pair used by {@code defineEncodings(EncodingDefinitionVisitor)} while it walks the
 * table of known encodings.
 */
public interface EncodingDefinitionVisitor {
    /**
     * Called once per encoding entry, before any constant for that entry is reported.
     *
     * @param encodingEntry the entry being visited
     * @param name          buffer holding the encoding name
     * @param p             index of the first byte of the name within {@code name}
     * @param end           index one past the last byte of the name within {@code name}
     */
    void defineEncoding(Entry encodingEntry, byte[] name, int p, int end);

    /**
     * Called once per constant name derived from the visited entry's encoding name.
     *
     * @param encodingListIndex index of the entry, as returned by its {@code getIndex()}
     * @param constName         constant name to define for that entry
     */
    void defineConstant(int encodingListIndex, String constName);
}

/**
 * Callback pair used by {@code defineAliases(EncodingAliasVisitor)} while it walks the table of
 * encoding aliases.
 */
public interface EncodingAliasVisitor {
    /**
     * Called for each constant name derived from an alias, immediately before
     * {@link #defineConstant(int, String)} is called with the same arguments.
     *
     * @param encodingListIndex index of the aliased entry, as returned by its {@code getIndex()}
     * @param constName         constant name derived from the alias
     */
    void defineAlias(int encodingListIndex, String constName);

    /**
     * Called for each constant name derived from an alias.
     *
     * @param encodingListIndex index of the aliased entry, as returned by its {@code getIndex()}
     * @param constName         constant name to define for that entry
     */
    void defineConstant(int encodingListIndex, String constName);
}

/**
 * Walks the known encodings with a visitor that creates a {@code RubyEncoding} for each entry,
 * stores it in {@code encodingList} at the entry's index, and defines each reported constant
 * name for it through {@code defineEncodingConstant}.
 */
public void defineEncodings() {
    final EncodingDefinitionVisitor runtimeVisitor = new EncodingDefinitionVisitor() {
        @Override
        public void defineEncoding(Entry encodingEntry, byte[] name, int p, int end) {
            // Create the encoding object first, then record it under the entry's index.
            final RubyEncoding created =
                    RubyEncoding.newEncoding(runtime, name, p, end, encodingEntry.isDummy());
            encodingList[encodingEntry.getIndex()] = created;
        }

        @Override
        public void defineConstant(int encodingListIndex, String constName) {
            final RubyEncoding target = (RubyEncoding) encodingList[encodingListIndex];
            defineEncodingConstant(runtime, target, constName);
        }
    };

    defineEncodings(runtimeVisitor);
}

public void defineEncodings(EncodingDefinitionVisitor visitor) {
HashEntryIterator hei = encodings.entryIterator();
while (hei.hasNext()) {
CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry> e =
((CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry>)hei.next());
Entry ee = e.value;

visitor.defineEncoding(ee, e.bytes, e.p, e.end);
RubyEncoding encoding = RubyEncoding.newEncoding(runtime, e.bytes, e.p, e.end, ee.isDummy());
encodingList[ee.getIndex()] = encoding;

for (String constName : encodingNames(e.bytes, e.p, e.end)) {
visitor.defineConstant(ee.getIndex(), constName);
for (String constName : EncodingUtils.encodingNames(e.bytes, e.p, e.end)) {
defineEncodingConstant(runtime, (RubyEncoding) encodingList[ee.getIndex()], constName);
}
}
}

/**
 * Walks the encoding aliases with a visitor that ignores the alias notification and defines
 * each reported constant name for the {@code encodingList} element at the given index.
 */
public void defineAliases() {
    final EncodingAliasVisitor runtimeVisitor = new EncodingAliasVisitor() {
        @Override
        public void defineAlias(int encodingListIndex, String constName) {
            // Intentionally empty: only the constant definition matters here.
        }

        @Override
        public void defineConstant(int encodingListIndex, String constName) {
            final RubyEncoding target = (RubyEncoding) encodingList[encodingListIndex];
            defineEncodingConstant(runtime, target, constName);
        }
    };

    defineAliases(runtimeVisitor);
}

public void defineAliases(EncodingAliasVisitor visitor) {
HashEntryIterator hei = aliases.entryIterator();
while (hei.hasNext()) {
CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry> e =
((CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry>)hei.next());
Entry ee = e.value;

for (String constName : encodingNames(e.bytes, e.p, e.end)) {
visitor.defineAlias(ee.getIndex(), constName);
visitor.defineConstant(ee.getIndex(), constName);
// The constant names must be treated by the <code>encodingNames</code> helper.
for (String constName : EncodingUtils.encodingNames(e.bytes, e.p, e.end)) {
defineEncodingConstant(runtime, (RubyEncoding) encodingList[ee.getIndex()], constName);
}
}
}

/**
 * Computes the constant names under which an encoding name should be exposed.
 *
 * <p>The encoding name is the byte range {@code [p, end)} of {@code name}. The rules are:
 * <ul>
 *   <li>An empty range, or a name starting with an ASCII digit, yields no names.</li>
 *   <li>A name that starts with an uppercase ASCII letter and otherwise consists only of ASCII
 *       alphanumerics and {@code '_'} is returned verbatim.</li>
 *   <li>Any other name is rewritten (leading lowercase letter upper-cased, every
 *       non-alphanumeric byte replaced by {@code '_'}); the rewritten form is returned only if
 *       the original name contains an uppercase letter.</li>
 *   <li>If the name contains a lowercase letter, a fully upper-cased form is returned as well.</li>
 * </ul>
 *
 * @param name buffer holding the encoding name
 * @param p    index of the first byte of the name
 * @param end  index one past the last byte of the name
 * @return the constant names in the order described above; possibly empty, never {@code null}
 */
private List<String> encodingNames(byte[] name, int p, int end) {
    final List<String> names = new ArrayList<String>();

    // An empty range has no first byte to classify; reading name[p] would be out of the slice.
    if (p >= end) return names;

    // Every string emitted below consists solely of ASCII alphanumerics and '_', so decode
    // explicitly as US-ASCII instead of relying on the platform default charset.
    final java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;

    final Encoding enc = ASCIIEncoding.INSTANCE;
    int s = p;

    int code = name[s] & 0xff;
    if (enc.isDigit(code)) return names;

    boolean hasUpper = false;
    boolean hasLower = false;
    if (enc.isUpper(code)) {
        hasUpper = true;
        // Advance while the name still looks like a plain constant, noting any lowercase letter.
        while (++s < end && (enc.isAlnum(name[s] & 0xff) || name[s] == (byte) '_')) {
            if (enc.isLower(name[s] & 0xff)) hasLower = true;
        }
    }

    // The scan reaches `end` only when the whole name is already usable as a constant.
    boolean isValid = false;
    if (s >= end) {
        isValid = true;
        // The third argument is a length, not an end index.
        names.add(new String(name, p, end - p, ascii));
    }

    if (!isValid || hasLower) {
        if (!hasLower || !hasUpper) {
            // Only reachable when the scan stopped early (s < end): keep looking for the
            // letter case that has not been seen yet.
            do {
                code = name[s] & 0xff;
                if (enc.isLower(code)) hasLower = true;
                if (enc.isUpper(code)) hasUpper = true;
            } while (++s < end && (!hasLower || !hasUpper));
        }

        // Work on a private copy so the caller's buffer is never modified.
        final byte[] constName = new byte[end - p];
        System.arraycopy(name, p, constName, 0, end - p);
        s = 0;
        code = constName[s] & 0xff;

        if (!isValid) {
            if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
            for (; s < constName.length; ++s) {
                if (!enc.isAlnum(constName[s] & 0xff)) constName[s] = (byte) '_';
            }
            if (hasUpper) {
                names.add(new String(constName, 0, constName.length, ascii));
            }
        }
        if (hasLower) {
            for (s = 0; s < constName.length; ++s) {
                code = constName[s] & 0xff;
                if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
            }
            names.add(new String(constName, 0, constName.length, ascii));
        }
    }

    return names;
}

/**
 * Defines {@code constName} as a constant whose value is {@code encoding}, on the object
 * returned by {@code runtime.getEncoding()}.
 *
 * <p>NOTE(review): that object is presumably the Ruby {@code Encoding} class; confirm against
 * {@code Ruby#getEncoding()}.
 *
 * @param runtime   the runtime whose encoding holder receives the constant
 * @param encoding  the value bound to the constant
 * @param constName the name of the constant to define
 */
private void defineEncodingConstant(Ruby runtime, RubyEncoding encoding, String constName) {
runtime.getEncoding().defineConstant(constName, encoding);
}
Expand Down
61 changes: 61 additions & 0 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Expand Up @@ -3,6 +3,7 @@
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.Ptr;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jcodings.specific.UTF16BEEncoding;
Expand Down Expand Up @@ -47,6 +48,7 @@
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -1116,6 +1118,65 @@ public static RubyString setStrBuf(Ruby runtime, final IRubyObject obj, final in
return str;
}

/**
 * Computes the constant names under which an encoding name should be exposed.
 *
 * <p>The encoding name is the byte range {@code [p, end)} of {@code name}. The rules are:
 * <ul>
 *   <li>An empty range, or a name starting with an ASCII digit, yields no names.</li>
 *   <li>A name that starts with an uppercase ASCII letter and otherwise consists only of ASCII
 *       alphanumerics and {@code '_'} is returned verbatim.</li>
 *   <li>Any other name is rewritten (leading lowercase letter upper-cased, every
 *       non-alphanumeric byte replaced by {@code '_'}); the rewritten form is returned only if
 *       the original name contains an uppercase letter.</li>
 *   <li>If the name contains a lowercase letter, a fully upper-cased form is returned as well.</li>
 * </ul>
 *
 * @param name buffer holding the encoding name
 * @param p    index of the first byte of the name
 * @param end  index one past the last byte of the name
 * @return the constant names in the order described above; possibly empty, never {@code null}
 */
public static List<String> encodingNames(byte[] name, int p, int end) {
    final List<String> names = new ArrayList<String>();

    // An empty range has no first byte to classify; reading name[p] would be out of the slice.
    if (p >= end) return names;

    // Every string emitted below consists solely of ASCII alphanumerics and '_', so decode
    // explicitly as US-ASCII instead of relying on the platform default charset.
    final java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;

    final Encoding enc = ASCIIEncoding.INSTANCE;
    int s = p;

    int code = name[s] & 0xff;
    if (enc.isDigit(code)) return names;

    boolean hasUpper = false;
    boolean hasLower = false;
    if (enc.isUpper(code)) {
        hasUpper = true;
        // Advance while the name still looks like a plain constant, noting any lowercase letter.
        while (++s < end && (enc.isAlnum(name[s] & 0xff) || name[s] == (byte) '_')) {
            if (enc.isLower(name[s] & 0xff)) hasLower = true;
        }
    }

    // The scan reaches `end` only when the whole name is already usable as a constant.
    boolean isValid = false;
    if (s >= end) {
        isValid = true;
        // The third argument is a length, not an end index.
        names.add(new String(name, p, end - p, ascii));
    }

    if (!isValid || hasLower) {
        if (!hasLower || !hasUpper) {
            // Only reachable when the scan stopped early (s < end): keep looking for the
            // letter case that has not been seen yet.
            do {
                code = name[s] & 0xff;
                if (enc.isLower(code)) hasLower = true;
                if (enc.isUpper(code)) hasUpper = true;
            } while (++s < end && (!hasLower || !hasUpper));
        }

        // Work on a private copy so the caller's buffer is never modified.
        final byte[] constName = new byte[end - p];
        System.arraycopy(name, p, constName, 0, end - p);
        s = 0;
        code = constName[s] & 0xff;

        if (!isValid) {
            if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
            for (; s < constName.length; ++s) {
                if (!enc.isAlnum(constName[s] & 0xff)) constName[s] = (byte) '_';
            }
            if (hasUpper) {
                names.add(new String(constName, 0, constName.length, ascii));
            }
        }
        if (hasLower) {
            for (s = 0; s < constName.length; ++s) {
                code = constName[s] & 0xff;
                if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
            }
            names.add(new String(constName, 0, constName.length, ascii));
        }
    }

    return names;
}

public interface ResizeFunction {
/**
* Resize the destination, returning the new begin offset.
Expand Down
4 changes: 4 additions & 0 deletions truffle/src/main/java/org/jruby/truffle/RubyContext.java
Expand Up @@ -150,6 +150,10 @@ public RubyContext(Ruby jrubyRuntime, TruffleLanguage.Env env) {
nativePlatform = NativePlatformFactory.createPlatform(this);
rootLexicalScope = new LexicalScope(null, coreLibrary.getObjectClass());

// The encoding manager relies on POSIX having been initialized, so we can't process it during
// normal core library initialization.
coreLibrary.initializeEncodingManager();

threadManager = new ThreadManager(this);
threadManager.initialize();

Expand Down

0 comments on commit 2dcbc66

Please sign in to comment.