Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jcodings
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: b8e6d466f732
Choose a base ref
...
head repository: jruby/jcodings
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 9e8005fb3740
Choose a head ref
  • 2 commits
  • 6 files changed
  • 1 contributor

Commits on Dec 21, 2015

  1. Copy the full SHA
    0718e8a View commit details
  2. synchronize scripts

    lopex committed Dec 21, 2015
    Copy the full SHA
    9e8005f View commit details
12 changes: 6 additions & 6 deletions scripts/EncodingListTemplate.java
Original file line number Diff line number Diff line change
@@ -18,11 +18,11 @@
* SOFTWARE.
*/
package org.jcodings;
import org.jcodings.constants.EncodingType;
import org.jcodings.constants.EncodingFlag;

final class EncodingList {
static final EncodingType[] LIST = new EncodingType[] {
%{body}
};
}
static final void load() {
%{defines}
EncodingDB.ascii = EncodingDB.encodings.get("ASCII-8BIT".getBytes());
%{other}
}
}
48 changes: 46 additions & 2 deletions scripts/ExtractTables.rb
Original file line number Diff line number Diff line change
@@ -10,6 +10,49 @@
enc_dir = "#{dst_dir}/unicode"
INDENT = " " * 4

# Maps an encoding name to the class-name string written into the generated
# EncodingList.java as the second argument of EncodingDB.declare(...).
# The generator looks up every DEFINE entry scanned from encdb.h in this table
# and raises "class not found for encoding ..." when a name has no mapping,
# so any newly defined encoding must be added here.
CLASS_MAP = {
"ASCII-8BIT" => "ASCII",
"UTF-8" => "UTF8",
"US-ASCII" => "USASCII",
"Big5" => "BIG5",
"Big5-HKSCS" => "Big5HKSCS",
"Big5-UAO" => "Big5UAO",
"CP949" => "CP949",
"Emacs-Mule" => "EmacsMule",
"EUC-JP" => "EUCJP",
"EUC-KR" => "EUCKR",
"EUC-TW" => "EUCTW",
"GB2312" => "GB2312",
"GB18030" => "GB18030",
"GBK" => "GBK",
"ISO-8859-1" => "ISO8859_1",
"ISO-8859-2" => "ISO8859_2",
"ISO-8859-3" => "ISO8859_3",
"ISO-8859-4" => "ISO8859_4",
"ISO-8859-5" => "ISO8859_5",
"ISO-8859-6" => "ISO8859_6",
"ISO-8859-7" => "ISO8859_7",
"ISO-8859-8" => "ISO8859_8",
"ISO-8859-9" => "ISO8859_9",
"ISO-8859-10" => "ISO8859_10",
"ISO-8859-11" => "ISO8859_11",
"ISO-8859-13" => "ISO8859_13",
"ISO-8859-14" => "ISO8859_14",
"ISO-8859-15" => "ISO8859_15",
"ISO-8859-16" => "ISO8859_16",
"KOI8-R" => "KOI8R",
"KOI8-U" => "KOI8U",
"Shift_JIS" => "SJIS",
"UTF-16BE" => "UTF16BE",
"UTF-16LE" => "UTF16LE",
"UTF-32BE" => "UTF32BE",
"UTF-32LE" => "UTF32LE",
"Windows-31J" => "Windows_31J", # TODO: Windows-31J is actually a variant of SJIS
"Windows-1250" => "Windows_1250",
"Windows-1251" => "Windows_1251",
"Windows-1252" => "Windows_1252"
}

# Checks that a and b are equal.
# Returns nil when they are; otherwise raises a RuntimeError whose message
# shows both values via #inspect followed by the optional msg text.
def assert_eq a, b, msg = ""
  return if a == b
  raise "unmet condition: #{a.inspect} == #{b.inspect}, info #{msg}"
end
@@ -126,7 +169,8 @@ def extract_to to, file
end
end

enc_db = open("#{repo_path}/encdb.h").read.tr('()', '').scan(/ENC_([A-Z_]+)(.*?);/m).reject { |a, b| a =~ /DEFINE/ }
defines, other = open("#{repo_path}/encdb.h").read.tr('()', '').scan(/ENC_([A-Z_]+)(.*?);/m).partition { |a, b| a =~ /DEFINE/ }

open("#{dst_dir}/EncodingList.java", "wb") { |f| f << open("EncodingListTemplate.java", "rb").read.
sub(/%\{body\}/, enc_db.map { |cmd, from, to| "#{INDENT*2}new EncodingType(EncodingFlag.#{cmd}, #{from}#{to.nil? ? "" : to})" }.join(",\n")) }
sub(/%\{defines\}/, defines.map { |cmd, name| "#{INDENT*2}EncodingDB.declare(#{name}, \"#{CLASS_MAP[name[/[^"]+/]] || (raise 'class not found for encoding ' + name)}\");" }.join("\n")).
sub(/%\{other\}/, other.map { |cmd, from, to| "#{INDENT*2}EncodingDB.#{cmd.downcase}(#{from}#{to.nil? ? "" : to});" }.join("\n")) }
82 changes: 4 additions & 78 deletions src/org/jcodings/EncodingDB.java
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@
*/
package org.jcodings;

import org.jcodings.constants.EncodingType;
import org.jcodings.exception.ErrorMessages;
import org.jcodings.exception.InternalException;
import org.jcodings.util.CaseInsensitiveBytesHash;
@@ -97,53 +96,9 @@ public boolean isDummy() {
}
}

// Flat table of string pairs: each even index holds an encoding name and the
// odd index after it holds the second argument passed to declare(...) for
// that name. The static initializer walks the table two slots at a time and
// then sets it to null; its length is also used as the initial size of the
// encodings and aliases hashes.
private static String[] builtin = {
"ASCII-8BIT", "ASCII",
"UTF-8", "UTF8",
"US-ASCII", "USASCII",
"Big5", "BIG5",
"Big5-HKSCS", "Big5HKSCS",
"Big5-UAO", "Big5UAO",
"CP949", "CP949",
"Emacs-Mule", "EmacsMule",
"EUC-JP", "EUCJP",
"EUC-KR", "EUCKR",
"EUC-TW", "EUCTW",
"GB2312", "GB2312",
"GB18030", "GB18030",
"GBK", "GBK",
"ISO-8859-1", "ISO8859_1",
"ISO-8859-2", "ISO8859_2",
"ISO-8859-3", "ISO8859_3",
"ISO-8859-4", "ISO8859_4",
"ISO-8859-5", "ISO8859_5",
"ISO-8859-6", "ISO8859_6",
"ISO-8859-7", "ISO8859_7",
"ISO-8859-8", "ISO8859_8",
"ISO-8859-9", "ISO8859_9",
"ISO-8859-10", "ISO8859_10",
"ISO-8859-11", "ISO8859_11",
"ISO-8859-13", "ISO8859_13",
"ISO-8859-14", "ISO8859_14",
"ISO-8859-15", "ISO8859_15",
"ISO-8859-16", "ISO8859_16",
"KOI8-R", "KOI8R",
"KOI8-U", "KOI8U",
"Shift_JIS", "SJIS",
"UTF-16BE", "UTF16BE",
"UTF-16LE", "UTF16LE",
"UTF-32BE", "UTF32BE",
"UTF-32LE", "UTF32LE",
"Windows-31J", "Windows_31J", // TODO: Windows-31J is actually a variant of SJIS
"Windows-1250", "Windows_1250",
"Windows-1251", "Windows_1251",
"Windows-1252", "Windows_1252"
};

static Entry ascii;

static final CaseInsensitiveBytesHash<Entry> encodings = new CaseInsensitiveBytesHash<Entry>(builtin.length);
static final CaseInsensitiveBytesHash<Entry> aliases = new CaseInsensitiveBytesHash<Entry>(builtin.length);
static final CaseInsensitiveBytesHash<Entry> encodings = new CaseInsensitiveBytesHash<Entry>(50);
static final CaseInsensitiveBytesHash<Entry> aliases = new CaseInsensitiveBytesHash<Entry>(150);

public static final CaseInsensitiveBytesHash<Entry> getEncodings() {
return encodings;
@@ -202,40 +157,11 @@ public static void dummy(String name) {
dummy(name.getBytes());
}

public static void dummyUnicode(String replica) {
public static void dummy_unicode(String replica) {
replicate(replica, replica + "BE", true);
}

// Class initializer: fills the encoding tables when EncodingDB is loaded.
static {
// builtin stores its entries as consecutive pairs, so visit it two slots
// at a time and declare each pair.
for (int i = 0; i < builtin.length / 2; i++) {
declare(builtin[i << 1], builtin[(i << 1) + 1]);
}
// The table is not read again after this point.
builtin = null;

// Keep the entry registered under "ASCII-8BIT" in the ascii field.
ascii = encodings.get("ASCII-8BIT".getBytes());

// Apply every entry of EncodingList.LIST through the operation its flag
// selects; only DUMMY and DUMMY_UNICODE ignore the entry's other name.
EncodingType[]encList = EncodingList.LIST;
for (int i = 0; i < encList.length; i++) {
EncodingType enc = encList[i];
String name = enc.getName();
String otherName = enc.getOtherName();
switch (enc.getFlag()) {
case REPLICATE:
replicate(name, otherName);
break;
case ALIAS:
alias(name, otherName);
break;
case SET_BASE:
set_base(name, otherName);
break;
case DUMMY:
dummy(name);
break;
case DUMMY_UNICODE: // ENC_DUMMY_UNICODE from encdb.c
dummyUnicode(name);
break;
}
}
// Run the loader provided by EncodingList.
EncodingList.load();
}
}
Loading