Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jcodings
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 8b729bfece90
Choose a base ref
...
head repository: jruby/jcodings
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 514b3a4df9d4
Choose a head ref
  • 2 commits
  • 2 files changed
  • 1 contributor

Commits on Dec 29, 2017

  1. update code range list

    lopex committed Dec 29, 2017
    Copy the full SHA
    3fe384c View commit details
  2. update script

    lopex committed Dec 29, 2017
    Copy the full SHA
    514b3a4 View commit details
Showing with 31 additions and 7 deletions.
  1. +4 −4 scripts/generate.rb
  2. +27 −3 src/org/jcodings/unicode/UnicodeProperties.java
8 changes: 4 additions & 4 deletions scripts/generate.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# coding: utf-8

REPO_PATH = ARGV.first || '/usr/src/ruby-2.4.3' # path to ruby repo
REPO_PATH = ARGV.first || '/usr/src/ruby-2.5.0' # path to ruby repo
SECTION_NAME = "rdata"
UNICODE_VERSION = "9.0.0"
UNICODE_VERSION = "10.0.0"
SRC_DIR = "../src/org/jcodings"
DST_BIN_DIR = "../resources/tables"
INDENT = " " * 4
@@ -169,10 +169,10 @@ def generate_coderange_list

ranges = unicode_src[/CodeRanges\[\]\s+=\s+\{(.*?)\}\;/m, 1].scan(/CR_(\w+)/).flatten

standard_char_type_range = 15
standard_char_type_range = 16
out = ranges.take(standard_char_type_range).map{|range|[range.tr('_', '').downcase, range]} +
ranges.drop(standard_char_type_range).map do |range|
name = range =~ /Age_(\d)_(\d)/ ? "age=#{$1}.#{$2}" : range.tr('_', '').downcase
name = range =~ /Age_(\d+)_(\d+)/ ? "age=#{$1}.#{$2}" : range.tr('_', '').downcase
name = cr_map.delete(range) || name
name = "#{$1}=#{$2}" if name =~ /(graphemeclusterbreak)(.*)/i
([name] + aliases[name].to_a).map{|n|[n, range]}
30 changes: 27 additions & 3 deletions src/org/jcodings/unicode/UnicodeProperties.java
Original file line number Diff line number Diff line change
@@ -31,13 +31,14 @@ public class UnicodeProperties {
new CodeRangeEntry("graph", "CR_Graph"),
new CodeRangeEntry("lower", "CR_Lower"),
new CodeRangeEntry("print", "CR_Print"),
new CodeRangeEntry("punct", "CR_Punct"),
new CodeRangeEntry("xposixpunct", "CR_XPosixPunct"),
new CodeRangeEntry("space", "CR_Space"),
new CodeRangeEntry("upper", "CR_Upper"),
new CodeRangeEntry("xdigit", "CR_XDigit"),
new CodeRangeEntry("word", "CR_Word"),
new CodeRangeEntry("alnum", "CR_Alnum"),
new CodeRangeEntry("ascii", "CR_ASCII"),
new CodeRangeEntry("punct", "CR_Punct"),
new CodeRangeEntry("any", "CR_Any"),
new CodeRangeEntry("assigned", "CR_Assigned"),
new CodeRangeEntry("c", "CR_C"),
@@ -411,6 +412,14 @@ public class UnicodeProperties {
new CodeRangeEntry("osge", "CR_Osage"),
new CodeRangeEntry("tangut", "CR_Tangut"),
new CodeRangeEntry("tang", "CR_Tangut"),
new CodeRangeEntry("masaramgondi", "CR_Masaram_Gondi"),
new CodeRangeEntry("gonm", "CR_Masaram_Gondi"),
new CodeRangeEntry("nushu", "CR_Nushu"),
new CodeRangeEntry("nshu", "CR_Nushu"),
new CodeRangeEntry("soyombo", "CR_Soyombo"),
new CodeRangeEntry("soyo", "CR_Soyombo"),
new CodeRangeEntry("zanabazarsquare", "CR_Zanabazar_Square"),
new CodeRangeEntry("zanb", "CR_Zanabazar_Square"),
new CodeRangeEntry("whitespace", "CR_Space"),
new CodeRangeEntry("wspace", "CR_Space"),
new CodeRangeEntry("bidicontrol", "CR_Bidi_Control"),
@@ -474,6 +483,12 @@ public class UnicodeProperties {
new CodeRangeEntry("patsyn", "CR_Pattern_Syntax"),
new CodeRangeEntry("prependedconcatenationmark", "CR_Prepended_Concatenation_Mark"),
new CodeRangeEntry("pcm", "CR_Prepended_Concatenation_Mark"),
new CodeRangeEntry("graphemeclusterbreak=regionalindicator", "CR_Regional_Indicator"),
new CodeRangeEntry("emoji", "CR_Emoji"),
new CodeRangeEntry("emojipresentation", "CR_Emoji_Presentation"),
new CodeRangeEntry("graphemeclusterbreak=emodifier", "CR_Emoji_Modifier"),
new CodeRangeEntry("emojimodifierbase", "CR_Emoji_Modifier_Base"),
new CodeRangeEntry("emojicomponent", "CR_Emoji_Component"),
new CodeRangeEntry("unknown", "CR_Unknown"),
new CodeRangeEntry("zzzz", "CR_Unknown"),
new CodeRangeEntry("age=1.1", "CR_Age_1_1"),
@@ -494,21 +509,23 @@ public class UnicodeProperties {
new CodeRangeEntry("age=7.0", "CR_Age_7_0"),
new CodeRangeEntry("age=8.0", "CR_Age_8_0"),
new CodeRangeEntry("age=9.0", "CR_Age_9_0"),
new CodeRangeEntry("age=10.0", "CR_Age_10_0"),
new CodeRangeEntry("graphemeclusterbreak=prepend", "CR_Grapheme_Cluster_Break_Prepend"),
new CodeRangeEntry("graphemeclusterbreak=cr", "CR_Grapheme_Cluster_Break_CR"),
new CodeRangeEntry("graphemeclusterbreak=lf", "CR_NEWLINE"),
new CodeRangeEntry("graphemeclusterbreak=control", "CR_Grapheme_Cluster_Break_Control"),
new CodeRangeEntry("graphemeextend", "CR_Grapheme_Extend"),
new CodeRangeEntry("grext", "CR_Grapheme_Extend"),
new CodeRangeEntry("graphemeclusterbreak=regionalindicator", "CR_Grapheme_Cluster_Break_Regional_Indicator"),
new CodeRangeEntry("regionalindicator", "CR_Regional_Indicator"),
new CodeRangeEntry("ri", "CR_Regional_Indicator"),
new CodeRangeEntry("graphemeclusterbreak=spacingmark", "CR_Grapheme_Cluster_Break_SpacingMark"),
new CodeRangeEntry("graphemeclusterbreak=l", "CR_Grapheme_Cluster_Break_L"),
new CodeRangeEntry("graphemeclusterbreak=v", "CR_Grapheme_Cluster_Break_V"),
new CodeRangeEntry("graphemeclusterbreak=t", "CR_Grapheme_Cluster_Break_T"),
new CodeRangeEntry("graphemeclusterbreak=lv", "CR_Grapheme_Cluster_Break_LV"),
new CodeRangeEntry("graphemeclusterbreak=lvt", "CR_Grapheme_Cluster_Break_LVT"),
new CodeRangeEntry("graphemeclusterbreak=ebase", "CR_Grapheme_Cluster_Break_E_Base"),
new CodeRangeEntry("graphemeclusterbreak=emodifier", "CR_Grapheme_Cluster_Break_E_Modifier"),
new CodeRangeEntry("emojimodifier", "CR_Emoji_Modifier"),
new CodeRangeEntry("graphemeclusterbreak=zwj", "CR_Grapheme_Cluster_Break_ZWJ"),
new CodeRangeEntry("graphemeclusterbreak=glueafterzwj", "CR_Grapheme_Cluster_Break_Glue_After_Zwj"),
new CodeRangeEntry("graphemeclusterbreak=ebasegaz", "CR_Grapheme_Cluster_Break_E_Base_GAZ"),
@@ -531,6 +548,7 @@ public class UnicodeProperties {
new CodeRangeEntry("innko", "CR_In_NKo"),
new CodeRangeEntry("insamaritan", "CR_In_Samaritan"),
new CodeRangeEntry("inmandaic", "CR_In_Mandaic"),
new CodeRangeEntry("insyriacsupplement", "CR_In_Syriac_Supplement"),
new CodeRangeEntry("inarabicextendeda", "CR_In_Arabic_Extended_A"),
new CodeRangeEntry("indevanagari", "CR_In_Devanagari"),
new CodeRangeEntry("inbengali", "CR_In_Bengali"),
@@ -741,9 +759,12 @@ public class UnicodeProperties {
new CodeRangeEntry("intakri", "CR_In_Takri"),
new CodeRangeEntry("inahom", "CR_In_Ahom"),
new CodeRangeEntry("inwarangciti", "CR_In_Warang_Citi"),
new CodeRangeEntry("inzanabazarsquare", "CR_In_Zanabazar_Square"),
new CodeRangeEntry("insoyombo", "CR_In_Soyombo"),
new CodeRangeEntry("inpaucinhau", "CR_In_Pau_Cin_Hau"),
new CodeRangeEntry("inbhaiksuki", "CR_In_Bhaiksuki"),
new CodeRangeEntry("inmarchen", "CR_In_Marchen"),
new CodeRangeEntry("inmasaramgondi", "CR_In_Masaram_Gondi"),
new CodeRangeEntry("incuneiform", "CR_In_Cuneiform"),
new CodeRangeEntry("incuneiformnumbersandpunctuation", "CR_In_Cuneiform_Numbers_and_Punctuation"),
new CodeRangeEntry("inearlydynasticcuneiform", "CR_In_Early_Dynastic_Cuneiform"),
@@ -758,6 +779,8 @@ public class UnicodeProperties {
new CodeRangeEntry("intangut", "CR_In_Tangut"),
new CodeRangeEntry("intangutcomponents", "CR_In_Tangut_Components"),
new CodeRangeEntry("inkanasupplement", "CR_In_Kana_Supplement"),
new CodeRangeEntry("inkanaextendeda", "CR_In_Kana_Extended_A"),
new CodeRangeEntry("innushu", "CR_In_Nushu"),
new CodeRangeEntry("induployan", "CR_In_Duployan"),
new CodeRangeEntry("inshorthandformatcontrols", "CR_In_Shorthand_Format_Controls"),
new CodeRangeEntry("inbyzantinemusicalsymbols", "CR_In_Byzantine_Musical_Symbols"),
@@ -788,6 +811,7 @@ public class UnicodeProperties {
new CodeRangeEntry("incjkunifiedideographsextensionc", "CR_In_CJK_Unified_Ideographs_Extension_C"),
new CodeRangeEntry("incjkunifiedideographsextensiond", "CR_In_CJK_Unified_Ideographs_Extension_D"),
new CodeRangeEntry("incjkunifiedideographsextensione", "CR_In_CJK_Unified_Ideographs_Extension_E"),
new CodeRangeEntry("incjkunifiedideographsextensionf", "CR_In_CJK_Unified_Ideographs_Extension_F"),
new CodeRangeEntry("incjkcompatibilityideographssupplement", "CR_In_CJK_Compatibility_Ideographs_Supplement"),
new CodeRangeEntry("intags", "CR_In_Tags"),
new CodeRangeEntry("invariationselectorssupplement", "CR_In_Variation_Selectors_Supplement"),