Skip to content

Commit

Permalink
Remove BigInteger from values which probably will never need them lik…
Browse files Browse the repository at this point in the history
…e year.

This improves my bench a bit more (BEFORE):

```text
jruby ../snippets/date_bench.rb 
Warming up --------------------------------------
           _strptime    26.267k i/100ms
Calculating -------------------------------------
           _strptime    319.069k (± 5.8%) i/s -      3.178M in   9.998810s
```

AFTER:

```text
jruby  ../snippets/date_bench.rb 
Warming up --------------------------------------
           _strptime    29.320k i/100ms
Calculating -------------------------------------
           _strptime    353.076k (± 2.9%) i/s -      3.548M in  10.057641s
```

I also examined MRI's implementation, and it basically does dynamic calls (dyncalls)
for all the math, which this rewrite in Java does not do.  I think this is ok, but it
underscores how much more overhead may be lurking here (since we are doing direct
Java math).

I did not change seconds and smaller units of time, since they may exceed the
range of a long.  For those, a follow-up optimization would be to check the number
of digits in the text and flip to BigInteger only when needed, otherwise assuming
long (or even int) when the value fits.
enebo committed Jun 20, 2017
1 parent 65e1495 commit 5825391
Showing 1 changed file with 59 additions and 29 deletions.
88 changes: 59 additions & 29 deletions core/src/main/java/org/jruby/util/StrptimeParser.java
Original file line number Diff line number Diff line change
@@ -40,7 +40,7 @@

/**
* This is Java implementation of ext/date/date_strptime.c in Ruby 2.3.1.
* @see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strptime.c
* see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strptime.c
*/
public class StrptimeParser {
// day_names
@@ -63,19 +63,19 @@ public class StrptimeParser {

/**
* Ported Date::Format::Bag from lib/ruby/stdlib/date/format.rb in JRuby 9.1.5.0.
* @see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/lib/ruby/stdlib/date/format.rb
* see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/lib/ruby/stdlib/date/format.rb
*/
public static class FormatBag {
private int mDay = Integer.MIN_VALUE;
private int wDay = Integer.MIN_VALUE;
private int cWDay = Integer.MIN_VALUE;
private int yDay = Integer.MIN_VALUE;
private int cWeek = Integer.MIN_VALUE;
private BigInteger cWYear = null;
private long cWYear = Long.MIN_VALUE;
private int min = Integer.MIN_VALUE;
private int mon = Integer.MIN_VALUE;
private int hour = Integer.MIN_VALUE;
private BigInteger year = null;
private long year = Long.MIN_VALUE;
private int sec = Integer.MIN_VALUE;
private int wNum0 = Integer.MIN_VALUE;
private int wNum1 = Integer.MIN_VALUE;
@@ -89,7 +89,7 @@ public static class FormatBag {
private int secondsSize = Integer.MIN_VALUE;

private int merid = Integer.MIN_VALUE;
private BigInteger cent = null;
private long cent = Long.MIN_VALUE;

private boolean fail = false;
private String leftover = null;
@@ -114,7 +114,7 @@ public int getCWeek() {
return cWeek;
}

public BigInteger getCWYear() {
public long getCWYear() {
return cWYear;
}

@@ -130,7 +130,7 @@ public int getHour() {
return hour;
}

public BigInteger getYear() {
public long getYear() {
return year;
}

@@ -170,7 +170,7 @@ public int getMerid() {
return merid;
}

public BigInteger getCent() {
public long getCent() {
return cent;
}

@@ -186,6 +186,10 @@ public static boolean has(int v) {
return v != Integer.MIN_VALUE;
}

/**
 * Tells whether a long-valued field has been assigned.
 *
 * @param v the field value to inspect
 * @return {@code false} only when {@code v} equals {@link Long#MIN_VALUE},
 *         the marker used for "not set"; {@code true} otherwise
 */
public static boolean has(long v) {
    return Long.MIN_VALUE != v;
}

/**
 * Tells whether a BigInteger-valued field has been assigned.
 *
 * @param v the field value to inspect; may be {@code null}
 * @return {@code true} when {@code v} is a non-null reference, {@code false} when it is {@code null}
 */
public static boolean has(BigInteger v) {
    return null != v;
}
@@ -199,7 +203,7 @@ public StrptimeParser() {

/**
* Ported from RubyDateFormatter#addToPattern in JRuby 9.1.5.0.
* @see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java
* see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java
*/
private void addToPattern(final List<StrptimeToken> compiledPattern, final String str) {
for (int i = 0; i < str.length(); i++) {
@@ -214,7 +218,7 @@ private void addToPattern(final List<StrptimeToken> compiledPattern, final Strin

/**
* Ported from RubyDateFormatter#compilePattern in JRuby 9.1.5.0.
* @see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java
* see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java
*/
public List<StrptimeToken> compilePattern(final String pattern) {
final List<StrptimeToken> compiledPattern = new LinkedList<>();
@@ -288,14 +292,14 @@ public FormatBag parse(final List<StrptimeToken> compiledPattern, final String t

if (FormatBag.has(bag.cent)) {
if (FormatBag.has(bag.cWYear)) {
bag.cWYear = bag.cWYear.add(bag.cent.multiply(BigInteger.valueOf(100)));
bag.cWYear += bag.cent * 100;
}
if (FormatBag.has(bag.year)) {
bag.year = bag.year.add(bag.cent.multiply(BigInteger.valueOf(100)));
bag.year += bag.cent * 100;
}

// delete bag._cent
bag.cent = null;
bag.cent = Long.MIN_VALUE;
}

if (FormatBag.has(bag.merid)) {
@@ -378,9 +382,9 @@ private FormatBag parse(final List<StrptimeToken> compiledPattern) {
}
case FORMAT_CENTURY: { // %C - year / 100 (round down. 20 in 2009)
if (isNumberPattern(compiledPattern, tokenIndex)) {
bag.cent = BigInteger.valueOf(readDigits(2));
bag.cent = readDigits(2);
} else {
bag.cent = readDigitsMax();
bag.cent = readDigitsMaxLong();
}
break;
}
@@ -402,9 +406,9 @@ private FormatBag parse(final List<StrptimeToken> compiledPattern) {
}
case FORMAT_WEEKYEAR: { // %G - The week-based year
if (isNumberPattern(compiledPattern, tokenIndex)) {
bag.cWYear = BigInteger.valueOf(readDigits(4));
bag.cWYear = readDigits(4);
} else {
bag.cWYear = readDigitsMax();
bag.cWYear = readDigitsMaxLong();
}
break;
}
@@ -413,9 +417,9 @@ private FormatBag parse(final List<StrptimeToken> compiledPattern) {
if (!validRange(v, 0, 99)) {
fail = true;
}
bag.cWYear = BigInteger.valueOf(v);
bag.cWYear = v;
if (!bag.has(bag.cent)) {
bag.cent = BigInteger.valueOf((int)v >= 69 ? 19 : 20);
bag.cent = v >= 69 ? 19 : 20;
}
break;
}
@@ -588,23 +592,23 @@ private FormatBag parse(final List<StrptimeToken> compiledPattern) {
pos++;
}

final BigInteger year;
final long year;
if (isNumberPattern(compiledPattern, tokenIndex)) {
year = BigInteger.valueOf(readDigits(4));
year = readDigits(4);
} else {
year = readDigitsMax();
year = readDigitsMaxLong();
}
bag.year = !negative ? year : year.negate();
bag.year = !negative ? year : -1 * year;
break;
}
case FORMAT_YEAR_SHORT: { // %y, %Ey, %Oy - year % 100 (00..99)
final long year = readDigits(2);
if (!validRange(year, 0, 99)) {
fail = true;
}
bag.year = BigInteger.valueOf(year);
bag.year = year;
if (!bag.has(bag.cent)) {
bag.cent = BigInteger.valueOf((int)year >= 69 ? 19 : 20);
bag.cent = year >= 69 ? 19 : 20;
}
break;
}
@@ -651,7 +655,7 @@ private FormatBag parse(final List<StrptimeToken> compiledPattern) {

/**
* Ports read_digits from ext/date/date_strptime.c in MRI 2.3.1 under BSDL.
* @see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
* see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
*/
private long readDigits(final int len) {
char c;
@@ -681,7 +685,7 @@ private long readDigits(final int len) {

/**
* Ports READ_DIGITS_MAX from ext/date/date_strptime.c in MRI 2.3.1 under BSDL.
* @see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
* see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
*/
private BigInteger readDigitsMax() {
char c;
@@ -709,6 +713,32 @@ private BigInteger readDigitsMax() {
return v;
}

/**
 * Reads the longest run of decimal digits starting at the current position
 * and returns it as a {@code long}, advancing {@code pos} past the digits.
 *
 * Sets {@code fail} when no digit could be read at all, or when the digit
 * run does not fit in a {@code long}. In the overflow case the whole digit
 * run is still consumed, and the returned value is the leading part of the
 * number that did fit; callers are expected to check {@code fail}.
 *
 * @return the parsed non-negative value (0 if no digits were present)
 */
private long readDigitsMaxLong() {
    final int initPos = pos;
    long v = 0L;
    boolean overflow = false;

    while (!isEndOfText(text, pos)) {
        final char c = text.charAt(pos);
        if (!isDigit(c)) {
            break;
        }

        final long digit = toInt(c);
        if (!overflow) {
            // v * 10 + digit would silently wrap past Long.MAX_VALUE; a wrapped
            // result is a bogus value and could even collide with the
            // Long.MIN_VALUE "unset" sentinel, so detect it before multiplying.
            if (v > (Long.MAX_VALUE - digit) / 10) {
                overflow = true;
            } else {
                v = v * 10 + digit;
            }
        }
        // Keep consuming digits even after overflow so pos ends after the number.
        pos += 1;
    }

    if (pos == initPos || overflow) {
        fail = true;
    }

    return v;
}

/**
* Returns -1 if text doesn't match with patterns.
*/
@@ -731,7 +761,7 @@ private int findIndexInPatterns(final String[] patterns) {

/**
* Ports num_pattern_p from ext/date/date_strptime.c in MRI 2.3.1 under BSDL.
* @see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
* see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
*/
private static boolean isNumberPattern(final List<StrptimeToken> compiledPattern, final int i) {
if (compiledPattern.size() <= i + 1) {
@@ -785,7 +815,7 @@ private static boolean isNumberPattern(final List<StrptimeToken> compiledPattern

/**
* Ports valid_pattern_p in ext/date/date_strptime.c in MRI 2.3.1 under BSDL.
* @see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
* see https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c
*/
private static boolean validRange(long v, int lower, int upper) {
return lower <= v && v <= upper;

0 comments on commit 5825391

Please sign in to comment.