Store token list of strptime format string in a cache so we do not co…

…ntinually re-parse the same strings. For this very very simple single format string removing this parsing ended up speeding us up by 8x. More analysis below: BEFORE: ```text jruby ../snippets/date_bench.rb Warming up -------------------------------------- _strptime 29.320k i/100ms Calculating ------------------------------------- _strptime 353.076k (± 2.9%) i/s - 3.548M in 10.057641s ``` AFTER: ```text system ~/work/jruby master * 1571% jruby ../snippets/date_bench.rb Warming up -------------------------------------- _strptime 124.491k i/100ms Calculating ------------------------------------- _strptime 2.696M (± 5.3%) i/s - 26.890M in 10.007182s ``` Removing the cost of lexing the format string is the entirety of the speed up. I moved the lexer from being a field to a local so we did not have to keep re-initializing the object (surprisingly expensive in it's own right). After spending some time with this I think JFlex is a bit heavy for this parsing. A stateless (e.g. static recursive descent parser passing all values as params) lexer/parser would have probably made the cost of lexing lower (which also includes converting from String to ByteList as well). With that said... Having a format cache eliminates needing the lexer to be fast. I doubt the cache will ever grow large and if it does we can just make it bounded by LRU. At this point I think the performance is ok. Follow up work items if anyone wants more would be: 1. Use ByteList and not String 2. Eliminate Bag and just use RubyHash directly 3. Add dyn dispatch for all math (this is not performance but a disparity with MRI itself). I did try to use bytelist last week for fun and saw no jump in perf but it was completely obscured by parsing the format string. It may give us another perf bump if we try it.
jruby · Jun 27, 2017 · f2e7b82 · f2e7b82
1 parent d3a5ee2
commit f2e7b82
Showing 3 changed files with 21 additions and 15 deletions.
diff --git a/core/src/main/java/org/jruby/Ruby.java b/core/src/main/java/org/jruby/Ruby.java
@@ -63,6 +63,8 @@
 import org.jruby.runtime.JavaSites;
 import org.jruby.runtime.invokedynamic.InvokeDynamicSupport;
 import org.jruby.util.MRIRecursionGuard;
+import org.jruby.util.StrptimeParser;
+import org.jruby.util.StrptimeToken;
 import org.objectweb.asm.util.TraceClassVisitor;
 
 import jnr.constants.Constant;
@@ -4417,6 +4419,17 @@ public RuntimeCache getRuntimeCache() {
         return runtimeCache;
     }
 
+    public List<StrptimeToken> getCachedStrptimePattern(String pattern) {
+        List<StrptimeToken> tokens = strptimeFormatCache.get(pattern);
+
+        if (tokens == null) {
+            tokens = new StrptimeParser().compilePattern(pattern);
+            strptimeFormatCache.put(pattern, tokens);
+        }
+
+        return tokens;
+    }
+
     /**
      * Add a method, so it can be printed out later.
      *
@@ -5152,4 +5165,9 @@ protected TypePopulator computeValue(Class<?> type) {
     public final JavaSites sites = new JavaSites();
 
     private volatile MRIRecursionGuard mriRecursionGuard;
+
+    // For strptime processing we cache the parsed format strings since most applications
+    // reuse the same formats over and over.  This is also unbounded (for now) since all applications
+    // I know of use very few of them.  Even if there are many the size of these lists are modest.
+    private Map<String, List<StrptimeToken>> strptimeFormatCache = new ConcurrentHashMap<>();
 }
diff --git a/core/src/main/java/org/jruby/util/RubyDateParser.java b/core/src/main/java/org/jruby/util/RubyDateParser.java
@@ -43,12 +43,6 @@
  * This class has {@code StrptimeParser} and provides methods that are calls from JRuby.
  */
 public class RubyDateParser {
-    private final StrptimeParser strptimeParser;
-
-    public RubyDateParser() {
-        this.strptimeParser = new StrptimeParser();
-    }
-
     /**
      * Date._strptime method in JRuby 9.1.5.0's lib/ruby/stdlib/date/format.rb is replaced
      * with this method. This is Java implementation of date__strptime method in MRI 2.3.1's
@@ -59,8 +53,8 @@ public RubyDateParser() {
      */
 
     public IRubyObject parse(ThreadContext context, final RubyString format, final RubyString text) {
-        final List<StrptimeToken> compiledPattern = strptimeParser.compilePattern(format.asJavaString());
-        final StrptimeParser.FormatBag bag = strptimeParser.parse(compiledPattern, text.asJavaString());
+        final List<StrptimeToken> compiledPattern = context.runtime.getCachedStrptimePattern(format.asJavaString());
+        final StrptimeParser.FormatBag bag = new StrptimeParser().parse(compiledPattern, text.asJavaString());
 
         return bag == null ? context.nil :  convertFormatBagToHash(context, bag, text.isTaint());
     }

diff --git a/core/src/main/java/org/jruby/util/StrptimeParser.java b/core/src/main/java/org/jruby/util/StrptimeParser.java
@@ -195,12 +195,6 @@ public static boolean has(BigInteger v) {
         }
     }
 
-    private final StrptimeLexer lexer;
-
-    public StrptimeParser() {
-        this.lexer = new StrptimeLexer((Reader) null);
-    }
-
     /**
      * Ported from RubyDateFormatter#addToPattern in JRuby 9.1.5.0.
      * see https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java
@@ -223,7 +217,7 @@ private void addToPattern(final List<StrptimeToken> compiledPattern, final Strin
     public List<StrptimeToken> compilePattern(final String pattern) {
         final List<StrptimeToken> compiledPattern = new LinkedList<>();
         final Reader reader = new StringReader(pattern); // TODO Use try-with-resource statement
-        lexer.yyreset(reader);
+        StrptimeLexer lexer = new StrptimeLexer(reader);
 
         StrptimeToken token;
         try {