Reworked inline caches.

Under concurrent update, we need to preserve the coherence of the multiple values in the InlineCache object. In other words, no thread should be able to see values in a single InlineCache object that represent a partial update of the inline cache. To meet this constraint, we pre-allocate the number of InlineCache slots specified at process invocation for every CallSite object. Those slots point to individual InlineCache objects as the call site is executed and the type profile changes. The pointers are swapped atomically, and if the atomic swap succeeds, the previous cache is retained in a 'dead list' until the next garbage collection cycle. If the update fails, the updated cache is discarded and the update is attempted again. Nothing should retain the InlineCache pointers outside the CallSite object. During execution of the CallSite, the InlineCache pointer should be on the execution stack, so swapping the pointers should not impact code that is already in progress. At a garbage collection checkpoint, no interpreter will be in progress, so deleting the replaced InlineCache objects should be safe.
rubinius · Jul 1, 2016 · 0b0a184 · 0b0a184
1 parent f67c4d1
commit 0b0a184
Showing 6 changed files with 248 additions and 267 deletions.
diff --git a/library/rubinius/configuration.rb b/library/rubinius/configuration.rb
@@ -103,6 +103,9 @@
 
       cs.vm_variable "limit", 3,
         "Maximum number of caches at call sites"
+
+      cs.vm_variable "evictions", 10,
+        "Maximum number of cache evictions before disabling caching at the call site"
     end
 
     m.section "jit" do |j|

diff --git a/machine/builtin/call_site.cpp b/machine/builtin/call_site.cpp
@@ -13,6 +13,7 @@
 
 namespace rubinius {
   int CallSite::max_caches = 0;
+  int CallSite::max_evictions = 0;
   CallSite::Executor CallSite::default_execute = CallSite::lookup_invoke_cache;
 
   void CallSite::bootstrap(STATE) {
@@ -26,105 +27,14 @@ namespace rubinius {
     }
 
     max_caches = state->shared().config.machine_call_site_limit.value;
+    max_evictions = state->shared().config.machine_call_site_evictions.value;
   }
 
   void CallSite::Info::mark(Object* obj, memory::ObjectMark& mark) {
     auto_mark(obj, mark);
 
     CallSite* call_site = as<CallSite>(obj);
 
-    if(!call_site->caches()) return;
-
-    // 1. Check if individual caches should be evicted.
-    bool evict_p[call_site->depth()];
-
-    for(int i = 0; i < call_site->depth(); i++) {
-      evict_p[i] = call_site->caches()->cache[i].inefficient_p();
-    }
-
-    int evict_count = 0;
-    for(int i = 0; i < call_site->depth(); i++) {
-      if(evict_p[i]) evict_count++;
-    }
-
-    if(evict_count) {
-      VM::current()->metrics().machine.inline_cache_evicted += evict_count;
-
-      int new_size = call_site->depth() - evict_count;
-
-      if(new_size == 0) {
-        call_site->depth(0);
-        free(call_site->caches());
-        call_site->caches(NULL);
-
-        call_site->execute(CallSite::default_execute);
-        call_site->cache_miss(CallSite::default_execute);
-
-        return;
-      }
-
-      for(int i = 0, j = 0; i < call_site->depth() && j < new_size; i++) {
-        if(!evict_p[i]) {
-          call_site->caches()->cache[j++] = call_site->caches()->cache[i];
-        }
-      }
-
-      call_site->caches()->depth(new_size);
-    }
-
-    // 2. Attempt to re-order the caches by bubbling most hit forward.
-    bool reorder_p = false;
-    int indexes[call_site->depth()];
-
-    for(int i = 0; i < call_site->depth(); i++) {
-      indexes[i] = i;
-    }
-
-    InlineCaches* caches = call_site->caches();
-
-    for(int i = 0; i < call_site->depth() - 1; i++) {
-      if(caches->cache[i].hits() < caches->cache[i + 1].hits()) {
-        int tmp = indexes[i];
-        indexes[i] = indexes[i + 1];
-        indexes[i + 1] = tmp;
-        reorder_p = true;
-
-        // TODO: pass State through the GC!
-        VM::current()->metrics().machine.inline_cache_reordered++;
-      }
-    }
-
-    if(reorder_p) {
-      InlineCache* inline_caches = static_cast<InlineCache*>(
-          alloca(sizeof(CallSite) * call_site->depth()));
-
-      for(int i = 0; i < call_site->depth(); i++) {
-        inline_caches[i] = caches->cache[i];
-      }
-
-      for(int i = 0; i < call_site->depth(); i++) {
-        caches->cache[i] = inline_caches[indexes[i]];
-      }
-    }
-
-    // 3. Mark remaining caches.
-    for(int i = 0; i < call_site->depth(); i++) {
-      InlineCache* cache = &caches->cache[i];
-
-      if(Object* ref = mark.call(cache->receiver_class())) {
-        cache->receiver_class(as<Class>(ref));
-        mark.just_set(call_site, ref);
-      }
-
-      if(Object* ref = mark.call(cache->stored_module())) {
-        cache->stored_module(as<Module>(ref));
-        mark.just_set(call_site, ref);
-      }
-
-      if(Object* ref = mark.call(cache->executable())) {
-        cache->executable(as<Executable>(ref));
-        mark.just_set(call_site, ref);
-      }
-    }
+    call_site->evict_and_mark(mark);
   }
 }
diff --git a/machine/builtin/call_site.hpp b/machine/builtin/call_site.hpp
diff --git a/machine/memory.hpp b/machine/memory.hpp
@@ -381,6 +381,15 @@ namespace rubinius {
         return static_cast<T*>(new_object(state, klass, bytes, T::type));
       }
 
+    template <class T>
+      T* new_variable_object(STATE, Class *klass) {
+        T* obj = static_cast<T*>(new_object(
+              state, klass, TypeInfo::instance_sizes[T::type], T::type));
+        T::initialize(state, obj);
+
+        return obj;
+      }
+
     template <class T>
       T* new_bytes(STATE, Class* klass, native_int bytes) {
         bytes = ObjectHeader::align(sizeof(T) + bytes);

diff --git a/machine/memory/finalizer.cpp b/machine/memory/finalizer.cpp
@@ -188,7 +188,7 @@ namespace rubinius {
     void FinalizerThread::wakeup(STATE) {
       MachineThread::wakeup(state);
 
-      while(thread_running_) {
+      while(thread_running_p()) {
         UnmanagedPhase unmanaged(state);
         std::lock_guard<std::mutex> guard(list_mutex());
 

diff --git a/machine/spinlock.hpp b/machine/spinlock.hpp
@@ -23,7 +23,7 @@ namespace rubinius {
       }
 
       bool try_lock() {
-        return flag.test_and_set(std::memory_order_seq_cst);
+        return !flag.test_and_set(std::memory_order_seq_cst);
       }
 
       void unlock() {