Skip to content

Commit

Permalink
Reworked inline caches.
Browse files Browse the repository at this point in the history
Under concurrent update, we need to preserve the coherence of the multiple
values in the InlineCache object. In other words, no thread should be able to
see values in a single InlineCache object that represent a partial update of
the inline cache.

To meet this constraint, we pre-allocate the number of InlineCache slots
specified at process invocation for every CallSite object. Those slots point
to individual InlineCache objects as the call site is executed and the type
profile changes. The pointers are swapped atomically, and if the atomic swap
succeeds, the previous cache is retained in a 'dead list' until the next
garbage collection cycle. If the update fails, the updated cache is discarded
and the update is attempted again.

Nothing should retain the InlineCache pointers outside the CallSite object.
During execution of the CallSite, the InlineCache pointer should be on the
execution stack, so swapping the pointers should not impact code that is
already-in-progress. At a garbage collection checkpoint, no interpreter will
be in-progress, so deleting the replaced InlineCache objects should be safe.
brixen committed Jul 1, 2016
1 parent f67c4d1 commit 0b0a184
Showing 6 changed files with 248 additions and 267 deletions.
3 changes: 3 additions & 0 deletions library/rubinius/configuration.rb
Original file line number Diff line number Diff line change
@@ -103,6 +103,9 @@

cs.vm_variable "limit", 3,
"Maximum number of caches at call sites"

cs.vm_variable "evictions", 10,
"Maximum number of cache evictions before disabling caching at the call site"
end

m.section "jit" do |j|
96 changes: 3 additions & 93 deletions machine/builtin/call_site.cpp
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@

namespace rubinius {
int CallSite::max_caches = 0;
int CallSite::max_evictions = 0;
CallSite::Executor CallSite::default_execute = CallSite::lookup_invoke_cache;

void CallSite::bootstrap(STATE) {
@@ -26,105 +27,14 @@ namespace rubinius {
}

max_caches = state->shared().config.machine_call_site_limit.value;
max_evictions = state->shared().config.machine_call_site_evictions.value;
}

// GC mark hook for CallSite objects. After the generic auto_mark pass it
// walks the call site's inline caches in three steps (evict, reorder, mark)
// and finally delegates to CallSite::evict_and_mark.
//
// NOTE(review): this listing appears to come from a rendered diff in which
// removed and added lines are interleaved without +/- markers; the inline
// three-step logic below may be the pre-change implementation that
// evict_and_mark replaces -- confirm against the actual file.
void CallSite::Info::mark(Object* obj, memory::ObjectMark& mark) {
auto_mark(obj, mark);

CallSite* call_site = as<CallSite>(obj);

// No cache storage: nothing to evict, reorder, or mark. Note that this
// early return also skips the evict_and_mark call at the end.
if(!call_site->caches()) return;

// 1. Check if individual caches should be evicted.
// Variable-length array sized by the current depth (a compiler extension,
// not standard C++).
bool evict_p[call_site->depth()];

for(int i = 0; i < call_site->depth(); i++) {
evict_p[i] = call_site->caches()->cache[i].inefficient_p();
}

int evict_count = 0;
for(int i = 0; i < call_site->depth(); i++) {
if(evict_p[i]) evict_count++;
}

if(evict_count) {
VM::current()->metrics().machine.inline_cache_evicted += evict_count;

int new_size = call_site->depth() - evict_count;

// Every cache was evicted: release the storage and reset both executors
// to the default so the call site starts over.
if(new_size == 0) {
call_site->depth(0);
free(call_site->caches());
call_site->caches(NULL);

call_site->execute(CallSite::default_execute);
call_site->cache_miss(CallSite::default_execute);

return;
}

// Compact the surviving entries toward the front, preserving their order.
for(int i = 0, j = 0; i < call_site->depth() && j < new_size; i++) {
if(!evict_p[i]) {
call_site->caches()->cache[j++] = call_site->caches()->cache[i];
}
}

// NOTE(review): the new size is stored via caches()->depth(...) while the
// loops in this function read call_site->depth() -- confirm both refer to
// the same value.
call_site->caches()->depth(new_size);
}

// 2. Attempt to re-order the caches by bubbling most hit forward.
bool reorder_p = false;
int indexes[call_site->depth()];

// Start from the identity permutation.
for(int i = 0; i < call_site->depth(); i++) {
indexes[i] = i;
}

InlineCaches* caches = call_site->caches();

// Single pass over adjacent pairs. The hit counts compared are those at the
// original positions i and i + 1; only the index entries are swapped here.
for(int i = 0; i < call_site->depth() - 1; i++) {
if(caches->cache[i].hits() < caches->cache[i + 1].hits()) {
int tmp = indexes[i];
indexes[i] = indexes[i + 1];
indexes[i + 1] = tmp;
reorder_p = true;

// TODO: pass State through the GC!
VM::current()->metrics().machine.inline_cache_reordered++;
}
}

if(reorder_p) {
// Copy the entries to a stack scratch buffer, then write them back in the
// permuted order.
// NOTE(review): the buffer is sized with sizeof(CallSite) although it holds
// InlineCache elements -- confirm this is intended.
InlineCache* inline_caches = static_cast<InlineCache*>(
alloca(sizeof(CallSite) * call_site->depth()));

for(int i = 0; i < call_site->depth(); i++) {
inline_caches[i] = caches->cache[i];
}

for(int i = 0; i < call_site->depth(); i++) {
caches->cache[i] = inline_caches[indexes[i]];
}
}

// 3. Mark remaining caches.
// For each reference a cache holds, a non-null result from mark.call is
// stored back into the cache and reported through mark.just_set
// (presumably the object's updated location -- verify against ObjectMark).
for(int i = 0; i < call_site->depth(); i++) {
InlineCache* cache = &caches->cache[i];

if(Object* ref = mark.call(cache->receiver_class())) {
cache->receiver_class(as<Class>(ref));
mark.just_set(call_site, ref);
}

if(Object* ref = mark.call(cache->stored_module())) {
cache->stored_module(as<Module>(ref));
mark.just_set(call_site, ref);
}

if(Object* ref = mark.call(cache->executable())) {
cache->executable(as<Executable>(ref));
mark.just_set(call_site, ref);
}
}
call_site->evict_and_mark(mark);
}
}
403 changes: 231 additions & 172 deletions machine/builtin/call_site.hpp

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions machine/memory.hpp
Original file line number Diff line number Diff line change
@@ -381,6 +381,15 @@ namespace rubinius {
return static_cast<T*>(new_object(state, klass, bytes, T::type));
}

// Creates an object of type T whose allocation size is looked up in
// TypeInfo::instance_sizes for T::type, runs T::initialize on the fresh
// object, and returns it.
template <class T>
T* new_variable_object(STATE, Class *klass) {
  auto* raw = new_object(
      state, klass, TypeInfo::instance_sizes[T::type], T::type);

  T* instance = static_cast<T*>(raw);
  T::initialize(state, instance);

  return instance;
}

template <class T>
T* new_bytes(STATE, Class* klass, native_int bytes) {
bytes = ObjectHeader::align(sizeof(T) + bytes);
2 changes: 1 addition & 1 deletion machine/memory/finalizer.cpp
Original file line number Diff line number Diff line change
@@ -188,7 +188,7 @@ namespace rubinius {
void FinalizerThread::wakeup(STATE) {
MachineThread::wakeup(state);

while(thread_running_) {
while(thread_running_p()) {
UnmanagedPhase unmanaged(state);
std::lock_guard<std::mutex> guard(list_mutex());

2 changes: 1 addition & 1 deletion machine/spinlock.hpp
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ namespace rubinius {
}

// Attempts to acquire the lock without blocking.
//
// Returns true when the lock was acquired by this call, false when it was
// already held. std::atomic_flag::test_and_set returns the flag's PREVIOUS
// value, so a previous value of false means the flag was clear and this
// caller has just set it; the result is therefore negated.
bool try_lock() {
  return !flag.test_and_set(std::memory_order_seq_cst);
}

void unlock() {

0 comments on commit 0b0a184

Please sign in to comment.