Skip to content
This repository has been archived by the owner on Apr 22, 2023. It is now read-only.

Commit

Permalink
Utf8 encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
piscisaureus committed Feb 28, 2012
1 parent 080ffb8 commit a8600ea
Show file tree
Hide file tree
Showing 7 changed files with 609 additions and 0 deletions.
116 changes: 116 additions & 0 deletions benchmark_utf8.js
@@ -0,0 +1,116 @@

// Encoder entry points under test. They are read off the `process` object,
// so this script presumably needs a node binary built with these test hooks
// exposed -- TODO confirm against the native side of this commit.
var test_utf8_new = process.test_utf8_new,
    test_utf8_old_hint = process.test_utf8_old_hint,
    test_utf8_old_nohint = process.test_utf8_old_nohint;

/**
 * Drives the whole benchmark: starts at size 8 with 10,000,000 iterations,
 * multiplies the size by 8 each round and divides the iteration count
 * (by 2, 4 or 8 depending on the current size) until it falls below 1.
 *
 * The iteration count handed to `benchmark_shapes` is rounded to a single
 * leading digit times a power of ten, so the printed numbers stay readable.
 */
function benchmark_all() {
  var size = 8,
      iterations = 10000000;

  do {
    // Round to one leading digit: e.g. 156250 -> 2 * 100000.
    var magnitude = Math.pow(10, Math.floor(log10(iterations)));
    var leading_digit = Math.round(iterations / magnitude);
    benchmark_shapes(size, leading_digit * magnitude);

    // Larger strings get their iteration count reduced more aggressively.
    var divisor = size <= 16 ? 2 : (size <= 64 ? 4 : 8);
    iterations /= divisor;
    size *= 8;
  } while (!(iterations < 1));
}

/**
 * Runs `benchmark` for every string shape combined with every content mode
 * (0, 1 and 2), using the given size and iteration count.
 *
 * @param {number} size       Target string size passed through to `benchmark`.
 * @param {number} iterations Iteration count passed through to `benchmark`.
 */
function benchmark_shapes(size, iterations) {
  ["left_tailed", "right_tailed", "tree", "flat"].forEach(function(shape) {
    for (var content = 0; content < 3; content++) {
      benchmark(size, shape, content, iterations);
    }
  });
}

/**
 * Builds one test string and times the three encoder entry points on it,
 * printing the configuration and the elapsed seconds for each to stdout.
 *
 * @param {number} size       Target size handed to the shape generator.
 * @param {string} shape      Key into `shape_generators`.
 * @param {number} unicode    Content mode index: 0 "ansi", 1 "single nonansi",
 *                            2 "mixed".
 * @param {number} iterations Number of calls made to each encoder.
 */
function benchmark(size, shape, unicode, iterations) {
  var content_labels = ["ansi", "single nonansi", "mixed"];
  var string = shape_generators[shape](size, unicode);
  var remaining, began;

  process.stdout.write("size: " + string.length + ", shape: " + shape);
  process.stdout.write(", content: " + content_labels[unicode]);
  process.stdout.write(", iterations: " + iterations);

  // The three timing loops are deliberately written out separately, each
  // with a direct call, instead of being routed through a shared helper.
  process.stdout.write("\nnew: ");
  began = Date.now();
  remaining = iterations;
  while (--remaining >= 0) {
    test_utf8_new(string);
  }
  process.stdout.write((Date.now() - began) / 1000 + " s");

  process.stdout.write("\told_nohint: ");
  began = Date.now();
  remaining = iterations;
  while (--remaining >= 0) {
    test_utf8_old_nohint(string);
  }
  process.stdout.write((Date.now() - began) / 1000 + " s");

  process.stdout.write("\told_hint: ");
  began = Date.now();
  remaining = iterations;
  while (--remaining >= 0) {
    test_utf8_old_hint(string);
  }
  process.stdout.write((Date.now() - began) / 1000 + " s");

  process.stdout.write("\n\n");
}

/*
* Helpers
*/

/**
 * Base-10 logarithm of `n`.
 *
 * Uses the built-in `Math.log10` when the runtime provides it, because the
 * quotient `Math.log(n) / Math.log(10)` is not exact for powers of ten
 * (for 1000 it yields 2.9999999999999996, which floors to 2). The quotient
 * is kept as a fallback for runtimes without `Math.log10`.
 *
 * @param {number} n Value to take the logarithm of.
 * @return {number} log10(n).
 */
function log10(n) {
  if (typeof Math.log10 === "function") {
    return Math.log10(n);
  }
  return Math.log(n) / Math.log(10);
}

/*
* Shape generators
*/

/**
 * Returns one 8-character building block for the generated strings.
 *
 * @param {*} unicode Truthy selects the block starting with a non-ASCII
 *                    character ("ü"); falsy selects the all-digit block.
 * @return {string} The selected 8-character block.
 */
function part(unicode) {
  return unicode ? "ü1234567" : "12345678";
}

/**
 * String builders keyed by shape name. Every builder takes
 * `(size, unicode)` and returns a string made of 8-character parts:
 *
 *   - exactly one part is built with `part(unicode)`, so it contains the
 *     non-ASCII character whenever `unicode` is non-zero;
 *   - all other parts are built with `part(unicode > 1)`, so they contain
 *     it only in mode 2.
 *
 * The builders differ in how the parts are concatenated, which presumably
 * controls the internal (cons-string) layout the engine ends up with --
 * NOTE(review): confirm against the native encoder being benchmarked.
 */
var shape_generators = {
  // Grows by prepending parts; the `part(unicode)` block ends up last.
  left_tailed: function(size, unicode) {
    var s = part(unicode);
    while (s.length < size) {
      s = part(unicode > 1) + s;
    }
    return s;
  },

  // Grows by appending parts; the `part(unicode)` block stays first.
  right_tailed: function(size, unicode) {
    var s = part(unicode);
    while (s.length < size) {
      s = s + part(unicode > 1);
    }
    return s;
  },

  // Doubles the string until it reaches `size`, then appends one
  // `part(unicode)` block, so the result is longer than `size`.
  tree: function(size, unicode) {
    // Declared before first use; the original assigned to `s` ahead of its
    // `var` statement and only worked because of declaration hoisting.
    var s = part(unicode > 1);
    while (s.length < size) {
      s = s + s;
    }
    return s + part(unicode);
  },

  // Same content as `tree`, but round-tripped through a Buffer so the
  // result is a freshly decoded string rather than a concatenation.
  flat: function(size, unicode) {
    var nested = shape_generators.tree(size, unicode);
    // `new Buffer(string)` is deprecated; use Buffer.from where it is the
    // real Buffer factory. Some older runtimes only inherit a `from` from
    // Uint8Array, which does not encode strings, so that case is excluded.
    var has_buffer_from = typeof Buffer.from === "function" &&
        (typeof Uint8Array === "undefined" || Buffer.from !== Uint8Array.from);
    var bytes = has_buffer_from ? Buffer.from(nested) : new Buffer(nested);
    return bytes.toString();
  }
};

// Script entry point: run the full benchmark as soon as the file is loaded.
benchmark_all();
63 changes: 63 additions & 0 deletions deps/v8/include/v8.h
Expand Up @@ -112,6 +112,8 @@ namespace internal {

class Arguments;
class Object;
class String;
class ConsString;
class Heap;
class HeapObject;
class Isolate;
Expand Down Expand Up @@ -1248,6 +1250,11 @@ class String : public Primitive {
*/
V8EXPORT bool CanMakeExternal();

/**
* Returns true if the string contains only ASCII (0-127) characters.
*/
V8EXPORT bool HasOnlyAsciiChars();

/** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/
V8EXPORT static Local<String> NewUndetectable(const char* data,
int length = -1);
Expand Down Expand Up @@ -1324,6 +1331,62 @@ class String : public Primitive {
void operator=(const Value&);
};

/**
 * Provides direct access to string memory. The user has to be aware that
 * each buffer returned might contain either 8-bit or 16-bit characters; use
 * storage_type() to tell which. As long as the iterator exists no other
 * interaction with the v8 heap is allowed, because the heap might be in an
 * inconsistent state.
 *
 * Usage is iterator-like: operator*() and length() describe the current
 * buffer, and Next() advances to the following one.
 */
class V8EXPORT Memory {
  // NOTE(review): presumably a tag bit stored alongside parent pointers to
  // record that the second child is current -- confirm in the implementation.
  static const int kCurrentIsSecondTag = 1;
  // Capacity of the parents_ stack below.
  static const int kParentStackSize = 1024;

 public:
  // Character width of the buffer currently exposed by operator*().
  // (Declared without `static`: a storage-class specifier is only valid on
  // objects and functions, not on a type declaration.)
  enum StorageType {
    kNone = 0,
    kAscii = 1,
    kTwoByte = 2
  };

  explicit Memory(Handle<v8::Value> obj);

  // Calls rewind() if a buffer is still exposed when the iterator goes away.
  ~Memory() {
    if (ptr_ != NULL) {
      rewind();
    }
  }

  // Pointer to the current buffer, or NULL when there is none.
  const void* operator*() { return ptr_; }
  // Value of length_ for the current buffer.
  // NOTE(review): presumably a character count rather than bytes -- confirm.
  int length() { return length_; }
  // Storage type recorded for the current buffer.
  StorageType storage_type() { return storage_type_; }

  // Advances to the next buffer if one is currently exposed. Returns true
  // when a buffer is available afterwards, false once ptr_ is NULL.
  bool Next() {
    if (ptr_ != NULL) {
      next();
    }
    return ptr_ != NULL;
  }

 private:
  void next();
  void rewind();
  inline void down();
  inline void set_flat(v8::internal::String* flat);
  inline void set_end();
  inline void push_parent(bool second);
  inline void pop_parent();

  const void* ptr_;
  int length_;
  StorageType storage_type_;
  v8::internal::ConsString* current_;
  intptr_t parent_;
  bool did_visit_second_;
  int depth_;
  intptr_t parents_[kParentStackSize];

  // Disallow copying and assigning.
  Memory(const Memory&);
  void operator=(const Memory&);
};

private:
V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const;
V8EXPORT static void CheckCast(v8::Value* obj);
Expand Down

0 comments on commit a8600ea

Please sign in to comment.