Navigation Menu

Skip to content
This repository has been archived by the owner on Apr 22, 2023. It is now read-only.

Commit

Permalink
Erik's optimization of the v8 encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
piscisaureus committed Feb 29, 2012
1 parent adcac38 commit cc16602
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 1 deletion.
16 changes: 15 additions & 1 deletion deps/v8/src/api.cc
Expand Up @@ -3697,11 +3697,12 @@ int String::WriteUtf8(char* buffer,
LOG_API(isolate, "String::WriteUtf8");
ENTER_V8(isolate);
i::Handle<i::String> str = Utils::OpenHandle(this);
int string_length = str->length();
if (str->IsAsciiRepresentation()) {
int len;
if (capacity == -1) {
capacity = str->length() + 1;
len = str->length();
len = string_length;
} else {
len = i::Min(capacity, str->length());
}
Expand All @@ -3714,6 +3715,19 @@ int String::WriteUtf8(char* buffer,
return len;
}

if (capacity == -1 || capacity >= string_length * 3) {
if (string_length < 100) {
int utf8_bytes =
str->RecursivelySerializeToUtf8(buffer, 0, string_length);
if ((options & NO_NULL_TERMINATION) == 0 &&
(capacity > utf8_bytes || capacity == -1)) {
buffer[utf8_bytes++] = '\0';
}
if (nchars_ref != NULL) *nchars_ref = string_length;
return utf8_bytes;
}
}

i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
Expand Down
60 changes: 60 additions & 0 deletions deps/v8/src/objects.cc
Expand Up @@ -6048,6 +6048,66 @@ String::FlatContent String::GetFlatContent() {
}


int String::RecursivelySerializeToUtf8(char* buffer, int start, int end) {
if (IsAsciiRepresentation()) {
WriteToFlat(this, buffer, start, end);
return end - start;
}
switch (StringShape(this).representation_tag()) {
case kExternalStringTag: {
const uc16* data =
ExternalTwoByteString::cast(this)->GetChars();
char* current = buffer;
for (int i = start; i < end; i++) {
uc16 character = data[i];
current +=
unibrow::Utf8::Encode(current, character);
}
return current - buffer;
}
case kSeqStringTag: {
const uc16* data =
SeqTwoByteString::cast(this)->GetChars();
char* current = buffer;
for (int i = start; i < end; i++) {
uc16 character = data[i];
current +=
unibrow::Utf8::Encode(current, character);
}
return current - buffer;
}
case kConsStringTag: {
ConsString* cons_string = ConsString::cast(this);
String* first = cons_string->first();
int boundary = first->length();
if (start >= boundary) {
// Only need RHS.
return cons_string->second()->RecursivelySerializeToUtf8(
buffer, start - boundary, end - boundary);
} else if (end <= boundary) {
// Only need LHS.
return first->RecursivelySerializeToUtf8(
buffer, start - boundary, end - boundary);
} else {
int utf8_bytes = first->RecursivelySerializeToUtf8(
buffer, start, boundary);
return utf8_bytes +
cons_string->second()->RecursivelySerializeToUtf8(
buffer + utf8_bytes, 0, end - boundary);
}
}
case kSlicedStringTag: {
SlicedString* slice = SlicedString::cast(this);
unsigned offset = slice->offset();
return slice->parent()->RecursivelySerializeToUtf8(
buffer, start + offset, end + offset);
}
}
UNREACHABLE();
return 0;
}


SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls,
RobustnessFlag robust_flag,
int offset,
Expand Down
1 change: 1 addition & 0 deletions deps/v8/src/objects.h
Expand Up @@ -6593,6 +6593,7 @@ class String: public HeapObject {

inline int Utf8Length() { return Utf8Length(this, 0, length()); }
static int Utf8Length(String* input, int from, int to);
// Writes the UTF-8 encoding of the characters in [start, end) of this
// string into buffer and returns the number of bytes written.  Does not
// append a terminating '\0' and performs no bounds checking on buffer.
int RecursivelySerializeToUtf8(char* buffer, int start, int end);

// Return a 16 bit Unicode representation of the string.
// The string should be nearly flat, otherwise the performance of
Expand Down

0 comments on commit cc16602

Please sign in to comment.