Skip to content

Commit

Permalink
Fix wrap_rows at inner byte of multibyte sequence
Browse files Browse the repository at this point in the history
Also fix the UTF-8 inner-byte bounds check and add a unit test for the case this fixes.
  • Loading branch information
figec authored and est31 committed Jun 18, 2015
1 parent e45ecad commit 3b65a6a
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
24 changes: 17 additions & 7 deletions src/unittest/test_utilities.cpp
Expand Up @@ -243,13 +243,23 @@ void TestUtilities::testWrapRows()
{
UASSERT(wrap_rows("12345678",4) == "1234\n5678");
// test that wrap_rows doesn't wrap inside multibyte sequences
const unsigned char s[] = {
0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
0x6e, 0x2f, 0x2e, 0x2e, 0};
std::string str((char *)s);
UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
{
const unsigned char s[] = {
0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
0x6e, 0x2f, 0x2e, 0x2e, 0};
std::string str((char *)s);
UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
};
{
const unsigned char s[] = {
0x74, 0x65, 0x73, 0x74, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81,
0xd1, 0x82, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82,
0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0};
std::string str((char *)s);
UASSERT(utf8_to_wide(wrap_rows(str, 8)) != L"<invalid UTF-8 string>");
}
}


Expand Down
14 changes: 11 additions & 3 deletions src/util/string.h
Expand Up @@ -33,7 +33,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#define TOSTRING(x) STRINGIFY(x)

// Checks whether a byte is an inner (continuation) byte of a UTF-8 multibyte sequence
#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x <= 0xc0))
#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x < 0xc0))

typedef std::map<std::string, std::string> StringMap;

Expand Down Expand Up @@ -426,12 +426,20 @@ inline std::string wrap_rows(const std::string &from,
{
std::string to;

bool need_to_wrap = false;

size_t character_idx = 0;
for (size_t i = 0; i < from.size(); i++) {
if (character_idx > 0 && character_idx % row_len == 0)
to += '\n';
if (!IS_UTF8_MULTB_INNER(from[i]))
need_to_wrap = true;
if (!IS_UTF8_MULTB_INNER(from[i])) {
// Wrap string if needed before next char started
if (need_to_wrap) {
to += '\n';
need_to_wrap = false;
}
character_idx++;
}
to += from[i];
}

Expand Down

0 comments on commit 3b65a6a

Please sign in to comment.