Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 45a9bb3

Browse files
committedJun 17, 2015
Make wrap_rows not wrap inside utf-8 multibyte sequences
Also count multibyte sequences as "one" character. Adds unittest for the bug reporter's case. Fixes #2796.
1 parent 6dcf549 commit 45a9bb3

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed
 

‎src/unittest/test_utilities.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,14 @@ void TestUtilities::testUTF8()
242242
void TestUtilities::testWrapRows()
243243
{
244244
UASSERT(wrap_rows("12345678",4) == "1234\n5678");
245+
// test that wrap_rows doesn't wrap inside multibyte sequences
246+
const unsigned char s[] = {
247+
0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
248+
0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
249+
0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
250+
0x6e, 0x2f, 0x2e, 0x2e, 0};
251+
std::string str((char *)s);
252+
UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
245253
}
246254

247255

‎src/util/string.h

+13-2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
3232
#define STRINGIFY(x) #x
3333
#define TOSTRING(x) STRINGIFY(x)
3434

35+
// Checks whether a byte is an inner (continuation) byte of a UTF-8 multibyte sequence
36+
// UTF-8 continuation bytes have the bit pattern 10xxxxxx, i.e. the range
// 0x80..0xBF (0xC0 and above are lead bytes). The argument is parenthesized
// so that expressions such as `c & 0xff` are evaluated before the cast.
#define IS_UTF8_MULTB_INNER(x) ((static_cast<unsigned char>(x) >= 0x80) && (static_cast<unsigned char>(x) <= 0xbf))
37+
// Checks whether a byte is a start (lead) byte of a UTF-8 multibyte sequence
38+
// Bytes >= 0xC0 (bit pattern 11xxxxxx) are treated as the first byte of a
// multibyte sequence. The argument is parenthesized so that expressions such
// as `c & 0xff` are evaluated before the cast and comparison.
#define IS_UTF8_MULTB_START(x) (static_cast<unsigned char>(x) >= 0xc0)
39+
3540
typedef std::map<std::string, std::string> StringMap;
3641

3742
struct FlagDesc {
@@ -411,7 +416,10 @@ inline bool string_allowed_blacklist(const std::string &str,
411416
* every \p row_len characters whether it breaks a word or not. It is
412417
* intended to be used for, for example, showing paths in the GUI.
413418
*
414-
* @param from The string to be wrapped into rows.
419+
* @note This function doesn't wrap inside utf-8 multibyte sequences and also
420+
* counts multibyte sequences correctly as single characters.
421+
*
422+
* @param from The (utf-8) string to be wrapped into rows.
415423
* @param row_len The row length (in characters).
416424
* @return A new string with the wrapping applied.
417425
*/
@@ -420,9 +428,12 @@ inline std::string wrap_rows(const std::string &from,
420428
{
421429
std::string to;
422430

431+
size_t character_idx = 0;
423432
for (size_t i = 0; i < from.size(); i++) {
424-
if (i != 0 && i % row_len == 0)
433+
if (character_idx > 0 && character_idx % row_len == 0)
425434
to += '\n';
435+
if (!IS_UTF8_MULTB_INNER(from[i]))
436+
character_idx++;
426437
to += from[i];
427438
}
428439

0 commit comments

Comments
 (0)
Please sign in to comment.