1
0
Fork 0

Codechange: Use Utf8View::iterator in StringIterator.

pull/13960/head
frosch 2025-04-01 14:58:16 +02:00 committed by frosch
parent b19e43ae99
commit 83401ad5e2
7 changed files with 53 additions and 79 deletions

View File

@ -11,6 +11,7 @@
#include "string_osx.h" #include "string_osx.h"
#include "../../string_func.h" #include "../../string_func.h"
#include "../../strings_func.h" #include "../../strings_func.h"
#include "../../core/utf8.hpp"
#include "../../table/control_codes.h" #include "../../table/control_codes.h"
#include "../../fontcache.h" #include "../../fontcache.h"
#include "../../zoom_func.h" #include "../../zoom_func.h"
@ -368,10 +369,8 @@ int MacOSStringContains(const std::string_view str, const std::string_view value
} }
/* virtual */ void OSXStringIterator::SetString(const char *s) /* virtual */ void OSXStringIterator::SetString(std::string_view s)
{ {
const char *string_base = s;
this->utf16_to_utf8.clear(); this->utf16_to_utf8.clear();
this->str_info.clear(); this->str_info.clear();
this->cur_pos = 0; this->cur_pos = 0;
@ -379,10 +378,10 @@ int MacOSStringContains(const std::string_view str, const std::string_view value
/* CoreText operates on UTF-16, thus we have to convert the input string. /* CoreText operates on UTF-16, thus we have to convert the input string.
* To be able to return proper offsets, we have to create a mapping at the same time. */ * To be able to return proper offsets, we have to create a mapping at the same time. */
std::vector<UniChar> utf16_str; ///< UTF-16 copy of the string. std::vector<UniChar> utf16_str; ///< UTF-16 copy of the string.
while (*s != '\0') { Utf8View view(s);
size_t idx = s - string_base; for (auto it = view.begin(), end = view.end(); it != end; ++it) {
size_t idx = it.GetByteOffset();
char32_t c = Utf8Consume(&s); char32_t c = *it;
if (c < 0x10000) { if (c < 0x10000) {
utf16_str.push_back((UniChar)c); utf16_str.push_back((UniChar)c);
} else { } else {
@ -393,7 +392,7 @@ int MacOSStringContains(const std::string_view str, const std::string_view value
} }
this->utf16_to_utf8.push_back(idx); this->utf16_to_utf8.push_back(idx);
} }
this->utf16_to_utf8.push_back(s - string_base); this->utf16_to_utf8.push_back(s.size());
/* Query CoreText for word and cluster break information. */ /* Query CoreText for word and cluster break information. */
this->str_info.resize(utf16_to_utf8.size()); this->str_info.resize(utf16_to_utf8.size());

View File

@ -27,7 +27,7 @@ class OSXStringIterator : public StringIterator {
size_t cur_pos; ///< Current iteration position. size_t cur_pos; ///< Current iteration position.
public: public:
void SetString(const char *s) override; void SetString(std::string_view s) override;
size_t SetCurPosition(size_t pos) override; size_t SetCurPosition(size_t pos) override;
size_t Next(IterType what) override; size_t Next(IterType what) override;
size_t Prev(IterType what) override; size_t Prev(IterType what) override;

View File

@ -13,6 +13,7 @@
#include "../../language.h" #include "../../language.h"
#include "../../strings_func.h" #include "../../strings_func.h"
#include "../../string_func.h" #include "../../string_func.h"
#include "../../core/utf8.hpp"
#include "../../table/control_codes.h" #include "../../table/control_codes.h"
#include "../../zoom_func.h" #include "../../zoom_func.h"
#include "win32.h" #include "win32.h"
@ -516,10 +517,8 @@ std::span<const int> UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCha
} }
/* virtual */ void UniscribeStringIterator::SetString(const char *s) /* virtual */ void UniscribeStringIterator::SetString(std::string_view s)
{ {
const char *string_base = s;
this->utf16_to_utf8.clear(); this->utf16_to_utf8.clear();
this->str_info.clear(); this->str_info.clear();
this->cur_pos = 0; this->cur_pos = 0;
@ -527,10 +526,10 @@ std::span<const int> UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCha
/* Uniscribe operates on UTF-16, thus we have to convert the input string. /* Uniscribe operates on UTF-16, thus we have to convert the input string.
* To be able to return proper offsets, we have to create a mapping at the same time. */ * To be able to return proper offsets, we have to create a mapping at the same time. */
std::vector<wchar_t> utf16_str; ///< UTF-16 copy of the string. std::vector<wchar_t> utf16_str; ///< UTF-16 copy of the string.
while (*s != '\0') { Utf8View view(s);
size_t idx = s - string_base; for (auto it = view.begin(), end = view.end(); it != end; ++it) {
size_t idx = it.GetByteOffset();
char32_t c = Utf8Consume(&s); char32_t c = *it;
if (c < 0x10000) { if (c < 0x10000) {
utf16_str.push_back((wchar_t)c); utf16_str.push_back((wchar_t)c);
} else { } else {
@ -541,7 +540,7 @@ std::span<const int> UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCha
} }
this->utf16_to_utf8.push_back(idx); this->utf16_to_utf8.push_back(idx);
} }
this->utf16_to_utf8.push_back(s - string_base); this->utf16_to_utf8.push_back(s.size());
/* Query Uniscribe for word and cluster break information. */ /* Query Uniscribe for word and cluster break information. */
this->str_info.resize(utf16_to_utf8.size()); this->str_info.resize(utf16_to_utf8.size());

View File

@ -77,7 +77,7 @@ class UniscribeStringIterator : public StringIterator {
size_t cur_pos; ///< Current iteration position. size_t cur_pos; ///< Current iteration position.
public: public:
void SetString(const char *s) override; void SetString(std::string_view s) override;
size_t SetCurPosition(size_t pos) override; size_t SetCurPosition(size_t pos) override;
size_t Next(IterType what) override; size_t Next(IterType what) override;
size_t Prev(IterType what) override; size_t Prev(IterType what) override;

View File

@ -13,6 +13,7 @@
#include "error_func.h" #include "error_func.h"
#include "string_func.h" #include "string_func.h"
#include "string_base.h" #include "string_base.h"
#include "core/utf8.hpp"
#include "table/control_codes.h" #include "table/control_codes.h"
@ -826,10 +827,8 @@ public:
delete this->word_itr; delete this->word_itr;
} }
void SetString(const char *s) override void SetString(std::string_view s) override
{ {
const char *string_base = s;
/* Unfortunately current ICU versions only provide rudimentary support /* Unfortunately current ICU versions only provide rudimentary support
* for word break iterators (especially for CJK languages) in combination * for word break iterators (especially for CJK languages) in combination
* with UTF-8 input. As a work around we have to convert the input to * with UTF-8 input. As a work around we have to convert the input to
@ -837,10 +836,10 @@ public:
this->utf16_str.clear(); this->utf16_str.clear();
this->utf16_to_utf8.clear(); this->utf16_to_utf8.clear();
while (*s != '\0') { Utf8View view(s);
size_t idx = s - string_base; for (auto it = view.begin(), end = view.end(); it != end; ++it) {
size_t idx = it.GetByteOffset();
char32_t c = Utf8Consume(&s); char32_t c = *it;
if (c < 0x10000) { if (c < 0x10000) {
this->utf16_str.push_back((UChar)c); this->utf16_str.push_back((UChar)c);
} else { } else {
@ -852,7 +851,7 @@ public:
this->utf16_to_utf8.push_back(idx); this->utf16_to_utf8.push_back(idx);
} }
this->utf16_str.push_back('\0'); this->utf16_str.push_back('\0');
this->utf16_to_utf8.push_back(s - string_base); this->utf16_to_utf8.push_back(s.size());
UText text = UTEXT_INITIALIZER; UText text = UTEXT_INITIALIZER;
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -956,60 +955,43 @@ public:
/** Fallback simple string iterator. */ /** Fallback simple string iterator. */
class DefaultStringIterator : public StringIterator class DefaultStringIterator : public StringIterator
{ {
const char *string; ///< Current string. Utf8View string; ///< Current string.
size_t len; ///< String length. Utf8View::iterator cur_pos; ///< Current iteration position.
size_t cur_pos; ///< Current iteration position.
public: public:
DefaultStringIterator() : string(nullptr), len(0), cur_pos(0) void SetString(std::string_view s) override
{
}
void SetString(const char *s) override
{ {
this->string = s; this->string = s;
this->len = strlen(s); this->cur_pos = this->string.begin();
this->cur_pos = 0;
} }
size_t SetCurPosition(size_t pos) override size_t SetCurPosition(size_t pos) override
{ {
assert(this->string != nullptr && pos <= this->len); this->cur_pos = this->string.GetIterAtByte(pos);
/* Sanitize in case we get a position inside an UTF-8 sequence. */ return this->cur_pos.GetByteOffset();
while (pos > 0 && IsUtf8Part(this->string[pos])) pos--;
return this->cur_pos = pos;
} }
size_t Next(IterType what) override size_t Next(IterType what) override
{ {
assert(this->string != nullptr); const auto end = this->string.end();
/* Already at the end? */ /* Already at the end? */
if (this->cur_pos >= this->len) return END; if (this->cur_pos >= end) return END;
switch (what) { switch (what) {
case ITER_CHARACTER: { case ITER_CHARACTER:
char32_t c; ++this->cur_pos;
this->cur_pos += Utf8Decode(&c, this->string + this->cur_pos); return this->cur_pos.GetByteOffset();
return this->cur_pos;
}
case ITER_WORD: { case ITER_WORD:
char32_t c;
/* Consume current word. */ /* Consume current word. */
size_t offs = Utf8Decode(&c, this->string + this->cur_pos); while (this->cur_pos != end && !IsWhitespace(*this->cur_pos)) {
while (this->cur_pos < this->len && !IsWhitespace(c)) { ++this->cur_pos;
this->cur_pos += offs;
offs = Utf8Decode(&c, this->string + this->cur_pos);
} }
/* Consume whitespace to the next word. */ /* Consume whitespace to the next word. */
while (this->cur_pos < this->len && IsWhitespace(c)) { while (this->cur_pos != end && IsWhitespace(*this->cur_pos)) {
this->cur_pos += offs; ++this->cur_pos;
offs = Utf8Decode(&c, this->string + this->cur_pos);
} }
return this->cur_pos.GetByteOffset();
return this->cur_pos;
}
default: default:
NOT_REACHED(); NOT_REACHED();
@ -1020,33 +1002,27 @@ public:
size_t Prev(IterType what) override size_t Prev(IterType what) override
{ {
assert(this->string != nullptr); const auto begin = this->string.begin();
/* Already at the beginning? */ /* Already at the beginning? */
if (this->cur_pos == 0) return END; if (this->cur_pos == begin) return END;
switch (what) { switch (what) {
case ITER_CHARACTER: case ITER_CHARACTER:
return this->cur_pos = Utf8PrevChar(this->string + this->cur_pos) - this->string; --this->cur_pos;
return this->cur_pos.GetByteOffset();
case ITER_WORD: { case ITER_WORD:
const char *s = this->string + this->cur_pos;
char32_t c;
/* Consume preceding whitespace. */ /* Consume preceding whitespace. */
do { do {
s = Utf8PrevChar(s); --this->cur_pos;
Utf8Decode(&c, s); } while (this->cur_pos != begin && IsWhitespace(*this->cur_pos));
} while (s > this->string && IsWhitespace(c));
/* Consume preceding word. */ /* Consume preceding word. */
while (s > this->string && !IsWhitespace(c)) { while (this->cur_pos != begin && !IsWhitespace(*this->cur_pos)) {
s = Utf8PrevChar(s); --this->cur_pos;
Utf8Decode(&c, s);
} }
/* Move caret back to the beginning of the word. */ /* Move caret back to the beginning of the word. */
if (IsWhitespace(c)) Utf8Consume(&s); if (IsWhitespace(*this->cur_pos)) ++this->cur_pos;
return this->cur_pos.GetByteOffset();
return this->cur_pos = s - this->string;
}
default: default:
NOT_REACHED(); NOT_REACHED();

View File

@ -35,7 +35,7 @@ public:
* changed. The cursor is reset to the start of the string. * changed. The cursor is reset to the start of the string.
* @param s New string. * @param s New string.
*/ */
virtual void SetString(const char *s) = 0; virtual void SetString(std::string_view s) = 0;
/** /**
* Change the current string cursor. * Change the current string cursor.

View File

@ -291,7 +291,7 @@ const char *Textbuf::GetText() const
/** Update the character iter after the text has changed. */ /** Update the character iter after the text has changed. */
void Textbuf::UpdateStringIter() void Textbuf::UpdateStringIter()
{ {
this->char_iter->SetString(this->buf.c_str()); this->char_iter->SetString(this->buf);
size_t pos = this->char_iter->SetCurPosition(this->caretpos); size_t pos = this->char_iter->SetCurPosition(this->caretpos);
this->caretpos = pos == StringIterator::END ? 0 : (uint16_t)pos; this->caretpos = pos == StringIterator::END ? 0 : (uint16_t)pos;
} }