From 83401ad5e25b0688798165c341ec0c028d9ba94c Mon Sep 17 00:00:00 2001 From: frosch Date: Tue, 1 Apr 2025 14:58:16 +0200 Subject: [PATCH] Codechange: Use Utf8View::iterator in StringIterator. --- src/os/macosx/string_osx.cpp | 15 +++-- src/os/macosx/string_osx.h | 2 +- src/os/windows/string_uniscribe.cpp | 15 +++-- src/os/windows/string_uniscribe.h | 2 +- src/string.cpp | 94 +++++++++++------------------ src/string_base.h | 2 +- src/textbuf.cpp | 2 +- 7 files changed, 53 insertions(+), 79 deletions(-) diff --git a/src/os/macosx/string_osx.cpp b/src/os/macosx/string_osx.cpp index 2153635d1e..1487884e81 100644 --- a/src/os/macosx/string_osx.cpp +++ b/src/os/macosx/string_osx.cpp @@ -11,6 +11,7 @@ #include "string_osx.h" #include "../../string_func.h" #include "../../strings_func.h" +#include "../../core/utf8.hpp" #include "../../table/control_codes.h" #include "../../fontcache.h" #include "../../zoom_func.h" @@ -368,10 +369,8 @@ int MacOSStringContains(const std::string_view str, const std::string_view value } -/* virtual */ void OSXStringIterator::SetString(const char *s) +/* virtual */ void OSXStringIterator::SetString(std::string_view s) { - const char *string_base = s; - this->utf16_to_utf8.clear(); this->str_info.clear(); this->cur_pos = 0; @@ -379,10 +378,10 @@ int MacOSStringContains(const std::string_view str, const std::string_view value /* CoreText operates on UTF-16, thus we have to convert the input string. * To be able to return proper offsets, we have to create a mapping at the same time. */ std::vector<UniChar> utf16_str; ///< UTF-16 copy of the string. 
- while (*s != '\0') { - size_t idx = s - string_base; - - char32_t c = Utf8Consume(&s); + Utf8View view(s); + for (auto it = view.begin(), end = view.end(); it != end; ++it) { + size_t idx = it.GetByteOffset(); + char32_t c = *it; if (c < 0x10000) { utf16_str.push_back((UniChar)c); } else { @@ -393,7 +392,7 @@ int MacOSStringContains(const std::string_view str, const std::string_view value } this->utf16_to_utf8.push_back(idx); } - this->utf16_to_utf8.push_back(s - string_base); + this->utf16_to_utf8.push_back(s.size()); /* Query CoreText for word and cluster break information. */ this->str_info.resize(utf16_to_utf8.size()); diff --git a/src/os/macosx/string_osx.h b/src/os/macosx/string_osx.h index 15ae46c051..03bc549f83 100644 --- a/src/os/macosx/string_osx.h +++ b/src/os/macosx/string_osx.h @@ -27,7 +27,7 @@ class OSXStringIterator : public StringIterator { size_t cur_pos; ///< Current iteration position. public: - void SetString(const char *s) override; + void SetString(std::string_view s) override; size_t SetCurPosition(size_t pos) override; size_t Next(IterType what) override; size_t Prev(IterType what) override; diff --git a/src/os/windows/string_uniscribe.cpp b/src/os/windows/string_uniscribe.cpp index f062187d39..9ba43d5bdd 100644 --- a/src/os/windows/string_uniscribe.cpp +++ b/src/os/windows/string_uniscribe.cpp @@ -13,6 +13,7 @@ #include "../../language.h" #include "../../strings_func.h" #include "../../string_func.h" +#include "../../core/utf8.hpp" #include "../../table/control_codes.h" #include "../../zoom_func.h" #include "win32.h" @@ -516,10 +517,8 @@ std::span UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCha } -/* virtual */ void UniscribeStringIterator::SetString(const char *s) +/* virtual */ void UniscribeStringIterator::SetString(std::string_view s) { - const char *string_base = s; - this->utf16_to_utf8.clear(); this->str_info.clear(); this->cur_pos = 0; @@ -527,10 +526,10 @@ std::span 
UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCha /* Uniscribe operates on UTF-16, thus we have to convert the input string. * To be able to return proper offsets, we have to create a mapping at the same time. */ std::vector<wchar_t> utf16_str; ///< UTF-16 copy of the string. - while (*s != '\0') { - size_t idx = s - string_base; - - char32_t c = Utf8Consume(&s); + Utf8View view(s); + for (auto it = view.begin(), end = view.end(); it != end; ++it) { + size_t idx = it.GetByteOffset(); + char32_t c = *it; if (c < 0x10000) { utf16_str.push_back((wchar_t)c); } else { @@ -541,7 +540,7 @@ std::span UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCha } this->utf16_to_utf8.push_back(idx); } - this->utf16_to_utf8.push_back(s - string_base); + this->utf16_to_utf8.push_back(s.size()); /* Query Uniscribe for word and cluster break information. */ this->str_info.resize(utf16_to_utf8.size()); diff --git a/src/os/windows/string_uniscribe.h b/src/os/windows/string_uniscribe.h index 896b5f8745..309f59eb1b 100644 --- a/src/os/windows/string_uniscribe.h +++ b/src/os/windows/string_uniscribe.h @@ -77,7 +77,7 @@ class UniscribeStringIterator : public StringIterator { size_t cur_pos; ///< Current iteration position. 
public: - void SetString(const char *s) override; + void SetString(std::string_view s) override; size_t SetCurPosition(size_t pos) override; size_t Next(IterType what) override; size_t Prev(IterType what) override; diff --git a/src/string.cpp b/src/string.cpp index 191d9924ad..6eec0604f6 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -13,6 +13,7 @@ #include "error_func.h" #include "string_func.h" #include "string_base.h" +#include "core/utf8.hpp" #include "table/control_codes.h" @@ -826,10 +827,8 @@ public: delete this->word_itr; } - void SetString(const char *s) override + void SetString(std::string_view s) override { - const char *string_base = s; - /* Unfortunately current ICU versions only provide rudimentary support * for word break iterators (especially for CJK languages) in combination * with UTF-8 input. As a work around we have to convert the input to @@ -837,10 +836,10 @@ public: this->utf16_str.clear(); this->utf16_to_utf8.clear(); - while (*s != '\0') { - size_t idx = s - string_base; - - char32_t c = Utf8Consume(&s); + Utf8View view(s); + for (auto it = view.begin(), end = view.end(); it != end; ++it) { + size_t idx = it.GetByteOffset(); + char32_t c = *it; if (c < 0x10000) { this->utf16_str.push_back((UChar)c); } else { @@ -852,7 +851,7 @@ public: this->utf16_to_utf8.push_back(idx); } this->utf16_str.push_back('\0'); - this->utf16_to_utf8.push_back(s - string_base); + this->utf16_to_utf8.push_back(s.size()); UText text = UTEXT_INITIALIZER; UErrorCode status = U_ZERO_ERROR; @@ -956,60 +955,43 @@ public: /** Fallback simple string iterator. */ class DefaultStringIterator : public StringIterator { - const char *string; ///< Current string. - size_t len; ///< String length. - size_t cur_pos; ///< Current iteration position. + Utf8View string; ///< Current string. + Utf8View::iterator cur_pos; ///< Current iteration position. 
public: - DefaultStringIterator() : string(nullptr), len(0), cur_pos(0) - { - } - - void SetString(const char *s) override + void SetString(std::string_view s) override { this->string = s; - this->len = strlen(s); - this->cur_pos = 0; + this->cur_pos = this->string.begin(); } size_t SetCurPosition(size_t pos) override { - assert(this->string != nullptr && pos <= this->len); - /* Sanitize in case we get a position inside an UTF-8 sequence. */ - while (pos > 0 && IsUtf8Part(this->string[pos])) pos--; - return this->cur_pos = pos; + this->cur_pos = this->string.GetIterAtByte(pos); + return this->cur_pos.GetByteOffset(); } size_t Next(IterType what) override { - assert(this->string != nullptr); - + const auto end = this->string.end(); /* Already at the end? */ - if (this->cur_pos >= this->len) return END; + if (this->cur_pos >= end) return END; switch (what) { - case ITER_CHARACTER: { - char32_t c; - this->cur_pos += Utf8Decode(&c, this->string + this->cur_pos); - return this->cur_pos; - } + case ITER_CHARACTER: + ++this->cur_pos; + return this->cur_pos.GetByteOffset(); - case ITER_WORD: { - char32_t c; + case ITER_WORD: /* Consume current word. */ - size_t offs = Utf8Decode(&c, this->string + this->cur_pos); - while (this->cur_pos < this->len && !IsWhitespace(c)) { - this->cur_pos += offs; - offs = Utf8Decode(&c, this->string + this->cur_pos); + while (this->cur_pos != end && !IsWhitespace(*this->cur_pos)) { + ++this->cur_pos; } /* Consume whitespace to the next word. */ - while (this->cur_pos < this->len && IsWhitespace(c)) { - this->cur_pos += offs; - offs = Utf8Decode(&c, this->string + this->cur_pos); + while (this->cur_pos != end && IsWhitespace(*this->cur_pos)) { + ++this->cur_pos; } - - return this->cur_pos; - } + return this->cur_pos.GetByteOffset(); default: NOT_REACHED(); @@ -1020,33 +1002,27 @@ public: size_t Prev(IterType what) override { - assert(this->string != nullptr); - + const auto begin = this->string.begin(); /* Already at the beginning? 
*/ - if (this->cur_pos == 0) return END; + if (this->cur_pos == begin) return END; switch (what) { case ITER_CHARACTER: - return this->cur_pos = Utf8PrevChar(this->string + this->cur_pos) - this->string; + --this->cur_pos; + return this->cur_pos.GetByteOffset(); - case ITER_WORD: { - const char *s = this->string + this->cur_pos; - char32_t c; + case ITER_WORD: /* Consume preceding whitespace. */ do { - s = Utf8PrevChar(s); - Utf8Decode(&c, s); - } while (s > this->string && IsWhitespace(c)); + --this->cur_pos; + } while (this->cur_pos != begin && IsWhitespace(*this->cur_pos)); /* Consume preceding word. */ - while (s > this->string && !IsWhitespace(c)) { - s = Utf8PrevChar(s); - Utf8Decode(&c, s); + while (this->cur_pos != begin && !IsWhitespace(*this->cur_pos)) { + --this->cur_pos; } /* Move caret back to the beginning of the word. */ - if (IsWhitespace(c)) Utf8Consume(&s); - - return this->cur_pos = s - this->string; - } + if (IsWhitespace(*this->cur_pos)) ++this->cur_pos; + return this->cur_pos.GetByteOffset(); default: NOT_REACHED(); diff --git a/src/string_base.h b/src/string_base.h index 6936279386..cedafa7559 100644 --- a/src/string_base.h +++ b/src/string_base.h @@ -35,7 +35,7 @@ public: * changed. The cursor is reset to the start of the string. * @param s New string. */ - virtual void SetString(const char *s) = 0; + virtual void SetString(std::string_view s) = 0; /** * Change the current string cursor. diff --git a/src/textbuf.cpp b/src/textbuf.cpp index fe5d253750..eae281cb89 100644 --- a/src/textbuf.cpp +++ b/src/textbuf.cpp @@ -291,7 +291,7 @@ const char *Textbuf::GetText() const /** Update the character iter after the text has changed. */ void Textbuf::UpdateStringIter() { - this->char_iter->SetString(this->buf.c_str()); + this->char_iter->SetString(this->buf); size_t pos = this->char_iter->SetCurPosition(this->caretpos); this->caretpos = pos == StringIterator::END ? 0 : (uint16_t)pos; }