From f06bfc0dad498e97400e79ca20989a2247d1ff1a Mon Sep 17 00:00:00 2001 From: frosch Date: Wed, 2 Apr 2025 16:16:14 +0200 Subject: [PATCH] Codechange: Use Utf8View to const-iterate over strings, if no particular error handling is needed. --- src/gfx_layout.cpp | 40 ++++++++++++++++++++---------------- src/string.cpp | 14 +++++-------- src/strings.cpp | 7 ++----- src/video/cocoa/cocoa_wnd.mm | 23 ++++++++------------- src/video/win32_v.cpp | 11 ++++++---- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/src/gfx_layout.cpp b/src/gfx_layout.cpp index b3b5138f3e..213bfd4fdb 100644 --- a/src/gfx_layout.cpp +++ b/src/gfx_layout.cpp @@ -12,6 +12,7 @@ #include "gfx_layout.h" #include "string_func.h" #include "strings_func.h" +#include "core/utf8.hpp" #include "debug.h" #include "table/control_codes.h" @@ -74,15 +75,12 @@ static inline void GetLayouter(Layouter::LineCacheItem &line, std::string_view s font_mapping.clear(); - auto cur = str.begin(); - /* * Go through the whole string while adding Font instances to the font map * whenever the font changes, and convert the wide characters into a format * usable by ParagraphLayout. */ - for (; buff < buffer_last && cur != str.end();) { - char32_t c = Utf8Consume(cur); + for (char32_t c : Utf8View(str)) { if (c == '\0' || c == '\n') { /* Caller should already have filtered out these characters. */ NOT_REACHED(); @@ -102,6 +100,7 @@ static inline void GetLayouter(Layouter::LineCacheItem &line, std::string_view s * needed for RTL languages which need more proper shaping support. */ if (!T::SUPPORTS_RTL && IsTextDirectionChar(c)) continue; buff += T::AppendToBuffer(buff, buffer_last, c); + if (buff >= buffer_last) break; continue; } @@ -235,24 +234,28 @@ ParagraphLayouter::Position Layouter::GetCharPosition(std::string_view::const_it return p; } + /* Initial position, returned if character not found. */ + const ParagraphLayouter::Position initial_position = Point{_current_text_dir == TD_LTR ? 0 : line->GetWidth(), 0}; + /* Find the code point index which corresponds to the char * pointer into our UTF-8 source string. */ size_t index = 0; - auto str = this->string.begin(); - while (str < ch) { - char32_t c = Utf8Consume(str); - if (!IsConsumedFormattingCode(c)) index += line->GetInternalCharLength(c); + { + Utf8View view(this->string); + const size_t offset = ch - this->string.begin(); + const auto pos = view.GetIterAtByte(offset); + + /* We couldn't find the code point index. */ + if (pos.GetByteOffset() != offset) return initial_position; + + for (auto it = view.begin(); it < pos; ++it) { + char32_t c = *it; + if (!IsConsumedFormattingCode(c)) index += line->GetInternalCharLength(c); + } } - /* Initial position, returned if character not found. */ - const ParagraphLayouter::Position initial_position = Point{_current_text_dir == TD_LTR ? 0 : line->GetWidth(), 0}; const ParagraphLayouter::Position *position = &initial_position; - /* We couldn't find the code point index. */ - if (str != ch) return *position; - - /* Valid character. */ - /* Scan all runs until we've found our code point index. */ size_t best_index = SIZE_MAX; for (int run_index = 0; run_index < line->CountRuns(); run_index++) { @@ -311,10 +314,11 @@ ptrdiff_t Layouter::GetCharAtPosition(int x, size_t line_index) const size_t index = charmap[i]; size_t cur_idx = 0; - for (auto str = this->string.begin(); str != this->string.end();) { - if (cur_idx == index) return str - this->string.begin(); + Utf8View view(this->string); + for (auto it = view.begin(), end = view.end(); it != end; ++it) { + if (cur_idx == index) return it.GetByteOffset(); - char32_t c = Utf8Consume(str); + char32_t c = *it; if (!IsConsumedFormattingCode(c)) cur_idx += line->GetInternalCharLength(c); } } diff --git a/src/string.cpp b/src/string.cpp index 3224a96a7a..a5e6a7242c 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -587,15 +587,11 @@ static bool IsGarbageCharacter(char32_t c) */ static std::string_view SkipGarbage(std::string_view str) { - auto first = std::begin(str); - auto last = std::end(str); - while (first < last) { - char32_t c; - size_t len = Utf8Decode(&c, &*first); - if (!IsGarbageCharacter(c)) break; - first += len; - } - return {first, last}; + Utf8View view(str); + auto it = view.begin(); + const auto end = view.end(); + while (it != end && IsGarbageCharacter(*it)) ++it; + return str.substr(it.GetByteOffset()); } /** diff --git a/src/strings.cpp b/src/strings.cpp index 27192746f2..f3cfd3081b 100644 --- a/src/strings.cpp +++ b/src/strings.cpp @@ -38,6 +38,7 @@ #include "newgrf_engine.h" #include "core/backup_type.hpp" #include "gfx_layout.h" +#include "core/utf8.hpp" #include #include @@ -2261,13 +2262,9 @@ bool MissingGlyphSearcher::FindMissingGlyphs() this->Reset(); for (auto text = this->NextString(); text.has_value(); text = this->NextString()) { - auto src = text->cbegin(); - FontSize size = this->DefaultSize(); FontCache *fc = FontCache::Get(size); - while (src != text->cend()) { - char32_t c = Utf8Consume(src); - + for (char32_t c : Utf8View(*text)) { if (c >= SCC_FIRST_FONT && c <= SCC_LAST_FONT) { size = (FontSize)(c - SCC_FIRST_FONT); fc = FontCache::Get(size); diff --git a/src/video/cocoa/cocoa_wnd.mm b/src/video/cocoa/cocoa_wnd.mm index 38e7b720ed..e746ee1e16 100644 --- a/src/video/cocoa/cocoa_wnd.mm +++ b/src/video/cocoa/cocoa_wnd.mm @@ -34,6 +34,7 @@ #include "../../spritecache.h" #include "../../textbuf_type.h" #include "../../toolbar_gui.h" +#include "../../core/utf8.hpp" #include "../../table/sprites.h" @@ -104,14 +105,9 @@ static OTTDMain *_ottd_main; static NSUInteger CountUtf16Units(const char *from, const char *to) { NSUInteger i = 0; - - while (from < to) { - char32_t c; - size_t len = Utf8Decode(&c, from); - i += len < 4 ? 1 : 2; // Watch for surrogate pairs. - from += len; + for (char32_t c : Utf8View(std::string_view(from, to))) { + i += c < 0x10000 ? 1 : 2; // Watch for surrogate pairs. } - return i; } @@ -123,14 +119,13 @@ static NSUInteger CountUtf16Units(const char *from, const char *to) */ static const char *Utf8AdvanceByUtf16Units(const char *str, NSUInteger count) { - for (NSUInteger i = 0; i < count && *str != '\0'; ) { - char32_t c; - size_t len = Utf8Decode(&c, str); - i += len < 4 ? 1 : 2; // Watch for surrogates. - str += len; + Utf8View view(str); + auto it = view.begin(); + const auto end = view.end(); + for (NSUInteger i = 0; it != end && i < count; ++it) { + i += *it < 0x10000 ? 1 : 2; // Watch for surrogate pairs. } - - return str; + return str + it.GetByteOffset(); } /** diff --git a/src/video/win32_v.cpp b/src/video/win32_v.cpp index f29641cb65..c692ff9fc2 100644 --- a/src/video/win32_v.cpp +++ b/src/video/win32_v.cpp @@ -23,6 +23,7 @@ #include "../window_func.h" #include "../framerate_type.h" #include "../library_loader.h" +#include "../core/utf8.hpp" #include "win32_v.h" #include #include @@ -367,17 +368,19 @@ static LRESULT HandleIMEComposition(HWND hwnd, WPARAM wParam, LPARAM lParam) /* Convert caret position from bytes in the input string to a position in the UTF-8 encoded string. */ LONG caret_bytes = ImmGetCompositionString(hIMC, GCS_CURSORPOS, nullptr, 0); - const char *caret = utf8_buf; - for (const wchar_t *c = str.c_str(); *c != '\0' && *caret != '\0' && caret_bytes > 0; c++, caret_bytes--) { + Utf8View view(utf8_buf); + auto caret = view.begin(); + const auto end = view.end(); + for (const wchar_t *c = str.c_str(); *c != '\0' && caret != end && caret_bytes > 0; c++, caret_bytes--) { /* Skip DBCS lead bytes or leading surrogates. */ if (Utf16IsLeadSurrogate(*c)) { c++; caret_bytes--; } - Utf8Consume(&caret); + ++caret; } - HandleTextInput(utf8_buf, true, caret); + HandleTextInput(utf8_buf, true, utf8_buf + caret.GetByteOffset()); } else { HandleTextInput(nullptr, true); }