Codechange: Use Utf8View to const-iterate over strings, if no particular error handling is needed.

2025-04-02 16:16:14 +02:00 · 2025-04-02 16:16:14 +02:00 · f06bfc0dad
parent 22ab0244d8
commit f06bfc0dad
5 changed files with 45 additions and 50 deletions
--- a/src/gfx_layout.cpp
+++ b/src/gfx_layout.cpp
@@ -12,6 +12,7 @@
 #include "gfx_layout.h"
 #include "string_func.h"
 #include "strings_func.h"
+#include "core/utf8.hpp"
 #include "debug.h"

 #include "table/control_codes.h"
@@ -74,15 +75,12 @@ static inline void GetLayouter(Layouter::LineCacheItem &line, std::string_view s

 	font_mapping.clear();

-	auto cur = str.begin();
-
 	/*
 	 * Go through the whole string while adding Font instances to the font map
 	 * whenever the font changes, and convert the wide characters into a format
 	 * usable by ParagraphLayout.
 	 */
-	for (; buff < buffer_last && cur != str.end();) {
-		char32_t c = Utf8Consume(cur);
+	for (char32_t c : Utf8View(str)) {
 		if (c == '\0' || c == '\n') {
 			/* Caller should already have filtered out these characters. */
 			NOT_REACHED();
@@ -102,6 +100,7 @@ static inline void GetLayouter(Layouter::LineCacheItem &line, std::string_view s
 			 * needed for RTL languages which need more proper shaping support. */
 			if (!T::SUPPORTS_RTL && IsTextDirectionChar(c)) continue;
 			buff += T::AppendToBuffer(buff, buffer_last, c);
+			if (buff >= buffer_last) break;
 			continue;
 		}

@@ -235,24 +234,28 @@ ParagraphLayouter::Position Layouter::GetCharPosition(std::string_view::const_it
 		return p;
 	}

+	/* Initial position, returned if character not found. */
+	const ParagraphLayouter::Position initial_position = Point{_current_text_dir == TD_LTR ? 0 : line->GetWidth(), 0};
+
 	/* Find the code point index which corresponds to the char
 	 * pointer into our UTF-8 source string. */
 	size_t index = 0;
-	auto str = this->string.begin();
-	while (str < ch) {
-		char32_t c = Utf8Consume(str);
-		if (!IsConsumedFormattingCode(c)) index += line->GetInternalCharLength(c);
+	{
+		Utf8View view(this->string);
+		const size_t offset = ch - this->string.begin();
+		const auto pos = view.GetIterAtByte(offset);
+
+		/* We couldn't find the code point index. */
+		if (pos.GetByteOffset() != offset) return initial_position;
+
+		for (auto it = view.begin(); it < pos; ++it) {
+			char32_t c = *it;
+			if (!IsConsumedFormattingCode(c)) index += line->GetInternalCharLength(c);
+		}
 	}

-	/* Initial position, returned if character not found. */
-	const ParagraphLayouter::Position initial_position = Point{_current_text_dir == TD_LTR ? 0 : line->GetWidth(), 0};
 	const ParagraphLayouter::Position *position = &initial_position;

-	/* We couldn't find the code point index. */
-	if (str != ch) return *position;
-
-	/* Valid character. */
-
 	/* Scan all runs until we've found our code point index. */
 	size_t best_index = SIZE_MAX;
 	for (int run_index = 0; run_index < line->CountRuns(); run_index++) {
@@ -311,10 +314,11 @@ ptrdiff_t Layouter::GetCharAtPosition(int x, size_t line_index) const
 				size_t index = charmap[i];

 				size_t cur_idx = 0;
-				for (auto str = this->string.begin(); str != this->string.end();) {
-					if (cur_idx == index) return str - this->string.begin();
+				Utf8View view(this->string);
+				for (auto it = view.begin(), end = view.end(); it != end; ++it) {
+					if (cur_idx == index) return it.GetByteOffset();

-					char32_t c = Utf8Consume(str);
+					char32_t c = *it;
 					if (!IsConsumedFormattingCode(c)) cur_idx += line->GetInternalCharLength(c);
 				}
 			}
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -587,15 +587,11 @@ static bool IsGarbageCharacter(char32_t c)
 */
 static std::string_view SkipGarbage(std::string_view str)
 {
-	auto first = std::begin(str);
-	auto last = std::end(str);
-	while (first < last) {
-		char32_t c;
-		size_t len = Utf8Decode(&c, &*first);
-		if (!IsGarbageCharacter(c)) break;
-		first += len;
-	}
-	return {first, last};
+	Utf8View view(str);
+	auto it = view.begin();
+	const auto end = view.end();
+	while (it != end && IsGarbageCharacter(*it)) ++it;
+	return str.substr(it.GetByteOffset());
 }

 /**
--- a/src/strings.cpp
+++ b/src/strings.cpp
@@ -38,6 +38,7 @@
 #include "newgrf_engine.h"
 #include "core/backup_type.hpp"
 #include "gfx_layout.h"
+#include "core/utf8.hpp"
 #include <stack>
 #include <charconv>

@@ -2261,13 +2262,9 @@ bool MissingGlyphSearcher::FindMissingGlyphs()

 	this->Reset();
 	for (auto text = this->NextString(); text.has_value(); text = this->NextString()) {
-		auto src = text->cbegin();
-
 		FontSize size = this->DefaultSize();
 		FontCache *fc = FontCache::Get(size);
-		while (src != text->cend()) {
-			char32_t c = Utf8Consume(src);
-
+		for (char32_t c : Utf8View(*text)) {
 			if (c >= SCC_FIRST_FONT && c <= SCC_LAST_FONT) {
 				size = (FontSize)(c - SCC_FIRST_FONT);
 				fc = FontCache::Get(size);
--- a/src/video/cocoa/cocoa_wnd.mm
+++ b/src/video/cocoa/cocoa_wnd.mm
@@ -34,6 +34,7 @@
 #include "../../spritecache.h"
 #include "../../textbuf_type.h"
 #include "../../toolbar_gui.h"
+#include "../../core/utf8.hpp"

 #include "../../table/sprites.h"

@@ -104,14 +105,9 @@ static OTTDMain *_ottd_main;
 static NSUInteger CountUtf16Units(const char *from, const char *to)
 {
 	NSUInteger i = 0;
-
-	while (from < to) {
-		char32_t c;
-		size_t len = Utf8Decode(&c, from);
-		i += len < 4 ? 1 : 2; // Watch for surrogate pairs.
-		from += len;
+	for (char32_t c : Utf8View(std::string_view(from, to))) {
+		i += c < 0x10000 ? 1 : 2; // Watch for surrogate pairs.
 	}
-
 	return i;
 }

@@ -123,14 +119,13 @@ static NSUInteger CountUtf16Units(const char *from, const char *to)
 */
 static const char *Utf8AdvanceByUtf16Units(const char *str, NSUInteger count)
 {
-	for (NSUInteger i = 0; i < count && *str != '\0'; ) {
-		char32_t c;
-		size_t len = Utf8Decode(&c, str);
-		i += len < 4 ? 1 : 2; // Watch for surrogates.
-		str += len;
+	Utf8View view(str);
+	auto it = view.begin();
+	const auto end = view.end();
+	for (NSUInteger i = 0; it != end && i < count; ++it) {
+		i += *it < 0x10000 ? 1 : 2; // Watch for surrogate pairs.
 	}
-
-	return str;
+	return str + it.GetByteOffset();
 }

 /**
--- a/src/video/win32_v.cpp
+++ b/src/video/win32_v.cpp
@@ -23,6 +23,7 @@
 #include "../window_func.h"
 #include "../framerate_type.h"
 #include "../library_loader.h"
+#include "../core/utf8.hpp"
 #include "win32_v.h"
 #include <windows.h>
 #include <imm.h>
@@ -367,17 +368,19 @@ static LRESULT HandleIMEComposition(HWND hwnd, WPARAM wParam, LPARAM lParam)

 				/* Convert caret position from bytes in the input string to a position in the UTF-8 encoded string. */
 				LONG caret_bytes = ImmGetCompositionString(hIMC, GCS_CURSORPOS, nullptr, 0);
-				const char *caret = utf8_buf;
-				for (const wchar_t *c = str.c_str(); *c != '\0' && *caret != '\0' && caret_bytes > 0; c++, caret_bytes--) {
+				Utf8View view(utf8_buf);
+				auto caret = view.begin();
+				const auto end = view.end();
+				for (const wchar_t *c = str.c_str(); *c != '\0' && caret != end && caret_bytes > 0; c++, caret_bytes--) {
 					/* Skip DBCS lead bytes or leading surrogates. */
 					if (Utf16IsLeadSurrogate(*c)) {
 						c++;
 						caret_bytes--;
 					}
-					Utf8Consume(&caret);
+					++caret;
 				}

-				HandleTextInput(utf8_buf, true, caret);
+				HandleTextInput(utf8_buf, true, utf8_buf + caret.GetByteOffset());
 			} else {
 				HandleTextInput(nullptr, true);
 			}