1
0
Fork 0

Codechange: Use Utf8View to const-iterate over strings, if no particular error handling is needed.

pull/13960/head
frosch 2025-04-02 16:16:14 +02:00 committed by frosch
parent 22ab0244d8
commit f06bfc0dad
5 changed files with 45 additions and 50 deletions

View File

@ -12,6 +12,7 @@
#include "gfx_layout.h"
#include "string_func.h"
#include "strings_func.h"
#include "core/utf8.hpp"
#include "debug.h"
#include "table/control_codes.h"
@ -74,15 +75,12 @@ static inline void GetLayouter(Layouter::LineCacheItem &line, std::string_view s
font_mapping.clear();
auto cur = str.begin();
/*
* Go through the whole string while adding Font instances to the font map
* whenever the font changes, and convert the wide characters into a format
* usable by ParagraphLayout.
*/
for (; buff < buffer_last && cur != str.end();) {
char32_t c = Utf8Consume(cur);
for (char32_t c : Utf8View(str)) {
if (c == '\0' || c == '\n') {
/* Caller should already have filtered out these characters. */
NOT_REACHED();
@ -102,6 +100,7 @@ static inline void GetLayouter(Layouter::LineCacheItem &line, std::string_view s
* needed for RTL languages which need more proper shaping support. */
if (!T::SUPPORTS_RTL && IsTextDirectionChar(c)) continue;
buff += T::AppendToBuffer(buff, buffer_last, c);
if (buff >= buffer_last) break;
continue;
}
@ -235,24 +234,28 @@ ParagraphLayouter::Position Layouter::GetCharPosition(std::string_view::const_it
return p;
}
/* Initial position, returned if character not found. */
const ParagraphLayouter::Position initial_position = Point{_current_text_dir == TD_LTR ? 0 : line->GetWidth(), 0};
/* Find the code point index which corresponds to the char
* pointer into our UTF-8 source string. */
size_t index = 0;
auto str = this->string.begin();
while (str < ch) {
char32_t c = Utf8Consume(str);
if (!IsConsumedFormattingCode(c)) index += line->GetInternalCharLength(c);
{
Utf8View view(this->string);
const size_t offset = ch - this->string.begin();
const auto pos = view.GetIterAtByte(offset);
/* We couldn't find the code point index. */
if (pos.GetByteOffset() != offset) return initial_position;
for (auto it = view.begin(); it < pos; ++it) {
char32_t c = *it;
if (!IsConsumedFormattingCode(c)) index += line->GetInternalCharLength(c);
}
}
/* Initial position, returned if character not found. */
const ParagraphLayouter::Position initial_position = Point{_current_text_dir == TD_LTR ? 0 : line->GetWidth(), 0};
const ParagraphLayouter::Position *position = &initial_position;
/* We couldn't find the code point index. */
if (str != ch) return *position;
/* Valid character. */
/* Scan all runs until we've found our code point index. */
size_t best_index = SIZE_MAX;
for (int run_index = 0; run_index < line->CountRuns(); run_index++) {
@ -311,10 +314,11 @@ ptrdiff_t Layouter::GetCharAtPosition(int x, size_t line_index) const
size_t index = charmap[i];
size_t cur_idx = 0;
for (auto str = this->string.begin(); str != this->string.end();) {
if (cur_idx == index) return str - this->string.begin();
Utf8View view(this->string);
for (auto it = view.begin(), end = view.end(); it != end; ++it) {
if (cur_idx == index) return it.GetByteOffset();
char32_t c = Utf8Consume(str);
char32_t c = *it;
if (!IsConsumedFormattingCode(c)) cur_idx += line->GetInternalCharLength(c);
}
}

View File

@ -587,15 +587,11 @@ static bool IsGarbageCharacter(char32_t c)
*/
static std::string_view SkipGarbage(std::string_view str)
{
	/* Decode code points through Utf8View so that iteration stays within
	 * the bounds of the view instead of decoding from a raw pointer. */
	Utf8View view(str);
	auto it = view.begin();
	const auto end = view.end();

	/* Advance past every leading code point that IsGarbageCharacter() accepts. */
	while (it != end && IsGarbageCharacter(*it)) ++it;

	/* Return the remainder, starting at the first non-garbage code point
	 * (or an empty view when everything was skipped). */
	return str.substr(it.GetByteOffset());
}
/**

View File

@ -38,6 +38,7 @@
#include "newgrf_engine.h"
#include "core/backup_type.hpp"
#include "gfx_layout.h"
#include "core/utf8.hpp"
#include <stack>
#include <charconv>
@ -2261,13 +2262,9 @@ bool MissingGlyphSearcher::FindMissingGlyphs()
this->Reset();
for (auto text = this->NextString(); text.has_value(); text = this->NextString()) {
auto src = text->cbegin();
FontSize size = this->DefaultSize();
FontCache *fc = FontCache::Get(size);
while (src != text->cend()) {
char32_t c = Utf8Consume(src);
for (char32_t c : Utf8View(*text)) {
if (c >= SCC_FIRST_FONT && c <= SCC_LAST_FONT) {
size = (FontSize)(c - SCC_FIRST_FONT);
fc = FontCache::Get(size);

View File

@ -34,6 +34,7 @@
#include "../../spritecache.h"
#include "../../textbuf_type.h"
#include "../../toolbar_gui.h"
#include "../../core/utf8.hpp"
#include "../../table/sprites.h"
@ -104,14 +105,9 @@ static OTTDMain *_ottd_main;
static NSUInteger CountUtf16Units(const char *from, const char *to)
{
	/* Number of UTF-16 code units needed for the UTF-8 text in [from, to). */
	NSUInteger i = 0;

	for (char32_t c : Utf8View(std::string_view(from, to))) {
		/* Code points below 0x10000 take a single UTF-16 unit;
		 * everything above is encoded as a surrogate pair. */
		i += c < 0x10000 ? 1 : 2; // Watch for surrogate pairs.
	}

	return i;
}
@ -123,14 +119,13 @@ static NSUInteger CountUtf16Units(const char *from, const char *to)
*/
static const char *Utf8AdvanceByUtf16Units(const char *str, NSUInteger count)
{
	/* Walk the string code point by code point while tracking how many
	 * UTF-16 units have been consumed so far. */
	Utf8View view(str);
	auto it = view.begin();
	const auto end = view.end();

	/* Stop once 'count' UTF-16 units were consumed or the string ends. */
	for (NSUInteger i = 0; it != end && i < count; ++it) {
		/* Code points below 0x10000 take a single UTF-16 unit;
		 * everything above is encoded as a surrogate pair. */
		i += *it < 0x10000 ? 1 : 2; // Watch for surrogate pairs.
	}

	/* Translate the iterator position back into a pointer into 'str'. */
	return str + it.GetByteOffset();
}
/**

View File

@ -23,6 +23,7 @@
#include "../window_func.h"
#include "../framerate_type.h"
#include "../library_loader.h"
#include "../core/utf8.hpp"
#include "win32_v.h"
#include <windows.h>
#include <imm.h>
@ -367,17 +368,19 @@ static LRESULT HandleIMEComposition(HWND hwnd, WPARAM wParam, LPARAM lParam)
/* Convert caret position from bytes in the input string to a position in the UTF-8 encoded string. */
LONG caret_bytes = ImmGetCompositionString(hIMC, GCS_CURSORPOS, nullptr, 0);
const char *caret = utf8_buf;
for (const wchar_t *c = str.c_str(); *c != '\0' && *caret != '\0' && caret_bytes > 0; c++, caret_bytes--) {
Utf8View view(utf8_buf);
auto caret = view.begin();
const auto end = view.end();
for (const wchar_t *c = str.c_str(); *c != '\0' && caret != end && caret_bytes > 0; c++, caret_bytes--) {
/* Skip DBCS lead bytes or leading surrogates. */
if (Utf16IsLeadSurrogate(*c)) {
c++;
caret_bytes--;
}
Utf8Consume(&caret);
++caret;
}
HandleTextInput(utf8_buf, true, caret);
HandleTextInput(utf8_buf, true, utf8_buf + caret.GetByteOffset());
} else {
HandleTextInput(nullptr, true);
}