diff --git a/src/string.cpp b/src/string.cpp index 4032a7f34f..a9d72efab9 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -377,48 +377,6 @@ bool IsValidChar(char32_t key, CharSetFilter afilter) } } - -/* UTF-8 handling routines */ - - -/** - * Decode and consume the next UTF-8 encoded character. - * @param c Buffer to place decoded character. - * @param s Character stream to retrieve character from. - * @return Number of characters in the sequence. - */ -size_t Utf8Decode(char32_t *c, const char *s) -{ - assert(c != nullptr); - - if (!HasBit(s[0], 7)) { - /* Single byte character: 0xxxxxxx */ - *c = s[0]; - return 1; - } else if (GB(s[0], 5, 3) == 6) { - if (IsUtf8Part(s[1])) { - /* Double byte character: 110xxxxx 10xxxxxx */ - *c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6); - if (*c >= 0x80) return 2; - } - } else if (GB(s[0], 4, 4) == 14) { - if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) { - /* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */ - *c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6); - if (*c >= 0x800) return 3; - } - } else if (GB(s[0], 3, 5) == 30) { - if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) { - /* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6); - if (*c >= 0x10000 && *c <= 0x10FFFF) return 4; - } - } - - *c = '?'; - return 1; -} - /** * Test if a unicode character is considered garbage to be skipped. * @param c Character to test. diff --git a/src/string_func.h b/src/string_func.h index 09c8dc18db..c7e8910542 100644 --- a/src/string_func.h +++ b/src/string_func.h @@ -72,44 +72,6 @@ inline bool StrEmpty(const char *s) bool IsValidChar(char32_t key, CharSetFilter afilter); -size_t Utf8Decode(char32_t *c, const char *s); -/* std::string_view::iterator might be char *, in which case we do not want this templated variant to be taken. */ -template requires (!std::is_same_v && (std::is_same_v || std::is_same_v)) -inline size_t Utf8Decode(char32_t *c, T &s) { return Utf8Decode(c, &*s); } - -inline char32_t Utf8Consume(const char **s) -{ - char32_t c; - *s += Utf8Decode(&c, *s); - return c; -} - -template -inline char32_t Utf8Consume(Titr &s) -{ - char32_t c; - s += Utf8Decode(&c, &*s); - return c; -} - -/** - * Return the length of an UTF-8 encoded value based on a single char. This - * char should be the first byte of the UTF-8 encoding. If not, or encoding - * is invalid, return value is 0 - * @param c char to query length of - * @return requested size - */ -inline int8_t Utf8EncodedCharLen(char c) -{ - if (GB(c, 3, 5) == 0x1E) return 4; - if (GB(c, 4, 4) == 0x0E) return 3; - if (GB(c, 5, 3) == 0x06) return 2; - if (GB(c, 7, 1) == 0x00) return 1; - - /* Invalid UTF8 start encoding */ - return 0; -} - /* Check if the given character is part of a UTF8 sequence */ inline bool IsUtf8Part(char c)