1
0
Fork 0

Codechange: Remove unused Utf8Consume, Utf8Decode, Utf8EncodedCharLen.

pull/14001/head
frosch 2025-04-01 18:21:34 +02:00 committed by frosch
parent bf8a241f69
commit 588fedb5db
2 changed files with 0 additions and 80 deletions

View File

@ -377,48 +377,6 @@ bool IsValidChar(char32_t key, CharSetFilter afilter)
}
}
/* UTF-8 handling routines */
/**
 * Decode the UTF-8 sequence found at the start of a character stream.
 *
 * A sequence is only accepted when its lead byte announces a length of one
 * to four bytes, every following byte passes IsUtf8Part, and the decoded
 * value is not an overlong encoding nor above 0x10FFFF. Anything else stores
 * '?' and reports a single byte, so the caller always makes progress.
 * @param c Buffer to place decoded character; must not be nullptr.
 * @param s Character stream to retrieve character from.
 * @return Number of bytes of \a s that make up the decoded character (1 to 4).
 */
size_t Utf8Decode(char32_t *c, const char *s)
{
	assert(c != nullptr);

	const char lead = s[0];

	/* 0xxxxxxx: a single byte that is the character itself. */
	if (!HasBit(lead, 7)) {
		*c = lead;
		return 1;
	}

	/* The follow-up bytes are tested left to right, so a byte is only read
	 * once the byte before it has been accepted as part of the sequence. */

	/* 110xxxxx 10xxxxxx: values below 0x80 would be an overlong encoding. */
	if (GB(lead, 5, 3) == 6 && IsUtf8Part(s[1])) {
		const char32_t decoded = GB(lead, 0, 5) << 6 | GB(s[1], 0, 6);
		if (decoded >= 0x80) {
			*c = decoded;
			return 2;
		}
	}

	/* 1110xxxx 10xxxxxx 10xxxxxx: values below 0x800 would be an overlong encoding. */
	if (GB(lead, 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
		const char32_t decoded = GB(lead, 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
		if (decoded >= 0x800) {
			*c = decoded;
			return 3;
		}
	}

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: must lie within 0x10000..0x10FFFF. */
	if (GB(lead, 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
		const char32_t decoded = GB(lead, 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
		if (decoded >= 0x10000 && decoded <= 0x10FFFF) {
			*c = decoded;
			return 4;
		}
	}

	/* Not a valid sequence: substitute a placeholder and skip one byte. */
	*c = '?';
	return 1;
}
/**
* Test if a unicode character is considered garbage to be skipped.
* @param c Character to test.

View File

@ -72,44 +72,6 @@ inline bool StrEmpty(const char *s)
bool IsValidChar(char32_t key, CharSetFilter afilter);
size_t Utf8Decode(char32_t *c, const char *s);
/*
 * Iterator variant of Utf8Decode: decodes starting at the element the iterator
 * refers to and leaves the iterator itself untouched.
 * std::string_view::iterator might be char *, in which case we do not want
 * this templated variant to be taken, hence the explicit exclusion of char *.
 */
template <typename T>
	requires (!std::is_same_v<T, char *> && (std::is_same_v<std::string_view::iterator, T> || std::is_same_v<std::string::iterator, T>))
inline size_t Utf8Decode(char32_t *c, T &s)
{
	return Utf8Decode(c, &*s);
}
/**
 * Decode the character at the current stream position and move the position past it.
 * @param s Pointer to the stream position; advanced by the number of bytes Utf8Decode reports.
 * @return The character produced by Utf8Decode.
 */
inline char32_t Utf8Consume(const char **s)
{
	char32_t decoded;
	const size_t length = Utf8Decode(&decoded, *s);
	*s += length;
	return decoded;
}
/**
 * Decode the character an iterator refers to and move the iterator past it.
 * @tparam Titr Iterator type; it is dereferenced to obtain the address handed to Utf8Decode.
 * @param s Iterator to read from; advanced by the number of bytes Utf8Decode reports.
 * @return The character produced by Utf8Decode.
 */
template <class Titr>
inline char32_t Utf8Consume(Titr &s)
{
	char32_t decoded;
	const size_t length = Utf8Decode(&decoded, &*s);
	s += length;
	return decoded;
}
/**
 * Determine how many bytes an UTF-8 encoded value occupies, judging only by
 * its first byte. No follow-up bytes are inspected.
 * @param c First byte of the UTF-8 encoding to query the length of.
 * @return Length of the sequence in bytes (1 to 4), or 0 when \a c does not
 *         match any of the recognised first-byte patterns.
 */
inline int8_t Utf8EncodedCharLen(char c)
{
	/* The patterns exclude each other, so they are simply tried from short to long. */
	if (GB(c, 7, 1) == 0) return 1;  // 0xxxxxxx
	if (GB(c, 5, 3) == 6) return 2;  // 110xxxxx
	if (GB(c, 4, 4) == 14) return 3; // 1110xxxx
	if (GB(c, 3, 5) == 30) return 4; // 11110xxx

	/* Invalid UTF8 start encoding */
	return 0;
}
/* Check if the given character is part of a UTF8 sequence */
inline bool IsUtf8Part(char c)