mirror of https://github.com/OpenTTD/OpenTTD
Fix bb8a0c7641: Skip control codes when sorting strings. (#13035)
Now that SkipGarbage doesn't skip all multi-byte UTF-8 characters, string control codes are not skipped either. This gave unintended sorting when NewGRF names start with colour codes. Make SkipGarbage UTF-8 aware so that it is able to skip some Unicode ranges as well.
pull/13037/head
parent
233ee16c44
commit
9cf47e69d6
|
@ -541,6 +541,22 @@ char *strcasestr(const char *haystack, const char *needle)
|
||||||
}
|
}
|
||||||
#endif /* DEFINE_STRCASESTR */
|
#endif /* DEFINE_STRCASESTR */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test if a unicode character is considered garbage to be skipped.
|
||||||
|
* @param c Character to test.
|
||||||
|
* @returns true iff the character should be skipped.
|
||||||
|
*/
|
||||||
|
static bool IsGarbageCharacter(char32_t c)
|
||||||
|
{
|
||||||
|
if (c >= '0' && c <= '9') return false; /* ASCII digits are kept for sorting. */
|
||||||
|
if (c >= 'A' && c <= 'Z') return false; /* ASCII upper-case letters are kept. */
|
||||||
|
if (c >= 'a' && c <= 'z') return false; /* ASCII lower-case letters are kept. */
|
||||||
|
if (c >= SCC_CONTROL_START && c <= SCC_CONTROL_END) return true; /* String control codes are skipped. NOTE(review): presumably this range lies inside the range tested on the next check, which is why it is tested first -- confirm against the SCC_* definitions. */
|
||||||
|
if (c >= 0xC0 && c <= 0x10FFFF) return false; /* Any other code point from U+00C0 up to U+10FFFF is kept. */
|
||||||
|
|
||||||
|
return true; /* Everything that matched none of the tests above (e.g. ASCII punctuation, values below 0xC0 that are not ASCII alphanumerics, values above 0x10FFFF) is garbage. */
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Skip some of the 'garbage' in the string that we don't want to use
|
* Skip some of the 'garbage' in the string that we don't want to use
|
||||||
* to sort on. This way the alphabetical sorting will work better as
|
* to sort on. This way the alphabetical sorting will work better as
|
||||||
|
@ -551,8 +567,15 @@ char *strcasestr(const char *haystack, const char *needle)
|
||||||
*/
|
*/
|
||||||
static std::string_view SkipGarbage(std::string_view str)
|
static std::string_view SkipGarbage(std::string_view str)
|
||||||
{
|
{
|
||||||
while (!str.empty() && (static_cast<uint8_t>(str[0]) < '0' || IsInsideMM(str[0], ';', '@' + 1) || IsInsideMM(str[0], '[', '`' + 1) || IsInsideMM(str[0], '{', '~' + 1))) str.remove_prefix(1); /* Byte-wise variant: drops one leading byte at a time while it is below '0' or falls in one of the three punctuation ranges (presumably IsInsideMM treats its upper bound as exclusive, hence the "+ 1" -- confirm). */
|
auto first = std::begin(str); /* Cursor over the not-yet-skipped part of the string. */
|
||||||
return str;
|
auto last = std::end(str); /* End of the view; used as the loop bound and as the end of the returned view. */
|
||||||
|
while (first < last) {
|
||||||
|
char32_t c;
|
||||||
|
size_t len = Utf8Decode(&c, &*first); /* Decode the character at the cursor into c. NOTE(review): only a start pointer is passed, so the decoder cannot see 'last'; presumably the underlying buffer is NUL-terminated -- confirm, a std::string_view does not guarantee it. */
|
||||||
|
if (!IsGarbageCharacter(c)) break; /* Stop at the first character that should take part in sorting. */
|
||||||
|
first += len; /* Step over the garbage character; len is presumably the number of bytes Utf8Decode consumed -- confirm. */
|
||||||
|
}
|
||||||
|
return {first, last}; /* The remaining view, built from the iterator pair; it is empty when every character was garbage. */
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue