diff --git a/src/strgen/strgen.cpp b/src/strgen/strgen.cpp index b54da8a7b9..a926f0d1f9 100644 --- a/src/strgen/strgen.cpp +++ b/src/strgen/strgen.cpp @@ -91,7 +91,7 @@ struct FileStringReader : StringReader { return result; } - void HandlePragma(char *str, LanguagePackHeader &lang) override; + void HandlePragma(std::string_view str, LanguagePackHeader &lang) override; void ParseFile() override { @@ -103,51 +103,50 @@ struct FileStringReader : StringReader { } }; -void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang) +void FileStringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang) { - if (!memcmp(str, "id ", 3)) { - this->data.next_string_id = std::strtoul(str + 3, nullptr, 0); - } else if (!memcmp(str, "name ", 5)) { - strecpy(lang.name, str + 5); - } else if (!memcmp(str, "ownname ", 8)) { - strecpy(lang.own_name, str + 8); - } else if (!memcmp(str, "isocode ", 8)) { - strecpy(lang.isocode, str + 8); - } else if (!memcmp(str, "textdir ", 8)) { - if (!memcmp(str + 8, "ltr", 3)) { + StringConsumer consumer(str); + auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS); + if (name == "id") { + this->data.next_string_id = consumer.ReadIntegerBase(0); + } else if (name == "name") { + strecpy(lang.name, consumer.Read(StringConsumer::npos)); + } else if (name == "ownname") { + strecpy(lang.own_name, consumer.Read(StringConsumer::npos)); + } else if (name == "isocode") { + strecpy(lang.isocode, consumer.Read(StringConsumer::npos)); + } else if (name == "textdir") { + auto dir = consumer.Read(StringConsumer::npos); + if (dir == "ltr") { lang.text_dir = TD_LTR; - } else if (!memcmp(str + 8, "rtl", 3)) { + } else if (dir == "rtl") { lang.text_dir = TD_RTL; } else { - FatalError("Invalid textdir {}", str + 8); + FatalError("Invalid textdir {}", dir); } - } else if (!memcmp(str, "digitsep ", 9)) { - str += 9; - strecpy(lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str); - } else if (!memcmp(str, "digitsepcur ", 12)) { - str += 12; - strecpy(lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str); - } else if (!memcmp(str, "decimalsep ", 11)) { - str += 11; - strecpy(lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str); - } else if (!memcmp(str, "winlangid ", 10)) { - const char *buf = str + 10; - long langid = std::strtol(buf, nullptr, 16); + } else if (name == "digitsep") { + auto sep = consumer.Read(StringConsumer::npos); + strecpy(lang.digit_group_separator, sep == "{NBSP}" ? NBSP : sep); + } else if (name == "digitsepcur") { + auto sep = consumer.Read(StringConsumer::npos); + strecpy(lang.digit_group_separator_currency, sep == "{NBSP}" ? NBSP : sep); + } else if (name == "decimalsep") { + auto sep = consumer.Read(StringConsumer::npos); + strecpy(lang.digit_decimal_separator, sep == "{NBSP}" ? NBSP : sep); + } else if (name == "winlangid") { + auto langid = consumer.ReadIntegerBase(0); if (langid > UINT16_MAX || langid < 0) { - FatalError("Invalid winlangid {}", buf); + FatalError("Invalid winlangid {}", langid); } lang.winlangid = static_cast(langid); - } else if (!memcmp(str, "grflangid ", 10)) { - const char *buf = str + 10; - long langid = std::strtol(buf, nullptr, 16); + } else if (name == "grflangid") { + auto langid = consumer.ReadIntegerBase(0); if (langid >= 0x7F || langid < 0) { - FatalError("Invalid grflangid {}", buf); + FatalError("Invalid grflangid {}", langid); } lang.newgrflangid = static_cast(langid); - } else if (!memcmp(str, "gender ", 7)) { + } else if (name == "gender") { if (this->master) FatalError("Genders are not allowed in the base translation."); - StringConsumer consumer(std::string_view(str + 7)); - for (;;) { auto s = ParseWord(consumer); @@ -156,10 +155,8 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang) s->copy(lang.genders[lang.num_genders], CASE_GENDER_LEN - 1); lang.num_genders++; } - } else if (!memcmp(str, "case ", 5)) { + } else if (name == "case") { if (this->master) FatalError("Cases are not allowed in the base translation."); - StringConsumer consumer(std::string_view(str + 5)); - for (;;) { auto s = ParseWord(consumer); diff --git a/src/strgen/strgen.h b/src/strgen/strgen.h index 18e007f71e..c57b803d28 100644 --- a/src/strgen/strgen.h +++ b/src/strgen/strgen.h @@ -22,7 +22,7 @@ struct Case { uint8_t caseidx; ///< The index of the case. std::string string; ///< The translation of the case. - Case(uint8_t caseidx, const std::string &string); + Case(uint8_t caseidx, std::string_view string); }; /** Information about a single string. */ @@ -34,7 +34,7 @@ struct LangString { size_t line; ///< Line of string in source-file. std::vector translated_cases; ///< Cases of the translation. - LangString(const std::string &name, const std::string &english, size_t index, size_t line); + LangString(std::string_view name, std::string_view english, size_t index, size_t line); void FreeTranslation(); }; @@ -63,7 +63,7 @@ struct StringReader { StringReader(StringData &data, const std::string &file, bool master, bool translation); virtual ~StringReader() = default; - void HandleString(char *str); + void HandleString(std::string_view str); /** * Read a single line from the source of strings. @@ -75,7 +75,7 @@ struct StringReader { * Handle the pragma of the file. * @param str The pragma string to parse. */ - virtual void HandlePragma(char *str, LanguagePackHeader &lang); + virtual void HandlePragma(std::string_view str, LanguagePackHeader &lang); /** * Start parsing the file. diff --git a/src/strgen/strgen_base.cpp b/src/strgen/strgen_base.cpp index aa831c1945..4a08dcafae 100644 --- a/src/strgen/strgen_base.cpp +++ b/src/strgen/strgen_base.cpp @@ -41,7 +41,7 @@ static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0); * @param caseidx The index of the case. * @param string The translation of the case. */ -Case::Case(uint8_t caseidx, const std::string &string) : +Case::Case(uint8_t caseidx, std::string_view string) : caseidx(caseidx), string(string) { } @@ -53,7 +53,7 @@ Case::Case(uint8_t caseidx, const std::string &string) : * @param index The index in the string table. * @param line The line this string was found on. */ -LangString::LangString(const std::string &name, const std::string &english, size_t index, size_t line) : +LangString::LangString(std::string_view name, std::string_view english, size_t index, size_t line) : name(name), english(english), index(index), line(line) { } @@ -164,30 +164,6 @@ size_t StringData::CountInUse(size_t tab) const return count; } -static size_t Utf8Validate(const char *s) -{ - char32_t c; - - if (!HasBit(s[0], 7)) { - /* 1 byte */ - return 1; - } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) { - /* 2 bytes */ - c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6); - if (c >= 0x80) return 2; - } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) { - /* 3 bytes */ - c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6); - if (c >= 0x800) return 3; - } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) { - /* 4 bytes */ - c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6); - if (c >= 0x10000 && c <= 0x10FFFF) return 4; - } - - return 0; -} - void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value) { if (!param.empty()) StrgenWarning("Ignoring trailing letters in command"); @@ -503,91 +479,92 @@ static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::stri return result; } -void StringReader::HandleString(char *str) +[[nodiscard]] static std::string_view StripTrailingWhitespace(std::string_view str) { - if (*str == '#') { - if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2, _strgen.lang); - return; + auto len = str.find_last_not_of("\r\n "); + if (len == std::string_view::npos) return {}; + return str.substr(0, len + 1); +} + +void StringReader::HandleString(std::string_view src) +{ + /* Ignore blank lines */ + if (src.empty()) return; + + StringConsumer consumer(src); + if (consumer.ReadCharIf('#')) { + if (consumer.ReadCharIf('#') && !consumer.ReadCharIf('#')) this->HandlePragma(consumer.Read(StringConsumer::npos), _strgen.lang); + return; // ignore comments } - /* Ignore comments & blank lines */ - if (*str == ';' || *str == ' ' || *str == '\0') return; - - char *s = strchr(str, ':'); - if (s == nullptr) { + /* Read string name */ + std::string_view str_name = StripTrailingWhitespace(consumer.ReadUntilChar(':', StringConsumer::KEEP_SEPARATOR)); + if (!consumer.ReadCharIf(':')) { StrgenError("Line has no ':' delimiter"); return; } - char *t; - /* Trim spaces. - * After this str points to the command name, and s points to the command contents */ - for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {} - *t = 0; - s++; - - /* Check string is valid UTF-8 */ - const char *tmp; - for (tmp = s; *tmp != '\0';) { - size_t len = Utf8Validate(tmp); - if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s); - - char32_t c; - Utf8Decode(&c, tmp); - if (c <= 0x001F || // ASCII control character range - c == 0x200B || // Zero width space - (c >= 0xE000 && c <= 0xF8FF) || // Private range - (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range - StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast(c), s); - } - - tmp += len; + /* Read string case */ + std::optional casep; + if (auto index = str_name.find("."); index != std::string_view::npos) { + casep = str_name.substr(index + 1); + str_name = str_name.substr(0, index); } - /* Check if the string has a case.. - * The syntax for cases is IDENTNAME.case */ - char *casep = strchr(str, '.'); - if (casep != nullptr) *casep++ = '\0'; + /* Read string data */ + std::string_view value = consumer.Read(StringConsumer::npos); + + /* Check string is valid UTF-8 */ + for (StringConsumer validation_consumer(value); validation_consumer.AnyBytesLeft(); ) { + auto c = validation_consumer.TryReadUtf8(); + if (!c.has_value()) StrgenFatal("Invalid UTF-8 sequence in '{}'", value); + if (*c <= 0x001F || // ASCII control character range + *c == 0x200B || // Zero width space + (*c >= 0xE000 && *c <= 0xF8FF) || // Private range + (*c >= 0xFFF0 && *c <= 0xFFFF)) { // Specials range + StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast(*c), value); + } + } /* Check if this string already exists.. */ - LangString *ent = this->data.Find(str); + LangString *ent = this->data.Find(std::string(str_name)); if (this->master) { - if (casep != nullptr) { + if (casep.has_value()) { StrgenError("Cases in the base translation are not supported."); return; } if (ent != nullptr) { - StrgenError("String name '{}' is used multiple times", str); + StrgenError("String name '{}' is used multiple times", str_name); return; } if (this->data.strings[this->data.next_string_id] != nullptr) { - StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name); + StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str_name, this->data.strings[this->data.next_string_id]->name); return; } /* Allocate a new LangString */ - this->data.Add(std::make_unique(str, s, this->data.next_string_id++, _strgen.cur_line)); + this->data.Add(std::make_unique(str_name, value, this->data.next_string_id++, _strgen.cur_line)); } else { if (ent == nullptr) { - StrgenWarning("String name '{}' does not exist in master file", str); + StrgenWarning("String name '{}' does not exist in master file", str_name); return; } - if (!ent->translated.empty() && casep == nullptr) { - StrgenError("String name '{}' is used multiple times", str); + if (!ent->translated.empty() && !casep.has_value()) { + StrgenError("String name '{}' is used multiple times", str_name); return; } /* make sure that the commands match */ - if (!CheckCommandsMatch(s, ent->english, str)) return; + if (!CheckCommandsMatch(value, ent->english, str_name)) return; - if (casep != nullptr) { - ent->translated_cases.emplace_back(ResolveCaseName(casep), s); + if (casep.has_value()) { + ent->translated_cases.emplace_back(ResolveCaseName(*casep), value); } else { - ent->translated = s; + ent->translated = value; /* If the string was translated, use the line from the * translated language so errors in the translated file * are properly referenced to. */ @@ -596,23 +573,20 @@ void StringReader::HandleString(char *str) } } -void StringReader::HandlePragma(char *str, LanguagePackHeader &lang) +void StringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang) { - if (!memcmp(str, "plural ", 7)) { - lang.plural_form = atoi(str + 7); + StringConsumer consumer(str); + auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS); + if (name == "plural") { + lang.plural_form = consumer.ReadIntegerBase(10); if (lang.plural_form >= lengthof(_plural_forms)) { StrgenFatal("Invalid pluralform {}", lang.plural_form); } } else { - StrgenFatal("unknown pragma '{}'", str); + StrgenFatal("unknown pragma '{}'", name); } } -static void StripTrailingWhitespace(std::string &str) -{ - str.erase(str.find_last_not_of("\r\n ") + 1); -} - void StringReader::ParseFile() { _strgen.warnings = _strgen.errors = 0; @@ -631,8 +605,7 @@ void StringReader::ParseFile() std::optional line = this->ReadLine(); if (!line.has_value()) return; - StripTrailingWhitespace(line.value()); - this->HandleString(line.value().data()); + this->HandleString(StripTrailingWhitespace(line.value())); _strgen.cur_line++; }