1
0
Fork 0

Codechange: Parse translation files using StringConsumer.

pull/14000/head
frosch 2025-03-31 17:32:05 +02:00 committed by frosch
parent b27fd83ff1
commit bf8a241f69
3 changed files with 96 additions and 126 deletions

View File

@ -91,7 +91,7 @@ struct FileStringReader : StringReader {
return result; return result;
} }
void HandlePragma(char *str, LanguagePackHeader &lang) override; void HandlePragma(std::string_view str, LanguagePackHeader &lang) override;
void ParseFile() override void ParseFile() override
{ {
@ -103,51 +103,50 @@ struct FileStringReader : StringReader {
} }
}; };
void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang) void FileStringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
{ {
if (!memcmp(str, "id ", 3)) { StringConsumer consumer(str);
this->data.next_string_id = std::strtoul(str + 3, nullptr, 0); auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
} else if (!memcmp(str, "name ", 5)) { if (name == "id") {
strecpy(lang.name, str + 5); this->data.next_string_id = consumer.ReadIntegerBase<uint32_t>(0);
} else if (!memcmp(str, "ownname ", 8)) { } else if (name == "name") {
strecpy(lang.own_name, str + 8); strecpy(lang.name, consumer.Read(StringConsumer::npos));
} else if (!memcmp(str, "isocode ", 8)) { } else if (name == "ownname") {
strecpy(lang.isocode, str + 8); strecpy(lang.own_name, consumer.Read(StringConsumer::npos));
} else if (!memcmp(str, "textdir ", 8)) { } else if (name == "isocode") {
if (!memcmp(str + 8, "ltr", 3)) { strecpy(lang.isocode, consumer.Read(StringConsumer::npos));
} else if (name == "textdir") {
auto dir = consumer.Read(StringConsumer::npos);
if (dir == "ltr") {
lang.text_dir = TD_LTR; lang.text_dir = TD_LTR;
} else if (!memcmp(str + 8, "rtl", 3)) { } else if (dir == "rtl") {
lang.text_dir = TD_RTL; lang.text_dir = TD_RTL;
} else { } else {
FatalError("Invalid textdir {}", str + 8); FatalError("Invalid textdir {}", dir);
} }
} else if (!memcmp(str, "digitsep ", 9)) { } else if (name == "digitsep") {
str += 9; auto sep = consumer.Read(StringConsumer::npos);
strecpy(lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str); strecpy(lang.digit_group_separator, sep == "{NBSP}" ? NBSP : sep);
} else if (!memcmp(str, "digitsepcur ", 12)) { } else if (name == "digitsepcur") {
str += 12; auto sep = consumer.Read(StringConsumer::npos);
strecpy(lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str); strecpy(lang.digit_group_separator_currency, sep == "{NBSP}" ? NBSP : sep);
} else if (!memcmp(str, "decimalsep ", 11)) { } else if (name == "decimalsep") {
str += 11; auto sep = consumer.Read(StringConsumer::npos);
strecpy(lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str); strecpy(lang.digit_decimal_separator, sep == "{NBSP}" ? NBSP : sep);
} else if (!memcmp(str, "winlangid ", 10)) { } else if (name == "winlangid") {
const char *buf = str + 10; auto langid = consumer.ReadIntegerBase<int32_t>(0);
long langid = std::strtol(buf, nullptr, 16);
if (langid > UINT16_MAX || langid < 0) { if (langid > UINT16_MAX || langid < 0) {
FatalError("Invalid winlangid {}", buf); FatalError("Invalid winlangid {}", langid);
} }
lang.winlangid = static_cast<uint16_t>(langid); lang.winlangid = static_cast<uint16_t>(langid);
} else if (!memcmp(str, "grflangid ", 10)) { } else if (name == "grflangid") {
const char *buf = str + 10; auto langid = consumer.ReadIntegerBase<int32_t>(0);
long langid = std::strtol(buf, nullptr, 16);
if (langid >= 0x7F || langid < 0) { if (langid >= 0x7F || langid < 0) {
FatalError("Invalid grflangid {}", buf); FatalError("Invalid grflangid {}", langid);
} }
lang.newgrflangid = static_cast<uint8_t>(langid); lang.newgrflangid = static_cast<uint8_t>(langid);
} else if (!memcmp(str, "gender ", 7)) { } else if (name == "gender") {
if (this->master) FatalError("Genders are not allowed in the base translation."); if (this->master) FatalError("Genders are not allowed in the base translation.");
StringConsumer consumer(std::string_view(str + 7));
for (;;) { for (;;) {
auto s = ParseWord(consumer); auto s = ParseWord(consumer);
@ -156,10 +155,8 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
s->copy(lang.genders[lang.num_genders], CASE_GENDER_LEN - 1); s->copy(lang.genders[lang.num_genders], CASE_GENDER_LEN - 1);
lang.num_genders++; lang.num_genders++;
} }
} else if (!memcmp(str, "case ", 5)) { } else if (name == "case") {
if (this->master) FatalError("Cases are not allowed in the base translation."); if (this->master) FatalError("Cases are not allowed in the base translation.");
StringConsumer consumer(std::string_view(str + 5));
for (;;) { for (;;) {
auto s = ParseWord(consumer); auto s = ParseWord(consumer);

View File

@ -22,7 +22,7 @@ struct Case {
uint8_t caseidx; ///< The index of the case. uint8_t caseidx; ///< The index of the case.
std::string string; ///< The translation of the case. std::string string; ///< The translation of the case.
Case(uint8_t caseidx, const std::string &string); Case(uint8_t caseidx, std::string_view string);
}; };
/** Information about a single string. */ /** Information about a single string. */
@ -34,7 +34,7 @@ struct LangString {
size_t line; ///< Line of string in source-file. size_t line; ///< Line of string in source-file.
std::vector<Case> translated_cases; ///< Cases of the translation. std::vector<Case> translated_cases; ///< Cases of the translation.
LangString(const std::string &name, const std::string &english, size_t index, size_t line); LangString(std::string_view name, std::string_view english, size_t index, size_t line);
void FreeTranslation(); void FreeTranslation();
}; };
@ -63,7 +63,7 @@ struct StringReader {
StringReader(StringData &data, const std::string &file, bool master, bool translation); StringReader(StringData &data, const std::string &file, bool master, bool translation);
virtual ~StringReader() = default; virtual ~StringReader() = default;
void HandleString(char *str); void HandleString(std::string_view str);
/** /**
* Read a single line from the source of strings. * Read a single line from the source of strings.
@ -75,7 +75,7 @@ struct StringReader {
* Handle the pragma of the file. * Handle the pragma of the file.
* @param str The pragma string to parse. * @param str The pragma string to parse.
*/ */
virtual void HandlePragma(char *str, LanguagePackHeader &lang); virtual void HandlePragma(std::string_view str, LanguagePackHeader &lang);
/** /**
* Start parsing the file. * Start parsing the file.

View File

@ -41,7 +41,7 @@ static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0);
* @param caseidx The index of the case. * @param caseidx The index of the case.
* @param string The translation of the case. * @param string The translation of the case.
*/ */
Case::Case(uint8_t caseidx, const std::string &string) : Case::Case(uint8_t caseidx, std::string_view string) :
caseidx(caseidx), string(string) caseidx(caseidx), string(string)
{ {
} }
@ -53,7 +53,7 @@ Case::Case(uint8_t caseidx, const std::string &string) :
* @param index The index in the string table. * @param index The index in the string table.
* @param line The line this string was found on. * @param line The line this string was found on.
*/ */
LangString::LangString(const std::string &name, const std::string &english, size_t index, size_t line) : LangString::LangString(std::string_view name, std::string_view english, size_t index, size_t line) :
name(name), english(english), index(index), line(line) name(name), english(english), index(index), line(line)
{ {
} }
@ -164,30 +164,6 @@ size_t StringData::CountInUse(size_t tab) const
return count; return count;
} }
static size_t Utf8Validate(const char *s)
{
char32_t c;
if (!HasBit(s[0], 7)) {
/* 1 byte */
return 1;
} else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
/* 2 bytes */
c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
if (c >= 0x80) return 2;
} else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
/* 3 bytes */
c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
if (c >= 0x800) return 3;
} else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
/* 4 bytes */
c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
if (c >= 0x10000 && c <= 0x10FFFF) return 4;
}
return 0;
}
void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value) void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value)
{ {
if (!param.empty()) StrgenWarning("Ignoring trailing letters in command"); if (!param.empty()) StrgenWarning("Ignoring trailing letters in command");
@ -503,91 +479,92 @@ static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::stri
return result; return result;
} }
void StringReader::HandleString(char *str) [[nodiscard]] static std::string_view StripTrailingWhitespace(std::string_view str)
{ {
if (*str == '#') { auto len = str.find_last_not_of("\r\n ");
if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2, _strgen.lang); if (len == std::string_view::npos) return {};
return; return str.substr(0, len + 1);
}
void StringReader::HandleString(std::string_view src)
{
/* Ignore blank lines */
if (src.empty()) return;
StringConsumer consumer(src);
if (consumer.ReadCharIf('#')) {
if (consumer.ReadCharIf('#') && !consumer.ReadCharIf('#')) this->HandlePragma(consumer.Read(StringConsumer::npos), _strgen.lang);
return; // ignore comments
} }
/* Ignore comments & blank lines */ /* Read string name */
if (*str == ';' || *str == ' ' || *str == '\0') return; std::string_view str_name = StripTrailingWhitespace(consumer.ReadUntilChar(':', StringConsumer::KEEP_SEPARATOR));
if (!consumer.ReadCharIf(':')) {
char *s = strchr(str, ':');
if (s == nullptr) {
StrgenError("Line has no ':' delimiter"); StrgenError("Line has no ':' delimiter");
return; return;
} }
char *t; /* Read string case */
/* Trim spaces. std::optional<std::string_view> casep;
* After this str points to the command name, and s points to the command contents */ if (auto index = str_name.find("."); index != std::string_view::npos) {
for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {} casep = str_name.substr(index + 1);
*t = 0; str_name = str_name.substr(0, index);
s++;
/* Check string is valid UTF-8 */
const char *tmp;
for (tmp = s; *tmp != '\0';) {
size_t len = Utf8Validate(tmp);
if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
char32_t c;
Utf8Decode(&c, tmp);
if (c <= 0x001F || // ASCII control character range
c == 0x200B || // Zero width space
(c >= 0xE000 && c <= 0xF8FF) || // Private range
(c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(c), s);
}
tmp += len;
} }
/* Check if the string has a case.. /* Read string data */
* The syntax for cases is IDENTNAME.case */ std::string_view value = consumer.Read(StringConsumer::npos);
char *casep = strchr(str, '.');
if (casep != nullptr) *casep++ = '\0'; /* Check string is valid UTF-8 */
for (StringConsumer validation_consumer(value); validation_consumer.AnyBytesLeft(); ) {
auto c = validation_consumer.TryReadUtf8();
if (!c.has_value()) StrgenFatal("Invalid UTF-8 sequence in '{}'", value);
if (*c <= 0x001F || // ASCII control character range
*c == 0x200B || // Zero width space
(*c >= 0xE000 && *c <= 0xF8FF) || // Private range
(*c >= 0xFFF0 && *c <= 0xFFFF)) { // Specials range
StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(*c), value);
}
}
/* Check if this string already exists.. */ /* Check if this string already exists.. */
LangString *ent = this->data.Find(str); LangString *ent = this->data.Find(std::string(str_name));
if (this->master) { if (this->master) {
if (casep != nullptr) { if (casep.has_value()) {
StrgenError("Cases in the base translation are not supported."); StrgenError("Cases in the base translation are not supported.");
return; return;
} }
if (ent != nullptr) { if (ent != nullptr) {
StrgenError("String name '{}' is used multiple times", str); StrgenError("String name '{}' is used multiple times", str_name);
return; return;
} }
if (this->data.strings[this->data.next_string_id] != nullptr) { if (this->data.strings[this->data.next_string_id] != nullptr) {
StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name); StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str_name, this->data.strings[this->data.next_string_id]->name);
return; return;
} }
/* Allocate a new LangString */ /* Allocate a new LangString */
this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _strgen.cur_line)); this->data.Add(std::make_unique<LangString>(str_name, value, this->data.next_string_id++, _strgen.cur_line));
} else { } else {
if (ent == nullptr) { if (ent == nullptr) {
StrgenWarning("String name '{}' does not exist in master file", str); StrgenWarning("String name '{}' does not exist in master file", str_name);
return; return;
} }
if (!ent->translated.empty() && casep == nullptr) { if (!ent->translated.empty() && !casep.has_value()) {
StrgenError("String name '{}' is used multiple times", str); StrgenError("String name '{}' is used multiple times", str_name);
return; return;
} }
/* make sure that the commands match */ /* make sure that the commands match */
if (!CheckCommandsMatch(s, ent->english, str)) return; if (!CheckCommandsMatch(value, ent->english, str_name)) return;
if (casep != nullptr) { if (casep.has_value()) {
ent->translated_cases.emplace_back(ResolveCaseName(casep), s); ent->translated_cases.emplace_back(ResolveCaseName(*casep), value);
} else { } else {
ent->translated = s; ent->translated = value;
/* If the string was translated, use the line from the /* If the string was translated, use the line from the
* translated language so errors in the translated file * translated language so errors in the translated file
* are properly referenced to. */ * are properly referenced to. */
@ -596,23 +573,20 @@ void StringReader::HandleString(char *str)
} }
} }
void StringReader::HandlePragma(char *str, LanguagePackHeader &lang) void StringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
{ {
if (!memcmp(str, "plural ", 7)) { StringConsumer consumer(str);
lang.plural_form = atoi(str + 7); auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
if (name == "plural") {
lang.plural_form = consumer.ReadIntegerBase<uint32_t>(10);
if (lang.plural_form >= lengthof(_plural_forms)) { if (lang.plural_form >= lengthof(_plural_forms)) {
StrgenFatal("Invalid pluralform {}", lang.plural_form); StrgenFatal("Invalid pluralform {}", lang.plural_form);
} }
} else { } else {
StrgenFatal("unknown pragma '{}'", str); StrgenFatal("unknown pragma '{}'", name);
} }
} }
static void StripTrailingWhitespace(std::string &str)
{
str.erase(str.find_last_not_of("\r\n ") + 1);
}
void StringReader::ParseFile() void StringReader::ParseFile()
{ {
_strgen.warnings = _strgen.errors = 0; _strgen.warnings = _strgen.errors = 0;
@ -631,8 +605,7 @@ void StringReader::ParseFile()
std::optional<std::string> line = this->ReadLine(); std::optional<std::string> line = this->ReadLine();
if (!line.has_value()) return; if (!line.has_value()) return;
StripTrailingWhitespace(line.value()); this->HandleString(StripTrailingWhitespace(line.value()));
this->HandleString(line.value().data());
_strgen.cur_line++; _strgen.cur_line++;
} }