1
0
Fork 0

Codechange: Parse translation files using StringConsumer.

pull/14000/head
frosch 2025-03-31 17:32:05 +02:00 committed by frosch
parent b27fd83ff1
commit bf8a241f69
3 changed files with 96 additions and 126 deletions

View File

@ -91,7 +91,7 @@ struct FileStringReader : StringReader {
return result;
}
void HandlePragma(char *str, LanguagePackHeader &lang) override;
void HandlePragma(std::string_view str, LanguagePackHeader &lang) override;
void ParseFile() override
{
@ -103,51 +103,50 @@ struct FileStringReader : StringReader {
}
};
void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
void FileStringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
{
if (!memcmp(str, "id ", 3)) {
this->data.next_string_id = std::strtoul(str + 3, nullptr, 0);
} else if (!memcmp(str, "name ", 5)) {
strecpy(lang.name, str + 5);
} else if (!memcmp(str, "ownname ", 8)) {
strecpy(lang.own_name, str + 8);
} else if (!memcmp(str, "isocode ", 8)) {
strecpy(lang.isocode, str + 8);
} else if (!memcmp(str, "textdir ", 8)) {
if (!memcmp(str + 8, "ltr", 3)) {
StringConsumer consumer(str);
auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
if (name == "id") {
this->data.next_string_id = consumer.ReadIntegerBase<uint32_t>(0);
} else if (name == "name") {
strecpy(lang.name, consumer.Read(StringConsumer::npos));
} else if (name == "ownname") {
strecpy(lang.own_name, consumer.Read(StringConsumer::npos));
} else if (name == "isocode") {
strecpy(lang.isocode, consumer.Read(StringConsumer::npos));
} else if (name == "textdir") {
auto dir = consumer.Read(StringConsumer::npos);
if (dir == "ltr") {
lang.text_dir = TD_LTR;
} else if (!memcmp(str + 8, "rtl", 3)) {
} else if (dir == "rtl") {
lang.text_dir = TD_RTL;
} else {
FatalError("Invalid textdir {}", str + 8);
FatalError("Invalid textdir {}", dir);
}
} else if (!memcmp(str, "digitsep ", 9)) {
str += 9;
strecpy(lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str);
} else if (!memcmp(str, "digitsepcur ", 12)) {
str += 12;
strecpy(lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str);
} else if (!memcmp(str, "decimalsep ", 11)) {
str += 11;
strecpy(lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str);
} else if (!memcmp(str, "winlangid ", 10)) {
const char *buf = str + 10;
long langid = std::strtol(buf, nullptr, 16);
} else if (name == "digitsep") {
auto sep = consumer.Read(StringConsumer::npos);
strecpy(lang.digit_group_separator, sep == "{NBSP}" ? NBSP : sep);
} else if (name == "digitsepcur") {
auto sep = consumer.Read(StringConsumer::npos);
strecpy(lang.digit_group_separator_currency, sep == "{NBSP}" ? NBSP : sep);
} else if (name == "decimalsep") {
auto sep = consumer.Read(StringConsumer::npos);
strecpy(lang.digit_decimal_separator, sep == "{NBSP}" ? NBSP : sep);
} else if (name == "winlangid") {
auto langid = consumer.ReadIntegerBase<int32_t>(0);
if (langid > UINT16_MAX || langid < 0) {
FatalError("Invalid winlangid {}", buf);
FatalError("Invalid winlangid {}", langid);
}
lang.winlangid = static_cast<uint16_t>(langid);
} else if (!memcmp(str, "grflangid ", 10)) {
const char *buf = str + 10;
long langid = std::strtol(buf, nullptr, 16);
} else if (name == "grflangid") {
auto langid = consumer.ReadIntegerBase<int32_t>(0);
if (langid >= 0x7F || langid < 0) {
FatalError("Invalid grflangid {}", buf);
FatalError("Invalid grflangid {}", langid);
}
lang.newgrflangid = static_cast<uint8_t>(langid);
} else if (!memcmp(str, "gender ", 7)) {
} else if (name == "gender") {
if (this->master) FatalError("Genders are not allowed in the base translation.");
StringConsumer consumer(std::string_view(str + 7));
for (;;) {
auto s = ParseWord(consumer);
@ -156,10 +155,8 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
s->copy(lang.genders[lang.num_genders], CASE_GENDER_LEN - 1);
lang.num_genders++;
}
} else if (!memcmp(str, "case ", 5)) {
} else if (name == "case") {
if (this->master) FatalError("Cases are not allowed in the base translation.");
StringConsumer consumer(std::string_view(str + 5));
for (;;) {
auto s = ParseWord(consumer);

View File

@ -22,7 +22,7 @@ struct Case {
uint8_t caseidx; ///< The index of the case.
std::string string; ///< The translation of the case.
Case(uint8_t caseidx, const std::string &string);
Case(uint8_t caseidx, std::string_view string);
};
/** Information about a single string. */
@ -34,7 +34,7 @@ struct LangString {
size_t line; ///< Line of string in source-file.
std::vector<Case> translated_cases; ///< Cases of the translation.
LangString(const std::string &name, const std::string &english, size_t index, size_t line);
LangString(std::string_view name, std::string_view english, size_t index, size_t line);
void FreeTranslation();
};
@ -63,7 +63,7 @@ struct StringReader {
StringReader(StringData &data, const std::string &file, bool master, bool translation);
virtual ~StringReader() = default;
void HandleString(char *str);
void HandleString(std::string_view str);
/**
* Read a single line from the source of strings.
@ -75,7 +75,7 @@ struct StringReader {
* Handle the pragma of the file.
* @param str The pragma string to parse.
*/
virtual void HandlePragma(char *str, LanguagePackHeader &lang);
virtual void HandlePragma(std::string_view str, LanguagePackHeader &lang);
/**
* Start parsing the file.

View File

@ -41,7 +41,7 @@ static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0);
* @param caseidx The index of the case.
* @param string The translation of the case.
*/
Case::Case(uint8_t caseidx, const std::string &string) :
Case::Case(uint8_t caseidx, std::string_view string) :
caseidx(caseidx), string(string)
{
}
@ -53,7 +53,7 @@ Case::Case(uint8_t caseidx, const std::string &string) :
* @param index The index in the string table.
* @param line The line this string was found on.
*/
LangString::LangString(const std::string &name, const std::string &english, size_t index, size_t line) :
LangString::LangString(std::string_view name, std::string_view english, size_t index, size_t line) :
name(name), english(english), index(index), line(line)
{
}
@ -164,30 +164,6 @@ size_t StringData::CountInUse(size_t tab) const
return count;
}
/**
 * Check whether a well-formed UTF-8 sequence starts at \a s and report its length.
 *
 * Overlong encodings and values above U+10FFFF are rejected. Surrogate code
 * points (U+D800..U+DFFF) are not checked for and therefore pass as 3-byte sequences.
 *
 * NOTE(review): the lead-byte tests assume GB(x, start, count) extracts \a count bits
 * beginning at bit \a start, and that IsUtf8Part() recognises continuation bytes
 * (10xxxxxx) - neither helper is visible here, confirm against their definitions.
 *
 * @param s Pointer to the first byte of the sequence. Up to three following bytes are
 *          read, each only after the preceding byte passed IsUtf8Part().
 * @return Length of the sequence in bytes (1 to 4), or 0 when the sequence is invalid.
 */
static size_t Utf8Validate(const char *s)
{
char32_t c;
if (!HasBit(s[0], 7)) {
/* 1 byte; accepted without any further check on its value. */
return 1;
} else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
/* 2 bytes; a decoded value below 0x80 is an overlong encoding and ends up at the 'return 0' below. */
c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
if (c >= 0x80) return 2;
} else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
/* 3 bytes; values below 0x800 are overlong. */
c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
if (c >= 0x800) return 3;
} else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
/* 4 bytes; values below 0x10000 are overlong, values above 0x10FFFF are out of range. */
c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
if (c >= 0x10000 && c <= 0x10FFFF) return 4;
}
/* No lead-byte pattern matched, or the decoded value failed its range check. */
return 0;
}
void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value)
{
if (!param.empty()) StrgenWarning("Ignoring trailing letters in command");
@ -503,91 +479,92 @@ static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::stri
return result;
}
void StringReader::HandleString(char *str)
[[nodiscard]] static std::string_view StripTrailingWhitespace(std::string_view str)
{
if (*str == '#') {
if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2, _strgen.lang);
return;
auto len = str.find_last_not_of("\r\n ");
if (len == std::string_view::npos) return {};
return str.substr(0, len + 1);
}
void StringReader::HandleString(std::string_view src)
{
/* Ignore blank lines */
if (src.empty()) return;
StringConsumer consumer(src);
if (consumer.ReadCharIf('#')) {
if (consumer.ReadCharIf('#') && !consumer.ReadCharIf('#')) this->HandlePragma(consumer.Read(StringConsumer::npos), _strgen.lang);
return; // ignore comments
}
/* Ignore comments & blank lines */
if (*str == ';' || *str == ' ' || *str == '\0') return;
char *s = strchr(str, ':');
if (s == nullptr) {
/* Read string name */
std::string_view str_name = StripTrailingWhitespace(consumer.ReadUntilChar(':', StringConsumer::KEEP_SEPARATOR));
if (!consumer.ReadCharIf(':')) {
StrgenError("Line has no ':' delimiter");
return;
}
char *t;
/* Trim spaces.
* After this str points to the command name, and s points to the command contents */
for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
*t = 0;
s++;
/* Check string is valid UTF-8 */
const char *tmp;
for (tmp = s; *tmp != '\0';) {
size_t len = Utf8Validate(tmp);
if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
char32_t c;
Utf8Decode(&c, tmp);
if (c <= 0x001F || // ASCII control character range
c == 0x200B || // Zero width space
(c >= 0xE000 && c <= 0xF8FF) || // Private range
(c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(c), s);
}
tmp += len;
/* Read string case */
std::optional<std::string_view> casep;
if (auto index = str_name.find("."); index != std::string_view::npos) {
casep = str_name.substr(index + 1);
str_name = str_name.substr(0, index);
}
/* Check if the string has a case..
* The syntax for cases is IDENTNAME.case */
char *casep = strchr(str, '.');
if (casep != nullptr) *casep++ = '\0';
/* Read string data */
std::string_view value = consumer.Read(StringConsumer::npos);
/* Check string is valid UTF-8 */
for (StringConsumer validation_consumer(value); validation_consumer.AnyBytesLeft(); ) {
auto c = validation_consumer.TryReadUtf8();
if (!c.has_value()) StrgenFatal("Invalid UTF-8 sequence in '{}'", value);
if (*c <= 0x001F || // ASCII control character range
*c == 0x200B || // Zero width space
(*c >= 0xE000 && *c <= 0xF8FF) || // Private range
(*c >= 0xFFF0 && *c <= 0xFFFF)) { // Specials range
StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(*c), value);
}
}
/* Check if this string already exists.. */
LangString *ent = this->data.Find(str);
LangString *ent = this->data.Find(std::string(str_name));
if (this->master) {
if (casep != nullptr) {
if (casep.has_value()) {
StrgenError("Cases in the base translation are not supported.");
return;
}
if (ent != nullptr) {
StrgenError("String name '{}' is used multiple times", str);
StrgenError("String name '{}' is used multiple times", str_name);
return;
}
if (this->data.strings[this->data.next_string_id] != nullptr) {
StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str_name, this->data.strings[this->data.next_string_id]->name);
return;
}
/* Allocate a new LangString */
this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _strgen.cur_line));
this->data.Add(std::make_unique<LangString>(str_name, value, this->data.next_string_id++, _strgen.cur_line));
} else {
if (ent == nullptr) {
StrgenWarning("String name '{}' does not exist in master file", str);
StrgenWarning("String name '{}' does not exist in master file", str_name);
return;
}
if (!ent->translated.empty() && casep == nullptr) {
StrgenError("String name '{}' is used multiple times", str);
if (!ent->translated.empty() && !casep.has_value()) {
StrgenError("String name '{}' is used multiple times", str_name);
return;
}
/* make sure that the commands match */
if (!CheckCommandsMatch(s, ent->english, str)) return;
if (!CheckCommandsMatch(value, ent->english, str_name)) return;
if (casep != nullptr) {
ent->translated_cases.emplace_back(ResolveCaseName(casep), s);
if (casep.has_value()) {
ent->translated_cases.emplace_back(ResolveCaseName(*casep), value);
} else {
ent->translated = s;
ent->translated = value;
/* If the string was translated, use the line from the
* translated language so errors in the translated file
* are properly referenced to. */
@ -596,23 +573,20 @@ void StringReader::HandleString(char *str)
}
}
/**
 * Handle a pragma line.
 *
 * Only the "plural" pragma is recognised here: its numeric argument is stored in
 * \a lang.plural_form and must be a valid index into _plural_forms.
 * Any other pragma name, or an out-of-range plural form, is reported via StrgenFatal().
 *
 * @param str The pragma text to parse.
 * @param lang The language pack header that receives the parsed plural form.
 */
void StringReader::HandlePragma(char *str, LanguagePackHeader &lang)
void StringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
{
if (!memcmp(str, "plural ", 7)) {
lang.plural_form = atoi(str + 7);
StringConsumer consumer(str);
/* The pragma name is everything up to the first space; the remainder stays in the consumer as the argument. */
auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
if (name == "plural") {
/* NOTE(review): the range check below runs on lang.plural_form after the assignment;
 * if that field is narrower than the parsed integer, the value is truncated before it is checked - confirm the field type. */
lang.plural_form = consumer.ReadIntegerBase<uint32_t>(10);
if (lang.plural_form >= lengthof(_plural_forms)) {
StrgenFatal("Invalid pluralform {}", lang.plural_form);
}
} else {
StrgenFatal("unknown pragma '{}'", str);
StrgenFatal("unknown pragma '{}'", name);
}
}
/**
 * Remove trailing carriage returns, line feeds and spaces from \a str, in place.
 * Tabs are not part of the stripped set.
 * @param str The string to trim; it is modified directly.
 */
static void StripTrailingWhitespace(std::string &str)
{
/* For an empty or all-whitespace string find_last_not_of() yields npos;
 * npos + 1 wraps around to 0, so erase(0) clears the whole string. */
str.erase(str.find_last_not_of("\r\n ") + 1);
}
void StringReader::ParseFile()
{
_strgen.warnings = _strgen.errors = 0;
@ -631,8 +605,7 @@ void StringReader::ParseFile()
std::optional<std::string> line = this->ReadLine();
if (!line.has_value()) return;
StripTrailingWhitespace(line.value());
this->HandleString(line.value().data());
this->HandleString(StripTrailingWhitespace(line.value()));
_strgen.cur_line++;
}