mirror of https://github.com/OpenTTD/OpenTTD
Codechange: Parse translation files using StringConsumer.
parent
b27fd83ff1
commit
bf8a241f69
|
@ -91,7 +91,7 @@ struct FileStringReader : StringReader {
|
|||
return result;
|
||||
}
|
||||
|
||||
void HandlePragma(char *str, LanguagePackHeader &lang) override;
|
||||
void HandlePragma(std::string_view str, LanguagePackHeader &lang) override;
|
||||
|
||||
void ParseFile() override
|
||||
{
|
||||
|
@ -103,51 +103,50 @@ struct FileStringReader : StringReader {
|
|||
}
|
||||
};
|
||||
|
||||
void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
|
||||
void FileStringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
|
||||
{
|
||||
if (!memcmp(str, "id ", 3)) {
|
||||
this->data.next_string_id = std::strtoul(str + 3, nullptr, 0);
|
||||
} else if (!memcmp(str, "name ", 5)) {
|
||||
strecpy(lang.name, str + 5);
|
||||
} else if (!memcmp(str, "ownname ", 8)) {
|
||||
strecpy(lang.own_name, str + 8);
|
||||
} else if (!memcmp(str, "isocode ", 8)) {
|
||||
strecpy(lang.isocode, str + 8);
|
||||
} else if (!memcmp(str, "textdir ", 8)) {
|
||||
if (!memcmp(str + 8, "ltr", 3)) {
|
||||
StringConsumer consumer(str);
|
||||
auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
|
||||
if (name == "id") {
|
||||
this->data.next_string_id = consumer.ReadIntegerBase<uint32_t>(0);
|
||||
} else if (name == "name") {
|
||||
strecpy(lang.name, consumer.Read(StringConsumer::npos));
|
||||
} else if (name == "ownname") {
|
||||
strecpy(lang.own_name, consumer.Read(StringConsumer::npos));
|
||||
} else if (name == "isocode") {
|
||||
strecpy(lang.isocode, consumer.Read(StringConsumer::npos));
|
||||
} else if (name == "textdir") {
|
||||
auto dir = consumer.Read(StringConsumer::npos);
|
||||
if (dir == "ltr") {
|
||||
lang.text_dir = TD_LTR;
|
||||
} else if (!memcmp(str + 8, "rtl", 3)) {
|
||||
} else if (dir == "rtl") {
|
||||
lang.text_dir = TD_RTL;
|
||||
} else {
|
||||
FatalError("Invalid textdir {}", str + 8);
|
||||
FatalError("Invalid textdir {}", dir);
|
||||
}
|
||||
} else if (!memcmp(str, "digitsep ", 9)) {
|
||||
str += 9;
|
||||
strecpy(lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str);
|
||||
} else if (!memcmp(str, "digitsepcur ", 12)) {
|
||||
str += 12;
|
||||
strecpy(lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str);
|
||||
} else if (!memcmp(str, "decimalsep ", 11)) {
|
||||
str += 11;
|
||||
strecpy(lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str);
|
||||
} else if (!memcmp(str, "winlangid ", 10)) {
|
||||
const char *buf = str + 10;
|
||||
long langid = std::strtol(buf, nullptr, 16);
|
||||
} else if (name == "digitsep") {
|
||||
auto sep = consumer.Read(StringConsumer::npos);
|
||||
strecpy(lang.digit_group_separator, sep == "{NBSP}" ? NBSP : sep);
|
||||
} else if (name == "digitsepcur") {
|
||||
auto sep = consumer.Read(StringConsumer::npos);
|
||||
strecpy(lang.digit_group_separator_currency, sep == "{NBSP}" ? NBSP : sep);
|
||||
} else if (name == "decimalsep") {
|
||||
auto sep = consumer.Read(StringConsumer::npos);
|
||||
strecpy(lang.digit_decimal_separator, sep == "{NBSP}" ? NBSP : sep);
|
||||
} else if (name == "winlangid") {
|
||||
auto langid = consumer.ReadIntegerBase<int32_t>(0);
|
||||
if (langid > UINT16_MAX || langid < 0) {
|
||||
FatalError("Invalid winlangid {}", buf);
|
||||
FatalError("Invalid winlangid {}", langid);
|
||||
}
|
||||
lang.winlangid = static_cast<uint16_t>(langid);
|
||||
} else if (!memcmp(str, "grflangid ", 10)) {
|
||||
const char *buf = str + 10;
|
||||
long langid = std::strtol(buf, nullptr, 16);
|
||||
} else if (name == "grflangid") {
|
||||
auto langid = consumer.ReadIntegerBase<int32_t>(0);
|
||||
if (langid >= 0x7F || langid < 0) {
|
||||
FatalError("Invalid grflangid {}", buf);
|
||||
FatalError("Invalid grflangid {}", langid);
|
||||
}
|
||||
lang.newgrflangid = static_cast<uint8_t>(langid);
|
||||
} else if (!memcmp(str, "gender ", 7)) {
|
||||
} else if (name == "gender") {
|
||||
if (this->master) FatalError("Genders are not allowed in the base translation.");
|
||||
StringConsumer consumer(std::string_view(str + 7));
|
||||
|
||||
for (;;) {
|
||||
auto s = ParseWord(consumer);
|
||||
|
||||
|
@ -156,10 +155,8 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
|
|||
s->copy(lang.genders[lang.num_genders], CASE_GENDER_LEN - 1);
|
||||
lang.num_genders++;
|
||||
}
|
||||
} else if (!memcmp(str, "case ", 5)) {
|
||||
} else if (name == "case") {
|
||||
if (this->master) FatalError("Cases are not allowed in the base translation.");
|
||||
StringConsumer consumer(std::string_view(str + 5));
|
||||
|
||||
for (;;) {
|
||||
auto s = ParseWord(consumer);
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ struct Case {
|
|||
uint8_t caseidx; ///< The index of the case.
|
||||
std::string string; ///< The translation of the case.
|
||||
|
||||
Case(uint8_t caseidx, const std::string &string);
|
||||
Case(uint8_t caseidx, std::string_view string);
|
||||
};
|
||||
|
||||
/** Information about a single string. */
|
||||
|
@ -34,7 +34,7 @@ struct LangString {
|
|||
size_t line; ///< Line of string in source-file.
|
||||
std::vector<Case> translated_cases; ///< Cases of the translation.
|
||||
|
||||
LangString(const std::string &name, const std::string &english, size_t index, size_t line);
|
||||
LangString(std::string_view name, std::string_view english, size_t index, size_t line);
|
||||
void FreeTranslation();
|
||||
};
|
||||
|
||||
|
@ -63,7 +63,7 @@ struct StringReader {
|
|||
|
||||
StringReader(StringData &data, const std::string &file, bool master, bool translation);
|
||||
virtual ~StringReader() = default;
|
||||
void HandleString(char *str);
|
||||
void HandleString(std::string_view str);
|
||||
|
||||
/**
|
||||
* Read a single line from the source of strings.
|
||||
|
@ -75,7 +75,7 @@ struct StringReader {
|
|||
* Handle the pragma of the file.
|
||||
* @param str The pragma string to parse.
|
||||
*/
|
||||
virtual void HandlePragma(char *str, LanguagePackHeader &lang);
|
||||
virtual void HandlePragma(std::string_view str, LanguagePackHeader &lang);
|
||||
|
||||
/**
|
||||
* Start parsing the file.
|
||||
|
|
|
@ -41,7 +41,7 @@ static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0);
|
|||
* @param caseidx The index of the case.
|
||||
* @param string The translation of the case.
|
||||
*/
|
||||
Case::Case(uint8_t caseidx, const std::string &string) :
|
||||
Case::Case(uint8_t caseidx, std::string_view string) :
|
||||
caseidx(caseidx), string(string)
|
||||
{
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ Case::Case(uint8_t caseidx, const std::string &string) :
|
|||
* @param index The index in the string table.
|
||||
* @param line The line this string was found on.
|
||||
*/
|
||||
LangString::LangString(const std::string &name, const std::string &english, size_t index, size_t line) :
|
||||
LangString::LangString(std::string_view name, std::string_view english, size_t index, size_t line) :
|
||||
name(name), english(english), index(index), line(line)
|
||||
{
|
||||
}
|
||||
|
@ -164,30 +164,6 @@ size_t StringData::CountInUse(size_t tab) const
|
|||
return count;
|
||||
}
|
||||
|
||||
static size_t Utf8Validate(const char *s)
|
||||
{
|
||||
char32_t c;
|
||||
|
||||
if (!HasBit(s[0], 7)) {
|
||||
/* 1 byte */
|
||||
return 1;
|
||||
} else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
|
||||
/* 2 bytes */
|
||||
c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
|
||||
if (c >= 0x80) return 2;
|
||||
} else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
|
||||
/* 3 bytes */
|
||||
c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
|
||||
if (c >= 0x800) return 3;
|
||||
} else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
|
||||
/* 4 bytes */
|
||||
c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
|
||||
if (c >= 0x10000 && c <= 0x10FFFF) return 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value)
|
||||
{
|
||||
if (!param.empty()) StrgenWarning("Ignoring trailing letters in command");
|
||||
|
@ -503,91 +479,92 @@ static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::stri
|
|||
return result;
|
||||
}
|
||||
|
||||
void StringReader::HandleString(char *str)
|
||||
[[nodiscard]] static std::string_view StripTrailingWhitespace(std::string_view str)
|
||||
{
|
||||
if (*str == '#') {
|
||||
if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2, _strgen.lang);
|
||||
return;
|
||||
auto len = str.find_last_not_of("\r\n ");
|
||||
if (len == std::string_view::npos) return {};
|
||||
return str.substr(0, len + 1);
|
||||
}
|
||||
|
||||
void StringReader::HandleString(std::string_view src)
|
||||
{
|
||||
/* Ignore blank lines */
|
||||
if (src.empty()) return;
|
||||
|
||||
StringConsumer consumer(src);
|
||||
if (consumer.ReadCharIf('#')) {
|
||||
if (consumer.ReadCharIf('#') && !consumer.ReadCharIf('#')) this->HandlePragma(consumer.Read(StringConsumer::npos), _strgen.lang);
|
||||
return; // ignore comments
|
||||
}
|
||||
|
||||
/* Ignore comments & blank lines */
|
||||
if (*str == ';' || *str == ' ' || *str == '\0') return;
|
||||
|
||||
char *s = strchr(str, ':');
|
||||
if (s == nullptr) {
|
||||
/* Read string name */
|
||||
std::string_view str_name = StripTrailingWhitespace(consumer.ReadUntilChar(':', StringConsumer::KEEP_SEPARATOR));
|
||||
if (!consumer.ReadCharIf(':')) {
|
||||
StrgenError("Line has no ':' delimiter");
|
||||
return;
|
||||
}
|
||||
|
||||
char *t;
|
||||
/* Trim spaces.
|
||||
* After this str points to the command name, and s points to the command contents */
|
||||
for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
|
||||
*t = 0;
|
||||
s++;
|
||||
|
||||
/* Check string is valid UTF-8 */
|
||||
const char *tmp;
|
||||
for (tmp = s; *tmp != '\0';) {
|
||||
size_t len = Utf8Validate(tmp);
|
||||
if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
|
||||
|
||||
char32_t c;
|
||||
Utf8Decode(&c, tmp);
|
||||
if (c <= 0x001F || // ASCII control character range
|
||||
c == 0x200B || // Zero width space
|
||||
(c >= 0xE000 && c <= 0xF8FF) || // Private range
|
||||
(c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
|
||||
StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(c), s);
|
||||
}
|
||||
|
||||
tmp += len;
|
||||
/* Read string case */
|
||||
std::optional<std::string_view> casep;
|
||||
if (auto index = str_name.find("."); index != std::string_view::npos) {
|
||||
casep = str_name.substr(index + 1);
|
||||
str_name = str_name.substr(0, index);
|
||||
}
|
||||
|
||||
/* Check if the string has a case..
|
||||
* The syntax for cases is IDENTNAME.case */
|
||||
char *casep = strchr(str, '.');
|
||||
if (casep != nullptr) *casep++ = '\0';
|
||||
/* Read string data */
|
||||
std::string_view value = consumer.Read(StringConsumer::npos);
|
||||
|
||||
/* Check string is valid UTF-8 */
|
||||
for (StringConsumer validation_consumer(value); validation_consumer.AnyBytesLeft(); ) {
|
||||
auto c = validation_consumer.TryReadUtf8();
|
||||
if (!c.has_value()) StrgenFatal("Invalid UTF-8 sequence in '{}'", value);
|
||||
if (*c <= 0x001F || // ASCII control character range
|
||||
*c == 0x200B || // Zero width space
|
||||
(*c >= 0xE000 && *c <= 0xF8FF) || // Private range
|
||||
(*c >= 0xFFF0 && *c <= 0xFFFF)) { // Specials range
|
||||
StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(*c), value);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if this string already exists.. */
|
||||
LangString *ent = this->data.Find(str);
|
||||
LangString *ent = this->data.Find(std::string(str_name));
|
||||
|
||||
if (this->master) {
|
||||
if (casep != nullptr) {
|
||||
if (casep.has_value()) {
|
||||
StrgenError("Cases in the base translation are not supported.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (ent != nullptr) {
|
||||
StrgenError("String name '{}' is used multiple times", str);
|
||||
StrgenError("String name '{}' is used multiple times", str_name);
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->data.strings[this->data.next_string_id] != nullptr) {
|
||||
StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
|
||||
StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str_name, this->data.strings[this->data.next_string_id]->name);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Allocate a new LangString */
|
||||
this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _strgen.cur_line));
|
||||
this->data.Add(std::make_unique<LangString>(str_name, value, this->data.next_string_id++, _strgen.cur_line));
|
||||
} else {
|
||||
if (ent == nullptr) {
|
||||
StrgenWarning("String name '{}' does not exist in master file", str);
|
||||
StrgenWarning("String name '{}' does not exist in master file", str_name);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ent->translated.empty() && casep == nullptr) {
|
||||
StrgenError("String name '{}' is used multiple times", str);
|
||||
if (!ent->translated.empty() && !casep.has_value()) {
|
||||
StrgenError("String name '{}' is used multiple times", str_name);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the commands match */
|
||||
if (!CheckCommandsMatch(s, ent->english, str)) return;
|
||||
if (!CheckCommandsMatch(value, ent->english, str_name)) return;
|
||||
|
||||
if (casep != nullptr) {
|
||||
ent->translated_cases.emplace_back(ResolveCaseName(casep), s);
|
||||
if (casep.has_value()) {
|
||||
ent->translated_cases.emplace_back(ResolveCaseName(*casep), value);
|
||||
} else {
|
||||
ent->translated = s;
|
||||
ent->translated = value;
|
||||
/* If the string was translated, use the line from the
|
||||
* translated language so errors in the translated file
|
||||
* are properly referenced to. */
|
||||
|
@ -596,23 +573,20 @@ void StringReader::HandleString(char *str)
|
|||
}
|
||||
}
|
||||
|
||||
void StringReader::HandlePragma(char *str, LanguagePackHeader &lang)
|
||||
void StringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
|
||||
{
|
||||
if (!memcmp(str, "plural ", 7)) {
|
||||
lang.plural_form = atoi(str + 7);
|
||||
StringConsumer consumer(str);
|
||||
auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
|
||||
if (name == "plural") {
|
||||
lang.plural_form = consumer.ReadIntegerBase<uint32_t>(10);
|
||||
if (lang.plural_form >= lengthof(_plural_forms)) {
|
||||
StrgenFatal("Invalid pluralform {}", lang.plural_form);
|
||||
}
|
||||
} else {
|
||||
StrgenFatal("unknown pragma '{}'", str);
|
||||
StrgenFatal("unknown pragma '{}'", name);
|
||||
}
|
||||
}
|
||||
|
||||
static void StripTrailingWhitespace(std::string &str)
|
||||
{
|
||||
str.erase(str.find_last_not_of("\r\n ") + 1);
|
||||
}
|
||||
|
||||
void StringReader::ParseFile()
|
||||
{
|
||||
_strgen.warnings = _strgen.errors = 0;
|
||||
|
@ -631,8 +605,7 @@ void StringReader::ParseFile()
|
|||
std::optional<std::string> line = this->ReadLine();
|
||||
if (!line.has_value()) return;
|
||||
|
||||
StripTrailingWhitespace(line.value());
|
||||
this->HandleString(line.value().data());
|
||||
this->HandleString(StripTrailingWhitespace(line.value()));
|
||||
_strgen.cur_line++;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue