1
0
Fork 0

Codechange: Parse translation strings using StringConsumer.

pull/14000/head
frosch 2025-03-31 17:30:54 +02:00 committed by frosch
parent b81a35ea89
commit b27fd83ff1
5 changed files with 75 additions and 122 deletions

View File

@ -267,7 +267,7 @@ static void ExtractStringParams(const StringData &data, StringParamsList &params
if (ls != nullptr) { if (ls != nullptr) {
StringParams &param = params.emplace_back(); StringParams &param = params.emplace_back();
ParsedCommandStruct pcs = ExtractCommandString(ls->english.c_str(), false); ParsedCommandStruct pcs = ExtractCommandString(ls->english, false);
for (auto it = pcs.consuming_commands.begin(); it != pcs.consuming_commands.end(); it++) { for (auto it = pcs.consuming_commands.begin(); it != pcs.consuming_commands.end(); it++) {
if (*it == nullptr) { if (*it == nullptr) {

View File

@ -146,10 +146,10 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
lang.newgrflangid = static_cast<uint8_t>(langid); lang.newgrflangid = static_cast<uint8_t>(langid);
} else if (!memcmp(str, "gender ", 7)) { } else if (!memcmp(str, "gender ", 7)) {
if (this->master) FatalError("Genders are not allowed in the base translation."); if (this->master) FatalError("Genders are not allowed in the base translation.");
const char *buf = str + 7; StringConsumer consumer(std::string_view(str + 7));
for (;;) { for (;;) {
auto s = ParseWord(&buf); auto s = ParseWord(consumer);
if (!s.has_value()) break; if (!s.has_value()) break;
if (lang.num_genders >= MAX_NUM_GENDERS) FatalError("Too many genders, max {}", MAX_NUM_GENDERS); if (lang.num_genders >= MAX_NUM_GENDERS) FatalError("Too many genders, max {}", MAX_NUM_GENDERS);
@ -158,10 +158,10 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang)
} }
} else if (!memcmp(str, "case ", 5)) { } else if (!memcmp(str, "case ", 5)) {
if (this->master) FatalError("Cases are not allowed in the base translation."); if (this->master) FatalError("Cases are not allowed in the base translation.");
const char *buf = str + 5; StringConsumer consumer(std::string_view(str + 5));
for (;;) { for (;;) {
auto s = ParseWord(&buf); auto s = ParseWord(consumer);
if (!s.has_value()) break; if (!s.has_value()) break;
if (lang.num_cases >= MAX_NUM_CASES) FatalError("Too many cases, max {}", MAX_NUM_CASES); if (lang.num_cases >= MAX_NUM_CASES) FatalError("Too many cases, max {}", MAX_NUM_CASES);

View File

@ -10,6 +10,7 @@
#ifndef STRGEN_H #ifndef STRGEN_H
#define STRGEN_H #define STRGEN_H
#include "../core/string_consumer.hpp"
#include "../language.h" #include "../language.h"
#include "../3rdparty/fmt/format.h" #include "../3rdparty/fmt/format.h"
@ -144,7 +145,7 @@ struct ParsedCommandStruct {
}; };
const CmdStruct *TranslateCmdForCompare(const CmdStruct *a); const CmdStruct *TranslateCmdForCompare(const CmdStruct *a);
ParsedCommandStruct ExtractCommandString(const char *s, bool warnings); ParsedCommandStruct ExtractCommandString(std::string_view s, bool warnings);
void StrgenWarningI(const std::string &msg); void StrgenWarningI(const std::string &msg);
void StrgenErrorI(const std::string &msg); void StrgenErrorI(const std::string &msg);
@ -152,7 +153,7 @@ void StrgenErrorI(const std::string &msg);
#define StrgenWarning(format_string, ...) StrgenWarningI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__)) #define StrgenWarning(format_string, ...) StrgenWarningI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__))
#define StrgenError(format_string, ...) StrgenErrorI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__)) #define StrgenError(format_string, ...) StrgenErrorI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__))
#define StrgenFatal(format_string, ...) StrgenFatalI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__)) #define StrgenFatal(format_string, ...) StrgenFatalI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__))
std::optional<std::string_view> ParseWord(const char **buf); std::optional<std::string_view> ParseWord(StringConsumer &consumer);
/** Global state shared between strgen.cpp, game_text.cpp and strgen_base.cpp */ /** Global state shared between strgen.cpp, game_text.cpp and strgen_base.cpp */
struct StrgenState { struct StrgenState {

View File

@ -33,7 +33,7 @@ struct ParsedCommandString {
std::optional<size_t> argno; std::optional<size_t> argno;
std::optional<uint8_t> casei; std::optional<uint8_t> casei;
}; };
static ParsedCommandString ParseCommandString(const char **str); static ParsedCommandString ParseCommandString(StringConsumer &consumer);
static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0); static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0);
/** /**
@ -139,9 +139,9 @@ uint32_t StringData::Version() const
hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1); hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
hash = VersionHashStr(hash, ls->name); hash = VersionHashStr(hash, ls->name);
const char *s = ls->english.c_str(); StringConsumer consumer(ls->english);
ParsedCommandString cs; ParsedCommandString cs;
while ((cs = ParseCommandString(&s)).cmd != nullptr) { while ((cs = ParseCommandString(consumer)).cmd != nullptr) {
if (cs.cmd->flags.Test(CmdFlag::DontCount)) continue; if (cs.cmd->flags.Test(CmdFlag::DontCount)) continue;
hash ^= (cs.cmd - _cmd_structs) * 0x1234567; hash ^= (cs.cmd - _cmd_structs) * 0x1234567;
@ -188,62 +188,41 @@ static size_t Utf8Validate(const char *s)
return 0; return 0;
} }
void EmitSingleChar(StringBuilder &builder, const char *buf, char32_t value) void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value)
{ {
if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command"); if (!param.empty()) StrgenWarning("Ignoring trailing letters in command");
builder.PutUtf8(value); builder.PutUtf8(value);
} }
/* The plural specifier looks like /* The plural specifier looks like
* {NUM} {PLURAL <ARG#> passenger passengers} then it picks either passenger/passengers depending on the count in NUM */ * {NUM} {PLURAL <ARG#> passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
static std::pair<std::optional<size_t>, std::optional<size_t>> ParseRelNum(const char **buf) static std::pair<std::optional<size_t>, std::optional<size_t>> ParseRelNum(StringConsumer &consumer)
{ {
const char *s = *buf; consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE);
char *end; std::optional<size_t> v = consumer.TryReadIntegerBase<size_t>(10);
while (*s == ' ' || *s == '\t') s++;
size_t v = std::strtoul(s, &end, 0);
if (end == s) return {};
std::optional<size_t> offset; std::optional<size_t> offset;
if (*end == ':') { if (v.has_value() && consumer.ReadCharIf(':')) {
/* Take the Nth within */ /* Take the Nth within */
s = end + 1; offset = consumer.TryReadIntegerBase<size_t>(10);
offset = std::strtoul(s, &end, 0); if (!offset.has_value()) StrgenFatal("Expected number for substring parameter");
if (end == s) return {};
} }
*buf = end;
return {v, offset}; return {v, offset};
} }
/* Parse out the next word, or return an empty optional if there is none */ /* Parse out the next word, or return an empty optional if there is none */
std::optional<std::string_view> ParseWord(const char **buf) std::optional<std::string_view> ParseWord(StringConsumer &consumer)
{ {
const char *s = *buf; consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE);
if (!consumer.AnyBytesLeft()) return {};
while (*s == ' ' || *s == '\t') s++; if (consumer.ReadCharIf('"')) {
if (*s == '\0') return {};
if (*s == '"') {
const char *begin = ++s;
/* parse until next " or NUL */ /* parse until next " or NUL */
for (;;) { auto result = consumer.ReadUntilChar('"', StringConsumer::KEEP_SEPARATOR);
if (*s == '\0') StrgenFatal("Unterminated quotes"); if (!consumer.ReadCharIf('"')) StrgenFatal("Unterminated quotes");
if (*s == '"') { return result;
*buf = s + 1;
return std::string_view(begin, s - begin);
}
s++;
}
} else { } else {
/* proceed until whitespace or NUL */ /* proceed until whitespace or NUL */
const char *begin = s; return consumer.ReadUntilCharIn(StringConsumer::WHITESPACE_NO_NEWLINE);
for (;;) {
if (*s == '\0' || *s == ' ' || *s == '\t') {
*buf = s;
return std::string_view(begin, s - begin);
}
s++;
}
} }
} }
@ -262,10 +241,12 @@ static void EmitWordList(StringBuilder &builder, const std::vector<std::string>
} }
} }
void EmitPlural(StringBuilder &builder, const char *buf, char32_t) void EmitPlural(StringBuilder &builder, std::string_view param, char32_t)
{ {
StringConsumer consumer(param);
/* Parse out the number, if one exists. Otherwise default to prev arg. */ /* Parse out the number, if one exists. Otherwise default to prev arg. */
auto [argidx, offset] = ParseRelNum(&buf); auto [argidx, offset] = ParseRelNum(consumer);
if (!argidx.has_value()) { if (!argidx.has_value()) {
if (_cur_argidx == 0) StrgenFatal("Plural choice needs positional reference"); if (_cur_argidx == 0) StrgenFatal("Plural choice needs positional reference");
argidx = _cur_argidx - 1; argidx = _cur_argidx - 1;
@ -283,7 +264,7 @@ void EmitPlural(StringBuilder &builder, const char *buf, char32_t)
/* Parse each string */ /* Parse each string */
std::vector<std::string> words; std::vector<std::string> words;
for (;;) { for (;;) {
auto word = ParseWord(&buf); auto word = ParseWord(consumer);
if (!word.has_value()) break; if (!word.has_value()) break;
words.emplace_back(*word); words.emplace_back(*word);
} }
@ -315,14 +296,14 @@ void EmitPlural(StringBuilder &builder, const char *buf, char32_t)
EmitWordList(builder, words); EmitWordList(builder, words);
} }
void EmitGender(StringBuilder &builder, const char *buf, char32_t) void EmitGender(StringBuilder &builder, std::string_view param, char32_t)
{ {
if (buf[0] == '=') { StringConsumer consumer(param);
buf++; if (consumer.ReadCharIf('=')) {
/* This is a {G=DER} command */ /* This is a {G=DER} command */
auto nw = _strgen.lang.GetGenderIndex(buf); auto gender = consumer.Read(StringConsumer::npos);
if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf); auto nw = _strgen.lang.GetGenderIndex(gender);
if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", gender);
/* now nw contains the gender index */ /* now nw contains the gender index */
builder.PutUtf8(SCC_GENDER_INDEX); builder.PutUtf8(SCC_GENDER_INDEX);
@ -330,7 +311,7 @@ void EmitGender(StringBuilder &builder, const char *buf, char32_t)
} else { } else {
/* This is a {G 0 foo bar two} command. /* This is a {G 0 foo bar two} command.
* If no relative number exists, default to +0 */ * If no relative number exists, default to +0 */
auto [argidx, offset] = ParseRelNum(&buf); auto [argidx, offset] = ParseRelNum(consumer);
if (!argidx.has_value()) argidx = _cur_argidx; if (!argidx.has_value()) argidx = _cur_argidx;
if (!offset.has_value()) offset = 0; if (!offset.has_value()) offset = 0;
@ -341,7 +322,7 @@ void EmitGender(StringBuilder &builder, const char *buf, char32_t)
std::vector<std::string> words; std::vector<std::string> words;
for (;;) { for (;;) {
auto word = ParseWord(&buf); auto word = ParseWord(consumer);
if (!word.has_value()) break; if (!word.has_value()) break;
words.emplace_back(*word); words.emplace_back(*word);
} }
@ -370,74 +351,45 @@ static uint8_t ResolveCaseName(std::string_view str)
} }
/* returns cmd == nullptr on eof, on an undefined command, or on a missing '}' */ /* returns cmd == nullptr on eof, on an undefined command, or on a missing '}' */
static ParsedCommandString ParseCommandString(const char **str) static ParsedCommandString ParseCommandString(StringConsumer &consumer)
{ {
ParsedCommandString result; ParsedCommandString result;
const char *s = *str;
/* Scan to the next command, exit if there's no next command. */ /* Scan to the next command, exit if there's no next command. */
for (; *s != '{'; s++) { consumer.SkipUntilChar('{', StringConsumer::KEEP_SEPARATOR);
if (*s == '\0') return {}; if (!consumer.ReadCharIf('{')) return {};
}
s++; // Skip past the {
if (*s >= '0' && *s <= '9') { if (auto argno = consumer.TryReadIntegerBase<uint32_t>(10); argno.has_value()) {
char *end; result.argno = argno;
if (!consumer.ReadCharIf(':')) StrgenFatal("missing arg #");
result.argno = std::strtoul(s, &end, 0);
if (*end != ':') StrgenFatal("missing arg #");
s = end + 1;
} }
/* parse command name */ /* parse command name */
const char *start = s; auto command = consumer.ReadUntilCharIn("} =.");
char c;
do {
c = *s++;
} while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
std::string_view command(start, s - start - 1);
result.cmd = FindCmd(command); result.cmd = FindCmd(command);
if (result.cmd == nullptr) { if (result.cmd == nullptr) {
StrgenError("Undefined command '{}'", command); StrgenError("Undefined command '{}'", command);
return {}; return {};
} }
if (c == '.') { /* parse case */
const char *casep = s; if (consumer.ReadCharIf('.')) {
if (!result.cmd->flags.Test(CmdFlag::Case)) { if (!result.cmd->flags.Test(CmdFlag::Case)) {
StrgenFatal("Command '{}' can't have a case", result.cmd->cmd); StrgenFatal("Command '{}' can't have a case", result.cmd->cmd);
} }
do { auto casep = consumer.ReadUntilCharIn("} ");
c = *s++; result.casei = ResolveCaseName(casep);
} while (c != '}' && c != ' ' && c != '\0');
result.casei = ResolveCaseName(std::string_view(casep, s - casep - 1));
} }
if (c == '\0') { /* parse params */
StrgenError("Missing }} from command '{}'", start); result.param = consumer.ReadUntilChar('}', StringConsumer::KEEP_SEPARATOR);
if (!consumer.ReadCharIf('}')) {
StrgenError("Missing }} from command '{}'", result.cmd->cmd);
return {}; return {};
} }
if (c != '}') {
if (c == '=') s--;
/* copy params */
start = s;
for (;;) {
c = *s++;
if (c == '}') break;
if (c == '\0') {
StrgenError("Missing }} from command '{}'", start);
return {};
}
result.param += c;
}
}
*str = s;
return result; return result;
} }
@ -453,14 +405,15 @@ StringReader::StringReader(StringData &data, const std::string &file, bool maste
{ {
} }
ParsedCommandStruct ExtractCommandString(const char *s, bool) ParsedCommandStruct ExtractCommandString(std::string_view s, bool)
{ {
ParsedCommandStruct p; ParsedCommandStruct p;
StringConsumer consumer(s);
size_t argidx = 0; size_t argidx = 0;
for (;;) { for (;;) {
/* read until next command from a. */ /* read until next command from a. */
auto cs = ParseCommandString(&s); auto cs = ParseCommandString(consumer);
if (cs.cmd == nullptr) break; if (cs.cmd == nullptr) break;
@ -499,7 +452,7 @@ const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
return a; return a;
} }
static bool CheckCommandsMatch(const char *a, const char *b, const char *name) static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::string_view name)
{ {
/* If we're not translating, i.e. we're compiling the base language, /* If we're not translating, i.e. we're compiling the base language,
* it is pointless to do all these checks as it'll always be correct. * it is pointless to do all these checks as it'll always be correct.
@ -629,7 +582,7 @@ void StringReader::HandleString(char *str)
} }
/* make sure that the commands match */ /* make sure that the commands match */
if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return; if (!CheckCommandsMatch(s, ent->english, str)) return;
if (casep != nullptr) { if (casep != nullptr) {
ent->translated_cases.emplace_back(ResolveCaseName(casep), s); ent->translated_cases.emplace_back(ResolveCaseName(casep), s);
@ -735,20 +688,19 @@ static void PutArgidxCommand(StringBuilder &builder)
builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(_cur_argidx))); builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(_cur_argidx)));
} }
static std::string PutCommandString(const char *str) static std::string PutCommandString(std::string_view str)
{ {
std::string result; std::string result;
StringBuilder builder(result); StringBuilder builder(result);
StringConsumer consumer(str);
_cur_argidx = 0; _cur_argidx = 0;
while (*str != '\0') { for (;;) {
/* Process characters as they are until we encounter a { */ /* Process characters as they are until we encounter a { */
if (*str != '{') { builder.Put(consumer.ReadUntilChar('{', StringConsumer::KEEP_SEPARATOR));
builder.PutChar(*str++); if (!consumer.AnyBytesLeft()) break;
continue;
}
auto cs = ParseCommandString(&str); auto cs = ParseCommandString(consumer);
auto *cmd = cs.cmd; auto *cmd = cs.cmd;
if (cmd == nullptr) break; if (cmd == nullptr) break;
@ -772,7 +724,7 @@ static std::string PutCommandString(const char *str)
} }
} }
cmd->proc(builder, cs.param.c_str(), cmd->value); cmd->proc(builder, cs.param, cmd->value);
} }
return result; return result;
} }
@ -848,7 +800,7 @@ void LanguageWriter::WriteLang(const StringData &data)
} }
/* Extract the strings and stuff from the english command string */ /* Extract the strings and stuff from the english command string */
_cur_pcs = ExtractCommandString(ls->english.c_str(), false); _cur_pcs = ExtractCommandString(ls->english, false);
_translated = !ls->translated_cases.empty() || !ls->translated.empty(); _translated = !ls->translated_cases.empty() || !ls->translated.empty();
const std::string &cmdp = _translated ? ls->translated : ls->english; const std::string &cmdp = _translated ? ls->translated : ls->english;
@ -863,7 +815,7 @@ void LanguageWriter::WriteLang(const StringData &data)
/* Write each case */ /* Write each case */
for (const Case &c : ls->translated_cases) { for (const Case &c : ls->translated_cases) {
auto case_str = PutCommandString(c.string.c_str()); auto case_str = PutCommandString(c.string);
builder.PutUint8(c.caseidx); builder.PutUint8(c.caseidx);
builder.PutUint16LE(static_cast<uint16_t>(case_str.size())); builder.PutUint16LE(static_cast<uint16_t>(case_str.size()));
builder.Put(case_str); builder.Put(case_str);
@ -871,7 +823,7 @@ void LanguageWriter::WriteLang(const StringData &data)
} }
std::string def_str; std::string def_str;
if (!cmdp.empty()) def_str = PutCommandString(cmdp.c_str()); if (!cmdp.empty()) def_str = PutCommandString(cmdp);
if (!ls->translated_cases.empty()) { if (!ls->translated_cases.empty()) {
builder.PutUint16LE(static_cast<uint16_t>(def_str.size())); builder.PutUint16LE(static_cast<uint16_t>(def_str.size()));
} }

View File

@ -17,7 +17,7 @@ enum class CmdFlag : uint8_t {
using CmdFlags = EnumBitSet<CmdFlag, uint8_t>; using CmdFlags = EnumBitSet<CmdFlag, uint8_t>;
class StringBuilder; class StringBuilder;
typedef void (*ParseCmdProc)(StringBuilder &builder, const char *buf, char32_t value); typedef void (*ParseCmdProc)(StringBuilder &builder, std::string_view param, char32_t value);
struct CmdStruct { struct CmdStruct {
std::string_view cmd; std::string_view cmd;
@ -28,9 +28,9 @@ struct CmdStruct {
CmdFlags flags; CmdFlags flags;
}; };
extern void EmitSingleChar(StringBuilder &builder, const char *buf, char32_t value); extern void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value);
extern void EmitPlural(StringBuilder &builder, const char *buf, char32_t value); extern void EmitPlural(StringBuilder &builder, std::string_view param, char32_t value);
extern void EmitGender(StringBuilder &builder, const char *buf, char32_t value); extern void EmitGender(StringBuilder &builder, std::string_view param, char32_t value);
static const CmdStruct _cmd_structs[] = { static const CmdStruct _cmd_structs[] = {
/* Font size */ /* Font size */