diff --git a/src/game/game_text.cpp b/src/game/game_text.cpp index 5a7aa0365c..07182ad712 100644 --- a/src/game/game_text.cpp +++ b/src/game/game_text.cpp @@ -267,7 +267,7 @@ static void ExtractStringParams(const StringData &data, StringParamsList &params if (ls != nullptr) { StringParams &param = params.emplace_back(); - ParsedCommandStruct pcs = ExtractCommandString(ls->english.c_str(), false); + ParsedCommandStruct pcs = ExtractCommandString(ls->english, false); for (auto it = pcs.consuming_commands.begin(); it != pcs.consuming_commands.end(); it++) { if (*it == nullptr) { diff --git a/src/strgen/strgen.cpp b/src/strgen/strgen.cpp index c86c0c1029..b54da8a7b9 100644 --- a/src/strgen/strgen.cpp +++ b/src/strgen/strgen.cpp @@ -146,10 +146,10 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang) lang.newgrflangid = static_cast(langid); } else if (!memcmp(str, "gender ", 7)) { if (this->master) FatalError("Genders are not allowed in the base translation."); - const char *buf = str + 7; + StringConsumer consumer(std::string_view(str + 7)); for (;;) { - auto s = ParseWord(&buf); + auto s = ParseWord(consumer); if (!s.has_value()) break; if (lang.num_genders >= MAX_NUM_GENDERS) FatalError("Too many genders, max {}", MAX_NUM_GENDERS); @@ -158,10 +158,10 @@ void FileStringReader::HandlePragma(char *str, LanguagePackHeader &lang) } } else if (!memcmp(str, "case ", 5)) { if (this->master) FatalError("Cases are not allowed in the base translation."); - const char *buf = str + 5; + StringConsumer consumer(std::string_view(str + 5)); for (;;) { - auto s = ParseWord(&buf); + auto s = ParseWord(consumer); if (!s.has_value()) break; if (lang.num_cases >= MAX_NUM_CASES) FatalError("Too many cases, max {}", MAX_NUM_CASES); diff --git a/src/strgen/strgen.h b/src/strgen/strgen.h index 78a6c8def7..18e007f71e 100644 --- a/src/strgen/strgen.h +++ b/src/strgen/strgen.h @@ -10,6 +10,7 @@ #ifndef STRGEN_H #define STRGEN_H +#include "../core/string_consumer.hpp" #include
"../language.h" #include "../3rdparty/fmt/format.h" @@ -144,7 +145,7 @@ struct ParsedCommandStruct { }; const CmdStruct *TranslateCmdForCompare(const CmdStruct *a); -ParsedCommandStruct ExtractCommandString(const char *s, bool warnings); +ParsedCommandStruct ExtractCommandString(std::string_view s, bool warnings); void StrgenWarningI(const std::string &msg); void StrgenErrorI(const std::string &msg); @@ -152,7 +153,7 @@ void StrgenErrorI(const std::string &msg); #define StrgenWarning(format_string, ...) StrgenWarningI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__)) #define StrgenError(format_string, ...) StrgenErrorI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__)) #define StrgenFatal(format_string, ...) StrgenFatalI(fmt::format(FMT_STRING(format_string) __VA_OPT__(,) __VA_ARGS__)) -std::optional ParseWord(const char **buf); +std::optional ParseWord(StringConsumer &consumer); /** Global state shared between strgen.cpp, game_text.cpp and strgen_base.cpp */ struct StrgenState { diff --git a/src/strgen/strgen_base.cpp b/src/strgen/strgen_base.cpp index 534003ec16..aa831c1945 100644 --- a/src/strgen/strgen_base.cpp +++ b/src/strgen/strgen_base.cpp @@ -33,7 +33,7 @@ struct ParsedCommandString { std::optional argno; std::optional casei; }; -static ParsedCommandString ParseCommandString(const char **str); +static ParsedCommandString ParseCommandString(StringConsumer &consumer); static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0); /** @@ -139,9 +139,9 @@ uint32_t StringData::Version() const hash = (hash & 1 ? 
hash >> 1 ^ 0xDEADBEEF : hash >> 1); hash = VersionHashStr(hash, ls->name); - const char *s = ls->english.c_str(); + StringConsumer consumer(ls->english); ParsedCommandString cs; - while ((cs = ParseCommandString(&s)).cmd != nullptr) { + while ((cs = ParseCommandString(consumer)).cmd != nullptr) { if (cs.cmd->flags.Test(CmdFlag::DontCount)) continue; hash ^= (cs.cmd - _cmd_structs) * 0x1234567; @@ -188,62 +188,41 @@ static size_t Utf8Validate(const char *s) return 0; } -void EmitSingleChar(StringBuilder &builder, const char *buf, char32_t value) +void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value) { - if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command"); + if (!param.empty()) StrgenWarning("Ignoring trailing letters in command"); builder.PutUtf8(value); } /* The plural specifier looks like * {NUM} {PLURAL passenger passengers} then it picks either passenger/passengers depending on the count in NUM */ -static std::pair, std::optional> ParseRelNum(const char **buf) +static std::pair, std::optional> ParseRelNum(StringConsumer &consumer) { - const char *s = *buf; - char *end; - - while (*s == ' ' || *s == '\t') s++; - size_t v = std::strtoul(s, &end, 0); - if (end == s) return {}; + consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE); + std::optional v = consumer.TryReadIntegerBase(10); std::optional offset; - if (*end == ':') { + if (v.has_value() && consumer.ReadCharIf(':')) { /* Take the Nth within */ - s = end + 1; - offset = std::strtoul(s, &end, 0); - if (end == s) return {}; + offset = consumer.TryReadIntegerBase(10); + if (!offset.has_value()) StrgenFatal("Expected number for substring parameter"); } - *buf = end; return {v, offset}; } /* Parse out the next word, or nullptr */ -std::optional ParseWord(const char **buf) +std::optional ParseWord(StringConsumer &consumer) { - const char *s = *buf; + consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE); + if (!consumer.AnyBytesLeft()) 
return {}; - while (*s == ' ' || *s == '\t') s++; - if (*s == '\0') return {}; - - if (*s == '"') { - const char *begin = ++s; + if (consumer.ReadCharIf('"')) { /* parse until next " or NUL */ - for (;;) { - if (*s == '\0') StrgenFatal("Unterminated quotes"); - if (*s == '"') { - *buf = s + 1; - return std::string_view(begin, s - begin); - } - s++; - } + auto result = consumer.ReadUntilChar('"', StringConsumer::KEEP_SEPARATOR); + if (!consumer.ReadCharIf('"')) StrgenFatal("Unterminated quotes"); + return result; } else { /* proceed until whitespace or NUL */ - const char *begin = s; - for (;;) { - if (*s == '\0' || *s == ' ' || *s == '\t') { - *buf = s; - return std::string_view(begin, s - begin); - } - s++; - } + return consumer.ReadUntilCharIn(StringConsumer::WHITESPACE_NO_NEWLINE); } } @@ -262,10 +241,12 @@ static void EmitWordList(StringBuilder &builder, const std::vector } } -void EmitPlural(StringBuilder &builder, const char *buf, char32_t) +void EmitPlural(StringBuilder &builder, std::string_view param, char32_t) { + StringConsumer consumer(param); + /* Parse out the number, if one exists. Otherwise default to prev arg. 
*/ - auto [argidx, offset] = ParseRelNum(&buf); + auto [argidx, offset] = ParseRelNum(consumer); if (!argidx.has_value()) { if (_cur_argidx == 0) StrgenFatal("Plural choice needs positional reference"); argidx = _cur_argidx - 1; @@ -283,7 +264,7 @@ void EmitPlural(StringBuilder &builder, const char *buf, char32_t) /* Parse each string */ std::vector words; for (;;) { - auto word = ParseWord(&buf); + auto word = ParseWord(consumer); if (!word.has_value()) break; words.emplace_back(*word); } @@ -315,14 +296,14 @@ void EmitPlural(StringBuilder &builder, const char *buf, char32_t) EmitWordList(builder, words); } -void EmitGender(StringBuilder &builder, const char *buf, char32_t) +void EmitGender(StringBuilder &builder, std::string_view param, char32_t) { - if (buf[0] == '=') { - buf++; - + StringConsumer consumer(param); + if (consumer.ReadCharIf('=')) { /* This is a {G=DER} command */ - auto nw = _strgen.lang.GetGenderIndex(buf); - if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf); + auto gender = consumer.Read(StringConsumer::npos); + auto nw = _strgen.lang.GetGenderIndex(gender); + if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", gender); /* now nw contains the gender index */ builder.PutUtf8(SCC_GENDER_INDEX); @@ -330,7 +311,7 @@ void EmitGender(StringBuilder &builder, const char *buf, char32_t) } else { /* This is a {G 0 foo bar two} command. 
* If no relative number exists, default to +0 */ - auto [argidx, offset] = ParseRelNum(&buf); + auto [argidx, offset] = ParseRelNum(consumer); if (!argidx.has_value()) argidx = _cur_argidx; if (!offset.has_value()) offset = 0; @@ -341,7 +322,7 @@ void EmitGender(StringBuilder &builder, const char *buf, char32_t) std::vector words; for (;;) { - auto word = ParseWord(&buf); + auto word = ParseWord(consumer); if (!word.has_value()) break; words.emplace_back(*word); } @@ -370,74 +351,45 @@ static uint8_t ResolveCaseName(std::string_view str) } /* returns cmd == nullptr on eof */ -static ParsedCommandString ParseCommandString(const char **str) +static ParsedCommandString ParseCommandString(StringConsumer &consumer) { ParsedCommandString result; - const char *s = *str; /* Scan to the next command, exit if there's no next command. */ - for (; *s != '{'; s++) { - if (*s == '\0') return {}; - } - s++; // Skip past the { + consumer.SkipUntilChar('{', StringConsumer::KEEP_SEPARATOR); + if (!consumer.ReadCharIf('{')) return {}; - if (*s >= '0' && *s <= '9') { - char *end; - - result.argno = std::strtoul(s, &end, 0); - if (*end != ':') StrgenFatal("missing arg #"); - s = end + 1; + if (auto argno = consumer.TryReadIntegerBase(10); argno.has_value()) { + result.argno = argno; + if (!consumer.ReadCharIf(':')) StrgenFatal("missing arg #"); } /* parse command name */ - const char *start = s; - char c; - do { - c = *s++; - } while (c != '}' && c != ' ' && c != '=' && c != '.' 
&& c != 0); - - std::string_view command(start, s - start - 1); + auto command = consumer.ReadUntilCharIn("} =."); result.cmd = FindCmd(command); if (result.cmd == nullptr) { StrgenError("Undefined command '{}'", command); return {}; } - if (c == '.') { - const char *casep = s; - + /* parse case */ + if (consumer.ReadCharIf('.')) { if (!result.cmd->flags.Test(CmdFlag::Case)) { StrgenFatal("Command '{}' can't have a case", result.cmd->cmd); } - do { - c = *s++; - } while (c != '}' && c != ' ' && c != '\0'); - result.casei = ResolveCaseName(std::string_view(casep, s - casep - 1)); + auto casep = consumer.ReadUntilCharIn("} "); + result.casei = ResolveCaseName(casep); } - if (c == '\0') { - StrgenError("Missing }} from command '{}'", start); + /* parse params */ + result.param = consumer.ReadUntilChar('}', StringConsumer::KEEP_SEPARATOR); + + if (!consumer.ReadCharIf('}')) { + StrgenError("Missing }} from command '{}'", result.cmd->cmd); return {}; } - if (c != '}') { - if (c == '=') s--; - /* copy params */ - start = s; - for (;;) { - c = *s++; - if (c == '}') break; - if (c == '\0') { - StrgenError("Missing }} from command '{}'", start); - return {}; - } - result.param += c; - } - } - - *str = s; - return result; } @@ -453,14 +405,15 @@ StringReader::StringReader(StringData &data, const std::string &file, bool maste { } -ParsedCommandStruct ExtractCommandString(const char *s, bool) +ParsedCommandStruct ExtractCommandString(std::string_view s, bool) { ParsedCommandStruct p; + StringConsumer consumer(s); size_t argidx = 0; for (;;) { /* read until next command from a. 
*/ - auto cs = ParseCommandString(&s); + auto cs = ParseCommandString(consumer); if (cs.cmd == nullptr) break; @@ -499,7 +452,7 @@ const CmdStruct *TranslateCmdForCompare(const CmdStruct *a) return a; } -static bool CheckCommandsMatch(const char *a, const char *b, const char *name) +static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::string_view name) { /* If we're not translating, i.e. we're compiling the base language, * it is pointless to do all these checks as it'll always be correct. @@ -629,7 +582,7 @@ void StringReader::HandleString(char *str) } /* make sure that the commands match */ - if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return; + if (!CheckCommandsMatch(s, ent->english, str)) return; if (casep != nullptr) { ent->translated_cases.emplace_back(ResolveCaseName(casep), s); @@ -735,20 +688,19 @@ static void PutArgidxCommand(StringBuilder &builder) builder.PutUint8(static_cast(TranslateArgumentIdx(_cur_argidx))); } -static std::string PutCommandString(const char *str) +static std::string PutCommandString(std::string_view str) { std::string result; StringBuilder builder(result); + StringConsumer consumer(str); _cur_argidx = 0; - while (*str != '\0') { + for (;;) { /* Process characters as they are until we encounter a { */ - if (*str != '{') { - builder.PutChar(*str++); - continue; - } + builder.Put(consumer.ReadUntilChar('{', StringConsumer::KEEP_SEPARATOR)); + if (!consumer.AnyBytesLeft()) break; - auto cs = ParseCommandString(&str); + auto cs = ParseCommandString(consumer); auto *cmd = cs.cmd; if (cmd == nullptr) break; @@ -772,7 +724,7 @@ static std::string PutCommandString(const char *str) } } - cmd->proc(builder, cs.param.c_str(), cmd->value); + cmd->proc(builder, cs.param, cmd->value); } return result; } @@ -848,7 +800,7 @@ void LanguageWriter::WriteLang(const StringData &data) } /* Extract the strings and stuff from the english command string */ - _cur_pcs = ExtractCommandString(ls->english.c_str(), false); + 
_cur_pcs = ExtractCommandString(ls->english, false); _translated = !ls->translated_cases.empty() || !ls->translated.empty(); const std::string &cmdp = _translated ? ls->translated : ls->english; @@ -863,7 +815,7 @@ void LanguageWriter::WriteLang(const StringData &data) /* Write each case */ for (const Case &c : ls->translated_cases) { - auto case_str = PutCommandString(c.string.c_str()); + auto case_str = PutCommandString(c.string); builder.PutUint8(c.caseidx); builder.PutUint16LE(static_cast(case_str.size())); builder.Put(case_str); @@ -871,7 +823,7 @@ void LanguageWriter::WriteLang(const StringData &data) } std::string def_str; - if (!cmdp.empty()) def_str = PutCommandString(cmdp.c_str()); + if (!cmdp.empty()) def_str = PutCommandString(cmdp); if (!ls->translated_cases.empty()) { builder.PutUint16LE(static_cast(def_str.size())); } diff --git a/src/table/strgen_tables.h b/src/table/strgen_tables.h index bd57fa795d..2791e1982a 100644 --- a/src/table/strgen_tables.h +++ b/src/table/strgen_tables.h @@ -17,7 +17,7 @@ enum class CmdFlag : uint8_t { using CmdFlags = EnumBitSet; class StringBuilder; -typedef void (*ParseCmdProc)(StringBuilder &builder, const char *buf, char32_t value); +typedef void (*ParseCmdProc)(StringBuilder &builder, std::string_view param, char32_t value); struct CmdStruct { std::string_view cmd; @@ -28,9 +28,9 @@ struct CmdStruct { CmdFlags flags; }; -extern void EmitSingleChar(StringBuilder &builder, const char *buf, char32_t value); -extern void EmitPlural(StringBuilder &builder, const char *buf, char32_t value); -extern void EmitGender(StringBuilder &builder, const char *buf, char32_t value); +extern void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value); +extern void EmitPlural(StringBuilder &builder, std::string_view param, char32_t value); +extern void EmitGender(StringBuilder &builder, std::string_view param, char32_t value); static const CmdStruct _cmd_structs[] = { /* Font size */