1
0
Fork 0

Codechange: Replace strgen Buffer with StringBuilder.

pull/13983/head
frosch 2025-03-30 17:46:42 +02:00 committed by frosch
parent 9b87f306ca
commit 5878db1d02
2 changed files with 53 additions and 105 deletions

View File

@ -12,6 +12,7 @@
#include "../core/mem_func.hpp"
#include "../error_func.h"
#include "../string_func.h"
#include "../core/string_builder.hpp"
#include "../table/control_codes.h"
#include "strgen.h"
@ -168,43 +169,6 @@ size_t StringData::CountInUse(size_t tab) const
return count;
}
/**
 * The buffer for writing a single string.
 * Extends std::string with helpers for appending a raw byte and a UTF-8 encoded character.
 */
struct Buffer : std::string {
/**
* Convenience method for adding a byte.
* @param value The value to add.
*/
void AppendByte(uint8_t value)
{
this->push_back(static_cast<char>(value));
}
/**
* Add a Unicode character encoded in UTF-8 to the buffer.
* Values of 0x110000 and above are not written; a warning is issued instead.
* NOTE(review): values in the surrogate range (0xD800-0xDFFF) are not rejected here
* and are encoded by the three byte branch.
* @param value The character to add.
*/
void AppendUtf8(char32_t value)
{
if (value < 0x80) {
/* Single byte, written as is. */
this->push_back(value);
} else if (value < 0x800) {
/* Two bytes: lead byte carries bits 6..10, continuation byte carries bits 0..5. */
this->push_back(0xC0 + GB(value, 6, 5));
this->push_back(0x80 + GB(value, 0, 6));
} else if (value < 0x10000) {
/* Three bytes: lead byte carries bits 12..15, followed by two 6 bit continuation bytes. */
this->push_back(0xE0 + GB(value, 12, 4));
this->push_back(0x80 + GB(value, 6, 6));
this->push_back(0x80 + GB(value, 0, 6));
} else if (value < 0x110000) {
/* Four bytes: lead byte carries bits 18..20, followed by three 6 bit continuation bytes. */
this->push_back(0xF0 + GB(value, 18, 3));
this->push_back(0x80 + GB(value, 12, 6));
this->push_back(0x80 + GB(value, 6, 6));
this->push_back(0x80 + GB(value, 0, 6));
} else {
/* Out of range: nothing is appended to the buffer. */
StrgenWarning("Invalid unicode value U+{:04X}", static_cast<uint32_t>(value));
}
}
};
static size_t Utf8Validate(const char *s)
{
char32_t c;
@ -229,10 +193,10 @@ static size_t Utf8Validate(const char *s)
return 0;
}
/**
 * Emit a command that consists of a single character.
 * A warning is issued when the parameter text of the command is not empty; the text itself is ignored.
 * @param buf The parameter text of the command; only checked for being empty.
 * @param value The character to write, UTF-8 encoded, to the output.
 */
void EmitSingleChar(Buffer *buffer, const char *buf, char32_t value)
void EmitSingleChar(StringBuilder &builder, const char *buf, char32_t value)
{
if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command");
buffer->AppendUtf8(value);
builder.PutUtf8(value);
}
/* The plural specifier looks like
@ -290,20 +254,20 @@ std::optional<std::string_view> ParseWord(const char **buf)
/* This is encoded like
* CommandByte <ARG#> <NUM> {Length of each string} {each string}
*
* This function writes the <NUM> byte, then one length byte per word, then the bytes of
* all words back to back. The leading command byte(s) are written by the callers
* (EmitPlural and EmitGender) before calling this function.
*
* A word longer than UINT8_MAX bytes is a fatal error, as its length must fit in one byte.
* NOTE(review): the number of words is narrowed to uint8_t without a range check in this
* function; presumably the callers bound it -- confirm. */
static void EmitWordList(Buffer *buffer, const std::vector<std::string> &words)
static void EmitWordList(StringBuilder &builder, const std::vector<std::string> &words)
{
/* Number of words. */
buffer->AppendByte(static_cast<uint8_t>(words.size()));
builder.PutUint8(static_cast<uint8_t>(words.size()));
/* First pass: the length of each word, one byte each. */
for (size_t i = 0; i < words.size(); i++) {
size_t len = words[i].size();
if (len > UINT8_MAX) StrgenFatal("WordList {}/{} string '{}' too long, max bytes {}", i + 1, words.size(), words[i], UINT8_MAX);
buffer->AppendByte(static_cast<uint8_t>(len));
builder.PutUint8(static_cast<uint8_t>(len));
}
/* Second pass: the words themselves, without separators. */
for (size_t i = 0; i < words.size(); i++) {
buffer->append(words[i]);
builder.Put(words[i]);
}
}
void EmitPlural(Buffer *buffer, const char *buf, char32_t)
void EmitPlural(StringBuilder &builder, const char *buf, char32_t)
{
/* Parse out the number, if one exists. Otherwise default to prev arg. */
auto [argidx, offset] = ParseRelNum(&buf);
@ -350,13 +314,13 @@ void EmitPlural(Buffer *buffer, const char *buf, char32_t)
}
}
buffer->AppendUtf8(SCC_PLURAL_LIST);
buffer->AppendByte(_lang.plural_form);
buffer->AppendByte(static_cast<uint8_t>(TranslateArgumentIdx(*argidx, *offset)));
EmitWordList(buffer, words);
builder.PutUtf8(SCC_PLURAL_LIST);
builder.PutUint8(_lang.plural_form);
builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(*argidx, *offset)));
EmitWordList(builder, words);
}
void EmitGender(Buffer *buffer, const char *buf, char32_t)
void EmitGender(StringBuilder &builder, const char *buf, char32_t)
{
if (buf[0] == '=') {
buf++;
@ -366,8 +330,8 @@ void EmitGender(Buffer *buffer, const char *buf, char32_t)
if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf);
/* now nw contains the gender index */
buffer->AppendUtf8(SCC_GENDER_INDEX);
buffer->AppendByte(nw);
builder.PutUtf8(SCC_GENDER_INDEX);
builder.PutUint8(nw);
} else {
/* This is a {G 0 foo bar two} command.
* If no relative number exists, default to +0 */
@ -389,9 +353,9 @@ void EmitGender(Buffer *buffer, const char *buf, char32_t)
if (words.size() != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
buffer->AppendUtf8(SCC_GENDER_LIST);
buffer->AppendByte(static_cast<uint8_t>(TranslateArgumentIdx(*argidx, *offset)));
EmitWordList(buffer, words);
builder.PutUtf8(SCC_GENDER_LIST);
builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(*argidx, *offset)));
EmitWordList(builder, words);
}
}
@ -770,20 +734,22 @@ static size_t TranslateArgumentIdx(size_t argidx, size_t offset)
return sum + offset;
}
/**
 * Emit the SCC_ARG_INDEX control code followed by a single byte holding the
 * translated index of the current argument (_cur_argidx).
 * The translated index is narrowed to uint8_t without a range check.
 */
static void PutArgidxCommand(Buffer *buffer)
static void PutArgidxCommand(StringBuilder &builder)
{
buffer->AppendUtf8(SCC_ARG_INDEX);
buffer->AppendByte(static_cast<uint8_t>(TranslateArgumentIdx(_cur_argidx)));
builder.PutUtf8(SCC_ARG_INDEX);
builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(_cur_argidx)));
}
static void PutCommandString(Buffer *buffer, const char *str)
static std::string PutCommandString(const char *str)
{
std::string result;
StringBuilder builder(result);
_cur_argidx = 0;
while (*str != '\0') {
/* Process characters as they are until we encounter a { */
if (*str != '{') {
buffer->append(1, *str++);
builder.PutChar(*str++);
continue;
}
@ -792,8 +758,8 @@ static void PutCommandString(Buffer *buffer, const char *str)
if (cmd == nullptr) break;
if (cs.casei.has_value()) {
buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
buffer->AppendByte(*cs.casei);
builder.PutUtf8(SCC_SET_CASE); // {SET_CASE}
builder.PutUint8(*cs.casei);
}
/* For params that consume values, we need to handle the argindex properly */
@ -801,7 +767,7 @@ static void PutCommandString(Buffer *buffer, const char *str)
/* Check if we need to output a move-param command */
if (cs.argno.has_value() && *cs.argno != _cur_argidx) {
_cur_argidx = *cs.argno;
PutArgidxCommand(buffer);
PutArgidxCommand(builder);
}
/* Output the one from the master string... it's always accurate. */
@ -811,8 +777,9 @@ static void PutCommandString(Buffer *buffer, const char *str)
}
}
cmd->proc(buffer, cs.param.c_str(), cmd->value);
cmd->proc(builder, cs.param.c_str(), cmd->value);
}
return result;
}
/**
@ -859,12 +826,10 @@ void LanguageWriter::WriteLang(const StringData &data)
_lang.winlangid = TO_LE16(_lang.winlangid);
this->WriteHeader(&_lang);
Buffer buffer;
for (size_t tab = 0; tab < data.tabs; tab++) {
for (size_t j = 0; j != in_use[tab]; j++) {
const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
const std::string *cmdp;
/* For undefined strings, just set that it's an empty string */
if (ls == nullptr) {
@ -872,6 +837,8 @@ void LanguageWriter::WriteLang(const StringData &data)
continue;
}
std::string output;
StringBuilder builder(output);
_cur_ident = ls->name.c_str();
_cur_line = ls->line;
@ -881,61 +848,42 @@ void LanguageWriter::WriteLang(const StringData &data)
StrgenWarning("'{}' is untranslated", ls->name);
}
if (_annotate_todos) {
buffer.append("<TODO> ");
builder.Put("<TODO> ");
}
}
/* Extract the strings and stuff from the english command string */
_cur_pcs = ExtractCommandString(ls->english.c_str(), false);
if (!ls->translated_cases.empty() || !ls->translated.empty()) {
cmdp = &ls->translated;
} else {
cmdp = &ls->english;
}
_translated = !ls->translated_cases.empty() || !ls->translated.empty();
const std::string &cmdp = _translated ? ls->translated : ls->english;
_translated = cmdp != &ls->english;
std::optional<size_t> default_case_pos;
if (!ls->translated_cases.empty()) {
/* Need to output a case-switch.
* It has this format
* <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <LENDEFAULT> <STRINGDEFAULT>
* Each LEN is printed using 2 bytes in little endian order. */
buffer.AppendUtf8(SCC_SWITCH_CASE);
buffer.AppendByte(static_cast<uint8_t>(ls->translated_cases.size()));
builder.PutUtf8(SCC_SWITCH_CASE);
builder.PutUint8(static_cast<uint8_t>(ls->translated_cases.size()));
/* Write each case */
for (const Case &c : ls->translated_cases) {
buffer.AppendByte(c.caseidx);
/* Make some space for the 16-bit length */
size_t pos = buffer.size();
buffer.AppendByte(0);
buffer.AppendByte(0);
/* Write string */
PutCommandString(&buffer, c.string.c_str());
/* Fill in the length */
size_t size = buffer.size() - (pos + 2);
buffer[pos + 0] = GB(size, 0, 8);
buffer[pos + 1] = GB(size, 8, 8);
auto case_str = PutCommandString(c.string.c_str());
builder.PutUint8(c.caseidx);
builder.PutUint16LE(static_cast<uint16_t>(case_str.size()));
builder.Put(case_str);
}
default_case_pos = buffer.size();
buffer.AppendByte(0);
buffer.AppendByte(0);
}
if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
if (default_case_pos.has_value()) {
size_t size = buffer.size() - (*default_case_pos + 2);
buffer[*default_case_pos + 0] = GB(size, 0, 8);
buffer[*default_case_pos + 1] = GB(size, 8, 8);
std::string def_str;
if (!cmdp.empty()) def_str = PutCommandString(cmdp.c_str());
if (!ls->translated_cases.empty()) {
builder.PutUint16LE(static_cast<uint16_t>(def_str.size()));
}
builder.Put(def_str);
this->WriteLength(buffer.size());
this->Write(buffer.data(), buffer.size());
buffer.clear();
this->WriteLength(output.size());
this->Write(output.data(), output.size());
}
}
}

View File

@ -16,8 +16,8 @@ enum class CmdFlag : uint8_t {
};
using CmdFlags = EnumBitSet<CmdFlag, uint8_t>;
/* Forward declaration of the output type; only used by pointer/reference in this header. */
struct Buffer;
/* Signature of a function that emits the encoded form of a string command.
 * The first parameter is the output to write to, 'buf' is the parameter text of the
 * command and 'value' is the value associated with the command. */
typedef void (*ParseCmdProc)(Buffer *buffer, const char *buf, char32_t value);
/* Forward declaration of the output type; only used by pointer/reference in this header. */
class StringBuilder;
/* Signature of a function that emits the encoded form of a string command.
 * The first parameter is the output to write to, 'buf' is the parameter text of the
 * command and 'value' is the value associated with the command. */
typedef void (*ParseCmdProc)(StringBuilder &builder, const char *buf, char32_t value);
struct CmdStruct {
std::string_view cmd;
@ -28,9 +28,9 @@ struct CmdStruct {
CmdFlags flags;
};
/* Emitter functions with the ParseCmdProc signature; they are defined outside this header. */
extern void EmitSingleChar(Buffer *buffer, const char *buf, char32_t value);
extern void EmitPlural(Buffer *buffer, const char *buf, char32_t value);
extern void EmitGender(Buffer *buffer, const char *buf, char32_t value);
/* Emitter functions with the ParseCmdProc signature; they are defined outside this header. */
extern void EmitSingleChar(StringBuilder &builder, const char *buf, char32_t value);
extern void EmitPlural(StringBuilder &builder, const char *buf, char32_t value);
extern void EmitGender(StringBuilder &builder, const char *buf, char32_t value);
static const CmdStruct _cmd_structs[] = {
/* Font size */