From 800d6e339d1cdcaa96b4af5118a6046860806365 Mon Sep 17 00:00:00 2001 From: frosch Date: Thu, 27 Mar 2025 18:15:56 +0100 Subject: [PATCH] Codechange: Add StringConsumer. --- src/core/CMakeLists.txt | 2 + src/core/string_consumer.cpp | 196 ++++++++ src/core/string_consumer.hpp | 895 +++++++++++++++++++++++++++++++++ src/settingsgen/CMakeLists.txt | 1 + src/strgen/CMakeLists.txt | 1 + src/tests/CMakeLists.txt | 1 + src/tests/string_consumer.cpp | 487 ++++++++++++++++++ 7 files changed, 1583 insertions(+) create mode 100644 src/core/string_consumer.cpp create mode 100644 src/core/string_consumer.hpp create mode 100644 src/tests/string_consumer.cpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index c14d10bf39..d21ea8da92 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -25,6 +25,8 @@ add_files( smallstack_type.hpp string_builder.cpp string_builder.hpp + string_consumer.cpp + string_consumer.hpp strong_typedef_type.hpp utf8.cpp utf8.hpp diff --git a/src/core/string_consumer.cpp b/src/core/string_consumer.cpp new file mode 100644 index 0000000000..6f2906676e --- /dev/null +++ b/src/core/string_consumer.cpp @@ -0,0 +1,196 @@ +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see . + */ + +/** @file string_consumer.cpp Implementation of string parsing. 
*/ + +#include "../stdafx.h" +#include "string_consumer.hpp" + +#include "bitmath_func.hpp" +#include "utf8.hpp" +#include "string_builder.hpp" + +#include "../string_func.h" + +#if defined(STRGEN) || defined(SETTINGSGEN) +#include "../error_func.h" +#else +#include "../debug.h" +#endif + +#include "../safeguards.h" + +const std::string_view StringConsumer::WHITESPACE_NO_NEWLINE = "\t\v\f\r "; +const std::string_view StringConsumer::WHITESPACE_OR_NEWLINE = "\t\n\v\f\r "; + +/* static */ void StringConsumer::LogError(std::string &&msg) +{ +#if defined(STRGEN) || defined(SETTINGSGEN) + FatalErrorI(std::move(msg)); +#else + DebugPrint("misc", 0, std::move(msg)); +#endif +} + +std::optional StringConsumer::PeekUint8() const +{ + if (this->GetBytesLeft() < 1) return std::nullopt; + return static_cast(this->src[this->position]); +} + +std::optional StringConsumer::PeekUint16LE() const +{ + if (this->GetBytesLeft() < 2) return std::nullopt; + return static_cast(this->src[this->position]) | + static_cast(this->src[this->position + 1]) << 8; +} + +std::optional StringConsumer::PeekUint32LE() const +{ + if (this->GetBytesLeft() < 4) return std::nullopt; + return static_cast(this->src[this->position]) | + static_cast(this->src[this->position + 1]) << 8 | + static_cast(this->src[this->position + 2]) << 16 | + static_cast(this->src[this->position + 3]) << 24; +} + +std::optional StringConsumer::PeekUint64LE() const +{ + if (this->GetBytesLeft() < 8) return std::nullopt; + return static_cast(static_cast(this->src[this->position])) | + static_cast(static_cast(this->src[this->position + 1])) << 8 | + static_cast(static_cast(this->src[this->position + 2])) << 16 | + static_cast(static_cast(this->src[this->position + 3])) << 24 | + static_cast(static_cast(this->src[this->position + 4])) << 32 | + static_cast(static_cast(this->src[this->position + 5])) << 40 | + static_cast(static_cast(this->src[this->position + 6])) << 48 | + static_cast(static_cast(this->src[this->position + 7])) 
<< 56; +} + +std::optional StringConsumer::PeekChar() const +{ + auto result = this->PeekUint8(); + if (!result.has_value()) return {}; + return static_cast(*result); +} + +std::pair StringConsumer::PeekUtf8() const +{ + auto buf = this->src.substr(this->position); + return DecodeUtf8(buf); +} + +std::string_view StringConsumer::Peek(size_type len) const +{ + auto buf = this->src.substr(this->position); + if (len == std::string_view::npos) { + len = buf.size(); + } else if (len > buf.size()) { + len = buf.size(); + } + return buf.substr(0, len); +} + +void StringConsumer::Skip(size_type len) +{ + if (len == std::string_view::npos) { + this->position = this->src.size(); + } else if (size_type max_len = GetBytesLeft(); len > max_len) { + LogError(fmt::format("Source buffer too short: {} > {}", len, max_len)); + this->position = this->src.size(); + } else { + this->position += len; + } +} + +StringConsumer::size_type StringConsumer::Find(std::string_view str) const +{ + assert(!str.empty()); + auto buf = this->src.substr(this->position); + return buf.find(str); +} + +StringConsumer::size_type StringConsumer::FindUtf8(char32_t c) const +{ + auto [data, len] = EncodeUtf8(c); + return this->Find({data, len}); +} + +StringConsumer::size_type StringConsumer::FindCharIn(std::string_view chars) const +{ + assert(!chars.empty()); + auto buf = this->src.substr(this->position); + return buf.find_first_of(chars); +} + +StringConsumer::size_type StringConsumer::FindCharNotIn(std::string_view chars) const +{ + assert(!chars.empty()); + auto buf = this->src.substr(this->position); + return buf.find_first_not_of(chars); +} + +std::string_view StringConsumer::PeekUntil(std::string_view str, SeparatorUsage sep) const +{ + assert(!str.empty()); + auto buf = this->src.substr(this->position); + auto len = buf.find(str); + if (len != std::string_view::npos) { + switch (sep) { + case READ_ONE_SEPARATOR: + if (buf.compare(len, str.size(), str) == 0) len += str.size(); + break; + case 
READ_ALL_SEPARATORS: + while (buf.compare(len, str.size(), str) == 0) len += str.size(); + break; + default: + break; + } + } + return buf.substr(0, len); +} + +std::string_view StringConsumer::PeekUntilUtf8(char32_t c, SeparatorUsage sep) const +{ + auto [data, len] = EncodeUtf8(c); + return PeekUntil({data, len}, sep); +} + +std::string_view StringConsumer::ReadUntilUtf8(char32_t c, SeparatorUsage sep) +{ + auto [data, len] = EncodeUtf8(c); + return ReadUntil({data, len}, sep); +} + +void StringConsumer::SkipUntilUtf8(char32_t c, SeparatorUsage sep) +{ + auto [data, len] = EncodeUtf8(c); + return SkipUntil({data, len}, sep); +} + +void StringConsumer::SkipIntegerBase(int base) +{ + this->SkipIf("-"); + if (base == 0) { + if (this->ReadIf("0x") || this->ReadIf("0X")) { // boolean short-circuit ensures only one prefix is read + base = 16; + } else { + base = 10; + } + } + switch (base) { + default: + assert(false); + break; + case 10: + this->SkipUntilCharNotIn("0123456789"); + break; + case 16: + this->SkipUntilCharNotIn("0123456789abcdefABCDEF"); + break; + } +} diff --git a/src/core/string_consumer.hpp b/src/core/string_consumer.hpp new file mode 100644 index 0000000000..b85bf65eb9 --- /dev/null +++ b/src/core/string_consumer.hpp @@ -0,0 +1,895 @@ +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see . + */ + +/** + * @file string_consumer.hpp Parse strings. 
+ */ + +#ifndef STRING_CONSUMER_HPP +#define STRING_CONSUMER_HPP + +#include +#include "format.hpp" + +/** + * Parse data from a string / buffer. + * + * There are generally four operations for each data type: + * - Peek: Check and return validity and value. Do not advance read position. + * - TryRead: Check and return validity and value. Advance reader, if valid. + * - Read: Check validity, return value or fallback-value. Advance reader, even if value is invalid, to avoid deadlocks/stalling. + * - Skip: Discard value. Advance reader, even if value is invalid, to avoid deadlocks/stalling. + */ +class StringConsumer { +public: + using size_type = std::string_view::size_type; + + /** + * Special value for "end of data". + */ + static constexpr size_type npos = std::string_view::npos; + + /** + * ASCII whitespace characters, excluding new-line. + * Usable in FindChar(In|NotIn), (Peek|Read|Skip)(If|Until)Char(In|NotIn) + */ + static const std::string_view WHITESPACE_NO_NEWLINE; + /** + * ASCII whitespace characters, including new-line. + * Usable in FindChar(In|NotIn), (Peek|Read|Skip)(If|Until)Char(In|NotIn) + */ + static const std::string_view WHITESPACE_OR_NEWLINE; + +private: + std::string_view src; + size_type position = 0; + + static void LogError(std::string &&msg); + +public: + /** + * Construct parser with data from string. + */ + explicit StringConsumer(std::string_view src) : src(src) {} + /** + * Construct parser with data from string. + */ + explicit StringConsumer(const std::string &src) : src(src) {} + /** + * Construct parser with data from span. + */ + explicit StringConsumer(std::span src) : src(src.data(), src.size()) {} + /** + * Construct parser with data from buffer. + */ + StringConsumer(const char *src, size_type len) : src(src, len) {} + + /** + * Check whether any bytes left to read. + */ + [[nodiscard]] bool AnyBytesLeft() const noexcept { return this->position < this->src.size(); } + /** + * Get number of bytes left to read. 
+ */ + [[nodiscard]] size_type GetBytesLeft() const noexcept { return this->src.size() - this->position; } + + /** + * Check whether any bytes were already read. + */ + [[nodiscard]] bool AnyBytesRead() const noexcept { return this->position > 0; } + /** + * Get number of already read bytes. + */ + [[nodiscard]] size_type GetBytesRead() const noexcept { return this->position; } + + /** + * Get the original data, as passed to the constructor. + */ + [[nodiscard]] std::string_view GetOrigData() const noexcept { return this->src; } + /** + * Get already read data. + */ + [[nodiscard]] std::string_view GetReadData() const noexcept { return this->src.substr(0, this->position); } + /** + * Get data left to read. + */ + [[nodiscard]] std::string_view GetLeftData() const noexcept { return this->src.substr(this->position); } + + /** + * Discard all remaining data. + */ + void SkipAll() { this->position = this->src.size(); } + + /** + * Peek binary uint8. + * @return Read integer, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekUint8() const; + /** + * Try to read binary uint8, and then advance reader. + */ + [[nodiscard]] std::optional TryReadUint8() + { + auto value = this->PeekUint8(); + if (value.has_value()) this->SkipUint8(); + return value; + } + /** + * Read binary uint8, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + */ + [[nodiscard]] uint8_t ReadUint8(uint8_t def = 0) + { + auto value = this->PeekUint8(); + this->SkipUint8(); // always advance + return value.value_or(def); + } + /** + * Skip binary uint8. + */ + void SkipUint8() { this->Skip(1); } + + /** + * Peek binary int8. + * @return Read integer, std::nullopt if not enough data. 
+ */ + [[nodiscard]] std::optional PeekSint8() const + { + auto result = PeekUint8(); + if (!result.has_value()) return std::nullopt; + return static_cast(*result); + } + /** + * Try to read binary int8, and then advance reader. + */ + [[nodiscard]] std::optional TryReadSint8() + { + auto value = this->PeekSint8(); + if (value.has_value()) this->SkipSint8(); + return value; + } + /** + * Read binary int8, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + */ + [[nodiscard]] int8_t ReadSint8(int8_t def = 0) + { + auto value = this->PeekSint8(); + this->SkipSint8(); // always advance + return value.value_or(def); + } + /** + * Skip binary int8. + */ + void SkipSint8() { this->Skip(1); } + + /** + * Peek binary uint16 using little endian. + * @return Read integer, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekUint16LE() const; + /** + * Try to read binary uint16, and then advance reader. + */ + [[nodiscard]] std::optional TryReadUint16LE() + { + auto value = this->PeekUint16LE(); + if (value.has_value()) this->SkipUint16LE(); + return value; + } + /** + * Read binary uint16 using little endian, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + * @note The reader is advanced, even if not enough data was present. + */ + [[nodiscard]] uint16_t ReadUint16LE(uint16_t def = 0) + { + auto value = this->PeekUint16LE(); + this->SkipUint16LE(); // always advance + return value.value_or(def); + } + /** + * Skip binary uint16, and advance reader. + * @note The reader is advanced, even if not enough data was present. + */ + void SkipUint16LE() { this->Skip(2); } + + /** + * Peek binary int16 using little endian. + * @return Read integer, std::nullopt if not enough data. 
+ */ + [[nodiscard]] std::optional PeekSint16LE() const + { + auto result = PeekUint16LE(); + if (!result.has_value()) return std::nullopt; + return static_cast(*result); + } + /** + * Try to read binary int16, and then advance reader. + */ + [[nodiscard]] std::optional TryReadSint16LE() + { + auto value = this->PeekSint16LE(); + if (value.has_value()) this->SkipSint16LE(); + return value; + } + /** + * Read binary int16 using little endian, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + * @note The reader is advanced, even if not enough data was present. + */ + [[nodiscard]] int16_t ReadSint16LE(int16_t def = 0) + { + auto value = this->PeekSint16LE(); + this->SkipSint16LE(); // always advance + return value.value_or(def); + } + /** + * Skip binary int16, and advance reader. + * @note The reader is advanced, even if not enough data was present. + */ + void SkipSint16LE() { this->Skip(2); } + + /** + * Peek binary uint32 using little endian. + * @return Read integer, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekUint32LE() const; + /** + * Try to read binary uint32, and then advance reader. + */ + [[nodiscard]] std::optional TryReadUint32LE() + { + auto value = this->PeekUint32LE(); + if (value.has_value()) this->SkipUint32LE(); + return value; + } + /** + * Read binary uint32 using little endian, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + * @note The reader is advanced, even if not enough data was present. + */ + [[nodiscard]] uint32_t ReadUint32LE(uint32_t def = 0) + { + auto value = this->PeekUint32LE(); + this->SkipUint32LE(); // always advance + return value.value_or(def); + } + /** + * Skip binary uint32, and advance reader. + * @note The reader is advanced, even if not enough data was present. 
+ */ + void SkipUint32LE() { this->Skip(4); } + + /** + * Peek binary int32 using little endian. + * @return Read integer, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekSint32LE() const + { + auto result = PeekUint32LE(); + if (!result.has_value()) return std::nullopt; + return static_cast(*result); + } + /** + * Try to read binary int32, and then advance reader. + */ + [[nodiscard]] std::optional TryReadSint32LE() + { + auto value = this->PeekSint32LE(); + if (value.has_value()) this->SkipSint32LE(); + return value; + } + /** + * Read binary int32 using little endian, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + * @note The reader is advanced, even if not enough data was present. + */ + [[nodiscard]] int32_t ReadSint32LE(int32_t def = 0) + { + auto value = this->PeekSint32LE(); + this->SkipSint32LE(); // always advance + return value.value_or(def); + } + /** + * Skip binary int32, and advance reader. + * @note The reader is advanced, even if not enough data was present. + */ + void SkipSint32LE() { this->Skip(4); } + + /** + * Peek binary uint64 using little endian. + * @return Read integer, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekUint64LE() const; + /** + * Try to read binary uint64, and then advance reader. + */ + [[nodiscard]] std::optional TryReadUint64LE() + { + auto value = this->PeekUint64LE(); + if (value.has_value()) this->SkipUint64LE(); + return value; + } + /** + * Read binary uint64 using little endian, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + * @note The reader is advanced, even if not enough data was present. 
+ */ + [[nodiscard]] uint64_t ReadUint64LE(uint64_t def = 0) + { + auto value = this->PeekUint64LE(); + this->SkipUint64LE(); // always advance + return value.value_or(def); + } + /** + * Skip binary uint64, and advance reader. + * @note The reader is advanced, even if not enough data was present. + */ + void SkipUint64LE() { this->Skip(8); } + + /** + * Peek binary int64 using little endian. + * @return Read integer, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekSint64LE() const + { + auto result = PeekUint64LE(); + if (!result.has_value()) return std::nullopt; + return static_cast(*result); + } + /** + * Try to read binary int64, and then advance reader. + */ + [[nodiscard]] std::optional TryReadSint64LE() + { + auto value = this->PeekSint64LE(); + if (value.has_value()) this->SkipSint64LE(); + return value; + } + /** + * Read binary int64 using little endian, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read integer, 'def' if not enough data. + * @note The reader is advanced, even if not enough data was present. + */ + [[nodiscard]] int64_t ReadSint64LE(int64_t def = 0) + { + auto value = this->PeekSint64LE(); + this->SkipSint64LE(); // always advance + return value.value_or(def); + } + /** + * Skip binary int64, and advance reader. + * @note The reader is advanced, even if not enough data was present. + */ + void SkipSint64LE() { this->Skip(8); } + + /** + * Peek 8-bit character. + * @return Read char, std::nullopt if not enough data. + */ + [[nodiscard]] std::optional PeekChar() const; + /** + * Try to read a 8-bit character, and then advance reader. + */ + [[nodiscard]] std::optional TryReadChar() + { + auto value = this->PeekChar(); + if (value.has_value()) this->SkipChar(); + return value; + } + /** + * Read 8-bit character, and advance reader. + * @param def Default value to return, if not enough data. + * @return Read character, 'def' if not enough data. 
+ */ + [[nodiscard]] char ReadChar(char def = '?') { + auto value = this->PeekChar(); + this->SkipChar(); // always advance + return value.value_or(def); + } + /** + * Skip 8-bit character, and advance reader. + */ + void SkipChar() { this->Skip(1); } + + /** + * Peek UTF-8 character. + * @return Length and read char, {0, 0} if no valid data. + */ + [[nodiscard]] std::pair PeekUtf8() const; + /** + * Try to read a UTF-8 character, and then advance reader. + */ + [[nodiscard]] std::optional TryReadUtf8() + { + auto [len, value] = this->PeekUtf8(); + if (len == 0) return std::nullopt; + this->Skip(len); + return value; + } + /** + * Read UTF-8 character, and advance reader. + * @param def Default value to return, if no valid data. + * @return Read char, 'def' if no valid data. + * @note The reader is advanced, even if no valid data was present. + */ + [[nodiscard]] char32_t ReadUtf8(char32_t def = '?') + { + auto [len, value] = this->PeekUtf8(); + this->Skip(len > 0 ? len : 1); // advance at least one byte + return len > 0 ? value : def; + } + /** + * Skip UTF-8 character, and advance reader. + * @note The reader is advanced, even if no valid data was present. + * @note This behaves different to Utf8View::iterator. + * Here we do not skip overlong encodings, because we want to + * allow binary data to follow UTF-8 data. + */ + void SkipUtf8() + { + auto len = this->PeekUtf8().first; + this->Skip(len > 0 ? len : 1); // advance at least one byte + } + + /** + * Check whether the next data matches 'str'. + */ + [[nodiscard]] bool PeekIf(std::string_view str) const + { + return this->src.compare(this->position, str.size(), str) == 0; + } + /** + * Check whether the next data matches 'str', and skip it. + */ + [[nodiscard]] bool ReadIf(std::string_view str) + { + bool result = this->PeekIf(str); + if (result) this->Skip(str.size()); + return result; + } + /** + * If the next data matches 'str', then skip it. 
+ */ + void SkipIf(std::string_view str) + { + if (this->PeekIf(str)) this->Skip(str.size()); + } + + /** + * Check whether the next 8-bit char matches 'c'. + */ + [[nodiscard]] bool PeekCharIf(char c) const + { + return this->PeekIf({&c, 1}); + } + /** + * Check whether the next 8-bit char matches 'c', and skip it. + */ + [[nodiscard]] bool ReadCharIf(char c) + { + return this->ReadIf({&c, 1}); + } + /** + * If the next data matches the 8-bit char 'c', then skip it. + */ + void SkipCharIf(char c) + { + return this->SkipIf({&c, 1}); + } + + /** + * Check whether the next UTF-8 char matches 'c'. + */ + [[nodiscard]] bool PeekUtf8If(char32_t c) const + { + auto [len, result] = this->PeekUtf8(); + return len > 0 && result == c; + } + /** + * Check whether the next UTF-8 char matches 'c', and skip it. + */ + [[nodiscard]] bool ReadUtf8If(char32_t c) + { + auto [len, result] = this->PeekUtf8(); + if (len == 0 || result != c) return false; + this->Skip(len); + return true; + } + /** + * If the next data matches the UTF-8 char 'c', then skip it. + */ + void SkipUtf8If(char32_t c) + { + auto [len, result] = this->PeekUtf8(); + if (len > 0 && result == c) { + this->Skip(len); + } + } + + /** + * Peek the next 'len' bytes. + * @param len Bytes to read, 'npos' to read all. + * @return Up to 'len' bytes. + */ + [[nodiscard]] std::string_view Peek(size_type len) const; + /** + * Read the next 'len' bytes, and advance reader. + * @param len Bytes to read, 'npos' to read all. + * @return Up to 'len' bytes. + */ + [[nodiscard]] std::string_view Read(size_type len) + { + auto result = this->Peek(len); + if (len != npos && len != result.size()) { + LogError(fmt::format("Source buffer too short: {} > {}", len, result.size())); + } + this->Skip(result.size()); + return result; + } + /** + * Discard some bytes. + * @param len Number of bytes to skip, 'npos' to skip all. + */ + void Skip(size_type len); + + /** + * Find first occurrence of 'str'. 
+ * @return Offset from current reader position. 'npos' if no match found. + */ + [[nodiscard]] size_type Find(std::string_view str) const; + /** + * Find first occurrence of 8-bit char 'c'. + * @return Offset from current reader position. 'npos' if no match found. + */ + [[nodiscard]] size_type FindChar(char c) const + { + return this->Find({&c, 1}); + } + /** + * Find first occurrence of UTF-8 char 'c'. + * @return Offset from current reader position. 'npos' if no match found. + */ + [[nodiscard]] size_type FindUtf8(char32_t c) const; + + /** + * Find first occurrence of any 8-bit char in 'chars'. + * @return Offset from current reader position. 'npos' if no match found. + */ + [[nodiscard]] size_type FindCharIn(std::string_view chars) const; + /** + * Find first occurrence of any 8-bit char not in 'chars'. + * @return Offset from current reader position. 'npos' if no match found. + */ + [[nodiscard]] size_type FindCharNotIn(std::string_view chars) const; + + /** + * Check whether the next 8-bit char is in 'chars'. + * @return Matching char, std::nullopt if no match. + */ + [[nodiscard]] std::optional PeekCharIfIn(std::string_view chars) const + { + assert(!chars.empty()); + std::optional c = this->PeekChar(); + if (c.has_value() && chars.find(*c) != std::string_view::npos) return c; + return std::nullopt; + } + /** + * Read next 8-bit char, check whether it is in 'chars', and advance reader. + * @return Matching char, std::nullopt if no match. + */ + [[nodiscard]] std::optional ReadCharIfIn(std::string_view chars) + { + auto result = this->PeekCharIfIn(chars); + if (result.has_value()) this->Skip(1); + return result; + } + /** + * If the next 8-bit char is in 'chars', skip it. + */ + void SkipCharIfIn(std::string_view chars) + { + auto result = this->PeekCharIfIn(chars); + if (result.has_value()) this->Skip(1); + } + + /** + * Check whether the next 8-bit char is not in 'chars'. + * @return Non-matching char, std::nullopt if match. 
+ */ + [[nodiscard]] std::optional PeekCharIfNotIn(std::string_view chars) const + { + assert(!chars.empty()); + std::optional c = this->PeekChar(); + if (c.has_value() && chars.find(*c) == std::string_view::npos) return c; + return std::nullopt; + } + /** + * Read next 8-bit char, check whether it is not in 'chars', and advance reader. + * @return Non-matching char, std::nullopt if match. + */ + [[nodiscard]] std::optional ReadCharIfNotIn(std::string_view chars) + { + auto result = this->PeekCharIfNotIn(chars); + if (result.has_value()) this->Skip(1); + return result; + } + /** + * If the next 8-bit char is not in 'chars', skip it. + */ + void SkipCharIfNotIn(std::string_view chars) + { + auto result = this->PeekCharIfNotIn(chars); + if (result.has_value()) this->Skip(1); + } + + /** + * Peek 8-bit chars, while they are not in 'chars', until they are. + * @return Non-matching chars. + */ + [[nodiscard]] std::string_view PeekUntilCharIn(std::string_view chars) const + { + size_type len = this->FindCharIn(chars); + return this->Peek(len); + } + /** + * Read 8-bit chars, while they are not in 'chars', until they are; and advance reader. + * @return Non-matching chars. + */ + [[nodiscard]] std::string_view ReadUntilCharIn(std::string_view chars) + { + size_type len = this->FindCharIn(chars); + return this->Read(len); + } + /** + * Skip 8-bit chars, while they are not in 'chars', until they are. + */ + void SkipUntilCharIn(std::string_view chars) + { + size_type len = this->FindCharIn(chars); + this->Skip(len); + } + + /** + * Peek 8-bit chars, while they are in 'chars', until they are not. + * @return Matching chars. + */ + [[nodiscard]] std::string_view PeekUntilCharNotIn(std::string_view chars) const + { + size_type len = this->FindCharNotIn(chars); + return this->Peek(len); + } + /** + * Read 8-bit chars, while they are in 'chars', until they are not; and advance reader. + * @return Matching chars. 
+ */ + [[nodiscard]] std::string_view ReadUntilCharNotIn(std::string_view chars) + { + size_type len = this->FindCharNotIn(chars); + return this->Read(len); + } + /** + * Skip 8-bit chars, while they are in 'chars', until they are not. + */ + void SkipUntilCharNotIn(std::string_view chars) + { + size_type len = this->FindCharNotIn(chars); + this->Skip(len); + } + + /** + * Treatment of separator characters. + */ + enum SeparatorUsage { + READ_ALL_SEPARATORS, ///< Read all consecutive separators, and include them all in the result + READ_ONE_SEPARATOR, ///< Read one separator, and include it in the result + KEEP_SEPARATOR, ///< Keep the separator in the data as next value to be read. + SKIP_ONE_SEPARATOR, ///< Read and discard one separator, do not include it in the result. + SKIP_ALL_SEPARATORS, ///< Read and discard all consecutive separators, do not include any in the result. + }; + + /** + * Peek data until the first occurrence of 'str'. + * @param str Separator string. + * @param sep Whether to include/exclude 'str' from the result. + */ + [[nodiscard]] std::string_view PeekUntil(std::string_view str, SeparatorUsage sep) const; + /** + * Read data until the first occurrence of 'str', and advance reader. + * @param str Separator string. + * @param sep Whether to include/exclude 'str' from the result, and/or skip it. + */ + [[nodiscard]] std::string_view ReadUntil(std::string_view str, SeparatorUsage sep) + { + assert(!str.empty()); + auto result = this->PeekUntil(str, sep); + this->Skip(result.size()); + switch (sep) { + default: + break; + case SKIP_ONE_SEPARATOR: + this->SkipIf(str); + break; + case SKIP_ALL_SEPARATORS: + while (this->ReadIf(str)) {} + break; + } + return result; + } + /** + * Skip data until the first occurrence of 'str'. + * @param str Separator string. + * @param sep Whether to also skip 'str'. 
+ */ + void SkipUntil(std::string_view str, SeparatorUsage sep) + { + assert(!str.empty()); + this->Skip(this->Find(str)); + switch (sep) { + default: + break; + case READ_ONE_SEPARATOR: + case SKIP_ONE_SEPARATOR: + this->SkipIf(str); + break; + case READ_ALL_SEPARATORS: + case SKIP_ALL_SEPARATORS: + while (this->ReadIf(str)) {} + break; + } + } + + /** + * Peek data until the first occurrence of 8-bit char 'c'. + * @param c Separator char. + * @param sep Whether to include/exclude 'c' from the result. + */ + [[nodiscard]] std::string_view PeekUntilChar(char c, SeparatorUsage sep) const + { + return this->PeekUntil({&c, 1}, sep); + } + /** + * Read data until the first occurrence of 8-bit char 'c', and advance reader. + * @param c Separator char. + * @param sep Whether to include/exclude 'c' from the result, and/or skip it. + */ + [[nodiscard]] std::string_view ReadUntilChar(char c, SeparatorUsage sep) + { + return this->ReadUntil({&c, 1}, sep); + } + /** + * Skip data until the first occurrence of 8-bit char 'c'. + * @param c Separator char. + * @param sep Whether to also skip 'c'. + */ + void SkipUntilChar(char c, SeparatorUsage sep) + { + this->SkipUntil({&c, 1}, sep); + } + + /** + * Peek data until the first occurrence of UTF-8 char 'c'. + * @param c Separator char. + * @param sep Whether to include/exclude 'c' from the result. + */ + [[nodiscard]] std::string_view PeekUntilUtf8(char32_t c, SeparatorUsage sep) const; + /** + * Read data until the first occurrence of UTF-8 char 'c', and advance reader. + * @param c Separator char. + * @param sep Whether to include/exclude 'c' from the result, and/or skip it. + */ + [[nodiscard]] std::string_view ReadUntilUtf8(char32_t c, SeparatorUsage sep); + /** + * Skip data until the first occurrence of UTF-8 char 'c'. + * @param c Separator char. + * @param sep Whether to also skip 'c'. 
+	 */
+	void SkipUntilUtf8(char32_t c, SeparatorUsage sep);
+
+private:
+	/** Parse an integer prefix of 'src' in 'base' (10, 16, or 0 to auto-detect a '0x'/'-0x' prefix); yields {consumed length, value}, or {0, 0} on failure. */
+	template <class T>
+	[[nodiscard]] static std::pair<size_type, T> ParseIntegerBase(std::string_view src, int base, bool log_errors)
+	{
+		if (base == 0) {
+			/* Try positive hex: strip the "0x" prefix here, std::from_chars does not recognise it. */
+			if (src.starts_with("0x") || src.starts_with("0X")) {
+				auto [len, value] = ParseIntegerBase<T>(src.substr(2), 16, log_errors);
+				if (len == 0) return {};
+				return {len + 2, value};
+			}
+
+			/* Try negative hex: parse the magnitude as unsigned, so the most negative value of T is representable. */
+			if (std::is_signed_v<T> && (src.starts_with("-0x") || src.starts_with("-0X"))) {
+				using Unsigned = std::make_unsigned_t<T>;
+				auto [len, uvalue] = ParseIntegerBase<Unsigned>(src.substr(3), 16, log_errors);
+				if (len == 0) return {};
+				T value = static_cast<T>(0 - uvalue);
+				if (value > 0) { // magnitude exceeded the negative range of T and wrapped around
+					if (log_errors) LogError(fmt::format("Integer out of range: '{}'", src.substr(0, len + 3)));
+					return {};
+				}
+				return {len + 3, value};
+			}
+
+			/* Try decimal */
+			return ParseIntegerBase<T>(src, 10, log_errors);
+		}
+		T value{};
+		assert(base == 10 || base == 16); // we only support these bases when skipping
+		auto result = std::from_chars(src.data(), src.data() + src.size(), value, base);
+		auto len = result.ptr - src.data();
+		if (result.ec == std::errc::result_out_of_range) {
+			if (log_errors) LogError(fmt::format("Integer out of range: '{}'+'{}'", src.substr(0, len), src.substr(len, 4)));
+			return {};
+		}
+		if (result.ec != std::errc{}) {
+			if (log_errors) LogError(fmt::format("Cannot parse integer: '{}'+'{}'", src.substr(0, len), src.substr(len, 4)));
+			return {};
+		}
+		return {len, value};
+	}
+
+public:
+	/**
+	 * Peek and parse an integer in number 'base'.
+	 * If 'base == 0', then a prefix '0x' decides between base 16 or base 10.
+	 * @return Length of string match, and parsed value.
+	 * @note The parser rejects leading whitespace and unary plus.
+ */ + template + [[nodiscard]] std::pair PeekIntegerBase(int base) const + { + return ParseIntegerBase(this->src.substr(this->position), base, false); + } + /** + * Try to read and parse an integer in number 'base', and then advance the reader. + * If 'base == 0', then a prefix '0x' decides between base 16 or base 10. + * @return Parsed value, if valid. + * @note The parser rejects leading whitespace and unary plus. + */ + template + [[nodiscard]] std::optional TryReadIntegerBase(int base) + { + auto [len, value] = this->PeekIntegerBase(base); + if (len == 0) return std::nullopt; + this->SkipIntegerBase(base); + return value; + } + /** + * Read and parse an integer in number 'base', and advance the reader. + * If 'base == 0', then a prefix '0x' decides between base 16 or base 10. + * @return Parsed value, or 'def' if invalid. + * @note The reader is advanced, even if no valid data was present. + * @note The parser rejects leading whitespace and unary plus. + */ + template + [[nodiscard]] T ReadIntegerBase(int base, T def = 0) + { + auto [len, value] = ParseIntegerBase(this->src.substr(this->position), base, true); + this->SkipIntegerBase(base); // always advance + return len > 0 ? value : def; + } + /** + * Skip an integer in number 'base'. + * If 'base == 0', then a prefix '0x' decides between base 16 or base 10. + * @note The reader is advanced, even if no valid data was present. + * @note The parser rejects leading whitespace and unary plus. 
+ */ + void SkipIntegerBase(int base); +}; + +#endif /* STRING_CONSUMER_HPP */ diff --git a/src/settingsgen/CMakeLists.txt b/src/settingsgen/CMakeLists.txt index c69f755035..220ffb972b 100644 --- a/src/settingsgen/CMakeLists.txt +++ b/src/settingsgen/CMakeLists.txt @@ -11,6 +11,7 @@ if (NOT HOST_BINARY_DIR) ../ini_load.cpp ../string.cpp ../core/string_builder.cpp + ../core/string_consumer.cpp ../core/utf8.cpp ) add_definitions(-DSETTINGSGEN) diff --git a/src/strgen/CMakeLists.txt b/src/strgen/CMakeLists.txt index 425cc1dd1a..40eb11448f 100644 --- a/src/strgen/CMakeLists.txt +++ b/src/strgen/CMakeLists.txt @@ -13,6 +13,7 @@ if (NOT HOST_BINARY_DIR) ../error.cpp ../string.cpp ../core/string_builder.cpp + ../core/string_consumer.cpp ../core/utf8.cpp ) add_definitions(-DSTRGEN) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 7bfc261e4c..29f97ad205 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -8,6 +8,7 @@ add_test_files( mock_spritecache.cpp mock_spritecache.h string_builder.cpp + string_consumer.cpp string_func.cpp test_main.cpp test_network_crypto.cpp diff --git a/src/tests/string_consumer.cpp b/src/tests/string_consumer.cpp new file mode 100644 index 0000000000..1cf470a0cc --- /dev/null +++ b/src/tests/string_consumer.cpp @@ -0,0 +1,487 @@ +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>. + */ + +/** @file string_consumer.cpp Test functionality from core/string_consumer. 
*/ + +#include "../stdafx.h" + +#include + +#include "../3rdparty/catch2/catch.hpp" + +#include "../core/string_consumer.hpp" + +#include "../safeguards.h" + +using namespace std::literals; + +/* Checks the read/left byte counters and the Orig/Read/Left data views before and after Skip(1) and SkipAll(), and that a further Skip(1) at the end changes nothing. */ TEST_CASE("StringConsumer - basic") +{ + StringConsumer consumer("ab"sv); + CHECK(!consumer.AnyBytesRead()); + CHECK(consumer.GetBytesRead() == 0); + CHECK(consumer.AnyBytesLeft()); + CHECK(consumer.GetBytesLeft() == 2); + CHECK(consumer.GetOrigData() == "ab"); + CHECK(consumer.GetReadData() == ""); + CHECK(consumer.GetLeftData() == "ab"); + consumer.Skip(1); + CHECK(consumer.AnyBytesRead()); + CHECK(consumer.GetBytesRead() == 1); + CHECK(consumer.AnyBytesLeft()); + CHECK(consumer.GetBytesLeft() == 1); + CHECK(consumer.GetOrigData() == "ab"); + CHECK(consumer.GetReadData() == "a"); + CHECK(consumer.GetLeftData() == "b"); + consumer.SkipAll(); + CHECK(consumer.AnyBytesRead()); + CHECK(consumer.GetBytesRead() == 2); + CHECK(!consumer.AnyBytesLeft()); + CHECK(consumer.GetBytesLeft() == 0); + CHECK(consumer.GetOrigData() == "ab"); + CHECK(consumer.GetReadData() == "ab"); + CHECK(consumer.GetLeftData() == ""); + consumer.Skip(1); + CHECK(consumer.AnyBytesRead()); + CHECK(consumer.GetBytesRead() == 2); + CHECK(!consumer.AnyBytesLeft()); + CHECK(consumer.GetBytesLeft() == 0); + CHECK(consumer.GetOrigData() == "ab"); + CHECK(consumer.GetReadData() == "ab"); + CHECK(consumer.GetLeftData() == ""); +} + +/* Checks Peek/Read/Skip of single bytes as Uint8, Sint8 and Char, including the nullopt peek and the default value returned by reads once the input is exhausted. */ TEST_CASE("StringConsumer - binary8") +{ + StringConsumer consumer("\xFF\xFE\xFD\0"sv); + CHECK(consumer.PeekUint8() == 0xFF); + CHECK(consumer.PeekSint8() == -1); + CHECK(consumer.PeekChar() == static_cast(-1)); + consumer.SkipUint8(); + CHECK(consumer.PeekUint8() == 0xFE); + CHECK(consumer.PeekSint8() == -2); + CHECK(consumer.PeekChar() == static_cast(-2)); + CHECK(consumer.ReadUint8() == 0xFE); + CHECK(consumer.PeekUint8() == 0xFD); + CHECK(consumer.PeekSint8() == -3); + CHECK(consumer.PeekChar() == static_cast(-3)); + CHECK(consumer.ReadSint8() == -3); + CHECK(consumer.PeekUint8() 
== 0); + CHECK(consumer.PeekSint8() == 0); + CHECK(consumer.PeekChar() == 0); + CHECK(consumer.ReadChar() == 0); + CHECK(consumer.PeekUint8() == std::nullopt); + CHECK(consumer.PeekSint8() == std::nullopt); + CHECK(consumer.PeekChar() == std::nullopt); + CHECK(consumer.ReadUint8(42) == 42); + consumer.SkipSint8(); + CHECK(consumer.ReadSint8(42) == 42); + CHECK(consumer.ReadChar(42) == 42); +} + +/* Checks Peek/Read/Skip of 16-bit little-endian values, unsigned and signed, including the results once the input is exhausted. */ TEST_CASE("StringConsumer - binary16") +{ + StringConsumer consumer("\xFF\xFF\xFE\xFF\xFD\xFF"sv); + CHECK(consumer.PeekUint16LE() == 0xFFFF); + CHECK(consumer.PeekSint16LE() == -1); + consumer.SkipUint16LE(); + CHECK(consumer.PeekUint16LE() == 0xFFFE); + CHECK(consumer.PeekSint16LE() == -2); + CHECK(consumer.ReadUint16LE() == 0xFFFE); + CHECK(consumer.PeekUint16LE() == 0xFFFD); + CHECK(consumer.PeekSint16LE() == -3); + CHECK(consumer.ReadSint16LE() == -3); + CHECK(consumer.PeekUint16LE() == std::nullopt); + CHECK(consumer.PeekSint16LE() == std::nullopt); + CHECK(consumer.ReadUint16LE(42) == 42); + consumer.SkipSint16LE(); + CHECK(consumer.ReadSint16LE(42) == 42); +} + +/* Checks Peek/Read/Skip of 32-bit little-endian values, unsigned and signed, including the results once the input is exhausted. */ TEST_CASE("StringConsumer - binary32") +{ + StringConsumer consumer("\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFD\xFF\xFF\xFF"sv); + CHECK(consumer.PeekUint32LE() == 0xFFFFFFFF); + CHECK(consumer.PeekSint32LE() == -1); + consumer.SkipUint32LE(); + CHECK(consumer.PeekUint32LE() == 0xFFFFFFFE); + CHECK(consumer.PeekSint32LE() == -2); + CHECK(consumer.ReadUint32LE() == 0xFFFFFFFE); + CHECK(consumer.PeekUint32LE() == 0xFFFFFFFD); + CHECK(consumer.PeekSint32LE() == -3); + CHECK(consumer.ReadSint32LE() == -3); + CHECK(consumer.PeekUint32LE() == std::nullopt); + CHECK(consumer.PeekSint32LE() == std::nullopt); + CHECK(consumer.ReadUint32LE(42) == 42); + consumer.SkipSint32LE(); + CHECK(consumer.ReadSint32LE(42) == 42); +} + +/* Checks Peek/Read/Skip of 64-bit little-endian values, unsigned and signed, including the results once the input is exhausted. */ TEST_CASE("StringConsumer - binary64") +{ + StringConsumer consumer("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF"sv); + CHECK(consumer.PeekUint64LE() 
== 0xFFFFFFFF'FFFFFFFF); + CHECK(consumer.PeekSint64LE() == -1); + consumer.SkipUint64LE(); + CHECK(consumer.PeekUint64LE() == 0xFFFFFFFF'FFFFFFFE); + CHECK(consumer.PeekSint64LE() == -2); + CHECK(consumer.ReadUint64LE() == 0xFFFFFFFF'FFFFFFFE); + CHECK(consumer.PeekUint64LE() == 0xFFFFFFFF'FFFFFFFD); + CHECK(consumer.PeekSint64LE() == -3); + CHECK(consumer.ReadSint64LE() == -3); + CHECK(consumer.PeekUint64LE() == std::nullopt); + CHECK(consumer.PeekSint64LE() == std::nullopt); + CHECK(consumer.ReadUint64LE(42) == 42); + consumer.SkipSint64LE(); + CHECK(consumer.ReadSint64LE(42) == 42); +} + +/* Checks UTF-8 decoding: valid 1- and 3-byte sequences, invalid bytes giving a (0, 0) peek and '?' on read, and the same results at the end of the input. */ TEST_CASE("StringConsumer - utf8") +{ + StringConsumer consumer("a\u1234\xFF\xFE""b"sv); + CHECK(consumer.PeekUtf8() == std::pair(1, 'a')); + consumer.SkipUtf8(); + CHECK(consumer.PeekUtf8() == std::pair(3, 0x1234)); + CHECK(consumer.ReadUtf8() == 0x1234); + CHECK(consumer.PeekUint8() == 0xFF); + CHECK(consumer.PeekUtf8() == std::pair(0, 0)); + CHECK(consumer.ReadUtf8() == '?'); + CHECK(consumer.PeekUint8() == 0xFE); + CHECK(consumer.PeekUtf8() == std::pair(0, 0)); + consumer.SkipUtf8(); + CHECK(consumer.PeekUtf8() == std::pair(1, 'b')); + CHECK(consumer.ReadUtf8() == 'b'); + CHECK(!consumer.AnyBytesLeft()); + CHECK(consumer.PeekUtf8() == std::pair(0, 0)); + CHECK(consumer.ReadUtf8() == '?'); +} + +/* Checks the conditional Peek/Read/Skip helpers, the Find variants, and the *Until family with every SeparatorUsage mode, including NUL separators, a separator that is absent, and calls on exhausted input. */ TEST_CASE("StringConsumer - conditions") +{ + StringConsumer consumer("ABCDabcde\u0234@@@gh\0\0\0ij\0\0\0kl"sv); + CHECK(consumer.PeekIf("AB")); + CHECK(consumer.PeekCharIf('A')); + CHECK(consumer.PeekUtf8If('A')); + CHECK(!consumer.PeekIf("CD")); + CHECK(!consumer.ReadIf("CD")); + consumer.SkipIf("CD"); + CHECK(consumer.ReadIf("AB")); + CHECK(consumer.PeekIf("CD")); + consumer.SkipIf("CD"); + CHECK(consumer.Peek(2) == "ab"); + CHECK(consumer.Read(2) == "ab"); + CHECK(consumer.Peek(2) == "cd"); + CHECK(consumer.Find("e\u0234") == 2); + CHECK(consumer.Find("ab") == StringConsumer::npos); + CHECK(consumer.FindChar('e') == 2); + CHECK(consumer.FindChar('a') == StringConsumer::npos); + 
CHECK(consumer.FindUtf8(0x234) == 3); + CHECK(consumer.FindUtf8(0x1234) == StringConsumer::npos); + consumer.Skip(2); + CHECK(consumer.Peek(3) == "e\u0234"); + CHECK(consumer.PeekUntil("e", StringConsumer::READ_ALL_SEPARATORS) == "e"); + CHECK(consumer.PeekUntil("e", StringConsumer::READ_ONE_SEPARATOR) == "e"); + CHECK(consumer.PeekUntil("e", StringConsumer::KEEP_SEPARATOR) == ""); + CHECK(consumer.PeekUntil("e", StringConsumer::SKIP_ONE_SEPARATOR) == ""); + CHECK(consumer.PeekUntil("e", StringConsumer::SKIP_ALL_SEPARATORS) == ""); + CHECK(consumer.PeekUntil("@", StringConsumer::READ_ALL_SEPARATORS) == "e\u0234@@@"); + CHECK(consumer.PeekUntil("@", StringConsumer::READ_ONE_SEPARATOR) == "e\u0234@"); + CHECK(consumer.PeekUntil("@", StringConsumer::KEEP_SEPARATOR) == "e\u0234"); + CHECK(consumer.PeekUntil("@", StringConsumer::SKIP_ONE_SEPARATOR) == "e\u0234"); + CHECK(consumer.PeekUntil("@", StringConsumer::SKIP_ALL_SEPARATORS) == "e\u0234"); + CHECK(consumer.ReadUntil("@", StringConsumer::KEEP_SEPARATOR) == "e\u0234"); + CHECK(consumer.ReadUntil("@", StringConsumer::READ_ONE_SEPARATOR) == "@"); + CHECK(consumer.ReadUntil("@", StringConsumer::READ_ALL_SEPARATORS) == "@@"); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::READ_ALL_SEPARATORS) == "gh\0\0\0"sv); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::READ_ONE_SEPARATOR) == "gh\0"sv); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::KEEP_SEPARATOR) == "gh"); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::SKIP_ONE_SEPARATOR) == "gh"); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::SKIP_ALL_SEPARATORS) == "gh"); + CHECK(consumer.ReadUntilChar('\0', StringConsumer::READ_ONE_SEPARATOR) == "gh\0"sv); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::READ_ALL_SEPARATORS) == "\0\0"sv); + CHECK(consumer.ReadUntilChar('\0', StringConsumer::SKIP_ONE_SEPARATOR) == ""); + CHECK(consumer.PeekUntilChar('\0', StringConsumer::READ_ALL_SEPARATORS) == "\0"sv); + consumer.SkipUntilUtf8(0, 
StringConsumer::READ_ALL_SEPARATORS); + CHECK(consumer.PeekUntilUtf8(0, StringConsumer::KEEP_SEPARATOR) == "ij"); + consumer.SkipUntilUtf8(0, StringConsumer::SKIP_ALL_SEPARATORS); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::READ_ALL_SEPARATORS) == "kl"); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::READ_ONE_SEPARATOR) == "kl"); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::KEEP_SEPARATOR) == "kl"); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::SKIP_ONE_SEPARATOR) == "kl"); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::SKIP_ALL_SEPARATORS) == "kl"); + CHECK(consumer.ReadUntilUtf8(0x234, StringConsumer::READ_ALL_SEPARATORS) == "kl"); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::READ_ALL_SEPARATORS) == ""); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::READ_ONE_SEPARATOR) == ""); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::KEEP_SEPARATOR) == ""); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::SKIP_ONE_SEPARATOR) == ""); + CHECK(consumer.PeekUntilUtf8(0x234, StringConsumer::SKIP_ALL_SEPARATORS) == ""); + CHECK(consumer.ReadUntilUtf8(0x234, StringConsumer::READ_ALL_SEPARATORS) == ""); + CHECK(consumer.ReadUntilUtf8(0x234, StringConsumer::READ_ONE_SEPARATOR) == ""); + CHECK(consumer.ReadUntilUtf8(0x234, StringConsumer::KEEP_SEPARATOR) == ""); + CHECK(consumer.ReadUntilUtf8(0x234, StringConsumer::SKIP_ONE_SEPARATOR) == ""); + CHECK(consumer.ReadUntilUtf8(0x234, StringConsumer::SKIP_ALL_SEPARATORS) == ""); + CHECK(consumer.Peek(2) == ""); + CHECK(consumer.Read(2) == ""); +} + +/* Checks the character-set helpers (FindCharIn/NotIn, Peek/Read/SkipUntilCharIn/NotIn, Peek/Read/SkipCharIfIn), including the predefined WHITESPACE_NO_NEWLINE and WHITESPACE_OR_NEWLINE sets. */ TEST_CASE("StringConsumer - ascii") +{ + StringConsumer consumer("abcdefgh \r\n\tAB \r\n\t"sv); + CHECK(consumer.FindCharIn("dc") == 2); + CHECK(consumer.FindCharIn("xy") == StringConsumer::npos); + CHECK(consumer.FindCharNotIn("ba") == 2); + CHECK(consumer.PeekUntilCharNotIn("ba") == "ab"); + CHECK(consumer.PeekUntilCharNotIn("dc") == ""); + CHECK(consumer.PeekUntilCharIn("ba") == ""); + CHECK(consumer.PeekUntilCharIn("dc") 
== "ab"); + CHECK(consumer.ReadUntilCharNotIn("dc") == ""); + CHECK(consumer.ReadUntilCharNotIn("ba") == "ab"); + CHECK(consumer.ReadUntilCharIn("dc") == ""); + CHECK(consumer.ReadUntilCharIn("fe") == "cd"); + CHECK(consumer.PeekIf("ef")); + consumer.SkipUntilCharNotIn("ji"); + CHECK(consumer.PeekIf("ef")); + consumer.SkipUntilCharNotIn("fe"); + CHECK(consumer.PeekIf("gh")); + consumer.SkipUntilCharIn("hg"); + CHECK(consumer.PeekIf("gh")); + consumer.SkipUntilCharIn(StringConsumer::WHITESPACE_OR_NEWLINE); + CHECK(consumer.PeekCharIfIn(StringConsumer::WHITESPACE_OR_NEWLINE) == ' '); + CHECK(consumer.ReadCharIfIn(StringConsumer::WHITESPACE_OR_NEWLINE) == ' '); + consumer.SkipCharIfIn(StringConsumer::WHITESPACE_OR_NEWLINE); + CHECK(consumer.PeekUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE) == "\r"); + CHECK(consumer.ReadUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE) == "\r"); + consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE); + CHECK(consumer.PeekCharIfIn(StringConsumer::WHITESPACE_OR_NEWLINE) == '\n'); + CHECK(consumer.ReadCharIfIn(StringConsumer::WHITESPACE_OR_NEWLINE) == '\n'); + CHECK(consumer.PeekUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE) == "\t"); + CHECK(consumer.ReadUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE) == "\t"); + consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_NO_NEWLINE); + CHECK(consumer.PeekUntilCharIn(StringConsumer::WHITESPACE_OR_NEWLINE) == "AB"); + CHECK(consumer.ReadUntilCharIn(StringConsumer::WHITESPACE_OR_NEWLINE) == "AB"); + CHECK(consumer.PeekUntilCharNotIn(StringConsumer::WHITESPACE_OR_NEWLINE) == " \r\n\t"); + consumer.SkipUntilCharNotIn(StringConsumer::WHITESPACE_OR_NEWLINE); + CHECK(!consumer.AnyBytesLeft()); +} + +/* Checks Peek/TryRead/Read/SkipIntegerBase with base 0, 10 and 16 on a space-separated list of positive, negative, hex-prefixed and large numbers, and the default value returned by reads once the input is exhausted. NOTE(review): the integer type arguments of these calls are missing in this text, so which width each CHECK targets cannot be told from here. */ TEST_CASE("StringConsumer - parse int") +{ + StringConsumer consumer("1 a -a -2 -2 ffffFFFF ffffFFFF -1aaaAAAA -1aaaAAAA +3 1234567890123 1234567890123 1234567890123 ffffFFFFffffFFFE ffffFFFFffffFFFE ffffFFFFffffFFFE ffffFFFFffffFFFE -0x1aaaAAAAaaaaAAAA 
-1234567890123 "sv); + CHECK(consumer.PeekIntegerBase(0) == std::pair(1, 1)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(1, 1)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 1)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 1)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 1)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 1)); + CHECK(consumer.TryReadIntegerBase(10) == 1); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 0xa)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 0xa)); + CHECK(consumer.ReadIntegerBase(16) == 0xa); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(2, -0xa)); + CHECK(consumer.ReadIntegerBase(16) == -0xa); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(2, -2)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(2, -2)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(2, -2)); + CHECK(consumer.TryReadIntegerBase(10) == std::nullopt); + CHECK(consumer.ReadIntegerBase(10) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(2, -2)); + CHECK(consumer.TryReadIntegerBase(10) == std::nullopt); + 
CHECK(consumer.ReadIntegerBase(10) == -2); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(8, 0xffffffff)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.TryReadIntegerBase(16) == std::nullopt); + CHECK(consumer.ReadIntegerBase(16) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.TryReadIntegerBase(16) == 0xffffffff); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(9, -0x1aaaaaaa)); + CHECK(consumer.TryReadIntegerBase(16) == std::nullopt); + CHECK(consumer.ReadIntegerBase(16) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(9, -0x1aaaaaaa)); + CHECK(consumer.TryReadIntegerBase(16) == std::nullopt); + CHECK(consumer.ReadIntegerBase(16) == -0x1aaaaaaa); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + consumer.SkipIntegerBase(10); + CHECK(consumer.ReadUtf8() == '+'); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 3)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 3)); + consumer.SkipIntegerBase(10); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(13, 1234567890123)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(13, 1234567890123)); + CHECK(consumer.TryReadIntegerBase(10) == std::nullopt); + CHECK(consumer.ReadIntegerBase(10) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(13, 1234567890123)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(13, 
1234567890123)); + CHECK(consumer.TryReadIntegerBase(10) == std::nullopt); + CHECK(consumer.ReadIntegerBase(10) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(13, 1234567890123)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(13, 1234567890123)); + CHECK(consumer.ReadIntegerBase(10) == 1234567890123); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(16, 0xffffffff'fffffffe)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.ReadIntegerBase(16) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(16, 0xffffffff'fffffffe)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.ReadIntegerBase(16) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(16, 0xffffffff'fffffffe)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.ReadIntegerBase(16) == 0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(16, 0xffffffff'fffffffe)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.ReadIntegerBase(16) == 0xffffffff'fffffffe); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(2, 0)); + 
CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(2, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(19, -0x1aaaaaaa'aaaaaaaa)); + CHECK(consumer.ReadIntegerBase(0) == -0x1aaaaaaa'aaaaaaaa); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(14, -1234567890123)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(14, -1234567890123)); + CHECK(consumer.ReadIntegerBase(0) == -1234567890123); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + consumer.SkipIntegerBase(10); + consumer.SkipIntegerBase(10); + consumer.SkipIntegerBase(0); + consumer.SkipIntegerBase(0); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + 
CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + consumer.SkipIntegerBase(10); + consumer.SkipIntegerBase(10); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(10, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); + CHECK(consumer.ReadIntegerBase(0, 42) == 42); +} + +/* Checks how malformed integers ('x', '0x', '-', '-0x', '0y') are peeked with base 0, 10 and 16, and how much of each SkipIntegerBase(0) consumes. */ TEST_CASE("StringConsumer - invalid int") +{ + StringConsumer consumer("x 0x - -0x 0y"sv); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + consumer.SkipIntegerBase(0); + consumer.SkipIntegerBase(10); + consumer.SkipIntegerBase(16); + CHECK(consumer.ReadUtf8() == 'x'); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 0)); + consumer.SkipIntegerBase(0); + CHECK(consumer.ReadUtf8() == ' '); + 
CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + consumer.SkipIntegerBase(0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(2, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(0, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(2, 0)); + consumer.SkipIntegerBase(0); + CHECK(consumer.ReadUtf8() == ' '); + CHECK(consumer.PeekIntegerBase(0) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(0) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(10) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 0)); + CHECK(consumer.PeekIntegerBase(16) == std::pair(1, 0)); + consumer.SkipIntegerBase(0); + CHECK(consumer.ReadUtf8() == 'y'); +}