/***************************************************************************** * Copyright (c) 2014-2020 OpenRCT2 developers * * For a complete list of all authors, please refer to contributors.md * Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2 * * OpenRCT2 is licensed under the GNU General Public License version 3. *****************************************************************************/ #if defined(__MINGW32__) && !defined(WINVER) && !defined(_WIN32_WINNT) // 0x0600 == vista # define WINVER 0x0600 # define _WIN32_WINNT 0x0600 #endif // __MINGW32__ #include #include #include #include #ifndef _WIN32 # include # include # include #endif #ifdef _WIN32 # include #endif #include "../common.h" #include "../localisation/ConversionTables.h" #include "../localisation/FormatCodes.h" #include "../localisation/Language.h" #include "../util/Util.h" #include "Memory.hpp" #include "String.hpp" #include "StringBuilder.h" namespace String { std::string ToStd(const utf8* str) { if (str == nullptr) return std::string(); else return std::string(str); } std::string StdFormat_VA(const utf8* format, va_list args) { auto buffer = Format_VA(format, args); auto returnValue = ToStd(buffer); Memory::Free(buffer); return returnValue; } std::string StdFormat(const utf8* format, ...) { va_list args; va_start(args, format); const utf8* buffer = Format_VA(format, args); va_end(args); std::string returnValue = ToStd(buffer); Memory::Free(buffer); return returnValue; } std::string ToUtf8(std::wstring_view src) { #ifdef _WIN32 int srcLen = static_cast(src.size()); int sizeReq = WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, src.data(), srcLen, nullptr, 0, nullptr, nullptr); auto result = std::string(sizeReq, 0); WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, src.data(), srcLen, result.data(), sizeReq, nullptr, nullptr); return result; #else // Which constructor to use depends on the size of wchar_t... // UTF-32 is the default on most POSIX systems; Windows uses UTF-16. // Unfortunately, we'll have to help the compiler here. # if U_SIZEOF_WCHAR_T == 4 icu::UnicodeString str = icu::UnicodeString::fromUTF32(reinterpret_cast(src.data()), src.length()); # elif U_SIZEOF_WCHAR_T == 2 std::wstring wstr = std::wstring(src); icu::UnicodeString str = icu::UnicodeString(static_cast(wstr.c_str())); # else # error Unsupported U_SIZEOF_WCHAR_T size # endif std::string result; str.toUTF8String(result); return result; #endif } std::wstring ToWideChar(std::string_view src) { #ifdef _WIN32 int srcLen = static_cast(src.size()); int sizeReq = MultiByteToWideChar(CODE_PAGE::CP_UTF8, 0, src.data(), srcLen, nullptr, 0); auto result = std::wstring(sizeReq, 0); MultiByteToWideChar(CODE_PAGE::CP_UTF8, 0, src.data(), srcLen, result.data(), sizeReq); return result; #else icu::UnicodeString str = icu::UnicodeString::fromUTF8(std::string(src)); // Which constructor to use depends on the size of wchar_t... // UTF-32 is the default on most POSIX systems; Windows uses UTF-16. // Unfortunately, we'll have to help the compiler here. # if U_SIZEOF_WCHAR_T == 4 size_t length = static_cast(str.length()); std::wstring result(length, '\0'); UErrorCode status = U_ZERO_ERROR; str.toUTF32(reinterpret_cast(&result[0]), str.length(), status); # elif U_SIZEOF_WCHAR_T == 2 const char16_t* buffer = str.getBuffer(); std::wstring result = static_cast(buffer); # else # error Unsupported U_SIZEOF_WCHAR_T size # endif return result; #endif } std::string_view ToStringView(const char* ch, size_t maxLen) { size_t len{}; for (size_t i = 0; i < maxLen; i++) { if (ch[i] == '\0') { break; } else { len++; } } return std::string_view(ch, len); } bool IsNullOrEmpty(const utf8* str) { return str == nullptr || str[0] == '\0'; } int32_t Compare(const std::string& a, const std::string& b, bool ignoreCase) { return Compare(a.c_str(), b.c_str(), ignoreCase); } int32_t Compare(const utf8* a, const utf8* b, bool ignoreCase) { if (a == b) return 0; if (a == nullptr) a = ""; if (b == nullptr) b = ""; if (ignoreCase) { return _stricmp(a, b); } else { return strcmp(a, b); } } bool Equals(std::string_view a, std::string_view b, bool ignoreCase) { if (ignoreCase) { if (a.size() == b.size()) { for (size_t i = 0; i < a.size(); i++) { if (tolower(a[i]) != tolower(b[i])) { return false; } } return true; } else { return false; } } else { return a == b; } } bool Equals(const std::string& a, const std::string& b, bool ignoreCase) { if (a.size() != b.size()) return false; if (ignoreCase) { for (size_t i = 0; i < a.size(); i++) { auto ai = a[i]; auto bi = b[i]; // Only do case insensitive comparison on ASCII characters if ((ai & 0x80) != 0 || (bi & 0x80) != 0) { if (a[i] != b[i]) { return false; } } else if (tolower(ai) != tolower(bi)) { return false; } } } else { for (size_t i = 0; i < a.size(); i++) { if (a[i] != b[i]) { return false; } } } return true; } bool Equals(const utf8* a, const utf8* b, bool ignoreCase) { if (a == b) return true; if (a == nullptr || b == nullptr) return false; if (ignoreCase) { return _stricmp(a, b) == 0; } else { return strcmp(a, b) == 0; } } bool StartsWith(std::string_view str, std::string_view match, bool ignoreCase) { if (str.size() >= match.size()) { auto view = str.substr(0, match.size()); return Equals(view, match, ignoreCase); } return false; } bool EndsWith(std::string_view str, std::string_view match, bool ignoreCase) { if (str.size() >= match.size()) { auto view = str.substr(str.size() - match.size()); return Equals(view, match, ignoreCase); } return false; } size_t IndexOf(const utf8* str, utf8 match, size_t startIndex) { const utf8* ch = str + startIndex; for (; *ch != '\0'; ch++) { if (*ch == match) { return static_cast(ch - str); } } return SIZE_MAX; } ptrdiff_t LastIndexOf(const utf8* str, utf8 match) { const utf8* lastOccurance = nullptr; const utf8* ch = str; for (; *ch != '\0'; ch++) { if (*ch == match) { lastOccurance = ch; } } if (lastOccurance == nullptr) { return -1; } else { return lastOccurance - str; } } size_t LengthOf(const utf8* str) { return utf8_length(str); } size_t SizeOf(const utf8* str) { return strlen(str); } utf8* Set(utf8* buffer, size_t bufferSize, const utf8* src) { return safe_strcpy(buffer, src, bufferSize); } utf8* Set(utf8* buffer, size_t bufferSize, const utf8* src, size_t srcSize) { utf8* dst = buffer; size_t minSize = std::min(bufferSize - 1, srcSize); for (size_t i = 0; i < minSize; i++) { *dst++ = *src; if (*src == '\0') break; src++; } *dst = '\0'; return buffer; } utf8* Append(utf8* buffer, size_t bufferSize, const utf8* src) { return safe_strcat(buffer, src, bufferSize); } utf8* Format(utf8* buffer, size_t bufferSize, const utf8* format, ...) { va_list args; va_start(args, format); vsnprintf(buffer, bufferSize, format, args); va_end(args); // Terminate buffer in case formatted string overflowed buffer[bufferSize - 1] = '\0'; return buffer; } utf8* Format(const utf8* format, ...) { va_list args; va_start(args, format); utf8* result = Format_VA(format, args); va_end(args); return result; } utf8* Format_VA(const utf8* format, va_list args) { va_list args1, args2; va_copy(args1, args); va_copy(args2, args); // Try to format to a initial buffer, enlarge if not big enough size_t bufferSize = 4096; utf8* buffer = Memory::Allocate(bufferSize); // Start with initial buffer int32_t len = vsnprintf(buffer, bufferSize, format, args); if (len < 0) { Memory::Free(buffer); va_end(args1); va_end(args2); // An error occurred... return nullptr; } size_t requiredSize = static_cast(len) + 1; if (requiredSize > bufferSize) { // Try again with bigger buffer buffer = Memory::Reallocate(buffer, bufferSize); len = vsnprintf(buffer, bufferSize, format, args); if (len < 0) { Memory::Free(buffer); va_end(args1); va_end(args2); // An error occurred... return nullptr; } } else { // Reduce buffer size to only what was required bufferSize = requiredSize; buffer = Memory::Reallocate(buffer, bufferSize); } // Ensure buffer is terminated buffer[bufferSize - 1] = '\0'; va_end(args1); va_end(args2); return buffer; } utf8* AppendFormat(utf8* buffer, size_t bufferSize, const utf8* format, ...) { utf8* dst = buffer; size_t i; for (i = 0; i < bufferSize; i++) { if (*dst == '\0') break; dst++; } if (i < bufferSize - 1) { va_list args; va_start(args, format); vsnprintf(dst, bufferSize - i - 1, format, args); va_end(args); // Terminate buffer in case formatted string overflowed buffer[bufferSize - 1] = '\0'; } return buffer; } utf8* Duplicate(const std::string& src) { return String::Duplicate(src.c_str()); } utf8* Duplicate(const utf8* src) { utf8* result = nullptr; if (src != nullptr) { size_t srcSize = SizeOf(src) + 1; result = Memory::Allocate(srcSize); std::memcpy(result, src, srcSize); } return result; } utf8* DiscardUse(utf8** ptr, utf8* replacement) { Memory::Free(*ptr); *ptr = replacement; return replacement; } utf8* DiscardDuplicate(utf8** ptr, const utf8* replacement) { return DiscardUse(ptr, String::Duplicate(replacement)); } std::vector Split(std::string_view s, std::string_view delimiter) { if (delimiter.empty()) { throw std::invalid_argument(nameof(delimiter) " can not be empty."); } std::vector results; if (!s.empty()) { size_t index = 0; size_t nextIndex; do { nextIndex = s.find(delimiter, index); if (nextIndex == std::string::npos) { results.emplace_back(s.substr(index)); } else { results.emplace_back(s.substr(index, nextIndex - index)); } index = nextIndex + delimiter.size(); } while (nextIndex != SIZE_MAX); } return results; } utf8* SkipBOM(utf8* buffer) { return const_cast(SkipBOM(static_cast(buffer))); } const utf8* SkipBOM(const utf8* buffer) { if (static_cast(buffer[0]) == 0xEF && static_cast(buffer[1]) == 0xBB && static_cast(buffer[2]) == 0xBF) { return buffer + 3; } return buffer; } size_t GetCodepointLength(codepoint_t codepoint) { return utf8_get_codepoint_length(codepoint); } codepoint_t GetNextCodepoint(utf8* ptr, utf8** nextPtr) { return GetNextCodepoint(static_cast(ptr), const_cast(nextPtr)); } codepoint_t GetNextCodepoint(const utf8* ptr, const utf8** nextPtr) { return utf8_get_next(ptr, nextPtr); } utf8* WriteCodepoint(utf8* dst, codepoint_t codepoint) { return utf8_write_codepoint(dst, codepoint); } void AppendCodepoint(std::string& str, codepoint_t codepoint) { char buffer[8]{}; utf8_write_codepoint(buffer, codepoint); str.append(buffer); } bool IsWhiteSpace(codepoint_t codepoint) { // 0x3000 is the 'ideographic space', a 'fullwidth' character used in CJK languages. return iswspace(static_cast(codepoint)) || codepoint == 0x3000; } utf8* Trim(utf8* str) { utf8* firstNonWhitespace = nullptr; codepoint_t codepoint; utf8* ch = str; utf8* nextCh; while ((codepoint = GetNextCodepoint(ch, &nextCh)) != '\0') { if (codepoint <= WCHAR_MAX && !IsWhiteSpace(codepoint)) { if (firstNonWhitespace == nullptr) { firstNonWhitespace = ch; } } ch = nextCh; } if (firstNonWhitespace != nullptr && firstNonWhitespace != str) { // Take multibyte characters into account: use the last byte of the // current character. size_t newStringSize = (nextCh - 1) - firstNonWhitespace; #ifdef DEBUG size_t currentStringSize = String::SizeOf(str); Guard::Assert(newStringSize < currentStringSize, GUARD_LINE); #endif std::memmove(str, firstNonWhitespace, newStringSize); str[newStringSize] = '\0'; } else { *ch = '\0'; } return str; } const utf8* TrimStart(const utf8* str) { codepoint_t codepoint; const utf8* ch = str; const utf8* nextCh; while ((codepoint = GetNextCodepoint(ch, &nextCh)) != '\0') { if (codepoint <= WCHAR_MAX && !IsWhiteSpace(codepoint)) { return ch; } ch = nextCh; } // String is all whitespace return ch; } utf8* TrimStart(utf8* buffer, size_t bufferSize, const utf8* src) { return String::Set(buffer, bufferSize, TrimStart(src)); } std::string TrimStart(const std::string& s) { const utf8* trimmed = TrimStart(s.c_str()); return std::string(trimmed); } std::string Trim(const std::string& s) { codepoint_t codepoint; const utf8* ch = s.c_str(); const utf8* nextCh; const utf8* startSubstr = nullptr; const utf8* endSubstr = nullptr; while ((codepoint = GetNextCodepoint(ch, &nextCh)) != '\0') { bool isWhiteSpace = codepoint <= WCHAR_MAX && IsWhiteSpace(codepoint); if (!isWhiteSpace) { if (startSubstr == nullptr) { startSubstr = ch; } // Take multibyte characters into account: move pointer towards // the last byte of the current character. endSubstr = nextCh - 1; } ch = nextCh; } if (startSubstr == nullptr) { // String is all whitespace return std::string(); } size_t stringLength = endSubstr - startSubstr + 1; return std::string(startSubstr, stringLength); } #ifndef _WIN32 static const char* GetIcuCodePage(int32_t codePage) { switch (codePage) { case CODE_PAGE::CP_932: return "windows-932"; case CODE_PAGE::CP_936: return "GB2312"; case CODE_PAGE::CP_949: return "windows-949"; case CODE_PAGE::CP_950: return "big5"; case CODE_PAGE::CP_1252: return "windows-1252"; case CODE_PAGE::CP_UTF8: return "utf-8"; default: throw std::runtime_error("Unsupported code page: " + std::to_string(codePage)); } } static std::string CodePageFromUnicode(icu::UnicodeString src, int32_t dstCodePage) { UConverter* conv; UErrorCode status = U_ZERO_ERROR; const char* codepage = GetIcuCodePage(dstCodePage); conv = ucnv_open(codepage, &status); if (U_FAILURE(status)) { log_error("ICU error: %s", u_errorName(status)); return nullptr; } // Allocate buffer to convert to. int8_t char_size = ucnv_getMaxCharSize(conv); std::string buffer(char_size * src.length(), '\0'); char* buffer_limit = &buffer[0] + (char_size * src.length()); // Ready the source string as well... const char16_t* source = src.getTerminatedBuffer(); const char16_t* source_limit = source + src.length(); // Convert the lot. char* buffer_target = &buffer[0]; ucnv_fromUnicode( conv, &buffer_target, buffer_limit, static_cast(&source), source_limit, nullptr, true, &status); if (U_FAILURE(status)) { log_error("ICU error: %s", u_errorName(status)); return nullptr; } ucnv_close(conv); return buffer; } #endif std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage) { #ifdef _WIN32 // Convert from source code page to UTF-16 std::wstring u16; { int srcLen = static_cast(src.size()); int sizeReq = MultiByteToWideChar(srcCodePage, 0, src.data(), srcLen, nullptr, 0); u16 = std::wstring(sizeReq, 0); MultiByteToWideChar(srcCodePage, 0, src.data(), srcLen, u16.data(), sizeReq); } // Convert from UTF-16 to destination code page std::string dst; { int srcLen = static_cast(u16.size()); int sizeReq = WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr); dst = std::string(sizeReq, 0); WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr); } return dst; #else const char* codepage = GetIcuCodePage(srcCodePage); icu::UnicodeString convertString(src.data(), codepage); std::string result; if (dstCodePage == CODE_PAGE::CP_UTF8) { convertString.toUTF8String(result); } else { result = CodePageFromUnicode(convertString, dstCodePage); } return result; #endif } std::string ToUpper(std::string_view src) { #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 auto srcW = ToWideChar(src); // Measure how long the destination needs to be auto requiredSize = LCMapStringEx( LOCALE_NAME_USER_DEFAULT, LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING, srcW.c_str(), static_cast(srcW.length()), nullptr, 0, nullptr, nullptr, 0); auto dstW = std::wstring(); dstW.resize(requiredSize); // Transform the string auto result = LCMapStringEx( LOCALE_NAME_USER_DEFAULT, LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING, srcW.c_str(), static_cast(srcW.length()), dstW.data(), static_cast(dstW.length()), nullptr, nullptr, 0); if (result == 0) { // Check the error auto error = GetLastError(); log_warning("LCMapStringEx failed with %d", error); return std::string(src); } else { return String::ToUtf8(dstW); } # else std::string dst = std::string(src); std::transform(dst.begin(), dst.end(), dst.begin(), [](unsigned char c) { return std::toupper(c); }); return dst; # endif #else icu::UnicodeString str = icu::UnicodeString::fromUTF8(std::string(src)); str.toUpper(); std::string res; str.toUTF8String(res); return res; #endif } } // namespace String char32_t CodepointView::iterator::GetNextCodepoint(const char* ch, const char** next) { return utf8_get_next(ch, next); }