mirror of
https://github.com/OpenRCT2/OpenRCT2
synced 2026-01-15 19:13:07 +01:00
708 lines
18 KiB
C++
708 lines
18 KiB
C++
#pragma region Copyright (c) 2014-2017 OpenRCT2 Developers
|
|
/*****************************************************************************
|
|
* OpenRCT2, an open source clone of Roller Coaster Tycoon 2.
|
|
*
|
|
* OpenRCT2 is the work of many authors, a full list can be found in contributors.md
|
|
* For more information, visit https://github.com/OpenRCT2/OpenRCT2
|
|
*
|
|
* OpenRCT2 is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* A full copy of the GNU General Public License can be found in licence.txt
|
|
*****************************************************************************/
|
|
#pragma endregion
|
|
|
|
#include <cwctype>
|
|
#include <stdexcept>
|
|
#include <vector>
|
|
#ifndef _WIN32
|
|
#include <unicode/ucnv.h>
|
|
#include <unicode/unistr.h>
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#ifndef NOMINMAX
|
|
#define NOMINMAX
|
|
#endif
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#include "../common.h"
|
|
#include "../localisation/ConversionTables.h"
|
|
#include "../localisation/Language.h"
|
|
#include "../util/Util.h"
|
|
|
|
#include "Math.hpp"
|
|
#include "Memory.hpp"
|
|
#include "String.hpp"
|
|
#include "StringBuilder.hpp"
|
|
|
|
namespace String
|
|
{
|
|
std::string ToStd(const utf8 * str)
|
|
{
|
|
if (str == nullptr) return std::string();
|
|
else return std::string(str);
|
|
}
|
|
|
|
std::string StdFormat_VA(const utf8 * format, va_list args)
|
|
{
|
|
auto buffer = Format_VA(format, args);
|
|
auto returnValue = ToStd(buffer);
|
|
Memory::Free(buffer);
|
|
return returnValue;
|
|
}
|
|
|
|
std::string StdFormat(const utf8 * format, ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
const utf8 * buffer = Format_VA(format, args);
|
|
va_end(args);
|
|
std::string returnValue = ToStd(buffer);
|
|
Memory::Free(buffer);
|
|
return returnValue;
|
|
}
|
|
|
|
std::string ToUtf8(const std::wstring &s)
|
|
{
|
|
std::string result;
|
|
utf8 * cstr = widechar_to_utf8(s.c_str());
|
|
if (cstr != nullptr)
|
|
{
|
|
result = std::string(cstr);
|
|
}
|
|
free(cstr);
|
|
return result;
|
|
}
|
|
|
|
std::wstring ToUtf16(const std::string_view& s)
|
|
{
|
|
std::wstring result;
|
|
wchar_t * wcstr = utf8_to_widechar(s.data());
|
|
if (wcstr != nullptr)
|
|
{
|
|
result = std::wstring(wcstr);
|
|
}
|
|
free(wcstr);
|
|
return result;
|
|
}
|
|
|
|
bool IsNullOrEmpty(const utf8 * str)
|
|
{
|
|
return str == nullptr || str[0] == '\0';
|
|
}
|
|
|
|
sint32 Compare(const std::string &a, const std::string &b, bool ignoreCase)
|
|
{
|
|
return Compare(a.c_str(), b.c_str(), ignoreCase);
|
|
}
|
|
|
|
sint32 Compare(const utf8 * a, const utf8 * b, bool ignoreCase)
|
|
{
|
|
if (a == b) return 0;
|
|
if (a == nullptr) a = "";
|
|
if (b == nullptr) b = "";
|
|
if (ignoreCase)
|
|
{
|
|
return _stricmp(a, b);
|
|
}
|
|
else
|
|
{
|
|
return strcmp(a, b);
|
|
}
|
|
}
|
|
|
|
bool Equals(const std::string &a, const std::string &b, bool ignoreCase)
|
|
{
|
|
return Equals(a.c_str(), b.c_str(), ignoreCase);
|
|
}
|
|
|
|
bool Equals(const utf8 * a, const utf8 * b, bool ignoreCase)
|
|
{
|
|
if (a == b) return true;
|
|
if (a == nullptr || b == nullptr) return false;
|
|
|
|
if (ignoreCase)
|
|
{
|
|
return _stricmp(a, b) == 0;
|
|
}
|
|
else
|
|
{
|
|
return strcmp(a, b) == 0;
|
|
}
|
|
}
|
|
|
|
bool StartsWith(const utf8 * str, const utf8 * match, bool ignoreCase)
|
|
{
|
|
if (ignoreCase)
|
|
{
|
|
while (*str != '\0' && *match != '\0')
|
|
{
|
|
if (tolower(*str++) != tolower(*match++))
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
while (*str != '\0' && *match != '\0')
|
|
{
|
|
if (*str++ != *match++)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
|
|
bool StartsWith(const std::string &str, const std::string &match, bool ignoreCase)
|
|
{
|
|
return StartsWith(str.c_str(), match.c_str(), ignoreCase);
|
|
}
|
|
|
|
bool EndsWith(const std::string_view& str, const std::string_view& match, bool ignoreCase)
|
|
{
|
|
if (str.size() >= match.size())
|
|
{
|
|
auto view = str.substr(str.size() - match.size());
|
|
return Equals(view.data(), match.data(), ignoreCase);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
size_t IndexOf(const utf8 * str, utf8 match, size_t startIndex)
|
|
{
|
|
const utf8 * ch = str + startIndex;
|
|
for (; *ch != '\0'; ch++)
|
|
{
|
|
if (*ch == match)
|
|
{
|
|
return (size_t)(ch - str);
|
|
}
|
|
}
|
|
return SIZE_MAX;
|
|
}
|
|
|
|
ptrdiff_t LastIndexOf(const utf8 * str, utf8 match)
|
|
{
|
|
const utf8 * lastOccurance = nullptr;
|
|
const utf8 * ch = str;
|
|
for (; *ch != '\0'; ch++)
|
|
{
|
|
if (*ch == match)
|
|
{
|
|
lastOccurance = ch;
|
|
}
|
|
}
|
|
|
|
if (lastOccurance == nullptr)
|
|
{
|
|
return -1;
|
|
}
|
|
else
|
|
{
|
|
return lastOccurance - str;
|
|
}
|
|
}
|
|
|
|
size_t LengthOf(const utf8 * str)
|
|
{
|
|
return utf8_length(str);
|
|
}
|
|
|
|
size_t SizeOf(const utf8 * str)
|
|
{
|
|
return strlen(str);
|
|
}
|
|
|
|
utf8 * Set(utf8 * buffer, size_t bufferSize, const utf8 * src)
|
|
{
|
|
return safe_strcpy(buffer, src, bufferSize);
|
|
}
|
|
|
|
utf8 * Set(utf8 * buffer, size_t bufferSize, const utf8 * src, size_t srcSize)
|
|
{
|
|
utf8 * dst = buffer;
|
|
size_t minSize = Math::Min(bufferSize - 1, srcSize);
|
|
for (size_t i = 0; i < minSize; i++)
|
|
{
|
|
*dst++ = *src;
|
|
if (*src == '\0') break;
|
|
src++;
|
|
}
|
|
*dst = '\0';
|
|
return buffer;
|
|
}
|
|
|
|
utf8 * Append(utf8 * buffer, size_t bufferSize, const utf8 * src)
|
|
{
|
|
return safe_strcat(buffer, src, bufferSize);
|
|
}
|
|
|
|
utf8 * Format(utf8 * buffer, size_t bufferSize, const utf8 * format, ...)
|
|
{
|
|
va_list args;
|
|
|
|
va_start(args, format);
|
|
vsnprintf(buffer, bufferSize, format, args);
|
|
va_end(args);
|
|
|
|
// Terminate buffer in case formatted string overflowed
|
|
buffer[bufferSize - 1] = '\0';
|
|
|
|
return buffer;
|
|
}
|
|
|
|
utf8 * Format(const utf8 * format, ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
utf8 * result = Format_VA(format, args);
|
|
va_end(args);
|
|
return result;
|
|
}
|
|
|
|
utf8 * Format_VA(const utf8 * format, va_list args)
|
|
{
|
|
va_list args1, args2;
|
|
va_copy(args1, args);
|
|
va_copy(args2, args);
|
|
|
|
// Try to format to a initial buffer, enlarge if not big enough
|
|
size_t bufferSize = 4096;
|
|
utf8 * buffer = Memory::Allocate<utf8>(bufferSize);
|
|
|
|
// Start with initial buffer
|
|
sint32 len = vsnprintf(buffer, bufferSize, format, args);
|
|
if (len < 0)
|
|
{
|
|
Memory::Free(buffer);
|
|
va_end(args1);
|
|
va_end(args2);
|
|
|
|
// An error occurred...
|
|
return nullptr;
|
|
}
|
|
|
|
size_t requiredSize = (size_t)len + 1;
|
|
if (requiredSize > bufferSize)
|
|
{
|
|
// Try again with bigger buffer
|
|
buffer = Memory::Reallocate<utf8>(buffer, bufferSize);
|
|
len = vsnprintf(buffer, bufferSize, format, args);
|
|
if (len < 0)
|
|
{
|
|
Memory::Free(buffer);
|
|
va_end(args1);
|
|
va_end(args2);
|
|
|
|
// An error occurred...
|
|
return nullptr;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Reduce buffer size to only what was required
|
|
bufferSize = requiredSize;
|
|
buffer = Memory::Reallocate<utf8>(buffer, bufferSize);
|
|
}
|
|
|
|
// Ensure buffer is terminated
|
|
buffer[bufferSize - 1] = '\0';
|
|
|
|
va_end(args1);
|
|
va_end(args2);
|
|
return buffer;
|
|
}
|
|
|
|
utf8 * AppendFormat(utf8 * buffer, size_t bufferSize, const utf8 * format, ...)
|
|
{
|
|
utf8 * dst = buffer;
|
|
size_t i;
|
|
for (i = 0; i < bufferSize; i++)
|
|
{
|
|
if (*dst == '\0') break;
|
|
dst++;
|
|
}
|
|
|
|
if (i < bufferSize - 1)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
vsnprintf(dst, bufferSize - i - 1, format, args);
|
|
va_end(args);
|
|
|
|
// Terminate buffer in case formatted string overflowed
|
|
buffer[bufferSize - 1] = '\0';
|
|
}
|
|
|
|
return buffer;
|
|
}
|
|
|
|
utf8 * Duplicate(const std::string &src)
|
|
{
|
|
return String::Duplicate(src.c_str());
|
|
}
|
|
|
|
utf8 * Duplicate(const utf8 * src)
|
|
{
|
|
utf8 * result = nullptr;
|
|
if (src != nullptr)
|
|
{
|
|
size_t srcSize = SizeOf(src) + 1;
|
|
result = Memory::Allocate<utf8>(srcSize);
|
|
memcpy(result, src, srcSize);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
utf8 * DiscardUse(utf8 * * ptr, utf8 * replacement)
|
|
{
|
|
Memory::Free(*ptr);
|
|
*ptr = replacement;
|
|
return replacement;
|
|
}
|
|
|
|
utf8 * DiscardDuplicate(utf8 * * ptr, const utf8 * replacement)
|
|
{
|
|
return DiscardUse(ptr, String::Duplicate(replacement));
|
|
}
|
|
|
|
std::vector<std::string> Split(const std::string &s, const std::string &delimiter)
|
|
{
|
|
if (delimiter.empty())
|
|
{
|
|
throw std::invalid_argument(nameof(delimiter) " can not be empty.");
|
|
}
|
|
|
|
std::vector<std::string> results;
|
|
if (!s.empty())
|
|
{
|
|
size_t index = 0;
|
|
size_t nextIndex;
|
|
do
|
|
{
|
|
nextIndex = s.find(delimiter, index);
|
|
std::string value;
|
|
if (nextIndex == std::string::npos)
|
|
{
|
|
value = s.substr(index);
|
|
}
|
|
else
|
|
{
|
|
value = s.substr(index, nextIndex - index);
|
|
}
|
|
results.push_back(value);
|
|
index = nextIndex + delimiter.size();
|
|
}
|
|
while (nextIndex != SIZE_MAX);
|
|
}
|
|
return results;
|
|
}
|
|
|
|
utf8 * SkipBOM(utf8 * buffer)
|
|
{
|
|
return (utf8*)SkipBOM((const utf8 *)buffer);
|
|
}
|
|
|
|
const utf8 * SkipBOM(const utf8 * buffer)
|
|
{
|
|
if ((uint8)buffer[0] == 0xEF && (uint8)buffer[1] == 0xBB && (uint8)buffer[2] == 0xBF)
|
|
{
|
|
return buffer + 3;
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
size_t GetCodepointLength(codepoint_t codepoint)
|
|
{
|
|
return utf8_get_codepoint_length(codepoint);
|
|
}
|
|
|
|
codepoint_t GetNextCodepoint(utf8 * ptr, utf8 * * nextPtr)
|
|
{
|
|
return GetNextCodepoint((const utf8 *)ptr, (const utf8 * *)nextPtr);
|
|
}
|
|
|
|
codepoint_t GetNextCodepoint(const utf8 * ptr, const utf8 * * nextPtr)
|
|
{
|
|
return utf8_get_next(ptr, nextPtr);
|
|
}
|
|
|
|
utf8 * WriteCodepoint(utf8 * dst, codepoint_t codepoint)
|
|
{
|
|
return utf8_write_codepoint(dst, codepoint);
|
|
}
|
|
|
|
bool IsWhiteSpace(codepoint_t codepoint)
|
|
{
|
|
// 0x3000 is the 'ideographic space', a 'fullwidth' character used in CJK languages.
|
|
return iswspace((wchar_t)codepoint) || codepoint == 0x3000;
|
|
}
|
|
|
|
utf8 * Trim(utf8 * str)
|
|
{
|
|
utf8 * firstNonWhitespace = nullptr;
|
|
|
|
codepoint_t codepoint;
|
|
utf8 * ch = str;
|
|
utf8 * nextCh;
|
|
while ((codepoint = GetNextCodepoint(ch, &nextCh)) != '\0')
|
|
{
|
|
if (codepoint <= WCHAR_MAX && !IsWhiteSpace(codepoint))
|
|
{
|
|
if (firstNonWhitespace == nullptr)
|
|
{
|
|
firstNonWhitespace = ch;
|
|
}
|
|
}
|
|
ch = nextCh;
|
|
}
|
|
|
|
if (firstNonWhitespace != nullptr &&
|
|
firstNonWhitespace != str)
|
|
{
|
|
// Take multibyte characters into account: use the last byte of the
|
|
// current character.
|
|
size_t newStringSize = (nextCh - 1) - firstNonWhitespace;
|
|
|
|
#ifdef DEBUG
|
|
size_t currentStringSize = String::SizeOf(str);
|
|
Guard::Assert(newStringSize < currentStringSize, GUARD_LINE);
|
|
#endif
|
|
|
|
std::memmove(str, firstNonWhitespace, newStringSize);
|
|
str[newStringSize] = '\0';
|
|
}
|
|
else
|
|
{
|
|
*ch = '\0';
|
|
}
|
|
|
|
return str;
|
|
}
|
|
|
|
const utf8 * TrimStart(const utf8 * str)
|
|
{
|
|
codepoint_t codepoint;
|
|
const utf8 * ch = str;
|
|
const utf8 * nextCh;
|
|
while ((codepoint = GetNextCodepoint(ch, &nextCh)) != '\0')
|
|
{
|
|
if (codepoint <= WCHAR_MAX && !IsWhiteSpace(codepoint))
|
|
{
|
|
return ch;
|
|
}
|
|
ch = nextCh;
|
|
}
|
|
// String is all whitespace
|
|
return ch;
|
|
}
|
|
|
|
utf8 * TrimStart(utf8 * buffer, size_t bufferSize, const utf8 * src)
|
|
{
|
|
return String::Set(buffer, bufferSize, TrimStart(src));
|
|
}
|
|
|
|
std::string TrimStart(const std::string &s)
|
|
{
|
|
const utf8 * trimmed = TrimStart(s.c_str());
|
|
return std::string(trimmed);
|
|
}
|
|
|
|
std::string Trim(const std::string &s)
|
|
{
|
|
codepoint_t codepoint;
|
|
const utf8 * ch = s.c_str();
|
|
const utf8 * nextCh;
|
|
const utf8 * startSubstr = nullptr;
|
|
const utf8 * endSubstr = nullptr;
|
|
while ((codepoint = GetNextCodepoint(ch, &nextCh)) != '\0')
|
|
{
|
|
bool isWhiteSpace = codepoint <= WCHAR_MAX && IsWhiteSpace(codepoint);
|
|
if (!isWhiteSpace)
|
|
{
|
|
if (startSubstr == nullptr)
|
|
{
|
|
startSubstr = ch;
|
|
}
|
|
|
|
// Take multibyte characters into account: move pointer towards
|
|
// the last byte of the current character.
|
|
endSubstr = nextCh - 1;
|
|
}
|
|
ch = nextCh;
|
|
}
|
|
|
|
if (startSubstr == nullptr)
|
|
{
|
|
// String is all whitespace
|
|
return std::string();
|
|
}
|
|
|
|
size_t stringLength = endSubstr - startSubstr + 1;
|
|
return std::string(startSubstr, stringLength);
|
|
}
|
|
|
|
#ifndef _WIN32
|
|
static const char* GetIcuCodePage(sint32 codePage)
|
|
{
|
|
switch (codePage)
|
|
{
|
|
case CODE_PAGE::CP_932:
|
|
return "windows-932";
|
|
|
|
case CODE_PAGE::CP_936:
|
|
return "GB2312";
|
|
|
|
case CODE_PAGE::CP_949:
|
|
return "windows-949";
|
|
|
|
case CODE_PAGE::CP_950:
|
|
return "big5";
|
|
|
|
default:
|
|
throw std::runtime_error("Unsupported code page: " + std::to_string(codePage));
|
|
}
|
|
}
|
|
|
|
static char* CodePageFromUTF8(const utf8* src, size_t srcMaxSize, sint32 dstCodePage)
|
|
{
|
|
UConverter *conv;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
const char* codepage = GetIcuCodePage(dstCodePage);
|
|
conv = ucnv_open(codepage, &status);
|
|
|
|
if (U_FAILURE(status))
|
|
{
|
|
log_error("ICU error: %s", u_errorName(status));
|
|
return nullptr;
|
|
}
|
|
|
|
const char *srcLimit = src + srcMaxSize;
|
|
auto sb = StringBuilder(64);
|
|
|
|
while ((src < srcLimit) && (*src != '\0'))
|
|
{
|
|
codepoint_t codepoint = ucnv_getNextUChar(conv, &src, srcLimit, &status);
|
|
|
|
if (U_FAILURE(status))
|
|
{
|
|
log_error("ICU error: %s", u_errorName(status));
|
|
return nullptr;
|
|
}
|
|
|
|
sb.Append(codepoint);
|
|
}
|
|
|
|
ucnv_close(conv);
|
|
|
|
return (char*) sb.StealString();
|
|
}
|
|
#endif
|
|
|
|
std::string Convert(const std::string_view& src, sint32 srcCodePage, sint32 dstCodePage)
|
|
{
|
|
#ifdef _WIN32
|
|
// Convert from source code page to UTF-16
|
|
std::wstring u16;
|
|
{
|
|
int srcLen = (int)src.size();
|
|
int sizeReq = MultiByteToWideChar(srcCodePage, 0, src.data(), srcLen, nullptr, 0);
|
|
u16 = std::wstring(sizeReq, 0);
|
|
MultiByteToWideChar(srcCodePage, 0, src.data(), srcLen, u16.data(), sizeReq);
|
|
}
|
|
|
|
// Convert from UTF-16 to destination code page
|
|
std::string dst;
|
|
{
|
|
int srcLen = (int)u16.size();
|
|
int sizeReq = WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr);
|
|
dst = std::string(sizeReq, 0);
|
|
WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr);
|
|
}
|
|
|
|
return dst;
|
|
#else
|
|
const char* codepage = GetIcuCodePage(srcCodePage);
|
|
icu::UnicodeString convertString(src.data(), codepage);
|
|
|
|
std::string result;
|
|
convertString.toUTF8String(result);
|
|
|
|
if (dstCodePage != CODE_PAGE::CP_UTF8)
|
|
{
|
|
result = CodePageFromUTF8(result.c_str(), result.length(), dstCodePage);
|
|
}
|
|
|
|
return result;
|
|
#endif
|
|
}
|
|
|
|
std::string ToUpper(const std::string_view& src)
|
|
{
|
|
#ifdef _WIN32
|
|
auto srcW = ToUtf16(src);
|
|
auto dstW = std::wstring();
|
|
|
|
// Keep doubling the buffer size until transformed string fits (max 10 times)
|
|
size_t bufferLength = src.size();
|
|
for (size_t i = 0; i < 10; i++)
|
|
{
|
|
// Double buffer size
|
|
bufferLength *= 2;
|
|
dstW.resize(bufferLength);
|
|
|
|
// Transform the string
|
|
auto result = LCMapStringEx(
|
|
LOCALE_NAME_USER_DEFAULT,
|
|
LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING,
|
|
srcW.c_str(),
|
|
(int)srcW.length(),
|
|
dstW.data(),
|
|
(int)dstW.capacity(),
|
|
nullptr,
|
|
nullptr,
|
|
0);
|
|
if (result == 0)
|
|
{
|
|
// Check the error
|
|
auto error = GetLastError();
|
|
if (error == ERROR_INSUFFICIENT_BUFFER)
|
|
{
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
log_warning("LCMapStringEx failed with %d", error);
|
|
return std::string(src);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
dstW.resize(result);
|
|
return String::ToUtf8(dstW);
|
|
}
|
|
}
|
|
log_warning("LCMapStringEx loop exceeded");
|
|
return std::string(src);
|
|
#else
|
|
icu::UnicodeString str = icu::UnicodeString::fromUTF8(std::string(src));
|
|
str.toUpper();
|
|
|
|
std::string res;
|
|
str.toUTF8String(res);
|
|
|
|
return res;
|
|
#endif
|
|
}
|
|
} // namespace String
|