mirror of
https://github.com/OpenRCT2/OpenRCT2
synced 2026-01-16 19:43:06 +01:00
Merge pull request #17122 from Gymnasiast/refactor/cleanup-utf8
Clean up text conversion functions
This commit is contained in:
@@ -666,49 +666,9 @@ namespace String
|
||||
throw std::runtime_error("Unsupported code page: " + std::to_string(codePage));
|
||||
}
|
||||
}
|
||||
|
||||
static std::string CodePageFromUnicode(icu::UnicodeString src, int32_t dstCodePage)
|
||||
{
|
||||
UConverter* conv;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
const char* codepage = GetIcuCodePage(dstCodePage);
|
||||
conv = ucnv_open(codepage, &status);
|
||||
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
log_error("ICU error: %s", u_errorName(status));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Allocate buffer to convert to.
|
||||
int8_t char_size = ucnv_getMaxCharSize(conv);
|
||||
std::string buffer(char_size * src.length(), '\0');
|
||||
|
||||
char* buffer_limit = &buffer[0] + (char_size * src.length());
|
||||
|
||||
// Ready the source string as well...
|
||||
const char16_t* source = src.getTerminatedBuffer();
|
||||
const char16_t* source_limit = source + src.length();
|
||||
|
||||
// Convert the lot.
|
||||
char* buffer_target = &buffer[0];
|
||||
ucnv_fromUnicode(
|
||||
conv, &buffer_target, buffer_limit, static_cast<const UChar**>(&source), source_limit, nullptr, true, &status);
|
||||
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
log_error("ICU error: %s", u_errorName(status));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ucnv_close(conv);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
#endif
|
||||
|
||||
std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage)
|
||||
std::string ConvertToUtf8(std::string_view src, int32_t srcCodePage)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// Convert from source code page to UTF-16
|
||||
@@ -724,9 +684,9 @@ namespace String
|
||||
std::string dst;
|
||||
{
|
||||
int srcLen = static_cast<int>(u16.size());
|
||||
int sizeReq = WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr);
|
||||
int sizeReq = WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr);
|
||||
dst = std::string(sizeReq, 0);
|
||||
WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr);
|
||||
WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr);
|
||||
}
|
||||
|
||||
return dst;
|
||||
@@ -735,14 +695,7 @@ namespace String
|
||||
icu::UnicodeString convertString(src.data(), codepage);
|
||||
|
||||
std::string result;
|
||||
if (dstCodePage == CODE_PAGE::CP_UTF8)
|
||||
{
|
||||
convertString.toUTF8String(result);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = CodePageFromUnicode(convertString, dstCodePage);
|
||||
}
|
||||
convertString.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
#endif
|
||||
|
||||
@@ -119,9 +119,9 @@ namespace String
|
||||
std::string Trim(const std::string& s);
|
||||
|
||||
/**
|
||||
* Converts a multi-byte string from one code page to another.
|
||||
* Converts a multi-byte string from one code page to UTF-8.
|
||||
*/
|
||||
std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage);
|
||||
std::string ConvertToUtf8(std::string_view src, int32_t srcCodePage);
|
||||
|
||||
/**
|
||||
* Returns an uppercased version of a UTF-8 string.
|
||||
|
||||
@@ -14,9 +14,15 @@
|
||||
#include <cstdlib>
|
||||
#include <iterator>
|
||||
|
||||
// clang-format off
|
||||
const encoding_convert_entry RCT2ToUnicodeTable[] =
|
||||
// One row of an RCT2-to-Unicode mapping table: `code` is the RCT2 character code
// (the field the lookup comparator compares against) and `unicode` is the value
// returned for that code.
struct encoding_convert_entry
|
||||
{
|
||||
uint16_t code;
|
||||
uint32_t unicode;
|
||||
};
|
||||
|
||||
extern const encoding_convert_entry RCT2ToUnicodeTable[];
|
||||
|
||||
const encoding_convert_entry RCT2ToUnicodeTable[] = {
|
||||
// { 1, FORMAT_MOVE_X },
|
||||
// { 2, FORMAT_ADJUST_PALETTE },
|
||||
// { 5, FORMAT_NEWLINE },
|
||||
@@ -80,7 +86,7 @@ const encoding_convert_entry RCT2ToUnicodeTable[] =
|
||||
{ CSChar::road, UnicodeChar::road },
|
||||
{ CSChar::air, UnicodeChar::air },
|
||||
{ CSChar::water, UnicodeChar::water },
|
||||
{ CSChar::superscript_minus_one, UnicodeChar::superscript_minus_one},
|
||||
{ CSChar::superscript_minus_one, UnicodeChar::superscript_minus_one },
|
||||
{ CSChar::bullet, UnicodeChar::bullet },
|
||||
{ CSChar::small_up, UnicodeChar::small_up },
|
||||
{ CSChar::small_down, UnicodeChar::small_down },
|
||||
@@ -99,37 +105,22 @@ const encoding_convert_entry RCT2ToUnicodeTable[] =
|
||||
{ CSChar::z_acute, UnicodeChar::z_acute },
|
||||
};
|
||||
|
||||
static int32_t encoding_search_compare(const void *pKey, const void *pEntry)
|
||||
static int32_t encoding_search_compare(const void* pKey, const void* pEntry)
|
||||
{
|
||||
const uint16_t key = *reinterpret_cast<const uint16_t*>(pKey);
|
||||
const encoding_convert_entry *entry = static_cast<const encoding_convert_entry*>(pEntry);
|
||||
if (key < entry->code) return -1;
|
||||
if (key > entry->code) return 1;
|
||||
const encoding_convert_entry* entry = static_cast<const encoding_convert_entry*>(pEntry);
|
||||
if (key < entry->code)
|
||||
return -1;
|
||||
if (key > entry->code)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static wchar_t encoding_convert_x_to_unicode(wchar_t code, const encoding_convert_entry *table, size_t count)
|
||||
{
|
||||
encoding_convert_entry * entry = static_cast<encoding_convert_entry *>(std::bsearch(&code, table, count, sizeof(encoding_convert_entry), encoding_search_compare));
|
||||
if (entry == nullptr)
|
||||
return code;
|
||||
return entry->unicode;
|
||||
}
|
||||
|
||||
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str)
|
||||
{
|
||||
return encoding_convert_x_to_unicode(rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable));
|
||||
}
|
||||
|
||||
uint32_t encoding_convert_unicode_to_rct2(uint32_t unicode)
|
||||
{
|
||||
// Can't do a binary search as it's sorted by RCT2 code, not unicode
|
||||
for (const auto& entry : RCT2ToUnicodeTable)
|
||||
{
|
||||
if (entry.unicode == unicode)
|
||||
{
|
||||
return entry.code;
|
||||
}
|
||||
}
|
||||
return unicode;
|
||||
encoding_convert_entry* entry = static_cast<encoding_convert_entry*>(std::bsearch(
|
||||
&rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable), sizeof(encoding_convert_entry), encoding_search_compare));
|
||||
if (entry == nullptr)
|
||||
return rct2str;
|
||||
return entry->unicode;
|
||||
}
|
||||
|
||||
@@ -11,13 +11,4 @@
|
||||
|
||||
#include "../common.h"
|
||||
|
||||
// One row of an RCT2-to-Unicode mapping table: `code` is the RCT2 character code
// (the field the lookup comparator compares against) and `unicode` is the value
// returned for that code.
struct encoding_convert_entry
|
||||
{
|
||||
uint16_t code;
|
||||
uint32_t unicode;
|
||||
};
|
||||
|
||||
extern const encoding_convert_entry RCT2ToUnicodeTable[];
|
||||
|
||||
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str);
|
||||
uint32_t encoding_convert_unicode_to_rct2(uint32_t unicode);
|
||||
|
||||
@@ -121,5 +121,5 @@ std::string rct2_to_utf8(std::string_view src, RCT2LanguageId languageId)
|
||||
}
|
||||
|
||||
auto decoded = DecodeToMultiByte(src);
|
||||
return String::Convert(decoded, codePage, CODE_PAGE::CP_UTF8);
|
||||
return String::ConvertToUtf8(decoded, codePage);
|
||||
}
|
||||
|
||||
@@ -599,7 +599,7 @@ private:
|
||||
}
|
||||
|
||||
// Convert to UTF-8 filename
|
||||
return String::Convert(normalisedName, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8);
|
||||
return String::ConvertToUtf8(normalisedName, CODE_PAGE::CP_1252);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -86,15 +86,7 @@ TEST_F(StringTest, Convert_950_to_UTF8)
|
||||
{
|
||||
auto input = StringFromHex("a7d6b374aabab4c4a6e2aab0af57");
|
||||
auto expected = u8"快速的棕色狐狸";
|
||||
auto actual = String::Convert(input, CODE_PAGE::CP_950, CODE_PAGE::CP_UTF8);
|
||||
ASSERT_EQ(expected, actual);
|
||||
}
|
||||
|
||||
TEST_F(StringTest, Convert_UTF8_to_932)
|
||||
{
|
||||
auto input = u8"ファストブラウンフォックス";
|
||||
auto expected = StringFromHex("83748340835883678375838983458393837483488362834e8358");
|
||||
auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_932);
|
||||
auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_950);
|
||||
ASSERT_EQ(expected, actual);
|
||||
}
|
||||
|
||||
@@ -102,7 +94,7 @@ TEST_F(StringTest, Convert_UTF8_to_UTF8)
|
||||
{
|
||||
auto input = u8"سريع|brown|ثعلب";
|
||||
auto expected = input;
|
||||
auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_UTF8);
|
||||
auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_UTF8);
|
||||
ASSERT_EQ(expected, actual);
|
||||
}
|
||||
|
||||
@@ -110,7 +102,7 @@ TEST_F(StringTest, Convert_Empty)
|
||||
{
|
||||
auto input = "";
|
||||
auto expected = input;
|
||||
auto actual = String::Convert(input, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8);
|
||||
auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_1252);
|
||||
ASSERT_EQ(expected, actual);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user