1
0
mirror of https://github.com/OpenRCT2/OpenRCT2 synced 2026-01-16 19:43:06 +01:00

Merge pull request #17122 from Gymnasiast/refactor/cleanup-utf8

Clean up text conversion functions
This commit is contained in:
Michael Steenbeek
2022-05-03 18:06:35 +02:00
committed by GitHub
7 changed files with 31 additions and 104 deletions

View File

@@ -666,49 +666,9 @@ namespace String
throw std::runtime_error("Unsupported code page: " + std::to_string(codePage));
}
}
/**
 * Encodes a UTF-16 ICU string as a multi-byte string in the given code page.
 *
 * @param src The text to encode. Taken by value; getTerminatedBuffer() may touch the
 *            string's internal buffer in order to terminate it.
 * @param dstCodePage Identifier of the target code page, resolved to an ICU converter
 *                    name through GetIcuCodePage().
 * @return The encoded bytes, trimmed to exactly what the converter produced. An empty
 *         string is returned (and the ICU error is logged) if the converter cannot be
 *         opened or the conversion fails.
 */
static std::string CodePageFromUnicode(icu::UnicodeString src, int32_t dstCodePage)
{
    UErrorCode status = U_ZERO_ERROR;
    const char* codepage = GetIcuCodePage(dstCodePage);
    UConverter* conv = ucnv_open(codepage, &status);
    if (U_FAILURE(status))
    {
        log_error("ICU error: %s", u_errorName(status));
        // An empty string, not nullptr: building a std::string from a null pointer is undefined behaviour.
        return {};
    }

    // Size the output for the worst case: maximum bytes per UTF-16 unit times the number of units.
    const int32_t srcLength = src.length();
    const int8_t char_size = ucnv_getMaxCharSize(conv);
    std::string buffer(char_size * srcLength, '\0');
    char* buffer_target = buffer.data();
    char* buffer_limit = buffer_target + buffer.size();

    // Ready the source string as well...
    const char16_t* source = src.getTerminatedBuffer();
    const char16_t* source_limit = source + srcLength;

    // Convert the lot. ICU advances buffer_target past the last byte it wrote.
    ucnv_fromUnicode(
        conv, &buffer_target, buffer_limit, static_cast<const UChar**>(&source), source_limit, nullptr, true, &status);

    // The converter is no longer needed on either the success or the failure path.
    ucnv_close(conv);

    if (U_FAILURE(status))
    {
        log_error("ICU error: %s", u_errorName(status));
        return {};
    }

    // Drop the unused tail of the worst-case allocation so callers do not see padding NULs.
    buffer.resize(static_cast<std::string::size_type>(buffer_target - buffer.data()));
    return buffer;
}
#endif
std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage)
std::string ConvertToUtf8(std::string_view src, int32_t srcCodePage)
{
#ifdef _WIN32
// Convert from source code page to UTF-16
@@ -724,9 +684,9 @@ namespace String
std::string dst;
{
int srcLen = static_cast<int>(u16.size());
int sizeReq = WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr);
int sizeReq = WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr);
dst = std::string(sizeReq, 0);
WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr);
WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr);
}
return dst;
@@ -735,14 +695,7 @@ namespace String
icu::UnicodeString convertString(src.data(), codepage);
std::string result;
if (dstCodePage == CODE_PAGE::CP_UTF8)
{
convertString.toUTF8String(result);
}
else
{
result = CodePageFromUnicode(convertString, dstCodePage);
}
convertString.toUTF8String(result);
return result;
#endif

View File

@@ -119,9 +119,9 @@ namespace String
std::string Trim(const std::string& s);
/**
* Converts a multi-byte string from one code page to another.
* @param src The multi-byte text to convert.
* @param srcCodePage The code page that src is encoded in.
* @param dstCodePage The code page to encode the result in.
* @returns The converted text.
*/
std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage);
/**
* Converts a multi-byte string from one code page to UTF-8.
* @param src The multi-byte text to convert.
* @param srcCodePage The code page that src is encoded in.
* @returns The text encoded as UTF-8.
*/
std::string ConvertToUtf8(std::string_view src, int32_t srcCodePage);
/**
* Returns an uppercased version of a UTF-8 string.

View File

@@ -14,9 +14,15 @@
#include <cstdlib>
#include <iterator>
// clang-format off
const encoding_convert_entry RCT2ToUnicodeTable[] =
/**
 * One row of a character conversion table: a 16-bit source character code
 * paired with the Unicode value it maps to.
 */
struct encoding_convert_entry
{
    // Source character code; this is the field table lookups are keyed on.
    uint16_t code;
    // Unicode value that `code` translates to.
    uint32_t unicode;
};
extern const encoding_convert_entry RCT2ToUnicodeTable[];
const encoding_convert_entry RCT2ToUnicodeTable[] = {
// { 1, FORMAT_MOVE_X },
// { 2, FORMAT_ADJUST_PALETTE },
// { 5, FORMAT_NEWLINE },
@@ -80,7 +86,7 @@ const encoding_convert_entry RCT2ToUnicodeTable[] =
{ CSChar::road, UnicodeChar::road },
{ CSChar::air, UnicodeChar::air },
{ CSChar::water, UnicodeChar::water },
{ CSChar::superscript_minus_one, UnicodeChar::superscript_minus_one},
{ CSChar::superscript_minus_one, UnicodeChar::superscript_minus_one },
{ CSChar::bullet, UnicodeChar::bullet },
{ CSChar::small_up, UnicodeChar::small_up },
{ CSChar::small_down, UnicodeChar::small_down },
@@ -99,37 +105,22 @@ const encoding_convert_entry RCT2ToUnicodeTable[] =
{ CSChar::z_acute, UnicodeChar::z_acute },
};
// Three-way comparator for looking up an encoding_convert_entry by its `code` field.
// pKey   - pointer to the code being searched for; it is read as a uint16_t, so the
//          pointee must really be a 16-bit value.
// pEntry - pointer to the table element being compared against.
// Returns -1 if the key sorts before the entry, 1 if after, 0 if they are equal.
// NOTE(review): this span carries the comparator both before and after reformatting
// (two signature lines and two copies of the comparisons) - confirm which lines are live.
static int32_t encoding_search_compare(const void *pKey, const void *pEntry)
static int32_t encoding_search_compare(const void* pKey, const void* pEntry)
{
// Reinterpret the opaque key as the 16-bit code type used by the table.
const uint16_t key = *reinterpret_cast<const uint16_t*>(pKey);
const encoding_convert_entry *entry = static_cast<const encoding_convert_entry*>(pEntry);
if (key < entry->code) return -1;
if (key > entry->code) return 1;
const encoding_convert_entry* entry = static_cast<const encoding_convert_entry*>(pEntry);
// Order strictly by the entry's source code.
if (key < entry->code)
return -1;
if (key > entry->code)
return 1;
return 0;
}
static wchar_t encoding_convert_x_to_unicode(wchar_t code, const encoding_convert_entry *table, size_t count)
{
encoding_convert_entry * entry = static_cast<encoding_convert_entry *>(std::bsearch(&code, table, count, sizeof(encoding_convert_entry), encoding_search_compare));
if (entry == nullptr)
return code;
return entry->unicode;
}
/**
 * Translates an RCT2 character code to Unicode by searching RCT2ToUnicodeTable.
 * Whatever encoding_convert_x_to_unicode yields for that table is returned as-is.
 */
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str)
{
    const auto entryCount = std::size(RCT2ToUnicodeTable);
    const wchar_t converted = encoding_convert_x_to_unicode(rct2str, RCT2ToUnicodeTable, entryCount);
    return converted;
}
// Maps a Unicode value back to its RCT2 character code by scanning RCT2ToUnicodeTable.
// Returns the `code` of the first entry whose `unicode` field equals the argument, or
// the argument unchanged when no entry matches.
uint32_t encoding_convert_unicode_to_rct2(uint32_t unicode)
{
// Can't do a binary search as it's sorted by RCT2 code, not unicode
for (const auto& entry : RCT2ToUnicodeTable)
{
if (entry.unicode == unicode)
{
return entry.code;
}
}
// No table entry carries this Unicode value: hand the input back untouched.
return unicode;
// NOTE(review): the statements below follow an unconditional return and refer to
// `rct2str`, which is not a parameter of this function. They look like the replacement
// body of encoding_convert_rct2_to_unicode from the same change - confirm where they belong.
// NOTE(review): the key address given to std::bsearch is read as a uint16_t by
// encoding_search_compare - confirm the type of `rct2str` matches that.
encoding_convert_entry* entry = static_cast<encoding_convert_entry*>(std::bsearch(
&rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable), sizeof(encoding_convert_entry), encoding_search_compare));
if (entry == nullptr)
return rct2str;
return entry->unicode;
}

View File

@@ -11,13 +11,4 @@
#include "../common.h"
/**
 * Pairing of a 16-bit character code with its Unicode equivalent; the element
 * type of the RCT2ToUnicodeTable array declared alongside it.
 */
struct encoding_convert_entry
{
    // Character code in the source encoding.
    uint16_t code;
    // Corresponding Unicode value.
    uint32_t unicode;
};
extern const encoding_convert_entry RCT2ToUnicodeTable[];
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str);
uint32_t encoding_convert_unicode_to_rct2(uint32_t unicode);

View File

@@ -121,5 +121,5 @@ std::string rct2_to_utf8(std::string_view src, RCT2LanguageId languageId)
}
auto decoded = DecodeToMultiByte(src);
return String::Convert(decoded, codePage, CODE_PAGE::CP_UTF8);
return String::ConvertToUtf8(decoded, codePage);
}

View File

@@ -599,7 +599,7 @@ private:
}
// Convert to UTF-8 filename
return String::Convert(normalisedName, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8);
return String::ConvertToUtf8(normalisedName, CODE_PAGE::CP_1252);
}
else
{

View File

@@ -86,15 +86,7 @@ TEST_F(StringTest, Convert_950_to_UTF8)
{
auto input = StringFromHex("a7d6b374aabab4c4a6e2aab0af57");
auto expected = u8"快速的棕色狐狸";
auto actual = String::Convert(input, CODE_PAGE::CP_950, CODE_PAGE::CP_UTF8);
ASSERT_EQ(expected, actual);
}
TEST_F(StringTest, Convert_UTF8_to_932)
{
auto input = u8"ファストブラウンフォックス";
auto expected = StringFromHex("83748340835883678375838983458393837483488362834e8358");
auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_932);
auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_950);
ASSERT_EQ(expected, actual);
}
@@ -102,7 +94,7 @@ TEST_F(StringTest, Convert_UTF8_to_UTF8)
{
auto input = u8"سريع|brown|ثعلب";
auto expected = input;
auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_UTF8);
auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_UTF8);
ASSERT_EQ(expected, actual);
}
@@ -110,7 +102,7 @@ TEST_F(StringTest, Convert_Empty)
{
auto input = "";
auto expected = input;
auto actual = String::Convert(input, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8);
auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_1252);
ASSERT_EQ(expected, actual);
}