From 6fcfd481ab07910701e36aacb8cc5fa79dda061a Mon Sep 17 00:00:00 2001 From: Gymnasiast Date: Sun, 1 May 2022 17:57:51 +0200 Subject: [PATCH 1/6] Rename String::Convert() to String::ConvertToUtf8() --- src/openrct2/core/String.cpp | 55 ++---------------------- src/openrct2/core/String.hpp | 4 +- src/openrct2/localisation/Convert.cpp | 2 +- src/openrct2/object/ObjectRepository.cpp | 2 +- 4 files changed, 8 insertions(+), 55 deletions(-) diff --git a/src/openrct2/core/String.cpp b/src/openrct2/core/String.cpp index d7e153f7fd..ef5cbba059 100644 --- a/src/openrct2/core/String.cpp +++ b/src/openrct2/core/String.cpp @@ -666,49 +666,9 @@ namespace String throw std::runtime_error("Unsupported code page: " + std::to_string(codePage)); } } - - static std::string CodePageFromUnicode(icu::UnicodeString src, int32_t dstCodePage) - { - UConverter* conv; - UErrorCode status = U_ZERO_ERROR; - - const char* codepage = GetIcuCodePage(dstCodePage); - conv = ucnv_open(codepage, &status); - - if (U_FAILURE(status)) - { - log_error("ICU error: %s", u_errorName(status)); - return nullptr; - } - - // Allocate buffer to convert to. - int8_t char_size = ucnv_getMaxCharSize(conv); - std::string buffer(char_size * src.length(), '\0'); - - char* buffer_limit = &buffer[0] + (char_size * src.length()); - - // Ready the source string as well... - const char16_t* source = src.getTerminatedBuffer(); - const char16_t* source_limit = source + src.length(); - - // Convert the lot. - char* buffer_target = &buffer[0]; - ucnv_fromUnicode( - conv, &buffer_target, buffer_limit, static_cast(&source), source_limit, nullptr, true, &status); - - if (U_FAILURE(status)) - { - log_error("ICU error: %s", u_errorName(status)); - return nullptr; - } - - ucnv_close(conv); - - return buffer; - } #endif - std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage) + std::string ConvertToUtf8(std::string_view src, int32_t srcCodePage) { #ifdef _WIN32 // Convert from source code page to UTF-16 @@ -724,9 +684,9 @@ namespace String std::string dst; { int srcLen = static_cast(u16.size()); - int sizeReq = WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr); + int sizeReq = WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr); dst = std::string(sizeReq, 0); - WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr); + WideCharToMultiByte(CODE_PAGE::CP_UTF8, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr); } return dst; @@ -735,14 +695,7 @@ namespace String icu::UnicodeString convertString(src.data(), codepage); std::string result; - if (dstCodePage == CODE_PAGE::CP_UTF8) - { - convertString.toUTF8String(result); - } - else - { - result = CodePageFromUnicode(convertString, dstCodePage); - } + convertString.toUTF8String(result); return result; #endif diff --git a/src/openrct2/core/String.hpp b/src/openrct2/core/String.hpp index 182ced6c71..9238598118 100644 --- a/src/openrct2/core/String.hpp +++ b/src/openrct2/core/String.hpp @@ -119,9 +119,9 @@ namespace String std::string Trim(const std::string& s); /** - * Converts a multi-byte string from one code page to another. + * Converts a multi-byte string from one code page to UTF-8. */ - std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage); + std::string ConvertToUtf8(std::string_view src, int32_t srcCodePage); /** * Returns an uppercased version of a UTF-8 string. diff --git a/src/openrct2/localisation/Convert.cpp b/src/openrct2/localisation/Convert.cpp index 7321a7d049..489a5dc613 100644 --- a/src/openrct2/localisation/Convert.cpp +++ b/src/openrct2/localisation/Convert.cpp @@ -121,5 +121,5 @@ std::string rct2_to_utf8(std::string_view src, RCT2LanguageId languageId) } auto decoded = DecodeToMultiByte(src); - return String::Convert(decoded, codePage, CODE_PAGE::CP_UTF8); + return String::ConvertToUtf8(decoded, codePage); } diff --git a/src/openrct2/object/ObjectRepository.cpp b/src/openrct2/object/ObjectRepository.cpp index 50681d5b02..aaba30bd6e 100644 --- a/src/openrct2/object/ObjectRepository.cpp +++ b/src/openrct2/object/ObjectRepository.cpp @@ -599,7 +599,7 @@ private: } // Convert to UTF-8 filename - return String::Convert(normalisedName, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8); + return String::ConvertToUtf8(normalisedName, CODE_PAGE::CP_1252); } else { From 62d5a69051afda577ea5884babad40d5639e5c43 Mon Sep 17 00:00:00 2001 From: Gymnasiast Date: Sun, 1 May 2022 17:58:32 +0200 Subject: [PATCH 2/6] Remove unused encoding_convert_unicode_to_rct2() --- src/openrct2/localisation/ConversionTables.cpp | 13 ------------- src/openrct2/localisation/ConversionTables.h | 1 - 2 files changed, 14 deletions(-) diff --git a/src/openrct2/localisation/ConversionTables.cpp b/src/openrct2/localisation/ConversionTables.cpp index 445ce2f236..6aa142a0b4 100644 --- a/src/openrct2/localisation/ConversionTables.cpp +++ b/src/openrct2/localisation/ConversionTables.cpp @@ -120,16 +120,3 @@ wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str) { return encoding_convert_x_to_unicode(rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable)); } - -uint32_t encoding_convert_unicode_to_rct2(uint32_t unicode) -{ - // Can't do a binary search as it's sorted by RCT2 code, not unicode - for (const auto& entry : RCT2ToUnicodeTable) - { - if (entry.unicode == unicode) - { - return entry.code; - } - } - return unicode; -} diff --git a/src/openrct2/localisation/ConversionTables.h b/src/openrct2/localisation/ConversionTables.h index 88300573d5..1e6941e919 100644 --- a/src/openrct2/localisation/ConversionTables.h +++ b/src/openrct2/localisation/ConversionTables.h @@ -20,4 +20,3 @@ struct encoding_convert_entry extern const encoding_convert_entry RCT2ToUnicodeTable[]; wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str); -uint32_t encoding_convert_unicode_to_rct2(uint32_t unicode); From 598675ca8f30c3865f32fd85568e69e8df80fae3 Mon Sep 17 00:00:00 2001 From: Gymnasiast Date: Sun, 1 May 2022 17:59:52 +0200 Subject: [PATCH 3/6] Merge encoding_convert_rct2_to_unicode() with generic counterpart --- src/openrct2/localisation/ConversionTables.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/openrct2/localisation/ConversionTables.cpp b/src/openrct2/localisation/ConversionTables.cpp index 6aa142a0b4..6d6528e410 100644 --- a/src/openrct2/localisation/ConversionTables.cpp +++ b/src/openrct2/localisation/ConversionTables.cpp @@ -108,15 +108,10 @@ static int32_t encoding_search_compare(const void *pKey, const void *pEntry) return 0; } -static wchar_t encoding_convert_x_to_unicode(wchar_t code, const encoding_convert_entry *table, size_t count) -{ - encoding_convert_entry * entry = static_cast(std::bsearch(&code, table, count, sizeof(encoding_convert_entry), encoding_search_compare)); - if (entry == nullptr) - return code; - return entry->unicode; -} - wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str) { - return encoding_convert_x_to_unicode(rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable)); + encoding_convert_entry * entry = static_cast(std::bsearch(&rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable), sizeof(encoding_convert_entry), encoding_search_compare)); + if (entry == nullptr) + return rct2str; + return entry->unicode; } From aaf83b48c4ca30ecccef70c21aaa48c735091668 Mon Sep 17 00:00:00 2001 From: Gymnasiast Date: Sun, 1 May 2022 18:01:30 +0200 Subject: [PATCH 4/6] Limit scope of encoding_convert_entry to ConversionTables.cpp --- src/openrct2/localisation/ConversionTables.cpp | 8 ++++++++ src/openrct2/localisation/ConversionTables.h | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/openrct2/localisation/ConversionTables.cpp b/src/openrct2/localisation/ConversionTables.cpp index 6d6528e410..8914ea8341 100644 --- a/src/openrct2/localisation/ConversionTables.cpp +++ b/src/openrct2/localisation/ConversionTables.cpp @@ -14,6 +14,14 @@ #include #include +struct encoding_convert_entry +{ + uint16_t code; + uint32_t unicode; +}; + +extern const encoding_convert_entry RCT2ToUnicodeTable[]; + // clang-format off const encoding_convert_entry RCT2ToUnicodeTable[] = { diff --git a/src/openrct2/localisation/ConversionTables.h b/src/openrct2/localisation/ConversionTables.h index 1e6941e919..322bcee7df 100644 --- a/src/openrct2/localisation/ConversionTables.h +++ b/src/openrct2/localisation/ConversionTables.h @@ -11,12 +11,4 @@ #include "../common.h" -struct encoding_convert_entry -{ - uint16_t code; - uint32_t unicode; -}; - -extern const encoding_convert_entry RCT2ToUnicodeTable[]; - wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str); From b46496308abd3ef1dfa4fd2696855891ee3d2204 Mon Sep 17 00:00:00 2001 From: Gymnasiast Date: Sun, 1 May 2022 18:01:58 +0200 Subject: [PATCH 5/6] Remove unnecessary occurrence of clang-format off and reformat --- .../localisation/ConversionTables.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/openrct2/localisation/ConversionTables.cpp b/src/openrct2/localisation/ConversionTables.cpp index 8914ea8341..47b06c9142 100644 --- a/src/openrct2/localisation/ConversionTables.cpp +++ b/src/openrct2/localisation/ConversionTables.cpp @@ -22,9 +22,7 @@ struct encoding_convert_entry extern const encoding_convert_entry RCT2ToUnicodeTable[]; -// clang-format off -const encoding_convert_entry RCT2ToUnicodeTable[] = -{ +const encoding_convert_entry RCT2ToUnicodeTable[] = { // { 1, FORMAT_MOVE_X }, // { 2, FORMAT_ADJUST_PALETTE }, // { 5, FORMAT_NEWLINE }, @@ -88,7 +86,7 @@ const encoding_convert_entry RCT2ToUnicodeTable[] = { CSChar::road, UnicodeChar::road }, { CSChar::air, UnicodeChar::air }, { CSChar::water, UnicodeChar::water }, - { CSChar::superscript_minus_one, UnicodeChar::superscript_minus_one}, + { CSChar::superscript_minus_one, UnicodeChar::superscript_minus_one }, { CSChar::bullet, UnicodeChar::bullet }, { CSChar::small_up, UnicodeChar::small_up }, { CSChar::small_down, UnicodeChar::small_down }, @@ -107,18 +105,21 @@ const encoding_convert_entry RCT2ToUnicodeTable[] = { CSChar::z_acute, UnicodeChar::z_acute }, }; -static int32_t encoding_search_compare(const void *pKey, const void *pEntry) +static int32_t encoding_search_compare(const void* pKey, const void* pEntry) { const uint16_t key = *reinterpret_cast(pKey); - const encoding_convert_entry *entry = static_cast(pEntry); - if (key < entry->code) return -1; - if (key > entry->code) return 1; + const encoding_convert_entry* entry = static_cast(pEntry); + if (key < entry->code) + return -1; + if (key > entry->code) + return 1; return 0; } wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str) { - encoding_convert_entry * entry = static_cast(std::bsearch(&rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable), sizeof(encoding_convert_entry), encoding_search_compare)); + encoding_convert_entry* entry = static_cast(std::bsearch( + &rct2str, RCT2ToUnicodeTable, std::size(RCT2ToUnicodeTable), sizeof(encoding_convert_entry), encoding_search_compare)); if (entry == nullptr) return rct2str; return entry->unicode; From 34eeacf3768e39460a4237a906a2523ed45f80a8 Mon Sep 17 00:00:00 2001 From: Gymnasiast Date: Sun, 1 May 2022 18:52:28 +0200 Subject: [PATCH 6/6] Update tests --- test/tests/StringTest.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/test/tests/StringTest.cpp b/test/tests/StringTest.cpp index f2f002244e..535af4e23c 100644 --- a/test/tests/StringTest.cpp +++ b/test/tests/StringTest.cpp @@ -86,15 +86,7 @@ TEST_F(StringTest, Convert_950_to_UTF8) { auto input = StringFromHex("a7d6b374aabab4c4a6e2aab0af57"); auto expected = u8"快速的棕色狐狸"; - auto actual = String::Convert(input, CODE_PAGE::CP_950, CODE_PAGE::CP_UTF8); - ASSERT_EQ(expected, actual); -} - -TEST_F(StringTest, Convert_UTF8_to_932) -{ - auto input = u8"ファストブラウンフォックス"; - auto expected = StringFromHex("83748340835883678375838983458393837483488362834e8358"); - auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_932); + auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_950); ASSERT_EQ(expected, actual); } @@ -102,7 +94,7 @@ TEST_F(StringTest, Convert_UTF8_to_UTF8) { auto input = u8"سريع|brown|ثعلب"; auto expected = input; - auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_UTF8); + auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_UTF8); ASSERT_EQ(expected, actual); } @@ -110,7 +102,7 @@ TEST_F(StringTest, Convert_Empty) { auto input = ""; auto expected = input; - auto actual = String::Convert(input, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8); + auto actual = String::ConvertToUtf8(input, CODE_PAGE::CP_1252); ASSERT_EQ(expected, actual); }