1
0
mirror of https://github.com/OpenRCT2/OpenRCT2 synced 2025-12-10 09:32:29 +01:00

Merge pull request #7414 from OpenRCT2/refactor/string-conversions

Refactor string conversion between RCT2 and UTF8
This commit is contained in:
Ted John
2018-04-25 10:14:39 +01:00
committed by GitHub
23 changed files with 512 additions and 251 deletions

View File

@@ -92,7 +92,7 @@ matrix:
- docker
- os: osx
if: type != cron
osx_image: xcode8.3
osx_image: xcode9.3
env:
- secure: "OXn/i72FxW/oh6RGlaN+gHSbkt1ToFe36etaiDOsJQznt6fe9CpFdnE8U1XBHlGokcEjbGNErRU7CFDKYHQuGrPZyHXwgqG2/0emIqFaFt5ti5ypyYKf5qH9x1LLLfdZxDyHkxXdlJ7Etxbp3G7qrV8CGRQiYRNHm1f98AmuufE="
after_success:

View File

@@ -127,7 +127,7 @@ else ()
set(PIE_FLAG "-fpie")
endif ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++14")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
endif ()
# Defines

View File

@@ -1103,11 +1103,10 @@ static void load_landscape()
void utf8_to_rct2_self(char * buffer, size_t length)
{
char tempBuffer[512];
utf8_to_rct2(tempBuffer, buffer);
auto temp = utf8_to_rct2(buffer);
size_t i = 0;
const char * src = tempBuffer;
const char * src = temp.data();
char * dst = buffer;
while (*src != 0 && i < length - 1)
{
@@ -1143,9 +1142,8 @@ void rct2_to_utf8_self(char * buffer, size_t length)
{
if (length > 0)
{
char tempBuffer[512];
rct2_to_utf8(tempBuffer, buffer);
safe_strcpy(buffer, tempBuffer, length);
auto temp = rct2_to_utf8(buffer, RCT2_LANGUAGE_ID_ENGLISH_UK);
safe_strcpy(buffer, temp.data(), length);
}
}

View File

@@ -37,17 +37,13 @@ utf8 * IStream::ReadString()
std::string IStream::ReadStdString()
{
std::vector<utf8> result;
std::string result;
uint8 ch;
while ((ch = ReadValue<uint8>()) != 0)
{
result.push_back(ch);
}
result.push_back(0);
std::string resultString(result.data(), result.data() + result.size());
return resultString;
return result;
}
void IStream::WriteString(const utf8 * str)

View File

@@ -18,6 +18,14 @@
#include <stdexcept>
#include <vector>
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include "../localisation/Language.h"
#include "../util/Util.h"
@@ -517,4 +525,32 @@ namespace String
size_t stringLength = endSubstr - startSubstr + 1;
return std::string(startSubstr, stringLength);
}
/**
 * Converts a multi-byte string from one code page to another.
 *
 * On Windows the text is first converted to UTF-16 with MultiByteToWideChar and then to the
 * destination code page with WideCharToMultiByte. An empty string is returned when the input is
 * empty or when either conversion reports failure.
 * On other platforms no conversion is implemented yet: STUB() is invoked and a copy of src is returned.
 *
 * @param src Text encoded in srcCodePage.
 * @param srcCodePage Code page identifier of src.
 * @param dstCodePage Code page identifier wanted for the result.
 * @returns The converted text.
 */
std::string Convert(const std::string_view& src, sint32 srcCodePage, sint32 dstCodePage)
{
#ifdef _WIN32
    // Both Win32 conversion functions treat a zero-length input as an error, so answer the
    // trivial case here instead of relying on their failure value.
    if (src.empty())
    {
        return std::string();
    }

    // Convert from source code page to UTF-16
    std::wstring u16;
    {
        int srcLen = (int)src.size();
        int sizeReq = MultiByteToWideChar(srcCodePage, 0, src.data(), srcLen, nullptr, 0);
        if (sizeReq <= 0)
        {
            // The API reported failure; there is nothing to convert further
            return std::string();
        }
        u16 = std::wstring(sizeReq, 0);
        MultiByteToWideChar(srcCodePage, 0, src.data(), srcLen, u16.data(), sizeReq);
    }

    // Convert from UTF-16 to destination code page
    std::string dst;
    {
        int srcLen = (int)u16.size();
        int sizeReq = WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, nullptr, 0, nullptr, nullptr);
        if (sizeReq <= 0)
        {
            return std::string();
        }
        dst = std::string(sizeReq, 0);
        WideCharToMultiByte(dstCodePage, 0, u16.data(), srcLen, dst.data(), sizeReq, nullptr, nullptr);
    }
    return dst;
#else
    STUB();
    return std::string(src);
#endif
}
}

View File

@@ -22,6 +22,19 @@
#include <vector>
#include "../common.h"
/**
 * Numeric code page identifiers accepted by String::Convert.
 */
namespace CODE_PAGE
{
// windows.h defines CP_UTF8 as a macro; remove it so the constant of the same name can be declared below
#undef CP_UTF8
constexpr sint32 CP_932 = 932; // ANSI/OEM Japanese; Japanese (Shift-JIS)
constexpr sint32 CP_936 = 936; // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
constexpr sint32 CP_949 = 949; // ANSI/OEM Korean (Unified Hangul Code)
constexpr sint32 CP_950 = 950; // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
constexpr sint32 CP_1252 = 1252; // ANSI Latin 1; Western European (Windows)
constexpr sint32 CP_UTF8 = 65001; // Unicode (UTF-8)
}
namespace String
{
constexpr const utf8 * Empty = "";
@@ -91,4 +104,9 @@ namespace String
utf8 * TrimStart(utf8 * buffer, size_t bufferSize, const utf8 * src);
std::string TrimStart(const std::string &s);
std::string Trim(const std::string &s);
/**
* Converts a multi-byte string from one code page to another.
*/
std::string Convert(const std::string_view& src, sint32 srcCodePage, sint32 dstCodePage);
}

View File

@@ -15,7 +15,6 @@
#pragma endregion
#include "../core/Util.hpp"
#include "../localisation/ConversionTables.h"
#include "../localisation/FormatCodes.h"
#include "../localisation/Language.h"
#include "../sprites.h"

View File

@@ -14,8 +14,16 @@
*****************************************************************************/
#pragma endregion
#include "ConversionTables.h"
#include <cstdlib>
#include "../core/Util.hpp"
#include "FormatCodes.h"
#include "Localisation.h"
/**
 * One row of a conversion table: a character code in a legacy encoding and the Unicode value it maps to.
 */
struct encoding_convert_entry
{
uint16 code; // value in the source encoding; the key compared by encoding_search_compare
uint32 unicode; // Unicode value returned when `code` matches
};
// clang-format off
const encoding_convert_entry RCT2ToUnicodeTable[256] =
@@ -277,7 +285,44 @@ const encoding_convert_entry RCT2ToUnicodeTable[256] =
{ RCT2_Z_ACUTE, UNICODE_Z_ACUTE },
{ 255, 255 }
};
/**
 * Three-way comparison callback used with std::bsearch on a conversion table.
 * @param pKey Points to the uint16 code being searched for.
 * @param pEntry Points to the encoding_convert_entry being examined.
 * @returns -1, 0 or 1 when the key is below, equal to or above the entry's code.
 */
static sint32 encoding_search_compare(const void *pKey, const void *pEntry)
{
    const uint16 wanted = *static_cast<const uint16 *>(pKey);
    const uint16 candidate = static_cast<const encoding_convert_entry *>(pEntry)->code;
    if (wanted == candidate)
    {
        return 0;
    }
    return (wanted < candidate) ? -1 : 1;
}
static wchar_t encoding_convert_x_to_unicode(wchar_t code, const encoding_convert_entry *table, size_t count)
{
encoding_convert_entry * entry = (encoding_convert_entry *)std::bsearch(&code, table, count, sizeof(encoding_convert_entry), encoding_search_compare);
if (entry == nullptr) return code;
else return entry->unicode;
}
/**
 * Maps a single RCT2 character code to Unicode using RCT2ToUnicodeTable.
 * Codes without a table entry are returned unchanged.
 */
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str)
{
    const size_t tableLength = Util::CountOf(RCT2ToUnicodeTable);
    return encoding_convert_x_to_unicode(rct2str, RCT2ToUnicodeTable, tableLength);
}
/**
 * Maps a Unicode value back to its RCT2 character code using RCT2ToUnicodeTable.
 * @returns The code of the first entry whose unicode value matches, or `unicode` unchanged
 *          when no entry matches.
 */
uint32 encoding_convert_unicode_to_rct2(uint32 unicode)
{
    // The table is ordered by RCT2 code rather than by unicode value, so it has to be scanned linearly
    const size_t tableLength = Util::CountOf(RCT2ToUnicodeTable);
    for (size_t i = 0; i < tableLength; i++)
    {
        const encoding_convert_entry &candidate = RCT2ToUnicodeTable[i];
        if (candidate.unicode == unicode)
        {
            return candidate.code;
        }
    }
    return unicode;
}
#ifndef _WIN32
const encoding_convert_entry GB2312ToUnicodeTable[7445] =
{
{ 8481, 12288 },
@@ -46295,3 +46340,25 @@ const encoding_convert_entry CP949ToUnicodeTable[17176] =
{ 0xFDFE, 0x8A70 }, // CJK UNIFIED IDEOGRAPH
};
//clang-format on
/**
 * Maps a single GB2312 character to Unicode using GB2312ToUnicodeTable.
 * The table is keyed by the double-byte value with 0x8080 subtracted (its first entry is 8481, i.e. 0x2121).
 * @param gb2312 Either a single-byte character or a double-byte value packed as (first << 8) | second.
 * @returns The Unicode value. Values below 0x8080 are returned unchanged.
 */
wchar_t encoding_convert_gb2312_to_unicode(wchar_t gb2312)
{
    // Only double-byte values carry the 0x8080 offset. Anything smaller (e.g. ASCII) is a
    // single-byte character; subtracting the offset from it would produce a meaningless value
    // that is then handed back on a table miss, so pass it through untouched.
    if (gb2312 < 0x8080)
    {
        return gb2312;
    }
    return encoding_convert_x_to_unicode(gb2312 - 0x8080, GB2312ToUnicodeTable, Util::CountOf(GB2312ToUnicodeTable));
}
/**
 * Maps a single Big5 character code to Unicode using Big5ToUnicodeTable.
 * Codes without a table entry are returned unchanged.
 */
wchar_t encoding_convert_big5_to_unicode(wchar_t big5)
{
    const size_t tableLength = Util::CountOf(Big5ToUnicodeTable);
    return encoding_convert_x_to_unicode(big5, Big5ToUnicodeTable, tableLength);
}
/**
 * Maps a single code page 932 character code to Unicode using CP932ToUnicodeTable.
 * Codes without a table entry are returned unchanged.
 */
wchar_t encoding_convert_cp932_to_unicode(wchar_t cp932)
{
    const size_t tableLength = Util::CountOf(CP932ToUnicodeTable);
    return encoding_convert_x_to_unicode(cp932, CP932ToUnicodeTable, tableLength);
}
/**
 * Maps a single code page 949 character code to Unicode using CP949ToUnicodeTable.
 * Codes without a table entry are returned unchanged.
 */
wchar_t encoding_convert_cp949_to_unicode(wchar_t cp949)
{
    const size_t tableLength = Util::CountOf(CP949ToUnicodeTable);
    return encoding_convert_x_to_unicode(cp949, CP949ToUnicodeTable, tableLength);
}
#endif

View File

@@ -1,73 +0,0 @@
#pragma region Copyright (c) 2014-2018 OpenRCT2 Developers
/*****************************************************************************
* OpenRCT2, an open source clone of Roller Coaster Tycoon 2.
*
* OpenRCT2 is the work of many authors, a full list can be found in contributors.md
* For more information, visit https://github.com/OpenRCT2/OpenRCT2
*
* OpenRCT2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* A full copy of the GNU General Public License can be found in licence.txt
*****************************************************************************/
#pragma endregion
#pragma once
#include "../common.h"
struct encoding_convert_entry
{
uint16 code;
uint32 unicode;
};
extern const encoding_convert_entry GB2312ToUnicodeTable[7445];
extern const encoding_convert_entry Big5ToUnicodeTable[13710];
extern const encoding_convert_entry RCT2ToUnicodeTable[256];
extern const encoding_convert_entry CP932ToUnicodeTable[7916];
extern const encoding_convert_entry CP949ToUnicodeTable[17176];
enum RCT2Polish
{
RCT2_A_OGONEK_UC = 159, // 0x9F
RCT2_C_ACUTE_UC = 162, // 0xA2
RCT2_E_OGONEK_UC = 166, // 0xA6
RCT2_N_ACUTE_UC = 198, // 0xC6
RCT2_L_STROKE_UC = 167, // 0xA7
RCT2_S_ACUTE_UC = 208, // 0xD0
RCT2_Z_DOT_UC = 216, // 0xD8
RCT2_Z_ACUTE_UC = 215, // 0xD7
RCT2_A_OGONEK = 221, // 0xDD
RCT2_C_ACUTE = 222, // 0xDE
RCT2_E_OGONEK = 230, // 0xE6
RCT2_N_ACUTE = 240, // 0xF0
RCT2_L_STROKE = 247, // 0xF7
RCT2_S_ACUTE = 248, // 0xF8
RCT2_Z_DOT = 253, // 0xFD
RCT2_Z_ACUTE = 254, // 0xFE
};
enum UnicodePolish
{
UNICODE_A_OGONEK_UC = 260,
UNICODE_C_ACUTE_UC = 262,
UNICODE_E_OGONEK_UC = 280,
UNICODE_N_ACUTE_UC = 323,
UNICODE_L_STROKE_UC = 321,
UNICODE_S_ACUTE_UC = 346,
UNICODE_Z_DOT_UC = 379,
UNICODE_Z_ACUTE_UC = 377,
UNICODE_A_OGONEK = 261,
UNICODE_C_ACUTE = 263,
UNICODE_E_OGONEK = 281,
UNICODE_N_ACUTE = 324,
UNICODE_L_STROKE = 322,
UNICODE_S_ACUTE = 347,
UNICODE_Z_DOT = 380,
UNICODE_Z_ACUTE = 378,
};

View File

@@ -1,4 +1,4 @@
#pragma region Copyright (c) 2014-2017 OpenRCT2 Developers
#pragma region Copyright (c) 2014-2018 OpenRCT2 Developers
/*****************************************************************************
* OpenRCT2, an open source clone of Roller Coaster Tycoon 2.
*
@@ -14,101 +14,182 @@
*****************************************************************************/
#pragma endregion
#include <algorithm>
#include <limits>
#include <stdexcept>
#include "../core/String.hpp"
#include "../core/Util.hpp"
#include "ConversionTables.h"
#include "Localisation.h"
sint32 rct2_to_utf8(utf8 *dst, const char *src)
/**
* Decodes an RCT2 string to a wide char string still in the original code page.
* An RCT2 string is a multi-byte string where every two-byte code point is preceded by a byte value of 255.
*/
static std::wstring DecodeToWideChar(const std::string_view& src)
{
wchar_t codepoint;
std::wstring decoded;
decoded.reserve(src.size());
for (auto it = src.begin(); it != src.end(); )
{
uint8_t c = *it++;
if (c == 255)
{
// Push next two characters
uint8 a = 0;
uint8 b = 0;
if (it != src.end())
{
a = *it++;
if (it != src.end())
{
b = *it++;
}
else
{
// 2nd byte for double byte character is missing
break;
}
}
else
{
// 1st byte for double byte character is missing
break;
}
utf8 *start = dst;
const char *ch = src;
while (*ch != 0) {
if (*ch == (char)(uint8)0xFF) {
ch++;
// Read wide char
uint8 a = *ch++;
uint8 b = *ch++;
codepoint = (a << 8) | b;
} else {
codepoint = (uint8)(*ch++);
codepoint = encoding_convert_rct2_to_unicode(codepoint);
wchar_t cp = (a << 8) | b;
decoded.push_back(cp);
}
else
{
// Push character
decoded.push_back(c);
}
dst = utf8_write_codepoint(dst, codepoint);
}
dst = utf8_write_codepoint(dst, 0);
return (sint32)(dst - start);
return decoded;
}
sint32 utf8_to_rct2(char *dst, const utf8 *src)
static std::string DecodeToMultiByte(const std::string_view& src)
{
char *start = dst;
const utf8 *ch = src;
auto wide = DecodeToWideChar(src);
std::string result;
result.reserve(wide.size());
for (auto cc : wide)
{
if (cc <= 255)
{
result.push_back(cc);
}
else
{
result.push_back((cc >> 8) & 0xFF);
result.push_back(cc & 0xFF);
}
}
return result;
}
/**
* Encodes a UTF-8 string as an RCT2 string.
*/
static std::string Encode(const std::string_view& src)
{
std::string dst;
const utf8 * ch = src.data();
sint32 codepoint;
while ((codepoint = utf8_get_next(ch, &ch)) != 0) {
while ((codepoint = utf8_get_next(ch, &ch)) != 0)
{
codepoint = encoding_convert_unicode_to_rct2(codepoint);
if (codepoint < 256) {
*dst++ = (char)codepoint;
} else if (codepoint <= 0xFFFF) {
*dst++ = (char)(uint8)0xFF;
*dst++ = (codepoint >> 8) & 0xFF;
*dst++ = codepoint & 0xFF;
if (codepoint <= std::numeric_limits<uint8>::max())
{
dst.push_back(codepoint);
}
else if (codepoint <= std::numeric_limits<uint16>::max())
{
dst.push_back((char)(uint8)0xFF);
dst.push_back((codepoint >> 8) & 0xFF);
dst.push_back(codepoint & 0xFF);
}
else
{
// RCT2 strings do not support code points greater than 65535, replace them with '?'
dst.push_back('?');
}
}
*dst++ = 0;
return (sint32)(dst - start);
return dst;
}
static sint32 encoding_search_compare(const void *pKey, const void *pEntry)
static sint32 GetCodePageForRCT2Language(RCT2LanguageId languageId)
{
uint16 key = *((uint16*)pKey);
encoding_convert_entry *entry = (encoding_convert_entry*)pEntry;
if (key < entry->code) return -1;
if (key > entry->code) return 1;
return 0;
}
static wchar_t encoding_convert_x_to_unicode(wchar_t code, const encoding_convert_entry *table, size_t count)
{
encoding_convert_entry * entry = (encoding_convert_entry *)bsearch(&code, table, count, sizeof(encoding_convert_entry), encoding_search_compare);
if (entry == nullptr) return code;
else return entry->unicode;
}
uint32 encoding_convert_unicode_to_rct2(uint32 unicode)
{
// Can't do a binary search as it's sorted by RCT2 code, not unicode
for (uint32 i = 0; i < Util::CountOf(RCT2ToUnicodeTable); i++) {
if (RCT2ToUnicodeTable[i].unicode == unicode) return RCT2ToUnicodeTable[i].code;
switch (languageId)
{
case RCT2_LANGUAGE_ID_JAPANESE:
return CODE_PAGE::CP_932;
case RCT2_LANGUAGE_ID_CHINESE_SIMPLIFIED:
return CODE_PAGE::CP_936;
case RCT2_LANGUAGE_ID_KOREAN:
return CODE_PAGE::CP_949;
case RCT2_LANGUAGE_ID_CHINESE_TRADITIONAL:
return CODE_PAGE::CP_950;
default:
return CODE_PAGE::CP_1252;
}
return unicode;
}
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str)
template<typename TConvertFunc>
static std::string DecodeConvertWithTable(const std::string_view& src, TConvertFunc func)
{
return encoding_convert_x_to_unicode(rct2str, RCT2ToUnicodeTable, Util::CountOf(RCT2ToUnicodeTable));
auto decoded = DecodeToWideChar(src);
std::wstring u16;
u16.reserve(decoded.size());
for (auto cc : decoded)
{
u16.push_back(func(cc));
}
return String::ToUtf8(u16);
}
wchar_t encoding_convert_gb2312_to_unicode(wchar_t gb2312)
std::string rct2_to_utf8(const std::string_view& src, RCT2LanguageId languageId)
{
return encoding_convert_x_to_unicode(gb2312 - 0x8080, GB2312ToUnicodeTable, Util::CountOf(GB2312ToUnicodeTable));
auto codePage = GetCodePageForRCT2Language(languageId);
std::string result;
switch (codePage)
{
case CODE_PAGE::CP_1252:
// The code page used by RCT2 was not quite 1252 as some codes were used for Polish characters.
result = DecodeConvertWithTable(src, encoding_convert_rct2_to_unicode);
break;
#ifdef _WIN32
default:
auto decoded = DecodeToMultiByte(src);
result = String::Convert(decoded, codePage, CODE_PAGE::CP_UTF8);
#else
// TODO Change this to use a library such as libicu
case CODE_PAGE::CP_932:
result = DecodeConvertWithTable(src, encoding_convert_cp932_to_unicode);
break;
case CODE_PAGE::CP_936:
result = DecodeConvertWithTable(src, encoding_convert_gb2312_to_unicode);
break;
case CODE_PAGE::CP_949:
result = DecodeConvertWithTable(src, encoding_convert_cp949_to_unicode);
break;
case CODE_PAGE::CP_950:
result = DecodeConvertWithTable(src, encoding_convert_big5_to_unicode);
break;
default:
throw std::runtime_error("Unsupported code page: " + std::to_string(codePage));
break;
#endif
}
return result;
}
wchar_t encoding_convert_big5_to_unicode(wchar_t big5)
std::string utf8_to_rct2(const std::string_view& src)
{
return encoding_convert_x_to_unicode(big5, Big5ToUnicodeTable, Util::CountOf(Big5ToUnicodeTable));
// NOTE: This is only used for SC6 / SV6 files which don't store the language identifier
// because of this, we can only store in RCT2's CP_1252 format. We can preserve some
// unicode characters, but only those between 256 and 65535.
return Encode(src);
}
wchar_t encoding_convert_cp932_to_unicode(wchar_t cp932)
{
return encoding_convert_x_to_unicode(cp932, CP932ToUnicodeTable, Util::CountOf(CP932ToUnicodeTable));
}
wchar_t encoding_convert_cp949_to_unicode(wchar_t cp949)
{
return encoding_convert_x_to_unicode(cp949, CP949ToUnicodeTable, Util::CountOf(CP949ToUnicodeTable));
}

View File

@@ -139,4 +139,46 @@ enum {
FORMAT_COMMA1DP16 = 20004
};
/**
 * Single-byte codes that RCT2 strings use for Polish letters.
 * Each one is paired with the UnicodePolish value of the same name in RCT2ToUnicodeTable.
 * The _UC suffix marks the upper case form.
 */
enum RCT2Polish
{
RCT2_A_OGONEK_UC = 159, // 0x9F
RCT2_C_ACUTE_UC = 162, // 0xA2
RCT2_E_OGONEK_UC = 166, // 0xA6
RCT2_N_ACUTE_UC = 198, // 0xC6
RCT2_L_STROKE_UC = 167, // 0xA7
RCT2_S_ACUTE_UC = 208, // 0xD0
RCT2_Z_DOT_UC = 216, // 0xD8
RCT2_Z_ACUTE_UC = 215, // 0xD7
RCT2_A_OGONEK = 221, // 0xDD
RCT2_C_ACUTE = 222, // 0xDE
RCT2_E_OGONEK = 230, // 0xE6
RCT2_N_ACUTE = 240, // 0xF0
RCT2_L_STROKE = 247, // 0xF7
RCT2_S_ACUTE = 248, // 0xF8
RCT2_Z_DOT = 253, // 0xFD
RCT2_Z_ACUTE = 254, // 0xFE
};
/**
 * Unicode values of the Polish letters, the counterparts of the RCT2Polish codes with matching names.
 * The _UC suffix marks the upper case form.
 */
enum UnicodePolish
{
UNICODE_A_OGONEK_UC = 260, // 0x0104
UNICODE_C_ACUTE_UC = 262, // 0x0106
UNICODE_E_OGONEK_UC = 280, // 0x0118
UNICODE_N_ACUTE_UC = 323, // 0x0143
UNICODE_L_STROKE_UC = 321, // 0x0141
UNICODE_S_ACUTE_UC = 346, // 0x015A
UNICODE_Z_DOT_UC = 379, // 0x017B
UNICODE_Z_ACUTE_UC = 377, // 0x0179
UNICODE_A_OGONEK = 261, // 0x0105
UNICODE_C_ACUTE = 263, // 0x0107
UNICODE_E_OGONEK = 281, // 0x0119
UNICODE_N_ACUTE = 324, // 0x0144
UNICODE_L_STROKE = 322, // 0x0142
UNICODE_S_ACUTE = 347, // 0x015B
UNICODE_Z_DOT = 380, // 0x017C
UNICODE_Z_ACUTE = 378, // 0x017A
};
#endif

View File

@@ -176,79 +176,6 @@ void language_close_all()
constexpr rct_string_id NONSTEX_BASE_STRING_ID = 3463;
constexpr uint16 MAX_OBJECT_CACHED_STRINGS = 2048;
static wchar_t convert_specific_language_character_to_unicode(RCT2LanguageId languageId, wchar_t codepoint)
{
switch (languageId) {
case RCT2_LANGUAGE_ID_CHINESE_TRADITIONAL:
return encoding_convert_big5_to_unicode(codepoint);
case RCT2_LANGUAGE_ID_CHINESE_SIMPLIFIED:
return encoding_convert_gb2312_to_unicode(codepoint);
case RCT2_LANGUAGE_ID_JAPANESE:
return encoding_convert_cp932_to_unicode(codepoint);
case RCT2_LANGUAGE_ID_KOREAN:
return encoding_convert_cp949_to_unicode(codepoint);
default:
return codepoint;
}
}
static utf8 * convert_multibyte_charset(const char * src, size_t srcMaxSize, RCT2LanguageId languageId)
{
constexpr char CODEPOINT_DOUBLEBYTE = (char)(uint8)0xFF;
auto sb = StringBuilder(64);
for (const char * ch = src; (ch < src + srcMaxSize) && (*ch != '\0');)
{
if (*ch == CODEPOINT_DOUBLEBYTE)
{
ch++;
if (ch < src + srcMaxSize)
{
uint8 a = *ch++;
if (a != '\0')
{
uint8 b = *ch++;
wchar_t codepoint16 = (wchar_t)((a << 8) | b);
codepoint16 = convert_specific_language_character_to_unicode(languageId, codepoint16);
sb.Append(codepoint16);
}
}
}
else
{
codepoint_t codepoint = (uint8)*ch++;
sb.Append(codepoint);
}
}
return sb.StealString();
}
static bool rct2_language_is_multibyte_charset(RCT2LanguageId languageId)
{
switch (languageId) {
case RCT2_LANGUAGE_ID_KOREAN:
case RCT2_LANGUAGE_ID_CHINESE_TRADITIONAL:
case RCT2_LANGUAGE_ID_CHINESE_SIMPLIFIED:
case RCT2_LANGUAGE_ID_JAPANESE:
return true;
default:
return false;
}
}
utf8 * rct2_language_string_to_utf8(const char *src, size_t srcSize, RCT2LanguageId languageId)
{
if (rct2_language_is_multibyte_charset(languageId))
{
return convert_multibyte_charset(src, srcSize, languageId);
}
else
{
return win1252_to_utf8_alloc(src, srcSize);
}
}
bool language_get_localised_scenario_strings(const utf8 *scenarioFilename, rct_string_id *outStringIds)
{
outStringIds[0] = _languageCurrent->GetScenarioOverrideStringId(scenarioFilename, 0);

View File

@@ -18,6 +18,7 @@
#define _LANGUAGE_H_
#include <string>
#include <string_view>
#include "../common.h"
#include "../drawing/Font.h"
@@ -109,7 +110,8 @@ sint32 utf8_length(const utf8 *text);
wchar_t *utf8_to_widechar(const utf8 *src);
utf8 *widechar_to_utf8(const wchar_t *src);
utf8 *rct2_language_string_to_utf8(const char *src, size_t srcSize, RCT2LanguageId languageId);
std::string rct2_to_utf8(const std::string_view& src, RCT2LanguageId languageId);
std::string utf8_to_rct2(const std::string_view& src);
bool language_get_localised_scenario_strings(const utf8 *scenarioFilename, rct_string_id *outStringIds);
void language_free_object_string(rct_string_id stringId);
rct_string_id language_get_object_override_string_id(const char * identifier, uint8 index);

View File

@@ -53,15 +53,18 @@ bool is_user_string_id(rct_string_id stringId);
utf8 *win1252_to_utf8_alloc(const char *src, size_t srcMaxSize);
sint32 win1252_to_utf8(utf8string dst, const char *src, size_t srcLength, size_t maxBufferLength);
sint32 rct2_to_utf8(utf8 *dst, const char *src);
sint32 utf8_to_rct2(char *dst, const utf8 *src);
wchar_t encoding_convert_rct2_to_unicode(wchar_t rct2str);
uint32 encoding_convert_unicode_to_rct2(uint32 unicode);
#ifndef _WIN32
wchar_t encoding_convert_gb2312_to_unicode(wchar_t gb2312);
wchar_t encoding_convert_big5_to_unicode(wchar_t big5);
wchar_t encoding_convert_cp932_to_unicode(wchar_t cp932);
wchar_t encoding_convert_cp949_to_unicode(wchar_t cp949);
#endif
#define MAX_USER_STRINGS 1024
#define USER_STRING_MAX_LENGTH 32

View File

@@ -68,13 +68,13 @@ void StringTable::Read(IReadObjectContext * context, IStream * stream, uint8 id)
entry.LanguageId = languageId;
std::string stringAsWin1252 = stream->ReadStdString();
utf8 * stringAsUtf8 = rct2_language_string_to_utf8(stringAsWin1252.c_str(), stringAsWin1252.size(), rct2LanguageId);
auto stringAsUtf8 = rct2_to_utf8(stringAsWin1252, rct2LanguageId);
if (StringIsBlank(stringAsUtf8))
if (StringIsBlank(stringAsUtf8.data()))
{
entry.LanguageId = LANGUAGE_UNDEFINED;
}
String::Trim(stringAsUtf8);
stringAsUtf8 = String::Trim(stringAsUtf8);
entry.Text = stringAsUtf8;
_strings.push_back(entry);

View File

@@ -271,10 +271,7 @@ public:
dst->objective_arg_2 = _s4.scenario_objective_currency;
dst->objective_arg_3 = _s4.scenario_objective_num_guests;
utf8 utf8name[256];
rct2_to_utf8(utf8name, _s4.scenario_name);
std::string name = std::string(utf8name, sizeof(utf8name));
auto name = rct2_to_utf8(_s4.scenario_name, RCT2_LANGUAGE_ID_ENGLISH_UK);
std::string details;
// TryGetById won't set this property if the scenario is not recognised,
@@ -2759,10 +2756,8 @@ private:
std::string GetUserString(rct_string_id stringId)
{
utf8 buffer[128] = { 0 };
const char * originalString = _s4.string_table[(stringId - USER_STRING_START) % 1024];
rct2_to_utf8(buffer, originalString);
return std::string(buffer);
return rct2_to_utf8(originalString, RCT2_LANGUAGE_ID_ENGLISH_UK);
}
void FixLandOwnership()

View File

@@ -164,8 +164,14 @@ void S6Exporter::Export()
log_error("Found %d disjoint null sprites", disjoint_sprites_count);
}
_s6.info = gS6Info;
utf8_to_rct2(_s6.info.name, gS6Info.name);
utf8_to_rct2(_s6.info.details, gS6Info.details);
{
auto temp = utf8_to_rct2(gS6Info.name);
safe_strcpy(_s6.info.name, temp.data(), sizeof(_s6.info.name));
}
{
auto temp = utf8_to_rct2(gS6Info.details);
safe_strcpy(_s6.info.details, temp.data(), sizeof(_s6.info.details));
}
uint32 researchedTrackPiecesA[128];
uint32 researchedTrackPiecesB[128];

View File

@@ -47,6 +47,7 @@
#include "../scenario/Scenario.h"
#include "../scenario/ScenarioRepository.h"
#include "../util/SawyerCoding.h"
#include "../util/Util.h"
#include "../world/Climate.h"
#include "../world/Entrance.h"
#include "../world/MapAnimation.h"
@@ -200,8 +201,15 @@ public:
// _s6.header
gS6Info = _s6.info;
rct2_to_utf8(gS6Info.name, _s6.info.name);
rct2_to_utf8(gS6Info.details, _s6.info.details);
{
auto temp = rct2_to_utf8(_s6.info.name, RCT2_LANGUAGE_ID_ENGLISH_UK);
safe_strcpy(gS6Info.name, temp.data(), sizeof(gS6Info.name));
}
{
auto temp = rct2_to_utf8(_s6.info.details, RCT2_LANGUAGE_ID_ENGLISH_UK);
safe_strcpy(gS6Info.details, temp.data(), sizeof(gS6Info.details));
}
gDateMonthsElapsed = _s6.elapsed_months;
gDateMonthTicks = _s6.current_day;

View File

@@ -76,6 +76,8 @@ set(COMMON_TEST_SOURCES
"${ROOT_DIR}/src/openrct2/core/Guard.cpp"
"${ROOT_DIR}/src/openrct2/core/String.cpp"
"${ROOT_DIR}/src/openrct2/Diagnostic.cpp"
"${ROOT_DIR}/src/openrct2/localisation/ConversionTables.cpp"
"${ROOT_DIR}/src/openrct2/localisation/Convert.cpp"
"${ROOT_DIR}/src/openrct2/localisation/FormatCodes.cpp"
"${ROOT_DIR}/src/openrct2/localisation/UTF8.cpp"
"${ROOT_DIR}/src/openrct2/util/Util.cpp"
@@ -139,6 +141,11 @@ add_executable(test_string ${STRING_TEST_SOURCES})
target_link_libraries(test_string ${GTEST_LIBRARIES} test-common ${LDL} z)
add_test(NAME string COMMAND test_string)
# Localisation test
# Uses its own source-list variable instead of overwriting the string test's STRING_TEST_SOURCES
set(LOCALISATION_TEST_SOURCES "${CMAKE_CURRENT_LIST_DIR}/Localisation.cpp")
add_executable(test_localisation ${LOCALISATION_TEST_SOURCES})
target_link_libraries(test_localisation ${GTEST_LIBRARIES} test-common ${LDL} z)
add_test(NAME localisation COMMAND test_localisation)
# Ride ratings test
set(RIDE_RATINGS_TEST_SOURCES "${CMAKE_CURRENT_LIST_DIR}/RideRatings.cpp"

View File

@@ -0,0 +1,80 @@
#include "helpers/StringHelpers.hpp"
#include "openrct2/localisation/Localisation.h"
#include <gtest/gtest.h>
// Empty fixture; it only gives the rct2_to_utf8 / utf8_to_rct2 test cases in this file a common suite name.
class Localisation : public testing::Test
{
};
///////////////////////////////////////////////////////////////////////////////
// Tests for rct2_to_utf8
///////////////////////////////////////////////////////////////////////////////
// Plain ASCII text is expected to come out of rct2_to_utf8 unchanged.
TEST_F(Localisation, RCT2_to_UTF8_UK)
{
auto input = "The quick brown fox";
auto expected = u8"The quick brown fox";
auto actual = rct2_to_utf8(input, RCT2_LANGUAGE_ID_ENGLISH_UK);
ASSERT_EQ(expected, actual);
}
// Every character of the input is a two-byte code prefixed with 0xFF; with the Japanese
// language id the 13 codes are expected to decode to the 13 katakana characters below.
TEST_F(Localisation, RCT2_to_UTF8_JP)
{
auto input = StringFromHex("ff8374ff8340ff8358ff8367ff8375ff8389ff8345ff8393ff8374ff8348ff8362ff834eff8358");
auto expected = u8"ファストブラウンフォックス";
auto actual = rct2_to_utf8(input, RCT2_LANGUAGE_ID_JAPANESE);
ASSERT_EQ(expected, actual);
}
// Same shape as the Japanese case: 0xFF-prefixed two-byte codes, decoded with the
// traditional Chinese language id.
TEST_F(Localisation, RCT2_to_UTF8_ZH_TW)
{
auto input = StringFromHex("ffa7d6ffb374ffaabaffb4c4ffa6e2ffaab0ffaf57");
auto expected = u8"快速的棕色狐狸";
auto actual = rct2_to_utf8(input, RCT2_LANGUAGE_ID_CHINESE_TRADITIONAL);
ASSERT_EQ(expected, actual);
}
// Single-byte input containing the RCT2 codes for Polish letters (0xF7 = RCT2_L_STROKE,
// 0xE6 = RCT2_E_OGONEK); they are expected to become the matching Unicode letters.
TEST_F(Localisation, RCT2_to_UTF8_PL)
{
auto input = StringFromHex("47F372736b6120446ff76b692054e6637a6f7779");
auto expected = u8"Górska Dołki Tęczowy";
auto actual = rct2_to_utf8(input, RCT2_LANGUAGE_ID_ENGLISH_UK);
ASSERT_EQ(expected, actual);
}
// A truncated trailing double-byte character must be dropped rather than read past the end:
// the input ends in "ffc2" (marker plus one byte) and the expected text holds only the
// 11 complete characters that precede it.
TEST_F(Localisation, RCT2_to_UTF8_ZH_TW_PREMATURE_END)
{
// This string can be found in BATFL.DAT, the last double byte character is missing its second byte.
auto input = StringFromHex("ffa470ffabacffa8aeffbdf8ffa662ffc54bffb944ffa457ffaeb6ffb0caffb76effc2");
auto expected = u8"小型車輛在鐵道上振動搖";
auto actual = rct2_to_utf8(input, RCT2_LANGUAGE_ID_CHINESE_TRADITIONAL);
ASSERT_EQ(expected, actual);
}
///////////////////////////////////////////////////////////////////////////////
// Tests for utf8_to_rct2
///////////////////////////////////////////////////////////////////////////////
// Accented Latin letters are expected to be written as single bytes (0xE0 for à, 0xE9 for é).
TEST_F(Localisation, UTF8_to_RCT2_Basic)
{
auto input = u8"à l'époque était";
auto expected = StringFromHex("e0206c27e9706f71756520e974616974");
auto actual = utf8_to_rct2(input);
ASSERT_EQ(expected, actual);
}
// The ASCII prefix "$: " stays single-byte; each following character is expected as 0xFF
// followed by its 16-bit value, high byte first (e.g. ff 5f eb for the first one).
TEST_F(Localisation, UTF8_to_RCT2_ChineseTraditional)
{
auto input = u8"$: 快速的棕色狐狸";
auto expected = StringFromHex("243a20ff5febff901fff7684ff68d5ff8272ff72d0ff72f8");
auto actual = utf8_to_rct2(input);
ASSERT_EQ(expected, actual);
}
// Polish letters are expected to be written as their single-byte RCT2 codes
// (0xF7 = RCT2_L_STROKE, 0xE6 = RCT2_E_OGONEK).
TEST_F(Localisation, UTF8_to_RCT2_PL)
{
auto input = u8"Górska Dołki Tęczowy";
auto expected = StringFromHex("47F372736b6120446ff76b692054e6637a6f7779");
auto actual = utf8_to_rct2(input);
ASSERT_EQ(expected, actual);
}

View File

@@ -4,6 +4,7 @@
#include <gtest/gtest.h>
#include <openrct2/core/String.hpp>
#include "AssertHelpers.hpp"
#include "helpers/StringHelpers.hpp"
using TCase = std::tuple<std::string, std::string, std::string>;
@@ -11,6 +12,10 @@ class StringTest : public testing::TestWithParam<TCase>
{
};
///////////////////////////////////////////////////////////////////////////////
// Tests for String::Trim
///////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(TrimData, StringTest, testing::Values(
// input after Trim after TrimStart
TCase("string", "string", "string"),
@@ -45,6 +50,10 @@ TEST_P(StringTest, TrimStart)
ASSERT_EQ(expected, actual);
}
///////////////////////////////////////////////////////////////////////////////
// Tests for String::Split
///////////////////////////////////////////////////////////////////////////////
TEST_F(StringTest, Split_ByComma)
{
auto actual = String::Split("a,bb,ccc,dd", ",");
@@ -64,3 +73,44 @@ TEST_F(StringTest, Split_ByEmpty)
{
EXPECT_THROW(String::Split("string", ""), std::invalid_argument);
}
///////////////////////////////////////////////////////////////////////////////
// Tests for String::Convert
///////////////////////////////////////////////////////////////////////////////
// TODO Remove when String::Convert is implemented for non-Windows platforms
#ifdef _WIN32
// Raw code page 950 bytes (no RCT2 0xFF markers) converted to UTF-8.
TEST_F(StringTest, Convert_950_to_UTF8)
{
auto input = StringFromHex("a7d6b374aabab4c4a6e2aab0af57");
auto expected = u8"快速的棕色狐狸";
auto actual = String::Convert(input, CODE_PAGE::CP_950, CODE_PAGE::CP_UTF8);
ASSERT_EQ(expected, actual);
}
// UTF-8 text converted to code page 932; the expected value is the raw double-byte sequence.
TEST_F(StringTest, Convert_UTF8_to_932)
{
auto input = u8"ファストブラウンフォックス";
auto expected = StringFromHex("83748340835883678375838983458393837483488362834e8358");
auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_932);
ASSERT_EQ(expected, actual);
}
// Converting with identical source and destination code pages must return the text unchanged.
TEST_F(StringTest, Convert_UTF8_to_UTF8)
{
auto input = u8"سريع|brown|ثعلب";
auto expected = input;
auto actual = String::Convert(input, CODE_PAGE::CP_UTF8, CODE_PAGE::CP_UTF8);
ASSERT_EQ(expected, actual);
}
// An empty input must produce an empty result.
TEST_F(StringTest, Convert_Empty)
{
auto input = "";
auto expected = input;
auto actual = String::Convert(input, CODE_PAGE::CP_1252, CODE_PAGE::CP_UTF8);
ASSERT_EQ(expected, actual);
}
#endif

View File

@@ -0,0 +1,17 @@
#include <cassert>
#include <stdexcept>
#include <string>
#include <string_view>
/**
 * Decodes a string of hexadecimal digit pairs (e.g. "ff8374") into the bytes they describe.
 * Both upper and lower case digits are accepted.
 *
 * @param input An even number of hexadecimal digits, two per output byte.
 * @returns A string holding one byte per digit pair, in input order.
 * @throws std::invalid_argument if the input contains a character that is not a hexadecimal
 *         digit, or (in builds where assert is compiled out) if its length is odd.
 */
inline std::string StringFromHex(const std::string_view& input)
{
    assert((input.size() & 1) == 0);
    if ((input.size() & 1) != 0)
    {
        throw std::invalid_argument("StringFromHex: input must contain an even number of digits");
    }

    // Strict digit decoder. std::stoi would also accept leading whitespace, a sign, or stop
    // silently at the first bad character, turning a typo in a fixture into wrong bytes.
    const auto nibble = [](char ch) -> int {
        if (ch >= '0' && ch <= '9')
        {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f')
        {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F')
        {
            return ch - 'A' + 10;
        }
        throw std::invalid_argument("StringFromHex: invalid hexadecimal digit");
    };

    std::string result;
    result.reserve(input.size() / 2);
    for (size_t i = 0; i < input.size(); i += 2)
    {
        const int value = (nibble(input[i]) << 4) | nibble(input[i + 1]);
        result.push_back(static_cast<char>(value));
    }
    return result;
}

View File

@@ -52,12 +52,14 @@
<!-- Files -->
<ItemGroup>
<ClInclude Include="AssertHelpers.hpp" />
<ClInclude Include="helpers\StringHelpers.hpp" />
<ClInclude Include="TestData.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="LanguagePackTest.cpp" />
<ClCompile Include="IniReaderTest.cpp" />
<ClCompile Include="IniWriterTest.cpp" />
<ClCompile Include="Localisation.cpp" />
<ClCompile Include="MultiLaunch.cpp" />
<ClCompile Include="RideRatings.cpp" />
<ClCompile Include="sawyercoding_test.cpp" />