1
0
mirror of https://github.com/OpenRCT2/OpenRCT2 synced 2026-01-19 13:03:11 +01:00
Files
OpenRCT2/src/openrct2/core/UTF8.cpp
2024-12-31 20:30:04 +01:00

110 lines
2.5 KiB
C++

/*****************************************************************************
* Copyright (c) 2014-2025 OpenRCT2 developers
*
* For a complete list of all authors, please refer to contributors.md
* Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
*
* OpenRCT2 is licensed under the GNU General Public License version 3.
*****************************************************************************/
#include "UTF8.h"
#include <cstring>
#include <wchar.h>
uint32_t UTF8GetNext(const utf8* char_ptr, const utf8** nextchar_ptr)
{
int32_t result;
int32_t numBytes;
if (!(char_ptr[0] & 0x80))
{
result = char_ptr[0];
numBytes = 1;
}
else if ((char_ptr[0] & 0xE0) == 0xC0)
{
result = ((char_ptr[0] & 0x1F) << 6) | (char_ptr[1] & 0x3F);
numBytes = 2;
}
else if ((char_ptr[0] & 0xF0) == 0xE0)
{
result = ((char_ptr[0] & 0x0F) << 12) | ((char_ptr[1] & 0x3F) << 6) | (char_ptr[2] & 0x3F);
numBytes = 3;
}
else if ((char_ptr[0] & 0xF8) == 0xF0)
{
result = ((char_ptr[0] & 0x07) << 18) | ((char_ptr[1] & 0x3F) << 12) | ((char_ptr[2] & 0x3F) << 6)
| (char_ptr[3] & 0x3F);
numBytes = 4;
}
else
{
// TODO 4 bytes
result = ' ';
numBytes = 1;
}
if (nextchar_ptr != nullptr)
*nextchar_ptr = char_ptr + numBytes;
return result;
}
bool UTF8IsCodepointStart(const utf8* text)
{
if ((text[0] & 0x80) == 0)
return true;
if ((text[0] & 0xC0) == 0xC0)
return true;
return false;
}
int32_t UTF8GetCodepointLength(char32_t codepoint)
{
if (codepoint <= 0x7F)
{
return 1;
}
if (codepoint <= 0x7FF)
{
return 2;
}
if (codepoint <= 0xFFFF)
{
return 3;
}
return 4;
}
/**
* Gets the number of characters / codepoints in a UTF-8 string (not necessarily 1:1 with bytes and not including null
* terminator).
*/
int32_t UTF8Length(const utf8* text)
{
const utf8* ch = text;
int32_t count = 0;
while (UTF8GetNext(ch, &ch) != 0)
{
count++;
}
return count;
}
/**
* Returns a pointer to the null terminator of the given UTF-8 string.
*/
utf8* GetStringEnd(const utf8* text)
{
return const_cast<char*>(std::strchr(text, 0));
}
/**
* Return the number of bytes (including the null terminator) in the given UTF-8 string.
*/
size_t GetStringSize(const utf8* text)
{
return GetStringEnd(text) - text + 1;
}