mirror of
https://github.com/OpenRCT2/OpenRCT2
synced 2026-01-19 13:03:11 +01:00
110 lines
2.5 KiB
C++
110 lines
2.5 KiB
C++
/*****************************************************************************
 * Copyright (c) 2014-2025 OpenRCT2 developers
 *
 * For a complete list of all authors, please refer to contributors.md
 * Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
 *
 * OpenRCT2 is licensed under the GNU General Public License version 3.
 *****************************************************************************/
|
|
|
|
#include "UTF8.h"
|
|
|
|
#include <cstring>
|
|
#include <wchar.h>
|
|
|
|
/**
 * Decodes the UTF-8 sequence that starts at char_ptr and returns its codepoint.
 *
 * @param char_ptr Pointer to the first byte of the sequence to decode.
 * @param nextchar_ptr If not nullptr, receives a pointer to the byte following the decoded sequence.
 * @return The decoded codepoint, or ' ' when the lead byte is not a valid UTF-8 lead byte or the
 *         sequence is cut short / followed by a non-continuation byte. In both error cases exactly
 *         one byte is consumed.
 *
 * Trailing bytes are only read while they have the 10xxxxxx continuation form, so decoding never
 * steps over a null terminator that cuts a multi-byte sequence short. Overlong encodings and
 * surrogate codepoints are not rejected.
 */
uint32_t UTF8GetNext(const utf8* char_ptr, const utf8** nextchar_ptr)
{
    const auto lead = static_cast<unsigned char>(char_ptr[0]);

    int32_t result;
    int32_t numBytes;

    // Classify the lead byte: sequence length plus the payload bits carried by the lead itself.
    if (!(lead & 0x80))
    {
        result = lead;
        numBytes = 1;
    }
    else if ((lead & 0xE0) == 0xC0)
    {
        result = lead & 0x1F;
        numBytes = 2;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        result = lead & 0x0F;
        numBytes = 3;
    }
    else if ((lead & 0xF8) == 0xF0)
    {
        result = lead & 0x07;
        numBytes = 4;
    }
    else
    {
        // Stray continuation byte or an invalid lead (0xF8 and above).
        result = ' ';
        numBytes = 1;
    }

    // Fold in the continuation bytes, six payload bits each.
    for (int32_t i = 1; i < numBytes; i++)
    {
        const auto trail = static_cast<unsigned char>(char_ptr[i]);
        if ((trail & 0xC0) != 0x80)
        {
            // Truncated or malformed sequence: treat the lead byte like any other invalid byte
            // so the caller resumes at the offending byte (which may be the null terminator).
            result = ' ';
            numBytes = 1;
            break;
        }
        result = (result << 6) | (trail & 0x3F);
    }

    if (nextchar_ptr != nullptr)
        *nextchar_ptr = char_ptr + numBytes;
    return result;
}
|
|
|
|
/**
 * Reports whether the byte at text is the first byte of a codepoint, i.e. anything other than a
 * 10xxxxxx continuation byte.
 */
bool UTF8IsCodepointStart(const utf8* text)
{
    // 0xxxxxxx (single byte) and 11xxxxxx (multi-byte lead) both start a codepoint;
    // only the 10xxxxxx pattern does not.
    const auto topTwoBits = text[0] & 0xC0;
    return topTwoBits != 0x80;
}
|
|
|
|
/**
 * Returns how many bytes are needed to encode the given codepoint as UTF-8 (1 to 4).
 */
int32_t UTF8GetCodepointLength(char32_t codepoint)
{
    // Largest codepoint that fits in a 1, 2 and 3 byte sequence respectively.
    constexpr char32_t kUpperBounds[] = { 0x7F, 0x7FF, 0xFFFF };

    int32_t length = 1;
    for (const char32_t bound : kUpperBounds)
    {
        if (codepoint <= bound)
        {
            return length;
        }
        length++;
    }

    // Everything above the 3 byte range is reported as 4 bytes.
    return length;
}
|
|
|
|
/**
|
|
* Gets the number of characters / codepoints in a UTF-8 string (not necessarily 1:1 with bytes and not including null
|
|
* terminator).
|
|
*/
|
|
int32_t UTF8Length(const utf8* text)
|
|
{
|
|
const utf8* ch = text;
|
|
|
|
int32_t count = 0;
|
|
while (UTF8GetNext(ch, &ch) != 0)
|
|
{
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/**
|
|
* Returns a pointer to the null terminator of the given UTF-8 string.
|
|
*/
|
|
utf8* GetStringEnd(const utf8* text)
|
|
{
|
|
return const_cast<char*>(std::strchr(text, 0));
|
|
}
|
|
|
|
/**
|
|
* Return the number of bytes (including the null terminator) in the given UTF-8 string.
|
|
*/
|
|
size_t GetStringSize(const utf8* text)
|
|
{
|
|
return GetStringEnd(text) - text + 1;
|
|
}
|