////////////////////////////////////////////////////////////
//~ Utf8

//- Decode

// Decodes the UTF-8 sequence at the start of `str`.
//
// Result:
//   - codepoint: the decoded value, or U32Max when `str` is empty or the sequence is malformed
//   - advance8:  number of bytes consumed; 0 for empty input, 1 for a malformed sequence
//                (so callers can skip a single byte and resynchronize), otherwise the
//                length of the decoded sequence (1..4)
//
// A sequence is treated as malformed when the lead byte is a continuation byte or 0xF8..0xFF,
// when `str` is shorter than the lead byte announces, or when a trailing byte is not a
// continuation byte. Overlong encodings, surrogates, and values above 0x10FFFF are not rejected.
Utf8DecodeResult DecodeUtf8(String str)
{
  // Byte class indexed by the top 5 bits of a byte:
  //   1       = ASCII (single-byte sequence)
  //   0       = continuation byte (10xxxxxx)
  //   2, 3, 4 = lead byte of a 2/3/4-byte sequence
  //   5       = 0xF8..0xFF, never a valid lead byte (handled by the `default` case)
  const u8 lengths[32] = {
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2,3,3,4,5
  };

  Utf8DecodeResult result = Zi;
  u32 codepoint = U32Max;
  u32 advance = 0;
  if (str.len > 0)
  {
    u8 c0 = str.text[0];
    u8 utf8_len = lengths[c0 >> 3];

    // Until a full sequence validates, consume exactly one byte
    advance = 1;
    switch (utf8_len)
    {
      default: break;

      case 1:
      {
        codepoint = c0;
      } break;

      case 2:
      {
        if (str.len >= 2)
        {
          u8 c1 = str.text[1];
          if (lengths[c1 >> 3] == 0)
          {
            codepoint = (c1 & 0x3F) << 0;
            codepoint |= (c0 & 0x1F) << 6;
            advance = 2;
          }
        }
      } break;

      case 3:
      {
        if (str.len >= 3)
        {
          u8 c1 = str.text[1];
          u8 c2 = str.text[2];
          if (
            lengths[c1 >> 3] == 0 &&
            lengths[c2 >> 3] == 0
          )
          {
            codepoint = (c2 & 0x3F) << 0;
            codepoint |= (c1 & 0x3F) << 6;
            codepoint |= (c0 & 0x0F) << 12;
            advance = 3;
          }
        }
      } break;

      case 4:
      {
        if (str.len >= 4)
        {
          u8 c1 = str.text[1];
          u8 c2 = str.text[2];
          u8 c3 = str.text[3];
          if (
            lengths[c1 >> 3] == 0 &&
            lengths[c2 >> 3] == 0 &&
            lengths[c3 >> 3] == 0
          )
          {
            codepoint = (c3 & 0x3F) << 0;
            codepoint |= (c2 & 0x3F) << 6;
            codepoint |= (c1 & 0x3F) << 12;
            codepoint |= (c0 & 0x07) << 16;
            // All four bytes belong to this sequence; advancing by 3 would make the
            // caller re-read the last continuation byte as the start of a new sequence
            advance = 4;
          }
        }
      } break;
    }
  }

  result.advance8 = advance;
  result.codepoint = codepoint;
  return result;
}

//- Encode

// Encodes `codepoint` as UTF-8.
//
// Result:
//   - count8: number of bytes written to `chars8` (1..4)
//   - chars8: the encoded bytes, lead byte first
//
// Values above 0x10FFFF are not valid codepoints and are encoded as a single '?'.
// Values in the surrogate range are not rejected; they are encoded as 3-byte sequences.
Utf8EncodeResult EncodeUtf8(u32 codepoint)
{
  Utf8EncodeResult encoded = Zi;

  // Out of range: substitute a replacement character
  if (codepoint > 0x10FFFF)
  {
    encoded.count8 = 1;
    encoded.chars8[0] = '?';
    return encoded;
  }

  // 1 byte: 0xxxxxxx
  if (codepoint < 0x80)
  {
    encoded.count8 = 1;
    encoded.chars8[0] = codepoint;
    return encoded;
  }

  // 2 bytes: 110xxxxx 10xxxxxx
  if (codepoint < 0x800)
  {
    encoded.count8 = 2;
    encoded.chars8[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
    encoded.chars8[1] = 0x80 | (codepoint & 0x3F);
    return encoded;
  }

  // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if (codepoint < 0x10000)
  {
    encoded.count8 = 3;
    encoded.chars8[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
    encoded.chars8[1] = 0x80 | ((codepoint >> 6) & 0x3F);
    encoded.chars8[2] = 0x80 | (codepoint & 0x3F);
    return encoded;
  }

  // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  encoded.count8 = 4;
  encoded.chars8[0] = 0xF0 | ((codepoint >> 18) & 0x07);
  encoded.chars8[1] = 0x80 | ((codepoint >> 12) & 0x3F);
  encoded.chars8[2] = 0x80 | ((codepoint >> 6) & 0x3F);
  encoded.chars8[3] = 0x80 | (codepoint & 0x3F);
  return encoded;
}

////////////////////////////////////////////////////////////
//~ Utf16

//- Decode

Utf16DecodeResult DecodeUtf16(String16 str)
|
|
{
|
|
Utf16DecodeResult result = Zi;
|
|
u32 codepoint = U32Max;
|
|
u32 advance = 0;
|
|
|
|
if (str.len >= 1)
|
|
{
|
|
u16 c0 = str.text[0];
|
|
codepoint = c0;
|
|
advance = 1;
|
|
if (str.len >= 2)
|
|
{
|
|
u16 c1 = str.text[1];
|
|
if ((0xD800 <= c0 && c0 < 0xDC00) && (0xDC00 <= c1 && c1 < 0xE000))
|
|
{
|
|
codepoint = (c1 & 0x3FF) << 0;
|
|
codepoint |= (c0 & 0x3FF) << 10;
|
|
advance = 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
result.advance16 = advance;
|
|
result.codepoint = codepoint;
|
|
return result;
|
|
}
|
|
|
|
//- Encode
|
|
|
|
Utf16EncodeResult EncodeUtf16(u32 codepoint)
|
|
{
|
|
Utf16EncodeResult result = Zi;
|
|
|
|
if (codepoint <= 0xFFFF)
|
|
{
|
|
result.count16 = 1;
|
|
result.chars16[0] = codepoint;
|
|
}
|
|
else if (codepoint <= 0x10FFFF)
|
|
{
|
|
result.count16 = 2;
|
|
result.chars16[1] = 0xDC00 | ((codepoint >> 0) & 0x3FF);
|
|
result.chars16[0] = 0xD800 | ((codepoint >> 10) & 0x3FF);
|
|
}
|
|
else
|
|
{
|
|
// Invalid codepoint
|
|
result.count16 = 1;
|
|
result.chars16[0] = '?';
|
|
}
|
|
|
|
return result;
|
|
}

//- Surrogate check

// Returns 1 when `c` lies in the high (leading) surrogate range 0xD800..0xDBFF, otherwise 0.
b32 IsUtf16HighSurrogate(u16 c)
{
  // Every value in the range shares the top six bits 110110
  return (c & 0xFC00) == 0xD800;
}
|
|
|
|
// Returns 1 when `c` lies in the low (trailing) surrogate range 0xDC00..0xDFFF, otherwise 0.
b32 IsUtf16LowSurrogate(u16 c)
{
  // Every value in the range shares the top six bits 110111
  return (c & 0xFC00) == 0xDC00;
}

////////////////////////////////////////////////////////////
//~ Utf32

//- Decode

// Decodes the UTF-32 unit at the start of `str`.
//
// Result:
//   - codepoint: the unit's value when it is at most 0x10FFFF, otherwise U32Max
//                (also U32Max when `str` is empty)
//   - advance32: number of 32-bit units consumed; 0 for empty input, otherwise 1
Utf32DecodeResult DecodeUtf32(String32 str)
{
  Utf32DecodeResult decoded = Zi;
  decoded.advance32 = 0;
  decoded.codepoint = U32Max;

  // Nothing to read
  if (str.len < 1)
  {
    return decoded;
  }

  // One unit is always consumed, even when its value is out of range
  decoded.advance32 = 1;

  u32 unit = str.text[0];
  if (unit <= 0x10FFFF)
  {
    decoded.codepoint = unit;
  }

  return decoded;
}

//- Encode

// Encodes `codepoint` as a single UTF-32 unit stored in `chars32`.
// Values above 0x10FFFF are not valid codepoints and are replaced with '?'.
Utf32EncodeResult EncodeUtf32(u32 codepoint)
{
  Utf32EncodeResult encoded = Zi;
  encoded.chars32 = (codepoint > 0x10FFFF) ? '?' : codepoint;
  return encoded;
}
|