power_play/src/uni.c

203 lines
5.3 KiB
C

#include "uni.h"
/* ========================== *
* utf8
* ========================== */
struct uni_decode_utf8_result uni_decode_utf8(struct string str)
{
LOCAL_PERSIST const u8 lengths[32] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2,3,3,4,5
};
u32 codepoint = U32_MAX;
u32 advance = 0;
if (str.len > 0) {
u8 c0 = str.text[0];
u8 utf8_len = lengths[c0 >> 3];
advance = 1;
switch (utf8_len) {
case 1: {
codepoint = c0;
} break;
case 2: {
if (str.len >= 2) {
u8 c1 = str.text[1];
if (lengths[c1 >> 3] == 0) {
codepoint = (c1 & 0x3F) << 0;
codepoint |= (c0 & 0x1F) << 6;
advance = 2;
}
}
} break;
case 3: {
if (str.len >= 3) {
u8 c1 = str.text[1];
u8 c2 = str.text[2];
if (lengths[c1 >> 3] == 0 &&
lengths[c2 >> 3] == 0) {
codepoint = (c2 & 0x3F) << 0;
codepoint |= (c1 & 0x3F) << 6;
codepoint |= (c0 & 0x0F) << 12;
advance = 3;
}
}
} break;
case 4: {
if (str.len >= 4) {
u8 c1 = str.text[1];
u8 c2 = str.text[2];
u8 c3 = str.text[3];
if (lengths[c1 >> 3] == 0 &&
lengths[c2 >> 3] == 0 &&
lengths[c3 >> 3] == 0) {
codepoint = (c3 & 0x3F) << 0;
codepoint |= (c2 & 0x3F) << 6;
codepoint |= (c1 & 0x3F) << 12;
codepoint |= (c0 & 0x07) << 16;
advance = 3;
}
}
} break;
default: break;
}
}
return (struct uni_decode_utf8_result) {
.advance8 = advance,
.codepoint = codepoint
};
}
/* Encodes `codepoint` as UTF-8.
 *
 * On return `count8` holds the number of bytes written to `chars8` (1..4).
 * Codepoints above 0x10FFFF are replaced by a single '?' byte. */
struct uni_encode_utf8_result uni_encode_utf8(u32 codepoint)
{
    struct uni_encode_utf8_result res = ZI;

    if (codepoint > 0x10FFFF) {
        /* Invalid codepoint */
        res.chars8[0] = '?';
        res.count8 = 1;
        return res;
    }

    if (codepoint < 0x80) {
        /* 0xxxxxxx */
        res.chars8[0] = codepoint;
        res.count8 = 1;
    } else if (codepoint < 0x800) {
        /* 110xxxxx 10xxxxxx */
        res.chars8[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
        res.chars8[1] = 0x80 | (codepoint & 0x3F);
        res.count8 = 2;
    } else if (codepoint < 0x10000) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        res.chars8[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
        res.chars8[1] = 0x80 | ((codepoint >> 6) & 0x3F);
        res.chars8[2] = 0x80 | (codepoint & 0x3F);
        res.count8 = 3;
    } else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        res.chars8[0] = 0xF0 | ((codepoint >> 18) & 0x07);
        res.chars8[1] = 0x80 | ((codepoint >> 12) & 0x3F);
        res.chars8[2] = 0x80 | ((codepoint >> 6) & 0x3F);
        res.chars8[3] = 0x80 | (codepoint & 0x3F);
        res.count8 = 4;
    }

    return res;
}
/* ========================== *
* utf16
* ========================== */
struct uni_decode_utf16_result uni_decode_utf16(struct string16 str)
{
u32 codepoint = U32_MAX;
u32 advance = 0;
if (str.len >= 1) {
u16 c0 = str.text[0];
codepoint = c0;
advance = 1;
if (str.len >= 2) {
u16 c1 = str.text[1];
if ((0xD800 <= c0 && c0 < 0xDC00) && (0xDC00 <= c1 && c1 < 0xE000)) {
codepoint = (c1 & 0x3FF) << 0;
codepoint |= (c0 & 0x3FF) << 10;
advance = 2;
}
}
}
return (struct uni_decode_utf16_result) {
.advance16 = advance,
.codepoint = codepoint
};
}
/* Encodes `codepoint` as UTF-16.
 *
 * On return `count16` holds the number of 16-bit units written to `chars16`:
 * 1 for codepoints up to 0xFFFF (stored unchanged), 2 (high surrogate, then
 * low surrogate) for 0x10000..0x10FFFF. Codepoints above 0x10FFFF are
 * replaced by a single '?' unit. */
struct uni_encode_utf16_result uni_encode_utf16(u32 codepoint)
{
    struct uni_encode_utf16_result res = ZI;

    if (codepoint <= 0xFFFF) {
        res.count16 = 1;
        res.chars16[0] = codepoint;
    } else if (codepoint <= 0x10FFFF) {
        /* Surrogate pairs encode the 20-bit value (codepoint - 0x10000):
         * top 10 bits in the high surrogate, low 10 bits in the low one. */
        u32 offset = codepoint - 0x10000;
        res.count16 = 2;
        res.chars16[1] = 0xDC00 | ((offset >> 0) & 0x3FF);
        res.chars16[0] = 0xD800 | ((offset >> 10) & 0x3FF);
    } else {
        /* Invalid codepoint */
        res.count16 = 1;
        res.chars16[0] = '?';
    }

    return res;
}
/* Returns non-zero when `c` lies in the range 0xD800..0xDBFF. */
b32 uni_is_utf16_high_surrogate(u16 c)
{
    /* Every value in 0xD800..0xDBFF shares the top six bits 110110. */
    return (c & 0xFC00) == 0xD800;
}
/* Returns non-zero when `c` lies in the range 0xDC00..0xDFFF. */
b32 uni_is_utf16_low_surrogate(u16 c)
{
    /* Every value in 0xDC00..0xDFFF shares the top six bits 110111. */
    return (c & 0xFC00) == 0xDC00;
}
/* ========================== *
* utf32
* ========================== */
/* Decodes the first UTF-32 unit of `str`.
 *
 * Empty input yields advance32 == 0 and codepoint == U32_MAX. Otherwise one
 * unit is consumed (advance32 == 1) and `codepoint` is that unit, or U32_MAX
 * when the unit is greater than 0x10FFFF. */
struct uni_decode_utf32_result uni_decode_utf32(struct string32 str)
{
    u32 value = U32_MAX;
    u32 consumed = 0;

    if (str.len >= 1) {
        u32 unit = str.text[0];
        consumed = 1;
        value = (unit <= 0x10FFFF) ? unit : U32_MAX;
    }

    return (struct uni_decode_utf32_result) {
        .advance32 = consumed,
        .codepoint = value
    };
}
/* Encodes `codepoint` as a single UTF-32 unit stored in `chars32`.
 * Codepoints above 0x10FFFF are replaced by '?'. */
struct uni_encode_utf32_result uni_encode_utf32(u32 codepoint)
{
    struct uni_encode_utf32_result res = ZI;

    /* Anything past the last Unicode codepoint is invalid. */
    res.chars32 = (codepoint > 0x10FFFF) ? '?' : codepoint;

    return res;
}