rename utf.h -> uni.h

This commit is contained in:
jacob 2024-04-12 12:03:39 -05:00
parent aaad37da27
commit f399c093ca
7 changed files with 93 additions and 90 deletions

View File

@ -281,6 +281,9 @@ void draw_text_ex(struct renderer_canvas *canvas, struct font *font, struct v2 p
draw_pos.y += font->point_size * scale; draw_pos.y += font->point_size * scale;
struct font_glyphs_list glyphs = font_get_glyphs(font, str32); struct font_glyphs_list glyphs = font_get_glyphs(font, str32);
for (struct font_glyph *glyph = glyphs->first; glyph; glyph = glyph->next) {
}

View File

@ -3,7 +3,7 @@
#include "memory.h" #include "memory.h"
#include "scratch.h" #include "scratch.h"
#include "math.h" #include "math.h"
#include "utf.h" #include "uni.h"
/* /*
* NOTE: Strings should be considered ~IMMUTABLE~ * NOTE: Strings should be considered ~IMMUTABLE~
@ -480,7 +480,7 @@ b32 string_codepoint_iter_next(struct string_codepoint_iter *iter)
{ {
if (iter->pos < iter->src.len) { if (iter->pos < iter->src.len) {
struct string str_remaining = { .len = (iter->src.len - iter->pos), .text = iter->src.text + iter->pos }; struct string str_remaining = { .len = (iter->src.len - iter->pos), .text = iter->src.text + iter->pos };
struct utf8_decode_result decoded = utf8_decode(str_remaining); struct uni_decode_utf8_result decoded = uni_decode_utf8(str_remaining);
iter->pos += decoded.advance8; iter->pos += decoded.advance8;
iter->codepoint = decoded.codepoint; iter->codepoint = decoded.codepoint;
return true; return true;
@ -506,8 +506,8 @@ struct string string_from_string16(struct arena *arena, struct string16 str16)
u64 pos16 = 0; u64 pos16 = 0;
while (pos16 < str16.len) { while (pos16 < str16.len) {
struct string16 str16_remaining = { .len = (str16.len - pos16), .text = str16.text + pos16 }; struct string16 str16_remaining = { .len = (str16.len - pos16), .text = str16.text + pos16 };
struct utf16_decode_result decoded = utf16_decode(str16_remaining); struct uni_decode_utf16_result decoded = uni_decode_utf16(str16_remaining);
struct utf8_encode_result encoded = utf8_encode(decoded.codepoint); struct uni_encode_utf8_result encoded = uni_encode_utf8(decoded.codepoint);
u8 *dest = arena_push_array(arena, u8, encoded.count8); u8 *dest = arena_push_array(arena, u8, encoded.count8);
MEMCPY(dest, encoded.chars8, encoded.count8); MEMCPY(dest, encoded.chars8, encoded.count8);
@ -530,8 +530,8 @@ struct string string_from_string32(struct arena *arena, struct string32 str32)
u64 pos32 = 0; u64 pos32 = 0;
while (pos32 < str32.len) { while (pos32 < str32.len) {
struct string32 str32_remaining = { .len = (str32.len - pos32), .text = str32.text + pos32 }; struct string32 str32_remaining = { .len = (str32.len - pos32), .text = str32.text + pos32 };
struct utf32_decode_result decoded = utf32_decode(str32_remaining); struct uni_decode_utf32_result decoded = uni_decode_utf32(str32_remaining);
struct utf8_encode_result encoded = utf8_encode(decoded.codepoint); struct uni_encode_utf8_result encoded = uni_encode_utf8(decoded.codepoint);
u8 *dest = arena_push_array(arena, u8, encoded.count8); u8 *dest = arena_push_array(arena, u8, encoded.count8);
MEMCPY(dest, &encoded.chars8, encoded.count8); MEMCPY(dest, &encoded.chars8, encoded.count8);
@ -554,8 +554,8 @@ struct string16 string16_from_string(struct arena *arena, struct string str8)
u64 pos8 = 0; u64 pos8 = 0;
while (pos8 < str8.len) { while (pos8 < str8.len) {
struct string str8_remaining = { .len = (str8.len - pos8), .text = str8.text + pos8 }; struct string str8_remaining = { .len = (str8.len - pos8), .text = str8.text + pos8 };
struct utf8_decode_result decoded = utf8_decode(str8_remaining); struct uni_decode_utf8_result decoded = uni_decode_utf8(str8_remaining);
struct utf16_encode_result encoded = utf16_encode(decoded.codepoint); struct uni_encode_utf16_result encoded = uni_encode_utf16(decoded.codepoint);
u16 *dest = arena_push_array(arena, u16, encoded.count16); u16 *dest = arena_push_array(arena, u16, encoded.count16);
MEMCPY(dest, encoded.chars16, (encoded.count16 << 1)); MEMCPY(dest, encoded.chars16, (encoded.count16 << 1));
@ -578,8 +578,8 @@ struct string32 string32_from_string(struct arena *arena, struct string str8)
u64 pos8 = 0; u64 pos8 = 0;
while (pos8 < str8.len) { while (pos8 < str8.len) {
struct string str8_remaining = { .len = (str8.len - pos8), .text = str8.text + pos8 }; struct string str8_remaining = { .len = (str8.len - pos8), .text = str8.text + pos8 };
struct utf8_decode_result decoded = utf8_decode(str8_remaining); struct uni_decode_utf8_result decoded = uni_decode_utf8(str8_remaining);
struct utf32_encode_result encoded = utf32_encode(decoded.codepoint); struct uni_encode_utf32_result encoded = uni_encode_utf32(decoded.codepoint);
u32 *dest = arena_push(arena, u32); u32 *dest = arena_push(arena, u32);
*dest = encoded.chars32; *dest = encoded.chars32;

View File

@ -10,7 +10,7 @@
#include "math.h" #include "math.h"
#include "util.h" #include "util.h"
#include "thread_local.h" #include "thread_local.h"
#include "utf.h" #include "uni.h"
#define UNICODE #define UNICODE
#include <Windows.h> #include <Windows.h>
@ -205,8 +205,8 @@ INTERNAL struct string string_from_win32_path(struct arena *arena, wchar_t *src)
while (*src) { while (*src) {
struct string16 decode_str = { .len = *(src + 1) ? 2 : 1, .text = src }; struct string16 decode_str = { .len = *(src + 1) ? 2 : 1, .text = src };
struct utf16_decode_result decoded = utf16_decode(decode_str); struct uni_decode_utf16_result decoded = uni_decode_utf16(decode_str);
struct utf8_encode_result encoded = utf8_encode(decoded.codepoint); struct uni_encode_utf8_result encoded = uni_encode_utf8(decoded.codepoint);
u8 *dest = arena_push_array(arena, u8, encoded.count8); u8 *dest = arena_push_array(arena, u8, encoded.count8);
for (u32 i = 0; i < encoded.count8; ++i) { for (u32 i = 0; i < encoded.count8; ++i) {
u8 byte = encoded.chars8[i]; u8 byte = encoded.chars8[i];
@ -921,14 +921,14 @@ INTERNAL LRESULT CALLBACK win32_window_proc(HWND hwnd, UINT msg, WPARAM wparam,
/* Decode */ /* Decode */
u32 codepoint = 0; u32 codepoint = 0;
if (utf16_is_high_surrogate(utf16_char)) { if (uni_is_utf16_high_surrogate(utf16_char)) {
window->utf16_high_surrogate_last_input = utf16_char; window->utf16_high_surrogate_last_input = utf16_char;
} else if (utf16_is_low_surrogate(utf16_char)) { } else if (uni_is_utf16_low_surrogate(utf16_char)) {
u16 high = window->utf16_high_surrogate_last_input; u16 high = window->utf16_high_surrogate_last_input;
u16 low = utf16_char; u16 low = utf16_char;
if (high) { if (high) {
u16 utf16_pair_bytes[2] = { high, low }; u16 utf16_pair_bytes[2] = { high, low };
struct utf16_decode_result decoded = utf16_decode((struct string16) { .len = ARRAY_COUNT(utf16_pair_bytes), .text = utf16_pair_bytes }); struct uni_decode_utf16_result decoded = uni_decode_utf16((struct string16) { .len = ARRAY_COUNT(utf16_pair_bytes), .text = utf16_pair_bytes });
if (decoded.advance16 == 2 && decoded.codepoint < U32_MAX) { if (decoded.advance16 == 2 && decoded.codepoint < U32_MAX) {
codepoint = decoded.codepoint; codepoint = decoded.codepoint;
} }

View File

@ -1,4 +1,4 @@
#include "utf.h" #include "uni.h"
/* ========================== * /* ========================== *
* utf8 * utf8
@ -8,7 +8,7 @@ GLOBAL READONLY u8 g_utf8_lens[32] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2,3,3,4,5 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2,3,3,4,5
}; };
struct utf8_decode_result utf8_decode(struct string str) struct uni_decode_utf8_result uni_decode_utf8(struct string str)
{ {
u32 codepoint = U32_MAX; u32 codepoint = U32_MAX;
u32 advance = 0; u32 advance = 0;
@ -68,15 +68,15 @@ struct utf8_decode_result utf8_decode(struct string str)
} }
} }
return (struct utf8_decode_result) { return (struct uni_decode_utf8_result) {
.advance8 = advance, .advance8 = advance,
.codepoint = codepoint .codepoint = codepoint
}; };
} }
struct utf8_encode_result utf8_encode(u32 codepoint) struct uni_encode_utf8_result uni_encode_utf8(u32 codepoint)
{ {
struct utf8_encode_result res = { 0 }; struct uni_encode_utf8_result res = { 0 };
if (codepoint <= 0x7F) { if (codepoint <= 0x7F) {
res.count8 = 1; res.count8 = 1;
@ -109,7 +109,7 @@ struct utf8_encode_result utf8_encode(u32 codepoint)
* utf16 * utf16
* ========================== */ * ========================== */
struct utf16_decode_result utf16_decode(struct string16 str) struct uni_decode_utf16_result uni_decode_utf16(struct string16 str)
{ {
u32 codepoint = U32_MAX; u32 codepoint = U32_MAX;
u32 advance = 0; u32 advance = 0;
@ -128,15 +128,15 @@ struct utf16_decode_result utf16_decode(struct string16 str)
} }
} }
return (struct utf16_decode_result) { return (struct uni_decode_utf16_result) {
.advance16 = advance, .advance16 = advance,
.codepoint = codepoint .codepoint = codepoint
}; };
} }
struct utf16_encode_result utf16_encode(u32 codepoint) struct uni_encode_utf16_result uni_encode_utf16(u32 codepoint)
{ {
struct utf16_encode_result res = { 0 }; struct uni_encode_utf16_result res = { 0 };
if (codepoint <= 0xFFFF) { if (codepoint <= 0xFFFF) {
res.count16 = 1; res.count16 = 1;
@ -154,12 +154,12 @@ struct utf16_encode_result utf16_encode(u32 codepoint)
return res; return res;
} }
/* Reports whether the UTF-16 code unit `c` is a high surrogate, i.e. lies in
 * the half-open range 0xD800 <= c < 0xDC00. The rename does not change the body. */
b32 utf16_is_high_surrogate(u16 c) b32 uni_is_utf16_high_surrogate(u16 c)
{ {
return 0xD800 <= c && c < 0xDC00; return 0xD800 <= c && c < 0xDC00;
} }
/* Reports whether the UTF-16 code unit `c` is a low surrogate, i.e. lies in
 * the half-open range 0xDC00 <= c < 0xE000. The rename does not change the body. */
b32 utf16_is_low_surrogate(u16 c) b32 uni_is_utf16_low_surrogate(u16 c)
{ {
return 0xDC00 <= c && c < 0xE000; return 0xDC00 <= c && c < 0xE000;
} }
@ -168,7 +168,7 @@ b32 utf16_is_low_surrogate(u16 c)
* utf32 * utf32
* ========================== */ * ========================== */
struct utf32_decode_result utf32_decode(struct string32 str) struct uni_decode_utf32_result uni_decode_utf32(struct string32 str)
{ {
u32 codepoint = U32_MAX; u32 codepoint = U32_MAX;
u32 advance = 0; u32 advance = 0;
@ -181,15 +181,15 @@ struct utf32_decode_result utf32_decode(struct string32 str)
} }
} }
return (struct utf32_decode_result) { return (struct uni_decode_utf32_result) {
.advance32 = advance, .advance32 = advance,
.codepoint = codepoint .codepoint = codepoint
}; };
} }
struct utf32_encode_result utf32_encode(u32 codepoint) struct uni_encode_utf32_result uni_encode_utf32(u32 codepoint)
{ {
struct utf32_encode_result res = { 0 }; struct uni_encode_utf32_result res = { 0 };
if (codepoint <= 0x10FFFF) { if (codepoint <= 0x10FFFF) {
res.chars32 = codepoint; res.chars32 = codepoint;

56
src/uni.h Normal file
View File

@ -0,0 +1,56 @@
#ifndef UNI_H
#define UNI_H
/* ========================== *
* utf8
* ========================== */
/* Result of decoding one codepoint from the front of a UTF-8 string. */
struct uni_decode_utf8_result {
/* Number of u8 units consumed; callers add this to their read position. */
u32 advance8;
/* Decoded codepoint. The decoder starts from U32_MAX, which callers appear to
 * treat as "no valid codepoint" -- NOTE(review): confirm against uni.c. */
u32 codepoint;
};
/* Result of encoding one codepoint as UTF-8. */
struct uni_encode_utf8_result {
/* Number of leading bytes of chars8 that are in use (1 for codepoints <= 0x7F);
 * callers copy exactly this many bytes. */
u32 count8;
/* Encoded bytes; only the first count8 entries are copied out by callers. */
u8 chars8[4];
};
/* Decodes a codepoint from the start of `str`. Callers pass the not-yet-consumed
 * remainder of their string and advance by the returned advance8. */
struct uni_decode_utf8_result uni_decode_utf8(struct string str);
/* Encodes `codepoint` into up to four UTF-8 bytes (chars8/count8). */
struct uni_encode_utf8_result uni_encode_utf8(u32 codepoint);
/* ========================== *
* utf16
* ========================== */
/* Result of decoding one codepoint from the front of a UTF-16 string. */
struct uni_decode_utf16_result {
/* Number of u16 units consumed; the window proc checks for 2 after decoding
 * a surrogate pair. */
u32 advance16;
/* Decoded codepoint. The decoder starts from U32_MAX and callers check
 * `codepoint < U32_MAX` before using it. */
u32 codepoint;
};
/* Result of encoding one codepoint as UTF-16. */
struct uni_encode_utf16_result {
/* Number of u16 units of chars16 in use (1 for codepoints <= 0xFFFF);
 * callers copy count16 * 2 bytes. */
u32 count16;
/* Encoded code units; only the first count16 entries are copied out by callers. */
u16 chars16[2];
};
/* Decodes a codepoint from the start of `str`. */
struct uni_decode_utf16_result uni_decode_utf16(struct string16 str);
/* Encodes `codepoint` into one or two UTF-16 code units (chars16/count16). */
struct uni_encode_utf16_result uni_encode_utf16(u32 codepoint);
/* Nonzero when 0xD800 <= c < 0xDC00. */
b32 uni_is_utf16_high_surrogate(u16 c);
/* Nonzero when 0xDC00 <= c < 0xE000. */
b32 uni_is_utf16_low_surrogate(u16 c);
/* ========================== *
* utf32
* ========================== */
/* Result of decoding one codepoint from the front of a UTF-32 string. */
struct uni_decode_utf32_result {
/* Number of u32 units consumed -- presumably 0 or 1; TODO confirm in uni.c. */
u32 advance32;
/* Decoded codepoint; the decoder starts from U32_MAX. */
u32 codepoint;
};
/* Result of encoding one codepoint as UTF-32. */
struct uni_encode_utf32_result {
/* The single encoded unit. Set to the codepoint when it is <= 0x10FFFF; the
 * result is zero-initialized first, so other inputs presumably yield 0 unless
 * a branch not shown here assigns something else -- confirm in uni.c. */
u32 chars32;
};
/* Decodes a codepoint from the start of `str`. */
struct uni_decode_utf32_result uni_decode_utf32(struct string32 str);
/* Encodes `codepoint` as a single UTF-32 unit (chars32). */
struct uni_encode_utf32_result uni_encode_utf32(u32 codepoint);
#endif

View File

@ -23,7 +23,7 @@
/* FIXME: remove this (testing) */ /* FIXME: remove this (testing) */
#include "utf.h" #include "uni.h"
GLOBAL u8 test_input_array[256] = { 0 }; GLOBAL u8 test_input_array[256] = { 0 };
GLOBAL u32 test_input_array_pos; GLOBAL u32 test_input_array_pos;
@ -515,7 +515,7 @@ INTERNAL void user_update(void)
if (event->kind == SYS_EVENT_KIND_TEXT) { if (event->kind == SYS_EVENT_KIND_TEXT) {
u32 codepoint = event->text_codepoint; u32 codepoint = event->text_codepoint;
struct utf8_encode_result encoded = utf8_encode(codepoint); struct uni_encode_utf8_result encoded = uni_encode_utf8(codepoint);
MEMCPY(&test_input_array[test_input_array_pos], encoded.chars8, encoded.count8); MEMCPY(&test_input_array[test_input_array_pos], encoded.chars8, encoded.count8);
test_input_array_pos += encoded.count8; test_input_array_pos += encoded.count8;
@ -1049,7 +1049,7 @@ INTERNAL void user_update(void)
u64 pos = 0; u64 pos = 0;
while (pos < src_str.len) { while (pos < src_str.len) {
struct string remaining = { .len = src_str.len - pos, .text = src_str.text + pos }; struct string remaining = { .len = src_str.len - pos, .text = src_str.text + pos };
struct utf8_decode_result decoded = utf8_decode(remaining); struct uni_decode_utf8_result decoded = uni_decode_utf8(remaining);
if (codepoint_count % 10 == 0) { if (codepoint_count % 10 == 0) {
draw_str.len += string_copy(scratch.arena, STR("\n")).len; draw_str.len += string_copy(scratch.arena, STR("\n")).len;

View File

@ -1,56 +0,0 @@
#ifndef UTF_H
#define UTF_H
/* ========================== *
* utf8
* ========================== */
/* Result of decoding one codepoint from the front of a UTF-8 string. */
struct utf8_decode_result {
/* Number of u8 units consumed; callers add this to their read position. */
u32 advance8;
/* Decoded codepoint; the decoder starts from U32_MAX. */
u32 codepoint;
};
/* Result of encoding one codepoint as UTF-8. */
struct utf8_encode_result {
/* Number of leading bytes of chars8 in use; callers copy exactly this many. */
u32 count8;
/* Encoded bytes; only the first count8 entries are copied out by callers. */
u8 chars8[4];
};
/* Decodes a codepoint from the start of `str`. */
struct utf8_decode_result utf8_decode(struct string str);
/* Encodes `codepoint` into up to four UTF-8 bytes (chars8/count8). */
struct utf8_encode_result utf8_encode(u32 codepoint);
/* ========================== *
* utf16
* ========================== */
/* Result of decoding one codepoint from the front of a UTF-16 string. */
struct utf16_decode_result {
/* Number of u16 units consumed; callers check for 2 after a surrogate pair. */
u32 advance16;
/* Decoded codepoint; the decoder starts from U32_MAX. */
u32 codepoint;
};
/* Result of encoding one codepoint as UTF-16. */
struct utf16_encode_result {
/* Number of u16 units of chars16 in use (1 for codepoints <= 0xFFFF). */
u32 count16;
/* Encoded code units; only the first count16 entries are copied out by callers. */
u16 chars16[2];
};
/* Decodes a codepoint from the start of `str`. */
struct utf16_decode_result utf16_decode(struct string16 str);
/* Encodes `codepoint` into one or two UTF-16 code units (chars16/count16). */
struct utf16_encode_result utf16_encode(u32 codepoint);
/* Nonzero when 0xD800 <= c < 0xDC00. */
b32 utf16_is_high_surrogate(u16 c);
/* Nonzero when 0xDC00 <= c < 0xE000. */
b32 utf16_is_low_surrogate(u16 c);
/* ========================== *
* utf32
* ========================== */
/* Result of decoding one codepoint from the front of a UTF-32 string. */
struct utf32_decode_result {
/* Number of u32 units consumed -- presumably 0 or 1; TODO confirm. */
u32 advance32;
/* Decoded codepoint; the decoder starts from U32_MAX. */
u32 codepoint;
};
/* Result of encoding one codepoint as UTF-32. */
struct utf32_encode_result {
/* The single encoded unit; set to the codepoint when it is <= 0x10FFFF. */
u32 chars32;
};
/* Decodes a codepoint from the start of `str`. */
struct utf32_decode_result utf32_decode(struct string32 str);
/* Encodes `codepoint` as a single UTF-32 unit (chars32). */
struct utf32_encode_result utf32_encode(u32 codepoint);
#endif