From e4ecc217e31c15cec5c777d6729b09df2f742687 Mon Sep 17 00:00:00 2001 From: jacob Date: Thu, 4 Apr 2024 14:51:32 -0500 Subject: [PATCH] process unicode input in win32 --- src/common.h | 11 ++--------- src/sys.h | 2 +- src/sys_win32.c | 51 ++++++++++++++++++++++++++++++++++++++----------- src/utf.c | 10 ++++++++++ src/utf.h | 2 ++ 5 files changed, 55 insertions(+), 21 deletions(-) diff --git a/src/common.h b/src/common.h index 4845f096..58d58c2a 100644 --- a/src/common.h +++ b/src/common.h @@ -402,16 +402,9 @@ struct buffer { .text = (u8 *)(cstr_lit) \ } -#define STRING_FROM_ARRAY(a) \ - ( \ - /* Must be array */ \ - ASSERT(IS_ARRAY(a)), \ - /* Must be array of bytes */ \ - ASSERT(sizeof(a[0]) == sizeof(u8)), \ - ((struct string) { .len = ARRAY_COUNT(a), .text = (u8 *)(a) }) \ - ) +#define STRING_FROM_BUFFER(buff) ((struct string) { buff.size, buff.data }) -#define STRING_FROM_BUFFER(buff) ((struct string) { buff.size, buff.data}) +#define STRING_FROM_ARRAY(a) STRING_FROM_BUFFER(BUFFER_FROM_ARRAY(a)) /* ========================== * * Math types diff --git a/src/sys.h b/src/sys.h index c8c5b56c..50cec0cd 100644 --- a/src/sys.h +++ b/src/sys.h @@ -133,7 +133,7 @@ struct sys_event { b32 is_repeat; /* SYS_EVENT_KIND_TEXT */ - u32 text_character; + u32 text_codepoint; /* SYS_EVENT_KIND_CURSOR_MOVE */ struct v2 cursor_position; diff --git a/src/sys_win32.c b/src/sys_win32.c index 9e9b29ac..0d574c37 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -59,6 +59,8 @@ struct win32_window { u32 tid; struct sync_flag ready_sf; + u16 utf16_high_surrogate_last_input; + struct sys_rw_mutex settings_rw_mutex; struct sys_window_settings settings; @@ -914,19 +916,46 @@ INTERNAL LRESULT CALLBACK win32_window_proc(HWND hwnd, UINT msg, WPARAM wparam, /* Text */ case WM_SYSCHAR: case WM_CHAR: { - u32 character = (u32)wparam; - if (character == '\r') { - character = '\n'; /* Just treat all \r as newline */ - } - if((character >= 32 && character != 127) || character == '\t' || character == '\n') { - win32_window_process_event( - window, - (struct sys_event) { - .kind = SYS_EVENT_KIND_TEXT, - .text_character = character + u16 utf16_char = (u32)wparam; + + /* Decode */ + u32 codepoint = 0; + if (utf16_is_high_surrogate(utf16_char)) { + window->utf16_high_surrogate_last_input = utf16_char; + } else if (utf16_is_low_surrogate(utf16_char)) { + u16 high = window->utf16_high_surrogate_last_input; + u16 low = utf16_char; + if (high) { + u16 utf16_pair_bytes[2] = { high, low }; + struct utf16_decode_result decoded = utf16_decode((struct string16) { .len = ARRAY_COUNT(utf16_pair_bytes), .text = utf16_pair_bytes }); + if (decoded.advance16 == 2 && decoded.codepoint < U32_MAX) { + codepoint = decoded.codepoint; + } else { + codepoint = '?'; } - ); + } + window->utf16_high_surrogate_last_input = 0; + } else { + window->utf16_high_surrogate_last_input = 0; + codepoint = utf16_char; } + + if (codepoint) { + if (codepoint == '\r') { + codepoint = '\n'; /* Just treat all \r as newline */ + } + if((codepoint >= 32 && codepoint != 127) || codepoint == '\t' || codepoint == '\n') { + win32_window_process_event( + window, + (struct sys_event) { + .kind = SYS_EVENT_KIND_TEXT, + .text_codepoint = codepoint + } + ); + } + } + + } break; /* Mouse buttons */ diff --git a/src/utf.c b/src/utf.c index b6fe488f..17a09da9 100644 --- a/src/utf.c +++ b/src/utf.c @@ -154,6 +154,16 @@ struct utf16_encode_result utf16_encode(u32 codepoint) return res; } +b32 utf16_is_high_surrogate(u16 c) +{ + return 0xD800 <= c && c < 0xDC00; +} + +b32 utf16_is_low_surrogate(u16 c) +{ + return 0xDC00 <= c && c < 0xE000; +} + /* ========================== * * utf32 * ========================== */ diff --git a/src/utf.h b/src/utf.h index 6c3a33ef..119a1a93 100644 --- a/src/utf.h +++ b/src/utf.h @@ -34,6 +34,8 @@ struct utf16_encode_result { struct utf16_decode_result utf16_decode(struct string16 str); struct utf16_encode_result utf16_encode(u32 codepoint); +b32 utf16_is_high_surrogate(u16 c); +b32 utf16_is_low_surrogate(u16 c); /* ========================== * * utf32