power_play/src/string.c
2025-02-24 09:02:06 -06:00

817 lines
22 KiB
C

#include "string.h"
#include "arena.h"
#include "memory.h"
#include "scratch.h"
#include "math.h"
#include "uni.h"
/*
* NOTE: Strings should be considered ~IMMUTABLE~
*
* All string functions return a new string as a result. Any strings passed as
* arguments (e.g. to string_cat) will not be modified.
*
* Use the LIT macro to create strings from string literals
*
* NOTE: It is valid for a string to have len 0 but a non-NULL text pointer.
* Always check string.len rather than string.text for string presence.
* (If we want to change this behavior then we need to check for length = 0 in
* our functions that return a pointer from arena_dry_push, or guarantee that
* all functions returning an arena_dry_push do allocate.)
*/
/* ========================== *
* Conversion
* ========================== */
/* Digit characters indexed by digit value (lowercase, enough for base 16). */
#define INT_CHARS ("0123456789abcdef")
struct string string_from_char(struct arena *arena, char c)
{
u8 *dest = arena_push(arena, u8);
*dest = c;
return (struct string) {
.len = 1,
.text = dest
};
}
/* Formats `n` in the given `base` using lowercase digits, left-padding with
 * '0' until the result is at least `zfill` characters long.
 *
 * `base` must be between 2 and 16 inclusive.
 * The returned text is allocated from `arena`. */
struct string string_from_uint(struct arena *arena, u64 n, u64 base, u64 zfill)
{
    /* Base too small: 0 would divide by zero below and 1 would never terminate */
    ASSERT(base >= 2);
    /* Base too large */
    ASSERT(base <= (ARRAY_COUNT(INT_CHARS) - 1));

    struct temp_arena scratch = scratch_begin(arena);

    /* Build backwards text starting from least significant digit. The digits
     * are pushed one byte at a time onto the scratch arena and are read back
     * through `backwards_text`, so this relies on those pushes landing back to
     * back starting at this address. */
    u64 len = 0;
    u8 *backwards_text = arena_dry_push(scratch.arena, u8);
    do {
        string_from_char(scratch.arena, INT_CHARS[n % base]);
        ++len;
        n /= base;
    } while (n > 0);

    /* Pad with zeros; they sit after the digits in the backwards text and so
     * end up in front once reversed */
    while (len < zfill) {
        string_from_char(scratch.arena, '0');
        ++len;
    }

    /* Reverse text into final string */
    u8 *final_text = arena_push_array(arena, u8, len);
    for (u64 i = 0; i < len; ++i) {
        final_text[i] = backwards_text[len - i - 1];
    }

    scratch_end(scratch);

    return (struct string) {
        .len = len,
        .text = final_text
    };
}
/* Formats the signed integer `n` in the given `base`. Negative values are
 * prefixed with '-'. `zfill` is forwarded to string_from_uint and therefore
 * applies to the digits only, not to the sign.
 *
 * The returned text starts at the current top of `arena`; the sign and the
 * digits are pushed right after each other and are expected to be adjacent. */
struct string string_from_int(struct arena *arena, i64 n, u64 base, u64 zfill)
{
    u8 *final_text = arena_dry_push(arena, u8);
    u64 sign_len = 0;
    u64 magnitude = (u64)n;

    if (n < 0) {
        /* Push sign */
        string_from_char(arena, '-');
        sign_len = 1;
        /* Negate in unsigned arithmetic: `-n` overflows (undefined behavior)
         * when n is the most negative i64, while this wraps to the correct
         * magnitude. */
        magnitude = (u64)0 - magnitude;
    }

    /* Push unsigned number */
    struct string uint_str = string_from_uint(arena, magnitude, base, zfill);

    return (struct string) {
        .len = sign_len + uint_str.len,
        .text = final_text
    };
}
/* Formats `ptr` as "0x" followed by its value in hexadecimal, zero-filled to
 * the full width of a pointer.
 *
 * The prefix and the digits are pushed onto `arena` one after the other; the
 * returned string starts at the prefix and spans both. */
struct string string_from_ptr(struct arena *arena, void *ptr)
{
    struct string prepend = string_copy(arena, LIT("0x"));
    /* Each byte of the pointer is two hex digits, so the fill width is twice
     * the pointer size in bytes */
    struct string uint_str = string_from_uint(arena, (u64)ptr, 16, sizeof(ptr) * 2);
    return (struct string) {
        .len = prepend.len + uint_str.len,
        .text = prepend.text
    };
}
/* Formats `f` as decimal text with `precision` digits after the decimal point
 * (no decimal point is written when `precision` is 0). NaN and the infinities
 * are written as "NaN", "inf" and "-inf".
 *
 * The result starts at the dry-push address taken from `arena` below and its
 * length is the sum of everything pushed onto `arena` afterwards. */
struct string string_from_float(struct arena *arena, f64 f, u32 precision)
{
struct temp_arena scratch = scratch_begin(arena);
/* Start of the output; the pieces pushed onto `arena` below are addressed
 * through this pointer */
u8 *final_text = arena_dry_push(arena, u8);
u64 final_len = 0;
/* NOTE(review): F32_IS_NAN is applied to an f64 here - confirm the macro
 * handles doubles */
if (F32_IS_NAN(f)) {
final_len += string_copy(arena, LIT("NaN")).len;
} else if (f == F64_INFINITY) {
final_len += string_copy(arena, LIT("inf")).len;
} else if (f == -F64_INFINITY) {
final_len += string_copy(arena, LIT("-inf")).len;
} else {
/* Write the sign and continue with the magnitude */
if (f < 0) {
string_from_char(arena, '-');
f = -f;
++final_len;
}
/* Add one half of next precision level to round up
 * (note that precision is narrowed to u8 for the power call) */
f += 0.5 / (f64)math_pow_u64(10, (u8)precision);
f64 part_whole = math_trunc64(f);
f64 part_decimal = f - part_whole;
/* Print whole part */
{
/* Build backwards text starting from least significant digit */
u8 *backwards_text = arena_dry_push(scratch.arena, u8);
u64 backwards_text_len = 0;
do {
u64 digit = (u64)math_round_to_int64(math_fmod64(part_whole, 10.0));
string_from_char(scratch.arena, INT_CHARS[digit % 10]);
++backwards_text_len;
part_whole = math_trunc64(part_whole / 10.0);
} while (part_whole > 0);
/* Reverse text into final string. The space is reserved first and then
 * filled through final_text, which assumes the reservation directly
 * follows what has already been written. */
arena_push_array(arena, u8, backwards_text_len);
for (u64 i = backwards_text_len; i-- > 0;) {
final_text[final_len++] = backwards_text[i];
}
}
/* Print decimal part */
if (precision > 0) {
string_from_char(arena, '.');
/* Peel off one decimal digit per iteration by scaling the fraction by 10 */
for (u64 i = 0; i < precision; ++i) {
part_decimal *= 10.0;
u64 digit = (u64)part_decimal;
part_decimal -= digit;
string_from_char(arena, INT_CHARS[digit % 10]);
}
/* Account for the '.' plus the decimal digits pushed above */
final_len += (u64)precision + 1;
}
}
scratch_end(scratch);
return (struct string) {
.len = final_len,
.text = final_text
};
}
/* Formats a handle made of two 64 bit halves as "h<v0>x<v1>", with both
 * halves written in hexadecimal without zero-fill. The four pieces are pushed
 * onto `arena` in order and the result starts where the first one begins. */
struct string string_from_handle(struct arena *arena, u64 v0, u64 v1)
{
    u8 *start = arena_dry_push(arena, u8);
    u64 total = 0;

    total += string_copy(arena, LIT("h")).len;
    total += string_from_uint(arena, v0, 16, 0).len;
    total += string_copy(arena, LIT("x")).len;
    total += string_from_uint(arena, v1, 16, 0).len;

    return (struct string) {
        .len = total,
        .text = start
    };
}
struct string string_from_uid(struct arena *arena, struct uid uid)
{
struct string res = ZI;
res.text = arena_dry_push(arena, u8);
res.len += string_from_uint(arena, (uid.hi >> 32), 16, 8).len;
return res;
}
/* ========================== *
* String operations
* ========================== */
struct string string_copy(struct arena *arena, struct string src)
{
struct string str = {
.len = src.len,
.text = arena_push_array(arena, u8, src.len)
};
MEMCPY(str.text, src.text, src.len);
return str;
}
struct string string_copy_to_string(struct string dst, struct string src)
{
struct string res = ZI;
res.len = min_u64(dst.len, src.len);
res.text = dst.text;
MEMCPY(res.text, src.text, res.len);
return res;
}
/* Returns `src` repeated `count` times back to back, allocated from `arena`. */
struct string string_repeat(struct arena *arena, struct string src, u64 count)
{
    u64 total = src.len * count;
    u8 *text = arena_push_array(arena, u8, total);

    /* Walk a write cursor through the destination, one copy per repetition */
    u8 *cursor = text;
    for (u64 remaining = count; remaining > 0; --remaining) {
        MEMCPY(cursor, src.text, src.len);
        cursor += src.len;
    }

    return (struct string) {
        .text = text,
        .len = total
    };
}
struct string string_cat(struct arena *arena, struct string str1, struct string str2)
{
struct string new_str = ZI;
new_str.len = str1.len + str2.len;
new_str.text = arena_push_array(arena, u8, new_str.len);
MEMCPY(new_str.text, str1.text, str1.len);
MEMCPY(new_str.text + str1.len, str2.text, str2.len);
return new_str;
}
/* `arena` is where pieces items will be allocated. These strings point
* into the existing supplied string and do not allocate any new text. */
struct string_array string_split(struct arena *arena, struct string str, struct string delim)
{
struct string_array pieces = {
.count = 0,
.strings = arena_dry_push(arena, struct string)
};
struct string piece = {
.len = 0,
.text = str.text
};
for (u64 i = 0; i <= str.len - delim.len; ++i) {
/* Clamp comparison string so we don't overflow. */
struct string comp_str = {
.len = delim.len,
.text = &str.text[i]
};
b32 is_delimiter = string_eq(comp_str, delim);
b32 is_end = i == str.len - 1;
if (!is_delimiter || is_end) {
++piece.len;
}
if (is_delimiter || is_end) {
/* Delimiter found */
struct string *piece_pushed = arena_push(arena, struct string);
*piece_pushed = piece;
++pieces.count;
piece.text = piece.text + piece.len + delim.len;
piece.len = 0;
}
}
return pieces;
}
/* Returns a copy of `str` in which every piece produced by splitting on "\n"
 * is prefixed with `indent` spaces; pieces are joined with "\n" again.
 *
 * NOTE: Really slow (the output is pushed onto `arena` piece by piece). */
struct string string_indent(struct arena *arena, struct string str, u32 indent)
{
    struct temp_arena scratch = scratch_begin(arena);

    u8 *start = arena_dry_push(arena, u8);
    u64 total = 0;

    struct string_array lines = string_split(scratch.arena, str, LIT("\n"));
    for (u64 line = 0; line < lines.count; ++line) {
        /* Separator goes between pieces, never after the last one */
        if (line > 0) {
            string_from_char(arena, '\n');
            ++total;
        }

        u32 spaces_left = indent;
        while (spaces_left > 0) {
            string_from_char(arena, ' ');
            ++total;
            --spaces_left;
        }

        total += string_copy(arena, lines.strings[line]).len;
    }

    scratch_end(scratch);

    return (struct string) {
        .len = total,
        .text = start
    };
}
struct string string_lower(struct arena *arena, struct string str)
{
struct string res = ZI;
res.text = arena_push_array(arena, u8, str.len);
res.len = str.len;
for (u64 i = 0; i < str.len; ++i) {
u8 c = str.text[i];
if (65 <= c && c <= 90) {
c += 32;
}
res.text[i] = c;
}
return res;
}
/* Returns true when both strings have the same length and identical bytes. */
b32 string_eq(struct string str1, struct string str2)
{
    if (str1.len != str2.len) {
        return false;
    }
    for (u64 i = 0; i < str1.len; ++i) {
        if (str1.text[i] != str2.text[i]) {
            return false;
        }
    }
    return true;
}
i32 string_cmp(struct string str1, struct string str2)
{
i32 res = 0;
for (u64 i = 0; i < min_u64(str1.len, str2.len); ++i) {
res = str1.text[i] - str2.text[i];
if (res != 0) {
break;
}
}
if (res == 0) {
if (str1.len > str2.len) {
res = str1.text[str2.len];
} else if (str2.len > str1.len) {
res = str2.text[str1.len];
}
}
return res;
}
/* Returns true when `substring` occurs anywhere inside `str`. An empty
 * `substring` is always found. */
b32 string_contains(struct string str, struct string substring)
{
    if (substring.len > str.len) {
        return false;
    }

    u64 last_start = str.len - substring.len;
    for (u64 start = 0; start <= last_start; ++start) {
        /* Count how many leading bytes of the substring match at this offset */
        u64 matched = 0;
        while (matched < substring.len && str.text[start + matched] == substring.text[matched]) {
            ++matched;
        }
        if (matched == substring.len) {
            return true;
        }
    }

    return false;
}
/* Returns true when the first bytes of `str` are exactly `substring`. */
b32 string_starts_with(struct string str, struct string substring)
{
    if (str.len < substring.len) {
        return false;
    }

    u64 i = 0;
    while (i < substring.len && str.text[i] == substring.text[i]) {
        ++i;
    }
    return i == substring.len;
}
b32 string_ends_with(struct string str, struct string substring)
{
if (str.len >= substring.len) {
u64 start = str.len - substring.len;
for (u64 i = 0; i < substring.len; ++i) {
if (str.text[start + i] != substring.text[i]) {
return false;
}
}
return true;
}
return false;
}
/* ========================== *
* Format
* ========================== */
/* String formatting only has one format specifier: "%F". All specifier info is
* included in the arguments (instead of w/ the specifier like in printf). This
* is safer.
*
* Example:
* string_format(arena,
* LIT("Hello there %F. You are %F feet %F inches tall!"),
* FMT_STR(LIT("George")),
* FMT_UINT(6),
* FMT_FLOAT(5.375));
*
* NOTE: FMT_END must be passed as the last arg in the va_list (This is
* done automatically by the `string_format` macro).
*
* Format arguments:
* FMT_CHAR: Format a single u8 character
* FMT_STR: Format a `string` struct
* FMT_UINT: Format a u64
* FMT_SINT: Format an i64
* FMT_FLOAT: Format an f64 with DEFAULT_FMT_PRECISION
* FMT_FLOAT_P: Format an f64 with specified precision
* FMT_HEX: Format a u64 in hexadecimal notation
* FMT_PTR: Format a pointer in hexadecimal notation prefixed by "0x"
* FMT_HANDLE: Format a 128 bit handle
*
* FMT_END (internal): Denote the end of the va_list
*
* TODO:
* %n equivalent? (nothing)
* %e/%E equivalent? (scientific notation of floats)
* %o equivalent? (octal representation)
*/
/* Expands `fmt` into a new string built on `arena`.
 *
 * Each "%F" consumes the next `struct fmt_arg` from `args` and is replaced by
 * that argument's text; "%%" produces a single '%'; every other byte is copied
 * through. If the argument list ends early or holds an unknown type, "<?>" is
 * written in place of the specifier and the rest of `fmt` is copied verbatim
 * without consuming further arguments.
 *
 * The result starts at the dry-push address taken below and its length is the
 * sum of all pieces pushed onto `arena` while formatting. */
struct string string_formatv(struct arena *arena, struct string fmt, va_list args)
{
__prof;
u64 final_len = 0;
u8 *final_text = arena_dry_push(arena, u8);
u8 *end = fmt.text + fmt.len;
/* Set once the end marker or an unknown type has been read from args */
b32 no_more_args = false;
for (u8 *c = fmt.text; c < end; ++c) {
/* Look-ahead byte; points at a zero byte when c is the last character */
u8 *next = ((c + 1) < end) ? (c + 1) : (u8 *)"\0";
/* Escape '%%' */
b32 escape = !no_more_args && *c == '%' && *next == '%';
if (escape) {
/* Skip the escape '%' char from parsing */
++c;
}
if (!no_more_args && !escape && *c == '%' && *next == 'F') {
struct string parsed_str = ZI;
/* Detect arg type and parse to string */
struct fmt_arg arg = va_arg(args, struct fmt_arg);
switch (arg.type) {
case FMT_TYPE_CHAR: {
parsed_str = string_from_char(arena, arg.value.c);
} break;
case FMT_TYPE_STR: {
parsed_str = string_copy(arena, arg.value.string);
} break;
case FMT_TYPE_UINT: {
parsed_str = string_from_uint(arena, arg.value.uint, 10, arg.zfill);
} break;
case FMT_TYPE_SINT: {
parsed_str = string_from_int(arena, arg.value.sint, 10, arg.zfill);
} break;
case FMT_TYPE_HEX: {
/* NOTE(review): reads value.sint rather than value.uint - confirm this
 * is intended */
parsed_str = string_from_uint(arena, arg.value.sint, 16, arg.zfill);
} break;
case FMT_TYPE_PTR: {
parsed_str = string_from_ptr(arena, arg.value.ptr);
} break;
case FMT_TYPE_FLOAT: {
parsed_str = string_from_float(arena, arg.value.f, arg.precision);
} break;
case FMT_TYPE_HANDLE: {
parsed_str = string_from_handle(arena, arg.value.handle.h64[0], arg.value.handle.h64[1]);
} break;
case FMT_TYPE_UID: {
parsed_str = string_from_uid(arena, arg.value.uid);
} break;
case FMT_TYPE_END: {
/* Unexpected end. Not enough FMT args passed to function. */
ASSERT(false);
parsed_str = string_copy(arena, LIT("<?>"));
no_more_args = true;
} break;
default: {
/* Unknown format type */
ASSERT(false);
parsed_str = string_copy(arena, LIT("<?>"));
no_more_args = true;
} break;
}
/* Update final string len / start */
final_len += parsed_str.len;
/* Skip 'F' from parsing */
++c;
} else {
/* Parse character normally */
string_from_char(arena, *c);
++final_len;
}
}
#if RTC
/* Only when RTC is enabled: verify that the next argument is the end marker */
if (!no_more_args) {
struct fmt_arg last_arg = va_arg(args, struct fmt_arg);
/* End arg not reached. Too many FMT values passed to function. */
ASSERT(last_arg.type == FMT_TYPE_END);
}
#endif
return (struct string) {
.len = final_len,
.text = final_text
};
}
struct string _string_format(struct arena *arena, struct string fmt, ...)
{
va_list args;
va_start(args, fmt);
struct string new_str = string_formatv(arena, fmt, args);
va_end(args);
return new_str;
}
/* ========================== *
* Unicode
* ========================== */
/* Codepoint iteration */
struct string_codepoint_iter string_codepoint_iter_begin(struct string str)
{
return (struct string_codepoint_iter) {
.src = str
};
}
b32 string_codepoint_iter_next(struct string_codepoint_iter *iter)
{
if (iter->pos < iter->src.len) {
struct string str_remaining = { .len = (iter->src.len - iter->pos), .text = iter->src.text + iter->pos };
struct uni_decode_utf8_result decoded = uni_decode_utf8(str_remaining);
iter->pos += decoded.advance8;
iter->codepoint = decoded.codepoint;
return true;
} else {
return false;
}
}
/* Counterpart of string_codepoint_iter_begin. Currently has nothing to do. */
void string_codepoint_iter_end(struct string_codepoint_iter *iter)
{
    /* Parameter is intentionally not used */
    (UNUSED)iter;
}
/* utf8 <- utf16 */
struct string string_from_string16(struct arena *arena, struct string16 str16)
{
struct string res = {
.len = 0,
.text = arena_dry_push(arena, u8)
};
u64 pos16 = 0;
while (pos16 < str16.len) {
struct string16 str16_remaining = { .len = (str16.len - pos16), .text = str16.text + pos16 };
struct uni_decode_utf16_result decoded = uni_decode_utf16(str16_remaining);
struct uni_encode_utf8_result encoded = uni_encode_utf8(decoded.codepoint);
u8 *dest = arena_push_array(arena, u8, encoded.count8);
MEMCPY(dest, encoded.chars8, encoded.count8);
pos16 += decoded.advance16;
res.len += encoded.count8;
}
return res;
}
/* utf8 <- utf32 */
struct string string_from_string32(struct arena *arena, struct string32 str32)
{
struct string res = {
.len = 0,
.text = arena_dry_push(arena, u8)
};
u64 pos32 = 0;
while (pos32 < str32.len) {
struct string32 str32_remaining = { .len = (str32.len - pos32), .text = str32.text + pos32 };
struct uni_decode_utf32_result decoded = uni_decode_utf32(str32_remaining);
struct uni_encode_utf8_result encoded = uni_encode_utf8(decoded.codepoint);
u8 *dest = arena_push_array(arena, u8, encoded.count8);
MEMCPY(dest, &encoded.chars8, encoded.count8);
pos32 += 1;
res.len += encoded.count8;
}
return res;
}
/* utf16 <- utf8 */
struct string16 string16_from_string(struct arena *arena, struct string str8)
{
struct string16 res = {
.len = 0,
.text = arena_dry_push(arena, u16)
};
u64 pos8 = 0;
while (pos8 < str8.len) {
struct string str8_remaining = { .len = (str8.len - pos8), .text = str8.text + pos8 };
struct uni_decode_utf8_result decoded = uni_decode_utf8(str8_remaining);
struct uni_encode_utf16_result encoded = uni_encode_utf16(decoded.codepoint);
u16 *dest = arena_push_array(arena, u16, encoded.count16);
MEMCPY(dest, encoded.chars16, (encoded.count16 << 1));
pos8 += decoded.advance8;
res.len += encoded.count16;
}
return res;
}
/* utf32 <- utf8 */
struct string32 string32_from_string(struct arena *arena, struct string str8)
{
struct string32 res = {
.len = 0,
.text = arena_dry_push(arena, u32)
};
u64 pos8 = 0;
while (pos8 < str8.len) {
struct string str8_remaining = { .len = (str8.len - pos8), .text = str8.text + pos8 };
struct uni_decode_utf8_result decoded = uni_decode_utf8(str8_remaining);
struct uni_encode_utf32_result encoded = uni_encode_utf32(decoded.codepoint);
u32 *dest = arena_push(arena, u32);
*dest = encoded.chars32;
pos8 += decoded.advance8;
res.len += 1;
}
return res;
}
/* ========================== *
* C narrow strings
* ========================== */
/* Returns the number of chars before the terminating zero of `cstr`, or 0
 * when `cstr` is NULL. */
u64 cstr_len_no_limit(char *cstr)
{
    u64 len = 0;
    if (cstr) {
        while (cstr[len]) {
            ++len;
        }
    }
    return len;
}
/* Returns the number of chars before the terminating zero of `cstr`, looking
 * at no more than `limit` chars. Returns 0 when `cstr` is NULL. */
u64 cstr_len(char *cstr, u64 limit)
{
    u64 len = 0;
    if (cstr) {
        while (len < limit && cstr[len]) {
            ++len;
        }
    }
    return len;
}
char *cstr_from_string(struct arena *arena, struct string src)
{
u8 *text = arena_push_array(arena, u8, src.len + 1);
MEMCPY(text, src.text, src.len);
text[src.len] = 0;
return (char *)text;
}
/* Writes `src` into the buffer described by `dest_buff` as a zero-terminated
 * C string, truncating so that the terminator always fits. A buffer of
 * length 0 is left untouched. Returns `dest_buff.text` in every case. */
char *cstr_buff_from_string(struct string dest_buff, struct string src)
{
    char *out = (char *)dest_buff.text;
    if (dest_buff.len == 0) {
        return out;
    }

    /* Keep the last byte of the buffer free for the terminator */
    u64 copy_len = min_u64(src.len, dest_buff.len - 1);
    MEMCPY(dest_buff.text, src.text, copy_len);
    dest_buff.text[copy_len] = 0;

    return out;
}
struct string string_from_cstr_no_limit(char *cstr)
{
u64 len = cstr_len_no_limit(cstr);
return (struct string) {
.len = len,
.text = (u8 *)cstr
};
}
/* Wraps `cstr` in a string without copying: the text pointer is `cstr` itself
 * and the length is measured up to the terminating zero, looking at no more
 * than `limit` chars. */
struct string string_from_cstr(char *cstr, u64 limit)
{
    struct string res = ZI;
    res.text = (u8 *)cstr;
    res.len = cstr_len(cstr, limit);
    return res;
}
/* ========================== *
* C wide strings
* ========================== */
/* Returns the number of wide chars before the terminating zero of `wstr`, or
 * 0 when `wstr` is NULL. */
u64 wstr_len_no_limit(wchar_t *wstr)
{
    u64 len = 0;
    if (wstr) {
        while (wstr[len]) {
            ++len;
        }
    }
    return len;
}
/* Returns the number of wide chars before the terminating zero of `wstr`,
 * looking at no more than `limit` wide chars. Returns 0 when `wstr` is NULL. */
u64 wstr_len(wchar_t *wstr, u64 limit)
{
    u64 len = 0;
    if (wstr) {
        while (len < limit && wstr[len]) {
            ++len;
        }
    }
    return len;
}
wchar_t *wstr_from_string(struct arena *arena, struct string src)
{
struct string16 str16 = string16_from_string(arena, src);
*arena_push(arena, u16) = 0;
return (wchar_t *)str16.text;
}
/* Returns a zero-terminated copy of the UTF-16 string `src` allocated from
 * `arena`. */
wchar_t *wstr_from_string16(struct arena *arena, struct string16 src)
{
    /* One extra unit for the terminator */
    u16 *text = arena_push_array(arena, u16, src.len + 1);
    /* Copy the source units; without this the buffer would only hold the
     * terminator and whatever the allocation contained */
    MEMCPY(text, src.text, src.len * sizeof(*text));
    text[src.len] = 0;
    return (wchar_t *)text;
}
/* Converts the zero-terminated wide string `wstr` to a UTF-8 string
 * allocated from `arena`. */
struct string string_from_wstr_no_limit(struct arena *arena, wchar_t *wstr)
{
    return string_from_string16(arena, string16_from_wstr_no_limit(wstr));
}
/* Converts the wide string `wstr`, measured over at most `limit` wide chars,
 * to a UTF-8 string allocated from `arena`. */
struct string string_from_wstr(struct arena *arena, wchar_t *wstr, u64 limit)
{
    return string_from_string16(arena, string16_from_wstr(wstr, limit));
}
struct string16 string16_from_wstr_no_limit(wchar_t *wstr)
{
u64 len = wstr_len_no_limit(wstr);
return (struct string16) {
.len = len,
.text = (u16 *)wstr
};
}
/* Wraps `wstr` in a string16 without copying: the text pointer is `wstr`
 * itself and the length is measured up to the terminating zero, looking at no
 * more than `limit` wide chars. */
struct string16 string16_from_wstr(wchar_t *wstr, u64 limit)
{
    struct string16 res = ZI;
    res.text = (u16 *)wstr;
    res.len = wstr_len(wstr, limit);
    return res;
}