558 lines
15 KiB
C
558 lines
15 KiB
C
#include "string.h"
|
|
#include "arena.h"
|
|
#include "memory.h"
|
|
#include "scratch.h"
|
|
#include "math.h"
|
|
|
|
/*
 * NOTE: Strings should be considered ~IMMUTABLE~
 *
 * All string functions return a new string as a result. Any strings passed as
 * arguments (e.g. to string_cat) will not be modified.
 *
 * Use the STR macro to create strings from string literals.
 *
 * NOTE: It is valid for a string to have len 0 but a non-NULL text pointer.
 * Always check string.len rather than string.text for string presence.
 * (If we want to change this behavior then we need to check for length = 0 in
 * our functions that return a pointer from arena_dry_push, or guarantee that
 * all functions returning an arena_dry_push do allocate.)
 */
|
|
|
|
/* ========================== *
|
|
* Conversion
|
|
* ========================== */
|
|
|
|
#define INT_CHARS ("0123456789abcdef")
|
|
|
|
struct string string_from_char(struct arena *arena, char c)
|
|
{
|
|
u8 *dest = arena_push(arena, u8);
|
|
*dest = c;
|
|
return (struct string) {
|
|
.len = 1,
|
|
.text = dest
|
|
};
|
|
}
|
|
|
|
/* Formats `n` as text in the given `base` and returns it as a new string
 * allocated with a single push on `arena`.
 *
 * `base` must be in the range [2, 16]. Digits above 9 are written as lowercase
 * letters (see INT_CHARS). An out-of-range base trips the ASSERT; if execution
 * continues past it, base 10 is used instead of dividing by zero (base 0),
 * looping forever (base 1) or indexing past INT_CHARS (base > 16). */
struct string string_from_uint(struct arena *arena, u64 n, u32 base)
{
    /* Base too small / too large */
    ASSERT(base >= 2 && base <= (ARRAY_COUNT(INT_CHARS) - 1));
    if (base < 2 || base > (ARRAY_COUNT(INT_CHARS) - 1)) {
        base = 10;
    }

    /* Build backwards text starting from least significant digit.
     * A u64 never needs more than 64 digits (worst case is base 2), so a
     * fixed buffer is sufficient and no scratch arena is required. */
    u8 backwards_text[64];
    u64 len = 0;
    do {
        backwards_text[len] = (u8)INT_CHARS[n % base];
        ++len;
        n /= base;
    } while (n > 0);

    /* Reverse text into final string */
    u8 *final_text = arena_push_array(arena, u8, len);
    for (u64 i = 0; i < len; ++i) {
        final_text[i] = backwards_text[len - i - 1];
    }

    return (struct string) {
        .len = len,
        .text = final_text
    };
}
|
|
|
|
/* Formats the signed integer `n` in the given `base`, prefixed with '-' when
 * negative, and returns it as a new string on `arena`.
 *
 * The sign and the digits are pushed by two separate calls; the returned
 * string starts at the arena's next push location and relies on those pushes
 * landing back to back. */
struct string string_from_int(struct arena *arena, i64 n, u32 base)
{
    u8 *final_text = arena_dry_push(arena, u8);
    u64 sign_len = 0;
    u64 magnitude = (u64)n;
    if (n < 0) {
        /* Push sign */
        string_from_char(arena, '-');
        sign_len = 1;
        /* Negate in unsigned arithmetic. `-n` on the signed value overflows
         * (undefined behavior) for the most negative i64; this form is well
         * defined and yields the correct magnitude for every input. */
        magnitude = (u64)0 - magnitude;
    }
    /* Push unsigned number */
    struct string uint_str = string_from_uint(arena, magnitude, base);
    return (struct string) {
        .len = sign_len + uint_str.len,
        .text = final_text
    };
}
|
|
|
|
struct string string_from_ptr(struct arena *arena, void *ptr)
|
|
{
|
|
struct string prepend = string_cpy(arena, STR("0x"));
|
|
struct string uint_str = string_from_uint(arena, (u64)ptr, 16);
|
|
return (struct string) {
|
|
.len = prepend.len + uint_str.len,
|
|
.text = prepend.text
|
|
};
|
|
}
|
|
|
|
/* NOTE: This is an imprecise and inefficient way of doing this */
|
|
struct string string_from_float(struct arena *arena, f64 f, u32 precision)
|
|
{
|
|
u8 *final_text = arena_dry_push(arena, u8);
|
|
u64 final_len = 0;
|
|
|
|
/* Currently this function doesn't support large floats. We should
|
|
* rewrite this function if this needs to change. */
|
|
f64 max_representable = (f64)((u64)1 << 62);
|
|
b32 too_large = (f >= max_representable) || (f <= -max_representable);
|
|
b32 nan = f != f;
|
|
|
|
if (nan) {
|
|
final_len += string_cpy(arena, STR("NaN")).len;
|
|
} else if (too_large) {
|
|
string_from_char(arena, '?');
|
|
++final_len;
|
|
} else {
|
|
if (f < 0) {
|
|
string_from_char(arena, '-');
|
|
f = -f;
|
|
++final_len;
|
|
}
|
|
|
|
/* Add one half of next precision level to round up */
|
|
f += 0.5 / (f64)math_pow_u64(10, (u8)precision);
|
|
|
|
/* Print whole part */
|
|
u64 whole = (u64)f;
|
|
struct string whole_str = string_from_uint(arena, whole, 10);
|
|
final_len += whole_str.len;
|
|
|
|
/* Print decimal part */
|
|
if (precision > 0) {
|
|
string_from_char(arena, '.');
|
|
f -= (f64)whole;
|
|
for (u64 i = 0; i < precision; ++i) {
|
|
f *= 10.0;
|
|
u64 digit = (u64)f;
|
|
f -= (f64)digit;
|
|
string_from_char(arena, INT_CHARS[digit % 10]);
|
|
}
|
|
final_len += (u64)precision + 1;
|
|
}
|
|
}
|
|
|
|
return (struct string) {
|
|
.len = final_len,
|
|
.text = final_text
|
|
};
|
|
}
|
|
|
|
/* ========================== *
|
|
* String operations
|
|
* ========================== */
|
|
|
|
struct string string_cpy(struct arena *arena, struct string src)
|
|
{
|
|
struct string str = {
|
|
.len = src.len,
|
|
.text = arena_push_array(arena, u8, src.len)
|
|
};
|
|
MEMCPY(str.text, src.text, src.len);
|
|
return str;
|
|
}
|
|
|
|
/* TODO: Benchmark performance of appending each character while calculating size here */
|
|
// //struct string string_cpy_cstr(struct arena *arena, char *cstr)
|
|
// //{
|
|
// // u8 *final_text = arena_next(arena);
|
|
// // char *c = cstr;
|
|
// // for (; *c != 0; ++c) {
|
|
// // u8 *new_char = arena_push(arena, 1);
|
|
// // *new_char = *c;
|
|
// // }
|
|
// // return (struct string) {
|
|
// // .len = c - cstr,
|
|
// // .text = final_text
|
|
// // };
|
|
// //}
|
|
|
|
struct string string_from_cstr(char *cstr)
|
|
{
|
|
struct string str = { 0 };
|
|
if (cstr) {
|
|
char *c = cstr;
|
|
while (*c != 0) {
|
|
++c;
|
|
}
|
|
str.len = c - cstr;
|
|
str.text = (u8 *)cstr;
|
|
}
|
|
return str;
|
|
}
|
|
|
|
/* Wraps the first `len` bytes at `cstr` in a `struct string` without copying.
 * No terminator is looked for; the caller supplies the length. */
struct string string_from_cstr_len(char *cstr, u64 len)
{
    struct string result = { 0 };
    result.text = (u8 *)cstr;
    result.len = len;
    return result;
}
|
|
|
|
/* Returns a new string on `arena` made of `src` repeated `count` times. */
struct string string_repeat(struct arena *arena, struct string src, u64 count)
{
    u64 total_len = src.len * count;
    u8 *out = arena_push_array(arena, u8, total_len);

    /* Append one copy of `src` per iteration, advancing the write cursor */
    u8 *cursor = out;
    for (u64 remaining = count; remaining > 0; --remaining) {
        MEMCPY(cursor, src.text, src.len);
        cursor += src.len;
    }

    struct string result = { 0 };
    result.text = out;
    result.len = total_len;
    return result;
}
|
|
|
|
struct string string_cat(struct arena *arena, struct string str1, struct string str2)
|
|
{
|
|
struct string new_str = { 0 };
|
|
new_str.len = str1.len + str2.len;
|
|
new_str.text = arena_push_array(arena, u8, new_str.len);
|
|
MEMCPY(new_str.text, str1.text, str1.len);
|
|
MEMCPY(new_str.text + str1.len, str2.text, str2.len);
|
|
return new_str;
|
|
}
|
|
|
|
/* `arena` is where pieces items will be allocated. These strings point
|
|
* into the existing supplied string and do not allocate any new text. */
|
|
struct string_array string_split(struct arena *arena, struct string str, struct string delim)
|
|
{
|
|
struct string_array pieces = {
|
|
.count = 0,
|
|
.strings = arena_dry_push(arena, struct string)
|
|
};
|
|
|
|
struct string piece = {
|
|
.len = 0,
|
|
.text = str.text
|
|
};
|
|
|
|
for (u64 i = 0; i <= str.len - delim.len; ++i) {
|
|
/* Clamp comparison string so we don't overflow. */
|
|
struct string comp_str = {
|
|
.len = delim.len,
|
|
.text = &str.text[i]
|
|
};
|
|
|
|
b32 is_delimiter = string_eq(comp_str, delim);
|
|
b32 is_end = i == str.len - 1;
|
|
|
|
if (!is_delimiter || is_end) {
|
|
++piece.len;
|
|
}
|
|
|
|
if (is_delimiter || is_end) {
|
|
/* Delimiter found */
|
|
struct string *piece_pushed = arena_push(arena, struct string);
|
|
*piece_pushed = piece;
|
|
++pieces.count;
|
|
piece.text = piece.text + piece.len + delim.len;
|
|
piece.len = 0;
|
|
}
|
|
}
|
|
|
|
return pieces;
|
|
}
|
|
|
|
/* NOTE: Really slow */
|
|
struct string string_indent(struct arena *arena, struct string str, u32 indent)
|
|
{
|
|
struct temp_arena scratch = scratch_begin(arena);
|
|
|
|
u64 final_len = 0;
|
|
u8 *final_text = arena_dry_push(arena, u8);
|
|
|
|
struct string_array split = string_split(scratch.arena, str, STR("\n"));
|
|
for (u64 i = 0; i < split.count; ++i) {
|
|
struct string piece = split.strings[i];
|
|
for (u32 j = 0; j < indent; ++j) {
|
|
string_from_char(arena, ' ');
|
|
++final_len;
|
|
}
|
|
string_cpy(arena, piece);
|
|
final_len += piece.len;
|
|
if (i < split.count - 1) {
|
|
string_from_char(arena, '\n');
|
|
++final_len;
|
|
}
|
|
}
|
|
|
|
scratch_end(scratch);
|
|
|
|
return (struct string) {
|
|
.len = final_len,
|
|
.text = final_text
|
|
};
|
|
}
|
|
|
|
b32 string_eq(struct string str1, struct string str2)
|
|
{
|
|
if (str1.len == str2.len) {
|
|
for (u64 i = 0; i < str1.len; ++i) {
|
|
if (str1.text[i] != str2.text[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
b32 string_contains(struct string str, struct string substring)
|
|
{
|
|
if (substring.len > str.len) {
|
|
return false;
|
|
}
|
|
|
|
for (u64 i = 0; i <= str.len - substring.len; ++i) {
|
|
b32 match = true;
|
|
for (u64 j = 0; j < substring.len; ++j) {
|
|
if (str.text[i + j] != substring.text[j]) {
|
|
match = false;
|
|
break;
|
|
}
|
|
}
|
|
if (match) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
b32 string_starts_with(struct string str, struct string substring)
|
|
{
|
|
if (str.len >= substring.len) {
|
|
for (u64 i = 0; i < substring.len; ++i) {
|
|
if (str.text[i] != substring.text[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
b32 string_ends_with(struct string str, struct string substring)
|
|
{
|
|
if (str.len >= substring.len) {
|
|
u64 start = str.len - substring.len;
|
|
for (u64 i = 0; i < substring.len; ++i) {
|
|
if (str.text[start + i] != substring.text[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
#if 0
/* Disabled code (compiled out).
 *
 * NOTE: This is a LOSSY conversion: only the low byte of each 16-bit unit is
 * kept. `wstr` must be null-terminated.
 */
struct string string_from_wstr(struct arena *arena, u16 *wstr)
{
    u8 *first = arena_next(arena);
    u64 count = 0;
    while (wstr[count] != 0) {
        u8 *narrow = arena_push(arena, 1);
        *narrow = (u8)(wstr[count] & 0xFF);
        ++count;
    }
    return (struct string) {
        .len = count,
        .text = first
    };
}
#endif
|
|
|
|
char *string_to_cstr(struct arena *arena, struct string str)
|
|
{
|
|
u8 *text = arena_push_array(arena, u8, str.len + 1);
|
|
MEMCPY(text, str.text, str.len);
|
|
text[str.len] = '\0';
|
|
return (char *)text;
|
|
}
|
|
|
|
char *string_to_cstr_buff(struct string str, struct buffer buff)
|
|
{
|
|
if (buff.size > 0) {
|
|
u64 len = min_u64(str.len, buff.size - 1);
|
|
MEMCPY(buff.data, str.text, len);
|
|
buff.data[len] = '\0';
|
|
}
|
|
return (char *)buff.data;
|
|
}
|
|
|
|
wchar_t *string_to_wstr(struct arena *arena, struct string str)
|
|
{
|
|
/* FIXME: Do proper encoding. */
|
|
u16 *text = arena_push_array(arena, u16, str.len + 1);
|
|
for (u64 i = 0; i < str.len; ++i) {
|
|
text[i] = (u16)str.text[i];
|
|
}
|
|
text[str.len] = '\0';
|
|
return (wchar_t *)text;
|
|
}
|
|
|
|
/* ========================== *
|
|
* Format
|
|
* ========================== */
|
|
|
|
/* String formatting only has one format specifier: "%F". All specifier info is
|
|
* included in the arguments (instead of w/ the specifier like in printf). This
|
|
* is safer.
|
|
*
|
|
* Example:
|
|
* string_format(arena,
|
|
* STR("Hello there %F. You are %F feet %F inches tall!"),
|
|
* FMT_STR(STR("George")),
|
|
* FMT_UINT(6),
|
|
* FMT_FLOAT(5.375));
|
|
*
|
|
* NOTE: FMT_END must be passed as the last arg in the va_list (This is
|
|
* done automatically by the `string_format` macro).
|
|
*
|
|
* Format arguments:
|
|
* FMT_CHAR: Format a single u8 character
|
|
* FMT_STR: Format a `string` struct
|
|
* FMT_UINT: Format a u64
|
|
* FMT_SINT: Format an i64
|
|
* FMT_FLOAT: Format an f64 with DEFAULT_FMT_PRECISION
|
|
* FMT_FLOAT_P: Format an f64 with specified precision
|
|
* FMT_HEX: Format a u64 in hexadecimal notation
|
|
* FMT_PTR: Format a pointer in hexadecimal notation prefixed by "0x"
|
|
*
|
|
* FMT_END (internal): Denote the end of the va_list
|
|
*
|
|
* TODO:
|
|
* %n equivalent? (nothing)
|
|
* %e/%E equivalent? (scientific notation of floats)
|
|
* %o equivalent? (octal representation)
|
|
*/
|
|
struct string string_formatv(struct arena *arena, struct string fmt, va_list args)
|
|
{
|
|
__prof;
|
|
|
|
u64 final_len = 0;
|
|
u8 *final_text = arena_dry_push(arena, u8);
|
|
|
|
u8 *end = fmt.text + fmt.len;
|
|
b32 no_more_args = false;
|
|
for (u8 *c = fmt.text; c < end; ++c) {
|
|
u8 *next = ((c + 1) < end) ? (c + 1) : (u8 *)"\0";
|
|
|
|
/* Escape '%%' */
|
|
b32 escape = !no_more_args && *c == '%' && *next == '%';
|
|
if (escape) {
|
|
/* Skip the escape '%' char from parsing */
|
|
++c;
|
|
}
|
|
|
|
if (!no_more_args && !escape && *c == '%' && *next == 'F') {
|
|
struct string parsed_str = { 0 };
|
|
/* Detect arg type and parse to string */
|
|
struct fmt_arg arg = va_arg(args, struct fmt_arg);
|
|
switch (arg.type) {
|
|
case FMT_TYPE_CHAR: {
|
|
parsed_str = string_from_char(arena, arg.value.c);
|
|
} break;
|
|
|
|
case FMT_TYPE_STR: {
|
|
parsed_str = string_cpy(arena, arg.value.string);
|
|
} break;
|
|
|
|
case FMT_TYPE_UINT: {
|
|
parsed_str = string_from_uint(arena, arg.value.uint, 10);
|
|
} break;
|
|
|
|
case FMT_TYPE_SINT: {
|
|
parsed_str = string_from_int(arena, arg.value.sint, 10);
|
|
} break;
|
|
|
|
case FMT_TYPE_HEX: {
|
|
parsed_str = string_from_uint(arena, arg.value.sint, 16);
|
|
} break;
|
|
|
|
case FMT_TYPE_PTR: {
|
|
parsed_str = string_from_ptr(arena, arg.value.ptr);
|
|
} break;
|
|
|
|
case FMT_TYPE_FLOAT: {
|
|
parsed_str = string_from_float(arena, arg.value.f, arg.precision);
|
|
} break;
|
|
|
|
case FMT_TYPE_END: {
|
|
/* Unexpected end. Not enough FMT args passed to function. */
|
|
ASSERT(false);
|
|
parsed_str = string_cpy(arena, STR("<?>"));
|
|
no_more_args = true;
|
|
} break;
|
|
|
|
case FMT_TYPE_NONE: {
|
|
/* Unknown format type */
|
|
ASSERT(false);
|
|
parsed_str = string_cpy(arena, STR("<?>"));
|
|
no_more_args = true;
|
|
} break;
|
|
}
|
|
/* Update final string len / start */
|
|
final_len += parsed_str.len;
|
|
/* Skip 'F' from parsing */
|
|
++c;
|
|
} else {
|
|
/* Parse character normally */
|
|
string_from_char(arena, *c);
|
|
++final_len;
|
|
}
|
|
}
|
|
|
|
#if RTC
|
|
if (!no_more_args) {
|
|
struct fmt_arg last_arg = va_arg(args, struct fmt_arg);
|
|
/* End arg not reached. Too many FMT values passed to function. */
|
|
ASSERT(last_arg.type == FMT_TYPE_END);
|
|
}
|
|
#endif
|
|
|
|
return (struct string) {
|
|
.len = final_len,
|
|
.text = final_text
|
|
};
|
|
}
|
|
|
|
struct string _string_format(struct arena *arena, struct string fmt, ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, fmt);
|
|
struct string new_str = string_formatv(arena, fmt, args);
|
|
va_end(args);
|
|
return new_str;
|
|
}
|
|
|
|
/* ========================== *
|
|
* Unicode
|
|
* ========================== */
|
|
|
|
/* Placeholder functions. Unicode not supported yet. */
|
|
|
|
struct string32 string32_from_string(struct arena *arena, struct string str)
|
|
{
|
|
u32 *text = arena_push_array(arena, u32, str.len);
|
|
for (u64 i = 0; i < str.len; ++i) {
|
|
u8 c = str.text[i];
|
|
text[i] = (u32)c;
|
|
}
|
|
return (struct string32) {
|
|
.text = text,
|
|
.len = str.len
|
|
};
|
|
}
|