/* power_play/src/string.c (558 lines, 15 KiB, C) */

#include "string.h"
#include "arena.h"
#include "memory.h"
#include "scratch.h"
#include "math.h"
/*
* NOTE: Strings should be considered ~IMMUTABLE~
*
* All string functions return a new string as a result. Any strings used as
* an argument (IE: in string_cat) will not be modified.
*
* Use the STR macro to create strings from string literals
*
* NOTE: It is valid for a string to have len 0 but a non-NULL text pointer.
* Always check string.len rather than string.text for string presence.
* (If we want to change this behavior then we need to check for length = 0 in
* our functions that return a pointer from arena_dry_push, or guarantee that
* all functions returning an arena_dry_push do allocate.)
*/
/* ========================== *
* Conversion
* ========================== */
#define INT_CHARS ("0123456789abcdef")
struct string string_from_char(struct arena *arena, char c)
{
u8 *dest = arena_push(arena, u8);
*dest = c;
return (struct string) {
.len = 1,
.text = dest
};
}
/* Formats `n` as text in the given `base` using the lowercase digits in
 * INT_CHARS and allocates the result on `arena`.
 *
 * `base` must be in the range [2, 16]. A base of 0 would divide by zero, a
 * base of 1 would never terminate, and a base above 16 would index past
 * INT_CHARS. An invalid base trips the ASSERT; if the assert is compiled
 * out, base 10 is used instead so the function stays well defined.
 *
 * Exactly `len` bytes are pushed onto `arena` in a single allocation, so
 * callers that build a larger string out of consecutive pushes still see
 * the digits appended directly after their previous push. */
struct string string_from_uint(struct arena *arena, u64 n, u32 base)
{
    b32 base_valid = (base >= 2) && (base <= (ARRAY_COUNT(INT_CHARS) - 1));
    ASSERT(base_valid);
    if (!base_valid) {
        base = 10;
    }

    /* A u64 needs at most 64 digits (base 2), so a fixed stack buffer is
     * enough and no scratch arena is required. */
    u8 backwards_text[64];
    u64 len = 0;

    /* Build backwards text starting from least significant digit. The
     * do/while guarantees that n == 0 still produces the digit "0". */
    do {
        backwards_text[len] = (u8)INT_CHARS[n % base];
        ++len;
        n /= base;
    } while (n > 0);

    /* Reverse text into final string */
    u8 *final_text = arena_push_array(arena, u8, len);
    for (u64 i = 0; i < len; ++i) {
        final_text[i] = backwards_text[len - i - 1];
    }

    return (struct string) {
        .len = len,
        .text = final_text
    };
}
/* Formats the signed integer `n` in the given `base` on `arena`, prefixed
 * with '-' when negative.
 *
 * The returned string starts at the arena position captured before
 * anything is pushed and spans the optional sign plus the digits. This
 * assumes consecutive pushes on `arena` are contiguous.
 *
 * The magnitude is computed in unsigned arithmetic: negating the most
 * negative i64 as a signed value overflows (undefined behavior), while
 * `0 - (u64)n` is well defined and yields the correct magnitude for every
 * negative input. */
struct string string_from_int(struct arena *arena, i64 n, u32 base)
{
    u8 *final_text = arena_dry_push(arena, u8);
    u64 sign_len = 0;
    u64 magnitude = (u64)n;

    if (n < 0) {
        /* Push sign */
        string_from_char(arena, '-');
        sign_len = 1;
        magnitude = (u64)0 - (u64)n;
    }

    /* Push unsigned number */
    struct string uint_str = string_from_uint(arena, magnitude, base);

    return (struct string) {
        .len = sign_len + uint_str.len,
        .text = final_text
    };
}
struct string string_from_ptr(struct arena *arena, void *ptr)
{
struct string prepend = string_cpy(arena, STR("0x"));
struct string uint_str = string_from_uint(arena, (u64)ptr, 16);
return (struct string) {
.len = prepend.len + uint_str.len,
.text = prepend.text
};
}
/* Formats `f` with `precision` digits after the decimal point.
 *
 * NOTE: This is an imprecise and inefficient way of doing this.
 *
 * Special cases:
 *   - NaN is written as "NaN".
 *   - Values whose magnitude is at or above 2^62 are written as "?". Large
 *     floats are not supported; rewrite this function if that needs to
 *     change.
 *
 * The output is assembled from consecutive pushes on `arena`; the returned
 * string starts at the position captured up front and its length is the sum
 * of everything pushed (assumes those pushes are contiguous). */
struct string string_from_float(struct arena *arena, f64 f, u32 precision)
{
    u8 *out_text = arena_dry_push(arena, u8);
    u64 out_len = 0;

    /* NaN is the only value that is not equal to itself */
    if (f != f) {
        out_len = string_cpy(arena, STR("NaN")).len;
        return (struct string) {
            .len = out_len,
            .text = out_text
        };
    }

    f64 limit = (f64)((u64)1 << 62);
    if ((f >= limit) || (f <= -limit)) {
        string_from_char(arena, '?');
        return (struct string) {
            .len = 1,
            .text = out_text
        };
    }

    if (f < 0) {
        string_from_char(arena, '-');
        ++out_len;
        f = -f;
    }

    /* Add one half of next precision level to round up */
    f += 0.5 / (f64)math_pow_u64(10, (u8)precision);

    /* Whole part */
    u64 whole = (u64)f;
    out_len += string_from_uint(arena, whole, 10).len;

    /* Decimal part: repeatedly shift one digit left of the point and peel
     * it off. */
    if (precision > 0) {
        string_from_char(arena, '.');
        f64 frac = f - (f64)whole;
        for (u32 remaining = precision; remaining > 0; --remaining) {
            frac *= 10.0;
            u64 digit = (u64)frac;
            frac -= (f64)digit;
            string_from_char(arena, INT_CHARS[digit % 10]);
        }
        out_len += (u64)precision + 1;
    }

    return (struct string) {
        .len = out_len,
        .text = out_text
    };
}
/* ========================== *
* String operations
* ========================== */
struct string string_cpy(struct arena *arena, struct string src)
{
struct string str = {
.len = src.len,
.text = arena_push_array(arena, u8, src.len)
};
MEMCPY(str.text, src.text, src.len);
return str;
}
/* TODO: Benchmark performance of appending each character while calculating size here */
// //struct string string_cpy_cstr(struct arena *arena, char *cstr)
// //{
// // u8 *final_text = arena_next(arena);
// // char *c = cstr;
// // for (; *c != 0; ++c) {
// // u8 *new_char = arena_push(arena, 1);
// // *new_char = *c;
// // }
// // return (struct string) {
// // .len = c - cstr,
// // .text = final_text
// // };
// //}
struct string string_from_cstr(char *cstr)
{
struct string str = { 0 };
if (cstr) {
char *c = cstr;
while (*c != 0) {
++c;
}
str.len = c - cstr;
str.text = (u8 *)cstr;
}
return str;
}
/* Wraps `len` bytes starting at `cstr` in a `struct string` without copying
 * or inspecting the memory. */
struct string string_from_cstr_len(char *cstr, u64 len)
{
    struct string result = { 0 };
    result.len = len;
    result.text = (u8 *)cstr;
    return result;
}
/* Returns a new string on `arena` consisting of `src` repeated `count`
 * times back to back. */
struct string string_repeat(struct arena *arena, struct string src, u64 count)
{
    u64 total_len = src.len * count;
    u8 *out = arena_push_array(arena, u8, total_len);

    /* Walk a write cursor through the output, one copy of `src` at a time */
    u8 *cursor = out;
    for (u64 remaining = count; remaining > 0; --remaining) {
        MEMCPY(cursor, src.text, src.len);
        cursor += src.len;
    }

    struct string result = { 0 };
    result.len = total_len;
    result.text = out;
    return result;
}
struct string string_cat(struct arena *arena, struct string str1, struct string str2)
{
struct string new_str = { 0 };
new_str.len = str1.len + str2.len;
new_str.text = arena_push_array(arena, u8, new_str.len);
MEMCPY(new_str.text, str1.text, str1.len);
MEMCPY(new_str.text + str1.len, str2.text, str2.len);
return new_str;
}
/* `arena` is where pieces items will be allocated. These strings point
* into the existing supplied string and do not allocate any new text. */
struct string_array string_split(struct arena *arena, struct string str, struct string delim)
{
struct string_array pieces = {
.count = 0,
.strings = arena_dry_push(arena, struct string)
};
struct string piece = {
.len = 0,
.text = str.text
};
for (u64 i = 0; i <= str.len - delim.len; ++i) {
/* Clamp comparison string so we don't overflow. */
struct string comp_str = {
.len = delim.len,
.text = &str.text[i]
};
b32 is_delimiter = string_eq(comp_str, delim);
b32 is_end = i == str.len - 1;
if (!is_delimiter || is_end) {
++piece.len;
}
if (is_delimiter || is_end) {
/* Delimiter found */
struct string *piece_pushed = arena_push(arena, struct string);
*piece_pushed = piece;
++pieces.count;
piece.text = piece.text + piece.len + delim.len;
piece.len = 0;
}
}
return pieces;
}
/* Returns a copy of `str` on `arena` in which every piece produced by
 * splitting on "\n" is prefixed with `indent` spaces. The pieces are joined
 * with a single '\n'.
 *
 * NOTE: Really slow. The output is built one pushed character at a time.
 *
 * The piece list lives in a scratch arena. The output text is assembled
 * from consecutive pushes on `arena`, starting at the position captured
 * before the first push (assumes those pushes are contiguous). */
struct string string_indent(struct arena *arena, struct string str, u32 indent)
{
    struct temp_arena scratch = scratch_begin(arena);

    u8 *out_text = arena_dry_push(arena, u8);
    u64 out_len = 0;

    struct string_array lines = string_split(scratch.arena, str, STR("\n"));
    for (u64 line_index = 0; line_index < lines.count; ++line_index) {
        /* Separator goes between pieces, never after the last one */
        if (line_index > 0) {
            string_from_char(arena, '\n');
            out_len += 1;
        }

        u32 spaces_left = indent;
        while (spaces_left > 0) {
            string_from_char(arena, ' ');
            out_len += 1;
            --spaces_left;
        }

        struct string line = lines.strings[line_index];
        string_cpy(arena, line);
        out_len += line.len;
    }

    scratch_end(scratch);

    struct string result = { 0 };
    result.text = out_text;
    result.len = out_len;
    return result;
}
/* Returns true when both strings have the same length and identical
 * bytes. Two zero-length strings compare equal. */
b32 string_eq(struct string str1, struct string str2)
{
    if (str1.len != str2.len) {
        return false;
    }

    u64 pos = 0;
    while (pos < str1.len) {
        if (str1.text[pos] != str2.text[pos]) {
            return false;
        }
        ++pos;
    }
    return true;
}
/* Returns true when `substring` occurs anywhere inside `str`. An empty
 * `substring` is always found. */
b32 string_contains(struct string str, struct string substring)
{
    if (substring.len > str.len) {
        return false;
    }

    /* Last index at which `substring` could still fit inside `str` */
    u64 last_start = str.len - substring.len;
    for (u64 start = 0; start <= last_start; ++start) {
        /* Count how many leading bytes agree at this offset */
        u64 matched = 0;
        while (matched < substring.len
               && str.text[start + matched] == substring.text[matched]) {
            ++matched;
        }
        if (matched == substring.len) {
            return true;
        }
    }
    return false;
}
/* Returns true when the first `substring.len` bytes of `str` equal
 * `substring`. An empty `substring` always matches. */
b32 string_starts_with(struct string str, struct string substring)
{
    if (str.len < substring.len) {
        return false;
    }

    u64 pos = 0;
    while (pos < substring.len) {
        if (str.text[pos] != substring.text[pos]) {
            return false;
        }
        ++pos;
    }
    return true;
}
b32 string_ends_with(struct string str, struct string substring)
{
if (str.len >= substring.len) {
u64 start = str.len - substring.len;
for (u64 i = 0; i < substring.len; ++i) {
if (str.text[start + i] != substring.text[i]) {
return false;
}
}
return true;
}
return false;
}
#if 0
/* Disabled: this function is excluded from compilation by the `#if 0`.
 *
 * NOTE: This is a LOSSY conversion. Only the low 8 bits of each 16-bit
 * unit are kept.
 * `wstr` must be null-terminated.
 *
 * NOTE(review): uses `arena_next` and `arena_push(arena, 1)`, which do not
 * match the arena calls used by the rest of this file - presumably written
 * against an older arena API; confirm before re-enabling.
 */
struct string string_from_wstr(struct arena *arena, u16 *wstr)
{
u8 *final_text = arena_next(arena);
u16 *wchar = wstr;
/* Push one narrowed byte per 16-bit unit until the terminator */
for (; *wchar != 0; ++wchar) {
u8 *c = arena_push(arena, 1);
*c = (u8)(*wchar & 0xFF);
}
return (struct string) {
/* Number of units consumed, excluding the terminator */
.len = wchar - wstr,
.text = final_text
};
}
#endif
char *string_to_cstr(struct arena *arena, struct string str)
{
u8 *text = arena_push_array(arena, u8, str.len + 1);
MEMCPY(text, str.text, str.len);
text[str.len] = '\0';
return (char *)text;
}
/* Writes `str` into the caller-supplied `buff` as a null-terminated C
 * string, truncating so that text plus terminator fit within `buff.size`.
 *
 * Returns `buff.data`. When `buff.size` is not positive nothing is written
 * (not even a terminator). */
char *string_to_cstr_buff(struct string str, struct buffer buff)
{
    if (buff.size <= 0) {
        return (char *)buff.data;
    }

    /* Reserve one byte for the terminator */
    u64 copy_len = min_u64(str.len, buff.size - 1);
    MEMCPY(buff.data, str.text, copy_len);
    buff.data[copy_len] = '\0';

    return (char *)buff.data;
}
/* Widens each byte of `str` to a 16-bit unit on `arena`, appends a zero
 * terminator and returns the buffer as `wchar_t *`.
 *
 * FIXME: Do proper encoding. Bytes are widened one-to-one with no
 * decoding. */
wchar_t *string_to_wstr(struct arena *arena, struct string str)
{
    u16 *wide = arena_push_array(arena, u16, str.len + 1);

    u64 pos = 0;
    while (pos < str.len) {
        wide[pos] = (u16)str.text[pos];
        ++pos;
    }
    wide[pos] = '\0';

    return (wchar_t *)wide;
}
/* ========================== *
* Format
* ========================== */
/* String formatting only has one format specifier: "%F". All specifier info is
* included in the arguments (instead of w/ the specifier like in printf). This
* is safer.
*
* Example:
* string_format(arena,
* STR("Hello there %F. You are %F feet %F inches tall!"),
* FMT_STR(STR("George")),
* FMT_UINT(6),
* FMT_FLOAT(5.375));
*
* NOTE: FMT_END must be passed as the last arg in the va_list (This is
* done automatically by the `string_format` macro).
*
* Format arguments:
* FMT_CHAR: Format a single u8 character
* FMT_STR: Format a `string` struct
* FMT_UINT: Format a u64
* FMT_SINT: Format an i64
* FMT_FLOAT: Format an f64 with DEFAULT_FMT_PRECISION
* FMT_FLOAT_P: Format an f64 with specified precision
* FMT_HEX: Format a u64 in hexadecimal notation
* FMT_PTR: Format a pointer in hexadecimal notation prefixed by "0x"
*
* FMT_END (internal): Denote the end of the va_list
*
* TODO:
* %n equivalent? (nothing)
* %e/%E equivalent? (scientific notation of floats)
* %o equivalent? (octal representation)
*/
/* Builds a formatted string on `arena` by walking `fmt` one byte at a time.
 *
 *   - "%%" emits a single '%'.
 *   - "%F" consumes the next `struct fmt_arg` from `args` and emits its
 *     text according to `arg.type`.
 *   - Any other byte is copied as-is.
 *
 * If the consumed argument is FMT_TYPE_END (too few arguments) or
 * FMT_TYPE_NONE (unknown type), the function asserts, emits "<?>", and
 * stops consuming arguments. From then on every remaining byte of `fmt`,
 * including '%' and 'F', is copied verbatim.
 *
 * The output is assembled from consecutive pushes on `arena`. The returned
 * string starts at the position captured before the first push and its
 * length is the sum of everything pushed (assumes those pushes are
 * contiguous).
 */
struct string string_formatv(struct arena *arena, struct string fmt, va_list args)
{
/* NOTE(review): presumably a profiling scope marker - confirm */
__prof;
u64 final_len = 0;
/* Remember where the output will begin; nothing is allocated here */
u8 *final_text = arena_dry_push(arena, u8);
u8 *end = fmt.text + fmt.len;
/* Set once an END/NONE argument has been read; disables all specifier
 * and escape handling for the rest of `fmt` */
b32 no_more_args = false;
for (u8 *c = fmt.text; c < end; ++c) {
/* Look-ahead byte; points at a zero byte when `c` is the last byte so
 * the comparisons below never read past `end` */
u8 *next = ((c + 1) < end) ? (c + 1) : (u8 *)"\0";
/* Escape '%%' */
b32 escape = !no_more_args && *c == '%' && *next == '%';
if (escape) {
/* Skip the escape '%' char from parsing */
++c;
}
if (!no_more_args && !escape && *c == '%' && *next == 'F') {
struct string parsed_str = { 0 };
/* Detect arg type and parse to string */
struct fmt_arg arg = va_arg(args, struct fmt_arg);
switch (arg.type) {
case FMT_TYPE_CHAR: {
parsed_str = string_from_char(arena, arg.value.c);
} break;
case FMT_TYPE_STR: {
parsed_str = string_cpy(arena, arg.value.string);
} break;
case FMT_TYPE_UINT: {
parsed_str = string_from_uint(arena, arg.value.uint, 10);
} break;
case FMT_TYPE_SINT: {
parsed_str = string_from_int(arena, arg.value.sint, 10);
} break;
case FMT_TYPE_HEX: {
/* NOTE(review): reads `value.sint` rather than `value.uint` before
 * formatting as unsigned - confirm this matches how FMT_HEX stores
 * its value */
parsed_str = string_from_uint(arena, arg.value.sint, 16);
} break;
case FMT_TYPE_PTR: {
parsed_str = string_from_ptr(arena, arg.value.ptr);
} break;
case FMT_TYPE_FLOAT: {
parsed_str = string_from_float(arena, arg.value.f, arg.precision);
} break;
case FMT_TYPE_END: {
/* Unexpected end. Not enough FMT args passed to function. */
ASSERT(false);
parsed_str = string_cpy(arena, STR("<?>"));
no_more_args = true;
} break;
case FMT_TYPE_NONE: {
/* Unknown format type */
ASSERT(false);
parsed_str = string_cpy(arena, STR("<?>"));
no_more_args = true;
} break;
}
/* Update final string len / start */
final_len += parsed_str.len;
/* Skip 'F' from parsing */
++c;
} else {
/* Parse character normally */
string_from_char(arena, *c);
++final_len;
}
}
/* Only compiled when RTC is non-zero: reads one more argument to verify
 * that the caller did not pass more arguments than `fmt` has specifiers */
#if RTC
if (!no_more_args) {
struct fmt_arg last_arg = va_arg(args, struct fmt_arg);
/* End arg not reached. Too many FMT values passed to function. */
ASSERT(last_arg.type == FMT_TYPE_END);
}
#endif
return (struct string) {
.len = final_len,
.text = final_text
};
}
struct string _string_format(struct arena *arena, struct string fmt, ...)
{
va_list args;
va_start(args, fmt);
struct string new_str = string_formatv(arena, fmt, args);
va_end(args);
return new_str;
}
/* ========================== *
* Unicode
* ========================== */
/* Placeholder functions. Unicode not supported yet. */
struct string32 string32_from_string(struct arena *arena, struct string str)
{
u32 *text = arena_push_array(arena, u32, str.len);
for (u64 i = 0; i < str.len; ++i) {
u8 c = str.text[i];
text[i] = (u32)c;
}
return (struct string32) {
.text = text,
.len = str.len
};
}