power_play/src/json.c
2024-03-08 17:41:39 -06:00

1084 lines
29 KiB
C

#include "json.h"
#include "string.h"
#include "arena.h"
#include "memory.h"
#include "scratch.h"
#include "math.h"
#include "util.h"
/* Ratio of hash-table slots to entries for formatted objects. The same
 * `count * factor` expression is evaluated in json_format_internal,
 * get_object_entries and json_object_get, so all three must keep using it. */
#define OBJECT_HASH_TABLE_CAPACITY_FACTOR 1.5
/* Largest number of children parse_array / parse_object will accept for a
 * single array or object before reporting "Max children reached". */
#define MAX_CHILDREN (U32_MAX - 1)
/* The CASE_* macros below expand to a run of `case` labels with the colon of
 * the final label left off, so they are written as `CASE_X: { ... }` inside a
 * switch statement. */
/* Matches line feed and carriage return. */
#define CASE_NEWLINE \
case 0x0A: /* Line feed or New line */ \
case 0x0D /* Carriage return */
/* Matches the newline characters plus space and horizontal tab. */
#define CASE_WHITESPACE \
CASE_NEWLINE: \
case 0x20: /* Space */ \
case 0x09 /* Horizontal tab */
/* Matches the ASCII digits '0' through '9'. */
#define CASE_NUMERIC \
case '0': \
case '1': \
case '2': \
case '3': \
case '4': \
case '5': \
case '6': \
case '7': \
case '8': \
case '9'
/* Matches the ASCII lowercase letters 'a' through 'z'. */
#define CASE_ALPHABETICAL_LOWERCASE \
case 'a': \
case 'b': \
case 'c': \
case 'd': \
case 'e': \
case 'f': \
case 'g': \
case 'h': \
case 'i': \
case 'j': \
case 'k': \
case 'l': \
case 'm': \
case 'n': \
case 'o': \
case 'p': \
case 'q': \
case 'r': \
case 's': \
case 't': \
case 'u': \
case 'v': \
case 'w': \
case 'x': \
case 'y': \
case 'z'
/* ========================== *
* Parse
* ========================== */
/* Cursor state threaded through every parse_* function. */
struct parser {
u8 *at; /* Next byte to examine */
u8 *end; /* One past the last byte of the input buffer */
struct string error; /* Non-empty once a parse error has been recorded (see is_error) */
};
/* Forward declaration: parse_array and parse_object call parse_json for each child. */
INTERNAL struct json_ir parse_json(struct arena *arena, struct parser *p, b32 parse_key);
INTERNAL void set_error_unexpected_character(struct arena *arena, struct parser *p)
{
__prof;
p->error = string_format(arena, STR("Unexpected character '%F'"), FMT_CHAR(*p->at));
}
/* Reports whether a parse error has been recorded on `p`, i.e. whether its
 * error message is non-empty. */
INTERNAL b32 is_error(struct parser *p)
{
    __prof;
    b32 has_message = (p->error.len > 0);
    return has_message;
}
/* Parses a decimal number at the cursor and returns its value.
 *
 * Accepted form: optional leading whitespace, an optional single '-', then
 * digits with at most one '.' (a leading or trailing '.' is tolerated as long
 * as at least one digit is present). Exponents are not handled here.
 *
 * arena: receives any formatted error message.
 * p:     parser; on success the cursor is left on the first byte after the
 *        number (or at the end of the input).
 *
 * Returns 0 and records an error on `p` when the input is malformed. */
INTERNAL f64 parse_number(struct arena *arena, struct parser *p)
{
    __prof;

    /* Skip leading whitespace; anything that cannot start a number is an error */
    b32 found_start = false;
    while (!found_start && p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;
            case '-':
            case '.':
            CASE_NUMERIC: {
                found_start = true;
            } break;
            default: {
                set_error_unexpected_character(arena, p);
                return 0;
            } break;
        }
    }
    if (!found_start) {
        p->error = STR("Reached end of file without finding a number");
        return 0;
    }

    /* A sign is only valid as the very first character */
    f64 sign = 1;
    if (*p->at == '-') {
        sign = -1;
        ++p->at;
    }

    /* Accumulate digits in a single left-to-right pass. Working in f64 avoids
     * the integer overflow a u64 power of ten would hit on long inputs, and
     * the value is complete even when the number runs up to the end of the
     * input. */
    f64 sum = 0;
    f64 frac_divisor = 1;
    b32 seen_decimal = false;
    b32 seen_digit = false;
    b32 found_end = false;
    while (!found_end && p->at < p->end) {
        switch (*p->at) {
            case '.': {
                if (seen_decimal) {
                    set_error_unexpected_character(arena, p);
                    return 0;
                }
                seen_decimal = true;
                ++p->at;
            } break;
            case '-': {
                /* Sign in the middle of a number (or a second sign) */
                set_error_unexpected_character(arena, p);
                return 0;
            } break;
            CASE_NUMERIC: {
                f64 digit = (f64)(*p->at - '0');
                if (seen_decimal) {
                    /* Each fractional digit is worth ten times less than the previous one */
                    frac_divisor *= 10;
                    sum += digit / frac_divisor;
                } else {
                    sum = (sum * 10) + digit;
                }
                seen_digit = true;
                ++p->at;
            } break;
            default: {
                found_end = true;
            } break;
        }
    }

    if (!seen_digit) {
        /* Input such as "-" or "." carries no value */
        p->error = STR("Expected digit");
        return 0;
    }
    return sum * sign;
}
/* Pushes one byte `c` onto `arena` and grows `str` by one character.
 * NOTE(review): this only extends `str` correctly if the pushed byte lands
 * directly after the string's current text in the arena - confirm that no
 * other allocation happens on `arena` while a string is being built. */
INTERNAL void append_char(struct arena *arena, struct string *str, u8 c)
{
    __prof;
    u8 *slot = arena_push(arena, u8);
    slot[0] = c;
    str->len += 1;
}
INTERNAL struct string parse_string(struct arena *arena, struct parser *p)
{
__prof;
struct string str = {
.len = 0,
.text = arena_dry_push(arena, u8)
};
/* Find '"' string start */
u8 *start = NULL;
while (!start && p->at < p->end) {
switch (*p->at) {
CASE_WHITESPACE: {
++p->at;
} break;
case '"': {
++p->at;
start = p->at;
} break;
default: {
set_error_unexpected_character(arena, p);
return str;
} break;
}
}
if (!start) {
p->error = STR("Reached end of file without finding a string");
return str;
}
/* Parse characters */
while (p->at < p->end) {
if (*p->at == '\\') {
/* Escaped */
if ((p->at + 1) < p->end) {
++p->at;
switch (*p->at) {
CASE_NEWLINE: {
p->error = STR("Expected '\"' to close string");
return str;
} break;
/* Backspace */
case 'b': {
append_char(arena, &str, '\b');
} break;
/* Form feed */
case 'f': {
append_char(arena, &str, '\f');
} break;
/* Newline */
case 'n': {
append_char(arena, &str, '\n');
} break;
/* Carriage return */
case 'r': {
append_char(arena, &str, '\r');
} break;
/* Tab*/
case 't': {
append_char(arena, &str, '\t');
} break;
case '"': /* Double quote */
case '\\': { /* Backslash */
append_char(arena, &str, *p->at);
} break;
default: {
p->error = string_format(
arena,
STR("Unknown escape sequence '\\%F'"),
FMT_CHAR(*p->at)
);
return str;
} break;
}
++p->at;
} else {
set_error_unexpected_character(arena, p);
return str;
}
} else {
switch (*p->at) {
CASE_NEWLINE: {
p->error = STR("Expected '\"' to close string");
return str;
} break;
/* End string */
case '"': {
++p->at;
return str;
} break;
/* Append character */
default: {
append_char(arena, &str, *p->at);
++p->at;
} break;
}
}
}
p->error = STR("Expected '\"' to close string");
return str;
}
/* Parses the literals `true`, `false` and `null`.
 *
 * Leading whitespace is skipped, then the longest run of lowercase ASCII
 * letters at the cursor is compared against the three literals.
 *
 * arena: unused.
 * p:     parser; the cursor is left just past the run of letters.
 * type:  set to JSON_TYPE_BOOL or JSON_TYPE_NULL on success; left untouched
 *        on failure.
 *
 * Returns the boolean value for `true` / `false`, and false for `null`.
 * Records "Expected value" on `p` and returns false for anything else. */
INTERNAL b32 parse_primitive(struct arena *arena, struct parser *p, enum json_type *type)
{
    __prof;
    (UNUSED)arena;

    /* Find start */
    b32 found_start = false;
    while (!found_start && p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;
            default: {
                found_start = true;
            } break;
        }
    }
    u8 *start = p->at;

    /* Find end */
    b32 found_end = false;
    while (!found_end && p->at < p->end) {
        switch (*p->at) {
            CASE_ALPHABETICAL_LOWERCASE: {
                ++p->at;
            } break;
            default: {
                found_end = true;
            } break;
        }
    }
    /* The literal ends at the cursor whether the scan stopped on a delimiter
     * or ran out of input, so a literal that is the last thing in the buffer
     * is still recognised. */
    u8 *end = p->at;

    struct string val_str = {
        .len = end - start,
        .text = start
    };
    if (string_eq(val_str, STR("null"))) {
        *type = JSON_TYPE_NULL;
        return false;
    } else if (string_eq(val_str, STR("true"))) {
        *type = JSON_TYPE_BOOL;
        return true;
    } else if (string_eq(val_str, STR("false"))) {
        *type = JSON_TYPE_BOOL;
        return false;
    } else {
        p->error = STR("Expected value");
        return false;
    }
}
INTERNAL struct json_ir_parent_data parse_array(struct arena *arena, struct parser *p)
{
__prof;
struct json_ir_parent_data data = { 0 };
/* Find '[' array start */
b32 found_start = false;
while (!found_start && p->at < p->end) {
switch (*p->at) {
CASE_WHITESPACE: {
++p->at;
} break;
case '[': {
++p->at;
found_start = true;
} break;
default: {
set_error_unexpected_character(arena, p);
return data;
} break;
}
}
if (!found_start) {
set_error_unexpected_character(arena, p);
return data;
}
/* Parse array */
b32 preceding_comma = false;
while (p->at < p->end) {
switch (*p->at) {
CASE_WHITESPACE: {
++p->at;
} break;
/* End array */
case ']': {
if (preceding_comma) {
p->error = STR("Trailing comma");
return data;
}
++p->at;
return data;
} break;
/* Parse value */
case ',': {
if (data.child_first) {
++p->at;
preceding_comma = true;
} else {
set_error_unexpected_character(arena, p);
return data;
}
} break;
default: {
if (!preceding_comma && data.child_count > 0) {
p->error = STR("Expected comma");
return data;
}
if (data.child_count >= MAX_CHILDREN) {
p->error = STR("Max children reached");
return data;
}
struct json_ir *child = arena_push(arena, struct json_ir);
*child = parse_json(arena, p, false);
if (is_error(p)) {
return data;
}
if (data.child_last) {
data.child_last->next_child = child;
} else if (!data.child_first) {
data.child_first = child;
}
data.child_last = child;
++data.child_count;
preceding_comma = false;
} break;
}
}
p->error = STR("Reached end of file without closing object (no '}' found)");
return data;
}
INTERNAL struct json_ir_parent_data parse_object(struct arena *arena, struct parser *p)
{
__prof;
struct json_ir_parent_data data = { 0 };
/* Find '{' object start */
b32 found_start = false;
while (!found_start && p->at < p->end) {
switch (*p->at) {
CASE_WHITESPACE: {
++p->at;
} break;
case '{': {
++p->at;
found_start = true;
} break;
default: {
set_error_unexpected_character(arena, p);
return data;
} break;
}
}
if (!found_start) {
set_error_unexpected_character(arena, p);
return data;
}
/* Parse object */
b32 preceding_comma = false;
while (p->at < p->end) {
switch (*p->at) {
CASE_WHITESPACE: {
++p->at;
} break;
/* End object */
case '}': {
if (preceding_comma) {
p->error = STR("Trailing comma");
return data;
}
++p->at;
return data;
} break;
/* Parse value */
case ',': {
if (data.child_first) {
++p->at;
preceding_comma = true;
} else {
set_error_unexpected_character(arena, p);
return data;
}
} break;
case '"': {
if (!preceding_comma && data.child_count > 0) {
p->error = STR("Expected comma");
return data;
}
if (data.child_count >= MAX_CHILDREN) {
p->error = STR("Max children reached");
return data;
}
struct json_ir *child = arena_push(arena, struct json_ir);
*child = parse_json(arena, p, true);
if (is_error(p)) {
return data;
}
if (data.child_last) {
data.child_last->next_child = child;
} else if (!data.child_first) {
data.child_first = child;
}
data.child_last = child;
++data.child_count;
preceding_comma = false;
} break;
default: {
set_error_unexpected_character(arena, p);
return data;
} break;
}
}
p->error = STR("Reached end of file without closing object (no '}' found)");
return data;
}
INTERNAL void parse_whitespace_until_end(struct arena *arena, struct parser *p)
{
__prof;
while (p->at < p->end) {
switch (*p->at) {
CASE_WHITESPACE: {
++p->at;
} break;
default: {
set_error_unexpected_character(arena, p);
return;
} break;
}
}
}
/* Parses one JSON value at the cursor, optionally preceded by `"key" :`.
 *
 * arena:     receives strings, child nodes and formatted error messages.
 * p:         parser; advanced past everything that was consumed.
 * parse_key: when true, a string key and a ':' are parsed before the value
 *            and the key is stored in the returned node.
 *
 * The value parser is chosen from the first non-whitespace byte: '"' string,
 * '-' / '.' / digit number, '{' object, '[' array, anything else is handed
 * to parse_primitive (true / false / null).
 *
 * Returns the parsed node by value. On failure an error is recorded on `p`
 * and the node is only partially filled in; callers are expected to check
 * is_error(). */
INTERNAL struct json_ir parse_json(struct arena *arena, struct parser *p, b32 parse_key)
{
    __prof;
    struct json_ir ir = { 0 };

    /* Parse key (if necessary) */
    if (parse_key) {
        /* Parse key string */
        ir.key = parse_string(arena, p);
        if (is_error(p)) {
            return ir;
        }
        /* Eat ':' */
        b32 found_colon = false;
        while (!found_colon && p->at < p->end) {
            switch (*p->at) {
                CASE_WHITESPACE: {
                    ++p->at;
                } break;
                case ':': {
                    ++p->at;
                    found_colon = true;
                } break;
                default: {
                    p->error = STR("Colon expected");
                    return ir;
                } break;
            }
        }
        if (!found_colon) {
            p->error = STR("Colon expected");
            return ir;
        }
    }

    /* Eat whitespace until value start */
    b32 value_start = false;
    while (!value_start && p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;
            default: {
                value_start = true;
            } break;
        }
    }
    if (!value_start) {
        /* Input is empty, whitespace-only, or ends right after a ':'.
         * Reading `*p->at` below would be out of bounds. */
        p->error = STR("Expected value");
        return ir;
    }

    /* Parse value */
    switch (*p->at) {
        case '"': {
            ir.type = JSON_TYPE_STRING;
            ir.val.string = parse_string(arena, p);
        } break;
        case '-':
        case '.':
        CASE_NUMERIC: {
            ir.type = JSON_TYPE_NUMBER;
            ir.val.number = parse_number(arena, p);
        } break;
        case '{': {
            ir.type = JSON_TYPE_OBJECT;
            ir.val.children = parse_object(arena, p);
        } break;
        case '[': {
            ir.type = JSON_TYPE_ARRAY;
            ir.val.children = parse_array(arena, p);
        } break;
        default: {
            ir.val.boolean = parse_primitive(arena, p, &ir.type);
        } break;
    }
    return ir;
}
const struct json_ir *json_parse(struct arena *arena, struct buffer bytes, struct string **error)
{
__prof;
struct parser p = {
.at = bytes.data,
.end = bytes.data + bytes.size
};
struct json_ir *root_ir = arena_push(arena, struct json_ir);
*root_ir = parse_json(arena, &p, false);
if (!is_error(&p)) {
/* Ensure that there is no data other than root object / value in buffer */
parse_whitespace_until_end(arena, &p);
}
if (is_error(&p)) {
/* Handle error */
if (error) {
struct string err_msg = p.error.len > 0 ? p.error : STR("Parsing error");
*error = arena_push(arena, struct string);
**error = string_copy(arena, err_msg);
}
}
return root_ir;
}
/* ========================== *
* Format
* ========================== */
/* Recursively converts the intermediate representation `ir` into a json_val
 * whose storage is pushed onto `arena`.
 *
 * - Object: pushes a zeroed u32 hash table directly followed by the entries
 *   array, then fills both. A non-zero slot holds (entry index + 1);
 *   collisions are resolved by linear probing with wrap-around.
 * - Array: pushes `child_count` json_val slots, then formats each child in
 *   list order.
 * - String: pushes a struct string and a copy of the text.
 * - Bool / null / number: copied by value.
 * - JSON_TYPE_INVALID: asserts and returns a zeroed value.
 *
 * Returns the formatted value by value. */
INTERNAL struct json_val json_format_internal(struct arena *arena, const struct json_ir *ir)
{
__prof;
struct json_val val = { 0 };
switch (ir->type) {
case JSON_TYPE_OBJECT: {
u32 count = ir->val.children.child_count;
/* Must match the capacity expression in get_object_entries and json_object_get */
u32 capacity = (u32)((f64)count * OBJECT_HASH_TABLE_CAPACITY_FACTOR);
u64 hash_table_size = sizeof(u32) * capacity;
u64 entries_size = sizeof(struct json_object_entry) * count;
/* Not aligning entries, since get_object_entries currently
* depends on hash_table being situated right before the
* entries array in memory. */
arena_align(arena, ALIGNOF(u32));
u32 *hash_table = (u32 *)arena_push_array_zero(arena, u8, hash_table_size);
struct json_object_entry *entries = (struct json_object_entry *)arena_push_array_zero(arena, u8, entries_size);
val = (struct json_val) {
.type = JSON_TYPE_OBJECT,
.child_count = count,
.val.object_table = hash_table
};
struct json_ir *cur = ir->val.children.child_first;
u32 index = 0;
while (cur) {
/* Entry order follows the child list order */
entries[index] = (struct json_object_entry) {
.key = string_copy(arena, cur->key),
.value = json_format_internal(arena, cur)
};
u64 hash = hash_fnv64(BUFFER_FROM_STRING(cur->key));
u32 slot_index_home = hash % capacity;
u32 slot_index = slot_index_home;
while (true) {
u32 *temp = &hash_table[slot_index];
if (*temp) {
/* Occupied, linear probe for next slot */
if (slot_index < (capacity - 1)) {
++slot_index;
} else {
slot_index = 0;
}
/* Table loop around should be impossible */
ASSERT(slot_index != slot_index_home);
} else {
/* Empty */
/* Store index + 1 so that a zeroed slot can mean "empty" */
*temp = index + 1;
break;
}
}
cur = cur->next_child;
++index;
}
} break;
case JSON_TYPE_ARRAY: {
u32 count = ir->val.children.child_count;
val = (struct json_val) {
.type = JSON_TYPE_ARRAY,
.val.array_children = arena_push_array(arena, struct json_val, count),
.child_count = count
};
struct json_ir *child_ir = ir->val.children.child_first;
u64 child_index = 0;
/* Fill the slots in child list order */
while (child_ir) {
val.val.array_children[child_index] = json_format_internal(arena, child_ir);
child_ir = child_ir->next_child;
++child_index;
}
} break;
case JSON_TYPE_STRING: {
/* The value holds a pointer to a string header pushed onto the arena */
struct string *str = arena_push(arena, struct string);
*str = string_copy(arena, ir->val.string);
val = (struct json_val){
.type = JSON_TYPE_STRING,
.val.string = str
};
} break;
case JSON_TYPE_BOOL: {
val = (struct json_val){
.type = JSON_TYPE_BOOL,
.val.boolean = ir->val.boolean
};
} break;
case JSON_TYPE_NULL: {
val = (struct json_val){
.type = JSON_TYPE_NULL
};
} break;
case JSON_TYPE_NUMBER: {
val = (struct json_val){
.type = JSON_TYPE_NUMBER,
.val.number = ir->val.number
};
} break;
case JSON_TYPE_INVALID: {
/* An invalid node should never reach formatting */
ASSERT(false);
return (struct json_val) { 0 };
} break;
}
return val;
}
/* Formats the intermediate representation `ir` into a json_val tree.
 * The root value is pushed onto `arena` first, followed by everything
 * json_format_internal allocates for it. Returns the root value. */
const struct json_val *json_format(struct arena *arena, const struct json_ir *ir)
{
    __prof;
    struct json_val *root = arena_push(arena, struct json_val);
    root[0] = json_format_internal(arena, ir);
    return root;
}
/* Returns NULL on error (and sets `error` string parameter) */
const struct json_val *json_parse_and_format(struct arena *arena, struct buffer bytes, struct string **error)
{
__prof;
struct temp_arena scratch = scratch_begin(arena);
struct string *error_temp = NULL;
const struct json_ir *root_ir = json_parse(scratch.arena, bytes, &error_temp);
struct json_val *root_val = NULL;
if (error_temp) {
if (error) {
/* Copy error message out of scratch */
*error = arena_push(arena, struct string);
**error = string_copy(arena, *error_temp);
}
} else {
root_val = arena_push(arena, struct json_val);
*root_val = json_format_internal(arena, root_ir);
}
/* Decommit since the immediate-representation memory usage may have been high */
scratch_end_and_decommit(scratch);
return root_val;
}
/* ========================== *
* Index
* ========================== */
INTERNAL struct json_object_entry *get_object_entries(const struct json_val *v)
{
__prof;
u32 hash_table_size = (u32)((u32)sizeof(u32) * (u32)(v->child_count * OBJECT_HASH_TABLE_CAPACITY_FACTOR));
return (struct json_object_entry *)((u8 *)v->val.object_table + hash_table_size);
}
/* Returns the element at `index` of the array value `obj`.
 * Asserts that `obj` is an array and that `index` is within bounds. */
const struct json_val *json_array_get(const struct json_val *obj, u32 index)
{
    __prof;
    ASSERT(json_is_array(obj));
    ASSERT(index < obj->child_count);
    const struct json_val *elements = obj->val.array_children;
    return elements + index;
}
const struct json_val *json_object_get(const struct json_val *obj, struct string key)
{
__prof;
ASSERT(json_is_object(obj));
u32 count = obj->child_count;
u32 capacity = (u32)(count * OBJECT_HASH_TABLE_CAPACITY_FACTOR);
if (count <= 0) {
return NULL;
}
u32 *hash_table = (u32 *)obj->val.object_table;
const struct json_object_entry *entries = get_object_entries(obj);
u64 hash = hash_fnv64(BUFFER_FROM_STRING(key));
u32 slot_index_home = hash % capacity;
u32 slot_index = slot_index_home;
while (true) {
u32 *temp = &hash_table[slot_index];
if (*temp) {
const struct json_object_entry *entry = &entries[*temp - 1];
if (string_eq(entry->key, key)) {
/* Match */
return &entry->value;
} else {
/* Not matching, linear probe next slot */
if (slot_index < (capacity - 1)) {
++slot_index;
} else {
slot_index = 0;
}
/* Table loop around means not found */
if (slot_index == slot_index_home) {
return NULL;
}
}
} else {
/* Not found */
return NULL;
}
}
}
/* Returns the entry (key and value) stored at position `index` of the object
 * value `obj`. Asserts that `obj` is an object and `index` is within bounds. */
const struct json_object_entry *json_object_get_index(const struct json_val *obj, u32 index)
{
    __prof;
    ASSERT(json_is_object(obj));
    ASSERT(index < obj->child_count);
    const struct json_object_entry *entries = get_object_entries(obj);
    return entries + index;
}
/* ========================== *
* Dump
* ========================== */
INTERNAL struct string json_dump_to_string_internal(struct arena *arena, const struct string *key, const struct json_val *val, u32 indent, u32 level)
{
__prof;
struct temp_arena scratch = scratch_begin(arena);
if (!val) {
return (struct string) { 0 };
}
u8 *final_text = arena_dry_push(arena, u8);
u64 final_len = 0;
u32 indent_len = indent * level;
final_len += string_repeat(arena, STR(" "), indent_len).len;
if (key) {
final_len += string_format(
arena,
STR("\"%F\": "),
FMT_STR(*key)
).len;
}
switch (val->type) {
case JSON_TYPE_OBJECT: {
u32 count = val->child_count;
u32 last_entry_index = (u32)max_u64(count, 1) - 1;
const struct json_object_entry *entries = get_object_entries(val);
final_len += string_copy(arena, STR("{\n")).len;
for (u32 i = 0; i < count; ++i) {
const struct json_object_entry *entry = &entries[i];
struct string value_str = json_dump_to_string_internal(arena, &entry->key, &entry->value, indent, level + 1);
final_len += value_str.len;
if (i != last_entry_index) {
final_len += string_copy(arena, STR(",\n")).len;
} else {
final_len += string_copy(arena, STR("\n")).len;
}
}
final_len += string_repeat(arena, STR(" "), indent_len).len;
final_len += string_copy(arena, STR("}")).len;
} break;
case JSON_TYPE_ARRAY: {
u32 count = val->child_count;
u32 last_entry_index = (u32)max_u64(count, 1) - 1;
final_len += string_copy(arena, STR("[\n")).len;
for (u32 i = 0; i < count; ++i) {
const struct json_val *child = &val->val.array_children[i];
struct string value_str = json_dump_to_string_internal(arena, NULL, child, indent, level + 1);
final_len += value_str.len;
if (i != last_entry_index) {
final_len += string_copy(arena, STR(",\n")).len;
} else {
final_len += string_copy(arena, STR("\n")).len;
}
}
final_len += string_repeat(arena, STR(" "), indent_len).len;
final_len += string_copy(arena, STR("]")).len;
} break;
case JSON_TYPE_STRING: {
final_len += string_format(arena, STR("\"%F\""), FMT_STR(*val->val.string)).len;
} break;
case JSON_TYPE_NUMBER: {
/* Precision = 15 */
const struct string precision_str = STR(".00000000000000");
struct string num_str = string_from_float(scratch.arena, val->val.number, (u32)precision_str.len - 1);
if (string_ends_with(num_str, precision_str)) {
num_str.len -= precision_str.len;
}
final_len += string_copy(arena, num_str).len;
} break;
case JSON_TYPE_NULL: {
final_len += string_copy(arena, STR("null")).len;
} break;
case JSON_TYPE_BOOL: {
struct string str = val->val.boolean ? STR("true") : STR("false");
final_len += string_copy(arena, str).len;
} break;
case JSON_TYPE_INVALID: {
/* Unknown type */
ASSERT(false);
return (struct string) { 0 };
} break;
}
scratch_end(scratch);
return (struct string) {
.text = final_text,
.len = final_len
};
}
/* Serialises `val` as indented JSON text allocated on `arena`, using
 * `indent` spaces per nesting level. The root is dumped without a key at
 * nesting level 0. */
struct string json_dump_to_string(struct arena *arena, const struct json_val *val, u32 indent)
{
    __prof;
    const struct string *no_key = NULL;
    u32 root_level = 0;
    struct string dumped = json_dump_to_string_internal(arena, no_key, val, indent, root_level);
    return dumped;
}
/* ========================== *
* Write
* ========================== */
struct json_ir *json_ir_object(struct arena *arena)
{
__prof;
struct json_ir *ir = arena_push(arena, struct json_ir);
*ir = (struct json_ir) {
.type = JSON_TYPE_OBJECT
};
return ir;
}
/* Allocates a number node holding `n` on `arena` and returns it. */
struct json_ir *json_ir_number(struct arena *arena, f64 n)
{
    __prof;
    struct json_ir node = { 0 };
    node.type = JSON_TYPE_NUMBER;
    node.val.number = n;

    struct json_ir *ir = arena_push(arena, struct json_ir);
    *ir = node;
    return ir;
}
/* Allocates a boolean node holding `b` on `arena` and returns it. */
struct json_ir *json_ir_bool(struct arena *arena, b32 b)
{
    __prof;
    struct json_ir node = { 0 };
    node.type = JSON_TYPE_BOOL;
    node.val.boolean = b;

    struct json_ir *ir = arena_push(arena, struct json_ir);
    *ir = node;
    return ir;
}
/* Disabled string-node constructor, excluded from compilation by `#if 0`.
 * It is out of step with the rest of this file: it uses ARENA_PUSH_STRUCT
 * and a `.v_string` member, whereas the live code uses arena_push and
 * `.val.string`. It would need updating before being re-enabled. */
#if 0
/* NOTE: Does NOT copy `str` text */
struct json_ir *json_ir_string(struct arena *arena, struct string str)
{
__prof;
struct json_ir *ir = ARENA_PUSH_STRUCT(arena, struct json_ir);
*ir = (struct json_ir) {
.type = JSON_TYPE_STRING,
.v_string = str
};
return ir;
}
#endif
struct json_ir *json_ir_object_set(struct json_ir *obj, struct string key, struct json_ir *value)
{
__prof;
ASSERT(obj->type == JSON_TYPE_OBJECT);
/* Value already set previously */
ASSERT(value->key.len == 0 && value->key.text == NULL);
value->key = key;
if (obj->val.children.child_last) {
obj->val.children.child_last->next_child = value;
} else if (!obj->val.children.child_first) {
obj->val.children.child_first = value;
}
obj->val.children.child_last = value;
obj->val.children.child_count++;
return value;
}