#include "json.h"
#include "string.h"
#include "arena.h"
#include "memory.h"
#include "scratch.h"
#include "math.h"
#include "util.h"

/* Ratio of hash table slots to object entries (see object_table_capacity) */
#define OBJECT_HASH_TABLE_CAPACITY_FACTOR 1.5

/* Upper bound on the number of children a single array / object may hold */
#define MAX_CHILDREN (U32_MAX - 1)

/* NOTE: The CASE_* macros intentionally omit the colon of their last label so
 * that they can be used like a regular label: `CASE_WHITESPACE: { ... } break;` */

#define CASE_NEWLINE \
    case 0x0A: /* Line feed or New line */ \
    case 0x0D /* Carriage return */

#define CASE_WHITESPACE \
    CASE_NEWLINE: \
    case 0x20: /* Space */ \
    case 0x09 /* Horizontal tab */

#define CASE_NUMERIC \
    case '0': \
    case '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9'

#define CASE_ALPHABETICAL_LOWERCASE \
    case 'a': \
    case 'b': \
    case 'c': \
    case 'd': \
    case 'e': \
    case 'f': \
    case 'g': \
    case 'h': \
    case 'i': \
    case 'j': \
    case 'k': \
    case 'l': \
    case 'm': \
    case 'n': \
    case 'o': \
    case 'p': \
    case 'q': \
    case 'r': \
    case 's': \
    case 't': \
    case 'u': \
    case 'v': \
    case 'w': \
    case 'x': \
    case 'y': \
    case 'z'

/* ========================== *
 * Parse
 * ========================== */

/* Parser cursor. `at` is the next unread byte, `end` is one past the last
 * byte of the input. A non-empty `error` means parsing has failed. */
struct parser {
    u8 *at;
    u8 *end;
    struct string error;
};

INTERNAL struct json_ir parse_json(struct arena *arena, struct parser *p, b32 parse_key);

/* Sets the parser error to describe the byte at the cursor. If the cursor is
 * already at the end of the input there is no byte to report (and reading it
 * would be out of bounds), so an end-of-file error is set instead. */
INTERNAL void set_error_unexpected_character(struct arena *arena, struct parser *p)
{
    __prof;
    if (p->at < p->end) {
        p->error = string_format(arena, STR("Unexpected character '%F'"), FMT_CHAR(*p->at));
    } else {
        p->error = STR("Unexpected end of file");
    }
}

/* Returns true once any parse function has recorded an error */
INTERNAL b32 is_error(struct parser *p)
{
    __prof;
    return p->error.len > 0;
}

/* Advances the cursor past any whitespace; stops at the first other byte or
 * at the end of the input */
INTERNAL void eat_whitespace(struct parser *p)
{
    __prof;
    b32 done = false;
    while (!done && p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;

            default: {
                done = true;
            } break;
        }
    }
}

/* Parses a decimal number of the form [-]digits[.digits] and leaves the
 * cursor on the first byte after it. Returns 0 and sets the parser error on
 * malformed input. A leading '.' is tolerated; exponents are not parsed. */
INTERNAL f64 parse_number(struct arena *arena, struct parser *p)
{
    __prof;

    eat_whitespace(p);
    if (p->at >= p->end) {
        set_error_unexpected_character(arena, p);
        return 0;
    }

    /* Optional sign (only valid as the very first character) */
    f64 sign = 1;
    if (*p->at == '-') {
        sign = -1;
        ++p->at;
    }

    f64 value = 0;
    f64 frac_div = 1;
    b32 seen_decimal = false;
    u32 digit_count = 0;

    /* Accumulate digits left to right. Reaching the end of the input is a
     * valid way for the number to end. */
    b32 found_end = false;
    while (!found_end && p->at < p->end) {
        switch (*p->at) {
            case '.': {
                if (seen_decimal) {
                    set_error_unexpected_character(arena, p);
                    return 0;
                }
                seen_decimal = true;
                ++p->at;
            } break;

            case '-': {
                /* Sign in the middle of a number */
                set_error_unexpected_character(arena, p);
                return 0;
            } break;

            CASE_NUMERIC: {
                f64 digit = (f64)(*p->at - '0');
                if (seen_decimal) {
                    /* Each fractional digit is worth 10x less than the previous one */
                    frac_div *= 10;
                    value += digit / frac_div;
                } else {
                    value = (value * 10) + digit;
                }
                ++digit_count;
                ++p->at;
            } break;

            default: {
                found_end = true;
            } break;
        }
    }

    if (digit_count == 0) {
        /* Lone '-', '.' or "-." is not a number */
        set_error_unexpected_character(arena, p);
        return 0;
    }

    return value * sign;
}

/* Pushes one byte onto the arena and grows `str` by one. Relies on `str->text`
 * pointing at the arena position where the first byte was / will be pushed. */
INTERNAL void append_char(struct arena *arena, struct string *str, u8 c)
{
    __prof;
    u8 *c_ptr = arena_push(arena, u8);
    *c_ptr = c;
    ++str->len;
}

/* Parses a double-quoted string (skipping leading whitespace) and returns its
 * unescaped contents, built byte by byte on `arena`. On failure the parser
 * error is set and the partially built string is returned.
 * NOTE: \uXXXX escapes are not supported and are reported as unknown. */
INTERNAL struct string parse_string(struct arena *arena, struct parser *p)
{
    __prof;
    struct string str = { .len = 0, .text = arena_dry_push(arena, u8) };

    /* Find '"' string start */
    eat_whitespace(p);
    if (p->at >= p->end) {
        p->error = STR("Reached end of file without finding a string");
        return str;
    }
    if (*p->at != '"') {
        set_error_unexpected_character(arena, p);
        return str;
    }
    ++p->at;

    /* Parse characters */
    while (p->at < p->end) {
        if (*p->at == '\\') {
            /* Escaped */
            if ((p->at + 1) >= p->end) {
                /* Backslash is the last byte of the input */
                set_error_unexpected_character(arena, p);
                return str;
            }
            ++p->at;
            switch (*p->at) {
                CASE_NEWLINE: {
                    p->error = STR("Expected '\"' to close string");
                    return str;
                } break;

                /* Backspace */
                case 'b': {
                    append_char(arena, &str, '\b');
                } break;

                /* Form feed */
                case 'f': {
                    append_char(arena, &str, '\f');
                } break;

                /* Newline */
                case 'n': {
                    append_char(arena, &str, '\n');
                } break;

                /* Carriage return */
                case 'r': {
                    append_char(arena, &str, '\r');
                } break;

                /* Tab */
                case 't': {
                    append_char(arena, &str, '\t');
                } break;

                case '"':  /* Double quote */
                case '/':  /* Solidus (valid JSON escape) */
                case '\\': /* Backslash */
                {
                    append_char(arena, &str, *p->at);
                } break;

                default: {
                    p->error = string_format(
                        arena,
                        STR("Unknown escape sequence '\\%F'"),
                        FMT_CHAR(*p->at)
                    );
                    return str;
                } break;
            }
            ++p->at;
        } else {
            switch (*p->at) {
                CASE_NEWLINE: {
                    p->error = STR("Expected '\"' to close string");
                    return str;
                } break;

                /* End string */
                case '"': {
                    ++p->at;
                    return str;
                } break;

                /* Append character */
                default: {
                    append_char(arena, &str, *p->at);
                    ++p->at;
                } break;
            }
        }
    }

    p->error = STR("Expected '\"' to close string");
    return str;
}

/* Parses booleans & null.
 * Writes the detected type to `type` and returns the boolean value (if value
 * type is boolean). Returns false if null value type. On failure the parser
 * error is set, `type` is left untouched and false is returned. */
INTERNAL b32 parse_primitive(struct arena *arena, struct parser *p, enum json_type *type)
{
    __prof;
    (UNUSED)arena;

    /* Find start */
    eat_whitespace(p);
    u8 *start = p->at;

    /* Find end (first byte that is not a lowercase letter, or end of input) */
    b32 found_end = false;
    while (!found_end && p->at < p->end) {
        switch (*p->at) {
            CASE_ALPHABETICAL_LOWERCASE: {
                ++p->at;
            } break;

            default: {
                found_end = true;
            } break;
        }
    }
    u8 *end = p->at;

    struct string val_str = { .len = end - start, .text = start };

    if (string_eq(val_str, STR("null"))) {
        *type = JSON_TYPE_NULL;
        return false;
    } else if (string_eq(val_str, STR("true"))) {
        *type = JSON_TYPE_BOOL;
        return true;
    } else if (string_eq(val_str, STR("false"))) {
        *type = JSON_TYPE_BOOL;
        return false;
    } else {
        p->error = STR("Expected value");
        return false;
    }
}

/* Parses a '[' ... ']' array. Children are allocated on `arena` and linked in
 * source order. On failure the parser error is set and the children parsed so
 * far are returned. */
INTERNAL struct json_ir_parent_data parse_array(struct arena *arena, struct parser *p)
{
    __prof;
    struct json_ir_parent_data data = { 0 };

    /* Find '[' array start */
    eat_whitespace(p);
    if (p->at >= p->end || *p->at != '[') {
        set_error_unexpected_character(arena, p);
        return data;
    }
    ++p->at;

    /* Parse array */
    b32 preceding_comma = false;
    while (p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;

            /* End array */
            case ']': {
                if (preceding_comma) {
                    p->error = STR("Trailing comma");
                    return data;
                }
                ++p->at;
                return data;
            } break;

            /* Separator: only valid directly after a value */
            case ',': {
                if (data.child_count == 0 || preceding_comma) {
                    set_error_unexpected_character(arena, p);
                    return data;
                }
                ++p->at;
                preceding_comma = true;
            } break;

            /* Parse value */
            default: {
                if (!preceding_comma && data.child_count > 0) {
                    p->error = STR("Expected comma");
                    return data;
                }

                if (data.child_count >= MAX_CHILDREN) {
                    p->error = STR("Max children reached");
                    return data;
                }

                struct json_ir *child = arena_push(arena, struct json_ir);
                *child = parse_json(arena, p, false);
                if (is_error(p)) {
                    return data;
                }

                if (data.child_last) {
                    data.child_last->next_child = child;
                } else if (!data.child_first) {
                    data.child_first = child;
                }
                data.child_last = child;
                ++data.child_count;
                preceding_comma = false;
            } break;
        }
    }

    p->error = STR("Reached end of file without closing array (no ']' found)");
    return data;
}

/* Parses a '{' ... '}' object. Each child carries its key and is allocated on
 * `arena`, linked in source order. On failure the parser error is set and the
 * children parsed so far are returned. */
INTERNAL struct json_ir_parent_data parse_object(struct arena *arena, struct parser *p)
{
    __prof;
    struct json_ir_parent_data data = { 0 };

    /* Find '{' object start */
    eat_whitespace(p);
    if (p->at >= p->end || *p->at != '{') {
        set_error_unexpected_character(arena, p);
        return data;
    }
    ++p->at;

    /* Parse object */
    b32 preceding_comma = false;
    while (p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;

            /* End object */
            case '}': {
                if (preceding_comma) {
                    p->error = STR("Trailing comma");
                    return data;
                }
                ++p->at;
                return data;
            } break;

            /* Separator: only valid directly after a key/value pair */
            case ',': {
                if (data.child_count == 0 || preceding_comma) {
                    set_error_unexpected_character(arena, p);
                    return data;
                }
                ++p->at;
                preceding_comma = true;
            } break;

            /* Parse key/value pair */
            case '"': {
                if (!preceding_comma && data.child_count > 0) {
                    p->error = STR("Expected comma");
                    return data;
                }

                if (data.child_count >= MAX_CHILDREN) {
                    p->error = STR("Max children reached");
                    return data;
                }

                struct json_ir *child = arena_push(arena, struct json_ir);
                *child = parse_json(arena, p, true);
                if (is_error(p)) {
                    return data;
                }

                if (data.child_last) {
                    data.child_last->next_child = child;
                } else if (!data.child_first) {
                    data.child_first = child;
                }
                data.child_last = child;
                ++data.child_count;
                preceding_comma = false;
            } break;

            default: {
                set_error_unexpected_character(arena, p);
                return data;
            } break;
        }
    }

    p->error = STR("Reached end of file without closing object (no '}' found)");
    return data;
}

/* Consumes the rest of the input, setting an error on the first byte that is
 * not whitespace */
INTERNAL void parse_whitespace_until_end(struct arena *arena, struct parser *p)
{
    __prof;
    while (p->at < p->end) {
        switch (*p->at) {
            CASE_WHITESPACE: {
                ++p->at;
            } break;

            default: {
                set_error_unexpected_character(arena, p);
                return;
            } break;
        }
    }
}

/* Parses one value at the cursor, preceded by `"key" :` when `parse_key` is
 * set. The value kind is chosen from its first non-whitespace byte. On failure
 * the parser error is set and a partially filled node is returned. */
INTERNAL struct json_ir parse_json(struct arena *arena, struct parser *p, b32 parse_key)
{
    __prof;
    struct json_ir ir = { 0 };

    /* Parse key (if necessary) */
    if (parse_key) {
        /* Parse key string */
        ir.key = parse_string(arena, p);
        if (is_error(p)) {
            return ir;
        }

        /* Eat ':' */
        eat_whitespace(p);
        if (p->at >= p->end || *p->at != ':') {
            p->error = STR("Colon expected");
            return ir;
        }
        ++p->at;
    }

    /* Eat whitespace until value start */
    eat_whitespace(p);
    if (p->at >= p->end) {
        /* Nothing left to read; dereferencing the cursor would be out of bounds */
        p->error = STR("Expected value");
        return ir;
    }

    /* Parse value */
    switch (*p->at) {
        case '"': {
            ir.type = JSON_TYPE_STRING;
            ir.val.string = parse_string(arena, p);
        } break;

        case '-':
        case '.':
        CASE_NUMERIC: {
            ir.type = JSON_TYPE_NUMBER;
            ir.val.number = parse_number(arena, p);
        } break;

        case '{': {
            ir.type = JSON_TYPE_OBJECT;
            ir.val.children = parse_object(arena, p);
        } break;

        case '[': {
            ir.type = JSON_TYPE_ARRAY;
            ir.val.children = parse_array(arena, p);
        } break;

        default: {
            ir.val.boolean = parse_primitive(arena, p, &ir.type);
        } break;
    }

    return ir;
}

/* Parses `bytes` into an intermediate representation allocated on `arena`.
 * A root node is always returned. On failure, and if `error` is non-NULL,
 * `*error` is pointed at an arena-allocated copy of the error message; on
 * success `*error` is not written, so callers should initialize it to NULL. */
const struct json_ir *json_parse(struct arena *arena, struct buffer bytes, struct string **error)
{
    __prof;
    struct parser p = {
        .at = bytes.data,
        .end = bytes.data + bytes.size
    };

    struct json_ir *root_ir = arena_push(arena, struct json_ir);
    *root_ir = parse_json(arena, &p, false);

    if (!is_error(&p)) {
        /* Ensure that there is no data other than root object / value in buffer */
        parse_whitespace_until_end(arena, &p);
    }

    if (is_error(&p)) {
        /* Handle error */
        if (error) {
            struct string err_msg = p.error.len > 0 ? p.error : STR("Parsing error");
            *error = arena_push(arena, struct string);
            **error = string_copy(arena, err_msg);
        }
    }

    return root_ir;
}

/* ========================== *
 * Format
 * ========================== */

/* Number of hash table slots used for an object with `count` entries.
 * Single source of truth shared by the formatter and the lookup functions. */
INTERNAL u32 object_table_capacity(u32 count)
{
    __prof;
    return (u32)((f64)count * OBJECT_HASH_TABLE_CAPACITY_FACTOR);
}

/* Size in bytes of the hash table of an object with `count` entries. The
 * entries array starts exactly this many bytes after the table. */
INTERNAL u64 object_table_size(u32 count)
{
    __prof;
    return (u64)sizeof(u32) * (u64)object_table_capacity(count);
}

/* Recursively converts an IR node into its final value form on `arena`.
 * Objects become a u32 hash table (slot value = entry index + 1, 0 = empty,
 * linear probing) immediately followed in memory by the entries array. */
INTERNAL struct json_val json_format_internal(struct arena *arena, const struct json_ir *ir)
{
    __prof;
    struct json_val val = { 0 };

    switch (ir->type) {
        case JSON_TYPE_OBJECT: {
            u32 count = ir->val.children.child_count;
            u32 capacity = object_table_capacity(count);

            u64 hash_table_size = object_table_size(count);
            u64 entries_size = sizeof(struct json_object_entry) * count;

            /* Not aligning entries, since get_object_entries currently
             * depends on hash_table being situated right before the
             * entries array in memory.
             * NOTE(review): with an odd capacity the entries array appears to
             * start on a 4-byte boundary only; confirm this is acceptable for
             * struct json_object_entry on all target platforms. */
            arena_align(arena, ALIGNOF(u32));
            u32 *hash_table = (u32 *)arena_push_array_zero(arena, u8, hash_table_size);
            struct json_object_entry *entries = (struct json_object_entry *)arena_push_array_zero(arena, u8, entries_size);

            val = (struct json_val) {
                .type = JSON_TYPE_OBJECT,
                .child_count = count,
                .val.object_table = hash_table
            };

            struct json_ir *cur = ir->val.children.child_first;
            u32 index = 0;
            while (cur) {
                entries[index] = (struct json_object_entry) {
                    .key = string_copy(arena, cur->key),
                    .value = json_format_internal(arena, cur)
                };

                u64 hash = hash_fnv64(BUFFER_FROM_STRING(cur->key));
                u32 slot_index_home = hash % capacity;
                u32 slot_index = slot_index_home;
                while (true) {
                    u32 *temp = &hash_table[slot_index];
                    if (*temp) {
                        /* Occupied, linear probe for next slot */
                        if (slot_index < (capacity - 1)) {
                            ++slot_index;
                        } else {
                            slot_index = 0;
                        }
                        /* Table loop around should be impossible */
                        ASSERT(slot_index != slot_index_home);
                    } else {
                        /* Empty */
                        *temp = index + 1;
                        break;
                    }
                }

                cur = cur->next_child;
                ++index;
            }
        } break;

        case JSON_TYPE_ARRAY: {
            u32 count = ir->val.children.child_count;

            val = (struct json_val) {
                .type = JSON_TYPE_ARRAY,
                .val.array_children = arena_push_array(arena, struct json_val, count),
                .child_count = count
            };

            struct json_ir *child_ir = ir->val.children.child_first;
            u64 child_index = 0;
            while (child_ir) {
                val.val.array_children[child_index] = json_format_internal(arena, child_ir);
                child_ir = child_ir->next_child;
                ++child_index;
            }
        } break;

        case JSON_TYPE_STRING: {
            struct string *str = arena_push(arena, struct string);
            *str = string_copy(arena, ir->val.string);
            val = (struct json_val) {
                .type = JSON_TYPE_STRING,
                .val.string = str
            };
        } break;

        case JSON_TYPE_BOOL: {
            val = (struct json_val) {
                .type = JSON_TYPE_BOOL,
                .val.boolean = ir->val.boolean
            };
        } break;

        case JSON_TYPE_NULL: {
            val = (struct json_val) {
                .type = JSON_TYPE_NULL
            };
        } break;

        case JSON_TYPE_NUMBER: {
            val = (struct json_val) {
                .type = JSON_TYPE_NUMBER,
                .val.number = ir->val.number
            };
        } break;

        case JSON_TYPE_INVALID: {
            ASSERT(false);
            return (struct json_val) { 0 };
        } break;
    }

    return val;
}

/* Converts an IR tree into its final value form; the root value and all of
 * its data are allocated on `arena` */
const struct json_val *json_format(struct arena *arena, const struct json_ir *ir)
{
    __prof;
    struct json_val *root_obj = arena_push(arena, struct json_val);
    *root_obj = json_format_internal(arena, ir);
    return root_obj;
}

/* Parses `bytes` using scratch memory for the IR and formats the result onto
 * `arena`.
 * Returns NULL on error (and sets `error` string parameter, if non-NULL) */
const struct json_val *json_parse_and_format(struct arena *arena, struct buffer bytes, struct string **error)
{
    __prof;
    struct temp_arena scratch = scratch_begin(arena);

    struct string *error_temp = NULL;
    const struct json_ir *root_ir = json_parse(scratch.arena, bytes, &error_temp);

    struct json_val *root_val = NULL;
    if (error_temp) {
        if (error) {
            /* Copy error message out of scratch */
            *error = arena_push(arena, struct string);
            **error = string_copy(arena, *error_temp);
        }
    } else {
        root_val = arena_push(arena, struct json_val);
        *root_val = json_format_internal(arena, root_ir);
    }

    /* Decommit since the immediate-representation memory usage may have been high */
    scratch_end_and_decommit(scratch);

    return root_val;
}

/* ========================== *
 * Index
 * ========================== */

/* Returns the entries array of an object value, which is stored directly
 * after the object's hash table (see json_format_internal) */
INTERNAL struct json_object_entry *get_object_entries(const struct json_val *v)
{
    __prof;
    return (struct json_object_entry *)((u8 *)v->val.object_table + object_table_size(v->child_count));
}

/* Returns the array element at `index`. Asserts that `obj` is an array and
 * that `index` is in range. */
const struct json_val *json_array_get(const struct json_val *obj, u32 index)
{
    __prof;
    ASSERT(json_is_array(obj));
    ASSERT(index < obj->child_count);
    return &obj->val.array_children[index];
}

/* Looks up `key` in an object value via its hash table. Returns the matching
 * value, or NULL if the key is not present. Asserts that `obj` is an object. */
const struct json_val *json_object_get(const struct json_val *obj, struct string key)
{
    __prof;
    ASSERT(json_is_object(obj));

    u32 count = obj->child_count;
    if (count <= 0) {
        return NULL;
    }
    u32 capacity = object_table_capacity(count);

    u32 *hash_table = (u32 *)obj->val.object_table;
    const struct json_object_entry *entries = get_object_entries(obj);

    u64 hash = hash_fnv64(BUFFER_FROM_STRING(key));
    u32 slot_index_home = hash % capacity;
    u32 slot_index = slot_index_home;
    while (true) {
        u32 *temp = &hash_table[slot_index];
        if (*temp) {
            const struct json_object_entry *entry = &entries[*temp - 1];
            if (string_eq(entry->key, key)) {
                /* Match */
                return &entry->value;
            } else {
                /* Not matching, linear probe next slot */
                if (slot_index < (capacity - 1)) {
                    ++slot_index;
                } else {
                    slot_index = 0;
                }
                /* Table loop around means not found */
                if (slot_index == slot_index_home) {
                    return NULL;
                }
            }
        } else {
            /* Not found */
            return NULL;
        }
    }
}

/* Returns the object entry (key + value) at `index`, in the order the entries
 * were formatted. Asserts that `obj` is an object and `index` is in range. */
const struct json_object_entry *json_object_get_index(const struct json_val *obj, u32 index)
{
    __prof;
    ASSERT(json_is_object(obj));
    ASSERT(index < obj->child_count);
    return &(get_object_entries(obj)[index]);
}

/* ========================== *
 * Dump
 * ========================== */

/* Pushes `str` onto `arena` as a double-quoted JSON string and returns the
 * number of bytes pushed. Escapes exactly the characters that parse_string
 * understands as escapes, so that dumped text can be parsed back. */
INTERNAL u64 dump_quoted_string(struct arena *arena, struct string str)
{
    __prof;
    struct string out = { .len = 0, .text = arena_dry_push(arena, u8) };

    append_char(arena, &out, '"');
    for (u64 i = 0; i < str.len; ++i) {
        u8 c = str.text[i];
        u8 escape = 0;
        switch (c) {
            case '"':  { escape = '"'; } break;
            case '\\': { escape = '\\'; } break;
            case '\b': { escape = 'b'; } break;
            case '\f': { escape = 'f'; } break;
            case '\n': { escape = 'n'; } break;
            case '\r': { escape = 'r'; } break;
            case '\t': { escape = 't'; } break;
            default: break;
        }
        if (escape) {
            append_char(arena, &out, '\\');
            append_char(arena, &out, escape);
        } else {
            append_char(arena, &out, c);
        }
    }
    append_char(arena, &out, '"');

    return out.len;
}

/* Serializes `val` (optionally prefixed by `"key": `) onto `arena`, indented
 * by `indent * level` spaces. The result is built from consecutive arena
 * pushes, so the returned string starts at the arena position on entry and
 * its length is the sum of everything pushed. Returns an empty string if
 * `val` is NULL. */
INTERNAL struct string json_dump_to_string_internal(struct arena *arena, const struct string *key, const struct json_val *val, u32 indent, u32 level)
{
    __prof;
    if (!val) {
        return (struct string) { 0 };
    }

    u8 *final_text = arena_dry_push(arena, u8);
    u64 final_len = 0;

    u32 indent_len = indent * level;
    final_len += string_repeat(arena, STR(" "), indent_len).len;

    if (key) {
        final_len += dump_quoted_string(arena, *key);
        final_len += string_copy(arena, STR(": ")).len;
    }

    switch (val->type) {
        case JSON_TYPE_OBJECT: {
            u32 count = val->child_count;
            const struct json_object_entry *entries = get_object_entries(val);

            final_len += string_copy(arena, STR("{\n")).len;
            for (u32 i = 0; i < count; ++i) {
                const struct json_object_entry *entry = &entries[i];
                struct string value_str = json_dump_to_string_internal(arena, &entry->key, &entry->value, indent, level + 1);
                final_len += value_str.len;
                /* Every entry but the last is followed by a comma */
                if ((i + 1) < count) {
                    final_len += string_copy(arena, STR(",\n")).len;
                } else {
                    final_len += string_copy(arena, STR("\n")).len;
                }
            }
            final_len += string_repeat(arena, STR(" "), indent_len).len;
            final_len += string_copy(arena, STR("}")).len;
        } break;

        case JSON_TYPE_ARRAY: {
            u32 count = val->child_count;

            final_len += string_copy(arena, STR("[\n")).len;
            for (u32 i = 0; i < count; ++i) {
                const struct json_val *child = &val->val.array_children[i];
                struct string value_str = json_dump_to_string_internal(arena, NULL, child, indent, level + 1);
                final_len += value_str.len;
                /* Every element but the last is followed by a comma */
                if ((i + 1) < count) {
                    final_len += string_copy(arena, STR(",\n")).len;
                } else {
                    final_len += string_copy(arena, STR("\n")).len;
                }
            }
            final_len += string_repeat(arena, STR(" "), indent_len).len;
            final_len += string_copy(arena, STR("]")).len;
        } break;

        case JSON_TYPE_STRING: {
            final_len += dump_quoted_string(arena, *val->val.string);
        } break;

        case JSON_TYPE_NUMBER: {
            /* The number is formatted into scratch memory first so that only
             * the (possibly shortened) final text lands on `arena` */
            struct temp_arena scratch = scratch_begin(arena);

            /* '.' followed by 14 zeros: 14 is passed to string_from_float
             * (presumably the fractional digit count), and an all-zero
             * fraction is stripped so whole numbers print without it */
            const struct string precision_str = STR(".00000000000000");
            struct string num_str = string_from_float(scratch.arena, val->val.number, (u32)precision_str.len - 1);
            if (string_ends_with(num_str, precision_str)) {
                num_str.len -= precision_str.len;
            }
            final_len += string_copy(arena, num_str).len;

            scratch_end(scratch);
        } break;

        case JSON_TYPE_NULL: {
            final_len += string_copy(arena, STR("null")).len;
        } break;

        case JSON_TYPE_BOOL: {
            struct string str = val->val.boolean ? STR("true") : STR("false");
            final_len += string_copy(arena, str).len;
        } break;

        case JSON_TYPE_INVALID: {
            /* Unknown type */
            ASSERT(false);
            return (struct string) { 0 };
        } break;
    }

    return (struct string) {
        .text = final_text,
        .len = final_len
    };
}

/* Serializes `val` to JSON text on `arena`, indenting nested values by
 * `indent` spaces per nesting level */
struct string json_dump_to_string(struct arena *arena, const struct json_val *val, u32 indent)
{
    __prof;
    return json_dump_to_string_internal(arena, NULL, val, indent, 0);
}

/* ========================== *
 * Write
 * ========================== */

/* Allocates an empty object IR node on `arena` */
struct json_ir *json_ir_object(struct arena *arena)
{
    __prof;
    struct json_ir *ir = arena_push(arena, struct json_ir);
    *ir = (struct json_ir) {
        .type = JSON_TYPE_OBJECT
    };
    return ir;
}

/* Allocates a number IR node holding `n` on `arena` */
struct json_ir *json_ir_number(struct arena *arena, f64 n)
{
    __prof;
    struct json_ir *ir = arena_push(arena, struct json_ir);
    *ir = (struct json_ir) {
        .type = JSON_TYPE_NUMBER,
        .val.number = n
    };
    return ir;
}

/* Allocates a boolean IR node holding `b` on `arena` */
struct json_ir *json_ir_bool(struct arena *arena, b32 b)
{
    __prof;
    struct json_ir *ir = arena_push(arena, struct json_ir);
    *ir = (struct json_ir) {
        .type = JSON_TYPE_BOOL,
        .val.boolean = b
    };
    return ir;
}

#if 0
/* NOTE: Does NOT copy `str` text */
struct json_ir *json_ir_string(struct arena *arena, struct string str)
{
    __prof;
    struct json_ir *ir = ARENA_PUSH_STRUCT(arena, struct json_ir);
    *ir = (struct json_ir) {
        .type = JSON_TYPE_STRING,
        .v_string = str
    };
    return ir;
}
#endif

/* Appends `value` to the object `obj` under `key` and returns `value`.
 * The key text is not copied. Asserts that `obj` is an object and that
 * `value` has not already been given a key. */
struct json_ir *json_ir_object_set(struct json_ir *obj, struct string key, struct json_ir *value)
{
    __prof;
    ASSERT(obj->type == JSON_TYPE_OBJECT);
    /* Value already set previously */
    ASSERT(value->key.len == 0 && value->key.text == NULL);

    value->key = key;

    if (obj->val.children.child_last) {
        obj->val.children.child_last->next_child = value;
    } else if (!obj->val.children.child_first) {
        obj->val.children.child_first = value;
    }
    obj->val.children.child_last = value;
    obj->val.children.child_count++;

    return value;
}