// power_play/src/json/json.c
// (931 lines, 20 KiB, C)

// TODO (if we want to be JSON standard compliant):
// - Support unicode escape sequences in strings (\u)
// - Don't allow leading 0s in numbers
////////////////////////////////////////////////////////////
//~ Lex
// Allocates a new token from `arena` and appends it to the tail of `list`.
// The new token is returned so the caller can fill in its kind and span.
JSON_Token *JSON_PushToken(Arena *arena, JSON_TokenList *list)
{
    JSON_Token *token = PushStruct(arena, JSON_Token);

    // Pick the link that should point at the new token: the list head when
    // the list is still empty, otherwise the current tail's `next`.
    JSON_Token **link = list->token_first ? &list->token_last->next : &list->token_first;
    *link = token;

    list->token_last = token;
    return token;
}
// Lexes `src` into a linked list of tokens allocated from `arena`.
//
// The list always begins with a Bof token. Every following token records its
// span as byte offsets [start, end) into `src`. On success the list ends with
// an Eof token whose `next` points at itself, so consumers can keep advancing
// without running off the list.
//
// If text is encountered that does not form a known token, that text is
// emitted as a single Unknown token and lexing stops immediately; in that
// case no Eof token is appended.
//
// NOTE(review): detection of unknown tokens relies on a freshly pushed token
// starting out with kind == JSON_TokenKind_Unknown (presumably the zero value
// produced by PushStruct) - confirm.
JSON_TokenList JSON_TokensFromString(Arena *arena, String src)
{
    JSON_TokenList result = ZI;

    JSON_Token *bof = JSON_PushToken(arena, &result);
    bof->kind = JSON_TokenKind_Bof;

    u64 pos = 0;
    b32 lexing_done = 0;
    while (!lexing_done)
    {
        // Skip whitespace
        b32 whitespace_done = 0;
        while (!whitespace_done && pos < src.len)
        {
            switch (src.text[pos])
            {
                default:
                {
                    whitespace_done = 1;
                } break;

                case JSON_Case_Newline:
                case JSON_Case_Space:
                {
                    ++pos;
                } break;
            }
        }

        // Create token
        JSON_Token *t = JSON_PushToken(arena, &result);
        t->start = pos;

        if (pos >= src.len)
        {
            t->kind = JSON_TokenKind_Eof;
            t->next = t; // Self reference
            lexing_done = 1;
        }
        else
        {
            // Lex known token kinds
            switch (src.text[pos])
            {
                default: break;

                // Symbols
                case ',':
                {
                    t->kind = JSON_TokenKind_Comma;
                    ++pos;
                } break;

                case ':':
                {
                    t->kind = JSON_TokenKind_Colon;
                    ++pos;
                } break;

                case '[':
                {
                    t->kind = JSON_TokenKind_SquareBraceOpen;
                    ++pos;
                } break;

                case ']':
                {
                    t->kind = JSON_TokenKind_SquareBraceClose;
                    ++pos;
                } break;

                case '{':
                {
                    t->kind = JSON_TokenKind_CurlyBraceOpen;
                    ++pos;
                } break;

                case '}':
                {
                    t->kind = JSON_TokenKind_CurlyBraceClose;
                    ++pos;
                } break;

                // Number
                case '-':
                {
                    // Verify '-' precedes digit
                    b32 next_is_digit = 0;
                    if ((pos + 1) < src.len)
                    {
                        switch (src.text[pos + 1])
                        {
                            default: break;

                            case JSON_Case_Digit0Through9:
                            {
                                next_is_digit = 1;
                            } break;
                        }
                    }
                    ++pos;
                    if (!next_is_digit)
                    {
                        // A lone '-' is not a number; leave the token Unknown
                        break;
                    }
                } FALLTHROUGH;
                case JSON_Case_Digit0Through9:
                {
                    t->kind = JSON_TokenKind_Number;
                    JSON_LexNumberState state = JSON_LexNumberState_Whole;
                    b32 number_done = 0;
                    while (!number_done && pos < src.len)
                    {
                        switch (src.text[pos])
                        {
                            default:
                            {
                                number_done = 1;
                            } break;

                            case JSON_Case_Digit0Through9:
                            {
                                ++pos;
                            } break;

                            case '.':
                            {
                                // '.' only belongs to the number while still in
                                // the whole part and when a digit follows it
                                u64 consume = 0;
                                if (state == JSON_LexNumberState_Whole && (pos + 1) < src.len)
                                {
                                    u8 c1 = src.text[pos + 1];
                                    switch (c1)
                                    {
                                        default: break;

                                        case JSON_Case_Digit0Through9:
                                        {
                                            // Consume '.'
                                            ++consume;
                                        } break;
                                    }
                                }
                                if (consume)
                                {
                                    state = JSON_LexNumberState_Fraction;
                                    pos += consume;
                                }
                                else
                                {
                                    number_done = 1;
                                }
                            } break;

                            case 'e':
                            case 'E':
                            {
                                // The exponent marker only belongs to the number
                                // when a digit, or a sign and a digit, follows it
                                u64 consume = 0;
                                if ((state == JSON_LexNumberState_Whole || state == JSON_LexNumberState_Fraction) && (pos + 1) < src.len)
                                {
                                    u8 c1 = src.text[pos + 1];
                                    switch (c1)
                                    {
                                        default: break;

                                        case JSON_Case_Digit0Through9:
                                        {
                                            // Consume 'E'/'e'
                                            ++consume;
                                        } break;

                                        case '-':
                                        case '+':
                                        {
                                            if ((pos + 2) < src.len)
                                            {
                                                u8 c2 = src.text[pos + 2];
                                                switch (c2)
                                                {
                                                    default: break;

                                                    case JSON_Case_Digit0Through9:
                                                    {
                                                        // Consume 'E'/'e' & '+'/'-'
                                                        consume += 2;
                                                    } break;
                                                }
                                            }
                                        } break;
                                    }
                                }
                                if (consume)
                                {
                                    state = JSON_LexNumberState_Exponent;
                                    pos += consume;
                                }
                                else
                                {
                                    number_done = 1;
                                }
                            } break;
                        }
                    }
                } break;

                // String
                case '"':
                {
                    ++pos;
                    b32 string_done = 0;
                    b32 next_escaped = 0;
                    while (!string_done && pos < src.len)
                    {
                        b32 escaped = next_escaped;
                        next_escaped = 0;
                        switch (src.text[pos])
                        {
                            default:
                            {
                                ++pos;
                            } break;

                            case JSON_Case_Newline:
                            {
                                // A raw newline ends the token without marking
                                // it as a string, so it is reported as Unknown
                                ++pos;
                                string_done = 1;
                            } break;

                            case '"':
                            {
                                ++pos;
                                if (!escaped)
                                {
                                    t->kind = JSON_TokenKind_String;
                                    string_done = 1;
                                }
                            } break;

                            case '\\':
                            {
                                ++pos;
                                if (!escaped)
                                {
                                    next_escaped = 1;
                                }
                            } break;
                        }
                    }
                } break;

                // Keywords
                case 't':
                case 'f':
                case 'n':
                {
                    String keyword = JSON_keyword_strings[src.text[pos]];

                    // A keyword can only match if it fits entirely inside the
                    // remaining input. Without this check a truncated keyword at
                    // the end of the input (e.g. "tr") would be accepted and
                    // `pos` would be advanced past the end of `src`.
                    b32 match = (keyword.len > 0) && (keyword.len <= (src.len - pos));

                    if (match && (pos + keyword.len) < src.len)
                    {
                        // Don't match if word continues past keyword
                        switch (src.text[pos + keyword.len])
                        {
                            default:
                            {
                                match = 0;
                            } break;

                            case JSON_Case_Symbol:
                            case JSON_Case_Space:
                            case JSON_Case_Newline:
                            {
                            } break;
                        }
                    }

                    if (match)
                    {
                        String cmp_str = {
                            .len = keyword.len,
                            .text = &src.text[pos]
                        };
                        match = MatchString(cmp_str, keyword);
                    }

                    if (match)
                    {
                        t->kind = JSON_keyword_types[src.text[pos]];
                        pos += keyword.len;
                    }
                } break;
            }
        }

        // Lex unknown token
        if (t->kind == JSON_TokenKind_Unknown)
        {
            // Extend the unknown token up to the next symbol or whitespace
            b32 unknown_done = 0;
            while (!unknown_done && pos < src.len)
            {
                switch (src.text[pos])
                {
                    default:
                    {
                        ++pos;
                    } break;

                    case JSON_Case_Symbol:
                    case JSON_Case_Space:
                    case JSON_Case_Newline:
                    {
                        unknown_done = 1;
                    } break;
                }
            }
            t->end = pos;

            // Exit early if unknown token encountered
            return result;
        }
        else
        {
            t->end = pos;
        }
    }

    return result;
}
////////////////////////////////////////////////////////////
//~ Interpret
// Converts the text of an already-lexed number token into an f64.
//
// Expected shape of `src`: [-]digits[.digits][(e|E)[+|-]digits]
// Any other character trips an Assert and is skipped.
//
// The value is accumulated in floating point, so long digit runs and large
// exponents saturate towards infinity / zero instead of wrapping around in
// 64-bit integer arithmetic. The sign is applied to the combined whole and
// fraction parts, so "-1.5" evaluates to -1.5.
f64 interpret_number(String src)
{
    b32 whole_present = 0;
    u64 whole_left = 0;
    u64 whole_right = 0;
    i32 whole_sign = 1;

    b32 fraction_present = 0;
    u64 fraction_left = 0;
    u64 fraction_right = 0;

    b32 exponent_present = 0;
    u64 exponent_left = 0;
    u64 exponent_right = 0;
    i32 exponent_sign = 1;

    // Lex number parts: record the inclusive [left, right] digit range of each part
    {
        u64 pos = 0;
        if (src.len > 0 && src.text[0] == '-')
        {
            whole_sign = -1;
            ++pos;
        }

        JSON_LexNumberState state = JSON_LexNumberState_Whole;
        while (pos < src.len)
        {
            switch (src.text[pos])
            {
                default:
                {
                    // Unreachable
                    Assert(0);
                    ++pos;
                } break;

                case JSON_Case_Digit0Through9:
                {
                    switch (state)
                    {
                        case JSON_LexNumberState_Whole:
                        {
                            if (!whole_present)
                            {
                                whole_present = 1;
                                whole_left = pos;
                            }
                            whole_right = pos;
                            ++pos;
                        } break;

                        case JSON_LexNumberState_Fraction:
                        {
                            if (!fraction_present)
                            {
                                fraction_present = 1;
                                fraction_left = pos;
                            }
                            fraction_right = pos;
                            ++pos;
                        } break;

                        case JSON_LexNumberState_Exponent:
                        {
                            if (!exponent_present)
                            {
                                exponent_present = 1;
                                exponent_left = pos;
                            }
                            exponent_right = pos;
                            ++pos;
                        } break;
                    }
                } break;

                case '.':
                {
                    state = JSON_LexNumberState_Fraction;
                    ++pos;
                } break;

                case 'e':
                case 'E':
                {
                    state = JSON_LexNumberState_Exponent;
                    ++pos;
                } break;

                case '-':
                {
                    switch (state)
                    {
                        default:
                        {
                            // Unreachable
                            Assert(0);
                            ++pos;
                        } break;

                        case JSON_LexNumberState_Whole:
                        {
                            whole_sign = -1;
                            ++pos;
                        } break;

                        case JSON_LexNumberState_Exponent:
                        {
                            exponent_sign = -1;
                            ++pos;
                        } break;
                    }
                } break;

                case '+':
                {
                    switch (state)
                    {
                        default:
                        {
                            // Unreachable
                            Assert(0);
                            ++pos;
                        } break;

                        case JSON_LexNumberState_Exponent:
                        {
                            exponent_sign = 1;
                            ++pos;
                        } break;
                    }
                } break;
            }
        }
    }

    f64 result = 0;

    // Process whole part (magnitude only, sign is applied further down)
    if (whole_present)
    {
        for (u64 pos = whole_left; pos <= whole_right; ++pos)
        {
            u8 digit = MinU8(src.text[pos] - '0', 9);
            result = (result * 10) + digit;
        }
    }

    // Process fraction part
    if (fraction_present)
    {
        f64 numerator = 0;
        f64 denominator = 1;
        // Stop before the denominator can overflow to infinity; digits that
        // far out are below anything an f64 can represent next to the rest.
        for (u64 pos = fraction_left; pos <= fraction_right && denominator < 1e300; ++pos)
        {
            u8 digit = MinU8(src.text[pos] - '0', 9);
            numerator = (numerator * 10) + digit;
            denominator *= 10;
        }
        result += numerator / denominator;
    }

    // Apply the sign only after whole and fraction have been combined,
    // otherwise the fraction would be added in the wrong direction.
    result *= whole_sign;

    // Process exponent part
    if (exponent_present)
    {
        // 10^400 is already beyond the f64 range in either direction, so
        // clamping here changes nothing for representable results while
        // keeping the accumulator and the loop below bounded.
        // Limitation: an extremely small/large mantissa combined with an
        // exponent beyond this clamp is not scaled back into range.
        u64 max_exponent = 400;
        u64 exponent_value = 0;
        for (u64 pos = exponent_left; pos <= exponent_right; ++pos)
        {
            u8 digit = MinU8(src.text[pos] - '0', 9);
            exponent_value = (exponent_value * 10) + digit;
            if (exponent_value > max_exponent)
            {
                exponent_value = max_exponent;
            }
        }

        f64 scale = 1;
        for (u64 i = 0; i < exponent_value; ++i)
        {
            scale *= 10;
        }

        // Zero stays zero (with its sign) regardless of the exponent; skipping
        // it also avoids 0 * infinity when the scale has saturated.
        if (result != 0)
        {
            if (exponent_sign >= 0)
            {
                result *= scale;
            }
            else
            {
                result /= scale;
            }
        }
    }

    return result;
}
// Decodes the text of a string token (including its surrounding quotes) into
// a new string whose bytes are pushed one at a time onto `arena`.
//
// Supported escapes: \" \\ \/ \b \f \n \r \t. Unicode escapes (\u) are not
// supported yet and are reported as an invalid escape character.
//
// On failure, `*error` (when `error` is non-null) is set to a message and the
// bytes decoded so far are returned. `*error` is left untouched on success.
String interpret_string(Arena *arena, String src, String *error)
{
    String result = {
        .len = 0,
        .text = PushDry(arena, u8)
    };

    // Need at least an opening and a closing quote
    if (src.len < 2)
    {
        if (error)
        {
            *error = Lit("Malformed string.");
        }
        return result;
    }

    // Ignore beginning quote
    u64 pos = 1;
    b32 valid_close = 0;
    // Set when the previous character was an unescaped backslash. This must
    // persist across loop iterations for escape sequences to be decoded.
    b32 escaped = 0;
    while (!valid_close && pos < src.len)
    {
        u8 c = src.text[pos];
        ++pos;

        if (escaped)
        {
            escaped = 0;
            u8 decoded = 0;
            switch (c)
            {
                default:
                {
                    // TODO: Unicode escape support (\u)
                    if (error)
                    {
                        *error = Lit("Invalid escape character in string.");
                    }
                    return result;
                } break;

                case '"':
                case '\\':
                case '/':
                {
                    decoded = c;
                } break;

                // Backspace
                case 'b':
                {
                    decoded = '\b';
                } break;

                // Formfeed
                case 'f':
                {
                    decoded = '\f';
                } break;

                // Linefeed
                case 'n':
                {
                    decoded = '\n';
                } break;

                // Carriage return
                case 'r':
                {
                    decoded = '\r';
                } break;

                // Horizontal tab
                case 't':
                {
                    decoded = '\t';
                } break;
            }
            *PushStructNoZero(arena, u8) = decoded;
            ++result.len;
        }
        else
        {
            switch (c)
            {
                default:
                {
                    *PushStructNoZero(arena, u8) = c;
                    ++result.len;
                } break;

                case '\\':
                {
                    // Next character is part of an escape sequence
                    escaped = 1;
                } break;

                case '"':
                {
                    // Unescaped quote terminates the string
                    valid_close = 1;
                } break;
            }
        }
    }

    if (!valid_close)
    {
        if (error)
        {
            *error = Lit("Expected end of string.");
        }
    }

    return result;
}
////////////////////////////////////////////////////////////
//~ Parse
// Records a parse error on `p`: allocates a JSON_Error from `arena`, copies
// the message and the span of token `t` into it, and appends it to the tail
// of the parser's error list.
void JSON_PushError(Arena *arena, JSON_Parser *p, JSON_Token *t, String msg)
{
    JSON_ErrorList *errors = &p->errors;

    JSON_Error *entry = PushStruct(arena, JSON_Error);
    entry->start = t->start;
    entry->end = t->end;
    entry->msg = msg;

    // Append to tail (or become the head of an empty list)
    if (errors->first)
    {
        errors->last->next = entry;
    }
    else
    {
        errors->first = entry;
    }
    errors->last = entry;
    errors->count += 1;
}
// Parses one JSON value (with all nested children) starting at `p->at`.
//
// Blobs, decoded strings and errors are allocated from `arena`. On return
// `p->root` is the root blob and `p->at` is the token where parsing stopped.
// Parsing aborts at the first error, which is appended to `p->errors`; the
// partially built tree is still reachable through `p->root`.
//
// The traversal is iterative: a stack of pending blobs lives on a scratch
// arena. A blob popped with no type yet has its value parsed; a blob popped
// as an object/array has already had its children parsed and only needs its
// closing brace checked.
void JSON_Parse(Arena *arena, JSON_Parser *p)
{
    TempArena scratch = BeginScratch(arena);

    JSON_Blob *root = PushStruct(arena, JSON_Blob);

    JSON_Token *at = p->at;
    String src = p->src;

    if (at->kind == JSON_TokenKind_Bof)
    {
        at = at->next;
    }

    // Depth first stack
    *PushStructNoZero(scratch.arena, JSON_Blob *) = root;
    u64 stack_count = 1;

    while (stack_count > 0)
    {
        JSON_Blob *json = 0;
        PopStruct(scratch.arena, JSON_Blob *, &json);
        --stack_count;

        JSON_Blob *parent_json = json->parent;
        b32 is_new_parent = 0;

        if (json->type == JSON_Type_Object || json->type == JSON_Type_Array)
        {
            // No more children to parse for object/array, check for closing brace.
            JSON_TokenKind tok_close_kind = json->type == JSON_Type_Object ? JSON_TokenKind_CurlyBraceClose : JSON_TokenKind_SquareBraceClose;
            if (at->kind == tok_close_kind)
            {
                at = at->next;
            }
            else
            {
                JSON_PushError(arena, p, at, Lit("Expected comma."));
                at = at->next;
                goto abort;
            }
        }
        else
        {
            if (parent_json)
            {
                if (parent_json->type == JSON_Type_Object)
                {
                    // Parse key
                    if (at->kind == JSON_TokenKind_String)
                    {
                        String t_text = (String) { .len = at->end - at->start, .text = &src.text[at->start] };
                        String error = ZI;
                        String key = interpret_string(arena, t_text, &error);
                        if (error.len > 0)
                        {
                            JSON_PushError(arena, p, at, error);
                            goto abort;
                        }
                        else
                        {
                            json->key = key;
                            at = at->next;
                        }
                    }
                    else
                    {
                        JSON_PushError(arena, p, at, Lit("Key expected."));
                        goto abort;
                    }

                    // Parse colon
                    if (at->kind == JSON_TokenKind_Colon)
                    {
                        at = at->next;
                    }
                    else
                    {
                        JSON_PushError(arena, p, at, Lit("Colon expected."));
                        goto abort;
                    }
                }

                // Link into the parent's child list before parsing the value
                if (parent_json->child_last)
                {
                    parent_json->child_last->next = json;
                }
                else
                {
                    parent_json->child_first = json;
                }
                parent_json->child_last = json;
            }

            // Parse value
            switch (at->kind)
            {
                default:
                {
                    JSON_PushError(arena, p, at, Lit("Value expected."));
                    at = at->next;
                    goto abort;
                } break;

                case JSON_TokenKind_Number:
                {
                    String t_text = STRING(at->end - at->start, &src.text[at->start]);
                    f64 value = interpret_number(t_text);
                    json->type = JSON_Type_Number;
                    json->value.number = value;
                    at = at->next;
                } break;

                case JSON_TokenKind_String:
                {
                    String t_text = STRING(at->end - at->start, &src.text[at->start]);
                    String error = ZI;
                    String value = interpret_string(arena, t_text, &error);
                    if (error.len > 0)
                    {
                        JSON_PushError(arena, p, at, error);
                        goto abort;
                    }
                    else
                    {
                        json->type = JSON_Type_String;
                        json->value.string = value;
                        at = at->next;
                    }
                } break;

                case JSON_TokenKind_KeywordTrue:
                {
                    json->type = JSON_Type_Bool;
                    json->value.boolean = 1;
                    at = at->next;
                } break;

                case JSON_TokenKind_KeywordFalse:
                {
                    json->type = JSON_Type_Bool;
                    json->value.boolean = 0;
                    at = at->next;
                } break;

                case JSON_TokenKind_KeywordNull:
                {
                    json->type = JSON_Type_Null;
                    at = at->next;
                } break;

                case JSON_TokenKind_CurlyBraceOpen:
                {
                    json->type = JSON_Type_Object;
                    at = at->next;
                    is_new_parent = 1;
                } break;

                case JSON_TokenKind_SquareBraceOpen:
                {
                    json->type = JSON_Type_Array;
                    at = at->next;
                    is_new_parent = 1;
                } break;
            }
        }

        if (is_new_parent)
        {
            // Push self back to stack to re-check for closing brace later
            *PushStructNoZero(scratch.arena, JSON_Blob *) = json;
            ++stack_count;

            // Only create a first child when the container is not immediately
            // closed; otherwise empty containers ("{}" / "[]") would be
            // rejected while trying to parse a child that does not exist.
            JSON_TokenKind tok_close_kind = json->type == JSON_Type_Object ? JSON_TokenKind_CurlyBraceClose : JSON_TokenKind_SquareBraceClose;
            if (at->kind != tok_close_kind)
            {
                // Create child & push to stack
                JSON_Blob *child = PushStruct(arena, JSON_Blob);
                child->parent = json;
                *PushStructNoZero(scratch.arena, JSON_Blob *) = child;
                ++stack_count;
            }
        }
        else if (parent_json)
        {
            // Check for comma
            if (at->kind == JSON_TokenKind_Comma)
            {
                // Create sibling & push to stack
                JSON_Blob *sibling = PushStruct(arena, JSON_Blob);
                sibling->parent = parent_json;
                *PushStructNoZero(scratch.arena, JSON_Blob *) = sibling;
                ++stack_count;
                at = at->next;
            }
        }
    }

    abort:

    p->at = at;
    p->root = root;

    EndScratch(scratch);
}
// Lexes and parses `src` as a single JSON document.
//
// Tokens are lexed onto a scratch arena that is released before returning,
// while the parser is handed `arena` for its allocations. The returned result
// carries the root blob and the list of errors; anything other than the Eof
// token after a cleanly parsed root value is reported as an error.
JSON_Result JSON_BlobFromString(Arena *arena, String src)
{
    JSON_Result result = ZI;

    TempArena scratch = BeginScratch(arena);
    {
        JSON_TokenList tokens = JSON_TokensFromString(scratch.arena, src);

        // Parse root
        JSON_Parser parser = ZI;
        parser.src = src;
        parser.at = tokens.token_first;
        JSON_Parse(arena, &parser);

        // Verify end of file (only inspected when parsing reported no errors)
        b32 parsed_cleanly = (parser.errors.count == 0);
        if (parsed_cleanly && parser.at->kind != JSON_TokenKind_Eof)
        {
            JSON_PushError(arena, &parser, parser.at, Lit("Expected end of file."));
        }

        result.root = parser.root;
        result.errors = parser.errors;
    }
    EndScratch(scratch);

    return result;
}