#include "bitbuff.h"
#include "memory.h"
#include "arena.h"

/* TODO: Safety check that functions taking byte length can't overflow bit conversion (log2(num_bytes) > 61) */

#define WRITE_OVERFLOW_ARENA_PUSH_SIZE 4096

/* ========================== *
 * Debug
 * ========================== */

#if BITBUFF_DEBUG

/* Magic numbers inserted to verify read/write type & length */
enum dbg_magic {
    DBG_MAGIC_ALIGN  = 0x20A4,
    DBG_MAGIC_UBITS  = 0xCB4A,
    DBG_MAGIC_IBITS  = 0xB30D,
    DBG_MAGIC_UV     = 0xE179,
    DBG_MAGIC_IV     = 0x981f,
    DBG_MAGIC_F32    = 0x56F9,
    DBG_MAGIC_F64    = 0x7053,
    DBG_MAGIC_UID    = 0xA24E,
    DBG_MAGIC_STRING = 0x7866,
};

INTERNAL void bw_write_ubits_nomagic(struct bitbuff_writer *bw, u64 value, u8 num_bits);

/* Writes a 24 bit debug tag: the 16 bit magic in the low bits and the
 * 8 bit `num_bits` above it. Does nothing when debugging is disabled on
 * the writer or when the tag would not fit. */
INTERNAL void _dbg_write_magic(struct bitbuff_writer *bw, enum dbg_magic magic, u8 num_bits)
{
    if (bw->debug_enabled) {
        if (bw_check_overflow_bits(bw, 24)) {
            return;
        }
        u64 magic_ubits = (u64)magic | ((u64)num_bits << 16);
        bw_write_ubits_nomagic(bw, magic_ubits, 24);
    }
}

INTERNAL u64 br_read_ubits_nomagic(struct bitbuff_reader *br, u8 num_bits);

/* Reads a 24 bit debug tag and asserts that it matches the expected magic
 * and bit count. Does nothing when debugging is disabled on the reader or
 * when fewer than 24 bits remain. */
INTERNAL void _dbg_read_magic(struct bitbuff_reader *br, enum dbg_magic expected_magic, u8 expected_num_bits)
{
    if (br->debug_enabled) {
        if (br_check_overflow_bits(br, 24)) {
            return;
        }
        u64 stored = br_read_ubits_nomagic(br, 24);
        enum dbg_magic stored_magic = stored & 0xFFFF;
        u8 stored_num_bits = (stored >> 16) & 0xFF;
        /* Verify stored magic match */
        ASSERT(expected_magic == stored_magic);
        /* Verify stored bit count match */
        ASSERT(expected_num_bits == stored_num_bits);
    }
}

#else
#   define _dbg_write_magic(a, b, c)
#   define _dbg_read_magic(a, b, c)
#endif

/* ========================== *
 * Utils
 * ========================== */

/* Returns the `num_bits` wide twos compliment negation of `value`
 * (i.e. `-value` truncated to `num_bits` bits). */
INTERNAL u64 uint_to_twos_compliment(u64 value, u8 num_bits)
{
    u64 mask = U64_MAX;
    if (num_bits < 64) {
        mask = ~(U64_MAX << num_bits);
    }
    u64 tc = (~value & mask) + 1;
    tc &= mask;
    return tc;
}

/* Interprets `tc` as a `num_bits` wide twos compliment number.
 * `num_bits` must be in [1, 64]. */
INTERNAL i64 sint_from_twos_compliment(u64 tc, u8 num_bits)
{
    u64 msb_mask = (u64)1 << (num_bits - 1);
    /* Subtract the sign bit's weight using unsigned (wrapping) arithmetic.
     * Negating it as an i64 overflows when num_bits == 64. */
    u64 value = (tc & ~msb_mask) - (tc & msb_mask);
    return (i64)value;
}

/* ========================== *
 * Bitbuff
 * ========================== */

/* Creates a bitbuff backed by a newly allocated arena */
struct bitbuff bitbuff_alloc(u64 arena_reserve)
{
    struct bitbuff res = ZI;
    res.arena = arena_alloc(arena_reserve);
    res.is_backed_by_arena = true;
    return res;
}

void bitbuff_release(struct bitbuff *bb)
{
    /* Only arena bitbuffs need to be released */
    if (bb->is_backed_by_arena) {
        arena_release(bb->arena);
    }
}

/* Creates a bitbuff over the fixed memory referenced by `s` (no copy is made) */
struct bitbuff bitbuff_from_string(struct string s)
{
    struct bitbuff res = ZI;
    res.fixed_buffer = s;
    return res;
}

/* ========================== *
 * Writer
 * ========================== */

/* Creates a writer positioned at the first bit of `bb` */
struct bitbuff_writer bw_from_bitbuff(struct bitbuff *bb)
{
    struct bitbuff_writer res = ZI;
    res.bb = bb;
    if (bb->is_backed_by_arena) {
        res.base = arena_base(bb->arena);
    } else {
        res.base = bb->fixed_buffer.text;
    }
    res.cur_bit = 0;
#if BITBUFF_DEBUG
    res.debug_enabled = true;
#endif
    return res;
}

/* Use this when writing external formats that will not verify bitbuff debug symbols / magic numbers */
struct bitbuff_writer bw_from_bitbuff_no_debug(struct bitbuff *bb)
{
    struct bitbuff_writer res = bw_from_bitbuff(bb);
#if BITBUFF_DEBUG
    res.debug_enabled = false;
#endif
    return res;
}

/* FIXME: Handle overflowed bw */
u64 bw_num_bits_written(struct bitbuff_writer *bw)
{
    return bw->cur_bit;
}

/* Returns the number of bytes touched so far (a partial byte counts as one) */
/* FIXME: Handle overflowed bw */
u64 bw_num_bytes_written(struct bitbuff_writer *bw)
{
    return (bw->cur_bit + 7) >> 3;
}

/* Copies the written bytes into memory pushed onto `arena` */
/* FIXME: Handle overflowed bw */
struct string bw_get_written(struct arena *arena, struct bitbuff_writer *bw)
{
    struct string res = ZI;
    res.len = (bw->cur_bit + 7) >> 3;
    res.text = arena_push_array_no_zero(arena, u8, res.len);
    MEMCPY(res.text, bw->base, res.len);
    return res;
}

/* FIXME: Handle overflowed bw */
u8 *bw_get_written_raw(struct bitbuff_writer *bw)
{
    return bw->base;
}

/* Returns true if num_bits would cause the writer to overflow its fixed buffer size (if writer is not backed by a dynamic arena bitbuff).
 * Arena backed writers are grown instead. Once a writer has overflowed it
 * stays overflowed and this always returns true. */
b32 bw_check_overflow_bits(struct bitbuff_writer *bw, u64 num_bits)
{
    b32 res = false;
    struct bitbuff *bb = bw->bb;
    if (bw->overflowed) {
        res = true;
    } else {
        u64 bytes_needed = (bw->cur_bit + num_bits + 7) >> 3;
        if (bb->is_backed_by_arena) {
            struct arena *arena = bb->arena;
            if (bytes_needed >= arena->pos) {
                /* Grow arena */
                u64 push_size = (((bytes_needed - arena->pos) / WRITE_OVERFLOW_ARENA_PUSH_SIZE) + 1) * WRITE_OVERFLOW_ARENA_PUSH_SIZE;
                arena_push_array_no_zero(arena, u8, push_size);
            }
        } else {
            u64 max_len = bb->fixed_buffer.len;
            /* A write that ends exactly on the last byte still fits, only
             * needing more than max_len bytes is an overflow. */
            if (bytes_needed > max_len) {
                /* Writer overflowed fixed buffer */
                ASSERT(false);
                res = true;
                bw->cur_bit = max_len << 3;
                bw->overflowed = true;
            }
        }
    }
    return res;
}

/* Align the pos to the next byte */
void bw_align(struct bitbuff_writer *bw)
{
#if BITBUFF_DEBUG
    if ((bw->cur_bit & 7) != 0) {
        _dbg_write_magic(bw, DBG_MAGIC_ALIGN, 0);
    }
#endif
    bw->cur_bit += (8 - (bw->cur_bit & 7)) & 7;
}

/* Writes the low `num_bits` bits of `value`, least significant bit first.
 * The byte copy below stores `value` in host byte order. */
#if BITBUFF_DEBUG
INTERNAL void bw_write_ubits_nomagic(struct bitbuff_writer *bw, u64 value, u8 num_bits)
#else
void bw_write_ubits(struct bitbuff_writer *bw, u64 value, u8 num_bits)
#endif
{
    ASSERT(num_bits > 0 && (num_bits == 64 || value <= ~(U64_MAX << num_bits))); /* Bit count must be able to hold value */
    if (bw_check_overflow_bits(bw, num_bits)) {
        return;
    }

    u8 offset = bw->cur_bit & 7;
    if (offset != 0) {
        /* Write unaligned bits */
        u8 *at = bw->base + (bw->cur_bit >> 3);
        u8 num_mix_bits = min_u8((8 - offset), num_bits);
        u8 mix_byte = (u8)((value & ((1 << num_mix_bits) - 1)) << offset);
        *at |= mix_byte;
        value >>= num_mix_bits;
        num_bits -= num_mix_bits;
        bw->cur_bit += num_mix_bits;
    }

    /* cur_bit is now aligned to byte */
    u8 *at = bw->base + (bw->cur_bit >> 3);
    u8 num_bytes = (num_bits + 7) >> 3;
    MEMCPY(at, &value, num_bytes);
    bw->cur_bit += num_bits;
}

#if BITBUFF_DEBUG
void bw_write_ubits(struct bitbuff_writer *bw, u64 value, u8 num_bits)
{
    _dbg_write_magic(bw, DBG_MAGIC_UBITS, num_bits);
    bw_write_ubits_nomagic(bw, value, num_bits);
}
#endif

/* Writes `value` as a `num_bits` wide twos compliment integer */
void bw_write_ibits(struct bitbuff_writer *bw, i64 value, u8 num_bits)
{
    _dbg_write_magic(bw, DBG_MAGIC_IBITS, num_bits);
    u64 ubits;
    if (value >= 0) {
        ubits = value;
    } else {
        /* Negate in unsigned arithmetic, `-value` overflows for I64_MIN */
        u64 magnitude = (u64)0 - (u64)value;
        ubits = uint_to_twos_compliment(magnitude, num_bits);
    }
    bw_write_ubits(bw, ubits, num_bits);
}

/* Returns written bit to make writing delta encoding logic cleaner */
b32 bw_write_bit(struct bitbuff_writer *bw, u8 value)
{
    bw_write_ubits(bw, value, 1);
    return value;
}

/* Writes a variable length unsigned integer.
 * Value is written in chunks of 7 bits w/ 8th bit signaling continuation. */
void bw_write_uv(struct bitbuff_writer *bw, u64 value)
{
    _dbg_write_magic(bw, DBG_MAGIC_UV, 0);
    while (value > 0x7F) {
        u8 cont_byte = 0x80 | (value & 0x7F);
        bw_write_ubits(bw, cont_byte, 8);
        value >>= 7;
    }
    bw_write_ubits(bw, value, 8);
}

/* Writes a variable length signed integer.
 * Similar to bw_write_uv, except the 7th bit of the first byte is a sign bit
 * indicating that the value is stored in twos compliment. */
void bw_write_iv(struct bitbuff_writer *bw, i64 value)
{
    _dbg_write_magic(bw, DBG_MAGIC_IV, 0);
    u8 sign_bit;
    u64 tc;
    if (value >= 0) {
        sign_bit = 0;
        tc = value;
    } else {
        sign_bit = 1;
        /* Negate in unsigned arithmetic, `-value` overflows for I64_MIN */
        u64 magnitude = (u64)0 - (u64)value;
        /* Pick the smallest width the reader can reconstruct (6 bits from
         * the first byte + 7 per following byte) whose twos compliment range
         * holds the value: magnitude <= 2^(num_bits - 1), which is the same
         * as ((magnitude - 1) >> (num_bits - 1)) == 0. Sizing by the
         * magnitude alone leaves the top bit of `tc` clear for magnitudes
         * above 2^(num_bits - 1), which the reader decodes as positive. */
        u8 num_bits = 6;
        u64 remaining = (magnitude - 1) >> 5;
        while (remaining > 0) {
            num_bits += 7;
            remaining >>= 7;
        }
        num_bits = min_u8(num_bits, 64);
        tc = uint_to_twos_compliment(magnitude, num_bits);
    }

    /* First byte contains not just cont bit, but sign bit as well. */
    u8 first_byte = (tc & 0x3F);
    tc >>= 6;
    first_byte |= (tc > 0) << 7; /* Cont bit */
    first_byte |= sign_bit << 6; /* Sign bit */
    bw_write_ubits(bw, first_byte, 8);

    if (tc > 0) {
        while (tc > 0x7F) {
            u8 cont_byte = 0x80 | (tc & 0x7F);
            bw_write_ubits(bw, cont_byte, 8);
            tc >>= 7;
        }
        bw_write_ubits(bw, tc, 8);
    }
}

/* Writes the 32 bit object representation of `value` */
void bw_write_f32(struct bitbuff_writer *bw, f32 value)
{
    _dbg_write_magic(bw, DBG_MAGIC_F32, 0);
    /* Copy the bits out rather than casting the pointer (strict aliasing) */
    u32 ubits = 0;
    MEMCPY(&ubits, &value, sizeof(ubits));
    bw_write_ubits(bw, ubits, 32);
}

/* Writes the 64 bit object representation of `value` */
void bw_write_f64(struct bitbuff_writer *bw, f64 value)
{
    _dbg_write_magic(bw, DBG_MAGIC_F64, 0);
    /* Copy the bits out rather than casting the pointer (strict aliasing) */
    u64 ubits = 0;
    MEMCPY(&ubits, &value, sizeof(ubits));
    bw_write_ubits(bw, ubits, 64);
}

/* Writes `value.hi` followed by `value.lo`, 64 bits each */
void bw_write_uid(struct bitbuff_writer *bw, struct uid value)
{
    _dbg_write_magic(bw, DBG_MAGIC_UID, 128);
    bw_write_ubits(bw, value.hi, 64);
    bw_write_ubits(bw, value.lo, 64);
}

/* Writes the string length as a variable length integer followed by the
 * (byte aligned) string bytes */
void bw_write_string(struct bitbuff_writer *bw, struct string s)
{
    _dbg_write_magic(bw, DBG_MAGIC_STRING, 0);
    bw_write_uv(bw, s.len);
    bw_write_bytes(bw, s);
}

void bw_write_bytes(struct bitbuff_writer *bw, struct string bytes)
{
    /* Align start of bytes */
    bw_align(bw);

    u64 num_bits = bytes.len << 3;
    if (bw_check_overflow_bits(bw, num_bits)) {
        return;
    }

    u8 *at = bw->base + (bw->cur_bit >> 3);
    MEMCPY(at, bytes.text, bytes.len);
    bw->cur_bit += num_bits;
}

#if BITBUFF_DEBUG
/* Aligns to the next byte (without an align magic) and writes the bytes of
 * `name` untagged */
void bw_write_dbg_marker(struct bitbuff_writer *bw, struct string name)
{
    bw->cur_bit += (8 - (bw->cur_bit & 7)) & 7;
    for (u64 i = 0; i < name.len; ++i) {
        bw_write_ubits_nomagic(bw, name.text[i], 8);
    }
}
#endif

/* ========================== *
 * Reader
 * ========================== */

/* Creates a reader positioned at the first bit of `bb`. For arena backed
 * bitbuffs the readable length is the arena's current pos. */
struct bitbuff_reader br_from_bitbuff(struct bitbuff *bb)
{
    struct bitbuff_reader res = ZI;
    if (!bb->is_backed_by_arena) {
        res.base = bb->fixed_buffer.text;
        res.base_len = bb->fixed_buffer.len;
    } else {
        struct arena *arena = bb->arena;
        res.base = arena_base(arena);
        res.base_len = arena->pos;
    }
    res.cur_bit = 0;
#if BITBUFF_DEBUG
    res.debug_enabled = true;
#endif
    return res;
}

/* Use this when reading from external formats that will not contain bitbuff debug symbols /
magic numbers */ struct bitbuff_reader br_from_bitbuff_no_debug(struct bitbuff *bb) { struct bitbuff_reader res = br_from_bitbuff(bb); #if BITBUFF_DEBUG res.debug_enabled = false; #endif return res; } /* Returns the number of bits read from the bitbuff */ /* FIXME: Handle overflowed br */ u64 br_cur_bit(struct bitbuff_reader *br) { return br->cur_bit; } /* Returns the number of *full* bytes read from the bitbuff */ /* FIXME: Handle overflowed br */ u64 br_cur_byte(struct bitbuff_reader *br) { return br->cur_bit >> 3; } /* Returns the number of bits left until the bitbuff overflows */ /* FIXME: Handle overflowed br */ u64 br_num_bits_left(struct bitbuff_reader *br) { return (br->base_len << 3) - br->cur_bit; } /* Returns the number of *full* bytes left until the bitbuff overflows */ /* FIXME: Handle overflowed br */ u64 br_num_bytes_left(struct bitbuff_reader *br) { return br->base_len - (br->cur_bit >> 3); } b32 br_check_overflow_bits(struct bitbuff_reader *br, u64 num_bits) { b32 res = false; if (br->overflowed) { res = true; } else { u64 bits_needed = br->cur_bit + num_bits; u64 base_len_bits = br->base_len << 3; if (bits_needed > base_len_bits) { /* Tried to read past bitbuff memory */ ASSERT(false); res = true; br->cur_bit = base_len_bits; br->overflowed = true; } } return res; } /* Align the pos to the next byte */ void br_align(struct bitbuff_reader *br) { #if BITBUFF_DEBUG if ((br->cur_bit & 7) != 0) { _dbg_read_magic(br, DBG_MAGIC_ALIGN, 0); } #endif br->cur_bit += (8 - (br->cur_bit & 7)) & 7; } #if BITBUFF_DEBUG INTERNAL u64 br_read_ubits_nomagic(struct bitbuff_reader *br, u8 num_bits) #else u64 br_read_ubits(struct bitbuff_reader *br, u8 num_bits) #endif { if (br_check_overflow_bits(br, num_bits)) { return 0; } u64 res = 0; u8 offset = br->cur_bit & 7; u8 num_trailing_bits = 0; if (offset) { u8 *at = br->base + (br->cur_bit >> 3); num_trailing_bits = min_u8(8 - offset, num_bits); u8 mix_byte = *at; mix_byte >>= offset; mix_byte &= (1 << num_trailing_bits) 
- 1; res = mix_byte; num_bits -= num_trailing_bits; br->cur_bit += num_trailing_bits; } /* cur_bit is now aligned to byte */ u8 *at = br->base + (br->cur_bit >> 3); u8 num_bytes = (num_bits + 7) >> 3; u64 tmp = 0; MEMCPY(&tmp, at, num_bytes); u64 mask = U64_MAX; if (num_bits < 64) { mask = ~(U64_MAX << num_bits); } tmp &= mask; res |= tmp << num_trailing_bits; br->cur_bit += num_bits; return res; } #if BITBUFF_DEBUG u64 br_read_ubits(struct bitbuff_reader *br, u8 num_bits) { _dbg_read_magic(br, DBG_MAGIC_UBITS, num_bits); return br_read_ubits_nomagic(br, num_bits); } #endif i64 br_read_ibits(struct bitbuff_reader *br, u8 num_bits) { ASSERT(num_bits > 1); _dbg_read_magic(br, DBG_MAGIC_IBITS, num_bits); u64 tc = br_read_ubits(br, num_bits); return sint_from_twos_compliment(tc, num_bits); } u8 br_read_bit(struct bitbuff_reader *br) { return br_read_ubits(br, 1); } /* Read a variable length unsigned integer. * See bw_write_uv for details. */ u64 br_read_uv(struct bitbuff_reader *br) { _dbg_read_magic(br, DBG_MAGIC_UV, 0); u64 res = 0; for (u64 i = 0; i <= 9; ++i) { u64 part = br_read_ubits(br, 8); u8 is_last_part = part <= 0x7F; res |= (part & 0x7F) << (i * 7); if (is_last_part) { break; } } return res; } /* Read a variable length signed integer. * See bw_write_iv for details. 
*/ i64 br_read_iv(struct bitbuff_reader *br) { _dbg_read_magic(br, DBG_MAGIC_IV, 0); u8 first_byte = br_read_ubits(br, 8); u8 cont_bit = first_byte & 0x80; u8 sign_bit = first_byte & 0x40; u8 num_bits = 6; u64 tc = first_byte & 0x3F; if (cont_bit) { for (u64 i = 0; i <= 9; ++i) { u64 part = br_read_ubits(br, 8); u8 is_last_part = part <= 0x7F; tc |= (part & 0x7F) << num_bits; num_bits += 7; if (is_last_part) { break; } } } num_bits = min_u8(num_bits, 64); i64 res; if (sign_bit) { /* Sign bit is 1, indicating result is stored in twos compliment */ res = sint_from_twos_compliment(tc, num_bits); } else { res = (i64)tc; } return res; } f32 br_read_f32(struct bitbuff_reader *br) { _dbg_read_magic(br, DBG_MAGIC_F32, 0); u32 ubits = br_read_ubits(br, 32); return *(f32 *)&ubits; } f64 br_read_f64(struct bitbuff_reader *br) { _dbg_read_magic(br, DBG_MAGIC_F64, 0); u64 ubits = br_read_ubits(br, 64); return *(f64 *)&ubits; } struct uid br_read_uid(struct bitbuff_reader *br) { _dbg_read_magic(br, DBG_MAGIC_UID, 128); u64 hi = br_read_ubits(br, 64); u64 lo = br_read_ubits(br, 64); return UID(hi, lo); } struct string br_read_string(struct arena *arena, struct bitbuff_reader *br) { _dbg_read_magic(br, DBG_MAGIC_STRING, 0); struct string res = ZI; u64 len = br_read_uv(br); u8 *src = br_read_bytes_raw(br, len); if (src != NULL) { res.len = len; res.text = arena_push_array_no_zero(arena, u8, len); MEMCPY(res.text, src, len); } return res; } /* Will fill dst with zeroes if bitbuff overflows */ void br_read_bytes(struct bitbuff_reader *br, struct string out) { u8 *src = br_read_bytes_raw(br, out.len); if (src) { MEMCPY(out.text, src, out.len); } else { MEMZERO(out.text, out.len); } } /* NULL will return on bitbuff overflow, result should be checked. 
*/ u8 *br_read_bytes_raw(struct bitbuff_reader *br, u64 num_bytes) { br_align(br); u64 num_bits = num_bytes << 3; if (br_check_overflow_bits(br, num_bits)) { return NULL; } u8 *raw = br->base + (br->cur_bit >> 3); br->cur_bit += num_bits; return raw; } void br_seek_bytes(struct bitbuff_reader *br, u64 num_bytes) { br_align(br); u64 num_bits = num_bytes << 3; if (br_check_overflow_bits(br, num_bits)) { return; } br->cur_bit += num_bits; } void br_seek_to_byte(struct bitbuff_reader *br, u64 pos) { u64 cur_byte_pos = br->cur_bit >> 3; if (pos >= cur_byte_pos) { br_seek_bytes(br, pos - cur_byte_pos); } else { /* Tried to seek byte backwards in reader */ ASSERT(false); br->overflowed = true; br->cur_bit = (br->base_len << 3); } } #if BITBUFF_DEBUG void br_read_dbg_marker(struct bitbuff_reader *br, struct string name) { br->cur_bit += (8 - (br->cur_bit & 7)) & 7; for (u64 i = 0; i < name.len; ++i) { u8 c_stored = br_read_ubits_nomagic(br, 8); u8 c_expected = name.text[i]; ASSERT(c_expected == c_stored); } } #endif /* ========================== * * Test * ========================== */ #if BITBUFF_TEST #include "string.h" #include "scratch.h" void bitbuff_test(void) { struct arena_temp scratch = scratch_begin_no_conflict(); u8 kind_ubits = 0; u8 kind_ibits = 1; u8 kind_uv = 2; u8 kind_iv = 3; u8 kind_string = 4; struct test_case_ubits { u64 v; u64 num_bits; }; struct test_case_ibits { i64 v; u64 num_bits; }; struct test_case_uv { u64 v; }; struct test_case_iv { i64 v; }; struct test_case_string { struct string v; }; struct test_case { u8 kind; union { struct test_case_ubits ubits; struct test_case_ibits ibits; struct test_case_uv uv; struct test_case_iv iv; struct test_case_string s; }; }; struct test_case cases[] = { { kind_ubits, .ubits = { 40, 8 } }, { kind_ubits, .ubits = { 32, 8 } }, { kind_ubits, .ubits = { 100, 7 } }, { kind_ubits, .ubits = { 4, 3 } }, { kind_ubits, .ubits = { 13, 8 } }, { kind_ibits, .ibits = { 0, 8 } }, { kind_ibits, .ibits = { -1, 8 } }, { 
kind_ibits, .ibits = { -2, 8 } }, { kind_ibits, .ibits = { -3, 8 } }, { kind_ibits, .ibits = { -100, 8 } }, { kind_ibits, .ibits = { -50, 7 } }, { kind_ibits, .ibits = { 50, 7 } }, { kind_ibits, .ibits = { 4, 7 } }, { kind_ibits, .ibits = { 1, 7 } }, { kind_ibits, .ibits = { 3, 3 } }, { kind_ibits, .ibits = { 1, 2 } }, { kind_ibits, .ibits = { 0, 2 } }, { kind_ibits, .ibits = { -1, 2 } }, { kind_uv, .uv = { 0 } }, { kind_uv, .uv = { 100 } }, { kind_uv, .uv = { 10000 } }, { kind_uv, .uv = { 10000000000000 } }, { kind_uv, .uv = { U64_MAX } }, { kind_iv, .iv = { 0 } }, { kind_iv, .iv = { -1 } }, { kind_iv, .iv = { 10000000000000 } }, { kind_iv, .iv = { -10000000000000 } }, { kind_iv, .iv = { I64_MAX } }, { kind_iv, .iv = { I64_MIN } }, { kind_string, .s = { LIT("Hello there! Hope you're doing well.") } }, { kind_ibits, .ibits = { 3, 3 } }, { kind_string, .s = { LIT("Alriiiiiiiiiiiiiiiiiiighty then") } }, { kind_string, .s = { LIT("Alriiiiiiiiiiiiiiiiiiighty then") } }, }; struct string encoded = ZI; { struct bitbuff bb = bitbuff_alloc(GIGABYTE(64)); struct bitbuff_writer bw = bw_from_bitbuff(&bb); for (u64 i = 0; i < ARRAY_COUNT(cases); ++i) { struct test_case c = cases[i]; if (c.kind == kind_ubits) { bw_write_ubits(&bw, c.ubits.v, c.ubits.num_bits); } else if (c.kind == kind_ibits) { bw_write_ibits(&bw, c.ibits.v, c.ibits.num_bits); } else if (c.kind == kind_uv) { bw_write_uv(&bw, c.uv.v); } else if (c.kind == kind_iv) { bw_write_iv(&bw, c.iv.v); } else if (c.kind == kind_string) { bw_write_string(&bw, c.s.v); } else { ASSERT(false); } } encoded = bw_get_written(scratch.arena, &bw); } { struct bitbuff bb = bitbuff_from_string(encoded); struct bitbuff_reader br = br_from_bitbuff(&bb); for (u64 i = 0; i < ARRAY_COUNT(cases); ++i) { struct test_case c = cases[i]; if (c.kind == kind_ubits) { u64 w = c.ubits.v; u64 r = br_read_ubits(&br, c.ubits.num_bits); ASSERT(r == w); } else if (c.kind == kind_ibits) { i64 w = c.ibits.v; i64 r = br_read_ibits(&br, c.ubits.num_bits); 
ASSERT(r == w); } else if (c.kind == kind_uv) { u64 w = c.uv.v; u64 r = br_read_uv(&br); ASSERT(r == w); } else if (c.kind == kind_iv) { i64 w = c.iv.v; i64 r = br_read_iv(&br); ASSERT(r == w); } else if (c.kind == kind_string) { struct string w = c.s.v; struct string r = br_read_string(scratch.arena, &br); ASSERT(string_eq(r, w)); } else { ASSERT(false); } } } scratch_end(scratch); } #endif