diff --git a/src/ase/ase.c b/src/ase/ase.c index 0af781f4..70b34f45 100644 --- a/src/ase/ase.c +++ b/src/ase/ase.c @@ -1,3 +1,512 @@ +// DEFLATE decoder based on Handmade Hero's png parser + +//////////////////////////////////////////////////////////// +//~ Inflate + +//- Bitbuff + +// Returns the next nbits bits at the cursor, least significant bit first, without advancing the cursor. +u32 ASE_PeekBits(ASE_Bitbuff *bb, u32 nbits) +{ + Assert(nbits <= 32); + + u64 cur_byte = bb->cur_bit >> 3; + u8 bit_index = bb->cur_bit % 8; + u64 nbytes = (nbits + bit_index + 7) >> 3; + + u64 val64 = 0; + CopyBytes(&val64, &bb->data[cur_byte], nbytes); + u32 val32 = (u32)(val64 >> bit_index); + val32 &= (u32)(((u64)1 << nbits) - 1); // 64-bit mask: a 32-bit shift by (32 - nbits) is undefined when nbits is 0 + + return val32; +} + +u32 ASE_ConsumeBits(ASE_Bitbuff *bb, u32 nbits) +{ + u32 val = ASE_PeekBits(bb, nbits); + bb->cur_bit += nbits; + return val; +} + +void ASE_SkipBits(ASE_Bitbuff *bb, u32 nbits) +{ + bb->cur_bit += nbits; +} + +//- Reverse bits + +u32 ASE_ReverseBits(u32 v, u32 bit_count) +{ + // 7 & 15 seem to be the most common bit_counts, so a + // more optimal path is laid out for them. 
+ if (bit_count == 15) + { + u32 b1 = v & 0xFF; + b1 = (b1 & 0xF0) >> 4 | (b1 & 0x0F) << 4; + b1 = (b1 & 0xCC) >> 2 | (b1 & 0x33) << 2; + b1 = (b1 & 0xAA) >> 1 | (b1 & 0x55) << 1; + + u32 b2 = (v & 0xFF00) >> 8; + b2 = (b2 & 0xF0) >> 4 | (b2 & 0x0F) << 4; + b2 = (b2 & 0xCC) >> 2 | (b2 & 0x33) << 2; + b2 = (b2 & 0xAA) >> 1 | (b2 & 0x55) << 1; + b2 >>= 1; + + return (b1 << 7) | b2; + } + else if (bit_count == 7) + { + v = (v & 0xF0) >> 4 | (v & 0x0F) << 4; + v = (v & 0xCC) >> 2 | (v & 0x33) << 2; + v = (v & 0xAA) >> 1 | (v & 0x55) << 1; + return v >> 1; + } + else + { + u32 result = 0; + for (u32 i = 0; i <= (bit_count / 2); ++i) + { + u32 inv = (bit_count - (i + 1)); + result |= ((v >> i) & 0x1) << inv; + result |= ((v >> inv) & 0x1) << i; + } + return result; + } +} + +//- Dict + +// Builds a lookup table of (1 << max_code_bits) entries from per-symbol code lengths (bl_counts[symbol] is the code length, 0 meaning unused). +ASE_HuffDict ASE_InitHuffDict(Arena *arena, u32 max_code_bits, u32 *bl_counts, u32 bl_counts_count) +{ + ASE_HuffDict result = Zi; + result.max_code_bits = max_code_bits; + result.entries_count = (1 << max_code_bits); + result.entries = PushStructsNoZero(arena, ASE_HuffEntry, result.entries_count); + + u32 code_length_hist[ASE_HuffBitCount] = Zi; + for (u32 bl_count_index = 0; bl_count_index < bl_counts_count; ++bl_count_index) + { + u32 count = bl_counts[bl_count_index]; + Assert(count < countof(code_length_hist)); // count indexes the histogram, so it must be strictly below its size + ++code_length_hist[count]; + } + + u32 next_code[ASE_HuffBitCount] = Zi; + next_code[0] = 0; + code_length_hist[0] = 0; + for (u32 code_index = 1; code_index < countof(next_code); ++code_index) + { + next_code[code_index] = ((next_code[code_index - 1] + code_length_hist[code_index - 1]) << 1); + } + + for (u32 bl_count_index = 0; bl_count_index < bl_counts_count; ++bl_count_index) + { + u32 code_bits = bl_counts[bl_count_index]; + if (code_bits) + { + Assert(code_bits < countof(next_code)); + u32 code = next_code[code_bits]++; + u32 arbitrary_bits = result.max_code_bits - code_bits; + u32 entry_count = (1 << arbitrary_bits); + // TODO: Optimize this. 
It's bloating load times. + for (u32 entry_index = 0; entry_index < entry_count; ++entry_index) + { + u32 base_index = (code << arbitrary_bits) | entry_index; + u32 index = ASE_ReverseBits(base_index, result.max_code_bits); + ASE_HuffEntry *entry = &result.entries[index]; + entry->symbol = (u16)bl_count_index; + entry->bits_used = (u16)code_bits; + } + } + } + + return result; +} + +u16 ASE_DecodeHuffDict(ASE_HuffDict *huffman, ASE_Bitbuff *bb) +{ + u32 index = ASE_PeekBits(bb, huffman->max_code_bits); + Assert(index < huffman->entries_count); + + ASE_HuffEntry *entry = &huffman->entries[index]; + u16 result = entry->symbol; + ASE_SkipBits(bb, entry->bits_used); + Assert(entry->bits_used > 0); + return result; +} + +//- Inflate + +// Decodes the zlib stream at encoded into dst. No output size is passed in, so dst must already be large enough for the whole decoded stream. +void ASE_Inflate(u8 *dst, u8 *encoded) +{ + TempArena scratch = BeginScratchNoConflict(); + + ASE_Bitbuff bb = Zi; + bb.data = encoded; + + // ZLIB header + u32 cm = ASE_ConsumeBits(&bb, 4); + u32 cinfo = ASE_ConsumeBits(&bb, 4); + Assert(cm == 8); + Assert(cinfo == 7); + + u32 fcheck = ASE_ConsumeBits(&bb, 5); + u32 fdict = ASE_ConsumeBits(&bb, 1); + u32 flevl = ASE_ConsumeBits(&bb, 2); + Assert(fdict == 0); + + u8 cmf = (u8)(cm | (cinfo << 4)); + u8 flg = fcheck | (fdict << 5) | (flevl << 6); + Assert(((cmf * 256) + flg) % 31 == 0); + + u8 bfinal = 0; + while (!bfinal) + { + bfinal = ASE_ConsumeBits(&bb, 1); + u8 btype = ASE_ConsumeBits(&bb, 2); + switch (btype) + { + case ASE_BlockType_Uncompressed: + { + ASE_SkipBits(&bb, (8 - (bb.cur_bit % 8)) % 8); + u16 len = ASE_ConsumeBits(&bb, 16); // Unsigned: a signed 16-bit length turns blocks longer than 32767 bytes negative and skips them + u16 nlen = ASE_ConsumeBits(&bb, 16); + Assert(len == (u16)~nlen); // Validation + while (len-- > 0) + { + *dst++ = ASE_ConsumeBits(&bb, 8); + } + } break; + + case ASE_BlockType_CompressedFixed: + case ASE_BlockType_CompressedDynamic: + { + TempArena temp = BeginTempArena(scratch.arena); + { + //- Decode huffman table + u32 lit_len_dist_table[512] = Zi; + u32 hlit = 0; + u32 hdist = 0; + { + if (btype == ASE_BlockType_CompressedDynamic) + { + // Read 
huffman table + hlit = ASE_ConsumeBits(&bb, 5) + 257; + hdist = ASE_ConsumeBits(&bb, 5) + 1; + u32 hclen = ASE_ConsumeBits(&bb, 4) + 4; + + // Init dict huffman (hclen) + u32 hclen_bl_counts[19] = Zi; + for (u32 i = 0; i < hclen; ++i) + { + u32 code = ASE_HuffHclenOrder[i]; + hclen_bl_counts[code] = ASE_ConsumeBits(&bb, 3); + } + ASE_HuffDict dict_huffman = ASE_InitHuffDict(temp.arena, 7, hclen_bl_counts, countof(hclen_bl_counts)); + + // Decode dict huffman + u32 lit_len_count = 0; + u32 len_count = hlit + hdist; + Assert(len_count <= countof(lit_len_dist_table)); + while (lit_len_count < len_count) + { + u32 rep_count = 1; + u32 rep_val = 0; + u32 encoded_len = ASE_DecodeHuffDict(&dict_huffman, &bb); + if (encoded_len <= 15) + { + rep_val = encoded_len; + } + else if (encoded_len == 16) + { + rep_count = 3 + ASE_ConsumeBits(&bb, 2); + Assert(lit_len_count > 0); + rep_val = lit_len_dist_table[lit_len_count - 1]; + } + else if (encoded_len == 17) + { + rep_count = 3 + ASE_ConsumeBits(&bb, 3); + } + else if (encoded_len == 18) + { + rep_count = 11 + ASE_ConsumeBits(&bb, 7); + } + else + { + // Invalid len + Assert(0); + } + while (rep_count--) + { + lit_len_dist_table[lit_len_count++] = rep_val; + } + } + Assert(lit_len_count == len_count); + } + else + { + // Decode fixed table + hlit = 288; + hdist = 32; + u32 index = 0; + for (u32 i = 0; i < countof(ASE_HuffBlCounts); ++i) + { + u32 bit_count = ASE_HuffBlCounts[i][1]; + u32 last_valuie = ASE_HuffBlCounts[i][0]; + while (index <= last_valuie) + { + lit_len_dist_table[index++] = bit_count; + } + } + } + } + + //- Decode + ASE_HuffDict lit_len_huffman = ASE_InitHuffDict(temp.arena, 15, lit_len_dist_table, hlit); + ASE_HuffDict dist_huffman = ASE_InitHuffDict(temp.arena, 15, lit_len_dist_table + hlit, hdist); + for (;;) + { + u32 lit_len = ASE_DecodeHuffDict(&lit_len_huffman, &bb); + if (lit_len <= 255) + { + *dst++ = lit_len & 0xFF; + } + else if (lit_len >= 257) + { + u32 length_index = (lit_len - 257); + 
Assert(length_index < countof(ASE_HuffLenTable)); // Length symbols above 285 have no table entry + ASE_HuffEntry length_entry = ASE_HuffLenTable[length_index]; + u32 length = length_entry.symbol; + if (length_entry.bits_used > 0) + { + u32 extra_bits = ASE_ConsumeBits(&bb, length_entry.bits_used); + length += extra_bits; + } + u32 dist_index = ASE_DecodeHuffDict(&dist_huffman, &bb); + Assert(dist_index < countof(ASE_HuffDistTable)); // Distance symbols above 29 have no table entry + ASE_HuffEntry dist_entry = ASE_HuffDistTable[dist_index]; + u32 distance = dist_entry.symbol; + if (dist_entry.bits_used > 0) + { + u32 extra_bits = ASE_ConsumeBits(&bb, dist_entry.bits_used); + distance += extra_bits; + } + u8 *src = dst - distance; + while (length--) + { + *dst++ = *src++; + } + } + else + { + break; + } + } + } + EndTempArena(temp); + } break; + + case ASE_BlockType_Reserved: + { + // TODO + Assert(0); + } break; + } + } + + EndScratch(scratch); +} + +//////////////////////////////////////////////////////////// +//~ Meta + +ASE_Meta ASE_DecodeMeta(Arena *arena, String encoded) +{ + ASE_Meta result = Zi; + return result; +} + +//////////////////////////////////////////////////////////// +//~ Image + +ASE_Image ASE_PushBlankImage(Arena *arena, Rng2 bounds) +{ + ASE_Image result = Zi; + i64 pixels_count = AreaFromVec2(DimsFromRng2(bounds)); + if (pixels_count > 0) + { + result.bounds = bounds; + result.pixels = PushStructs(arena, u32, pixels_count); + } + return result; +} + +// Copies bounds and opacity from the cel and inflates its encoded data into a newly pushed pixel buffer. +ASE_Image ASE_DecompressImageFromCel(Arena *arena, ASE_Cel *cel) +{ + ASE_Image result = Zi; + result.bounds = cel->bounds; + result.opacity = cel->opacity; + + // Skip allocation and inflation for cels without area (same guard as ASE_PushBlankImage); pixels keeps its Zi value + i64 pixels_count = AreaFromVec2(DimsFromRng2(result.bounds)); + if (pixels_count > 0) + { + result.pixels = PushStructsNoZero(arena, u32, pixels_count); + ASE_Inflate((u8 *)result.pixels, cel->encoded.text); + } + + return result; +} + +//////////////////////////////////////////////////////////// +//~ Blend + +u32 ASE_BlendPixel(u32 src, u32 dst, u8 opacity) +{ + u32 result = 0; + u32 dst_a = (dst >> 24) & 0xff; + u32 src_a = (src >> 24) & 0xff; + src_a = (u8)MulNormalizedU8(src_a, opacity); + u32 a = src_a + dst_a - MulNormalizedU8(src_a, dst_a); + if (a != 0) + { + 
i32 dst_r = (i32)(dst & 0xff); + i32 dst_g = (i32)((dst >> 8) & 0xff); + i32 dst_b = (i32)((dst >> 16) & 0xff); + i32 src_r = (i32)(src & 0xff); + i32 src_g = (i32)((src >> 8) & 0xff); + i32 src_b = (i32)((src >> 16) & 0xff); + // Signed math: (src - dst) is negative whenever the source channel is darker, which wraps and corrupts the result in unsigned arithmetic + i32 r = dst_r + (src_r - dst_r) * (i32)src_a / (i32)a; + i32 g = dst_g + (src_g - dst_g) * (i32)src_a / (i32)a; + i32 b = dst_b + (src_b - dst_b) * (i32)src_a / (i32)a; + result = ((u32)r << 0) | ((u32)g << 8) | ((u32)b << 16) | (a << 24); + } + return result; +} + +void ASE_BlendImage(ASE_Image src_img, ASE_Image dst_img) +{ + u8 opacity_u8 = (u8)(src_img.opacity * 255.0f); + Vec2 src_dims = DimsFromRng2(src_img.bounds); + Vec2 dst_dims = DimsFromRng2(dst_img.bounds); + Rng2 blend_bounds = IntersectRng2(src_img.bounds, dst_img.bounds); + Vec2 blend_dims = DimsFromRng2(blend_bounds); + if (!IsRng2Empty(blend_bounds)) + { + for (i32 blend_y = blend_bounds.p0.y; blend_y < blend_bounds.p1.y; ++blend_y) + { + i32 src_y = blend_y - src_img.bounds.p0.y; + i32 dst_y = blend_y - dst_img.bounds.p0.y; + i32 src_offset = src_y * src_dims.x; + i32 dst_offset = dst_y * dst_dims.x; + for (i32 blend_x = blend_bounds.p0.x; blend_x < blend_bounds.p1.x; ++blend_x) + { + i32 src_x = blend_x - src_img.bounds.p0.x; + i32 dst_x = blend_x - dst_img.bounds.p0.x; + u32 *src_pixel = &src_img.pixels[src_x + src_offset]; + u32 *dst_pixel = &dst_img.pixels[dst_x + dst_offset]; + *dst_pixel = ASE_BlendPixel(*src_pixel, *dst_pixel, opacity_u8); + } + } + } +} + + + + + + + + + + + + + +// //////////////////////////////////////////////////////////// +// //~ Query + +// u32 ASE_FirstPixelFromCel(ASE_Cel *cel) +// { +// u32 result = 0; +// return result; +// } + +// //////////////////////////////////////////////////////////// +// //~ Rasterize + +// void ASE_RasterizeCel(ASE_Cel *cel, u32 *dst_pixels, Vec2 dst_dims) +// { +// TempArena scratch = BeginScratchNoConflict(); + +// // ASE_Layer *layer = cel->layer; +// // ASE_Meta *meta = layer->meta; +// // u8 opacity = (cel->opacity / 255.0f) * (layer->opacity / 255.0f) * 255.0f; + +// // // Adjust bounds to 
ensure pixels outside of frame boundaries +// // // aren't processed (aseprite keeps chunks outside of frame +// // // around in project file). +// // { +// // i32 frame_right = cel_width + frame_left; +// // i32 frame_bottom = frame_top + cel_height; +// // if (frame_left < 0) +// // { +// // cel_left += -frame_left; +// // frame_left = 0; +// // } +// // if (frame_top < 0) +// // { +// // cel_top += -frame_top; +// // frame_top = 0; +// // } +// // if (frame_right > (i32)frame_width) +// // { +// // cel_right -= (frame_right - frame_width); +// // frame_right = frame_width; +// // } +// // if (frame_bottom > (i32)frame_height) +// // { +// // cel_bottom -= (frame_bottom - frame_height); +// // frame_bottom = frame_height; +// // } +// // } + +// i64 src_pixels_count = DimsFromRng2(cel->bounds).x * DimsFromRng2(cel->bounds).y; +// u32 *src_pixels = PushStructsNoZero(scratch.arena, u32, src_pixels_count); +// { + +// } + +// u8 opacity_u8 = (u8)(cel->opacity * 255.0f); + +// Rng2 bounds = IntersectRng2(cel->bounds, RNG2(Vec2(0, 0), dst_dims)); +// Vec2 dims = DimsFromRng2(bounds); + +// for (i32 cel_y = 0; cel_y < dims.y; ++cel_y) +// { +// i32 dst_y = bounds.p0.y + cel_y; +// i32 cel_offset = cel_y * cel_width; +// i32 dst_offset = dst_y * dst_width; +// for (i32 cel_x = 0; cel_x < dims.x; ++cel_x) +// { +// i32 dst_x = bounds.p0.x + cel_x; +// u32 *src_pixel = &cel->pixels[cel_x + cel_offset]; +// u32 *dst_pixel = &dst_pixels[dst_x + dst_offset]; +// *dst_pixel = ASE_Blend(*src_pixel, *dst_pixel, opacity_u8); +// } +// } + +// EndScratch(scratch); +// } + + + + + + + + + + + + + + + // // DEFLATE decoder based on Handmade Hero's png parser // //////////////////////////////////////////////////////////// diff --git a/src/ase/ase.h b/src/ase/ase.h index 35e9c9e9..01fcb372 100644 --- a/src/ase/ase.h +++ b/src/ase/ase.h @@ -1,9 +1,126 @@ +// DEFLATE decoder based on Handmade Hero's png parser + +//////////////////////////////////////////////////////////// +//~ Inflate 
types + +#define ASE_HuffBitCount 16 + +Struct(ASE_HuffEntry) +{ + u16 symbol; + u16 bits_used; +}; + +Struct(ASE_HuffDict) +{ + u32 max_code_bits; + u32 entries_count; + ASE_HuffEntry *entries; +}; + +Global Readonly u32 ASE_HuffHclenOrder[] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + +Global Readonly ASE_HuffEntry ASE_HuffLenTable[] = { + {3, 0}, // 257 + {4, 0}, // 258 + {5, 0}, // 259 + {6, 0}, // 260 + {7, 0}, // 261 + {8, 0}, // 262 + {9, 0}, // 263 + {10, 0}, // 264 + {11, 1}, // 265 + {13, 1}, // 266 + {15, 1}, // 267 + {17, 1}, // 268 + {19, 2}, // 269 + {23, 2}, // 270 + {27, 2}, // 271 + {31, 2}, // 272 + {35, 3}, // 273 + {43, 3}, // 274 + {51, 3}, // 275 + {59, 3}, // 276 + {67, 4}, // 277 + {83, 4}, // 278 + {99, 4}, // 279 + {115, 4}, // 280 + {131, 5}, // 281 + {163, 5}, // 282 + {195, 5}, // 283 + {227, 5}, // 284 + {258, 0}, // 285 +}; + +Global Readonly ASE_HuffEntry ASE_HuffDistTable[] = { + {1, 0}, // 0 + {2, 0}, // 1 + {3, 0}, // 2 + {4, 0}, // 3 + {5, 1}, // 4 + {7, 1}, // 5 + {9, 2}, // 6 + {13, 2}, // 7 + {17, 3}, // 8 + {25, 3}, // 9 + {33, 4}, // 10 + {49, 4}, // 11 + {65, 5}, // 12 + {97, 5}, // 13 + {129, 6}, // 14 + {193, 6}, // 15 + {257, 7}, // 16 + {385, 7}, // 17 + {513, 8}, // 18 + {769, 8}, // 19 + {1025, 9}, // 20 + {1537, 9}, // 21 + {2049, 10}, // 22 + {3073, 10}, // 23 + {4097, 11}, // 24 + {6145, 11}, // 25 + {8193, 12}, // 26 + {12289, 12}, // 27 + {16385, 13}, // 28 + {24577, 13}, // 29 +}; + +Global Readonly u32 ASE_HuffBlCounts[][2] = { + {143, 8}, + {255, 9}, + {279, 7}, + {287, 8}, + {319, 5}, +}; + +Struct(ASE_Bitbuff) +{ + u8 *data; + u64 cur_bit; +}; + +Enum(ASE_BlockType) +{ + ASE_BlockType_Uncompressed = 0, + ASE_BlockType_CompressedFixed = 1, + ASE_BlockType_CompressedDynamic = 2, + ASE_BlockType_Reserved = 3 +}; + //////////////////////////////////////////////////////////// //~ Meta types Struct (ASE_Cel) { - i32 _; + f32 opacity; + + i64 frame_idx; + i64 linked_frame_idx; + + 
String encoded; + Rng2 bounds; }; Struct(ASE_Layer) @@ -12,7 +129,6 @@ Struct(ASE_Layer) ASE_Layer *prev; String name; - i64 cels_count; ASE_Cel *cels; }; @@ -40,28 +156,68 @@ Struct(ASE_Meta) }; //////////////////////////////////////////////////////////// -//~ Query types +//~ Image types -Struct(ASE_SinglePixelResult) +Struct(ASE_Image) { - Vec2 pos; - u32 v; + f32 opacity; + Rng2 bounds; + u32 *pixels; }; +//////////////////////////////////////////////////////////// +//~ Inflate + +//- Bitbuff +u32 ASE_PeekBits(ASE_Bitbuff *bb, u32 nbits); +u32 ASE_ConsumeBits(ASE_Bitbuff *bb, u32 nbits); +void ASE_SkipBits(ASE_Bitbuff *bb, u32 nbits); + +//- Reverse bits +u32 ASE_ReverseBits(u32 v, u32 bit_count); + +//- Dict +ASE_HuffDict ASE_InitHuffDict(Arena *arena, u32 max_code_bits, u32 *bl_counts, u32 bl_counts_count); +u16 ASE_DecodeHuffDict(ASE_HuffDict *huffman, ASE_Bitbuff *bb); + +//- Inflate +void ASE_Inflate(u8 *dst, u8 *encoded); + //////////////////////////////////////////////////////////// //~ Meta ASE_Meta ASE_DecodeMeta(Arena *arena, String encoded); //////////////////////////////////////////////////////////// -//~ Query +//~ Image -ASE_SinglePixelResult ASE_SinglePixelFromCel(ASE_Cel *cel); +ASE_Image ASE_PushBlankImage(Arena *arena, Rng2 bounds); +ASE_Image ASE_DecompressImageFromCel(Arena *arena, ASE_Cel *cel); //////////////////////////////////////////////////////////// -//~ Rasterize +//~ Blend -void ASE_RasterizeCel(ASE_Cel *cel, u32 *dst_pixels, Vec2 dst_dims); +u32 ASE_BlendPixel(u32 src, u32 dst, u8 opacity); +void ASE_BlendImage(ASE_Image src_img, ASE_Image dst_img); + + + + + + + + + + +// //////////////////////////////////////////////////////////// +// //~ Query + +// u32 ASE_FirstPixelFromCel(ASE_Cel *cel); + +// //////////////////////////////////////////////////////////// +// //~ Rasterize + +// void ASE_RasterizeCel(ASE_Cel *cel, u32 *dst_pixels, Vec2 dst_dims); diff --git a/src/base/base_math.c b/src/base/base_math.c index f23bd861..9beeb2dc 
100644 --- a/src/base/base_math.c +++ b/src/base/base_math.c @@ -190,14 +190,14 @@ f32 UnwindAngleF32(f32 a) //////////////////////////////////////////////////////////// //~ Float lerp -f32 LerpF32(f32 val0, f32 val1, f32 t) +f32 LerpF32(f32 a, f32 b, f32 t) { - return val0 + ((val1 - val0) * t); + return a + ((b - a) * t); } -f64 LerpF64(f64 val0, f64 val1, f64 t) +f64 LerpF64(f64 a, f64 b, f64 t) { - return val0 + ((val1 - val0) * t); + return a + ((b - a) * t); } f32 LerpAngleF32(f32 a, f32 b, f32 t) @@ -209,24 +209,24 @@ f32 LerpAngleF32(f32 a, f32 b, f32 t) //////////////////////////////////////////////////////////// //~ Int lerp -i32 LerpI32(i32 val0, i32 val1, f32 t) +i32 LerpI32(i32 a, i32 b, f32 t) { - return val0 + RoundF32((f32)(val1 - val0) * t); + return a + RoundF32((f32)(b - a) * t); } -i64 LerpI64(i64 val0, i64 val1, f64 t) +i64 LerpI64(i64 a, i64 b, f64 t) { - return val0 + RoundF64((f64)(val1 - val0) * t); + return a + RoundF64((f64)(b - a) * t); } -i32 LerpU32(u32 val0, u32 val1, f32 t) +i32 LerpU32(u32 a, u32 b, f32 t) { - return val0 + RoundF32((f32)(val1 - val0) * t); + return a + RoundF32((f32)(b - a) * t); } -i64 LerpU64(u64 val0, u64 val1, f64 t) +i64 LerpU64(u64 a, u64 b, f64 t) { - return val0 + RoundF64((f64)(val1 - val0) * t); + return a + RoundF64((f64)(b - a) * t); } //////////////////////////////////////////////////////////// @@ -336,12 +336,12 @@ Vec4 PremulFromSrgb(Vec4 srgb) return premul; } -Vec4 LerpSrgb(Vec4 v0, Vec4 v1, f32 t) +Vec4 LerpSrgb(Vec4 a, Vec4 b, f32 t) { Vec4 result = Zi; - Vec4 v0_lin = LinearFromSrgb(v0); - Vec4 v1_lin = LinearFromSrgb(v1); - Vec4 lerp_lin = LerpVec4(v0_lin, v1_lin, t); + Vec4 a_lin = LinearFromSrgb(a); + Vec4 b_lin = LinearFromSrgb(b); + Vec4 lerp_lin = LerpVec4(a_lin, b_lin, t); result = SrgbFromLinear(lerp_lin); return result; } @@ -548,14 +548,11 @@ f32 AngleFromVec2(Vec2 v) return ArcTan2F32(v.y, v.x); } -f32 AngleFromVec2Dirs(Vec2 dir1, Vec2 dir2) -{ - return ArcTan2F32(WedgeVec2(dir1, 
dir2), DotVec2(dir1, dir2)); -} +//- Area -f32 AngleFromVec2Points(Vec2 pt1, Vec2 pt2) +f32 AreaFromVec2(Vec2 v) { - return AngleFromVec2(SubVec2(pt2, pt1)); + return AbsF32(v.x * v.y); } //- Closest point @@ -571,21 +568,21 @@ Vec2 ClosestPointFromRay(Vec2 ray_pos, Vec2 ray_dir_norm, Vec2 p) //- Lerp // Interpolate position vectors -Vec2 LerpVec2(Vec2 v0, Vec2 v1, f32 t) +Vec2 LerpVec2(Vec2 a, Vec2 b, f32 t) { - return VEC2(LerpF32(v0.x, v1.x, t), LerpF32(v0.y, v1.y, t)); + return VEC2(LerpF32(a.x, b.x, t), LerpF32(a.y, b.y, t)); } -Vec2 LerpVec2Vec2(Vec2 v0, Vec2 v1, Vec2 t) +Vec2 LerpVec2Vec2(Vec2 a, Vec2 b, Vec2 t) { - return VEC2(LerpF32(v0.x, v1.x, t.x), LerpF32(v0.y, v1.y, t.y)); + return VEC2(LerpF32(a.x, b.x, t.x), LerpF32(a.y, b.y, t.y)); } // Interpolate direction vectors (spherical lerp) -Vec2 SlerpVec2(Vec2 v0, Vec2 v1, f32 t) +Vec2 SlerpVec2(Vec2 a, Vec2 b, f32 t) { - f32 rot = LerpAngleF32(AngleFromVec2(v0), AngleFromVec2(v1), t); - f32 len = LerpF32(Vec2Len(v0), Vec2Len(v1), t); + f32 rot = LerpAngleF32(AngleFromVec2(a), AngleFromVec2(b), t); + f32 len = LerpF32(Vec2Len(a), Vec2Len(b), t); return MulVec2(Vec2FromAngle(rot), len); } @@ -639,13 +636,13 @@ Vec4 MulVec4Vec4(Vec4 a, Vec4 b) //- Lerp -Vec4 LerpVec4(Vec4 v0, Vec4 v1, f32 t) +Vec4 LerpVec4(Vec4 a, Vec4 b, f32 t) { Vec4 result = Zi; - result.x = LerpF32(v0.x, v1.x, t); - result.y = LerpF32(v0.y, v1.y, t); - result.z = LerpF32(v0.z, v1.z, t); - result.w = LerpF32(v0.w, v1.w, t); + result.x = LerpF32(a.x, b.x, t); + result.y = LerpF32(a.y, b.y, t); + result.z = LerpF32(a.z, b.z, t); + result.w = LerpF32(a.w, b.w, t); return result; } @@ -683,6 +680,11 @@ f32 NormRng(Rng r, f32 v) //- Rng2 +b32 IsRng2Empty(Rng2 r) +{ + return r.p0.x >= r.p1.x || r.p0.y >= r.p1.y; +} + Vec2 DimsFromRng2(Rng2 r) { Vec2 result = Zi; @@ -719,6 +721,16 @@ Rng2 UnionRng2(Rng2 a, Rng2 b) return result; } +Rng2 IntersectRng2(Rng2 a, Rng2 b) +{ + Rng2 result = Zi; + result.p0.x = MaxF32(a.p0.x, b.p0.x); + result.p0.y = 
MaxF32(a.p0.y, b.p0.y); + result.p1.x = MinF32(a.p1.x, b.p1.x); + result.p1.y = MinF32(a.p1.y, b.p1.y); + return result; +} + Rng2 AddRng2Vec2(Rng2 r, Vec2 v) { Rng2 result = Zi; @@ -745,6 +757,11 @@ Rng2 DivRng2Vec2(Rng2 r, Vec2 v) //- Rng2I32 +b32 IsRng2I32Empty(Rng2I32 r) +{ + return r.p0.x >= r.p1.x || r.p0.y >= r.p1.y; +} + Vec2I32 DimsFromRng2I32(Rng2I32 r) { Vec2I32 result = Zi; @@ -765,10 +782,20 @@ Vec2I32 CenterFromRng2I32(Rng2I32 r) Rng2I32 UnionRng2I32(Rng2I32 a, Rng2I32 b) { Rng2I32 result = Zi; - result.p0.x = MinF32(a.p0.x, b.p0.x); - result.p0.y = MinF32(a.p0.y, b.p0.y); - result.p1.x = MaxF32(a.p1.x, b.p1.x); - result.p1.y = MaxF32(a.p1.y, b.p1.y); + result.p0.x = MinI32(a.p0.x, b.p0.x); + result.p0.y = MinI32(a.p0.y, b.p0.y); + result.p1.x = MaxI32(a.p1.x, b.p1.x); + result.p1.y = MaxI32(a.p1.y, b.p1.y); + return result; +} + +Rng2I32 IntersectRng2I32(Rng2I32 a, Rng2I32 b) +{ + Rng2I32 result = Zi; + result.p0.x = MaxI32(a.p0.x, b.p0.x); + result.p0.y = MaxI32(a.p0.y, b.p0.y); + result.p1.x = MinI32(a.p1.x, b.p1.x); + result.p1.y = MinI32(a.p1.y, b.p1.y); return result; } @@ -799,9 +826,9 @@ Rng2I32 DivRng2I32Vec2I32(Rng2I32 r, Vec2I32 v) //////////////////////////////////////////////////////////// //~ Affine -b32 MatchAffine(Affine af1, Affine af2) +b32 MatchAffine(Affine af0, Affine af1) { - return MatchVec2(af1.og, af2.og) && MatchVec2(af1.bx, af2.bx) && MatchVec2(af1.by, af2.by); + return MatchVec2(af0.og, af1.og) && MatchVec2(af0.bx, af1.bx) && MatchVec2(af0.by, af1.by); } //- Initialization @@ -1085,17 +1112,17 @@ Mat4x4 Mat4x4FromOrtho(f32 left, f32 right, f32 bottom, f32 top, f32 near_z, f32 return m; } -Mat4x4 MulMat4x4(Mat4x4 m1, Mat4x4 m2) +Mat4x4 MulMat4x4(Mat4x4 m0, Mat4x4 m1) { - f32 a00 = m1.e[0][0], a01 = m1.e[0][1], a02 = m1.e[0][2], a03 = m1.e[0][3], - a10 = m1.e[1][0], a11 = m1.e[1][1], a12 = m1.e[1][2], a13 = m1.e[1][3], - a20 = m1.e[2][0], a21 = m1.e[2][1], a22 = m1.e[2][2], a23 = m1.e[2][3], - a30 = m1.e[3][0], a31 = 
m1.e[3][1], a32 = m1.e[3][2], a33 = m1.e[3][3], + f32 a00 = m0.e[0][0], a01 = m0.e[0][1], a02 = m0.e[0][2], a03 = m0.e[0][3], + a10 = m0.e[1][0], a11 = m0.e[1][1], a12 = m0.e[1][2], a13 = m0.e[1][3], + a20 = m0.e[2][0], a21 = m0.e[2][1], a22 = m0.e[2][2], a23 = m0.e[2][3], + a30 = m0.e[3][0], a31 = m0.e[3][1], a32 = m0.e[3][2], a33 = m0.e[3][3], - b00 = m2.e[0][0], b01 = m2.e[0][1], b02 = m2.e[0][2], b03 = m2.e[0][3], - b10 = m2.e[1][0], b11 = m2.e[1][1], b12 = m2.e[1][2], b13 = m2.e[1][3], - b20 = m2.e[2][0], b21 = m2.e[2][1], b22 = m2.e[2][2], b23 = m2.e[2][3], - b30 = m2.e[3][0], b31 = m2.e[3][1], b32 = m2.e[3][2], b33 = m2.e[3][3]; + b00 = m1.e[0][0], b01 = m1.e[0][1], b02 = m1.e[0][2], b03 = m1.e[0][3], + b10 = m1.e[1][0], b11 = m1.e[1][1], b12 = m1.e[1][2], b13 = m1.e[1][3], + b20 = m1.e[2][0], b21 = m1.e[2][1], b22 = m1.e[2][2], b23 = m1.e[2][3], + b30 = m1.e[3][0], b31 = m1.e[3][1], b32 = m1.e[3][2], b33 = m1.e[3][3]; Mat4x4 result; result.e[0][0] = a00 * b00 + a10 * b01 + a20 * b02 + a30 * b03; diff --git a/src/base/base_math.h b/src/base/base_math.h index c0522c20..4522129c 100644 --- a/src/base/base_math.h +++ b/src/base/base_math.h @@ -308,17 +308,17 @@ f32 UnwindAngleF32(f32 a); //////////////////////////////////////////////////////////// //~ Float lerp -f32 LerpF32(f32 val0, f32 val1, f32 t); -f64 LerpF64(f64 val0, f64 val1, f64 t); +f32 LerpF32(f32 a, f32 b, f32 t); +f64 LerpF64(f64 a, f64 b, f64 t); f32 LerpAngleF32(f32 a, f32 b, f32 t); //////////////////////////////////////////////////////////// //~ Int lerp -i32 LerpI32(i32 val0, i32 val1, f32 t); -i64 LerpI64(i64 val0, i64 val1, f64 t); -i32 LerpU32(u32 val0, u32 val1, f32 t); -i64 LerpU64(u64 val0, u64 val1, f64 t); +i32 LerpI32(i32 a, i32 b, f32 t); +i64 LerpI64(i64 a, i64 b, f64 t); +i32 LerpU32(u32 a, u32 b, f32 t); +i64 LerpU64(u64 a, u64 b, f64 t); //////////////////////////////////////////////////////////// //~ Smoothstep @@ -340,7 +340,7 @@ Vec4 SrgbFromLinear(Vec4 lin); Vec4 
PremulFromLinear(Vec4 lin); Vec4 PremulFromSrgb(Vec4 srgb); -Vec4 LerpSrgb(Vec4 v0, Vec4 v1, f32 t); +Vec4 LerpSrgb(Vec4 a, Vec4 b, f32 t); //////////////////////////////////////////////////////////// //~ Vec2 @@ -391,16 +391,17 @@ Vec2 RotateVec2(Vec2 v, f32 a); Vec2 RotateVec2Vec2(Vec2 a, Vec2 b); Vec2 Vec2FromAngle(f32 a); f32 AngleFromVec2(Vec2 v); -f32 AngleFromVec2Dirs(Vec2 dir1, Vec2 dir2); -f32 AngleFromVec2Points(Vec2 pt1, Vec2 pt2); + +//- Area +f32 AreaFromVec2(Vec2 v); //- Closest point Vec2 ClosestPointFromRay(Vec2 ray_pos, Vec2 ray_dir_norm, Vec2 p); //- Lerp -Vec2 LerpVec2(Vec2 v0, Vec2 v1, f32 t); -Vec2 LerpVec2Vec2(Vec2 v0, Vec2 v1, Vec2 t); -Vec2 SlerpVec2(Vec2 v0, Vec2 v1, f32 t); +Vec2 LerpVec2(Vec2 a, Vec2 b, f32 t); +Vec2 LerpVec2Vec2(Vec2 a, Vec2 b, Vec2 t); +Vec2 SlerpVec2(Vec2 a, Vec2 b, f32 t); //////////////////////////////////////////////////////////// //~ Vec2I32 @@ -418,7 +419,7 @@ Vec4 MulVec4(Vec4 v, f32 s); Vec4 MulVec4Vec4(Vec4 a, Vec4 b); //- Lerp -Vec4 LerpVec4(Vec4 v0, Vec4 v1, f32 t); +Vec4 LerpVec4(Vec4 a, Vec4 b, f32 t); //- Conversion Vec4 Vec4FromU32(u32 v); @@ -432,18 +433,22 @@ f32 NormRng(Rng r, f32 v); #define Norm(min, max, v) NormRng(RNG((min), (max)), (v)) //- Rng2 +b32 IsRng2Empty(Rng2 r); Vec2 DimsFromRng2(Rng2 r); Vec2 CenterFromRng2(Rng2 r); Vec2 NormRng2(Rng2 r, Vec2 v); Rng2 UnionRng2(Rng2 a, Rng2 b); +Rng2 IntersectRng2(Rng2 a, Rng2 b); Rng2 AddRng2Vec2(Rng2 r, Vec2 v); Rng2 MulRng2Vec2(Rng2 a, Vec2 v); Rng2 DivRng2Vec2(Rng2 a, Vec2 v); //- Rng2I32 +b32 IsRng2I32Empty(Rng2I32 r); Vec2I32 DimsFromRng2I32(Rng2I32 r); Vec2I32 CenterFromRng2I32(Rng2I32 r); Rng2I32 UnionRng2I32(Rng2I32 a, Rng2I32 b); +Rng2I32 IntersectRng2I32(Rng2I32 a, Rng2I32 b); Rng2I32 AddRng2I32Vec2I32(Rng2I32 r, Vec2I32 v); Rng2I32 MulRng2I32Vec2I32(Rng2I32 a, Vec2I32 v); Rng2I32 DivRng2I32Vec2I32(Rng2I32 a, Vec2I32 v); @@ -454,7 +459,7 @@ Rng2I32 DivRng2I32Vec2I32(Rng2I32 a, Vec2I32 v); #define AffineIdentity ((Affine) { .bx = { .x = 1 }, 
.by = { .y = 1 } }) #define CompAffineIdentity { .bx = { .x = 1 }, .by = { .y = 1 } } -b32 MatchAffine(Affine af1, Affine af2); +b32 MatchAffine(Affine af0, Affine af1); //- Initialization Affine AffineFromPos(Vec2 v); @@ -516,5 +521,5 @@ SoftSpring MakeSpring(f32 hertz, f32 damping_ratio, f32 dt); Mat4x4 Mat4x4FromAffine(Affine af); Mat4x4 Mat4x4FromOrtho(f32 left, f32 right, f32 bottom, f32 top, f32 near_z, f32 far_z); -Mat4x4 MulMat4x4(Mat4x4 m1, Mat4x4 m2); +Mat4x4 MulMat4x4(Mat4x4 m0, Mat4x4 m1); Mat4x4 ProjectMat4x4View(Affine view, f32 viewport_width, f32 viewport_height); diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index d18e9480..1f2fd639 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -107,68 +107,100 @@ SPR_Slice SPR_SliceFromSheet(SPR_SheetKey sheet_key, SPR_SpanKey span_key, i64 f for (i64 slice_idx = 0; slice_idx < sheet->slices_count; ++slice_idx) { SPR_SliceEntry *slice = &sheet->slices[slice_idx]; + slice->bounds = Rng2Empty; for (SPR_RayKind ray_kind = 0; ray_kind < SPR_RayKind_COUNT; ++ray_kind) { slice->rays[ray_kind] = XformIdentity; } } + //- Compute slice bounds + for (ASE_Layer *ase_layer = sheet->meta.first_layer; ase_layer; ase_layer = ase_layer->next) + { + SPR_LayerKind kind = SPR_LayerKindFromName(ase_layer->name); + if (kind == SPR_LayerKind_Visual) + { + for (i64 slice_idx = 0; slice_idx < sheet->meta.frames_count; ++slice_idx) + { + ASE_Cel *cel = &ase_layer->cels[slice_idx]; + SPR_SliceEntry *slice = &sheet->slices[slice_idx]; + slice->bounds = UnionRng2(slice->bounds, cel->bounds); + } + } + } + //- Push async rasterization commands if (sheet->meta.frames_count > 0) { Lock cmds_lock = LockE(&SPR.submit.mutex); { + i64 submit_count = 0; for (i64 slice_idx = 0; slice_idx < sheet->meta.frames_count; ++slice_idx) { - SPR_CmdNode *cmd_node = SPR.submit.first_free; - if (cmd_node) + SPR_SliceEntry *slice = &sheet->slices[slice_idx]; + if (!IsRng2Empty(slice->bounds)) { - SllStackPop(SPR.submit.first_free); - 
ZeroStruct(cmd_node); + SPR_CmdNode *cmd_node = SPR.submit.first_free; + if (cmd_node) + { + SllStackPop(SPR.submit.first_free); + ZeroStruct(cmd_node); + } + else + { + cmd_node = PushStruct(perm, SPR_CmdNode); + } + cmd_node->cmd.sheet = sheet; + cmd_node->cmd.slice_idx = slice_idx; + SllQueuePush(SPR.submit.first, SPR.submit.last, cmd_node); + submit_count += 1; } - else - { - cmd_node = PushStruct(perm, SPR_CmdNode); - } - cmd_node->cmd.sheet = sheet; - cmd_node->cmd.slice_idx = slice_idx; - SllQueuePush(SPR.submit.first, SPR.submit.last, cmd_node); - ++SPR.submit.count; } - Atomic32FetchSet(&SPR.new_cmds_present, 1); - SignalAsyncTick(); + if (submit_count > 0) + { + SPR.submit.count += submit_count; + SignalAsyncTick(); + } } Unlock(&cmds_lock); } //- Compute rays - for (SPR_RayKind ray_kind = 0; ray_kind < SPR_RayKind_COUNT; ++ray_kind) { - String ray_name = SPR_NameFromRayKind(ray_kind); - b32 match = 0; - for (ASE_Layer *ase_layer = sheet->meta.last_layer; ase_layer && !match; ase_layer = ase_layer->prev) + TempArena scratch = BeginScratchNoConflict(); + for (SPR_RayKind ray_kind = 0; ray_kind < SPR_RayKind_COUNT; ++ray_kind) { - if (MatchString(ray_name, ase_layer->name)) + String ray_name = SPR_NameFromRayKind(ray_kind); + b32 match = 0; + for (ASE_Layer *ase_layer = sheet->meta.last_layer; ase_layer && !match; ase_layer = ase_layer->prev) { - match = 1; - for (i64 slice_idx = 0; slice_idx < sheet->meta.frames_count; ++slice_idx) + if (MatchString(ray_name, ase_layer->name)) { - ASE_Cel *ase_cel = &ase_layer->cels[slice_idx]; - SPR_SliceEntry *slice = &sheet->slices[slice_idx]; - ASE_SinglePixelResult ray_pix = ASE_SinglePixelFromCel(ase_cel); - u32 alpha = (ray_pix.v >> 24) & 0xFF; - if (alpha > 0) + match = 1; + for (i64 slice_idx = 0; slice_idx < sheet->meta.frames_count; ++slice_idx) { - // TODO: Different quantization so that 128 equals 0, instead of approximately 0 - f32 rot_x = (((f32)((ray_pix.v >> 0) & 0xFF) / 255.0) * 2.0) - 1; - f32 rot_y = 
(((f32)((ray_pix.v >> 8) & 0xFF) / 255.0) * 2.0) - 1; - Vec2 rot = NormVec2(VEC2(rot_x, rot_y)); - slice->rays[ray_kind].r = rot; - slice->rays[ray_kind].t = ray_pix.pos; + ASE_Cel *ase_cel = &ase_layer->cels[slice_idx]; + SPR_SliceEntry *slice = &sheet->slices[slice_idx]; + ASE_Image image = ASE_DecompressImageFromCel(scratch.arena, ase_cel); + if (!IsRng2Empty(image.bounds)) + { + u32 ray_pix = image.pixels[0]; + u32 alpha = (ray_pix >> 24) & 0xFF; + if (alpha > 0) + { + // TODO: Different quantization so that 128 equals 0, instead of approximately 0 + f32 rot_x = (((f32)((ray_pix >> 0) & 0xFF) / 255.0) * 2.0) - 1; + f32 rot_y = (((f32)((ray_pix >> 8) & 0xFF) / 255.0) * 2.0) - 1; + Vec2 rot = NormVec2(VEC2(rot_x, rot_y)); + slice->rays[ray_kind].r = rot; + slice->rays[ray_kind].t = ase_cel->bounds.p0; + } + } } } } } + EndScratch(scratch); } //- Init spans @@ -207,9 +239,12 @@ SPR_Slice SPR_SliceFromSheet(SPR_SheetKey sheet_key, SPR_SpanKey span_key, i64 f ////////////////////////////// //- Fetch span - SPR_SpanEntry *span = 0; + // FIXME: Ensure slices_count always > 0 + i64 span_start = 0; + i64 span_end = sheet->slices_count; b32 span_matched = 0; { + SPR_SpanEntry *span = 0; if (sheet->ok) { SPR_SpanBin *span_bin = &sheet->span_bins[span_key.v % sheet->span_bins_count]; @@ -224,9 +259,13 @@ SPR_Slice SPR_SliceFromSheet(SPR_SheetKey sheet_key, SPR_SpanKey span_key, i64 f } if (!span) { - // FIXME: Ensure first span always exists in sheet span = sheet->first_span; } + if (span) + { + span_start = span->start; + span_end = span->end; + } } ////////////////////////////// @@ -235,7 +274,7 @@ SPR_Slice SPR_SliceFromSheet(SPR_SheetKey sheet_key, SPR_SpanKey span_key, i64 f SPR_SliceEntry *slice = 0; { // FIXME: Ensure span->end is never <= span->start - i64 slice_idx = span->start + (frame_seq % (span->end - span->start)); + i64 slice_idx = span_start + (frame_seq % (span_end - span_start)); slice = &sheet->slices[slice_idx]; } @@ -258,13 +297,13 @@ SPR_Slice 
SPR_SliceFromSheet(SPR_SheetKey sheet_key, SPR_SpanKey span_key, i64 f { result.tex = slice->atlas->tex_ref; result.tex_rect_uv = slice->atlas_rect_uv; - result.dims = slice->dims; + result.bounds = slice->bounds; } else { result.tex = SPR.unready_tex; result.tex_rect_uv = RNG2(VEC2(0, 0), VEC2(1, 1)); - result.dims = SPR.unready_tex_dims; + result.bounds = RNG2(VEC2(0, 0), SPR.unready_tex_dims); } // Fill rays StaticAssert(countof(result.rays) == countof(slice->rays)); @@ -286,140 +325,148 @@ void SPR_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) // TODO: Distribute rasterization accross wave if (lane->idx == 0) { - if (Atomic32Fetch(&SPR.new_cmds_present)) + i64 cmds_count = 0; + SPR_Cmd *cmds = 0; { - Atomic32Set(&SPR.new_cmds_present, 0); - i64 cmds_count = 0; - SPR_Cmd *cmds = 0; + Lock lock = LockE(&SPR.submit.mutex); + if (SPR.submit.count > 0) { - Lock lock = LockE(&SPR.submit.mutex); + cmds_count = SPR.submit.count; + cmds = PushStructsNoZero(frame_arena, SPR_Cmd, cmds_count); + i64 cmd_idx = 0; + for (SPR_CmdNode *n = SPR.submit.first; n; n = n->next) { - cmds_count = SPR.submit.count; - cmds = PushStructsNoZero(frame_arena, SPR_Cmd, SPR.submit.count); - i64 cmd_idx = 0; - for (SPR_CmdNode *n = SPR.submit.first; n; n = n->next) - { - cmds[cmd_idx] = n->cmd; - ++cmd_idx; - } - // Reset submission queue - SPR.submit.first_free = SPR.submit.first; - SPR.submit.count = 0; - SPR.submit.first = 0; - SPR.submit.last = 0; + cmds[cmd_idx] = n->cmd; + ++cmd_idx; } - Unlock(&lock); + // Reset submission queue + SPR.submit.first_free = SPR.submit.first; + SPR.submit.first = 0; + SPR.submit.last = 0; + SPR.submit.count = 0; } + Unlock(&lock); + } - if (cmds_count > 0) + if (cmds_count > 0) + { + G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_AsyncCopy); + for (i64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx) { - G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_AsyncCopy); - for (i64 cmd_idx = 0; cmd_idx < cmds_count; 
++cmd_idx) - { - SPR_Cmd *cmd = &cmds[cmd_idx]; - SPR_SheetEntry *sheet = cmd->sheet; - ASE_Meta meta = sheet->meta; + TempArena scratch = BeginScratchNoConflict(); + SPR_Cmd *cmd = &cmds[cmd_idx]; + SPR_SheetEntry *sheet = cmd->sheet; + ASE_Meta meta = sheet->meta; - // String encoded = DataFromResource(sheet->key.r); - // String name = NameFromResource(sheet->key.r); - // LogInfoF("Rasterizing sprite sheet %F \"%F\" (%F bytes)", FmtHandle(sheet->key.r), FmtString(name), FmtUint(encoded.len)); + // String encoded = DataFromResource(sheet->key.r); + // String name = NameFromResource(sheet->key.r); + // LogInfoF("Rasterizing sprite sheet %F \"%F\" (%F bytes)", FmtHandle(sheet->key.r), FmtString(name), FmtUint(encoded.len)); - SPR_SliceEntry *slice = &sheet->slices[cmd->slice_idx]; - - ////////////////////////////// - //- Rasterize - - u32 *pixels = PushStructs(frame_arena, u32, slice->dims.x * slice->dims.y); - for (ASE_Layer *ase_layer = sheet->meta.last_layer; ase_layer; ase_layer = ase_layer->prev) - { - SPR_LayerKind kind = SPR_LayerKindFromName(ase_layer->name); - if (kind == SPR_LayerKind_Visual) - { - ASE_Cel *cel = &ase_layer->cels[cmd->slice_idx]; - ASE_RasterizeCel(cel, pixels, slice->dims); - } - } - - ////////////////////////////// - //- Write atlas - - // TODO: Use a more efficient atlas packing algorithm for less wasted space - SPR_Atlas *atlas = SPR.first_atlas; - b32 can_use_atlas = 0; - Vec2I32 pos_in_atlas = Zi; - while (can_use_atlas == 0) - { - // Create atlas - if (!atlas) - { - atlas = PushStruct(perm, SPR_Atlas); - i32 atlas_size = MaxI32(1024, NextPow2U64(MaxI32(slice->dims.x, slice->dims.y))); - atlas->dims = VEC2I32(atlas_size, atlas_size); - { - G_ArenaHandle gpu_perm = G_PermArena(); - atlas->tex = G_PushTexture2D( - gpu_perm, cl, - G_Format_R8G8B8A8_Unorm_Srgb, - atlas->dims, - G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, - ); - atlas->tex_ref = G_PushTexture2DRef(gpu_perm, atlas->tex); - } - SllStackPush(SPR.first_atlas, 
atlas); - ++SPR.atlases_count; - } - // Determine pos in atlas - pos_in_atlas = atlas->cur_pos; - atlas->cur_row_height = MaxI32(atlas->cur_row_height, slice->dims.y); - if (pos_in_atlas.x + slice->dims.x > atlas->dims.x); - { - atlas->cur_pos.x = 0; - atlas->cur_pos.y += atlas->cur_row_height; - atlas->cur_row_height = slice->dims.y; - } - atlas->cur_pos.x += slice->dims.x; - if (atlas->cur_pos.x < atlas->dims.x && atlas->cur_pos.y < atlas->dims.y) - { - can_use_atlas = 1; - } - else - { - atlas = 0; - } - } - - // Fill slice_entry atlas info - { - Rng2I32 atlas_rect = RNG2I32(pos_in_atlas, AddVec2I32(pos_in_atlas, Vec2I32FromVec(slice->dims))); - slice->atlas = atlas; - slice->atlas_rect_uv.p0.x = (f32)atlas_rect.p0.x / (f32)atlas->dims.x; - slice->atlas_rect_uv.p0.y = (f32)atlas_rect.p0.y / (f32)atlas->dims.x; - slice->atlas_rect_uv.p1.x = (f32)atlas_rect.p1.x / (f32)atlas->dims.x; - slice->atlas_rect_uv.p1.y = (f32)atlas_rect.p1.y / (f32)atlas->dims.x; - } - - // Copy to atlas - G_CopyCpuToTexture( - cl, - atlas->tex, VEC3I32(pos_in_atlas.x, pos_in_atlas.y, 0), - pixels, VEC3I32(slice->dims.x, slice->dims.y, 1), - RNG3I32( - VEC3I32(0, 0, 0), - VEC3I32(slice->dims.x, slice->dims.y, 1) - ) - ); - } - i64 completion_target = G_CommitCommandList(cl); + SPR_SliceEntry *slice = &sheet->slices[cmd->slice_idx]; + Vec2 slice_dims = DimsFromRng2(slice->bounds); ////////////////////////////// - //- Update completion targets + //- Composite - for (i64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx) + ASE_Image composite = ASE_PushBlankImage(frame_arena, slice->bounds); + for (ASE_Layer *ase_layer = sheet->meta.last_layer; ase_layer; ase_layer = ase_layer->prev) { - SPR_Cmd *cmd = &cmds[cmd_idx]; - SPR_SliceEntry *slice = &cmd->sheet->slices[cmd->slice_idx]; - Atomic64Set(&slice->atlas_copy_completion_target, completion_target); + SPR_LayerKind kind = SPR_LayerKindFromName(ase_layer->name); + if (kind == SPR_LayerKind_Visual) + { + ASE_Cel *cel = 
&ase_layer->cels[cmd->slice_idx]; + { + TempArena temp = BeginTempArena(scratch.arena); + { + // TODO: Reuse decompressed images for linked cels + ASE_Image image = ASE_DecompressImageFromCel(temp.arena, cel); + ASE_BlendImage(image, composite); + } + EndTempArena(temp); + } + } } + + ////////////////////////////// + //- Write atlas + + // TODO: Use a more efficient atlas packing algorithm for less wasted space + SPR_Atlas *atlas = SPR.first_atlas; + b32 can_use_atlas = 0; + Vec2I32 pos_in_atlas = Zi; + while (can_use_atlas == 0) + { + // Create atlas + if (!atlas) + { + atlas = PushStruct(perm, SPR_Atlas); + i32 atlas_size = MaxI32(1024, NextPow2U64(MaxI32(slice_dims.x, slice_dims.y))); + atlas->dims = VEC2I32(atlas_size, atlas_size); + { + G_ArenaHandle gpu_perm = G_PermArena(); + atlas->tex = G_PushTexture2D( + gpu_perm, cl, + G_Format_R8G8B8A8_Unorm_Srgb, + atlas->dims, + G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, + ); + atlas->tex_ref = G_PushTexture2DRef(gpu_perm, atlas->tex); + } + SllStackPush(SPR.first_atlas, atlas); + ++SPR.atlases_count; + } + // Determine pos in atlas + pos_in_atlas = atlas->cur_pos; + atlas->cur_row_height = MaxI32(atlas->cur_row_height, slice_dims.y); + if (pos_in_atlas.x + slice_dims.x > atlas->dims.x); + { + atlas->cur_pos.x = 0; + atlas->cur_pos.y += atlas->cur_row_height; + atlas->cur_row_height = slice_dims.y; + } + atlas->cur_pos.x += slice_dims.x; + if (atlas->cur_pos.x < atlas->dims.x && atlas->cur_pos.y < atlas->dims.y) + { + can_use_atlas = 1; + } + else + { + atlas = 0; + } + } + + // Fill slice_entry atlas info + { + Rng2I32 atlas_rect = RNG2I32(pos_in_atlas, AddVec2I32(pos_in_atlas, Vec2I32FromVec(slice_dims))); + slice->atlas = atlas; + slice->atlas_rect_uv.p0.x = (f32)atlas_rect.p0.x / (f32)atlas->dims.x; + slice->atlas_rect_uv.p0.y = (f32)atlas_rect.p0.y / (f32)atlas->dims.x; + slice->atlas_rect_uv.p1.x = (f32)atlas_rect.p1.x / (f32)atlas->dims.x; + slice->atlas_rect_uv.p1.y = (f32)atlas_rect.p1.y / 
(f32)atlas->dims.x; + } + + // Copy to atlas + G_CopyCpuToTexture( + cl, + atlas->tex, VEC3I32(pos_in_atlas.x, pos_in_atlas.y, 0), + composite.pixels, VEC3I32(slice_dims.x, slice_dims.y, 1), + RNG3I32( + VEC3I32(0, 0, 0), + VEC3I32(slice_dims.x, slice_dims.y, 1) + ) + ); + EndScratch(scratch); + } + i64 completion_target = G_CommitCommandList(cl); + + ////////////////////////////// + //- Update completion targets + + for (i64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx) + { + SPR_Cmd *cmd = &cmds[cmd_idx]; + SPR_SliceEntry *slice = &cmd->sheet->slices[cmd->slice_idx]; + Atomic64Set(&slice->atlas_copy_completion_target, completion_target); } } } diff --git a/src/sprite/sprite.h b/src/sprite/sprite.h index 90c9837f..98af9323 100644 --- a/src/sprite/sprite.h +++ b/src/sprite/sprite.h @@ -55,7 +55,7 @@ Struct(SPR_Slice) G_Texture2DRef tex; Rng2 tex_rect_uv; - Vec2 dims; + Rng2 bounds; b32 exists; b32 ready; @@ -73,8 +73,8 @@ Struct(SPR_SliceEntry) Xform rays[SPR_RayKind_COUNT]; SPR_Atlas *atlas; - Vec2 dims; Rng2 atlas_rect_uv; + Rng2 bounds; Atomic64 atlas_copy_completion_target; }; @@ -154,14 +154,13 @@ Struct(SPR_Ctx) i64 atlases_count; SPR_Atlas *first_atlas; - Atomic32 new_cmds_present; struct { Mutex mutex; SPR_CmdNode *first; SPR_CmdNode *last; SPR_CmdNode *first_free; - u64 count; + i64 count; } submit; SPR_AsyncCtx async;