GC_Ctx GC = Zi; //////////////////////////////////////////////////////////// //~ Bootstrap void GC_Bootstrap(void) { OnAsyncTick(GC_TickAsync); } //////////////////////////////////////////////////////////// //~ Key helpers GC_FontKey GC_FontKeyFromResource(ResourceKey resource) { GC_FontKey result = Zi; result.r = resource; return result; } u64 GC_HashFromGlyphDesc(GC_GlyphDesc desc) { // TODO: Lower font-size precision to prevent unique hashes for slightly-different font sizes return MixU64s(desc.font.r.v, ((u64)desc.codepoint << 32) | *(u32 *)&desc.font_size); } //////////////////////////////////////////////////////////// //~ Run // TODO: Thread-local cache GC_Run GC_RunFromString(Arena *arena, String str, GC_FontKey font, f32 font_size) { GC_Run result = Zi; if (str.len > 0) { TempArena scratch = BeginScratch(arena); Arena *perm = PermArena(); u64 codepoints_count = 0; u32 *codepoints = 0; { String32 str32 = String32FromString(scratch.arena, str); codepoints_count = str32.len; codepoints = str32.text; } ////////////////////////////// //- Grab glyphs from cache u64 ready_glyphs_count = 0; GC_Glyph **ready_glyphs = PushStructsNoZero(scratch.arena, GC_Glyph *, codepoints_count); u32 *uncached_codepoints = PushStructsNoZero(scratch.arena, u32, codepoints_count); u64 uncached_codepoints_count = 0; // TODO: Include advances for glyphs in run that have rasterized but not finished uploading to atlas u64 pending_glyphs_count = 0; { if (codepoints_count > 0) { Lock lock = LockS(&GC.glyphs_mutex); { i64 completion = G_CompletionValueFromQueue(G_QueueKind_AsyncCopy); for (u64 codepoint_idx = 0; codepoint_idx < codepoints_count; ++codepoint_idx) { u32 codepoint = codepoints[codepoint_idx]; GC_GlyphDesc desc = Zi; desc.font = font; desc.font_size = font_size; desc.codepoint = codepoint; u64 hash = GC_HashFromGlyphDesc(desc); GC_GlyphBin *bin = &GC.glyph_bins[hash % countof(GC.glyph_bins)]; GC_Glyph *glyph = bin->first; for (; glyph; glyph = glyph->next) { if (glyph->hash == hash) break; } if (glyph == 0) { uncached_codepoints[uncached_codepoints_count] = codepoint; uncached_codepoints_count += 1; } else if (completion < Atomic64Fetch(&glyph->async_copy_completion_target)) { pending_glyphs_count += 1; } else { ready_glyphs[ready_glyphs_count] = glyph; ready_glyphs_count += 1; } } } Unlock(&lock); } } ////////////////////////////// //- Create cache entries u64 submit_cmds_count = 0; GC_Cmd *submit_cmds = PushStructsNoZero(scratch.arena, GC_Cmd, uncached_codepoints_count); if (uncached_codepoints_count > 0) { Lock lock = LockE(&GC.glyphs_mutex); { for (u64 uncached_codepoint_idx = 0; uncached_codepoint_idx < uncached_codepoints_count; ++uncached_codepoint_idx) { GC_GlyphDesc desc = Zi; desc.font = font; desc.font_size = font_size; desc.codepoint = uncached_codepoints[uncached_codepoint_idx]; u64 hash = GC_HashFromGlyphDesc(desc); GC_GlyphBin *bin = &GC.glyph_bins[hash % countof(GC.glyph_bins)]; GC_Glyph *glyph = bin->first; for (; glyph; glyph = glyph->next) { if (glyph->hash == hash) break; } if (glyph == 0) { glyph = PushStruct(perm, GC_Glyph); glyph->desc = desc; glyph->hash = hash; Atomic64FetchSet(&glyph->async_copy_completion_target, I64Max); SllStackPush(bin->first, glyph); // Create cmd { GC_Cmd *cmd = &submit_cmds[submit_cmds_count]; cmd->glyph = glyph; ++submit_cmds_count; } } } } Unlock(&lock); } ////////////////////////////// //- Submit cmds if (submit_cmds_count > 0) { Lock lock = LockE(&GC.submit.mutex); for (u64 cmd_idx = 0; cmd_idx < submit_cmds_count; ++cmd_idx) { GC_Cmd *src = &submit_cmds[cmd_idx]; GC_CmdNode *n = GC.submit.first_free; if (n) { SllStackPop(GC.submit.first_free); ZeroStruct(n); } else { n = PushStruct(perm, GC_CmdNode); } n->cmd = *src; GC.submit.count += 1; SllQueuePush(GC.submit.first, GC.submit.last, n); } Unlock(&lock); SignalAsyncTick(); } ////////////////////////////// //- Create run from glyphs f32 baseline_pos = 0; result.rects = PushStructs(arena, GC_RunRect, ready_glyphs_count); result.rects_count = ready_glyphs_count; for (u64 glyph_idx = 0; glyph_idx < ready_glyphs_count; ++glyph_idx) { GC_Glyph *glyph = ready_glyphs[glyph_idx]; GC_RunRect *rect = &result.rects[glyph_idx]; f32 advance = advance = glyph->advance; if (TweakB32("Ceil glyph advances", 0)) { advance = CeilF32(advance); } if (TweakB32("Floor glyph advances", 1)) { advance = FloorF32(advance); } if (TweakB32("Round glyph advances", 0)) { advance = RoundF32(advance); } Rng2 bounds = glyph->bounds; rect->tex = glyph->atlas->tex_ref; rect->tex_slice = glyph->tex_slice; rect->tex_slice_uv = glyph->tex_slice_uv; rect->baseline_pos = baseline_pos; rect->advance = advance; rect->bounds = bounds; if (glyph_idx == 0) { result.bounds = rect->bounds; } else { result.bounds = UnionRng2(result.bounds, rect->bounds); } baseline_pos += rect->advance; result.baseline_length = MaxF32(result.baseline_length, baseline_pos); } if (ready_glyphs_count > 0) { GC_Glyph *glyph = ready_glyphs[0]; result.font_size = glyph->font_size; result.font_ascent = glyph->font_ascent; result.font_descent = glyph->font_descent; result.font_cap = glyph->font_cap; } EndScratch(scratch); } // result.ready = uncached_codepoints_count == 0 && pending_glyphs_count == 0; result.ready = 1; return result; } //////////////////////////////////////////////////////////// //~ Async void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame) { GC_AsyncCtx *async = &GC.async_ctx; ////////////////////////////// //- Begin tick // TODO: Limit cmds processed per-tick if (lane->idx == 0) { ZeroStruct(&async->cmds); Lock lock = LockE(&GC.submit.mutex); { // Pop cmds from submission queue async->cmds.count = GC.submit.count; async->cmds.v = PushStructsNoZero(frame->arena, GC_Cmd, GC.submit.count); u64 cmd_idx = 0; for (GC_CmdNode *n = GC.submit.first; n; n = n->next) { async->cmds.v[cmd_idx] = n->cmd; ++cmd_idx; } // Reset submission queue GC.submit.first_free = GC.submit.first; GC.submit.count = 0; GC.submit.first = 0; GC.submit.last = 0; } Unlock(&lock); } WaveSync(lane); if (async->cmds.count > 0) { ////////////////////////////// //- Rasterize glyphs // TODO: Process cmds unevenly to account for varying work size { RngU64 cmd_idxs = WaveIdxRangeFromCount(lane, async->cmds.count); for (u64 cmd_idx = cmd_idxs.min; cmd_idx < cmd_idxs.max; ++cmd_idx) { GC_Cmd *cmd = &async->cmds.v[cmd_idx]; GC_Glyph *glyph = cmd->glyph; ResourceKey resource = glyph->desc.font.r; GC_GlyphDesc desc = glyph->desc; TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame->arena, desc.codepoint, resource, desc.font_size);; glyph->font_size = desc.font_size; glyph->font_ascent = ttf_result.font_ascent; glyph->font_descent = ttf_result.font_descent; glyph->font_cap = ttf_result.font_cap; glyph->advance = ttf_result.advance; glyph->bounds = ttf_result.bounds; cmd->rasterized = ttf_result; } } // TODO: Only sync first lane? WaveSync(lane); //////////////////////////// //- Allocate atlas slices if (lane->idx == 0) { G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_AsyncCopy); for (u64 cmd_idx = 0; cmd_idx < async->cmds.count; ++cmd_idx) { GC_Cmd *cmd = &async->cmds.v[cmd_idx]; GC_Glyph *glyph = cmd->glyph; GC_GlyphDesc desc = glyph->desc; TTF_GlyphResult ttf_result = cmd->rasterized; Vec2I32 image_dims = ttf_result.image_dims; // TODO: Use a more efficient atlas packing algorithm for less wasted space GC_Atlas *atlas = GC.first_atlas; b32 can_use_atlas = 0; Vec2I32 pos_in_atlas = Zi; while (can_use_atlas == 0) { // Create atlas if (!atlas) { Arena *perm = PermArena(); atlas = PushStruct(perm, GC_Atlas); atlas->dims = VEC2I32(1024, 1024); { G_ArenaHandle gpu_perm = G_PermArena(); atlas->tex = G_PushTexture2D( gpu_perm, cl, G_Format_R8G8B8A8_Unorm_Srgb, atlas->dims, G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, ); atlas->tex_ref = G_PushTexture2DRef(gpu_perm, atlas->tex); } SllStackPush(GC.first_atlas, atlas); ++GC.atlases_count; } // Determine pos in atlas pos_in_atlas = atlas->cur_pos; atlas->cur_row_height = MaxI32(atlas->cur_row_height, image_dims.y); if (pos_in_atlas.x + image_dims.x > atlas->dims.x); { atlas->cur_pos.x = 0; atlas->cur_pos.y += atlas->cur_row_height; atlas->cur_row_height = image_dims.y; } atlas->cur_pos.x += image_dims.x; if (atlas->cur_pos.x < atlas->dims.x && atlas->cur_pos.y < atlas->dims.y) { can_use_atlas = 1; } else { atlas = 0; } } // Atlas info glyph->atlas = atlas; glyph->tex_slice = RNG2I32(pos_in_atlas, AddVec2I32(pos_in_atlas, image_dims)); glyph->tex_slice_uv.p0.x = (f32)glyph->tex_slice.p0.x / (f32)atlas->dims.x; glyph->tex_slice_uv.p0.y = (f32)glyph->tex_slice.p0.y / (f32)atlas->dims.x; glyph->tex_slice_uv.p1.x = (f32)glyph->tex_slice.p1.x / (f32)atlas->dims.x; glyph->tex_slice_uv.p1.y = (f32)glyph->tex_slice.p1.y / (f32)atlas->dims.x; // Copy to atlas u32 *image_pixels = ttf_result.image_pixels; if (image_dims.x > 0 && image_dims.y > 0) { G_CopyCpuToTexture( cl, glyph->atlas->tex, VEC3I32(glyph->tex_slice.p0.x, glyph->tex_slice.p0.y, 0), image_pixels, VEC3I32(image_dims.x, image_dims.y, 1), RNG3I32( VEC3I32(0, 0, 0), VEC3I32(image_dims.x, image_dims.y, 1) ) ); } } i64 completion_target = G_CommitCommandList(cl); // Update completion targets for (u64 cmd_idx = 0; cmd_idx < async->cmds.count; ++cmd_idx) { GC_Cmd *cmd = &async->cmds.v[cmd_idx]; GC_Glyph *glyph = cmd->glyph; Atomic64Set(&glyph->async_copy_completion_target, completion_target); } } } }