// Global glyph-cache state shared by the functions in this file (glyph bins + mutex,
// atlas list, cmd submission queue, async tick context). Initialized with `Zi`
// (presumably the project's zero-initializer - confirm against the base layer).
GC_Ctx GC = Zi;

////////////////////////////////////////////////////////////
//~ Bootstrap

// Hooks the glyph cache into the async tick system by registering GC_TickAsync
// with OnAsyncTick. Takes no arguments and returns nothing.
void GC_Bootstrap(void)
{
  OnAsyncTick(GC_TickAsync);
}

////////////////////////////////////////////////////////////
//~ Key helpers

// Builds a font key that refers to `resource`.
// Only the `r` member is assigned; every other member keeps its `Zi` initial value.
GC_FontKey GC_FontKeyFromResource(ResourceKey resource)
{
  GC_FontKey key = Zi;
  key.r = resource;
  return key;
}

// Computes the cache hash for a glyph description.
//
// The hash mixes the font's resource key value with a 64-bit word whose high 32 bits
// are the codepoint and whose low 32 bits are the raw bit pattern of the font size.
// Because the exact float bits are hashed, two sizes that differ by any amount
// produce different hashes (see TODO).
u64 GC_HashFromGlyphDesc(GC_GlyphDesc desc)
{
  // TODO: Lower font-size precision to prevent unique hashes for slightly-different font sizes

  // Reinterpret the float's bits through a union rather than a pointer cast:
  // reading an f32 object through a `u32 *` violates the strict-aliasing rule.
  // NOTE(review): assumes `desc.font_size` is an f32 (it is assigned from an f32 by callers in this file).
  union { f32 value; u32 bits; } font_size_pun;
  font_size_pun.value = desc.font_size;

  u64 codepoint_and_size = ((u64)desc.codepoint << 32) | (u64)font_size_pun.bits;
  return MixU64s(desc.font.r.v, codepoint_and_size);
}

////////////////////////////////////////////////////////////
//~ Run

// TODO: Thread-local cache
//
// Builds a run of positioned glyph rects for `str` rendered with `font` at `font_size`.
//
// Steps:
//   1. Decode `str` into codepoints (scratch memory).
//   2. Under a shared lock on the glyph table, sort each codepoint into one of:
//        - uncached: no cache entry with a matching hash exists yet
//        - pending:  an entry exists but the AsyncCopy queue has not reached its completion target
//        - ready:    an entry exists and its upload has completed
//   3. Under an exclusive lock, create cache entries for uncached codepoints (re-checking the
//      table, since another thread may have inserted between the two locks).
//   4. Queue one cmd per newly created entry on the submission queue and signal the async tick.
//   5. Emit one rect per *ready* glyph, laid out left-to-right along the baseline.
//
// Only ready glyphs appear in the returned run, so a run may be missing glyphs until they have
// been rasterized and uploaded. `result.rects` is allocated from `arena`; new cache entries and
// cmd nodes are allocated from the permanent arena.
//
// An empty `str` yields a run with no rects. `result.ready` is currently always 1.
GC_Run GC_RunFromString(Arena *arena, String str, GC_FontKey font, f32 font_size)
{
  GC_Run result = Zi;
  if (str.len > 0)
  {
    TempArena scratch = BeginScratch(arena);
    Arena *perm = PermArena();

    u64 codepoints_count = 0;
    u32 *codepoints = 0;
    {
      String32 str32 = String32FromString(scratch.arena, str);
      codepoints_count = str32.len;
      codepoints = str32.text;
    }

    //////////////////////////////
    //- Grab glyphs from cache

    // Both arrays are sized for the worst case (every codepoint lands in that array)
    u64 ready_glyphs_count = 0;
    GC_Glyph **ready_glyphs = PushStructsNoZero(scratch.arena, GC_Glyph *, codepoints_count);

    u32 *uncached_codepoints = PushStructsNoZero(scratch.arena, u32, codepoints_count);
    u64 uncached_codepoints_count = 0;

    // TODO: Include advances for glyphs in run that have rasterized but not finished uploading to atlas
    u64 pending_glyphs_count = 0;
    {
      if (codepoints_count > 0)
      {
        Lock lock = LockS(&GC.glyphs_mutex);
        {
          // Sample the queue's completion value once so every glyph in this run is judged against the same value
          i64 completion = G_CompletionValueFromQueue(G_QueueKind_AsyncCopy);
          for (u64 codepoint_idx = 0; codepoint_idx < codepoints_count; ++codepoint_idx)
          {
            u32 codepoint = codepoints[codepoint_idx];

            GC_GlyphDesc desc = Zi;
            desc.font = font;
            desc.font_size = font_size;
            desc.codepoint = codepoint;

            // Entries are matched by hash only
            u64 hash = GC_HashFromGlyphDesc(desc);
            GC_GlyphBin *bin = &GC.glyph_bins[hash % countof(GC.glyph_bins)];
            GC_Glyph *glyph = bin->first;
            for (; glyph; glyph = glyph->next)
            {
              if (glyph->hash == hash) break;
            }

            if (glyph == 0)
            {
              uncached_codepoints[uncached_codepoints_count] = codepoint;
              uncached_codepoints_count += 1;
            }
            else if (completion < Atomic64Fetch(&glyph->async_copy_completion_target))
            {
              // Entry exists but its atlas upload has not completed yet
              pending_glyphs_count += 1;
            }
            else
            {
              ready_glyphs[ready_glyphs_count] = glyph;
              ready_glyphs_count += 1;
            }
          }
        }
        Unlock(&lock);
      }
    }

    //////////////////////////////
    //- Create cache entries

    u64 submit_cmds_count = 0;
    GC_Cmd *submit_cmds = PushStructsNoZero(scratch.arena, GC_Cmd, uncached_codepoints_count);
    if (uncached_codepoints_count > 0)
    {
      Lock lock = LockE(&GC.glyphs_mutex);
      {
        for (u64 uncached_codepoint_idx = 0; uncached_codepoint_idx < uncached_codepoints_count; ++uncached_codepoint_idx)
        {
          GC_GlyphDesc desc = Zi;
          desc.font = font;
          desc.font_size = font_size;
          desc.codepoint = uncached_codepoints[uncached_codepoint_idx];

          // Look up again: the entry may have been created since the shared lock was released
          // (by another thread, or by an earlier duplicate codepoint in this same string)
          u64 hash = GC_HashFromGlyphDesc(desc);
          GC_GlyphBin *bin = &GC.glyph_bins[hash % countof(GC.glyph_bins)];
          GC_Glyph *glyph = bin->first;
          for (; glyph; glyph = glyph->next)
          {
            if (glyph->hash == hash) break;
          }

          if (glyph == 0)
          {
            glyph = PushStruct(perm, GC_Glyph);
            glyph->desc = desc;
            glyph->hash = hash;
            // I64Max keeps the glyph classified as pending until the async tick stores the real target
            Atomic64FetchSet(&glyph->async_copy_completion_target, I64Max);
            SllStackPush(bin->first, glyph);
            // Create cmd
            {
              GC_Cmd *cmd = &submit_cmds[submit_cmds_count];
              cmd->glyph = glyph;
              ++submit_cmds_count;
            }
          }
        }
      }
      Unlock(&lock);
    }

    //////////////////////////////
    //- Submit cmds

    if (submit_cmds_count > 0)
    {
      Lock lock = LockE(&GC.submit.mutex);
      for (u64 cmd_idx = 0; cmd_idx < submit_cmds_count; ++cmd_idx)
      {
        GC_Cmd *src = &submit_cmds[cmd_idx];
        // Reuse a node from the free list when available, otherwise allocate a new one
        GC_CmdNode *n = GC.submit.first_free;
        if (n)
        {
          SllStackPop(GC.submit.first_free);
          ZeroStruct(n);
        }
        else
        {
          n = PushStruct(perm, GC_CmdNode);
        }
        n->cmd = *src;
        GC.submit.count += 1;
        SllQueuePush(GC.submit.first, GC.submit.last, n);
      }
      Unlock(&lock);
      SignalAsyncTick();
    }

    //////////////////////////////
    //- Create run from glyphs

    f32 baseline_pos = 0;
    result.rects = PushStructs(arena, GC_RunRect, ready_glyphs_count);
    result.rects_count = ready_glyphs_count;
    for (u64 glyph_idx = 0; glyph_idx < ready_glyphs_count; ++glyph_idx)
    {
      GC_Glyph *glyph = ready_glyphs[glyph_idx];
      GC_RunRect *rect = &result.rects[glyph_idx];

      // The three tweaks are evaluated independently and applied in this order
      f32 advance = glyph->advance;
      if (TweakB32("Ceil glyph advances", 0))
      {
        advance = CeilF32(advance);
      }
      if (TweakB32("Floor glyph advances", 1))
      {
        advance = FloorF32(advance);
      }
      if (TweakB32("Round glyph advances", 0))
      {
        advance = RoundF32(advance);
      }

      Rng2 bounds = glyph->bounds;

      rect->tex = glyph->atlas->tex_ref;
      rect->tex_slice = glyph->tex_slice;
      rect->tex_slice_uv = glyph->tex_slice_uv;

      rect->baseline_pos = baseline_pos;
      rect->advance = advance;

      rect->bounds = bounds;

      // Run bounds start as the first rect's bounds and grow to the union of all rect bounds
      if (glyph_idx == 0)
      {
        result.bounds = rect->bounds;
      }
      else
      {
        result.bounds = UnionRng2(result.bounds, rect->bounds);
      }

      baseline_pos += rect->advance;
      result.baseline_length = MaxF32(result.baseline_length, baseline_pos);
    }

    // Font-wide metrics are taken from the first ready glyph
    if (ready_glyphs_count > 0)
    {
      GC_Glyph *glyph = ready_glyphs[0];
      result.font_size = glyph->font_size;
      result.font_ascent = glyph->font_ascent;
      result.font_descent = glyph->font_descent;
      result.font_cap = glyph->font_cap;
    }

    EndScratch(scratch);
  }

  // NOTE: The readiness check below is disabled and the run is always reported as ready.
  // The counters it references are declared inside the `if (str.len > 0)` block above, so
  // re-enabling it requires hoisting them to function scope.
  // result.ready = uncached_codepoints_count == 0 && pending_glyphs_count == 0;
  result.ready = 1;

  return result;
}

////////////////////////////////////////////////////////////
//~ Async

// Async tick for the glyph cache (registered with OnAsyncTick in GC_Bootstrap).
//
// Runs on every lane of a wave:
//   1. Lane 0 drains the submission queue into `GC.async_ctx.cmds` (allocated from `frame->arena`)
//      and recycles the queue nodes onto the free list.
//   2. All lanes rasterize their share of the cmds (range chosen by WaveIdxRangeFromCount) and
//      store per-glyph metrics on the cache entries.
//   3. Lane 0 assigns each glyph a slice in an atlas texture, records CPU->texture copies into a
//      command list on the AsyncCopy queue, commits it, and stores the returned completion
//      target on every processed glyph.
//
// NOTE(review): WaveSync is presumably a barrier across the wave's lanes - confirm.
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame)
{
  GC_AsyncCtx *async = &GC.async_ctx;

  //////////////////////////////
  //- Begin tick

  // TODO: Limit cmds processed per-tick

  if (lane->idx == 0)
  {
    ZeroStruct(&async->cmds);

    Lock lock = LockE(&GC.submit.mutex);
    {
      // Pop cmds from submission queue
      async->cmds.count = GC.submit.count;
      async->cmds.v = PushStructsNoZero(frame->arena, GC_Cmd, GC.submit.count);
      u64 cmd_idx = 0;
      for (GC_CmdNode *n = GC.submit.first; n; n = n->next)
      {
        async->cmds.v[cmd_idx] = n->cmd;
        ++cmd_idx;
      }
      // Recycle the drained nodes by splicing the whole queue onto the front of the free list.
      // (Plainly overwriting `first_free` would drop any nodes still sitting on the free list.)
      if (GC.submit.last)
      {
        GC.submit.last->next = GC.submit.first_free;
        GC.submit.first_free = GC.submit.first;
      }
      // Reset submission queue
      GC.submit.count = 0;
      GC.submit.first = 0;
      GC.submit.last = 0;
    }
    Unlock(&lock);
  }

  // Other lanes must not read `async->cmds` before lane 0 has filled it in
  WaveSync(lane);

  if (async->cmds.count > 0)
  {
    //////////////////////////////
    //- Rasterize glyphs

    // TODO: Process cmds unevenly to account for varying work size

    {
      RngU64 cmd_idxs = WaveIdxRangeFromCount(lane, async->cmds.count);
      for (u64 cmd_idx = cmd_idxs.min; cmd_idx < cmd_idxs.max; ++cmd_idx)
      {
        GC_Cmd *cmd = &async->cmds.v[cmd_idx];
        GC_Glyph *glyph = cmd->glyph;
        ResourceKey resource = glyph->desc.font.r;
        GC_GlyphDesc desc = glyph->desc;
        TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame->arena, desc.codepoint, resource, desc.font_size);
        glyph->font_size = desc.font_size;
        glyph->font_ascent = ttf_result.font_ascent;
        glyph->font_descent = ttf_result.font_descent;
        glyph->font_cap = ttf_result.font_cap;
        glyph->advance = ttf_result.advance;
        glyph->bounds = ttf_result.bounds;
        // Keep the rasterized image around for the atlas upload below
        cmd->rasterized = ttf_result;
      }
    }

    // TODO: Only sync first lane?

    WaveSync(lane);

    ////////////////////////////
    //- Allocate atlas slices

    if (lane->idx == 0)
    {
      G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_AsyncCopy);
      for (u64 cmd_idx = 0; cmd_idx < async->cmds.count; ++cmd_idx)
      {
        GC_Cmd *cmd = &async->cmds.v[cmd_idx];
        GC_Glyph *glyph = cmd->glyph;
        TTF_GlyphResult ttf_result = cmd->rasterized;

        Vec2I32 image_dims = ttf_result.image_dims;

        // TODO: Use a more efficient atlas packing algorithm for less wasted space
        // NOTE: A glyph image larger than a whole atlas can never be placed, so this loop
        // would keep creating atlases without terminating. Oversized glyphs are not handled here.
        GC_Atlas *atlas = GC.first_atlas;
        b32 can_use_atlas = 0;
        Vec2I32 pos_in_atlas = Zi;
        while (can_use_atlas == 0)
        {
          // Create atlas
          if (!atlas)
          {
            Arena *perm = PermArena();
            atlas = PushStruct(perm, GC_Atlas);
            atlas->dims = VEC2I32(1024, 1024);
            {
              G_ArenaHandle gpu_perm = G_PermArena();
              atlas->tex = G_PushTexture2D(
                gpu_perm, cl,
                G_Format_R8G8B8A8_Unorm_Srgb,
                atlas->dims,
                G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,
              );
              atlas->tex_ref = G_PushTexture2DRef(gpu_perm, atlas->tex);
            }
            SllStackPush(GC.first_atlas, atlas);
            ++GC.atlases_count;
          }

          // Determine pos in atlas (simple row packing):
          // wrap to the start of the next row when the glyph does not fit in the current one
          if (atlas->cur_pos.x + image_dims.x > atlas->dims.x)
          {
            atlas->cur_pos.x = 0;
            atlas->cur_pos.y += atlas->cur_row_height;
            atlas->cur_row_height = 0;
          }
          // Read the position only after wrapping so a wrapped glyph lands on the new row
          pos_in_atlas = atlas->cur_pos;
          atlas->cur_row_height = MaxI32(atlas->cur_row_height, image_dims.y);
          atlas->cur_pos.x += image_dims.x;

          // The glyph's full extent must lie inside the atlas on both axes
          if (atlas->cur_pos.x <= atlas->dims.x && pos_in_atlas.y + image_dims.y <= atlas->dims.y)
          {
            can_use_atlas = 1;
          }
          else
          {
            // Atlas is full for this glyph; a fresh atlas is created on the next iteration
            atlas = 0;
          }
        }

        // Atlas info
        glyph->atlas = atlas;
        glyph->tex_slice = RNG2I32(pos_in_atlas, AddVec2I32(pos_in_atlas, image_dims));
        // Normalize each axis by its own atlas dimension
        glyph->tex_slice_uv.p0.x = (f32)glyph->tex_slice.p0.x / (f32)atlas->dims.x;
        glyph->tex_slice_uv.p0.y = (f32)glyph->tex_slice.p0.y / (f32)atlas->dims.y;
        glyph->tex_slice_uv.p1.x = (f32)glyph->tex_slice.p1.x / (f32)atlas->dims.x;
        glyph->tex_slice_uv.p1.y = (f32)glyph->tex_slice.p1.y / (f32)atlas->dims.y;

        // Copy to atlas (skipped for empty images)
        u32 *image_pixels = ttf_result.image_pixels;
        if (image_dims.x > 0 && image_dims.y > 0)
        {
          G_CopyCpuToTexture(
            cl,
            glyph->atlas->tex, VEC3I32(glyph->tex_slice.p0.x, glyph->tex_slice.p0.y, 0),
            image_pixels, VEC3I32(image_dims.x, image_dims.y, 1),
            RNG3I32(
              VEC3I32(0, 0, 0),
              VEC3I32(image_dims.x, image_dims.y, 1)
            )
          );
        }
      }
      i64 completion_target = G_CommitCommandList(cl);

      // Update completion targets.
      // GC_RunFromString treats a glyph as ready once the AsyncCopy queue's completion value
      // is no longer below this target.
      for (u64 cmd_idx = 0; cmd_idx < async->cmds.count; ++cmd_idx)
      {
        GC_Cmd *cmd = &async->cmds.v[cmd_idx];
        GC_Glyph *glyph = cmd->glyph;
        Atomic64Set(&glyph->async_copy_completion_target, completion_target);
      }
    }
  }
}