gpu buffer -> buffer copy

This commit is contained in:
jacob 2025-09-23 15:08:22 -05:00
parent e54178aea9
commit e4975e06c4
7 changed files with 147 additions and 88 deletions

View File

@ -55,6 +55,9 @@ Struct(SharedArenaCtx)
/* Forwards to PopBytesNoCopy with the byte size of one `type`. */
#define PopStructNoCopy(a, type) PopBytesNoCopy((a), sizeof(type))
/* Forwards to PopBytesNoCopy with the byte size of `n` values of `type`. */
#define PopStructsNoCopy(a, type, n) PopBytesNoCopy((a), sizeof(type) * (n))
/* Address ArenaHeaderSize bytes past the arena pointer, typed as u8 *. */
#define ArenaBase(arena) ((u8 *)(arena) + ArenaHeaderSize)
/* (arena)->pos divided by sizeof(type); integer division, so a trailing
 * partial element (if any) is not counted. */
#define ArenaCount(arena, type) ((arena)->pos / sizeof(type))
/* Returns a pointer to where the next push would be (at alignment of type).
* Equivalent to PushStruct but without actually allocating anything or modifying the arena. */
#define PushDry(a, type) (type *)(_PushDry((a), alignof(type)))
@ -68,11 +71,6 @@ Inline void *PushBytes(Arena *arena, u64 size, u64 align)
return p;
}
/* Returns the address ArenaHeaderSize bytes past the start of `arena`.
 * NOTE(review): a function-like macro named ArenaBase also appears in this
 * view; presumably only one of the two definitions is meant to exist at a
 * time - confirm before relying on this function. */
Inline u8 *ArenaBase(Arena *arena)
{
return (u8 *)arena + ArenaHeaderSize;
}
Inline void PopTo(Arena *arena, u64 pos)
{
Assert(arena->pos >= pos);

View File

@ -292,9 +292,8 @@ Struct(GPU_ResourceDesc)
struct
{
GPU_HeapKind heap_kind;
u32 size;
u32 element_count;
u32 element_size;
u32 count;
u32 stride; /* Defaults to 1 */
} buffer;
struct
{

View File

@ -912,9 +912,11 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
Panic(Lit("Unknown gpu resource type"));
}
u64 buffer_size = 0;
if (desc.kind == GPU_ResourceKind_Buffer)
{
desc.buffer.size = MaxU64(AlignU64Pow2(desc.buffer.size), Kibi(64));
desc.buffer.stride = MaxU32(desc.buffer.stride, 1);
buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64));
}
u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc);
@ -982,19 +984,19 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
: desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK
: D3D12_HEAP_TYPE_DEFAULT
};
Assert(!(desc.flags & GPU_ResourceFlag_Renderable));
D3D12_RESOURCE_DESC d3d_desc = ZI;
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
d3d_desc.Alignment = 0;
d3d_desc.Width = desc.buffer.size;
d3d_desc.Width = buffer_size;
d3d_desc.Height = 1;
d3d_desc.DepthOrArraySize = 1;
d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_Renderable);
r->state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource);
if (FAILED(hr))
@ -1055,15 +1057,15 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
/* Create buffer srv descriptor */
if (desc.kind == GPU_ResourceKind_Buffer
&& desc.buffer.heap_kind != GPU_HeapKind_Download
&& desc.buffer.element_size > 0)
&& desc.buffer.count > 0)
{
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI;
srv_desc.Format = DXGI_FORMAT_UNKNOWN;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Buffer.FirstElement = 0;
srv_desc.Buffer.NumElements = MaxU32(desc.buffer.element_count, 1);
srv_desc.Buffer.StructureByteStride = desc.buffer.element_size;
srv_desc.Buffer.NumElements = desc.buffer.count;
srv_desc.Buffer.StructureByteStride = desc.buffer.stride;
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle);
@ -1161,17 +1163,32 @@ void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags)
/* Returns the index of the resource's SRV descriptor.
 *
 * Returns U32Max when `resource` is null or when the resource has no SRV
 * descriptor, instead of dereferencing a null pointer. */
u32 GPU_GetReadableId(GPU_Resource *resource)
{
    u32 result = U32Max;
    GPU_D12_Resource *r = (GPU_D12_Resource *)resource;
    if (r && r->srv_descriptor)
    {
        result = r->srv_descriptor->index;
    }
    return result;
}
/* Returns the index of the resource's UAV descriptor.
 *
 * Returns U32Max when `resource` is null or when the resource has no UAV
 * descriptor, instead of dereferencing a null pointer. */
u32 GPU_GetWritableId(GPU_Resource *resource)
{
    u32 result = U32Max;
    GPU_D12_Resource *r = (GPU_D12_Resource *)resource;
    if (r && r->uav_descriptor)
    {
        result = r->uav_descriptor->index;
    }
    return result;
}
/* Returns the index of the resource's sampler descriptor.
 *
 * Returns U32Max when `resource` is null or when the resource has no sampler
 * descriptor, instead of dereferencing a null pointer. */
u32 GPU_GetSamplerId(GPU_Resource *resource)
{
    u32 result = U32Max;
    GPU_D12_Resource *r = (GPU_D12_Resource *)resource;
    if (r && r->sampler_descriptor)
    {
        result = r->sampler_descriptor->index;
    }
    return result;
}
Vec2I32 GPU_GetTextureSize2D(GPU_Resource *gpu_resource)
@ -1399,8 +1416,22 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
GPU_D12_Resource *src = cmd->copy.src;
D3D12_RESOURCE_DESC dst_desc = ZI;
D3D12_RESOURCE_DESC src_desc = ZI;
ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc);
ID3D12Resource_GetDesc(src->d3d_resource, &src_desc);
if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
{ /* Copy buffer -> buffer */
u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride;
u64 src_len = src->desc.buffer.count * src->desc.buffer.stride;
u64 cpy_len = MinU64(dst_len, src_len);
if (cpy_len > 0)
{
ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len);
}
}
else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
{ /* Copy buffer -> texture */
D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI;
ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0);
@ -1415,6 +1446,12 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
src_loc.PlacedFootprint = dst_placed_footprint;
ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0);
}
else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
{ /* Copy texture -> buffer */
/* TODO */
Assert(0);
}
cmd = cmd->next;
} break;
@ -1456,7 +1493,7 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
}
if (pipeline
&& cmd->rasterize.index_buffer->desc.buffer.element_count > 0)
&& cmd->rasterize.index_buffer->desc.buffer.count > 0)
{
/* Set descriptor heaps */
if (!descriptor_heaps_set)
@ -1531,17 +1568,17 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
GPU_D12_Resource *indices = cmd->rasterize.index_buffer;
D3D12_INDEX_BUFFER_VIEW ibv = ZI;
ibv.BufferLocation = indices->buffer_gpu_address;
if (indices->desc.buffer.element_size == 2)
if (indices->desc.buffer.stride == 2)
{
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R16_UINT);
}
else
{
Assert(indices->desc.buffer.element_size == 4);
Assert(indices->desc.buffer.stride == 4);
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT);
}
ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_count;
indices_count = indices->desc.buffer.element_count;
ibv.SizeInBytes = indices->desc.buffer.count * indices->desc.buffer.stride;
indices_count = indices->desc.buffer.count;
ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv);
}

View File

@ -1328,7 +1328,7 @@ P_WindowEventArray P_PopWindowEvents(Arena *arena, P_Window *p_window)
}
Arena *events_arena = window->event_arenas[event_arena_index];
P_WindowEventArray events = ZI;
events.count = events_arena->pos / sizeof(P_WindowEvent);
events.count = ArenaCount(events_arena, P_WindowEvent);
events.events = PushStructsNoZero(arena, P_WindowEvent, events.count);
CopyBytes(events.events, ArenaBase(events_arena), events_arena->pos);
ResetArena(events_arena);

View File

@ -26,7 +26,7 @@ void StartupUser(void)
g->user_blended_client = AcquireClient(g->user_client_store);
g->ss_blended = NilSnapshot();
/* Create sampler */
/* Renderer sampler */
g->pt_sampler = GPU_AcquireResource((GPU_ResourceDesc) { .kind = GPU_ResourceKind_Sampler, .sampler.filter = GPU_Filter_MinMagMipPoint });
/* Renderer data arenas */
@ -412,9 +412,8 @@ GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_coun
desc.kind = GPU_ResourceKind_Buffer;
desc.flags = GPU_ResourceFlag_None;
desc.buffer.heap_kind = GPU_HeapKind_Upload;
desc.buffer.size = element_size * element_count;
desc.buffer.element_count = element_count;
desc.buffer.element_size = element_size;
desc.buffer.count = element_count;
desc.buffer.stride = element_size;
GPU_Resource *r = GPU_AcquireResource(desc);
{
__profn("Copy to transfer buffer");
@ -2199,8 +2198,15 @@ void UpdateUser(P_Window *window)
g->ui_target = AcquireGbuffer(GPU_Format_R8G8B8A8_Unorm, g->ui_size);
}
/* Upload noise texture */
if (!g->noise)
/* Init renderer resources */
if (!g->gpu_noise || !g->gpu_quad)
{
GPU_Resource *noise_upload = 0;
GPU_Resource *quad_upload = 0;
GPU_Resource *noise = 0;
GPU_Resource *quad = 0;
GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue);
/* Upload noise */
{
Vec3I32 noise_size = VEC3I32(128, 128, 64);
Resource noise_resource = ResourceFromStore(&GameResources, Lit("noise_128x128x64_16.dat"));
@ -2213,46 +2219,63 @@ void UpdateUser(P_Window *window)
desc.kind = GPU_ResourceKind_Texture3D;
desc.texture.format = GPU_Format_R16_Uint;
desc.texture.size = noise_size;
GPU_Resource *noise = GPU_AcquireResource(desc);
GPU_Resource *upload = 0;
noise = GPU_AcquireResource(desc);
{
u64 footprint_size = GPU_GetFootprintSize(noise);
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.size = footprint_size;
upload = GPU_AcquireResource(upload_desc);
GPU_Mapped mapped = GPU_Map(upload);
upload_desc.buffer.count = footprint_size;
noise_upload = GPU_AcquireResource(upload_desc);
GPU_Mapped mapped = GPU_Map(noise_upload);
GPU_CopyBytesToFootprint(mapped.mem, noise_res_data.text, noise);
GPU_Unmap(mapped);
}
GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue);
{
GPU_TransitionToCopyDst(cl, noise);
GPU_CopyResource(cl, noise, upload);
GPU_CopyResource(cl, noise, noise_upload);
GPU_TransitionToReadable(cl, noise);
}
/* Upload quad indices */
{
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Buffer;
desc.buffer.count = countof(quad_indices);
desc.buffer.stride = sizeof(quad_indices[0]);
quad = GPU_AcquireResource(desc);
{
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.count = desc.buffer.count * desc.buffer.stride;
quad_upload = GPU_AcquireResource(upload_desc);
GPU_Mapped mapped = GPU_Map(quad_upload);
CopyBytes(mapped.mem, quad_indices, sizeof(quad_indices));
GPU_Unmap(mapped);
}
GPU_TransitionToCopyDst(cl, quad);
GPU_CopyResource(cl, quad, quad_upload);
GPU_TransitionToReadable(cl, quad);
}
g->gpu_render_fence_target = GPU_EndCommandList(cl);
YieldOnFence(render_fence, g->gpu_render_fence_target);
GPU_ReleaseResource(upload, GPU_ReleaseFlag_None);
g->noise = noise;
GPU_ReleaseResource(noise_upload, GPU_ReleaseFlag_None);
GPU_ReleaseResource(quad_upload, GPU_ReleaseFlag_None);
g->gpu_noise = noise;
g->gpu_quad = quad;
}
Vec3I32 noise_size = GPU_GetTextureSize3D(g->noise);
/* Acquire transfer buffers */
/* TODO: Make quad indices static */
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
GPU_Resource *quad_index_buffer = AcquireUploadBuffer(quad_indices, u16, countof(quad_indices));
GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_arena, MaterialInstance);
GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_arena, UiRectInstance);
GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_arena, UiShapeVert);
GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_arena, u32);
GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_arena, MaterialGrid);
u64 material_instances_count = g->material_instances_arena->pos / sizeof(MaterialInstance);
u64 ui_rect_instances_count = g->ui_rect_instances_arena->pos / sizeof(UiRectInstance);
u64 ui_shape_verts_count = g->ui_shape_verts_arena->pos / sizeof(UiShapeVert);
u64 ui_shape_indices_count = g->ui_shape_indices_arena->pos / sizeof(u32);
u64 grids_count = g->grids_arena->pos / sizeof(MaterialGrid);
u64 material_instances_count = ArenaCount(g->material_instances_arena, MaterialInstance);
u64 ui_rect_instances_count = ArenaCount(g->ui_rect_instances_arena, UiRectInstance);
u64 ui_shape_verts_count = ArenaCount(g->ui_shape_verts_arena, UiShapeVert);
u64 ui_shape_indices_count = ArenaCount(g->ui_shape_indices_arena, u32);
u64 grids_count = ArenaCount(g->grids_arena, MaterialGrid);
GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue);
{
@ -2279,6 +2302,7 @@ void UpdateUser(P_Window *window)
}
//- Material pass
if (material_instances_count > 0)
{
__profn("Material pass");
GPU_ProfN(cl, Lit("Material pass"));
@ -2298,7 +2322,7 @@ void UpdateUser(P_Window *window)
viewport,
scissor,
material_instances_count,
quad_index_buffer,
g->gpu_quad,
GPU_RasterizeMode_TriangleList);
}
@ -2369,6 +2393,7 @@ void UpdateUser(P_Window *window)
{
__profn("Shade pass");
GPU_ProfN(cl, Lit("Shade pass"));
Vec3I32 noise_size = GPU_GetTextureSize3D(g->gpu_noise);
u32 shade_flags = ShadeFlag_None;
if (effects_disabled)
@ -2390,7 +2415,7 @@ void UpdateUser(P_Window *window)
sig.emittance_flood_tex_urid = GPU_GetWritableId(g->emittance_flood_read);
sig.read_tex_urid = GPU_GetWritableId(g->shade_read);
sig.target_tex_urid = GPU_GetWritableId(g->shade_target);
sig.noise_tex_urid = GPU_GetReadableId(g->noise);
sig.noise_tex_urid = GPU_GetReadableId(g->gpu_noise);
sig.noise_tex_width = noise_size.x;
sig.noise_tex_height = noise_size.y;
sig.noise_tex_depth = noise_size.z;
@ -2433,7 +2458,7 @@ void UpdateUser(P_Window *window)
viewport,
scissor,
1,
quad_index_buffer,
g->gpu_quad,
GPU_RasterizeMode_TriangleList);
}
@ -2456,7 +2481,7 @@ void UpdateUser(P_Window *window)
viewport,
scissor,
ui_rect_instances_count,
quad_index_buffer,
g->gpu_quad,
GPU_RasterizeMode_TriangleList);
}
@ -2488,7 +2513,6 @@ void UpdateUser(P_Window *window)
{
{
GPU_Resource *release_resources[] = {
quad_index_buffer,
material_instance_buffer,
ui_rect_instance_buffer,
ui_shape_verts_buffer,

View File

@ -174,7 +174,8 @@ Struct(SharedUserState)
//- Gpu resources
GPU_Resource *pt_sampler;
GPU_Resource *noise;
GPU_Resource *gpu_noise;
GPU_Resource *gpu_quad;
GPU_Resource *albedo;
GPU_Resource *emittance;

View File

@ -35,7 +35,7 @@ JobDef(S_LoadTexture, sig, _)
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.size = GPU_GetFootprintSize(texture->gpu_texture);
upload_desc.buffer.count = GPU_GetFootprintSize(texture->gpu_texture);
GPU_Resource *upload = GPU_AcquireResource(upload_desc);
{
GPU_Mapped mapped = GPU_Map(upload);