gpu refactor progress
This commit is contained in:
parent
34294754c7
commit
f231c8322b
@ -588,14 +588,14 @@ ForceInline i64 Atomic64FetchAdd (Atomic64 *x, i64 a) { return
|
|||||||
#if LanguageIsC
|
#if LanguageIsC
|
||||||
Struct(TicketMutex)
|
Struct(TicketMutex)
|
||||||
{
|
{
|
||||||
Atomic16Padded ticket;
|
Atomic64Padded ticket;
|
||||||
Atomic16Padded serving;
|
Atomic64Padded serving;
|
||||||
};
|
};
|
||||||
|
|
||||||
ForceInline void LockTicketMutex(TicketMutex *tm)
|
ForceInline void LockTicketMutex(TicketMutex *tm)
|
||||||
{
|
{
|
||||||
u16 ticket = Atomic16FetchAdd(&tm->ticket.v, 1);
|
i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1);
|
||||||
while (Atomic16Fetch(&tm->serving.v) != ticket)
|
while (Atomic64Fetch(&tm->serving.v) != ticket)
|
||||||
{
|
{
|
||||||
_mm_pause();
|
_mm_pause();
|
||||||
}
|
}
|
||||||
@ -603,7 +603,7 @@ ForceInline void LockTicketMutex(TicketMutex *tm)
|
|||||||
|
|
||||||
ForceInline void UnlockTicketMutex(TicketMutex *tm)
|
ForceInline void UnlockTicketMutex(TicketMutex *tm)
|
||||||
{
|
{
|
||||||
Atomic16FetchAdd(&tm->serving.v, 1);
|
Atomic64FetchAdd(&tm->serving.v, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -174,9 +174,8 @@ Enum(GPU_ResourceKind)
|
|||||||
Enum(GPU_ResourceFlag)
|
Enum(GPU_ResourceFlag)
|
||||||
{
|
{
|
||||||
GPU_ResourceFlag_None = 0,
|
GPU_ResourceFlag_None = 0,
|
||||||
GPU_ResourceFlag_AllowSrv = (1 << 0),
|
GPU_ResourceFlag_AllowUav = (1 << 0),
|
||||||
GPU_ResourceFlag_AllowUav = (1 << 1),
|
GPU_ResourceFlag_AllowRtv = (1 << 1),
|
||||||
GPU_ResourceFlag_AllowRtv = (1 << 2),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Enum(GPU_HeapKind)
|
Enum(GPU_HeapKind)
|
||||||
@ -215,7 +214,7 @@ Struct(GPU_ResourceDesc)
|
|||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
GPU_HeapKind heap_kind;
|
GPU_HeapKind heap_kind;
|
||||||
u32 element_capacity;
|
u32 size;
|
||||||
u32 element_count;
|
u32 element_count;
|
||||||
u32 element_size;
|
u32 element_size;
|
||||||
} buffer;
|
} buffer;
|
||||||
|
|||||||
@ -380,10 +380,11 @@ JobDef(GPU_D12_StartQueueSync, _, __)
|
|||||||
{
|
{
|
||||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
||||||
HANDLE queue_fences_events[GPU_NumQueues] = ZI;
|
HANDLE queue_fences_events[GPU_NumQueues] = ZI;
|
||||||
u64 queue_fences_seen[GPU_NumQueues] = ZI;
|
i64 queue_fences_seen[GPU_NumQueues] = ZI;
|
||||||
for (i32 i = 0; i < countof(queue_fences_events); ++i)
|
for (i32 i = 0; i < countof(queue_fences_events); ++i)
|
||||||
{
|
{
|
||||||
queue_fences_events[i] = CreateEvent(0, 0, 1, 0);
|
queue_fences_events[i] = CreateEvent(0, 0, 1, 0);
|
||||||
|
queue_fences_seen[i] = -1;
|
||||||
}
|
}
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
@ -391,8 +392,8 @@ JobDef(GPU_D12_StartQueueSync, _, __)
|
|||||||
for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind)
|
for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind)
|
||||||
{
|
{
|
||||||
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
|
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
|
||||||
u64 last_seen = queue_fences_seen[queue_kind];
|
i64 last_seen = queue_fences_seen[queue_kind];
|
||||||
u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
|
i64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
|
||||||
if (completed > last_seen)
|
if (completed > last_seen)
|
||||||
{
|
{
|
||||||
SetFence(&queue->sync_fence, completed);
|
SetFence(&queue->sync_fence, completed);
|
||||||
@ -442,10 +443,15 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
|||||||
{
|
{
|
||||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
||||||
GPU_D12_Resource *r = 0;
|
GPU_D12_Resource *r = 0;
|
||||||
u64 hash = GPU_D12_ReuseHashFromResourceDesc(desc);
|
|
||||||
|
|
||||||
/* Grab reusable */
|
if (desc.kind == GPU_ResourceKind_Buffer)
|
||||||
|
{
|
||||||
|
desc.buffer.size = MaxU64(AlignU64Pow2(desc.buffer.size), Kibi(64));
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc);
|
||||||
#if 0
|
#if 0
|
||||||
|
/* Grab reusable */
|
||||||
{
|
{
|
||||||
u64 bin_index = hash % countof(g->reuse_bins);
|
u64 bin_index = hash % countof(g->reuse_bins);
|
||||||
GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index];
|
GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index];
|
||||||
@ -493,7 +499,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
|||||||
|
|
||||||
if (r->reuse_hash == 0)
|
if (r->reuse_hash == 0)
|
||||||
{
|
{
|
||||||
r->reuse_hash = hash;
|
r->reuse_hash = reuse_hash;
|
||||||
|
|
||||||
switch (desc.kind)
|
switch (desc.kind)
|
||||||
{
|
{
|
||||||
@ -511,19 +517,16 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
|||||||
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||||
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
|
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||||
d3d_desc.Alignment = 0;
|
d3d_desc.Alignment = 0;
|
||||||
d3d_desc.Width = desc.buffer.element_capacity * desc.buffer.element_size;
|
d3d_desc.Width = desc.buffer.size;
|
||||||
d3d_desc.Height = 1;
|
d3d_desc.Height = 1;
|
||||||
d3d_desc.DepthOrArraySize = 1;
|
d3d_desc.DepthOrArraySize = 1;
|
||||||
d3d_desc.MipLevels = 1;
|
d3d_desc.MipLevels = 1;
|
||||||
d3d_desc.SampleDesc.Count = 1;
|
d3d_desc.SampleDesc.Count = 1;
|
||||||
d3d_desc.SampleDesc.Quality = 0;
|
d3d_desc.SampleDesc.Quality = 0;
|
||||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv);
|
|
||||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
|
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
|
||||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
|
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
|
||||||
D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
|
D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
|
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->raw);
|
||||||
D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
|
|
||||||
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw);
|
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
{
|
{
|
||||||
/* TODO: Don't panic */
|
/* TODO: Don't panic */
|
||||||
@ -555,7 +558,6 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
|||||||
d3d_desc.MipLevels = 1;
|
d3d_desc.MipLevels = 1;
|
||||||
d3d_desc.SampleDesc.Count = 1;
|
d3d_desc.SampleDesc.Count = 1;
|
||||||
d3d_desc.SampleDesc.Quality = 0;
|
d3d_desc.SampleDesc.Quality = 0;
|
||||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv);
|
|
||||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
|
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
|
||||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
|
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
|
||||||
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
|
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
@ -758,7 +760,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
|
|||||||
Assert(indices->desc.buffer.element_size == 4);
|
Assert(indices->desc.buffer.element_size == 4);
|
||||||
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT);
|
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT);
|
||||||
}
|
}
|
||||||
ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_capacity;
|
ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_count;
|
||||||
indices_count = indices->desc.buffer.element_count;
|
indices_count = indices->desc.buffer.element_count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -978,17 +980,28 @@ void GPU_CopyToMapped(GPU_Mapped *mapped, String data)
|
|||||||
|
|
||||||
{
|
{
|
||||||
D3D12_RANGE read_range = ZI;
|
D3D12_RANGE read_range = ZI;
|
||||||
u8 *dst = (u8 *)mapped->mem + placed_footprint.Offset;
|
u8 *dst_base = (u8 *)mapped->mem + placed_footprint.Offset;
|
||||||
u8 *src = data.text;
|
u8 *src_base = data.text;
|
||||||
|
|
||||||
u32 z_size = upload_row_size * upload_num_rows;
|
u32 z_size = upload_row_size * upload_num_rows;
|
||||||
|
|
||||||
for (u32 z = 0; z < desc.DepthOrArraySize; ++z)
|
b32 src_overflow = 0;
|
||||||
|
for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z)
|
||||||
{
|
{
|
||||||
u32 z_offset = z * z_size;
|
u32 z_offset = z * z_size;
|
||||||
for (u32 y = 0; y < upload_num_rows; ++y)
|
for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y)
|
||||||
{
|
{
|
||||||
CopyBytes(dst + y * footprint.RowPitch + z_offset, (u8 *)src + y * upload_row_size + z_offset, upload_row_size);
|
u8 *dst = dst_base + y * footprint.RowPitch + z_offset;
|
||||||
|
u8 *src = src_base + y * upload_row_size + z_offset;
|
||||||
|
i64 cpy_size = MinI64(upload_row_size, data.len - (src - src_base));
|
||||||
|
if (cpy_size > 0)
|
||||||
|
{
|
||||||
|
CopyBytes(dst, src, cpy_size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
src_overflow = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
24
src/pp/pp.c
24
src/pp/pp.c
@ -394,7 +394,7 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size)
|
|||||||
__prof;
|
__prof;
|
||||||
GPU_ResourceDesc desc = ZI;
|
GPU_ResourceDesc desc = ZI;
|
||||||
desc.kind = GPU_ResourceKind_Texture2D;
|
desc.kind = GPU_ResourceKind_Texture2D;
|
||||||
desc.flags = GPU_ResourceFlag_AllowSrv | GPU_ResourceFlag_AllowUav | GPU_ResourceFlag_AllowRtv;
|
desc.flags = GPU_ResourceFlag_AllowUav | GPU_ResourceFlag_AllowRtv;
|
||||||
desc.texture.format = format;
|
desc.texture.format = format;
|
||||||
desc.texture.size = VEC3I32(size.x, size.y, 1);
|
desc.texture.size = VEC3I32(size.x, size.y, 1);
|
||||||
desc.texture.mip_levels = 1;
|
desc.texture.mip_levels = 1;
|
||||||
@ -403,15 +403,15 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size)
|
|||||||
|
|
||||||
//- Upload buffer
|
//- Upload buffer
|
||||||
|
|
||||||
GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src)
|
GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_count)
|
||||||
{
|
{
|
||||||
__prof;
|
__prof;
|
||||||
GPU_ResourceDesc desc = ZI;
|
GPU_ResourceDesc desc = ZI;
|
||||||
desc.kind = GPU_ResourceKind_Buffer;
|
desc.kind = GPU_ResourceKind_Buffer;
|
||||||
desc.flags = GPU_ResourceFlag_None;
|
desc.flags = GPU_ResourceFlag_None;
|
||||||
desc.buffer.heap_kind = GPU_HeapKind_Upload;
|
desc.buffer.heap_kind = GPU_HeapKind_Upload;
|
||||||
|
desc.buffer.size = element_size * element_count;
|
||||||
desc.buffer.element_count = element_count;
|
desc.buffer.element_count = element_count;
|
||||||
desc.buffer.element_capacity = element_count;
|
|
||||||
desc.buffer.element_size = element_size;
|
desc.buffer.element_size = element_size;
|
||||||
GPU_Resource *r = GPU_AcquireResource(desc);
|
GPU_Resource *r = GPU_AcquireResource(desc);
|
||||||
{
|
{
|
||||||
@ -423,11 +423,11 @@ GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena)
|
GPU_Resource *AcquireUploadBufferFromArena_(Arena *arena, u32 element_size)
|
||||||
{
|
{
|
||||||
__prof;
|
__prof;
|
||||||
u64 element_size = element_count > 0 ? arena->pos / element_count : 0;
|
u64 element_count = arena->pos / element_size;
|
||||||
GPU_Resource *r = AcquireUploadBuffer(element_count, element_size, (void *)ArenaBase(arena));
|
GPU_Resource *r = AcquireUploadBuffer_(ArenaBase(arena), element_size, element_count);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2198,12 +2198,12 @@ void UpdateUser(P_Window *window)
|
|||||||
/* Acquire transfer buffers */
|
/* Acquire transfer buffers */
|
||||||
/* TODO: Make these static */
|
/* TODO: Make these static */
|
||||||
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
|
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
|
||||||
GPU_Resource *quad_index_buffer = AcquireUploadBuffer(countof(quad_indices), sizeof(*quad_indices), quad_indices);
|
GPU_Resource *quad_index_buffer = AcquireUploadBuffer(quad_indices, u16, countof(quad_indices));
|
||||||
GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_count, g->material_instances_arena);
|
GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_arena, MaterialInstance);
|
||||||
GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_count, g->ui_rect_instances_arena);
|
GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_arena, UiRectInstance);
|
||||||
GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_count, g->ui_shape_verts_arena);
|
GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_arena, UiShapeVert);
|
||||||
GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_count, g->ui_shape_indices_arena);
|
GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_arena, u32);
|
||||||
GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_count, g->grids_arena);
|
GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_arena, MaterialGrid);
|
||||||
|
|
||||||
GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue);
|
GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue);
|
||||||
{
|
{
|
||||||
|
|||||||
@ -297,9 +297,12 @@ void DrawDebugConsole(i32 level, b32 minimized);
|
|||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
//~ Gpu buffer helpers
|
//~ Gpu buffer helpers
|
||||||
|
|
||||||
|
#define AcquireUploadBuffer(src, type, count) AcquireUploadBuffer_((src), sizeof(type), (count))
|
||||||
|
#define AcquireUploadBufferFromArena(arena, type) AcquireUploadBufferFromArena_((arena), sizeof(type))
|
||||||
|
|
||||||
GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size);
|
GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size);
|
||||||
GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src);
|
GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_count);
|
||||||
GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena);
|
GPU_Resource *AcquireUploadBufferFromArena_(Arena *arena, u32 element_size);
|
||||||
JobDecl(DelayReleaseGpuResources, { Fence *begin_fence; u64 begin_fence_target; GPU_Resource **resources; GPU_ReleaseFlag flags; });
|
JobDecl(DelayReleaseGpuResources, { Fence *begin_fence; u64 begin_fence_target; GPU_Resource **resources; GPU_ReleaseFlag flags; });
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
|
|||||||
@ -24,9 +24,7 @@ JobDef(S_LoadTexture, sig, _)
|
|||||||
{
|
{
|
||||||
GPU_ResourceDesc desc = ZI;
|
GPU_ResourceDesc desc = ZI;
|
||||||
desc.kind = GPU_ResourceKind_Texture2D;
|
desc.kind = GPU_ResourceKind_Texture2D;
|
||||||
desc.flags = GPU_ResourceFlag_AllowSrv;
|
|
||||||
desc.flags = GPU_ResourceFlag_AllowUav;
|
desc.flags = GPU_ResourceFlag_AllowUav;
|
||||||
desc.flags = GPU_ResourceFlag_AllowRtv;
|
|
||||||
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
|
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
|
||||||
desc.texture.size = VEC3I32(decoded.width, decoded.height, 1);
|
desc.texture.size = VEC3I32(decoded.width, decoded.height, 1);
|
||||||
desc.texture.mip_levels = 1;
|
desc.texture.mip_levels = 1;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user