From ddf4f5c421af71014c68982ee136cae8e18e8c0a Mon Sep 17 00:00:00 2001 From: jacob Date: Mon, 15 Sep 2025 23:01:30 -0500 Subject: [PATCH] gpu layer progress --- src/base/base.h | 42 ++-- src/font/font.c | 4 +- src/gpu/gpu.h | 36 +++- src/gpu/gpu_dx12/gpu_dx12.c | 413 +++++++++++++++++++++++++++++++++--- src/gpu/gpu_dx12/gpu_dx12.h | 74 +++++-- src/pp/pp.c | 57 ++--- src/pp/pp.h | 4 +- src/pp/pp_draw.gpu | 10 +- 8 files changed, 530 insertions(+), 110 deletions(-) diff --git a/src/base/base.h b/src/base/base.h index 1af18dbe..e978df8d 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -286,14 +286,14 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); */ #define CheckNil(nil,p) ((p) == 0 || (p) == nil) -#define SetNil(nil,p) ((p) = nil) +#define SetNil(nil,p) ((p) = nil) //- Singly linked list stack (first & next pointers) -#define StackPushN(f,n,next) ((n)->next=(f), (f)=(n)) -#define StackPopN(f,next) ((f)=(f)->next) -#define StackPush(f,n) StackPushN(f,n,next) -#define StackPop(f) StackPopN(f,next) +#define StackPushN(f,n,next) ((n)->next=(f), (f)=(n)) +#define StackPopN(f,next) ((f)=(f)->next) +#define StackPush(f,n) StackPushN(f,n,next) +#define StackPop(f) StackPopN(f,next) //- Singly linked list queue (first, last, & next pointers) @@ -333,29 +333,29 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); (CheckNil(nil,(n)->next) ? 
(0) : \ ((n)->next->prev = (n)->prev))) -#define DllPushBackNPZ(nil,f,l,n,next,prev) DllInsertNPZ(nil,f,l,l,n,next,prev) -#define DllPushFrontNPZ(nil,f,l,n,next,prev) DllInsertNPZ(nil,l,f,f,n,prev,next) -#define DllInsertNP(f,l,p,n,next,prev) DllInsertNPZ(0,f,l,p,n,next,prev) -#define DllPushBackNP(f,l,n,next,prev) DllPushBackNPZ(0,f,l,n,next,prev) -#define DllPushFrontNP(f,l,n,next,prev) DllPushFrontNPZ(0,f,l,n,next,prev) -#define DllRemoveNP(f,l,n,next,prev) DllRemoveNPZ(0,f,l,n,next,prev) -#define DllInsert(f,l,p,n) DllInsertNPZ(0,f,l,p,n,next,prev) -#define DllPushBack(f,l,n) DllPushBackNPZ(0,f,l,n,next,prev) -#define DllPushFront(f,l,n) DllPushFrontNPZ(0,f,l,n,next,prev) -#define DllRemove(f,l,n) DllRemoveNPZ(0,f,l,n,next,prev) +#define DllPushBackNPZ(nil,f,l,n,next,prev) DllInsertNPZ(nil,f,l,l,n,next,prev) +#define DllPushFrontNPZ(nil,f,l,n,next,prev) DllInsertNPZ(nil,l,f,f,n,prev,next) +#define DllInsertNP(f,l,p,n,next,prev) DllInsertNPZ(0,f,l,p,n,next,prev) +#define DllPushBackNP(f,l,n,next,prev) DllPushBackNPZ(0,f,l,n,next,prev) +#define DllPushFrontNP(f,l,n,next,prev) DllPushFrontNPZ(0,f,l,n,next,prev) +#define DllRemoveNP(f,l,n,next,prev) DllRemoveNPZ(0,f,l,n,next,prev) +#define DllInsert(f,l,p,n) DllInsertNPZ(0,f,l,p,n,next,prev) +#define DllPushBack(f,l,n) DllPushBackNPZ(0,f,l,n,next,prev) +#define DllPushFront(f,l,n) DllPushFrontNPZ(0,f,l,n,next,prev) +#define DllRemove(f,l,n) DllRemoveNPZ(0,f,l,n,next,prev) //////////////////////////////// //~ Color helper macros //- Rgba 32 bit helpers -#define Rgb32(r, g, b) Rgba32((r), (g), (b), 0xFF) -#define Rgba32(r, g, b, a) (u32)((u32)(r) | ((u32)(g) << 8) | ((u32)(b) << 16) | ((u32)(a) << 24)) -#define Bgr32(rgb) ((((rgb >> 0) & 0xFF) << 16) | (((rgb >> 8) & 0xFF) << 8) | (((rgb >> 16) & 0xFF) << 0)) +#define Rgb32(r, g, b) Rgba32((r), (g), (b), 0xFF) +#define Rgba32(r, g, b, a) (u32)((u32)(r) | ((u32)(g) << 8) | ((u32)(b) << 16) | ((u32)(a) << 24)) +#define Bgr32(rgb) ((((rgb >> 0) & 0xFF) << 16) | (((rgb 
>> 8) & 0xFF) << 8) | (((rgb >> 16) & 0xFF) << 0)) //- Rgba 32 bit float float helpers -#define _Rgb32U8FromF(fl) ((u8)((fl * 255.0) + 0.5)) +#define _Rgb32U8FromF(fl) ((u8)((fl * 255.0) + 0.5)) #define Rgba32F(r, g, b, a) Rgba32(_Rgb32U8FromF((r)), _Rgb32U8FromF((g)), _Rgb32U8FromF((b)), _Rgb32U8FromF((a))) -#define Rgb32F(r, g, b) Rgba32F((r), (g), (b), 1.f) +#define Rgb32F(r, g, b) Rgba32F((r), (g), (b), 1.f) #define Alpha32F(color, a) ((color) & 0x00FFFFFF) | (_Rgb32U8FromF((a)) << 24) @@ -717,7 +717,7 @@ Struct(ComputeShader) { Resource resource; }; * and fiber suspend/resume will be emulated using OS thread primitives. * This is slow but allows for easier debugging in tricky cases * since the debugger won't be confused by fiber context switching. */ -#define VirtualFibersEnabled 1 +#define VirtualFibersEnabled 0 # define MaxFibers 4096 StaticAssert(MaxFibers < I16Max); /* MaxFibers should fit in FiberId */ diff --git a/src/font/font.c b/src/font/font.c index f4d2a002..58639fce 100644 --- a/src/font/font.c +++ b/src/font/font.c @@ -55,7 +55,9 @@ JobDef(F_Load, sig, _) desc.texture.format = GPU_Format_R8G8B8A8_Unorm; desc.texture.size = VEC3I32(64, 64, 1); texture = GPU_AcquireResource(desc); - GPU_CopyString(0, texture, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels)); + GPU_Mapped mapped = GPU_Map(texture); + GPU_CopyToMapped(&mapped, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels)); + GPU_Unmap(&mapped); } /* Acquire store memory */ diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h index 418862b2..79603bbb 100644 --- a/src/gpu/gpu.h +++ b/src/gpu/gpu.h @@ -164,7 +164,6 @@ Enum(GPU_Format) Enum(GPU_ResourceKind) { - GPU_ResourceKind_Unknown, GPU_ResourceKind_Buffer, GPU_ResourceKind_Texture1D, GPU_ResourceKind_Texture2D, @@ -174,10 +173,10 @@ Enum(GPU_ResourceKind) Enum(GPU_ResourceFlag) { - GPU_ResourceFlag_None = 0, - GPU_ResourceFlag_AllowSrv = (1 << 0), - GPU_ResourceFlag_AllowUav = (1 << 1), - 
GPU_ResourceFlag_AllowRtv = (1 << 2) + GPU_ResourceFlag_None = 0, + GPU_ResourceFlag_AllowSrv = (1 << 0), + GPU_ResourceFlag_AllowUav = (1 << 1), + GPU_ResourceFlag_AllowRtv = (1 << 2), }; Enum(GPU_HeapKind) @@ -193,6 +192,14 @@ Enum(GPU_ReleaseFlag) GPU_ReleaseFlag_Reuse = (1 << 0) }; +Enum(GPU_ResourceIdKind) +{ + GPU_ResourceIdKind_None, + GPU_ResourceIdKind_Srv, + GPU_ResourceIdKind_Uav, + GPU_ResourceIdKind_Sampler, +}; + Struct(GPU_ResourceDesc) { GPU_ResourceKind kind; @@ -208,13 +215,19 @@ Struct(GPU_ResourceDesc) struct { GPU_HeapKind heap_kind; - u32 size; + u32 element_capacity; u32 element_count; u32 element_size; } buffer; }; }; +Struct(GPU_Mapped) +{ + GPU_Resource *resource; + void *mem; +}; + //////////////////////////////// //~ Rasterizer types @@ -285,7 +298,7 @@ GPU_Fence GPU_GetGlobalFence(void); GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc); void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags); -u32 GPU_GetResourceId(GPU_Resource *resource); +u32 GPU_GetResourceId(GPU_Resource *resource, GPU_ResourceIdKind kind); Vec2I32 GPU_GetTextureSize(GPU_Resource *resource); //////////////////////////////// @@ -342,7 +355,14 @@ void GPU_Compute_(GPU_CommandList *cl, //~ @hookdecl Resource copy operations void GPU_CopyResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src); -void GPU_CopyString(GPU_CommandList *cl, GPU_Resource *dst, String src); + +//////////////////////////////// +//~ @hookdecl Map operations + +GPU_Mapped GPU_Map(GPU_Resource *r); +void GPU_Unmap(GPU_Mapped *mapped); + +void GPU_CopyToMapped(GPU_Mapped *mapped, String data); //////////////////////////////// //~ @hookdecl Memory info operations diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 9ab53868..6449f81b 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -1,30 +1,22 @@ GPU_D12_SharedState GPU_D12_shared_state = ZI; -//////////////////////////////// -//~ State operations 
-GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id) -{ - PERSIST GPU_D12_FiberState *fiber_states[MaxFibers] = ZI; - GPU_D12_FiberState *result = fiber_states[fiber_id]; - if (!result) - { - Arena *perm = PermArena(); - fiber_states[fiber_id] = PushStruct(perm, GPU_D12_FiberState); - result = fiber_states[fiber_id]; - } - return result; -} - -GPU_D12_SharedState *GPU_D12_GetSharedState(void) -{ - PERSIST GPU_D12_SharedState g = ZI; - return &g; -} //////////////////////////////// //~ Helpers +GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_FiberState **f = &g->fiber_states[fiber_id]; + if (!*f) + { + Arena *perm = PermArena(); + *f = PushStruct(perm, GPU_D12_FiberState); + } + return *f; +} + DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format) { return (DXGI_FORMAT)format; @@ -49,6 +41,99 @@ GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl) return cmd; } +u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc) +{ + /* TODO */ + u64 result = 0; + return result; +} + +//////////////////////////////// +//~ Startup + +void GPU_D12_Startup(void) +{ + GPU_D12_InitDevice(); +} + +//////////////////////////////// +//~ Device initialization + +void GPU_D12_InitDevice(void) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + TempArena scratch = BeginScratchNoConflict(); + HRESULT hr = 0; + u32 dxgi_factory_flags = 0; + + /* Create factory */ + { + __profn("Create factory"); + hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory); + if (FAILED(hr)) + { + Panic(Lit("Failed to initialize DXGI factory")); + } + } + + /* Create device */ + { + __profn("Create device"); + IDXGIAdapter1 *adapter = 0; + ID3D12Device *device = 0; + String error = Lit("Could not initialize GPU device."); + String first_gpu_name = ZI; + u32 adapter_index = 0; + b32 skip = 0; /* For debugging iGPU */ + for (;;) + { + { + hr = 
IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); + } + if (SUCCEEDED(hr)) + { + DXGI_ADAPTER_DESC1 desc; + IDXGIAdapter1_GetDesc1(adapter, &desc); + if (first_gpu_name.len == 0) + { + first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); + } + { + hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); + } + if (SUCCEEDED(hr) && !skip) + { + break; + } + skip = 0; + if (device) { ID3D12Device_Release(device); } /* device is still 0 when D3D12CreateDevice failed */ + IDXGIAdapter1_Release(adapter); + adapter = 0; + device = 0; + ++adapter_index; + } + else + { + break; + } + } + if (!device) + { + if (first_gpu_name.len > 0) + { + error = StringF(scratch.arena, + "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.", + FmtString(first_gpu_name)); + } + Panic(error); + } + g->adapter = adapter; + g->device = device; + } + + EndScratch(scratch); +} + //////////////////////////////// //~ Pipeline operations @@ -58,16 +143,120 @@ GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) return 0; } +//////////////////////////////// +//~ Queue operations + +GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind) +{ + /* TODO */ + return 0; +} + //////////////////////////////// //~ Raw command list GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind) { - return 0; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); + + /* Pull first completed command list from queue if ready */ + GPU_D12_RawCommandList *cl = ZI; + { + Lock lock = LockE(&queue->submit_mutex); + { + u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence); + cl = queue->first_submitted_cl; + if (cl && cl->submit_fence_target <= completed) + { + QueuePop(queue->first_submitted_cl, queue->last_submitted_cl); + } + else + { + cl = 0; + } + } 
+ Unlock(&lock); + } + + /* Allocate new command list if none are available */ + if (!cl) + { + Arena *perm = PermArena(); + { + PushAlign(perm, CachelineSize); + cl = PushStruct(perm, GPU_D12_RawCommandList); + PushAlign(perm, CachelineSize); + } + cl->queue = queue; + + HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); + if (FAILED(hr)) + { + Panic(Lit("Failed to create command allocator")); + } + + hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl); + if (FAILED(hr)) + { + Panic(Lit("Failed to create command list")); + } + + hr = ID3D12GraphicsCommandList_Close(cl->cl); + if (FAILED(hr)) + { + Panic(Lit("Failed to close command list during initialization")); + } + } + + /* Reset command list */ + { + HRESULT hr = ID3D12CommandAllocator_Reset(cl->ca); + if (FAILED(hr)) + { + Panic(Lit("Failed to reset command allocator")); + } + + hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0); + if (FAILED(hr)) + { + Panic(Lit("Failed to reset command list")); + } + } + + return cl; } void GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl) { + GPU_D12_Queue *queue = cl->queue; + + /* Close */ + { + __profn("Close DX12 command list"); + HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl); + if (FAILED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to close command list before execution")); + } + } + + /* Submit */ + { + __profn("Execute"); + Lock lock = LockE(&queue->submit_mutex); + { + u64 target = ++queue->submit_fence_target; + cl->submit_fence_target = target; + /* Execute */ + ID3D12CommandQueue_ExecuteCommandLists(queue->cq, 1, (ID3D12CommandList **)&cl->cl); + ID3D12CommandQueue_Signal(queue->cq, queue->submit_fence, target); + /* Append */ + QueuePush(queue->first_submitted_cl, queue->last_submitted_cl, cl); + } + Unlock(&lock); + } } //////////////////////////////// @@ -75,7 +264,7 @@ void 
GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl) void GPU_Startup(void) { - GPU_D12_SharedState *g = GPU_D12_GetSharedState(); + GPU_D12_Startup(); } //////////////////////////////// @@ -106,8 +295,119 @@ GPU_Fence GPU_GetGlobalFence(void) GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) { - /* TODO */ - return 0; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Resource *r = 0; + + /* Grab reusable */ +#if 0 + u64 hash = GPU_D12_ReuseHashFromResourceDesc(desc); + u64 bin_index = hash % countof(g->reuse_bins); + GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index]; + { + Lock lock = LockE(&bin->mutex); + for (r = bin->first; r; r = r->next_reuse) + { + if (r->reuse_hash == hash) + { + DllRemoveNP(bin->first, bin->last, r, next_reuse, prev_reuse); + break; + } + } + Unlock(&lock); + } +#else + u64 hash = 0; +#endif + + if (!r) + { + { + Arena *perm = PermArena(); + PushAlign(perm, CachelineSize); + r = PushStruct(perm, GPU_D12_Resource); + PushAlign(perm, CachelineSize); + } + r->reuse_hash = hash; + + switch (desc.kind) + { + /* Buffer */ + case GPU_ResourceKind_Buffer: + { + D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + D3D12_HEAP_PROPERTIES heap_props = { + .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD + : desc.buffer.heap_kind == GPU_HeapKind_Download ? 
D3D12_HEAP_TYPE_READBACK + : D3D12_HEAP_TYPE_DEFAULT + }; + D3D12_RESOURCE_DESC d3d_desc = ZI; + d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + d3d_desc.Format = DXGI_FORMAT_UNKNOWN; + d3d_desc.Alignment = 0; + d3d_desc.Width = desc.buffer.element_capacity * desc.buffer.element_size; + d3d_desc.Height = 1; + d3d_desc.DepthOrArraySize = 1; + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv); + D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST; + D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; + D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0; + HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw); + if (FAILED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to create buffer resource")); + } + } break; + + /* Texture */ + case GPU_ResourceKind_Texture1D: + case GPU_ResourceKind_Texture2D: + case GPU_ResourceKind_Texture3D: + { + D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + D3D12_HEAP_PROPERTIES heap_props = { + .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD + : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK + : D3D12_HEAP_TYPE_DEFAULT + }; + D3D12_RESOURCE_DESC d3d_desc = ZI; + d3d_desc.Dimension = desc.kind == GPU_ResourceKind_Texture1D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE1D + : desc.kind == GPU_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D + : D3D12_RESOURCE_DIMENSION_TEXTURE3D; /* must be the D3D12 dimension enum, not the GPU_ResourceKind value */ + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.texture.format); + d3d_desc.Alignment = 0; + d3d_desc.Width = desc.texture.size.x; + d3d_desc.Height = desc.texture.size.y; + d3d_desc.DepthOrArraySize = desc.texture.size.z; /* depth comes from z; y is already used for Height */ + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv); + D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; + D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; + D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? 
&clear_value : 0; + HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw); + if (FAILED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to create buffer resource")); + } + } break; + + /* TODO: Sampler */ + } + } + + return (GPU_Resource *)r; } void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags) @@ -115,7 +415,7 @@ void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags) /* TODO */ } -u32 GPU_GetResourceId(GPU_Resource *resource) +u32 GPU_GetResourceId(GPU_Resource *resource, GPU_ResourceIdKind kind) { /* TODO */ return 0; @@ -270,7 +570,7 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl, JobCounter *counter) ibv.BufferLocation = indices->gpu_address; ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(indices->format); ibv.SizeInBytes = indices->size; - indices_count = indices->count; + indices_count = indices->element_count; } } @@ -448,14 +748,61 @@ void GPU_CopyResource(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, GPU_Resour cmd->copy.src_resource = src; } -void GPU_CopyString(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, String src) +//////////////////////////////// +//~ @hookdef Map hooks + +GPU_Mapped GPU_Map(GPU_Resource *gpu_r) { - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Resource *dst = (GPU_D12_Resource *)gpu_dst; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CommandKind_Copy; - cmd->copy.dst = dst; - cmd->copy.src_string = src; + GPU_Mapped result = ZI; + result.resource = gpu_r; + GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r; + D3D12_RANGE read_range = ZI; + HRESULT hr = ID3D12Resource_Map(r->raw, 0, &read_range, &result.mem); + if (FAILED(hr) || !result.mem) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to map command buffer resource")); + } + return result; +} + +void GPU_Unmap(GPU_Mapped *m) +{ + GPU_D12_Resource *r = (GPU_D12_Resource 
*)m->resource; + ID3D12Resource_Unmap(r->raw, 0, 0); +} + +void GPU_CopyToMapped(GPU_Mapped *mapped, String data) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Resource *r = (GPU_D12_Resource *)mapped->resource; + + D3D12_RESOURCE_DESC desc = ZI; + ID3D12Resource_GetDesc(r->raw, &desc); + + u64 upload_size = 0; + u64 upload_row_size = 0; + u32 upload_num_rows = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; + ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); + D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; + + { + D3D12_RANGE read_range = ZI; + u8 *dst = (u8 *)mapped->mem + placed_footprint.Offset; + u8 *src = data.text; + + u32 z_size = upload_row_size * upload_num_rows; + + for (u32 z = 0; z < desc.DepthOrArraySize; ++z) + { + u32 z_offset = z * z_size; + for (u32 y = 0; y < upload_num_rows; ++y) + { + CopyBytes(dst + y * footprint.RowPitch + z_offset, (u8 *)src + y * upload_row_size + z_offset, upload_row_size); + } + } + } } //////////////////////////////// diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 020924ea..9156b65c 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -1,10 +1,13 @@ //////////////////////////////// -//~ DirectX12 headers +//~ DirectX12 libs #include #include #include +#pragma comment(lib, "d3d12") +#pragma comment(lib, "dxgi") + //////////////////////////////// //~ Tweakable defines @@ -37,20 +40,51 @@ Struct(GPU_D12_Pipeline) Struct(GPU_D12_Resource) { ID3D12Resource *raw; + u64 reuse_hash; + GPU_Format format; - u32 element_size; u32 size; - u32 count; + u32 element_size; + u32 element_count; + D3D12_GPU_VIRTUAL_ADDRESS gpu_address; }; +//////////////////////////////// +//~ Queue types + +Struct(GPU_D12_QueueDesc) +{ + enum D3D12_COMMAND_LIST_TYPE type; + enum D3D12_COMMAND_QUEUE_PRIORITY priority; + String dbg_name; +}; + +Struct(GPU_D12_Queue) +{ 
+ GPU_D12_QueueDesc desc; + ID3D12CommandQueue *cq; + + Mutex submit_mutex; + ID3D12Fence *submit_fence; + u64 submit_fence_target; + struct GPU_D12_RawCommandList *first_submitted_cl; + struct GPU_D12_RawCommandList *last_submitted_cl; +}; + //////////////////////////////// //~ Raw command list types Struct(GPU_D12_RawCommandList) { - struct ID3D12CommandAllocator *ca; - struct ID3D12GraphicsCommandList *cl; + GPU_D12_Queue *queue; + GPU_D12_RawCommandList *next; + + u64 submit_fence_target; + + ID3D12CommandAllocator *ca; + ID3D12GraphicsCommandList *cl; + }; //////////////////////////////// @@ -155,26 +189,42 @@ Struct(GPU_D12_FiberState) Struct(GPU_D12_SharedState) { - i32 _; + GPU_D12_FiberState *fiber_states[MaxFibers]; + + /* Device */ + IDXGIFactory6 *factory; + IDXGIAdapter1 *adapter; + ID3D12Device *device; } extern GPU_D12_shared_state; -//////////////////////////////// -//~ State operations - -GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id); -GPU_D12_SharedState *GPU_D12_GetSharedState(void); - //////////////////////////////// //~ Helpers +GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id); DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl); +u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc); + +//////////////////////////////// +//~ Startup + +void GPU_D12_Startup(void); + +//////////////////////////////// +//~ Device initialization + +void GPU_D12_InitDevice(void); //////////////////////////////// //~ Pipeline operations GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc); +//////////////////////////////// +//~ Queue operations + +GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind); + //////////////////////////////// //~ Raw command list operations diff --git a/src/pp/pp.c b/src/pp/pp.c index ede27e0c..a15a1fdc 100644 --- a/src/pp/pp.c +++ b/src/pp/pp.c @@ -401,9 +401,9 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 
size) return GPU_AcquireResource(desc); } -//- Transfer buffer +//- Upload buffer -GPU_Resource *AcquireTransferBuffer(u32 element_count, u32 element_size, void *src) +GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src) { __prof; u64 size = element_size * element_count; @@ -411,22 +411,24 @@ GPU_Resource *AcquireTransferBuffer(u32 element_count, u32 element_size, void *s desc.kind = GPU_ResourceKind_Buffer; desc.flags = GPU_ResourceFlag_None; desc.buffer.heap_kind = GPU_HeapKind_Upload; - desc.buffer.size = size; + desc.buffer.element_capacity = element_count; /* GPU_AcquireResource sizes the buffer as element_capacity * element_size */ desc.buffer.element_count = element_count; desc.buffer.element_size = element_size; GPU_Resource *r = GPU_AcquireResource(desc); { __profn("Copy to transfer buffer"); - GPU_CopyString(0, r, STRING(size, src)); + GPU_Mapped m = GPU_Map(r); + GPU_CopyToMapped(&m, STRING(element_size * element_count, src)); + GPU_Unmap(&m); } return r; } -GPU_Resource *AcquireTransferBufferFromArena(u32 element_count, Arena *arena) +GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena) { __prof; u64 element_size = element_count > 0 ? 
arena->pos / element_count : 0; - GPU_Resource *r = AcquireTransferBuffer(element_count, element_size, (void *)ArenaBase(arena)); + GPU_Resource *r = AcquireUploadBuffer(element_count, element_size, (void *)ArenaBase(arena)); return r; } @@ -2184,12 +2186,12 @@ void UpdateUser(P_Window *window) /* Acquire transfer buffers */ /* TODO: Make these static */ u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; - GPU_Resource *quad_index_buffer = AcquireTransferBuffer(countof(quad_indices), sizeof(*quad_indices), quad_indices); - GPU_Resource *material_instance_buffer = AcquireTransferBufferFromArena(g->material_instances_count, g->material_instances_arena); - GPU_Resource *ui_rect_instance_buffer = AcquireTransferBufferFromArena(g->ui_rect_instances_count, g->ui_rect_instances_arena); - GPU_Resource *ui_shape_verts_buffer = AcquireTransferBufferFromArena(g->ui_shape_verts_count, g->ui_shape_verts_arena); - GPU_Resource *ui_shape_indices_buffer = AcquireTransferBufferFromArena(g->ui_shape_indices_count, g->ui_shape_indices_arena); - GPU_Resource *grids_buffer = AcquireTransferBufferFromArena(g->grids_count, g->grids_arena); + GPU_Resource *quad_index_buffer = AcquireUploadBuffer(countof(quad_indices), sizeof(*quad_indices), quad_indices); + GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_count, g->material_instances_arena); + GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_count, g->ui_rect_instances_arena); + GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_count, g->ui_shape_verts_arena); + GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_count, g->ui_shape_indices_arena); + GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_count, g->grids_arena); GPU_CommandList *cl = GPU_BeginCommandList(); { @@ -2230,8 +2232,8 @@ void UpdateUser(P_Window *window) MaterialSig sig = ZI; /* FIXME: set sampler urid id 
here */ sig.projection = world_to_render_vp_matrix; - sig.instances_urid = GPU_GetResourceId(material_instance_buffer); - sig.grids_urid = GPU_GetResourceId(grids_buffer); + sig.instances_urid = GPU_GetResourceId(material_instance_buffer, GPU_ResourceIdKind_Srv); + sig.grids_urid = GPU_GetResourceId(grids_buffer, GPU_ResourceIdKind_Srv); GPU_Rasterize(cl, &sig, MaterialVS, MaterialPS, @@ -2270,9 +2272,9 @@ void UpdateUser(P_Window *window) FloodSig sig = ZI; sig.step_len = step_length; - sig.emittance_tex_urid = GPU_GetResourceId(g->emittance); - sig.read_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read); - sig.target_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_target); + sig.emittance_tex_urid = GPU_GetResourceId(g->emittance, GPU_ResourceIdKind_Srv); + sig.read_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read, GPU_ResourceIdKind_Uav); + sig.target_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_target, GPU_ResourceIdKind_Srv); sig.tex_width = g->render_size.x; sig.tex_height = g->render_size.y; GPU_Compute(cl, &sig, FloodCS, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1); @@ -2327,11 +2329,11 @@ void UpdateUser(P_Window *window) (u32)(RandU64FromState(&g->frame_rand) & 0xFFFFFFFF)); sig.frame_index = g->frame_index; sig.camera_offset = g->world_to_render_xf.og; - sig.albedo_tex_urid = GPU_GetResourceId(g->albedo); - sig.emittance_tex_urid = GPU_GetResourceId(g->emittance); - sig.emittance_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read); - sig.read_tex_urid = GPU_GetResourceId(g->shade_read); - sig.target_tex_urid = GPU_GetResourceId(g->shade_target); + sig.albedo_tex_urid = GPU_GetResourceId(g->albedo, GPU_ResourceIdKind_Srv); + sig.emittance_tex_urid = GPU_GetResourceId(g->emittance, GPU_ResourceIdKind_Srv); + sig.emittance_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read, GPU_ResourceIdKind_Uav); + sig.read_tex_urid = GPU_GetResourceId(g->shade_read, GPU_ResourceIdKind_Uav); + 
sig.target_tex_urid = GPU_GetResourceId(g->shade_target, GPU_ResourceIdKind_Uav); GPU_Compute(cl, &sig, ShadeCS, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1); /* Swap */ @@ -2362,7 +2364,7 @@ void UpdateUser(P_Window *window) sig.flags = UiBlitFlag_ToneMap | UiBlitFlag_GammaCorrect; sig.exposure = 2.0; sig.gamma = (f32)2.2; - sig.tex_urid = GPU_GetResourceId(g->shade_read); + sig.tex_urid = GPU_GetResourceId(g->shade_read, GPU_ResourceIdKind_Uav); GPU_Rasterize(cl, &sig, UiBlitVS, UiBlitPS, @@ -2384,7 +2386,7 @@ void UpdateUser(P_Window *window) UiRectSig sig = ZI; sig.projection = ui_vp_matrix; - sig.instances_urid = GPU_GetResourceId(ui_rect_instance_buffer); + sig.instances_urid = GPU_GetResourceId(ui_rect_instance_buffer, GPU_ResourceIdKind_Srv); GPU_Rasterize(cl, &sig, UiRectVS, UiRectPS, @@ -2406,7 +2408,7 @@ void UpdateUser(P_Window *window) UiShapeSig sig = ZI; sig.projection = ui_vp_matrix; - sig.verts_urid = GPU_GetResourceId(ui_shape_verts_buffer); + sig.verts_urid = GPU_GetResourceId(ui_shape_verts_buffer, GPU_ResourceIdKind_Srv); GPU_Rasterize(cl, &sig, UiShapeVS, UiShapePS, @@ -2418,10 +2420,9 @@ void UpdateUser(P_Window *window) GPU_RasterizeMode_TriangleList); } } - GPU_EndCommandList(cl, &g->render_counter); - /* FIXME: Enable this */ #if 0 + g->last_gpu_barrier = GPU_EndCommandList(cl); /* Release transfer buffers */ { @@ -2438,7 +2439,7 @@ void UpdateUser(P_Window *window) { ReleaseRenderResources_Sig *sig = PushStruct(job->arena, ReleaseRenderResources_Sig); job->count = countof(resources); - sig->render_fence = g->most_recent_render_counter; + sig->barrier = g->last_gpu_barrier; sig->resources = PushStructsNoZero(sig->arena, GPU_Resource *, job->count); sig->flags = GPU_ReleaseFlag_Reuse; CopyBytes(sig->resources, resources, sizeof(resources)); diff --git a/src/pp/pp.h b/src/pp/pp.h index 3e9c927b..4c93d612 100644 --- a/src/pp/pp.h +++ b/src/pp/pp.h @@ -295,8 +295,8 @@ void DrawDebugConsole(i32 level, b32 minimized); //~ Gpu 
buffer helpers GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size); -GPU_Resource *AcquireTransferBuffer(u32 element_count, u32 element_size, void *src); -GPU_Resource *AcquireTransferBufferFromArena(u32 element_count, Arena *arena); +GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src); +GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena); //////////////////////////////// //~ Entity sorting diff --git a/src/pp/pp_draw.gpu b/src/pp/pp_draw.gpu index 103ca84e..3ba8ae87 100644 --- a/src/pp/pp_draw.gpu +++ b/src/pp/pp_draw.gpu @@ -154,13 +154,13 @@ void CSDef(FloodCS, Semantic(uint3, SV_DispatchThreadID)) /* Flood */ Vec2I32 read_coords[9] = { (Vec2I32)id + Vec2I32(-step_len, -step_len), /* top left */ - (Vec2I32)id + Vec2I32(0, -step_len), /* top center */ + (Vec2I32)id + Vec2I32(0, -step_len), /* top center */ (Vec2I32)id + Vec2I32(+step_len, -step_len), /* top right */ - (Vec2I32)id + Vec2I32(-step_len, 0), /* center left */ - (Vec2I32)id + Vec2I32(0, 0), /* center center */ - (Vec2I32)id + Vec2I32(+step_len, 0), /* center right */ + (Vec2I32)id + Vec2I32(-step_len, 0), /* center left */ + (Vec2I32)id + Vec2I32(0, 0), /* center center */ + (Vec2I32)id + Vec2I32(+step_len, 0), /* center right */ (Vec2I32)id + Vec2I32(-step_len, +step_len), /* bottom left */ - (Vec2I32)id + Vec2I32(0, +step_len), /* bottom center */ + (Vec2I32)id + Vec2I32(0, +step_len), /* bottom center */ (Vec2I32)id + Vec2I32(+step_len, +step_len) /* bottom right */ }; uint2 closest_seed = uint2(0xFFFF, 0xFFFF);