gpu refactor progress

This commit is contained in:
jacob 2025-09-18 10:16:57 -05:00
parent f231c8322b
commit 77affdd9b0
14 changed files with 561 additions and 65 deletions

View File

@ -13,7 +13,9 @@ Enum(JobPool)
JobPool_Inherit = -1,
/* Contains un-affinitized worker threads.
* Meant to take on temporary high-throughput work that is allowed to interfere with all other pools (e.g. loading a level). */
* Meant to take on temporary high-throughput work that is allowed to
* interfere with all other pools (e.g. program startup, loading a level,
* etc). */
JobPool_Hyper = 0,
/* Contains un-affinitized worker threads.
@ -46,18 +48,18 @@ Enum(JobFlag)
/* A dedicated job is a heavy weight job that will receive its own OS
* thread and will never yield. When the fiber running the job suspends
* itself, the dedicated thread will perform a blocking wait rather than
* yielding the thread to another fiber. This is mainly useful long-running
* dispatcher-esque jobs that block on OS primitives, since occupying a
* worker thread (and thereby preventing non-blocking jobs from running on
* that worker) is unwanted.
* yielding the thread to another fiber. This is mainly useful for
* long-running dispatcher-esque jobs that block on OS primitives, since
* occupying a worker thread (and thereby preventing non-blocking jobs from
* running on that worker) is unwanted.
*
* For example, Win32 window message processing is required by the OS to
* occur on the same thread that initially created the window, which means
* it actually must run inside a dedicated job to prevent message processing
* from yielding & resuming on another thread. The message processing loop
* can block until messages are received from the OS without having to
* occupy a job worker while it blocks, and can then wake yielding
* jobs onto job worker pools based on the messages it received.
* occupy a job worker while it blocks, and can then schedule yielded
* jobs onto job worker pools based on the processed messages.
*/
JobFlag_Dedicated = (1 << 0),
};

View File

@ -193,7 +193,7 @@ i64 FetchAddFence(Fence *fence, i64 x)
return fetch;
}
void YieldOnFence(Fence *fence, i64 target)
i64 YieldOnFence(Fence *fence, i64 target)
{
i64 v = Atomic64Fetch(&fence->v.v);
while (v < target)
@ -201,4 +201,5 @@ void YieldOnFence(Fence *fence, i64 target)
FutexYieldGte(&fence->v.v, &v, sizeof(v));
v = Atomic64Fetch(&fence->v.v);
}
return v;
}

View File

@ -74,4 +74,4 @@ void SetFence(Fence *fence, i64 x);
i64 FetchSetFence(Fence *fence, i64 x);
i64 FetchAddFence(Fence *fence, i64 x);
void YieldOnFence(Fence *fence, i64 target);
i64 YieldOnFence(Fence *fence, i64 target);

View File

@ -55,9 +55,10 @@ JobDef(F_Load, sig, _)
desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
desc.texture.size = VEC3I32(64, 64, 1);
texture = GPU_AcquireResource(desc);
GPU_Mapped mapped = GPU_Map(texture);
GPU_CopyToMapped(&mapped, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels));
GPU_Unmap(&mapped);
/* FIXME: Copy to GPU resource here */
//GPU_Mapped mapped = GPU_Map(texture);
//GPU_CopyToMapped(&mapped, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels));
//GPU_Unmap(&mapped);
}
/* Acquire store memory */

View File

@ -367,7 +367,7 @@ void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain);
/* Waits until a new backbuffer is ready to be written to.
* This should be called before rendering for minimum latency. */
void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain);
void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain);
/* 1. Clears the backbuffer and ensures it's at size `backbuffer_resolution`
* 2. Blits `texture` to the backbuffer using `texture_xf`

View File

@ -27,13 +27,13 @@ GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl)
GPU_D12_Command *cmd = f->first_free_command;
if (cmd)
{
StackPop(f->first_free_command);
ZeroStruct(cmd);
f->first_free_command = cmd->next;
}
else
{
cmd = PushStruct(perm, GPU_D12_Command);
cmd = PushStructNoZero(perm, GPU_D12_Command);
}
ZeroStruct(cmd);
QueuePush(cl->first, cl->last, cmd);
++cl->count;
return cmd;
@ -51,6 +51,8 @@ u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc)
void GPU_D12_Startup(void)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
/* Init device */
GPU_D12_InitDevice();
@ -67,6 +69,20 @@ void GPU_D12_Startup(void)
YieldOnFence(&job_fence, job_count);
}
/* Init descriptor heaps */
g->cbv_srv_uav_heap = GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
GPU_D12_MaxCbvSrvUavDescriptors,
ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));
g->sampler_heap = GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
GPU_D12_MaxSamplerDescriptors,
ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));
g->rtv_heap = GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
GPU_D12_MaxRtvDescriptors,
ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV));
/* Start queue sync job */
RunJob(GPU_D12_StartQueueSync, .pool = JobPool_Hyper, .flags = JobFlag_Dedicated);
}
@ -230,7 +246,7 @@ JobDef(GPU_D12_InitQueue, sig, id)
D3D12_COMMAND_QUEUE_DESC d3d_desc = ZI;
d3d_desc.Type = desc.d3d_type;
d3d_desc.Priority = desc.d3d_priority;
hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->cq);
hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue);
if (FAILED(hr))
{
Panic(Lit("Failed to create command queue"));
@ -245,6 +261,32 @@ JobDef(GPU_D12_InitQueue, sig, id)
g->queues[desc.kind] = queue;
}
//- Heap initialization
/* Creates a descriptor heap of `type` with room for `max_descs` descriptors.
 * The returned bookkeeping struct is pushed onto a freshly acquired arena and
 * records `desc_size` as the distance between consecutive descriptor handles.
 * Panics if the D3D12 heap cannot be created. */
GPU_D12_CpuDescriptorHeap *GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, u32 max_descs, u32 desc_size)
{
    GPU_D12_SharedState *shared = &GPU_D12_shared_state;

    /* The heap struct is the first allocation in its own arena */
    Arena *heap_arena = AcquireArena(Gibi(64));
    GPU_D12_CpuDescriptorHeap *result = PushStruct(heap_arena, GPU_D12_CpuDescriptorHeap);
    result->arena = heap_arena;
    result->type = type;
    result->descriptor_size = desc_size;
    result->max_count = max_descs;

    /* Create the underlying D3D12 heap (all other desc fields stay at their ZI value) */
    D3D12_DESCRIPTOR_HEAP_DESC heap_desc = ZI;
    heap_desc.NumDescriptors = max_descs;
    heap_desc.Type = type;
    HRESULT create_hr = ID3D12Device_CreateDescriptorHeap(shared->device, &heap_desc, &IID_ID3D12DescriptorHeap, (void **)&result->d3d_heap);
    if (FAILED(create_hr))
    {
        Panic(Lit("Failed to create CPU descriptor heap"));
    }

    /* Cache the first handle so descriptor handles can be computed by offset */
    ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(result->d3d_heap, &result->start_handle);
    return result;
}
////////////////////////////////
//~ Pipeline operations
@ -263,6 +305,53 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
return g->queues[kind];
}
////////////////////////////////
//~ Descriptor operations
/* Hands out a descriptor slot from `heap`.
 * A previously released descriptor is reused when the free list is non-empty;
 * otherwise a new slot is carved from the heap (panics once `max_count` slots
 * have been allocated). The returned struct is zeroed except for its `heap`,
 * `handle` and `index` fields. */
GPU_D12_CpuDescriptor *GPU_D12_AcquireCpuDescriptor(GPU_D12_CpuDescriptorHeap *heap)
{
    GPU_D12_CpuDescriptor *result = 0;
    u32 slot = 0;
    D3D12_CPU_DESCRIPTOR_HANDLE slot_handle = ZI;

    Lock heap_lock = LockE(&heap->mutex);
    {
        result = heap->first_free;
        if (!result)
        {
            /* Free list is empty: allocate a brand new slot */
            if (heap->allocated_count >= heap->max_count)
            {
                Panic(Lit("Max descriptors reached in heap"));
            }
            result = PushStructNoZero(heap->arena, GPU_D12_CpuDescriptor);
            slot = heap->allocated_count;
            heap->allocated_count += 1;
            slot_handle.ptr = heap->start_handle.ptr + (slot * heap->descriptor_size);
        }
        else
        {
            /* Pop the free list and keep the slot identity of the recycled entry */
            heap->first_free = result->next_free;
            slot = result->index;
            slot_handle = result->handle;
        }
    }
    Unlock(&heap_lock);

    /* Reset outside the lock, then restore the identifying fields */
    ZeroStruct(result);
    result->index = slot;
    result->handle = slot_handle;
    result->heap = heap;
    return result;
}
/* Returns `descriptor` to its owning heap by pushing it onto the front of the
 * heap's free list (under the heap mutex). */
void GPU_D12_ReleaseCpuDescriptor(GPU_D12_CpuDescriptor *descriptor)
{
    GPU_D12_CpuDescriptorHeap *owner = descriptor->heap;
    Lock owner_lock = LockE(&owner->mutex);
    descriptor->next_free = owner->first_free;
    owner->first_free = descriptor;
    Unlock(&owner_lock);
}
////////////////////////////////
//~ Raw command list
@ -362,8 +451,8 @@ u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl)
target = ++queue->submit_fence_target;
cl->submit_fence_target = target;
/* Execute */
ID3D12CommandQueue_ExecuteCommandLists(queue->cq, 1, (ID3D12CommandList **)&cl->cl);
ID3D12CommandQueue_Signal(queue->cq, queue->submit_fence, target);
ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl);
ID3D12CommandQueue_Signal(queue->d3d_queue, queue->submit_fence, target);
/* Append */
QueuePush(queue->first_submitted_cl, queue->last_submitted_cl, cl);
}
@ -373,6 +462,197 @@ u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl)
return target;
}
////////////////////////////////
//~ Swapchain helpers
/* (Re)initializes every entry of `swapchain->buffers`: fetches the backbuffer
 * resource from the DXGI swapchain, acquires an RTV descriptor for it from the
 * shared RTV heap and creates the render target view.
 * Panics if a backbuffer cannot be retrieved. */
void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain)
{
    GPU_D12_SharedState *shared = &GPU_D12_shared_state;
    for (u32 buffer_index = 0; buffer_index < countof(swapchain->buffers); ++buffer_index)
    {
        /* Fetch the backbuffer at this index */
        ID3D12Resource *backbuffer = 0;
        HRESULT get_hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, buffer_index, &IID_ID3D12Resource, (void **)&backbuffer);
        if (FAILED(get_hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to get swapchain buffer"));
        }

        /* Reset the slot and bind it to the fetched resource */
        GPU_D12_SwapchainBuffer *slot = &swapchain->buffers[buffer_index];
        ZeroStruct(slot);
        slot->d3d_resource = backbuffer;
        slot->swapchain = swapchain;
        slot->state = D3D12_RESOURCE_STATE_COMMON;
        slot->rtv_descriptor = GPU_D12_AcquireCpuDescriptor(shared->rtv_heap);

        /* Create the view with a null view description */
        ID3D12Device_CreateRenderTargetView(shared->device, slot->d3d_resource, 0, slot->rtv_descriptor->handle);
    }
}
/* Ensures `swapchain` matches `resolution` and returns its current backbuffer.
 *
 * `resolution` is clamped to a minimum of 1x1. When it differs from the
 * swapchain's stored resolution, the direct queue is flushed while its submit
 * mutex is held, the per-buffer resources are released, the DXGI buffers are
 * resized, and the per-buffer resources are recreated.
 *
 * Returns the swapchain buffer at the swapchain's current backbuffer index.
 * Panics if the queue flush cannot be set up or if the resize fails. */
GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    resolution.x = MaxI32(resolution.x, 1);
    resolution.y = MaxI32(resolution.y, 1);
    b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
    if (should_rebuild)
    {
        HRESULT hr = 0;
        GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
        /* Lock direct queue submissions (in case any write to backbuffer) */
        /* TODO: Less overkill approach - Only flush GPU_D12_BlitToSwapchain since we know it's the only operation targeting backbuffer */
        Lock lock = LockE(&queue->submit_mutex);
        //DEBUGBREAKABLE;
        //Lock lock = LockE(&g->global_command_list_record_mutex);
        {
            /* Flush direct queue */
            //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
            {
                HANDLE event = CreateEvent(0, 0, 0, 0);
                if (!event)
                {
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to create swapchain flush event"));
                }
                hr = ID3D12Fence_SetEventOnCompletion(queue->submit_fence, queue->submit_fence_target, event);
                if (FAILED(hr))
                {
                    /* The event would never be signaled, so an infinite wait
                     * here would hang while holding the submit mutex. */
                    CloseHandle(event);
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to wait on direct queue submit fence"));
                }
                WaitForSingleObject(event, INFINITE);
                CloseHandle(event);
            }
            /* Release buffers (the swapchain cannot be resized while they are referenced) */
            for (u32 i = 0; i < countof(swapchain->buffers); ++i)
            {
                GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i];
                GPU_D12_ReleaseCpuDescriptor(sb->rtv_descriptor);
                ID3D12Resource_Release(sb->d3d_resource);
            }
            /* Resize buffers */
            hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags);
            if (FAILED(hr))
            {
                /* TODO: Don't panic */
                Panic(Lit("Failed to resize swapchain"));
            }
        }
        Unlock(&lock);
        GPU_D12_InitSwapchainResources(swapchain);
        swapchain->resolution = resolution;
    }
    u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
    return &swapchain->buffers[backbuffer_index];
}
/* Intended to blit `texture_resource` onto the swapchain buffer `dst` using
 * `texture_xf`.
 *
 * NOTE: The blit is currently compiled out. The active `#if 1` branch only
 * fetches the shared state, so this function has no effect. The `#else`
 * branch holds the disabled implementation (transition to render target,
 * clear, draw a quad, transition to present).
 *
 * NOTE(review): the disabled branch uses names that do not appear to be
 * declared in this function (`cq`, `src`, `src_xf`, `blit_pipeline`,
 * `dst->resource`) - confirm and update before re-enabling. */
void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture_resource, Xform texture_xf)
{
#if 1
/* Stub path: nothing is recorded or submitted */
GPU_D12_SharedState *g = &GPU_D12_shared_state;
#else
GPU_D12_SharedState *g = &GPU_D12_shared_state;
/* Look up the blit pipeline */
GPU_D12_Pipeline *blit_pl = 0;
{
GPU_D12_PipelineDesc desc = ZI;
desc.vs = GPU_BlitVS;
desc.ps = GPU_BlitPS;
desc.render_target_formats[0] = GPU_Format_R8G8B8A8_Unorm;
blit_pl = GPU_D12_PipelineFromDesc(desc);
}
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
if (blit_pl)
{
GPU_D12_CommandList *cl = GPU_D12_BeginCommandList(cq->cl_pool);
{
__profn("Present blit");
__profnc_dx12(cl->cq->prof, cl->cl, "Present blit", Rgb32F(0.5, 0.2, 0.2));
GPU_D12_Swapchain *swapchain = dst->swapchain;
/* Upload dummy vert & index buffer */
/* TODO: Make these static */
/* Dummy vertex buffer */
LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
GPU_D12_CommandBuffer *dummy_vertex_buffer = GPU_D12_PushCommandBuffer(cl, 0, (u8 *)0);
GPU_D12_CommandBuffer *quad_index_buffer = GPU_D12_PushCommandBuffer(cl, countof(quad_indices), quad_indices);
/* Upload descriptor heap */
GPU_D12_CommandDescriptorHeap *descriptor_heap = GPU_D12_PushDescriptorHeap(cl, g->cbv_srv_uav_heap);
ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
/* Viewport & scissor cover the whole swapchain */
Rect viewport_rect = RectFromVec2(VEC2(0, 0), VEC2(swapchain->resolution.x, swapchain->resolution.y));
D3D12_VIEWPORT viewport = GPU_D12_ViewportFromRect(viewport_rect);
D3D12_RECT scissor = GPU_D12_ScissorRectFromRect(viewport_rect);
/* Build the projection from the source transform, scaled by the source texture size */
Mat4x4 vp_matrix = ZI;
{
Xform xf = src_xf;
xf = ScaleXform(xf, VEC2(src->texture_size.x, src->texture_size.y));
xf = TranslateXform(xf, VEC2(0.5, 0.5));
vp_matrix = ProjectMat4x4View(xf, viewport.Width, viewport.Height);
}
/* Transition dst to render target */
{
struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
rtb.pResource = dst->resource;
rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
rtb.StateBefore = dst->state;
rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
struct D3D12_RESOURCE_BARRIER rb = ZI;
rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
rb.Flags = 0;
rb.Transition = rtb;
ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
/* Keep the tracked state in sync with the recorded barrier */
dst->state = rtb.StateAfter;
}
ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0);
/* Clear */
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0);
/* Bind pipeline */
GPU_D12_SetPipeline(cl, blit_pipeline);
/* Set Rasterizer State */
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
/* Set sig */
K_BlitSig sig = ZI;
sig.projection = vp_matrix;
sig.flags = K_BLIT_FLAG_NONE;
sig.tex_urid = src->srv_descriptor->index;
GPU_D12_SetSig(cl, &sig, sizeof(sig));
/* Draw */
D3D12_VERTEX_BUFFER_VIEW vbv = GPU_D12_VbvFromCommandBuffer(dummy_vertex_buffer, 0);
D3D12_INDEX_BUFFER_VIEW ibv = GPU_D12_IbvFromCommandBuffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
/* Transition dst to presentable */
{
struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
rtb.pResource = dst->resource;
rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
rtb.StateBefore = dst->state;
rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
struct D3D12_RESOURCE_BARRIER rb = ZI;
rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
rb.Flags = 0;
rb.Transition = rtb;
ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
dst->state = rtb.StateAfter;
}
}
GPU_D12_EndCommandList(cl);
}
#endif
}
////////////////////////////////
//~ Queue sync job
@ -526,12 +806,13 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->raw);
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource);
if (FAILED(hr))
{
/* TODO: Don't panic */
Panic(Lit("Failed to create buffer resource"));
}
r->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->d3d_resource);
} break;
/* Texture */
@ -563,7 +844,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw);
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->d3d_resource);
if (FAILED(hr))
{
/* TODO: Don't panic */
@ -593,7 +874,7 @@ void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags)
case GPU_ResourceKind_Texture2D:
case GPU_ResourceKind_Texture3D:
{
ID3D12Resource_Release(r->raw);
ID3D12Resource_Release(r->d3d_resource);
}
/* TODO: Sampler */
@ -700,7 +981,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
{
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->raw);
ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->d3d_pipeline);
/* Fill signature */
{
@ -750,7 +1031,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
if (indices)
{
D3D12_INDEX_BUFFER_VIEW ibv = ZI;
ibv.BufferLocation = indices->gpu_address;
ibv.BufferLocation = indices->buffer_gpu_address;
if (indices->desc.buffer.element_size == 2)
{
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R16_UINT);
@ -783,7 +1064,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
if (pipeline)
{
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->raw);
ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->d3d_pipeline);
/* Fill signature */
{
@ -948,7 +1229,7 @@ GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
result.resource = gpu_r;
GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r;
D3D12_RANGE read_range = ZI;
HRESULT hr = ID3D12Resource_Map(r->raw, 0, &read_range, &result.mem);
HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem);
if (FAILED(hr) || !result.mem)
{
/* TODO: Don't panic */
@ -960,7 +1241,7 @@ GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
void GPU_Unmap(GPU_Mapped *m)
{
GPU_D12_Resource *r = (GPU_D12_Resource *)m->resource;
ID3D12Resource_Unmap(r->raw, 0, 0);
ID3D12Resource_Unmap(r->d3d_resource, 0, 0);
}
void GPU_CopyToMapped(GPU_Mapped *mapped, String data)
@ -969,7 +1250,7 @@ void GPU_CopyToMapped(GPU_Mapped *mapped, String data)
GPU_D12_Resource *r = (GPU_D12_Resource *)mapped->resource;
D3D12_RESOURCE_DESC desc = ZI;
ID3D12Resource_GetDesc(r->raw, &desc);
ID3D12Resource_GetDesc(r->d3d_resource, &desc);
u64 upload_size = 0;
u64 upload_row_size = 0;
@ -1021,8 +1302,77 @@ GPU_MemoryInfo GPU_QueryMemoryInfo(void)
GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, Vec2I32 size)
{
/* TODO */
return 0;
GPU_D12_SharedState *g = &GPU_D12_shared_state;
HRESULT hr = 0;
HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
GPU_D12_Swapchain *swapchain = 0;
{
Lock lock = LockE(&g->free_swapchains_mutex);
{
swapchain = g->first_free_swapchain;
if (swapchain)
{
g->first_free_swapchain = swapchain->next;
}
}
Unlock(&lock);
}
if (!swapchain)
{
Arena *perm = PermArena();
PushAlign(perm, CachelineSize);
swapchain = PushStructNoZero(perm, GPU_D12_Swapchain);
PushAlign(perm, CachelineSize);
}
ZeroStruct(swapchain);
/* Create swapchain1 */
IDXGISwapChain1 *swapchain1 = 0;
{
DXGI_SWAP_CHAIN_DESC1 desc = ZI;
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc.Width = size.x;
desc.Height = size.y;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
desc.BufferCount = GPU_D12_SwapchainBufferCount;
desc.Scaling = DXGI_SCALING_NONE;
desc.Flags = GPU_D12_SwapchainFlags;
desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)queue->d3d_queue, hwnd, &desc, 0, 0, &swapchain1);
if (FAILED(hr))
{
Panic(Lit("Failed to create IDXGISwapChain1"));
}
}
/* Upgrade to swapchain3 */
hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
if (FAILED(hr))
{
Panic(Lit("Failed to create IDXGISwapChain3"));
}
/* Create waitable object */
#if GPU_D12_FrameLatency > 0
IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, GPU_D12_FrameLatency);
swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
Assert(swapchain->waitable);
#endif
/* Disable Alt+Enter changing monitor resolution to match window size */
IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
IDXGISwapChain1_Release(swapchain1);
swapchain->window_hwnd = hwnd;
GPU_D12_InitSwapchainResources(swapchain);
return (GPU_Swapchain *)swapchain;
}
void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain)
@ -1030,7 +1380,7 @@ void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain)
/* TODO */
}
void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain)
void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain)
{
/* TODO */
}
@ -1038,11 +1388,11 @@ void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain)
void GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
{
GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain;
// GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_resolution);
// GPU_D12_Resource *texture_resource = (GPU_D12_Resource *)texture;
GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_resolution);
GPU_D12_Resource *texture_resource = (GPU_D12_Resource *)texture;
/* Blit */
// GPU_D12_BlitToSwapchain(swapchain_buffer, texture_resource, texture_xf);
GPU_D12_BlitToSwapchain(swapchain_buffer, texture_resource, texture_xf);
u32 present_flags = 0;
if (GPU_D12_TearingIsAllowed && vsync == 0)
@ -1053,7 +1403,7 @@ void GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, Vec2I32 backbuffer_resol
/* Present */
{
__profn("Present");
HRESULT hr = IDXGISwapChain3_Present(swapchain->raw, vsync, present_flags);
HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
if (!SUCCEEDED(hr))
{
Assert(0);

View File

@ -12,10 +12,15 @@
//~ Tweakable defines
#define GPU_D12_TearingIsAllowed 1
#define GPU_D12_FrameLatency 1
#define GPU_D12_FrameLatency 0 /* TODO: Set this to 1 */
#define GPU_D12_SwapchainBufferCount 4
#define GPU_D12_SwapchainFlags (((GPU_D12_TearingIsAllowed != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) \
| ((GPU_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
#define GPU_D12_SwapchainBufferCount (4)
#define GPU_D12_MaxCbvSrvUavDescriptors (1024 * 64)
#define GPU_D12_MaxSamplerDescriptors (1024 * 1)
#define GPU_D12_MaxRtvDescriptors (1024 * 1)
////////////////////////////////
//~ Pipeline types
@ -30,7 +35,7 @@ Struct(GPU_D12_PipelineDesc)
Struct(GPU_D12_Pipeline)
{
ID3D12PipelineState *raw;
ID3D12PipelineState *d3d_pipeline;
ID3D12RootSignature *rootsig;
};
@ -42,10 +47,10 @@ Struct(GPU_D12_Resource)
GPU_D12_Resource *next_free;
GPU_ResourceDesc desc;
ID3D12Resource *raw;
ID3D12Resource *d3d_resource;
u64 reuse_hash;
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
};
////////////////////////////////
@ -62,7 +67,7 @@ Struct(GPU_D12_QueueDesc)
Struct(GPU_D12_Queue)
{
GPU_D12_QueueDesc desc;
ID3D12CommandQueue *cq;
ID3D12CommandQueue *d3d_queue;
Mutex submit_mutex;
ID3D12Fence *submit_fence;
@ -73,6 +78,33 @@ Struct(GPU_D12_Queue)
Fence sync_fence;
};
////////////////////////////////
//~ Descriptor types
/* A single descriptor slot handed out by a GPU_D12_CpuDescriptorHeap. */
Struct(GPU_D12_CpuDescriptor)
{
/* Next entry in the owning heap's free list (set when the descriptor is released) */
GPU_D12_CpuDescriptor *next_free;
/* Heap this descriptor was acquired from */
struct GPU_D12_CpuDescriptorHeap *heap;
/* Slot index within the heap */
u32 index;
/* CPU handle for this slot (heap start handle offset by index * descriptor size) */
D3D12_CPU_DESCRIPTOR_HANDLE handle;
};
/* Bookkeeping for a D3D12 descriptor heap and the descriptor slots handed out from it. */
Struct(GPU_D12_CpuDescriptorHeap)
{
/* Arena that this struct and its GPU_D12_CpuDescriptor entries are pushed onto */
Arena *arena;
D3D12_DESCRIPTOR_HEAP_TYPE type;
/* Distance between consecutive descriptor handles */
u32 descriptor_size;
ID3D12DescriptorHeap *d3d_heap;
/* CPU handle of the first descriptor in `d3d_heap` */
D3D12_CPU_DESCRIPTOR_HANDLE start_handle;
/* Held while acquiring/releasing descriptors (guards the fields below) */
Mutex mutex;
/* Released descriptors available for reuse */
GPU_D12_CpuDescriptor *first_free;
/* Number of slots carved from the heap so far */
u32 allocated_count;
/* Capacity of the heap; acquiring beyond this panics */
u32 max_count;
};
////////////////////////////////
//~ Raw command list types
@ -173,12 +205,24 @@ Struct(GPU_D12_CommandList)
////////////////////////////////
//~ Swapchain types
/* One backbuffer of a swapchain along with its render target view. */
Struct(GPU_D12_SwapchainBuffer)
{
/* Swapchain that owns this buffer */
struct GPU_D12_Swapchain *swapchain;
/* Backbuffer resource obtained from the DXGI swapchain */
ID3D12Resource *d3d_resource;
/* RTV descriptor acquired from the shared RTV heap */
GPU_D12_CpuDescriptor *rtv_descriptor;
/* Tracked resource state (initialized to D3D12_RESOURCE_STATE_COMMON) */
D3D12_RESOURCE_STATES state;
};
Struct(GPU_D12_Swapchain)
{
IDXGISwapChain3 *raw;
GPU_D12_Swapchain *next;
IDXGISwapChain3 *swapchain;
HWND window_hwnd;
HANDLE waitable;
Vec3I32 resolution;
Vec2I32 resolution;
GPU_D12_SwapchainBuffer buffers[GPU_D12_SwapchainBufferCount];
};
////////////////////////////////
@ -197,10 +241,19 @@ Struct(GPU_D12_SharedState)
/* Queues */
GPU_D12_Queue *queues[GPU_NumQueues];
/* Descriptor heaps */
GPU_D12_CpuDescriptorHeap *cbv_srv_uav_heap;
GPU_D12_CpuDescriptorHeap *sampler_heap;
GPU_D12_CpuDescriptorHeap *rtv_heap;
/* Resources */
Mutex free_resources_mutex;
GPU_D12_Resource *first_free_resource;
/* Swapchains */
Mutex free_swapchains_mutex;
GPU_D12_Swapchain *first_free_swapchain;
/* Device */
IDXGIFactory6 *factory;
IDXGIAdapter1 *adapter;
@ -229,6 +282,9 @@ void GPU_D12_InitDevice(void);
//- Queue initialization
JobDecl(GPU_D12_InitQueue, { GPU_D12_QueueDesc *descs; });
//- Heap initialization
/* Creates a descriptor heap of `type` holding up to `max_descs` descriptors.
 * `desc_size` is stored as the distance between consecutive descriptor handles.
 * Panics on failure. */
GPU_D12_CpuDescriptorHeap *GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, u32 max_descs, u32 desc_size);
////////////////////////////////
//~ Pipeline operations
@ -239,12 +295,25 @@ GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc);
GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind);
////////////////////////////////
//~ Descriptor operations
/* Returns a descriptor slot from `heap`, reusing a released one when available.
 * Panics once the heap's capacity is exhausted. */
GPU_D12_CpuDescriptor *GPU_D12_AcquireCpuDescriptor(GPU_D12_CpuDescriptorHeap *heap);
/* Puts `descriptor` back on its owning heap's free list. */
void GPU_D12_ReleaseCpuDescriptor(GPU_D12_CpuDescriptor *descriptor);
////////////////////////////////
//~ Raw command list operations
GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind);
u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl);
////////////////////////////////
//~ Swapchain helpers
/* Fetches each backbuffer of `swapchain` and creates a render target view for it. */
void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain);
/* Resizes `swapchain` if `resolution` changed, then returns its current backbuffer. */
GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution);
/* Blits `texture_resource` to `swapchain_buffer` using `texture_xf`.
 * NOTE(review): the implementation is currently stubbed out behind `#if 1`. */
void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *swapchain_buffer, GPU_D12_Resource *texture_resource, Xform texture_xf);
////////////////////////////////
//~ Sync job

View File

@ -2,6 +2,12 @@
//- Api
@IncludeC gpu_dx12.h
@IncludeGpu gpu_dx12_blit.h
//- Impl
@IncludeC gpu_dx12.c
@IncludeGpu gpu_dx12_blit.gpu
//- Shaders
@VertexShader GPU_D12_BlitVS
@PixelShader GPU_D12_BlitPS

View File

@ -0,0 +1,49 @@
ConstantBuffer<GPU_D12_BlitSig> GPU_D12_blit_sig : register (b0);
////////////////////////////////
//~ Blit
/* Vertex shader output / pixel shader input for the blit pass. */
Struct(GPU_D12_BlitPS_Input)
{
Semantic(Vec4, SV_Position);
/* Texture coordinate used to sample the source texture */
Semantic(Vec2, uv);
};
/* Pixel shader output for the blit pass: a single color target. */
Struct(GPU_D12_BlitPS_Output)
{
Semantic(Vec4, SV_Target);
};
//- Vertex shader
/* Emits one corner of a unit quad per vertex: the corner is selected by
 * SV_VertexID, projected by the signature's projection matrix, and its
 * position shifted by 0.5 is forwarded as the uv. */
GPU_D12_BlitPS_Input VSDef(GPU_D12_BlitVS, Semantic(u32, SV_VertexID))
{
    /* Corners of a unit quad centered on the origin */
    static const Vec2 quad_corners[4] = {
        Vec2(-0.5f, -0.5f),
        Vec2(0.5f, -0.5f),
        Vec2(0.5f, 0.5f),
        Vec2(-0.5f, 0.5f)
    };
    ConstantBuffer<GPU_D12_BlitSig> blit_sig = GPU_D12_blit_sig;
    Vec2 corner = quad_corners[SV_VertexID];

    GPU_D12_BlitPS_Input result;
    result.uv = corner + 0.5;
    result.SV_Position = mul(blit_sig.projection, Vec4(corner, 0, 1));
    return result;
}
//- Pixel shader
/* Samples the source texture (looked up via the signature's urids) at the
 * interpolated uv and writes the sampled color to the render target. */
GPU_D12_BlitPS_Output PSDef(GPU_D12_BlitPS, GPU_D12_BlitPS_Input input)
{
    ConstantBuffer<GPU_D12_BlitSig> blit_sig = GPU_D12_blit_sig;
    Texture2D<Vec4> src_tex = GpuResourceFromUrid(blit_sig.tex_urid);
    SamplerState src_sampler = GpuSamplerFromUrid(blit_sig.tex_sampler_urid);

    GPU_D12_BlitPS_Output result;
    result.SV_Target = src_tex.Sample(src_sampler, input.uv);
    return result;
}

View File

@ -0,0 +1,15 @@
////////////////////////////////
//~ Blit types
/* Constants shared between the CPU side and the blit shaders
 * (declared as a ConstantBuffer at register b0 by the blit shader file). */
Struct(GPU_D12_BlitSig)
{
/* ----------------------------------------------------- */
Mat4x4 projection; /* 16 consts */
/* ----------------------------------------------------- */
u32 tex_urid; /* 01 consts */
u32 tex_sampler_urid; /* 01 consts */
u32 _pad0; /* 01 consts (padding) */
u32 _pad1; /* 01 consts (padding) */
/* ----------------------------------------------------- */
};
/* NOTE(review): presumably checks the struct spans 20 root constants (16 + 4 per the counts above) - confirm against AssertRootConst */
AssertRootConst(GPU_D12_BlitSig, 20);

View File

@ -435,7 +435,7 @@ void P_W32_ProcessWindowEvent(P_W32_Window *window, P_WindowEvent event)
void P_W32_WakeWindow(P_W32_Window *window)
{
/* Post a blank message to the window's thread message queue to wake it. */
PostMessageW(window->hwnd, 0, 0, 0);
PostMessageW(window->hwnd, WM_NULL, 0, 0);
}
LRESULT CALLBACK P_W32_Win32WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam)

View File

@ -2434,10 +2434,9 @@ void UpdateUser(P_Window *window)
}
g->gpu_render_fence_target = GPU_EndCommandList(cl);
/* Release transfer buffers */
/* Reset transfer buffers & queue their release */
{
{
/* FIXME: Release resources */
GPU_Resource *release_resources[] = {
quad_index_buffer,
material_instance_buffer,
@ -2469,6 +2468,8 @@ void UpdateUser(P_Window *window)
g->ui_shape_indices_count = 0;
g->grids_count = 0;
}
GPU_PresentSwapchain(g->swapchain, g->ui_size, g->ui_target, g->ui_to_screen_xf, 1);
}
EndScratch(scratch);
@ -2488,7 +2489,7 @@ JobDef(UpdateUserOrSleep, UNUSED sig, UNUSED id)
__profn("User sleep");
{
__profn("Swapchain wait");
GPU_WaitOnSwapchain(g->swapchain);
GPU_YieldOnSwapchain(g->swapchain);
}
{
__profn("Frame limiter wait");

View File

@ -1,9 +1,9 @@
ConstantBuffer<MaterialSig> g_mat_sig : register (b0);
ConstantBuffer<FloodSig> g_flood_sig : register (b0);
ConstantBuffer<ShadeSig> g_shade_sig : register (b0);
ConstantBuffer<UiBlitSig> g_ui_blit_sig : register (b0);
ConstantBuffer<UiRectSig> g_ui_rect_sig : register (b0);
ConstantBuffer<UiShapeSig> g_ui_shape_sig : register (b0);
ConstantBuffer<MaterialSig> mat_sig : register (b0);
ConstantBuffer<FloodSig> flood_sig : register (b0);
ConstantBuffer<ShadeSig> shade_sig : register (b0);
ConstantBuffer<UiBlitSig> ui_blit_sig : register (b0);
ConstantBuffer<UiRectSig> ui_rect_sig : register (b0);
ConstantBuffer<UiShapeSig> ui_shape_sig : register (b0);
////////////////////////////////
//~ Material
@ -28,7 +28,7 @@ Struct(MaterialPS_Output)
MaterialPS_Input VSDef(MaterialVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
{
ConstantBuffer<MaterialSig> sig = g_mat_sig;
ConstantBuffer<MaterialSig> sig = mat_sig;
static const Vec2 unit_quad_verts[4] = {
Vec2(-0.5f, -0.5f),
Vec2(0.5f, -0.5f),
@ -56,7 +56,7 @@ MaterialPS_Input VSDef(MaterialVS, Semantic(u32, SV_InstanceID), Semantic(u32, S
MaterialPS_Output PSDef(MaterialPS, MaterialPS_Input input)
{
ConstantBuffer<MaterialSig> sig = g_mat_sig;
ConstantBuffer<MaterialSig> sig = mat_sig;
MaterialPS_Output output;
Vec4 albedo = input.tint_lin;
@ -128,7 +128,7 @@ MaterialPS_Output PSDef(MaterialPS, MaterialPS_Input input)
[numthreads(8, 8, 1)]
void CSDef(FloodCS, Semantic(uint3, SV_DispatchThreadID))
{
ConstantBuffer<FloodSig> sig = g_flood_sig;
ConstantBuffer<FloodSig> sig = flood_sig;
uint2 id = SV_DispatchThreadID.xy;
uint2 tex_size = uint2(sig.tex_width, sig.tex_height);
@ -194,7 +194,7 @@ void CSDef(FloodCS, Semantic(uint3, SV_DispatchThreadID))
float RandAngle(uint2 pos, u32 ray_index)
{
ConstantBuffer<ShadeSig> sig = g_shade_sig;
ConstantBuffer<ShadeSig> sig = shade_sig;
Texture3D<u32> noise_tex = GpuResourceFromUrid(sig.noise_tex_urid);
Vec3I32 noise_coord = Vec3I32(1, 1, 1);
@ -208,7 +208,7 @@ float RandAngle(uint2 pos, u32 ray_index)
Vec3 ColorFromDir(uint2 ray_start, Vec2 ray_dir)
{
ConstantBuffer<ShadeSig> sig = g_shade_sig;
ConstantBuffer<ShadeSig> sig = shade_sig;
Texture2D<uint2> flood_tex = GpuResourceFromUrid(sig.emittance_flood_tex_urid);
Texture2D<Vec4> emittance_tex = GpuResourceFromUrid(sig.emittance_tex_urid);
Texture3D<u32> noise_tex = GpuResourceFromUrid(sig.noise_tex_urid);
@ -263,7 +263,7 @@ Vec3 ColorFromPos(uint2 pos)
[numthreads(8, 8, 1)]
void CSDef(ShadeCS, Semantic(uint3, SV_DispatchThreadID))
{
ConstantBuffer<ShadeSig> sig = g_shade_sig;
ConstantBuffer<ShadeSig> sig = shade_sig;
uint2 id = SV_DispatchThreadID.xy;
if (id.x < sig.tex_width && id.y < sig.tex_height)
@ -321,7 +321,7 @@ Vec3 ToneMap(Vec3 v)
UiBlitPS_Input VSDef(UiBlitVS, Semantic(u32, SV_VertexID))
{
ConstantBuffer<UiBlitSig> sig = g_ui_blit_sig;
ConstantBuffer<UiBlitSig> sig = ui_blit_sig;
static const Vec2 unit_quad_verts[4] = {
Vec2(-0.5f, -0.5f),
Vec2(0.5f, -0.5f),
@ -340,7 +340,7 @@ UiBlitPS_Input VSDef(UiBlitVS, Semantic(u32, SV_VertexID))
UiBlitPS_Output PSDef(UiBlitPS, UiBlitPS_Input input)
{
ConstantBuffer<UiBlitSig> sig = g_ui_blit_sig;
ConstantBuffer<UiBlitSig> sig = ui_blit_sig;
SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid);
UiBlitPS_Output output;
@ -385,7 +385,7 @@ Struct(UiRectPS_Output)
UiRectPS_Input VSDef(UiRectVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
{
ConstantBuffer<UiRectSig> sig = g_ui_rect_sig;
ConstantBuffer<UiRectSig> sig = ui_rect_sig;
static const Vec2 unit_quad_verts[4] = {
Vec2(-0.5f, -0.5f),
Vec2(0.5f, -0.5f),
@ -410,7 +410,7 @@ UiRectPS_Input VSDef(UiRectVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_Ve
UiRectPS_Output PSDef(UiRectPS, UiRectPS_Input input)
{
ConstantBuffer<UiRectSig> sig = g_ui_rect_sig;
ConstantBuffer<UiRectSig> sig = ui_rect_sig;
UiRectPS_Output output;
Vec4 color = input.tint_srgb;
@ -444,7 +444,7 @@ Struct(UiShapePS_Output)
UiShapePS_Input VSDef(UiShapeVS, Semantic(u32, SV_VertexID))
{
ConstantBuffer<UiShapeSig> sig = g_ui_shape_sig;
ConstantBuffer<UiShapeSig> sig = ui_shape_sig;
StructuredBuffer<UiShapeVert> verts = GpuResourceFromUrid(sig.verts_urid);
UiShapeVert vert = verts[SV_VertexID];
UiShapePS_Input output;

View File

@ -24,15 +24,17 @@ JobDef(S_LoadTexture, sig, _)
{
GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_AllowUav;
desc.flags = GPU_ResourceFlag_None;
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
desc.texture.size = VEC3I32(decoded.width, decoded.height, 1);
desc.texture.mip_levels = 1;
texture->gpu_resource = GPU_AcquireResource(desc);
texture->width = decoded.width;
texture->height = decoded.height;
/* FIXME: Upload to resource here */
}
texture->loaded = 1;
SetFence(&entry->texture_ready_fence, 1);
EndScratch(scratch);