diff --git a/src/ase/ase.c b/src/ase/ase.c index 2536f260..d461c8b6 100644 --- a/src/ase/ase.c +++ b/src/ase/ase.c @@ -770,7 +770,7 @@ abort: if (result.errors.count <= 0) { - result.success = 1; + result.ok = 1; } EndScratch(scratch); @@ -966,7 +966,7 @@ abort: if (result.errors.count <= 0) { - result.success = 1; + result.ok = 1; } return result; diff --git a/src/ase/ase.h b/src/ase/ase.h index 865be406..031d1c36 100644 --- a/src/ase/ase.h +++ b/src/ase/ase.h @@ -60,7 +60,7 @@ Struct(ASE_DecodedImage) u32 height; u32 *pixels; /* Array of [width * height] pixels */ ASE_ErrorList errors; - b32 success; + b32 ok; }; Struct(ASE_DecodedSheet) @@ -74,7 +74,7 @@ Struct(ASE_DecodedSheet) ASE_Span *first_span; ASE_SliceKey *first_slice_key; ASE_ErrorList errors; - b32 success; + b32 ok; }; //////////////////////////////// diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index 46d109ba..67dc513b 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -177,8 +177,8 @@ i32 W32_Main(void) wchar_t cmd[sizeof(ProfilingCmdWstr)] = ZI; CopyBytes(cmd, ProfilingCmdWstr, sizeof(ProfilingCmdWstr)); DeleteFileW(ProfilingOutFileWstr); - b32 success = CreateProcessW(0, cmd, 0, 0, 0, DETACHED_PROCESS, 0, 0, &si, &pi); - if (!success) + b32 ok = CreateProcessW(0, cmd, 0, 0, 0, DETACHED_PROCESS, 0, 0, &si, &pi); + if (!ok) { MessageBoxExW(0, L"Failed to launch profiler using command '" ProfilingCmdWstr L"'.", L"Error", MB_ICONSTOP | MB_SETFOREGROUND | MB_TOPMOST, 0); } @@ -214,20 +214,20 @@ i32 W32_Main(void) if (thread_name_len >= prefix_name_wstr_len && EqBytes(thread_name_wstr, prefix_name_wstr, prefix_name_wstr_len)) { __profn("Set profiler thread affinity"); - b32 success = SetThreadAffinityMask(thread, ProfilerThreadAffinityMask) != 0; + b32 ok = SetThreadAffinityMask(thread, ProfilerThreadAffinityMask) != 0; { /* Retry until external tools can set correct process affinity */ i32 delay_ms = 16; - while (!success && 
delay_ms <= 1024) + while (!ok && delay_ms <= 1024) { __profn("Profiler thread affinity retry"); Sleep(delay_ms); - success = SetThreadAffinityMask(thread, ProfilerThreadAffinityMask) != 0; + ok = SetThreadAffinityMask(thread, ProfilerThreadAffinityMask) != 0; delay_ms *= 2; } } - Assert(success); - LAX success; + Assert(ok); + LAX ok; } } CloseHandle(thread); diff --git a/src/base/base_win32/base_win32_job.c b/src/base/base_win32/base_win32_job.c index 39b7737d..829e922b 100644 --- a/src/base/base_win32/base_win32_job.c +++ b/src/base/base_win32/base_win32_job.c @@ -160,7 +160,7 @@ b32 W32_TryEndThread(W32_Thread *thread, f32 timeout_seconds) { __prof; W32_SharedJobState *g = &W32_shared_job_state; - b32 success = 0; + b32 ok = 0; W32_Thread *t = (W32_Thread *)thread; HANDLE handle = t->handle; if (handle) @@ -171,18 +171,18 @@ b32 W32_TryEndThread(W32_Thread *thread, f32 timeout_seconds) if (wait_result == WAIT_OBJECT_0) { /* Release thread */ - success = 1; + ok = 1; CloseHandle(handle); } } - return success; + return ok; } void W32_WaitEndThread(W32_Thread *thread) { __prof; - b32 success = W32_TryEndThread(thread, F32Infinity); - Assert(success); + b32 ok = W32_TryEndThread(thread, F32Infinity); + Assert(ok); } //////////////////////////////// @@ -424,29 +424,29 @@ W32_ThreadDef(W32_JobWorkerEntryPoint, worker_ctx_arg) if (pool->thread_priority != 0) { __profn("Set priority"); - b32 success = SetThreadPriority(thread_handle, pool->thread_priority) != 0; - Assert(success); + b32 ok = SetThreadPriority(thread_handle, pool->thread_priority) != 0; + Assert(ok); } #if 0 if (pool->thread_affinity_mask) { __profn("Set affinity"); - b32 success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; + b32 ok = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; #if RtcIsEnabled || ProfilingIsEnabled { /* Retry until external tools can set correct process affinity */ i32 delay_ms = 16; - while (!success && delay_ms <= 1024) + while 
(!ok && delay_ms <= 1024) { __profn("Affinity retry"); Sleep(delay_ms); - success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; + ok = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; delay_ms *= 2; } } #endif - Assert(success); + Assert(ok); } #endif diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h index 88de629d..f1b914db 100644 --- a/src/gpu/gpu.h +++ b/src/gpu/gpu.h @@ -191,14 +191,6 @@ Enum(GPU_ReleaseFlag) GPU_ReleaseFlag_Reuse = (1 << 0) }; -Enum(GPU_ResourceIdKind) -{ - GPU_ResourceIdKind_None, - GPU_ResourceIdKind_Srv, - GPU_ResourceIdKind_Uav, - GPU_ResourceIdKind_Sampler, -}; - Struct(GPU_ResourceDesc) { GPU_ResourceKind kind; @@ -274,7 +266,8 @@ void GPU_Startup(void); //////////////////////////////// //~ @hookdecl Fence operations -Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind); +Fence *GPU_FenceFromQueue(GPU_QueueKind queue); +void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value); /* Forces queue `a` to wait until queue `b`'s fence reaches `b_target_fence_value` */ //////////////////////////////// //~ @hookdecl Rasterizer helpers @@ -288,13 +281,16 @@ GPU_Scissor GPU_ScissorFromRect(Rect rect); GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc); void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags); -u32 GPU_GetResourceId(GPU_Resource *resource, GPU_ResourceIdKind kind); -Vec2I32 GPU_GetTextureSize(GPU_Resource *resource); +u32 GPU_GetReadableId(GPU_Resource *resource); +u32 GPU_GetWritableId(GPU_Resource *resource); +Vec2I32 GPU_GetTextureSize2D(GPU_Resource *resource); +Vec3I32 GPU_GetTextureSize3D(GPU_Resource *resource); +u64 GPU_GetFootprintSize(GPU_Resource *resource); //////////////////////////////// //~ @hookdecl Command list operations -GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind); +GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue); i64 GPU_EndCommandList(GPU_CommandList *cl); /* Returns the value 
that the queue's fence will be set to once the command is completed */ //////////////////////////////// @@ -303,20 +299,24 @@ i64 GPU_EndCommandList(GPU_CommandList *cl); /* Returns the value that the queu void GPU_ProfN(GPU_CommandList *cl, String name); //////////////////////////////// -//~ @hookdecl Resource barrier operations +//~ @hookdecl Barrier operations -void GPU_TransitionToSrv(GPU_CommandList *cl, GPU_Resource *resource); -void GPU_TransitionToUav(GPU_CommandList *cl, GPU_Resource *resource); -void GPU_TransitionToRtv(GPU_CommandList *cl, GPU_Resource *resource); -void GPU_FlushUav(GPU_CommandList *cl, GPU_Resource *resource); +void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read via read-only types in shaders */ +void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read/written to via read-write types in shader */ +void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot); /* Allows the resource to be used as a render target bound at slot */ + +void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a src in copy operations */ +void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a dst in copy operations */ + +void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Waits until all writes to a shader writable resource have completed */ //////////////////////////////// //~ @hookdecl Dispatch operations -void GPU_ClearResource(GPU_CommandList *cl, GPU_Resource *resource, Vec4 clear_value); +void GPU_Clear(GPU_CommandList *cl, GPU_Resource *resource, Vec4 clear_value); -#define GPU_Rasterize(cl, sig_ptr, vs, ps, rts_count, rts, viewport, scissor, instances_count, index_buffer, mode) \ - GPU_Rasterize_((cl), sizeof(*(sig_ptr)), (sig_ptr), (vs), (ps), (rts_count), (rts), (viewport), (scissor), 
(instances_count), (index_buffer), (mode)) +#define GPU_Rasterize(cl, sig_ptr, vs, ps, rts_count, viewport, scissor, instances_count, index_buffer, mode) \ + GPU_Rasterize_((cl), sizeof(*(sig_ptr)), (sig_ptr), (vs), (ps), (rts_count), (viewport), (scissor), (instances_count), (index_buffer), (mode)) #define GPU_Compute(cl, sig_ptr, cs, x, y, z) GPU_Compute_((cl), sizeof(*(sig_ptr)), (sig_ptr), (cs), (x), (y), (z)) @@ -326,7 +326,6 @@ void GPU_Rasterize_(GPU_CommandList *cl, VertexShader vs, PixelShader ps, u32 rts_count, - GPU_Resource **rts, GPU_Viewport viewport, GPU_Scissor scissor, u32 instances_count, @@ -352,7 +351,7 @@ void GPU_CopyResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src) GPU_Mapped GPU_Map(GPU_Resource *r); void GPU_Unmap(GPU_Mapped mapped); -void GPU_CopyToMapped(GPU_Mapped mapped, String data); +void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference); //////////////////////////////// //~ @hookdecl Memory info operations @@ -362,7 +361,7 @@ GPU_MemoryInfo GPU_QueryMemoryInfo(void); //////////////////////////////// //~ @hookdecl Swapchain operations -GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, Vec2I32 size); +GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, GPU_Format format, Vec2I32 size); void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain); /* Waits until a new backbuffer is ready to be written to. 
diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 9054cc76..14ff0239 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -82,6 +82,8 @@ void GPU_D12_Startup(void) GPU_D12_MaxRtvDescriptors, ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV)); + /* Init rootsig */ + GPU_D12_InitRootsig(); /* Start queue sync job */ RunJob(GPU_D12_StartQueueSync, .pool = JobPool_Hyper, .flags = JobFlag_Dedicated); @@ -287,13 +289,218 @@ GPU_D12_CpuDescriptorHeap *GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_T return heap; } +//- Rootsig initialization + +void GPU_D12_InitRootsig(void) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + b32 ok = 1; + HRESULT hr = 0; + String error_str = ZI; + + /* Serialize root signature */ + ID3D10Blob *blob = 0; + if (ok) + { + __profn("Serialize root signature"); + + D3D12_ROOT_PARAMETER param = ZI; + param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + param.Constants.ShaderRegister = 0; + param.Constants.RegisterSpace = 0; + param.Constants.Num32BitValues = 64; + + D3D12_ROOT_SIGNATURE_DESC desc = ZI; + desc.NumParameters = 1; + desc.pParameters = &param; + desc.NumStaticSamplers = 0; + desc.pStaticSamplers = 0; + desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; + + hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0); + if (FAILED(hr)) + { + error_str = Lit("Failed to serialize root signature"); + ok = 0; + } + } + + /* Create root signature */ + ID3D12RootSignature *rootsig = 0; + if (ok) + { + __profn("Create root signature"); + + hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); + if (FAILED(hr)) + { + error_str = Lit("Failed to create root signature"); + ok = 
0; + } + } + + if (blob) + { + ID3D10Blob_Release(blob); + } + + g->bindless_rootsig = rootsig; + if (!ok) + { + Panic(error_str); + } +} + //////////////////////////////// //~ Pipeline operations +JobDef(GPU_D12_LoadPipeline, sig, _) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Pipeline *pipeline = sig->pipeline; + GPU_D12_PipelineDesc desc = pipeline->desc; + + HRESULT hr = 0; + b32 ok = 1; + String error_str = ZI; + + /* Create PSO */ + ID3D12PipelineState *pso = 0; + if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource))) + { + D3D12_RASTERIZER_DESC raster_desc = ZI; + raster_desc.FillMode = D3D12_FILL_MODE_SOLID; + raster_desc.CullMode = D3D12_CULL_MODE_NONE; + raster_desc.FrontCounterClockwise = 0; + raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + raster_desc.DepthClipEnable = 1; + raster_desc.MultisampleEnable = 0; + raster_desc.AntialiasedLineEnable = 0; + raster_desc.ForcedSampleCount = 0; + raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + D3D12_BLEND_DESC blend_desc = ZI; + blend_desc.AlphaToCoverageEnable = 0; + blend_desc.IndependentBlendEnable = 0; + blend_desc.RenderTarget[0].BlendEnable = 1; + blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; + blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; + blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + + D3D12_DEPTH_STENCIL_DESC ds_desc = ZI; + ds_desc.DepthEnable = 0; + ds_desc.StencilEnable = 0; + + String vs = DataFromResource(desc.vs.resource); + String ps = 
DataFromResource(desc.ps.resource); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI; + pso_desc.pRootSignature = g->bindless_rootsig; + pso_desc.VS.pShaderBytecode = vs.text; + pso_desc.VS.BytecodeLength = vs.len; + pso_desc.PS.pShaderBytecode = ps.text; + pso_desc.PS.BytecodeLength = ps.len; + pso_desc.RasterizerState = raster_desc; + pso_desc.BlendState = blend_desc; + pso_desc.DepthStencilState = ds_desc; + pso_desc.PrimitiveTopologyType = desc.topology_type; + pso_desc.SampleMask = UINT_MAX; + pso_desc.SampleDesc.Count = 1; + pso_desc.SampleDesc.Quality = 0; + for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i) + { + StaticAssert(countof(desc.render_target_formats) <= countof(pso_desc.RTVFormats)); /* Loop writes one RTVFormats entry per desc slot, so the desc array must not be larger */ + DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]); + if (format != DXGI_FORMAT_UNKNOWN) + { + pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; + } + else + { + break; + } + } + hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + if (FAILED(hr)) + { + error_str = Lit("Failed to create pipeline state object"); + ok = 0; + } + } + else if (ok) + { + String cs = DataFromResource(desc.vs.resource); + + D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI; + pso_desc.pRootSignature = g->bindless_rootsig; + pso_desc.CS.pShaderBytecode = cs.text; + pso_desc.CS.BytecodeLength = cs.len; + hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + if (FAILED(hr)) + { + error_str = Lit("Failed to create pipeline state object"); + ok = 0; + } + } + + pipeline->pso = pso; + pipeline->error = error_str; + pipeline->ok = ok; /* Propagate the real result; a failed PSO creation must not be reported as success */ +} + GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) { - /* TODO */ - return 0; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc))); + + GPU_D12_Pipeline *pipeline = 0; + b32 is_pipeline_new = 0; + 
GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)]; + { + { + Lock lock = LockS(&bin->mutex); + for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) + { + if (pipeline->hash == hash) break; + } + Unlock(&lock); + } + if (!pipeline) + { + Lock lock = LockE(&bin->mutex); + for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) + { + if (pipeline->hash == hash) break; + } + if (!pipeline) + { + Arena *perm = PermArena(); + PushAlign(perm, CachelineSize); + pipeline = PushStruct(perm, GPU_D12_Pipeline); + pipeline->desc = desc; + pipeline->hash = hash; + is_pipeline_new = 1; + PushAlign(perm, CachelineSize); + StackPushN(bin->first, pipeline, next_in_bin); + } + Unlock(&lock); + } + } + + if (is_pipeline_new) + { + RunJob(GPU_D12_LoadPipeline, .fence = &pipeline->ready_fence, .sig.pipeline = pipeline); + } + YieldOnFence(&pipeline->ready_fence, 1); + + return pipeline; } //////////////////////////////// @@ -542,7 +749,6 @@ GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, V i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture) { -#if 1 GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(GPU_QueueKind_Direct); @@ -555,26 +761,22 @@ i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *text /* Transition backbuffer to COPY_DEST */ { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; - rtb.pResource = dst->d3d_resource; - rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rtb.StateBefore = D3D12_RESOURCE_STATE_PRESENT; - rtb.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition = rtb; + rb->Transition.pResource = dst->d3d_resource; + rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + rb->Transition.StateBefore = 
D3D12_RESOURCE_STATE_PRESENT; + rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; } /* Transition texture to COPY_SRC */ if (texture->state != D3D12_RESOURCE_STATE_COPY_SOURCE) { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; - rtb.pResource = texture->d3d_resource; - rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rtb.StateBefore = texture->state; - rtb.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition = rtb; - texture->state = rtb.StateAfter; + rb->Transition.pResource = texture->d3d_resource; + rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + rb->Transition.StateBefore = texture->state; + rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + texture->state = rb->Transition.StateAfter; } ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); } @@ -588,143 +790,28 @@ i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *text /* Transition backbuffer to PRESENT */ { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; - rtb.pResource = dst->d3d_resource; - rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rtb.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition = rtb; + rb->Transition.pResource = dst->d3d_resource; + rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + rb->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + rb->Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; } /* Transition texture to original state */ if (texture->state != old_texture_state) { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; - rtb.pResource = texture->d3d_resource; - rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - 
rtb.StateBefore = texture->state; - rtb.StateAfter = old_texture_state; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition = rtb; - texture->state = rtb.StateAfter; + rb->Transition.pResource = texture->d3d_resource; + rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + rb->Transition.StateBefore = texture->state; + rb->Transition.StateAfter = old_texture_state; + texture->state = rb->Transition.StateAfter; } ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); } i64 fence_target = GPU_D12_EndRawCommandList(dx12_cl); return fence_target; - - - - - -#else - GPU_D12_SharedState *g = &GPU_D12_shared_state; - - GPU_D12_Pipeline *blit_pl = 0; - { - GPU_D12_PipelineDesc desc = ZI; - desc.vs = GPU_BlitVS; - desc.ps = GPU_BlitPS; - desc.render_target_formats[0] = GPU_Format_R8G8B8A8_Unorm; - blit_pl = GPU_D12_PipelineFromDesc(desc); - } - - GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct); - - if (blit_pl) - { - GPU_D12_CommandList *cl = GPU_D12_BeginCommandList(cq->cl_pool); - { - __profn("Present blit"); - __profnc_dx12(cl->cq->prof, cl->cl, "Present blit", Rgb32F(0.5, 0.2, 0.2)); - GPU_D12_Swapchain *swapchain = dst->swapchain; - - /* Upload dummmy vert & index buffer */ - /* TODO: Make these static */ - /* Dummy vertex buffer */ - LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; - GPU_D12_CommandBuffer *dummy_vertex_buffer = GPU_D12_PushCommandBuffer(cl, 0, (u8 *)0); - GPU_D12_CommandBuffer *quad_index_buffer = GPU_D12_PushCommandBuffer(cl, countof(quad_indices), quad_indices); - - /* Upload descriptor heap */ - GPU_D12_CommandDescriptorHeap *descriptor_heap = GPU_D12_PushDescriptorHeap(cl, g->cbv_srv_uav_heap); - ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); - - Rect viewport_rect = RectFromVec2(VEC2(0, 0), VEC2(swapchain->resolution.x, swapchain->resolution.y)); - D3D12_VIEWPORT viewport = 
GPU_D12_ViewportFromRect(viewport_rect); - D3D12_RECT scissor = GPU_D12_ScissorRectFromRect(viewport_rect); - - Mat4x4 vp_matrix = ZI; - { - Xform xf = src_xf; - xf = ScaleXform(xf, VEC2(src->texture_size.x, src->texture_size.y)); - xf = TranslateXform(xf, VEC2(0.5, 0.5)); - vp_matrix = ProjectMat4x4View(xf, viewport.Width, viewport.Height); - } - - /* Transition dst to render target */ - { - struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; - rtb.pResource = dst->resource; - rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rtb.StateBefore = dst->state; - rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - struct D3D12_RESOURCE_BARRIER rb = ZI; - rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb.Flags = 0; - rb.Transition = rtb; - ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb); - dst->state = rtb.StateAfter; - } - ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0); - - /* Clear */ - f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0); - - /* Bind pipeline */ - GPU_D12_SetPipeline(cl, blit_pipeline); - - /* Set Rasterizer State */ - ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport); - ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor); - - /* Set sig */ - K_BlitSig sig = ZI; - sig.projection = vp_matrix; - sig.flags = K_BLIT_FLAG_NONE; - sig.tex_urid = src->srv_descriptor->index; - GPU_D12_SetSig(cl, &sig, sizeof(sig)); - - /* Draw */ - D3D12_VERTEX_BUFFER_VIEW vbv = GPU_D12_VbvFromCommandBuffer(dummy_vertex_buffer, 0); - D3D12_INDEX_BUFFER_VIEW ibv = GPU_D12_IbvFromCommandBuffer(quad_index_buffer, DXGI_FORMAT_R16_UINT); - ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv); - ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv); - 
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0); - - /* Transition dst to presentable */ - { - struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; - rtb.pResource = dst->resource; - rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rtb.StateBefore = dst->state; - rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - struct D3D12_RESOURCE_BARRIER rb = ZI; - rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb.Flags = 0; - rb.Transition = rtb; - ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb); - dst->state = rtb.StateAfter; - } - } - GPU_D12_EndCommandList(cl); - } -#endif } //////////////////////////////// @@ -775,6 +862,14 @@ Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) return &queue->sync_fence; } +void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) +{ + GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); + GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); + ID3D12Fence *b_fence = queue_b->submit_fence; + ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); +} + //////////////////////////////// //~ @hookdef Rasterizer helper hooks @@ -895,11 +990,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) case GPU_ResourceKind_Texture3D: { D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - D3D12_HEAP_PROPERTIES heap_props = { - .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD - : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK - : D3D12_HEAP_TYPE_DEFAULT - }; + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = desc.kind == GPU_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : desc.kind == GPU_ResourceKind_Texture2D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE2D @@ -960,16 +1051,41 @@ void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags) Unlock(&lock); } -u32 GPU_GetResourceId(GPU_Resource *resource, GPU_ResourceIdKind kind) +u32 GPU_GetReadableId(GPU_Resource *resource) { /* TODO */ return 0; } -Vec2I32 GPU_GetTextureSize(GPU_Resource *resource) +u32 GPU_GetWritableId(GPU_Resource *resource) { /* TODO */ - return VEC2I32(0, 0); + return 0; +} + +Vec2I32 GPU_GetTextureSize2D(GPU_Resource *gpu_resource) +{ + GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; + return VEC2I32(resource->desc.texture.size.x, resource->desc.texture.size.y); +} + +Vec3I32 GPU_GetTextureSize3D(GPU_Resource *gpu_resource) +{ + GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; + return resource->desc.texture.size; +} + +u64 GPU_GetFootprintSize(GPU_Resource *gpu_resource) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + D3D12_RESOURCE_DESC desc = ZI; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; + ID3D12Resource_GetDesc(((GPU_D12_Resource *)gpu_resource)->d3d_resource, &desc); + u64 footprint_size = 0; + u64 upload_row_size = 0; + u32 upload_num_rows = 0; + ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &footprint_size); + return footprint_size; } //////////////////////////////// @@ -1000,17 +1116,32 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_QueueKind queue_kind = cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); + TempArena scratch = BeginScratchNoConflict(); + + GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI; + GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI; /* Begin dx12 command list */ GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind); ID3D12GraphicsCommandList *rcl = dx12_cl->cl; + /* Set rootsigs */ + if 
(cl->has_rasterize_cmd) + { + ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); + } + if (cl->has_compute_cmd) + { + ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig); + } + + /* Process gpu commands into dx12 commands */ { - for (GPU_D12_Command *cmd = cl->first; cmd; cmd = cmd->next) + GPU_D12_Command *cmd = cl->first; + while (cmd) { - GPU_D12_CommandKind kind = cmd->kind; - switch (kind) + switch (cmd->kind) { - default: break; + default: { cmd = cmd->next; } break; /* Skip unhandled kinds; every case must advance cmd or this loop never terminates */ @@ -1018,15 +1149,117 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) case GPU_D12_CommandKind_TransitionToSrv: case GPU_D12_CommandKind_TransitionToUav: case GPU_D12_CommandKind_TransitionToRtv: + case GPU_D12_CommandKind_TransitionToCopySrc: + case GPU_D12_CommandKind_TransitionToCopyDst: case GPU_D12_CommandKind_FlushUav: { - /* TODO */ + /* Build barriers batch list */ + u32 barriers_count = 0; + Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; D3D12_RESOURCE_BARRIER_TYPE type; D3D12_RESOURCE_STATES state; }; + TmpBarrier *first_barrier = 0; + TmpBarrier *last_barrier = 0; + while (cmd && (cmd->kind == GPU_D12_CommandKind_TransitionToSrv + || cmd->kind == GPU_D12_CommandKind_TransitionToUav + || cmd->kind == GPU_D12_CommandKind_TransitionToRtv + || cmd->kind == GPU_D12_CommandKind_TransitionToCopySrc + || cmd->kind == GPU_D12_CommandKind_TransitionToCopyDst + || cmd->kind == GPU_D12_CommandKind_FlushUav)) + { + D3D12_RESOURCE_BARRIER_TYPE type = ZI; + D3D12_RESOURCE_STATES state = ZI; + GPU_D12_Resource *resource = cmd->barrier.resource; + + switch (cmd->kind) + { + case GPU_D12_CommandKind_TransitionToSrv: + { + type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + state = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; + } break; + case GPU_D12_CommandKind_TransitionToUav: + { + type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } break; + case GPU_D12_CommandKind_TransitionToRtv: + { + type = 
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + state = D3D12_RESOURCE_STATE_RENDER_TARGET; + i32 slot = cmd->barrier.rt_slot; + if (slot >= 0 && slot < countof(slotted_render_targets)) + { + slotted_render_targets[slot] = resource; + } + } break; + case GPU_D12_CommandKind_TransitionToCopySrc: + { + type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + state = D3D12_RESOURCE_STATE_COPY_SOURCE; + } break; + case GPU_D12_CommandKind_TransitionToCopyDst: + { + type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + state = D3D12_RESOURCE_STATE_COPY_DEST; + } break; + case GPU_D12_CommandKind_FlushUav: + { + type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + } break; + } + + b32 skip = 0; + if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) + { + skip = resource->state == state; + } + + if (!skip) + { + TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier); + b->r = resource; + b->type = type; + b->state = state; + QueuePush(first_barrier, last_barrier, b); + ++barriers_count; + } + + cmd = cmd->next; + } + + /* Submit batched barriers */ + if (barriers_count > 0) + { + D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, barriers_count); + { + i32 i = 0; + for (TmpBarrier *b = first_barrier; b; b = b->next) + { + D3D12_RESOURCE_BARRIER *rb = &rbs[i]; + rb->Type = b->type; + if (b->type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) + { + rb->Transition.pResource = b->r->d3d_resource; + rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + rb->Transition.StateBefore = b->r->state; + rb->Transition.StateAfter = b->state; + b->r->state = rb->Transition.StateAfter; + } + else if (b->type == D3D12_RESOURCE_BARRIER_TYPE_UAV) + { + rb->UAV.pResource = b->r->d3d_resource; + } + ++i; + } + } + ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); + } } break; //- Clear resource case GPU_D12_CommandKind_Clear: { /* TODO */ + cmd = cmd->next; } break; //- Copy resource @@ -1034,7 +1267,6 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) { 
GPU_D12_Resource *dst = cmd->copy.dst; GPU_D12_Resource *src = cmd->copy.src; - // ID3D12GraphicsCommandList_CopyResource(rcl, dst->d3d_resource, src->d3d_resource); D3D12_RESOURCE_DESC dst_desc = ZI; ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); @@ -1054,6 +1286,7 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); + cmd = cmd->next; } break; //- Dispatch Vs/Ps shader @@ -1064,16 +1297,29 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) GPU_D12_PipelineDesc pipeline_desc = ZI; pipeline_desc.vs = cmd->rasterize.vs; pipeline_desc.ps = cmd->rasterize.ps; - for (u32 i = 0; i < countof(cmd->rasterize.rts); ++i) { - GPU_D12_Resource *r = cmd->rasterize.rts[i]; + pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + switch (cmd->rasterize.mode) + { + default: Assert(0); break; + case GPU_RasterizeMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; + case GPU_RasterizeMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case GPU_RasterizeMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case GPU_RasterizeMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterizeMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + } + } + for (u32 i = 0; i < cmd->rasterize.rts_count; ++i) + { + GPU_D12_Resource *r = slotted_render_targets[i]; if (r) { pipeline_desc.render_target_formats[i] = r->desc.texture.format; } else { - break; + Assert(0); /* No bound render target in slot */ + pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; } } pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); @@ -1081,11 +1327,12 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) if (pipeline) { - /* Bind pipeline */ - ID3D12GraphicsCommandList_SetPipelineState(rcl, 
pipeline->d3d_pipeline); + /* TODO: Only set dirty */ + ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); /* Fill signature */ + /* TODO: Only upload dirty */ { u32 sig_size = cmd->rasterize.sig_size; void *sig = cmd->rasterize.sig; @@ -1094,6 +1341,7 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) } /* Set rasterizer state */ + /* TODO: Only set dirty */ { D3D12_RECT scissor = ZI; scissor.left = cmd->rasterize.scissor.left; @@ -1112,6 +1360,7 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) } /* Set topology */ + /* TODO: Only set dirty */ { D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; switch (cmd->rasterize.mode) @@ -1148,9 +1397,31 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) } } + /* Bind render targets */ + { + b32 om_dirty = 0; + D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_render_targets)] = ZI; + for (u32 i = 0; i < cmd->rasterize.rts_count; ++i) + { + GPU_D12_Resource *target = slotted_render_targets[i]; + if (bound_render_targets[i] != target) + { + bound_render_targets[i] = target; + om_dirty = 1; + } + rtvs[i] = target->rtv_descriptor->handle; + } + if (om_dirty) + { + ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, cmd->rasterize.rts_count, rtvs, 0, 0); + } + } + /* Dispatch */ ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, cmd->rasterize.instances_count, indices_count, 0, 0, 0); } + + cmd = cmd->next; } break; //- Dispatch compute shader @@ -1166,9 +1437,11 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) if (pipeline) { /* Bind pipeline */ - ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->d3d_pipeline); + /* TODO: Only set dirty */ + ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); /* Fill signature */ + /* TODO: Only upload dirty */ { u32 sig_size = cmd->compute.sig_size; void *sig = cmd->compute.sig; @@ -1179,6 +1452,8 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) /* Dispatch */ ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.num_threads_x, 
cmd->compute.num_threads_y, cmd->compute.num_threads_z); } + + cmd = cmd->next; } break; } } @@ -1197,6 +1472,7 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) /* Free command list */ StackPush(f->first_free_command_list, cl); + EndScratch(scratch); return fence_target; } @@ -1209,36 +1485,47 @@ void GPU_ProfN(GPU_CommandList *cl, String name) } //////////////////////////////// -//~ @hookdef Resource barrier hooks +//~ @hookdef Barrier hooks -void GPU_TransitionToSrv(GPU_CommandList *gpu_cl, GPU_Resource *resource) +void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource) { - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); + GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToSrv; cmd->barrier.resource = (GPU_D12_Resource *)resource; } -void GPU_TransitionToUav(GPU_CommandList *gpu_cl, GPU_Resource *resource) +void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource) { - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); + GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToUav; cmd->barrier.resource = (GPU_D12_Resource *)resource; } -void GPU_TransitionToRtv(GPU_CommandList *gpu_cl, GPU_Resource *resource) +void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot) { - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); + GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToRtv; cmd->barrier.resource = (GPU_D12_Resource *)resource; + cmd->barrier.rt_slot = slot; +} + +void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource) +{ + GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); + cmd->kind = GPU_D12_CommandKind_TransitionToCopySrc; + 
cmd->barrier.resource = (GPU_D12_Resource *)resource; } -void GPU_FlushUav(GPU_CommandList *gpu_cl, GPU_Resource *resource) +void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource) { - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); + GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); + cmd->kind = GPU_D12_CommandKind_TransitionToCopyDst; + cmd->barrier.resource = (GPU_D12_Resource *)resource; +} + +void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource) +{ + GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_FlushUav; cmd->barrier.resource = (GPU_D12_Resource *)resource; } @@ -1246,7 +1533,7 @@ void GPU_FlushUav(GPU_CommandList *gpu_cl, GPU_Resource *resource) //////////////////////////////// //~ @hookdef Dispatch hooks -void GPU_ClearResource(GPU_CommandList *gpu_cl, GPU_Resource *resource, Vec4 clear_value) +void GPU_Clear(GPU_CommandList *gpu_cl, GPU_Resource *resource, Vec4 clear_value) { GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); @@ -1261,7 +1548,6 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl, VertexShader vs, PixelShader ps, u32 rts_count, - GPU_Resource **rts, GPU_Viewport viewport, GPU_Scissor scissor, u32 instances_count, @@ -1276,16 +1562,14 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl, CopyBytes(cmd->rasterize.sig, sig, cmd->rasterize.sig_size); cmd->rasterize.vs = vs; cmd->rasterize.ps = ps; + cmd->rasterize.rts_count = rts_count; Assert(rts_count < GPU_MaxRenderTargets); - for (u32 i = 0; i < MinU32(rts_count, GPU_MaxRenderTargets); ++i) - { - cmd->rasterize.rts[i] = (GPU_D12_Resource *)rts[i]; - } cmd->rasterize.viewport = viewport; cmd->rasterize.scissor = scissor; cmd->rasterize.instances_count = instances_count; cmd->rasterize.index_buffer = (GPU_D12_Resource *)index_buffer; cmd->rasterize.mode = mode; + cl->has_rasterize_cmd = 1; } void 
GPU_Compute_(GPU_CommandList *gpu_cl, @@ -1306,6 +1590,7 @@ void GPU_Compute_(GPU_CommandList *gpu_cl, cmd->compute.num_threads_x = num_threads_x; cmd->compute.num_threads_y = num_threads_y; cmd->compute.num_threads_z = num_threads_z; + cl->has_compute_cmd = 1; } //////////////////////////////// @@ -1346,13 +1631,12 @@ void GPU_Unmap(GPU_Mapped m) ID3D12Resource_Unmap(r->d3d_resource, 0, 0); } -void GPU_CopyToMapped(GPU_Mapped mapped, String data) +void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference) { GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_D12_Resource *r = (GPU_D12_Resource *)mapped.resource; D3D12_RESOURCE_DESC desc = ZI; - ID3D12Resource_GetDesc(r->d3d_resource, &desc); + ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); u64 upload_size = 0; u64 upload_row_size = 0; @@ -1363,8 +1647,8 @@ void GPU_CopyToMapped(GPU_Mapped mapped, String data) { D3D12_RANGE read_range = ZI; - u8 *dst_base = (u8 *)mapped.mem + placed_footprint.Offset; - u8 *src_base = data.text; + u8 *dst_base = (u8 *)dst + placed_footprint.Offset; + u8 *src_base = src; u32 z_size = upload_row_size * upload_num_rows; @@ -1374,17 +1658,9 @@ void GPU_CopyToMapped(GPU_Mapped mapped, String data) u32 z_offset = z * z_size; for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) { - u8 *dst = dst_base + y * footprint.RowPitch + z_offset; - u8 *src = src_base + y * upload_row_size + z_offset; - i64 cpy_size = MinI64(upload_row_size, data.len - (src - src_base)); - if (cpy_size > 0) - { - CopyBytes(dst, src, cpy_size); - } - else - { - src_overflow = 1; - } + u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; + u8 *src_row = src_base + y * upload_row_size + z_offset; + CopyBytes(dst_row, src_row, upload_row_size); } } } @@ -1402,7 +1678,7 @@ GPU_MemoryInfo GPU_QueryMemoryInfo(void) //////////////////////////////// //~ @hookdef Swapchain hooks -GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, Vec2I32 
size) +GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, GPU_Format format, Vec2I32 size) { GPU_D12_SharedState *g = &GPU_D12_shared_state; HRESULT hr = 0; @@ -1434,7 +1710,7 @@ GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, Vec2I32 size) IDXGISwapChain1 *swapchain1 = 0; { DXGI_SWAP_CHAIN_DESC1 desc = ZI; - desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format); desc.Width = size.x; desc.Height = size.y; desc.SampleDesc.Count = 1; diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index dd20d834..952e9165 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -30,53 +30,27 @@ Struct(GPU_D12_PipelineDesc) VertexShader vs; PixelShader ps; ComputeShader cs; + D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; GPU_Format render_target_formats[GPU_MaxRenderTargets]; }; Struct(GPU_D12_Pipeline) { - ID3D12PipelineState *d3d_pipeline; - ID3D12RootSignature *rootsig; + GPU_D12_Pipeline *next_in_bin; + u64 hash; + + GPU_D12_PipelineDesc desc; + ID3D12PipelineState *pso; + + Fence ready_fence; + b32 ok; + String error; }; -//////////////////////////////// -//~ Resource types - -Struct(GPU_D12_Resource) +Struct(GPU_D12_PipelineBin) { - GPU_D12_Resource *next_free; - GPU_ResourceDesc desc; - - ID3D12Resource *d3d_resource; - D3D12_RESOURCE_STATES state; - u64 reuse_hash; - - D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; -}; - -//////////////////////////////// -//~ Queue types - -Struct(GPU_D12_QueueDesc) -{ - GPU_QueueKind kind; - D3D12_COMMAND_LIST_TYPE d3d_type; - D3D12_COMMAND_QUEUE_PRIORITY d3d_priority; - String dbg_name; -}; - -Struct(GPU_D12_Queue) -{ - GPU_D12_QueueDesc desc; - ID3D12CommandQueue *d3d_queue; - - Mutex submit_mutex; - ID3D12Fence *submit_fence; - u64 submit_fence_target; - struct GPU_D12_RawCommandList *first_submitted_cl; - struct GPU_D12_RawCommandList *last_submitted_cl; - - Fence sync_fence; + Mutex mutex; + GPU_D12_Pipeline *first; }; 
//////////////////////////////// @@ -106,6 +80,51 @@ Struct(GPU_D12_CpuDescriptorHeap) u32 max_count; }; +//////////////////////////////// +//~ Resource types + +Struct(GPU_D12_Resource) +{ + GPU_D12_Resource *next_free; + GPU_ResourceDesc desc; + + ID3D12Resource *d3d_resource; + D3D12_RESOURCE_STATES state; + u64 reuse_hash; + + GPU_D12_CpuDescriptor *srv_descriptor; + GPU_D12_CpuDescriptor *uav_descriptor; + GPU_D12_CpuDescriptor *rtv_descriptor; + GPU_D12_CpuDescriptor *sampler_descriptor; + + D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; +}; + +//////////////////////////////// +//~ Queue types + +Struct(GPU_D12_QueueDesc) +{ + GPU_QueueKind kind; + D3D12_COMMAND_LIST_TYPE d3d_type; + D3D12_COMMAND_QUEUE_PRIORITY d3d_priority; + String dbg_name; +}; + +Struct(GPU_D12_Queue) +{ + GPU_D12_QueueDesc desc; + ID3D12CommandQueue *d3d_queue; + + Mutex submit_mutex; + ID3D12Fence *submit_fence; + u64 submit_fence_target; + struct GPU_D12_RawCommandList *first_submitted_cl; + struct GPU_D12_RawCommandList *last_submitted_cl; + + Fence sync_fence; +}; + //////////////////////////////// //~ Raw command list types @@ -132,6 +151,8 @@ Enum(GPU_D12_CommandKind) GPU_D12_CommandKind_TransitionToSrv, GPU_D12_CommandKind_TransitionToUav, GPU_D12_CommandKind_TransitionToRtv, + GPU_D12_CommandKind_TransitionToCopySrc, + GPU_D12_CommandKind_TransitionToCopyDst, GPU_D12_CommandKind_FlushUav, /* Copy */ @@ -156,6 +177,7 @@ Struct(GPU_D12_Command) struct { GPU_D12_Resource *resource; + i32 rt_slot; } barrier; struct { @@ -174,7 +196,7 @@ Struct(GPU_D12_Command) u8 sig[256]; VertexShader vs; PixelShader ps; - GPU_D12_Resource *rts[GPU_MaxRenderTargets]; + u32 rts_count; GPU_Viewport viewport; GPU_Scissor scissor; u32 instances_count; @@ -201,6 +223,9 @@ Struct(GPU_D12_CommandList) u64 count; GPU_QueueKind queue_kind; + + b32 has_rasterize_cmd; + b32 has_compute_cmd; }; //////////////////////////////// @@ -242,6 +267,12 @@ Struct(GPU_D12_SharedState) /* Queues */ GPU_D12_Queue 
*queues[GPU_NumQueues]; + /* Rootsig */ + ID3D12RootSignature *bindless_rootsig; + + /* Pipelines */ + GPU_D12_PipelineBin pipeline_bins[1024]; + /* Descriptor heaps */ GPU_D12_CpuDescriptorHeap *cbv_srv_uav_heap; GPU_D12_CpuDescriptorHeap *sampler_heap; @@ -286,9 +317,13 @@ JobDecl(GPU_D12_InitQueue, { GPU_D12_QueueDesc *descs; }); //- Heap initialization GPU_D12_CpuDescriptorHeap *GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, u32 max_descs, u32 desc_size); +//- Rootsig initialization +void GPU_D12_InitRootsig(void); + //////////////////////////////// //~ Pipeline operations +JobDecl(GPU_D12_LoadPipeline, { GPU_D12_Pipeline *pipeline; }); GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc); //////////////////////////////// diff --git a/src/gpu/gpu_dx12/gpu_dx12.lay b/src/gpu/gpu_dx12/gpu_dx12.lay index 29ed05f2..47557a18 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.lay +++ b/src/gpu/gpu_dx12/gpu_dx12.lay @@ -2,12 +2,6 @@ //- Api @IncludeC gpu_dx12.h -@IncludeGpu gpu_dx12_blit.h //- Impl @IncludeC gpu_dx12.c -@IncludeGpu gpu_dx12_blit.gpu - -//- Shaders -@VertexShader GPU_D12_BlitVS -@PixelShader GPU_D12_BlitPS diff --git a/src/gpu/gpu_dx12/gpu_dx12_blit.gpu b/src/gpu/gpu_dx12/gpu_dx12_blit.gpu deleted file mode 100644 index 2c3663b5..00000000 --- a/src/gpu/gpu_dx12/gpu_dx12_blit.gpu +++ /dev/null @@ -1,49 +0,0 @@ -ConstantBuffer GPU_D12_blit_sig : register (b0); - -//////////////////////////////// -//~ Ui Blit - -Struct(GPU_D12_BlitPS_Input) -{ - Semantic(Vec4, SV_Position); - Semantic(Vec2, uv); -}; - -Struct(GPU_D12_BlitPS_Output) -{ - Semantic(Vec4, SV_Target); -}; - -//- Vertex shader - -GPU_D12_BlitPS_Input VSDef(GPU_D12_BlitVS, Semantic(u32, SV_VertexID)) -{ - ConstantBuffer sig = GPU_D12_blit_sig; - static const Vec2 unit_quad_verts[4] = { - Vec2(-0.5f, -0.5f), - Vec2(0.5f, -0.5f), - Vec2(0.5f, 0.5f), - Vec2(-0.5f, 0.5f) - }; - Vec2 vert = unit_quad_verts[SV_VertexID]; - - GPU_D12_BlitPS_Input output; - output.SV_Position = 
mul(sig.projection, Vec4(vert, 0, 1)); - output.uv = vert + 0.5; - return output; -} - -//- Pixel shader - -GPU_D12_BlitPS_Output PSDef(GPU_D12_BlitPS, GPU_D12_BlitPS_Input input) -{ - ConstantBuffer sig = GPU_D12_blit_sig; - SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid); - - GPU_D12_BlitPS_Output output; - Texture2D tex = GpuResourceFromUrid(sig.tex_urid); - Vec4 color = tex.Sample(sampler, input.uv); - - output.SV_Target = color; - return output; -} diff --git a/src/gpu/gpu_dx12/gpu_dx12_blit.h b/src/gpu/gpu_dx12/gpu_dx12_blit.h deleted file mode 100644 index a50b224c..00000000 --- a/src/gpu/gpu_dx12/gpu_dx12_blit.h +++ /dev/null @@ -1,15 +0,0 @@ -//////////////////////////////// -//~ Blit types - -Struct(GPU_D12_BlitSig) -{ - /* ----------------------------------------------------- */ - Mat4x4 projection; /* 16 consts */ - /* ----------------------------------------------------- */ - u32 tex_urid; /* 01 consts */ - u32 tex_sampler_urid; /* 01 consts */ - u32 _pad0; /* 01 consts (padding) */ - u32 _pad1; /* 01 consts (padding) */ - /* ----------------------------------------------------- */ -}; -AssertRootConst(GPU_D12_BlitSig, 20); diff --git a/src/mp3/mp3.h b/src/mp3/mp3.h index d33e2c11..15d6961c 100644 --- a/src/mp3/mp3.h +++ b/src/mp3/mp3.h @@ -11,7 +11,7 @@ Struct(MP3_Result) { u64 samples_count; i16 *samples; - b32 success; + b32 ok; }; //////////////////////////////// diff --git a/src/mp3/mp3_mmf/mp3_mmf.c b/src/mp3/mp3_mmf/mp3_mmf.c index 2dc2ec33..139ac864 100644 --- a/src/mp3/mp3_mmf/mp3_mmf.c +++ b/src/mp3/mp3_mmf/mp3_mmf.c @@ -79,7 +79,7 @@ MP3_Result MP3_Decode(Arena *arena, String encoded, u32 sample_rate, MP3_DecodeF /* Check if done */ if (sample_flags & MF_SOURCE_READERF_ENDOFSTREAM) { - result.success = 1; + result.ok = 1; break; } Assert(sample_flags == 0); diff --git a/src/platform/platform_win32/platform_win32.c b/src/platform/platform_win32/platform_win32.c index c6daa66d..7904b3f1 100644 --- 
a/src/platform/platform_win32/platform_win32.c +++ b/src/platform/platform_win32/platform_win32.c @@ -85,9 +85,9 @@ void P_Startup(void) //.dwFlags = RIDEV_NOLEGACY /* Adds mouse and also ignores legacy mouse messages */ }; - b32 success = RegisterRawInputDevices(&rid, 1, sizeof(rid)); - Assert(success); - LAX success; + b32 ok = RegisterRawInputDevices(&rid, 1, sizeof(rid)); + Assert(ok); + LAX ok; } //- Init watches pool @@ -1204,8 +1204,8 @@ P_FileTime P_GetFileTime(P_File file) FILETIME ft_created; FILETIME ft_accessed; FILETIME ft_modified; - b32 success = !!GetFileTime((HANDLE)file.handle, &ft_created, &ft_accessed, &ft_modified); - if (success) + b32 ok = !!GetFileTime((HANDLE)file.handle, &ft_created, &ft_accessed, &ft_modified); + if (ok) { /* Convert file times to local file time */ FileTimeToLocalFileTime(&ft_created, &ft_created); diff --git a/src/pp/pp.c b/src/pp/pp.c index dbf36e67..ce1341a6 100644 --- a/src/pp/pp.c +++ b/src/pp/pp.c @@ -44,7 +44,7 @@ void StartupUser(void) //P_RegisterLogCallback(ConsoleLogCallback, P_LogLevel_Success); P_RegisterLogCallback(ConsoleLogCallback, P_LogLevel_Debug); g->window = P_AcquireWindow(); - g->swapchain = GPU_AcquireSwapchain(g->window, VEC2I32(100, 100)); + g->swapchain = GPU_AcquireSwapchain(g->window, GPU_Format_R8G8B8A8_Unorm, VEC2I32(100, 100)); P_ShowWindow(g->window); /* Start jobs */ @@ -417,7 +417,7 @@ GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_coun { __profn("Copy to transfer buffer"); GPU_Mapped m = GPU_Map(r); - GPU_CopyToMapped(m, STRING(element_size * element_count, src)); + CopyBytes(m.mem, src, element_size * element_count); GPU_Unmap(m); } return r; @@ -2154,16 +2154,13 @@ void UpdateUser(P_Window *window) Rect ui_viewport = RectFromVec2(VEC2(0, 0), VEC2(g->ui_size.x, g->ui_size.y)); Rect render_viewport = RectFromVec2(VEC2(0, 0), VEC2(g->render_size.x, g->render_size.y)); - if (!g->gpu_render_fence) - { - g->gpu_render_fence = 
GPU_FenceFromQueue(gpu_render_queue); - } + Fence *render_fence = GPU_FenceFromQueue(gpu_render_queue); /* Acquire gbuffers */ - if (g->shade_target && !EqVec2I32(g->render_size, GPU_GetTextureSize(g->shade_target))) + if (g->shade_target && !EqVec2I32(g->render_size, GPU_GetTextureSize2D(g->shade_target))) { __profn("Release render resources"); - YieldOnFence(g->gpu_render_fence, g->gpu_render_fence_target); + YieldOnFence(render_fence, g->gpu_render_fence_target); GPU_ReleaseResource(g->albedo, GPU_ReleaseFlag_None); GPU_ReleaseResource(g->emittance, GPU_ReleaseFlag_None); GPU_ReleaseResource(g->emittance_flood_read, GPU_ReleaseFlag_None); @@ -2184,9 +2181,9 @@ void UpdateUser(P_Window *window) } /* Acquire ui buffers */ - if (g->ui_target && !EqVec2I32(g->ui_size, GPU_GetTextureSize(g->ui_target))) + if (g->ui_target && !EqVec2I32(g->ui_size, GPU_GetTextureSize2D(g->ui_target))) { - YieldOnFence(g->gpu_render_fence, g->gpu_render_fence_target); + YieldOnFence(render_fence, g->gpu_render_fence_target); GPU_ReleaseResource(g->ui_target, GPU_ReleaseFlag_None); g->ui_target = 0; } @@ -2223,10 +2220,10 @@ void UpdateUser(P_Window *window) { __profn("Clear gbuffers"); GPU_ProfN(cl, Lit("Clear gbuffers")); - GPU_TransitionToRtv(cl, g->albedo); - GPU_TransitionToRtv(cl, g->emittance); - GPU_ClearResource(cl, g->albedo, VEC4(0, 0, 0, 0)); - GPU_ClearResource(cl, g->emittance, VEC4(0, 0, 0, 0)); + GPU_TransitionToRenderable(cl, g->albedo, 0); + GPU_TransitionToRenderable(cl, g->emittance, 1); + GPU_Clear(cl, g->albedo, VEC4(0, 0, 0, 0)); + GPU_Clear(cl, g->emittance, VEC4(0, 0, 0, 0)); } //- Material pass @@ -2234,22 +2231,18 @@ void UpdateUser(P_Window *window) __profn("Material pass"); GPU_ProfN(cl, Lit("Material pass")); - GPU_Resource *rts[] = { - g->albedo, - g->emittance - }; GPU_Viewport viewport = GPU_ViewportFromRect(render_viewport); GPU_Scissor scissor = GPU_ScissorFromRect(render_viewport); MaterialSig sig = ZI; /* FIXME: set sampler urid id here */ 
sig.projection = world_to_render_vp_matrix; - sig.instances_urid = GPU_GetResourceId(material_instance_buffer, GPU_ResourceIdKind_Srv); - sig.grids_urid = GPU_GetResourceId(grids_buffer, GPU_ResourceIdKind_Srv); + sig.instances_urid = GPU_GetReadableId(material_instance_buffer); + sig.grids_urid = GPU_GetReadableId(grids_buffer); GPU_Rasterize(cl, &sig, MaterialVS, MaterialPS, - countof(rts), rts, + 2, viewport, scissor, g->material_instances_count, @@ -2259,9 +2252,9 @@ void UpdateUser(P_Window *window) //- Prep flood pass { - GPU_TransitionToSrv(cl, g->emittance); - GPU_TransitionToUav(cl, g->emittance_flood_read); - GPU_TransitionToUav(cl, g->emittance_flood_target); + GPU_TransitionToReadable(cl, g->emittance); + GPU_TransitionToWritable(cl, g->emittance_flood_read); + GPU_TransitionToWritable(cl, g->emittance_flood_target); } //- Flood pass @@ -2280,13 +2273,13 @@ void UpdateUser(P_Window *window) __profn("Flood step"); GPU_ProfN(cl, Lit("Flood step")); - GPU_FlushUav(cl, g->emittance_flood_read); + GPU_FlushWritable(cl, g->emittance_flood_read); FloodSig sig = ZI; sig.step_len = step_length; - sig.emittance_tex_urid = GPU_GetResourceId(g->emittance, GPU_ResourceIdKind_Srv); - sig.read_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read, GPU_ResourceIdKind_Uav); - sig.target_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_target, GPU_ResourceIdKind_Srv); + sig.emittance_tex_urid = GPU_GetReadableId(g->emittance); + sig.read_flood_tex_urid = GPU_GetWritableId(g->emittance_flood_read); + sig.target_flood_tex_urid = GPU_GetReadableId(g->emittance_flood_target); sig.tex_width = g->render_size.x; sig.tex_height = g->render_size.y; GPU_Compute(cl, &sig, FloodCS, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1); @@ -2313,12 +2306,12 @@ void UpdateUser(P_Window *window) { __profn("Clear shade target"); GPU_ProfN(cl, Lit("Clear shade target")); - GPU_TransitionToSrv(cl, g->albedo); - GPU_TransitionToSrv(cl, g->emittance); - 
GPU_TransitionToUav(cl, g->shade_target); - GPU_FlushUav(cl, g->emittance_flood_read); - GPU_FlushUav(cl, g->shade_read); - GPU_ClearResource(cl, g->shade_target, VEC4(0, 0, 0, 0)); + GPU_TransitionToReadable(cl, g->albedo); + GPU_TransitionToReadable(cl, g->emittance); + GPU_TransitionToWritable(cl, g->shade_target); + GPU_FlushWritable(cl, g->emittance_flood_read); + GPU_FlushWritable(cl, g->shade_read); + GPU_Clear(cl, g->shade_target, VEC4(0, 0, 0, 0)); } //- Shade pass @@ -2341,11 +2334,11 @@ void UpdateUser(P_Window *window) (u32)(RandU64FromState(&g->frame_rand) & 0xFFFFFFFF)); sig.frame_index = g->frame_index; sig.camera_offset = g->world_to_render_xf.og; - sig.albedo_tex_urid = GPU_GetResourceId(g->albedo, GPU_ResourceIdKind_Srv); - sig.emittance_tex_urid = GPU_GetResourceId(g->emittance, GPU_ResourceIdKind_Srv); - sig.emittance_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read, GPU_ResourceIdKind_Uav); - sig.read_tex_urid = GPU_GetResourceId(g->shade_read, GPU_ResourceIdKind_Uav); - sig.target_tex_urid = GPU_GetResourceId(g->shade_target, GPU_ResourceIdKind_Uav); + sig.albedo_tex_urid = GPU_GetReadableId(g->albedo); + sig.emittance_tex_urid = GPU_GetReadableId(g->emittance); + sig.emittance_flood_tex_urid = GPU_GetWritableId(g->emittance_flood_read); + sig.read_tex_urid = GPU_GetWritableId(g->shade_read); + sig.target_tex_urid = GPU_GetWritableId(g->shade_target); GPU_Compute(cl, &sig, ShadeCS, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1); /* Swap */ @@ -2358,9 +2351,9 @@ void UpdateUser(P_Window *window) { __profn("Clear ui target"); GPU_ProfN(cl, Lit("Clear ui target")); - GPU_TransitionToRtv(cl, g->ui_target); - GPU_FlushUav(cl, g->shade_read); - GPU_ClearResource(cl, g->ui_target, VEC4(0, 0, 0, 0)); + GPU_TransitionToRenderable(cl, g->ui_target, 0); + GPU_FlushWritable(cl, g->shade_read); + GPU_Clear(cl, g->ui_target, VEC4(0, 0, 0, 0)); } //- Ui blit pass @@ -2376,11 +2369,11 @@ void UpdateUser(P_Window *window) sig.flags = 
UiBlitFlag_ToneMap | UiBlitFlag_GammaCorrect; sig.exposure = 2.0; sig.gamma = (f32)2.2; - sig.tex_urid = GPU_GetResourceId(g->shade_read, GPU_ResourceIdKind_Uav); + sig.tex_urid = GPU_GetWritableId(g->shade_read); GPU_Rasterize(cl, &sig, UiBlitVS, UiBlitPS, - 1, &g->ui_target, + 1, viewport, scissor, 1, @@ -2398,11 +2391,11 @@ void UpdateUser(P_Window *window) UiRectSig sig = ZI; sig.projection = ui_vp_matrix; - sig.instances_urid = GPU_GetResourceId(ui_rect_instance_buffer, GPU_ResourceIdKind_Srv); + sig.instances_urid = GPU_GetReadableId(ui_rect_instance_buffer); GPU_Rasterize(cl, &sig, UiRectVS, UiRectPS, - 1, &g->ui_target, + 1, viewport, scissor, g->ui_rect_instances_count, @@ -2420,11 +2413,11 @@ void UpdateUser(P_Window *window) UiShapeSig sig = ZI; sig.projection = ui_vp_matrix; - sig.verts_urid = GPU_GetResourceId(ui_shape_verts_buffer, GPU_ResourceIdKind_Srv); + sig.verts_urid = GPU_GetReadableId(ui_shape_verts_buffer); GPU_Rasterize(cl, &sig, UiShapeVS, UiShapePS, - 1, &g->ui_target, + 1, viewport, scissor, 1, @@ -2448,7 +2441,7 @@ void UpdateUser(P_Window *window) { DelayReleaseGpuResources_Sig *sig = PushStruct(job->arena, DelayReleaseGpuResources_Sig); job->count = countof(release_resources); - sig->begin_fence = g->gpu_render_fence; + sig->begin_fence = render_fence; sig->begin_fence_target = g->gpu_render_fence_target; sig->resources = PushStructsNoZero(job->arena, GPU_Resource *, job->count); sig->flags = GPU_ReleaseFlag_Reuse; @@ -2469,11 +2462,11 @@ void UpdateUser(P_Window *window) g->grids_count = 0; } - /* FIXME: Enable this */ -#if 0 +#if 1 g->gpu_render_fence_target = GPU_PresentSwapchain(g->swapchain, g->ui_target, 1); #else - Resource test_sprite = ResourceFromStore(&GameResources, Lit("sprite/tim.ase")); + /* TODO: Remove this */ + Resource test_sprite = ResourceFromStore(&GameResources, Lit("sprite/gun.ase")); S_Texture *test = S_TextureFromResource(test_sprite); g->gpu_render_fence_target = GPU_PresentSwapchain(g->swapchain, 
test->gpu_texture, 1); #endif diff --git a/src/pp/pp.h b/src/pp/pp.h index 13e12433..3684bc66 100644 --- a/src/pp/pp.h +++ b/src/pp/pp.h @@ -195,7 +195,6 @@ Struct(SharedUserState) u32 ui_shape_indices_count; u32 grids_count; - Fence *gpu_render_fence; u64 gpu_render_fence_target; //- Bind state diff --git a/src/sound/sound.c b/src/sound/sound.c index e6fa2481..af3dccb0 100644 --- a/src/sound/sound.c +++ b/src/sound/sound.c @@ -26,7 +26,7 @@ JobDef(SND_Load, sig, UNUSED id) decode_flags |= MP3_DecodeFlag_Stereo; } decoded = MP3_Decode(scratch.arena, resource_data, SND_SampleRate, decode_flags); - if (!decoded.success) + if (!decoded.ok) { error_msg = Lit("Failed to decode sound file"); } @@ -36,7 +36,7 @@ JobDef(SND_Load, sig, UNUSED id) error_msg = Lit("Missing resource data"); } - if (decoded.success) + if (decoded.ok) { /* Store */ SND_Sound *sound = 0; diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index 63740f14..fd8a2a46 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -10,7 +10,7 @@ JobDef(S_LoadTexture, sig, _) TempArena scratch = BeginScratchNoConflict(); S_Entry *entry = sig->entry; Resource resource = entry->resource; - b32 success = 1; + b32 ok = 1; S_Texture *texture = &entry->texture; texture->valid = 1; @@ -18,17 +18,16 @@ JobDef(S_LoadTexture, sig, _) String data = DataFromResource(resource); ASE_DecodedImage decoded = ASE_DecodeImage(scratch.arena, data); - success = decoded.success; + ok = decoded.ok; - if (success) + if (ok) { GPU_ResourceDesc desc = ZI; desc.kind = GPU_ResourceKind_Texture2D; desc.flags = GPU_ResourceFlag_None; /* FIXME: Use srgb format */ - desc.texture.format = GPU_Format_R8G8B8A8_Unorm; - // desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; + desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; desc.texture.size = VEC3I32(decoded.width, decoded.height, 1); desc.texture.mip_levels = 1; @@ -37,32 +36,59 @@ JobDef(S_LoadTexture, sig, _) texture->height = decoded.height; /* Fill upload buffer */ - u32 
upload_size = desc.texture.size.x * desc.texture.size.y * sizeof(*decoded.pixels); GPU_ResourceDesc upload_desc = ZI; upload_desc.kind = GPU_ResourceKind_Buffer; upload_desc.buffer.heap_kind = GPU_HeapKind_Upload; - upload_desc.buffer.size = upload_size; + upload_desc.buffer.size = GPU_GetFootprintSize(texture->gpu_texture); GPU_Resource *upload = GPU_AcquireResource(upload_desc); { GPU_Mapped mapped = GPU_Map(upload); - GPU_CopyToMapped(mapped, STRING(upload_size, (u8 *)decoded.pixels)); + GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.pixels, texture->gpu_texture); GPU_Unmap(mapped); } - /* Upload to GPU resource */ - GPU_QueueKind queue = GPU_QueueKind_BackgroundCopy; - Fence *queue_fence = GPU_FenceFromQueue(queue); - i64 queue_fence_target = 0; + GPU_QueueKind copy_queue = GPU_QueueKind_BackgroundCopy; + GPU_QueueKind direct_queue = GPU_QueueKind_Direct; + Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue); + i64 direct_queue_fence_target = 0; + if (copy_queue == direct_queue) { - GPU_CommandList *cl = GPU_BeginCommandList(queue); + /* Copy & transition GPU resource on direct queue*/ { - GPU_CopyResource(cl, texture->gpu_texture, upload); + GPU_CommandList *cl = GPU_BeginCommandList(direct_queue); + { + GPU_TransitionToCopyDst(cl, texture->gpu_texture); + GPU_CopyResource(cl, texture->gpu_texture, upload); + GPU_TransitionToReadable(cl, texture->gpu_texture); + } + direct_queue_fence_target = GPU_EndCommandList(cl); + } + } + else + { + /* Copy to GPU resource on background copy queue*/ + i64 copy_queue_fence_target = 0; + { + GPU_CommandList *cl = GPU_BeginCommandList(copy_queue); + { + GPU_TransitionToCopyDst(cl, texture->gpu_texture); + GPU_CopyResource(cl, texture->gpu_texture, upload); + } + copy_queue_fence_target = GPU_EndCommandList(cl); + } + /* Once copy finishes, transition resource to readable on direct queue */ + { + GPU_QueueWait(direct_queue, copy_queue, copy_queue_fence_target); + GPU_CommandList *cl = 
GPU_BeginCommandList(direct_queue); + { + GPU_TransitionToReadable(cl, texture->gpu_texture); + } + direct_queue_fence_target = GPU_EndCommandList(cl); } - queue_fence_target = GPU_EndCommandList(cl); } - /* Release upload buffer after copy finishes */ - YieldOnFence(queue_fence, queue_fence_target); + /* Release upload buffer once transition finishes */ + YieldOnFence(direct_queue_fence, direct_queue_fence_target); GPU_ReleaseResource(upload, GPU_ReleaseFlag_None); } @@ -77,7 +103,7 @@ JobDef(S_LoadSheet, sig, _) Arena *perm = PermArena(); S_Entry *entry = sig->entry; Resource resource = entry->resource; - b32 success = 1; + b32 ok = 1; S_Sheet *sheet = &entry->sheet; sheet->valid = 1; @@ -85,9 +111,9 @@ JobDef(S_LoadSheet, sig, _) String data = DataFromResource(resource); ASE_DecodedSheet decoded = ASE_DecodeSheet(scratch.arena, data); - success = decoded.success; + ok = decoded.ok; - if (success) + if (ok) { Vec2 image_size = decoded.image_size; Vec2 frame_size = decoded.frame_size;