diff --git a/src/base/base.h b/src/base/base.h index b511b45c..3ea85d4b 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -704,9 +704,10 @@ #endif //////////////////////////////////////////////////////////// -//~ Shader types +//~ C <-> Shader interop types //- Shader linkage + #if IsLanguageC Struct(VertexShader) { ResourceKey resource; }; Struct(PixelShader) { ResourceKey resource; }; @@ -721,6 +722,7 @@ #endif //- Shader resource handles + Struct(StructuredBufferHandle) { u32 v; }; Struct(RWStructuredBufferHandle) { u32 v; }; Struct(Texture1DHandle) { u32 v; }; diff --git a/src/base/base_gpu.h b/src/base/base_gpu.h index bdc81ec3..807d6f35 100644 --- a/src/base/base_gpu.h +++ b/src/base/base_gpu.h @@ -110,3 +110,12 @@ Vec2 NdcFromPos(Vec2 pos, Vec2 size) result += Vec2(-1, 1); return result; } + +Vec2 NdcFromUv(Vec2 uv) +{ + Vec2 result; + result = uv; + result *= Vec2(2, -2); + result += Vec2(-1, 1); + return result; +} diff --git a/src/base/base_math.c b/src/base/base_math.c index 998fde78..b192dde8 100644 --- a/src/base/base_math.c +++ b/src/base/base_math.c @@ -122,7 +122,7 @@ f64 ModF64(f64 x, f64 m) } //////////////////////////////////////////////////////////// -//~ Floating point sign +//~ Abs f32 AbsF32(f32 f) { @@ -159,7 +159,9 @@ i64 SignF64(f64 f) } //////////////////////////////////////////////////////////// -//~ U64 pow +//~ Exponential ops + +//- Pow u64 /* Taken from https://gist.github.com/orlp/3551590 */ u64 PowU64(u64 base, u8 exp) @@ -254,28 +256,7 @@ u64 PowU64(u64 base, u8 exp) } } -//////////////////////////////////////////////////////////// -//~ Align up - -u64 AlignU64Pow2(u64 x) -{ - u64 result = 0; - if (x > 0) - { - result = x - 1; - result |= result >> 1; - result |= result >> 2; - result |= result >> 4; - result |= result >> 8; - result |= result >> 16; - result |= result >> 32; - ++result; - } - return result; -} - -//////////////////////////////////////////////////////////// -//~ Logn +//- Logn /* Based on FreeBSD's 
implementation * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_logf.c */ @@ -369,8 +350,7 @@ f32 LnF32(f32 x) } } -//////////////////////////////////////////////////////////// -//~ Exp +//- Exp /* Based on FreeBSD's implementation * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_expf.c */ @@ -481,8 +461,7 @@ f32 ExpF32(f32 x) } } -//////////////////////////////////////////////////////////// -//~ Pow +//- Pow f32 PowF32(f32 a, f32 b) { @@ -499,8 +478,7 @@ f32 PowF32(f32 a, f32 b) } } -//////////////////////////////////////////////////////////// -//~ Sqrt +//- Sqrt f32 SqrtF32(f32 x) { @@ -517,6 +495,34 @@ f32 RSqrtF32(f32 x) return IxRsqrtF32(x); } +//////////////////////////////////////////////////////////// +//~ Align + +u64 AlignU64(u64 x, u64 align) +{ + align = MaxU64(align, 1); + u64 result = (x + (align - 1)); + result -= result % align; + return result; +} + +u64 AlignU64ToNextPow2(u64 x) +{ + u64 result = 0; + if (x > 0) + { + result = x - 1; + result |= result >> 1; + result |= result >> 2; + result |= result >> 4; + result |= result >> 8; + result |= result >> 16; + result |= result >> 32; + ++result; + } + return result; +} + //////////////////////////////////////////////////////////// //~ Trig diff --git a/src/base/base_math.h b/src/base/base_math.h index b7e3ffc2..77af776b 100644 --- a/src/base/base_math.h +++ b/src/base/base_math.h @@ -235,7 +235,6 @@ i64 SignF64(f64 f); //~ Exponential ops u64 PowU64(u64 base, u8 exp); -u64 AlignU64Pow2(u64 x); f32 LnF32(f32 x); f32 ExpF32(f32 x); f32 PowF32(f32 a, f32 b); @@ -243,6 +242,12 @@ f32 SqrtF32(f32 x); f64 SqrtF64(f64 x); f32 RSqrtF32(f32 x); +//////////////////////////////////////////////////////////// +//~ Align + +u64 AlignU64(u64 x, u64 align); +u64 AlignU64ToNextPow2(u64 x); + //////////////////////////////////////////////////////////// //~ Trig diff --git a/src/config.h b/src/config.h index b25f9992..8a224e25 100644 --- a/src/config.h +++ b/src/config.h @@ -70,7 
+70,7 @@ #define FLOOD_DEBUG 0 #define GPU_DEBUG 1 -#define GPU_DEBUG_VALIDATION 0 +#define GPU_DEBUG_VALIDATION 1 /* If virtual fibers are enabled, each fiber will get its own OS thread, * and fiber suspend/resume will be emulated using OS thread primitives. diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 67533d50..6ad27090 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -7,11 +7,38 @@ void GPU_StartupCommon(void) { GPU_SharedUtilState *g = &GPU_shared_util_state; - // GPU_ArenaHandle gpu_perm = GPU_PermArena(); + GPU_ArenaHandle gpu_perm = GPU_PermArena(); + + /* Init point sampler */ + { + GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); + g->pt_sampler = GPU_PushSamplerStateHandle(gpu_perm, pt_sampler); + } + + GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct); + { + /* Init quad index buffer */ + { + u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; + GPU_ResourceHandle quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data)); + GPU_CopyCpuBytes(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data))); + g->quad_indices.resource = quad_indices; + g->quad_indices.index_size = sizeof(quad_data[0]); + g->quad_indices.index_count = countof(quad_data); + } + + /* TODO: Init noise texture */ + { + } + } + GPU_CommitCommandList(cl); + + GPU_SyncAllQueues(GPU_QueueKind_Direct); + + + + - // /* Init point sampler */ - // GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); - // g->pt_sampler = GPU_PushSamplerPtr(gpu_perm, pt_sampler); // GPU_CommandListHandle cl = GPU_PrepareCommandList(); // { diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 2c5cc2e8..0b777f2a 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -326,7 +326,7 @@ Struct(GPU_TextureDesc) GPU_Format format; Vec3I32 dims; GPU_Layout initial_layout; - i32 mip_levels; /* Will be clamped to range [1, 
max] */ + i32 mip_levels; /* Will be clamped to range [1, inf) */ Vec4 clear_color; }; @@ -560,10 +560,13 @@ SamplerStateHandle GPU_PushSamplerStateHandle (GPU_ArenaHandle //- Count -u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer); -u64 GPU_Count1D(GPU_ResourceHandle texture1d); -u64 GPU_Count2D(GPU_ResourceHandle texture2d); -u64 GPU_Count3D(GPU_ResourceHandle texture3d); +u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer); +i32 GPU_Count1D(GPU_ResourceHandle texture); +Vec2I32 GPU_Count2D(GPU_ResourceHandle texture); +Vec3I32 GPU_Count3D(GPU_ResourceHandle texture); +i32 GPU_CountWidth(GPU_ResourceHandle texture); +i32 GPU_CountHeight(GPU_ResourceHandle texture); +i32 GPU_CountDepth(GPU_ResourceHandle texture); #define GPU_CountBuffer(buffer, type) GPU_CountBufferSize(buffer) / sizeof(type) @@ -572,10 +575,10 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d); //- Command list -GPU_CommandListHandle GPU_PrepareCommandList(void); -void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops); +GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue); +void GPU_CommitCommandListEx(GPU_CommandListHandle cl, u64 fence_ops_count, GPU_FenceOp *fence_ops); -#define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0) +#define GPU_CommitCommandList(cl) GPU_CommitCommandListEx((cl), 0, 0) //- Arena @@ -605,42 +608,42 @@ void GPU_SetConstant_(GPU_CommandListHandle cl, i32 slot, void *src_32bit, u32 s void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); -#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .resource = (_resource), \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ +#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \ + GPU_BarrierEx((_cl), 
(GPU_BarrierDesc) { \ + .resource = (_resource), \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ }) -#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .is_global = 1, \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ +#define GPU_MemoryLayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .resource = (_resource), \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ + .layout = _layout, \ }) -#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .resource = (_resource), \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ - .layout = _layout, \ +#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .is_global = 1, \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ }) -#define GPU_DumbMemoryBarrier(_cl, _resource) \ - GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) +#define GPU_DumbMemoryBarrier(cl, resource) \ + GPU_MemoryBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) -#define GPU_DumbGlobalMemoryBarrier(_cl) \ - GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) +#define GPU_DumbMemoryLayoutBarrier(cl, resource, layout) \ + GPU_MemoryLayoutBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, 
GPU_Access_All, (layout)) -#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \ - GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout)) +#define GPU_DumbGlobalMemoryBarrier(cl) \ + GPU_GlobalMemoryBarrier((cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) //- Compute @@ -648,12 +651,29 @@ void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); //- Rasterize -void GPU_Rasterize(GPU_CommandListHandle cl, - VertexShader vs, PixelShader ps, - u32 instances_count, GPU_IndexBufferDesc index_buffer, - u32 render_targets_count, GPU_ResourceHandle *render_targets, - Rng3 viewport, Rng2 scissor, - GPU_RasterMode mode); +void GPU_RasterizeEx(GPU_CommandListHandle cl, + VertexShader vs, PixelShader ps, + u32 instances_count, GPU_IndexBufferDesc index_buffer, + u32 render_targets_count, GPU_ResourceHandle *render_targets, + Rng3 viewport, Rng2 scissor, + GPU_RasterMode mode); + +#define GPU_Rasterize(cl, vs, ps, instances_count, index_buffer, render_target, mode) \ + GPU_RasterizeEx( \ + (cl), \ + (vs), (ps), \ + (instances_count), (index_buffer), \ + 1, &(render_target), \ + RNG3( \ + VEC3(0, 0, 0), \ + VEC3(GPU_CountWidth(render_target), GPU_CountHeight(render_target), 1) \ + ), \ + RNG2( \ + VEC2(0, 0), \ + Vec2FromVec(GPU_Count2D(render_target)) \ + ), \ + (mode) \ + ) //- Clear @@ -663,6 +683,15 @@ void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_t void GPU_ProfN(GPU_CommandListHandle cl, String name); +//////////////////////////////////////////////////////////// +//~ @hookdecl Synchronization + +/* `waiter_queue` will block until `completion_queue` completes all submitted commands */ +void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue); + +/* All queues will block until `completion_queue` completes all submitted commands */ +void GPU_SyncAllQueues(GPU_QueueKind completion_queue); + 
//////////////////////////////////////////////////////////// //~ @hookdecl Statistics diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index fc711b2c..d90e7c23 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -62,7 +62,7 @@ void GPU_Startup(void) { __profn("Create device"); IDXGIAdapter3 *adapter = 0; - ID3D12Device *device = 0; + ID3D12Device10 *device = 0; String error = Lit("Could not initialize GPU device."); String first_gpu_name = ZI; u32 adapter_index = 0; @@ -86,7 +86,7 @@ void GPU_Startup(void) * - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 * - EnhancedBarriersSupported == 1 */ - hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); + hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); } if (SUCCEEDED(hr) && !skip) { @@ -769,7 +769,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) if (desc.kind == GPU_ResourceKind_Buffer) { desc.buffer.stride = MaxU32(desc.buffer.stride, 1); - buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); + buffer_size = MaxU64(AlignU64ToNextPow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); } u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); @@ -1137,9 +1137,13 @@ GPU_ArenaHandle GPU_AcquireArena(void) GPU_D12_Arena *gpu_arena = 0; { Arena *perm = PermArena(); + PushAlign(perm, CachelineSize); gpu_arena = PushStruct(perm, GPU_D12_Arena); + PushAlign(perm, CachelineSize); } - return (GPU_ArenaHandle) { .v = (u64)gpu_arena }; + gpu_arena->arena = AcquireArena(Gibi(1)); + + return GPU_D12_HandleFromPointer(GPU_ArenaHandle, gpu_arena); } void GPU_ReleaseArena(GPU_ArenaHandle arena) @@ -1238,19 +1242,109 @@ GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_ GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc) { /* TODO */ - return 
(GPU_ResourceHandle) { 0 }; + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0); } -GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc) +GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena_handle, GPU_TextureDesc desc) { - /* TODO */ - return (GPU_ResourceHandle) { 0 }; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + D3D12_BARRIER_LAYOUT initial_layout = GPU_D12_BarrierLayoutFromLayout(desc.initial_layout); + + /* Create resource heap */ + if (!gpu_arena->d3d_resource_heap) + { + /* FIXME: Dynamic size */ + D3D12_HEAP_DESC d3d_desc = ZI; + d3d_desc.SizeInBytes = Mebi(64); + d3d_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */ + d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + + ID3D12Heap *heap = 0; + HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap); + if (!SUCCEEDED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to create D3D12 resource heap")); + } + + gpu_arena->d3d_resource_heap = heap; + gpu_arena->heap_size = d3d_desc.SizeInBytes; + } + + ID3D12Resource *d3d_resource = 0; + { + D3D12_RESOURCE_DESC1 d3d_desc = ZI; + d3d_desc.Dimension = desc.kind == GPU_TextureKind_1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : + desc.kind == GPU_TextureKind_2D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE2D : + D3D12_RESOURCE_DIMENSION_TEXTURE3D; + d3d_desc.Width = MaxI32(desc.dims.x, 1); + d3d_desc.Height = MaxI32(desc.dims.y, 1); + d3d_desc.DepthOrArraySize = MaxI32(desc.dims.z, 1); + d3d_desc.MipLevels = MaxI32(desc.mip_levels, 1); + d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.format); + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_AllowRenderTarget); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(desc.flags, GPU_ResourceFlag_AllowDepthStencil); + + u64 alloc_size = 0; + u64 alloc_align = 0; + { + D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI; + ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); + alloc_size = alloc_info.SizeInBytes; + alloc_align = alloc_info.Alignment; + } + + u64 alloc_pos = gpu_arena->heap_pos; + alloc_pos = AlignU64(alloc_pos, alloc_align); + gpu_arena->heap_pos = alloc_pos + alloc_size; + + if (alloc_pos + alloc_size > gpu_arena->heap_size) + { + Panic(Lit("Gpu arena overflow")); + } + + D3D12_CLEAR_VALUE clear_value = { + .Color[0] = desc.clear_color.x, + .Color[1] = desc.clear_color.y, + .Color[2] = desc.clear_color.z, + .Color[3] = desc.clear_color.w, + .Format = d3d_desc.Format + }; + + HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device, + gpu_arena->d3d_resource_heap, + alloc_pos, + &d3d_desc, + initial_layout, + (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? 
&clear_value : 0, + 0, + 0, + &IID_ID3D12Resource, + (void **)&d3d_resource); + } + + GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource); + resource->d3d_resource = d3d_resource; + resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; + resource->flags = desc.flags; + + resource->is_texture = 1; + resource->texture_format = desc.format; + resource->texture_dims = desc.dims; + resource->texture_mip_levels = desc.mip_levels; + resource->texture_layout = initial_layout; + + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, resource); } GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc) { /* TODO */ - return (GPU_ResourceHandle) { 0 }; + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0); } b32 GPU_IsResourceNil(GPU_ResourceHandle handle) @@ -1263,81 +1357,103 @@ b32 GPU_IsResourceNil(GPU_ResourceHandle handle) StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) { /* TODO */ - return (StructuredBufferHandle) { 0 }; + return GPU_D12_HandleFromPointer(StructuredBufferHandle, 0); } RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) { /* TODO */ - return (RWStructuredBufferHandle) { 0 }; + return GPU_D12_HandleFromPointer(RWStructuredBufferHandle, 0); } Texture1DHandle GPU_PushTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture1DHandle) { 0 }; + return GPU_D12_HandleFromPointer(Texture1DHandle, 0); } RWTexture1DHandle GPU_PushRWTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture1DHandle) { 0 }; + return GPU_D12_HandleFromPointer(RWTexture1DHandle, 0); } Texture2DHandle GPU_PushTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture2DHandle) { 0 }; + return 
GPU_D12_HandleFromPointer(Texture2DHandle, 0); } -RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { - /* TODO */ - return (RWTexture2DHandle) { 0 }; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); + GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); + ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle); + return GPU_D12_HandleFromPointer(RWTexture2DHandle, descriptor->index); } Texture3DHandle GPU_PushTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture3DHandle) { 0 }; + return GPU_D12_HandleFromPointer(Texture3DHandle, 0); } RWTexture3DHandle GPU_PushRWTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture3DHandle) { 0 }; + return GPU_D12_HandleFromPointer(RWTexture3DHandle, 0); } SamplerStateHandle GPU_PushSamplerStateHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (SamplerStateHandle) { 0 }; + return GPU_D12_HandleFromPointer(SamplerStateHandle, 0); } //- Count u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(buffer); + return resource->buffer_size; } -u64 GPU_Count1D(GPU_ResourceHandle texture1d) +i32 GPU_Count1D(GPU_ResourceHandle texture) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.x; } -u64 GPU_Count2D(GPU_ResourceHandle texture2d) +Vec2I32 GPU_Count2D(GPU_ResourceHandle texture) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = 
GPU_D12_ResourceFromHandle(texture); + return VEC2I32(resource->texture_dims.x, resource->texture_dims.y); } -u64 GPU_Count3D(GPU_ResourceHandle texture3d) +Vec3I32 GPU_Count3D(GPU_ResourceHandle texture) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims; +} + +i32 GPU_CountWidth(GPU_ResourceHandle texture) +{ + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.x; +} + +i32 GPU_CountHeight(GPU_ResourceHandle texture) +{ + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.y; +} + +i32 GPU_CountDepth(GPU_ResourceHandle texture) +{ + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.z; } //////////////////////////////////////////////////////////// @@ -1399,12 +1515,210 @@ GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v) return cmd; } +GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_QueueKind queue_kind = cl->queue_kind; + GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); + GPU_D12_StagingRegionNode *result = 0; + + Lock lock = LockE(&queue->staging_mutex); + { + GPU_D12_StagingHeap *heap = queue->staging_heap; + i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); + + /* Find first completed region with matching size. 
+ * For each region in heap: + * - If region size > size, split off a smaller region & use it + * + * - If region size < size, try to merge with next completed region + * + * - If no available completed region with eligible size, queue the + * current heap for deletion & create a new heap + * with larger size + */ + + + + + /* FIXME: Region completion target should be atomic, and initialized to + * u64/i64 max until cl submission actually sets value */ + + /* Find region with large enough size */ + GPU_D12_StagingRegionNode *match = 0; + if (heap && heap->size >= size) + { + GPU_D12_StagingRegionNode *r = heap->head_region_node; + for (;;) + { + b32 is_completed = completed >= Atomic64Fetch(&r->completion_target); + if (is_completed) + { + GPU_D12_StagingRegionNode *next = r->next; + u64 region_size = 0; + if (next->pos > r->pos) + { + region_size = next->pos - r->pos; + } + else + { + region_size = heap->size - r->pos; + } + + if (region_size < size) + { + GPU_D12_StagingRegionNode *prev = r->prev; + b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target); + if (prev_is_completed && prev->pos < r->pos) + { + /* Merge with previous region & retry */ + prev->next = next; + SllStackPush(heap->first_free_region_node, r); + r = prev; + } + else + { + /* Continue to next region */ + r = next; + } + } + else + { + /* Found matching region */ + match = r; + break; + } + } + else + { + /* No large-enough completed region found */ + break; + } + } + } + + /* Create new heap if no match found */ + if (!match) + { + /* Queue old heap for deletion */ + u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64)); + if (heap) + { + /* FIXME: Queue for deletion here */ + new_heap_size = MaxU64(new_heap_size, heap->size * 2); + heap = 0; + } + + /* Create new heap */ + { + Arena *arena = AcquireArena(Gibi(1)); + heap = PushStruct(arena, GPU_D12_StagingHeap); + heap->arena = arena; + heap->size = new_heap_size; + + /* Create backing upload heap resource */ + 
ID3D12Resource *d3d_resource = 0; + { + D3D12_RESOURCE_DESC d3d_desc = ZI; + d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + d3d_desc.Format = DXGI_FORMAT_UNKNOWN; + d3d_desc.Alignment = 0; + d3d_desc.Width = new_heap_size; + d3d_desc.Height = 1; + d3d_desc.DepthOrArraySize = 1; + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, + &heap_props, + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, + &d3d_desc, + D3D12_RESOURCE_STATE_COMMON, + 0, + &IID_ID3D12Resource, + (void **)&d3d_resource); + if (!SUCCEEDED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to create upload heap")); + } + } + heap->resource.d3d_resource = d3d_resource; + heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; + heap->resource.buffer_size = new_heap_size; + heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); + + /* Map */ + { + D3D12_RANGE read_range = ZI; + HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped); + if (!SUCCEEDED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to map upload heap")); + } + } + } + + /* Create initial region */ + match = PushStruct(heap->arena, GPU_D12_StagingRegionNode); + match->heap = heap; + match->next = match; + match->prev = match; + heap->head_region_node = match; + } + + /* Split extra region space */ + { + GPU_D12_StagingRegionNode *next = match->next; + u64 region_size = 0; + if (next->pos > match->pos) + { + region_size = next->pos - match->pos; + } + else + { + region_size = heap->size - match->pos; + } + + if (region_size > size) + { + GPU_D12_StagingRegionNode *new_next = heap->first_free_region_node; + if (new_next) + { + SllStackPop(heap->first_free_region_node); + } + else + { + new_next = PushStruct(heap->arena, 
GPU_D12_StagingRegionNode); + } + new_next->next = next; + new_next->prev = match; + next->prev = new_next; + match->next = new_next; + + new_next->heap = heap; + new_next->pos = match->pos + size; + } + } + + Atomic64Set(&match->completion_target, I64Max); + result = match; + } + Unlock(&lock); + + return result; +} + //////////////////////////////////////////////////////////// //~ @hookimpl Command //- Command list -GPU_CommandListHandle GPU_PrepareCommandList(void) +GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = 0; @@ -1423,13 +1737,16 @@ GPU_CommandListHandle GPU_PrepareCommandList(void) } } Unlock(&lock); - return (GPU_CommandListHandle) { .v = (u64)cl }; + cl->queue_kind = queue; + + return GPU_D12_HandleFromPointer(GPU_CommandListHandle, cl); } -void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind, u64 fence_ops_count, GPU_FenceOp *fence_ops) +void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_count, GPU_FenceOp *fence_ops) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + GPU_QueueKind queue_kind = cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); @@ -1849,8 +2166,9 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu u32 indices_count = 0; D3D12_INDEX_BUFFER_VIEW ibv = ZI; { + GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; + if (desc.index_count > 0) { - GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource); ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; ibv.SizeInBytes = desc.index_size * desc.index_count; @@ -2057,16 +2375,27 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu //- Arena -void 
GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena) +void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena_handle) { + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + /* TODO */ + + /* FIXME: Move descriptors into committed lists */ + + /* FIXME: Release id3d12 resource com object references */ + gpu_arena->heap_pos = 0; } //- Cpu -> Gpu copy -void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range) +void GPU_CopyCpuBytes(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range) { - /* TODO */ + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + u64 size = src_copy_range.max - src_copy_range.min; + GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, size); + CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, size); + GPU_CopyBytes(cl_handle, dst_handle, dst_offset, GPU_D12_HandleFromPointer(GPU_ResourceHandle, ®ion->heap->resource), RNGU64(region->pos, region->pos + size)); } void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range) @@ -2180,12 +2509,12 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou //- Rasterize -void GPU_Rasterize(GPU_CommandListHandle cl_handle, - VertexShader vs, PixelShader ps, - u32 instances_count, GPU_IndexBufferDesc index_buffer, - u32 render_targets_count, GPU_ResourceHandle *render_targets, - Rng3 viewport, Rng2 scissor, - GPU_RasterMode mode) +void GPU_RasterizeEx(GPU_CommandListHandle cl_handle, + VertexShader vs, PixelShader ps, + u32 instances_count, GPU_IndexBufferDesc index_buffer, + u32 render_targets_count, GPU_ResourceHandle *render_targets, + Rng3 viewport, Rng2 scissor, + GPU_RasterMode mode) { GPU_D12_CmdList *cl = 
GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); @@ -2221,6 +2550,19 @@ void GPU_ProfN(GPU_CommandListHandle cl, String name) /* TODO */ } +//////////////////////////////////////////////////////////// +//~ @hookimpl Synchronization + +void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue) +{ + /* TODO */ +} + +void GPU_SyncAllQueues(GPU_QueueKind completion_queue) +{ + /* TODO */ +} + //////////////////////////////////////////////////////////// //~ @hookimpl Map hooks @@ -2315,7 +2657,7 @@ GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window) swapchain = PushStruct(perm, GPU_D12_Swapchain); } swapchain->window_hwnd = (HWND)WND_OsHandleFromWindow(window); - return (GPU_SwapchainHandle) { .v = (u64)swapchain }; + return GPU_D12_HandleFromPointer(GPU_SwapchainHandle, swapchain); } void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle) @@ -2483,7 +2825,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; } - return (GPU_ResourceHandle) { .v = (u64)cur_backbuffer }; + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, cur_backbuffer); } void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync) diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 8f1efa99..af6b92d9 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -104,7 +104,18 @@ Struct(GPU_D12_DescriptorList) Struct(GPU_D12_Arena) { + Arena *arena; GPU_D12_DescriptorList committed_descriptors_by_heap_and_queue[GPU_D12_DescriptorHeapKind_Count][GPU_NumQueues]; + + /* TODO: + * To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for: + * - Buffers + * - Non-render target & non-depth stencil textures + * - Render target or depth stencil textures + */ + ID3D12Heap *d3d_resource_heap; + u64 heap_pos; + u64 heap_size; }; //////////////////////////////////////////////////////////// 
@@ -132,6 +143,35 @@ Struct(GPU_D12_Resource) struct GPU_D12_Swapchain *swapchain; }; +//////////////////////////////////////////////////////////// +//~ Staging types + +Struct(GPU_D12_StagingHeap) +{ + Arena *arena; + + GPU_D12_Resource resource; + void *mapped; + u64 size; + + struct GPU_D12_StagingRegionNode *head_region_node; + struct GPU_D12_StagingRegionNode *first_free_region_node; + +}; + +Struct(GPU_D12_StagingRegionNode) +{ + GPU_D12_StagingHeap *heap; + + /* Heap links (requires heap lock to read) */ + GPU_D12_StagingRegionNode *prev; + GPU_D12_StagingRegionNode *next; + + /* Region info */ + Atomic64 completion_target; + u64 pos; +}; + //////////////////////////////////////////////////////////// //~ Command queue types @@ -149,9 +189,15 @@ Struct(GPU_D12_Queue) Mutex commit_mutex; ID3D12Fence *commit_fence; u64 commit_fence_target; + + /* Raw command lists */ struct GPU_D12_RawCommandList *first_committed_cl; struct GPU_D12_RawCommandList *last_committed_cl; + /* Staging heap */ + Mutex staging_mutex; + GPU_D12_StagingHeap *staging_heap; + Fence sync_fence; }; @@ -264,6 +310,7 @@ Struct(GPU_D12_CmdChunk) Struct(GPU_D12_CmdList) { + GPU_QueueKind queue_kind; GPU_D12_CmdList *next; GPU_D12_CmdChunk *first_cmd_chunk; @@ -332,12 +379,14 @@ Struct(GPU_D12_SharedState) /* Device */ IDXGIFactory6 *factory; IDXGIAdapter3 *adapter; - ID3D12Device *device; + ID3D12Device10 *device; } extern GPU_D12_shared_state; //////////////////////////////////////////////////////////// //~ Helpers +#define GPU_D12_HandleFromPointer(type, ptr) (type) { .v = (u64)(ptr) } + GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); @@ -376,6 +425,7 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl); GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl); GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, 
void *v); +GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size); //////////////////////////////////////////////////////////// //~ Sync job diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 8204eb20..aa7274d6 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -33,9 +33,9 @@ JobImpl(V_VisWorker, _, __) Arena *frame_arena = AcquireArena(Gibi(64)); Arena *perm = PermArena(); - GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable); - GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); - GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); + GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mebi(8), GPU_CpuAccessFlag_Writable); + GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable); + GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable); ////////////////////////////// //- State diff --git a/src/proto/proto.c b/src/proto/proto.c index f92b9a75..cb3fb713 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -1,5 +1,5 @@ -JobDecl(PR_RunForever, EmptySig); -JobImpl(PR_RunForever, _sig, _id) +JobDecl(PT_RunForever, EmptySig); +JobImpl(PT_RunForever, _sig, _id) { GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena(); @@ -28,7 +28,7 @@ JobImpl(PR_RunForever, _sig, _id) GPU_ResourceHandle backbuffer = GPU_PrepareBackbuffer(swapchain, GPU_Format_R16G16B16A16_Float, window_frame.draw_size); { - GPU_CommandListHandle cl = GPU_PrepareCommandList(); + GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct); { /* Push resources */ Vec2I32 final_target_size = window_frame.draw_size; @@ -36,43 +36,44 @@ JobImpl(PR_RunForever, _sig, _id) GPU_Format_R16G16B16A16_Float, final_target_size, GPU_Layout_DirectQueue_ShaderReadWrite, - .flags = GPU_ResourceFlag_AllowShaderReadWrite | GPU_ResourceFlag_AllowRenderTarget); + .flags = 
GPU_ResourceFlag_AllowShaderReadWrite); /* Push resource handles */ + Texture2DHandle final_target_rhandle = GPU_PushTexture2DHandle(gpu_frame_arena, final_target); RWTexture2DHandle final_target_rwhandle = GPU_PushRWTexture2DHandle(gpu_frame_arena, final_target); /* Prep test pass */ { - GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle); - GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123); + GPU_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle); + GPU_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); + GPU_SetConstant(cl, PT_ShaderConst_BlitSampler, GPU_GetCommonPointSampler()); + GPU_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle); } /* Test pass */ { - GPU_Compute(cl, PR_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); + GPU_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); } - GPU_DumbMemoryBarrier(cl, final_target); - /* Prep clear pass */ + /* Prep blit pass */ { - GPU_LayoutBarrier(cl, backbuffer, - GPU_Stage_None, GPU_Access_None, - GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite, - GPU_Layout_DirectQueue_RenderTargetWrite); + GPU_DumbMemoryLayoutBarrier(cl, final_target, GPU_Layout_DirectQueue_ShaderRead); + GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_DirectQueue_RenderTargetWrite); } - /* Clear pass */ + /* Blit pass */ { + GPU_Rasterize(cl, + PT_BlitVS, PT_BlitPS, + 1, GPU_GetCommonQuadIndices(), + backbuffer, GPU_RasterMode_TriangleList); GPU_ClearRenderTarget(cl, backbuffer, VEC4(1, 0, 0, 1)); } /* Finalize backbuffer layout */ { - GPU_LayoutBarrier(cl, backbuffer, - GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite, - GPU_Stage_None, GPU_Access_None, - GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); + GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); } /* Reset */ @@ -80,7 +81,7 @@ JobImpl(PR_RunForever, _sig, _id) GPU_ResetArena(cl, 
gpu_frame_arena); } } - GPU_CommitCommandList(cl, GPU_QueueKind_Direct); + GPU_CommitCommandList(cl); } GPU_CommitBackbuffer(backbuffer, VSYNC); } @@ -88,8 +89,8 @@ JobImpl(PR_RunForever, _sig, _id) } } -void PR_Startup(void); -void PR_Startup(void) +void PT_Startup(void); +void PT_Startup(void) { - RunJob(PR_RunForever); + RunJob(PT_RunForever); } diff --git a/src/proto/proto.lay b/src/proto/proto.lay index 35e3a08d..097eb871 100644 --- a/src/proto/proto.lay +++ b/src/proto/proto.lay @@ -13,7 +13,9 @@ @IncludeGpu proto_shaders.gpu //- Shaders -@ComputeShader PR_TestCS +@ComputeShader PT_TestCS +@VertexShader PT_BlitVS +@PixelShader PT_BlitPS //- Startup -@Startup PR_Startup +@Startup PT_Startup diff --git a/src/proto/proto_shaders.gpu b/src/proto/proto_shaders.gpu index c28e9a8e..1765df08 100644 --- a/src/proto/proto_shaders.gpu +++ b/src/proto/proto_shaders.gpu @@ -6,11 +6,11 @@ Struct(TestStruct) i32 i; }; -ComputeShader2D(PR_TestCS, 8, 8) +ComputeShader2D(PT_TestCS, 8, 8) { - StructuredBuffer sb = StructuredBufferFromHandle(PR_ShaderConst_TestBuff); + StructuredBuffer sb = StructuredBufferFromHandle(PT_ShaderConst_TestBuff); - RWTexture2D target_tex = RWTexture2DFromHandle(PR_ShaderConst_TestTarget); + RWTexture2D target_tex = RWTexture2DFromHandle(PT_ShaderConst_TestTarget); Vec2U32 target_tex_size = Count2D(target_tex); Vec2I32 id = SV_DispatchThreadID; @@ -19,3 +19,46 @@ ComputeShader2D(PR_TestCS, 8, 8) target_tex[id] = Vec4(0, 1, 0, 1); } } + +//////////////////////////////////////////////////////////// +//~ Blit shader + +Struct(PT_BlitPSInput) +{ + Semantic(Vec4, SV_Position); + Semantic(Vec2, src_uv); +}; + +Struct(PT_BlitPSOutput) +{ + Semantic(Vec4, SV_Target0); +}; + +////////////////////////////// +//- Vertex shader + +VertexShader(PT_BlitVS, PT_BlitPSInput) +{ + Vec2 uv = RectUvFromVertexId(SV_VertexID); + + PT_BlitPSInput result; + result.SV_Position = Vec4(NdcFromUv(uv).xy, 0, 1); + result.src_uv = uv; + return result; +} + 
+////////////////////////////// +//- Pixel shader + +PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) +{ + SamplerState sampler = SamplerStateFromHandle(PT_ShaderConst_BlitSampler); + Texture2D tex = Texture2DFromHandle(PT_ShaderConst_BlitSrc); + + Vec2 uv = input.src_uv; + Vec4 result = tex.Sample(sampler, uv); + + PT_BlitPSOutput output; + output.SV_Target0 = result; + return output; +} diff --git a/src/proto/proto_shaders.h b/src/proto/proto_shaders.h index 35c5f9fe..0e7e5abd 100644 --- a/src/proto/proto_shaders.h +++ b/src/proto/proto_shaders.h @@ -1,6 +1,11 @@ //////////////////////////////////////////////////////////// //~ Constants -ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0); -ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1); -ShaderConstant(f32, PR_ShaderConst_TestConst, 2); +/* Test shader */ +ShaderConstant(RWTexture2DHandle, PT_ShaderConst_TestTarget, 0); +ShaderConstant(StructuredBufferHandle, PT_ShaderConst_TestBuff, 1); +ShaderConstant(f32, PT_ShaderConst_TestConst, 2); + +/* Blit shader */ +ShaderConstant(SamplerStateHandle, PT_ShaderConst_BlitSampler, 3); +ShaderConstant(Texture2DHandle, PT_ShaderConst_BlitSrc, 4); diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index d94080ff..df3bcfe8 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -84,7 +84,7 @@ JobImpl(SPR_LoadSheet, sig, _) /* Init spans */ sheet->spans_count = decoded.num_spans; - sheet->span_bins_count = MaxU32(AlignU64Pow2(sheet->spans_count * 2), 1); + sheet->span_bins_count = MaxU32(AlignU64ToNextPow2(sheet->spans_count * 2), 1); sheet->spans = PushStructs(perm, SPR_Span, sheet->spans_count); sheet->span_bins = PushStructs(perm, SPR_SpanBin, sheet->span_bins_count); { @@ -107,7 +107,7 @@ JobImpl(SPR_LoadSheet, sig, _) /* Init slice groups */ sheet->slice_groups_count = decoded.num_slice_keys; - sheet->slice_group_bins_count = MaxU32(AlignU64Pow2(sheet->slice_groups_count * 2), 1); + sheet->slice_group_bins_count = 
MaxU32(AlignU64ToNextPow2(sheet->slice_groups_count * 2), 1); sheet->slice_groups = PushStructs(perm, SPR_SliceGroup, sheet->slice_groups_count); sheet->slice_group_bins = PushStructs(perm, SPR_SliceGroupBin, sheet->slice_group_bins_count); { diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index 0cde39e2..bbb34ff4 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -698,8 +698,8 @@ i64 UI_EndFrame(UI_Frame frame) { g->eframe.layout_arena = AcquireArena(Gibi(64)); g->eframe.tex_gpu_arena = GPU_AcquireTextureArena(); - g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16)); - g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16)); + g->eframe.frame_gpu_arena = GPU_AcquireArena(Mebi(16)); + g->eframe.drects_gpu_arena = GPU_AcquireArena(Mebi(16)); } ResetArena(g->eframe.layout_arena);