diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index ab32a0fb..3e86ed9d 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -619,13 +619,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); ) \ } -#define G_PushRWStructuredBufferRef(arena, resource, type, ...) (G_RWStructuredBufferRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_RWStructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \ - ) \ -} - #define G_PushByteAddressBufferRef(arena, resource, ...) (G_ByteAddressBufferRef) { \ .v = G_PushRef( \ (arena), (resource), \ @@ -633,13 +626,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); ) \ } -#define G_PushRWByteAddressBufferRef(arena, resource, ...) (G_RWByteAddressBufferRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_RWByteAddressBuffer, __VA_ARGS__ } \ - ) \ -} - #define G_PushTexture1DRef(arena, resource, ...) (G_Texture1DRef) { \ .v = G_PushRef( \ (arena), (resource), \ @@ -647,13 +633,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); ) \ } -#define G_PushRWTexture1DRef(arena, resource, ...) (G_RWTexture1DRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_RWTexture1D, .mips.max = 64, __VA_ARGS__ } \ - ) \ -} - #define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \ .v = G_PushRef( \ (arena), (resource), \ @@ -661,13 +640,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); ) \ } -#define G_PushRWTexture2DRef(arena, resource, ...) (G_RWTexture2DRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_RWTexture2D, .mips.max = 64, __VA_ARGS__ } \ - ) \ -} - #define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \ .v = G_PushRef( \ (arena), (resource), \ @@ -675,13 +647,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); ) \ } -#define G_PushRWTexture3DRef(arena, resource, ...) (G_RWTexture3DRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_RWTexture3D, .mips.max = 64, __VA_ARGS__ } \ - ) \ -} - #define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \ .v = G_PushRef( \ (arena), (resource), \ diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index f684a7db..78de8af0 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -333,24 +333,34 @@ void G_Bootstrap(void) //- Initialize descriptor heaps { - Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; String name; }; + Struct(Dx12HeapDesc) + { + D3D12_DESCRIPTOR_HEAP_TYPE type; + D3D12_DESCRIPTOR_HEAP_FLAGS flags; + u64 max; + u64 per_batch_count; + String name; + }; Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = { [G_D12_DescriptorHeapKind_CbvSrvUav] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = G_D12_MaxCbvSrvUavDescriptors, + .per_batch_count = 2, // 1 read, 1 write per ref .name = Lit("Primary Resource Descriptor Heap"), }, [G_D12_DescriptorHeapKind_Rtv] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, .max = G_D12_MaxRtvDescriptors, + .per_batch_count = 1, .name = Lit("Primary RTV Descriptor Heap"), }, [G_D12_DescriptorHeapKind_Sampler] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = G_D12_MaxSamplerDescriptors, + .per_batch_count = 1, .name = Lit("Primary Sampler Descriptor Heap"), }, }; @@ -362,6 +372,7 @@ void G_Bootstrap(void) heap->kind = kind; heap->type = desc.type; + heap->per_batch_count = desc.per_batch_count; heap->max_count = desc.max; heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(G_D12.device, desc.type); @@ -483,7 +494,7 @@ void G_Bootstrap(void) .flags = G_ResourceFlag_HostMemory, .name = Lit("Debug print readback buffer") ); - queue->print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->print_buffer); + queue->print_buffer_ref = G_PushByteAddressBufferRef(gpu_perm, queue->print_buffer); } G_CommitCommandList(cl); } @@ -1031,9 +1042,9 @@ G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind) if (SUCCEEDED(hr) && queue_kind == G_QueueKind_Direct) { G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); - for (u32 i = 0; i < countof(cl->rtv_descriptors); ++i) + for (u32 rtv_idx = 0; rtv_idx < countof(cl->rtv_descriptors); ++rtv_idx) { - cl->rtv_descriptors[i] = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); + cl->rtv_descriptors[rtv_idx] = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); } cl->rtv_clear_descriptor = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); } @@ -1563,6 +1574,7 @@ G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind) { G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; + u64 per_batch_count = heap->per_batch_count; G_D12_Descriptor *descriptor = 0; u32 index = 0; @@ -1578,7 +1590,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH { // Descriptor no longer in use by gpu, reuse it DllQueueRemove(descriptors->first, descriptors->last, descriptor); - --descriptors->count; + descriptors->count -= 1; index = descriptor->index; } else @@ -1607,7 +1619,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH Panic(Lit("Max descriptors reached in heap")); } descriptor = PushStructNoZero(heap->descriptors_arena, G_D12_Descriptor); - index = descriptors_count; + index = descriptors_count * per_batch_count; } } Unlock(&lock); @@ -1617,11 +1629,11 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH ZeroStruct(descriptor); descriptor->gpu_arena = gpu_arena; descriptor->index = index; - descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); + descriptor->first_handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); descriptor->heap = heap; DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); - ++gpu_arena->descriptors.count; + gpu_arena->descriptors.count += 1; return descriptor; } @@ -1638,144 +1650,167 @@ u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_Re G_RefKind kind = ref_desc.kind; b32 is_buffer = ( kind == G_RefKind_StructuredBuffer || - kind == G_RefKind_RWStructuredBuffer || - kind == G_RefKind_ByteAddressBuffer || - kind == G_RefKind_RWByteAddressBuffer + kind == G_RefKind_ByteAddressBuffer ); b32 is_sampler = kind == G_RefKind_SamplerState; b32 is_texture = !is_buffer && !is_sampler; - b32 is_raw = ( - kind == G_RefKind_ByteAddressBuffer || - kind == G_RefKind_RWByteAddressBuffer - ); - b32 is_uav = ( - kind == G_RefKind_RWStructuredBuffer || - kind == G_RefKind_RWByteAddressBuffer || - kind == G_RefKind_RWTexture1D || - kind == G_RefKind_RWTexture2D || - kind == G_RefKind_RWTexture3D - ); + b32 is_raw = kind == G_RefKind_ByteAddressBuffer; + b32 is_writable = resource->flags & G_ResourceFlag_AllowShaderReadWrite; - if (is_uav) - { - // RW refs must be allowed on this resource - Assert(resource->flags & G_ResourceFlag_AllowShaderReadWrite); - } + b32 ok = 1; G_D12_Descriptor *descriptor = 0; - if (is_buffer) + if (is_buffer || is_texture) { - if (is_raw) - { - ref_desc.element_size = 4; - ref_desc.element_offset /= 4; - } - descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav); - u64 buffer_size_actual = resource->buffer_size_actual; - u64 num_elements_in_buffer = buffer_size_actual / ref_desc.element_size; - u64 num_elements_after_offset = num_elements_in_buffer > ref_desc.element_offset ? num_elements_in_buffer - ref_desc.element_offset : 0; - if (num_elements_after_offset > 0) + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav]; + Assert(heap->per_batch_count >= 2); + D3D12_CPU_DESCRIPTOR_HANDLE readonly_handle = descriptor->first_handle; + D3D12_CPU_DESCRIPTOR_HANDLE readwrite_handle = descriptor->first_handle; + readwrite_handle.ptr += heap->descriptor_size; + + b32 srv_ok = 0; + b32 uav_ok = 0; + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = Zi; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = Zi; + + if (is_buffer) { - if (is_uav) + if (is_raw) + { + ref_desc.element_size = 4; + ref_desc.element_offset /= 4; + } + + u64 buffer_size_actual = resource->buffer_size_actual; + u64 num_elements_in_buffer = buffer_size_actual / ref_desc.element_size; + u64 num_elements_after_offset = num_elements_in_buffer > ref_desc.element_offset ? num_elements_in_buffer - ref_desc.element_offset : 0; + + //- Create buffer SRV { - D3D12_UNORDERED_ACCESS_VIEW_DESC desc = Zi; { - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - desc.Buffer.FirstElement = ref_desc.element_offset; - desc.Buffer.NumElements = num_elements_after_offset; - desc.Buffer.StructureByteStride = ref_desc.element_size; - desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Buffer.FirstElement = ref_desc.element_offset; + srv_desc.Buffer.NumElements = num_elements_after_offset; + srv_desc.Buffer.StructureByteStride = ref_desc.element_size; + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; } if (is_raw) { - desc.Format = DXGI_FORMAT_R32_TYPELESS; - desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - desc.Buffer.StructureByteStride = 0; + srv_desc.Format = DXGI_FORMAT_R32_TYPELESS; + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + srv_desc.Buffer.StructureByteStride = 0; } - ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &desc, descriptor->handle); + srv_ok = 1; } - else + //- Create buffer UAV { - D3D12_SHADER_RESOURCE_VIEW_DESC desc = Zi; { - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - desc.Buffer.FirstElement = ref_desc.element_offset; - desc.Buffer.NumElements = num_elements_after_offset; - desc.Buffer.StructureByteStride = ref_desc.element_size; - desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + uav_desc.Format = DXGI_FORMAT_UNKNOWN; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav_desc.Buffer.FirstElement = ref_desc.element_offset; + uav_desc.Buffer.NumElements = num_elements_after_offset; + uav_desc.Buffer.StructureByteStride = ref_desc.element_size; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; } if (is_raw) { - desc.Format = DXGI_FORMAT_R32_TYPELESS; - desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - desc.Buffer.StructureByteStride = 0; - } - ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &desc, descriptor->handle); - } - } - } - else if (is_texture) - { - descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav); - DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format); - RngI32 mips = ref_desc.mips; - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - if (is_uav) - { - D3D12_UNORDERED_ACCESS_VIEW_DESC desc = Zi; - { - desc.Format = DXGI_FORMAT_UNKNOWN; - if (ref_desc.kind == G_RefKind_RWTexture1D) - { - desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - desc.Texture1D.MipSlice = mips.min; - } - else if (ref_desc.kind == G_RefKind_RWTexture2D) - { - desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - desc.Texture2D.MipSlice = mips.min; - } - else if (ref_desc.kind == G_RefKind_RWTexture3D) - { - desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; - desc.Texture3D.MipSlice = mips.min; - desc.Texture3D.WSize = U32Max; + uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uav_desc.Buffer.StructureByteStride = 0; } } - ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &desc, descriptor->handle); - } - else - { - D3D12_SHADER_RESOURCE_VIEW_DESC desc = Zi; + if (num_elements_after_offset > 0) { - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_ok = 1; + if (is_writable) + { + uav_ok = 1; + } + } + } + else if (is_texture) + { + // DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format); + RngI32 mips = ref_desc.mips; + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + //- Create texture SRV + { + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; if (ref_desc.kind == G_RefKind_Texture1D) { - desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - desc.Texture1D.MostDetailedMip = mips.min; - desc.Texture1D.MipLevels = mips.max - mips.min + 1; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srv_desc.Texture1D.MostDetailedMip = mips.min; + srv_desc.Texture1D.MipLevels = mips.max - mips.min + 1; } else if (ref_desc.kind == G_RefKind_Texture2D) { - desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - desc.Texture2D.MostDetailedMip = mips.min; - desc.Texture2D.MipLevels = mips.max - mips.min + 1; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv_desc.Texture2D.MostDetailedMip = mips.min; + srv_desc.Texture2D.MipLevels = mips.max - mips.min + 1; } else if (ref_desc.kind == G_RefKind_Texture3D) { - desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - desc.Texture3D.MostDetailedMip = mips.min; - desc.Texture3D.MipLevels = mips.max - mips.min + 1; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srv_desc.Texture3D.MostDetailedMip = mips.min; + srv_desc.Texture3D.MipLevels = mips.max - mips.min + 1; } } - ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &desc, descriptor->handle); + //- Create texture UAV + { + uav_desc.Format = DXGI_FORMAT_UNKNOWN; + if (ref_desc.kind == G_RefKind_Texture1D) + { + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + uav_desc.Texture1D.MipSlice = mips.min; + } + else if (ref_desc.kind == G_RefKind_Texture2D) + { + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uav_desc.Texture2D.MipSlice = mips.min; + } + else if (ref_desc.kind == G_RefKind_Texture3D) + { + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uav_desc.Texture3D.MipSlice = mips.min; + uav_desc.Texture3D.WSize = U32Max; + } + } + + srv_ok = 1; + if (is_writable) + { + uav_ok = 1; + } + + if (!uav_ok) + { + uav_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + } + } + + if (srv_ok) + { + ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &srv_desc, readonly_handle); + } + else + { + ID3D12Device_CreateShaderResourceView(G_D12.device, 0, &srv_desc, readonly_handle); + } + + if (uav_ok) + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &uav_desc, readwrite_handle); + } + else + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, 0, 0, &uav_desc, readwrite_handle); } } else if (is_sampler) @@ -1805,7 +1840,7 @@ u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_Re { d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; } - ID3D12Device_CreateSampler(G_D12.device, &d3d_desc, descriptor->handle); + ID3D12Device_CreateSampler(G_D12.device, &d3d_desc, descriptor->first_handle); } return descriptor->index; @@ -2757,7 +2792,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) if (bound_render_target_uids[i] != rt->uid + desc.mip) { G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; - G_D12_InitRtv(rt, rtv_descriptor->handle, desc.mip); + G_D12_InitRtv(rt, rtv_descriptor->first_handle, desc.mip); bound_render_target_uids[i] = rt->uid + desc.mip; om_dirty = 1; } @@ -2773,7 +2808,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; for (u32 i = 0; i < rtvs_count; ++i) { - rtv_handles[i] = rcl->rtv_descriptors[i]->handle; + rtv_handles[i] = rcl->rtv_descriptors[i]->first_handle; } ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); } @@ -2798,7 +2833,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) clear_color[2] = cmd->clear_rtv.color.z; clear_color[3] = cmd->clear_rtv.color.w; } - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle; + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->first_handle; if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) { G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip); diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 381c1d44..2019ba58 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -123,6 +123,7 @@ Struct(G_D12_DescriptorHeap) G_D12_DescriptorHeapKind kind; D3D12_DESCRIPTOR_HEAP_TYPE type; + u32 per_batch_count; u32 descriptor_size; ID3D12DescriptorHeap *d3d_heap; D3D12_CPU_DESCRIPTOR_HANDLE start_handle; @@ -142,7 +143,7 @@ Struct(G_D12_Descriptor) i64 completion_queue_target; G_D12_DescriptorHeap *heap; - D3D12_CPU_DESCRIPTOR_HANDLE handle; + D3D12_CPU_DESCRIPTOR_HANDLE first_handle; u32 index; }; @@ -238,7 +239,7 @@ Struct(G_D12_Queue) u64 print_buffer_size; G_ResourceHandle print_buffer; G_ResourceHandle print_readback_buffer; - G_RWByteAddressBufferRef print_buffer_ref; + G_ByteAddressBufferRef print_buffer_ref; // Raw command lists struct G_D12_RawCommandList *first_committed_cl; diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index c1420f46..e07a6062 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -4,28 +4,18 @@ Enum(G_RefKind) { G_RefKind_StructuredBuffer, - G_RefKind_RWStructuredBuffer, G_RefKind_ByteAddressBuffer, - G_RefKind_RWByteAddressBuffer, G_RefKind_Texture1D, - G_RefKind_RWTexture1D, G_RefKind_Texture2D, - G_RefKind_RWTexture2D, G_RefKind_Texture3D, - G_RefKind_RWTexture3D, G_RefKind_SamplerState, }; Struct(G_StructuredBufferRef) { u32 v; }; -Struct(G_RWStructuredBufferRef) { u32 v; }; Struct(G_ByteAddressBufferRef) { u32 v; }; -Struct(G_RWByteAddressBufferRef) { u32 v; }; Struct(G_Texture1DRef) { u32 v; }; -Struct(G_RWTexture1DRef) { u32 v; }; Struct(G_Texture2DRef) { u32 v; }; -Struct(G_RWTexture2DRef) { u32 v; }; Struct(G_Texture3DRef) { u32 v; }; -Struct(G_RWTexture3DRef) { u32 v; }; Struct(G_SamplerStateRef) { u32 v; }; #define G_IsRefNil(r) ((r).v == 0) @@ -36,7 +26,7 @@ Struct(G_SamplerStateRef) { u32 v; }; // // D3D12 exposes 64 root constants and Vulkan exposes 32 push constants. // Supposedly AMD hardware will start spilling constants once more than -// 12 are used - https://gpuopen.com/learn/rdna-performance-guide/ +// 12 are in use - https://gpuopen.com/learn/rdna-performance-guide/ // #define G_NumGeneralPurposeConstants (24) // Constants available for any usage #define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer @@ -62,9 +52,9 @@ Struct(G_SamplerStateRef) { u32 v; }; StaticAssert(G_NumGeneralPurposeConstants == 24); StaticAssert(G_NumReservedConstants >= 3); -G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24); -G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25); -G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26); +G_ForceDeclConstant(G_ByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24); +G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25); +G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26); #if IsGpu #define G_TweakBool G_ShaderConst_TweakB32 @@ -98,17 +88,17 @@ Enum(G_BasicSamplerKind) // optimization on AMD hardware in the future. template StructuredBuffer G_Dereference(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template RWStructuredBuffer G_Dereference(G_RWStructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template RWStructuredBuffer G_DereferenceRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } ByteAddressBuffer G_Dereference(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - RWByteAddressBuffer G_Dereference(G_RWByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + RWByteAddressBuffer G_DereferenceRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } template Texture1D G_Dereference(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } template Texture2D G_Dereference(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } template Texture3D G_Dereference(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template RWTexture1D G_Dereference(G_RWTexture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template RWTexture2D G_Dereference(G_RWTexture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template RWTexture3D G_Dereference(G_RWTexture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template RWTexture1D G_DereferenceRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + template RWTexture2D G_DereferenceRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + template RWTexture3D G_DereferenceRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } SamplerState G_Dereference(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } #endif @@ -226,7 +216,7 @@ Struct(G_FmtArg) void G_CommitPrint(G_TempPrintBuffer buff) { - RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef); + RWByteAddressBuffer rw = G_DereferenceRW(G_ShaderConst_PrintBufferRef); if (buff.overflowed) { diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 86ae0e56..4db0719e 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -402,13 +402,13 @@ void V_TickForever(WaveLaneCtx *lane) G_ResourceHandle gpu_occluders_res = Zi; G_Texture2DRef gpu_tiles = Zi; - G_RWStructuredBufferRef gpu_particles = Zi; - G_RWTexture2DRef gpu_particle_cells[V_ParticleLayer_COUNT]; - G_RWTexture2DRef gpu_particle_densities[V_ParticleLayer_COUNT]; - G_RWTexture2DRef gpu_stains = Zi; - G_RWTexture2DRef gpu_dry_stains = Zi; - G_RWTexture2DRef gpu_drynesses = Zi; - G_RWTexture2DRef gpu_occluders = Zi; + G_StructuredBufferRef gpu_particles = Zi; + G_Texture2DRef gpu_particle_cells[V_ParticleLayer_COUNT]; + G_Texture2DRef gpu_particle_densities[V_ParticleLayer_COUNT]; + G_Texture2DRef gpu_stains = Zi; + G_Texture2DRef gpu_dry_stains = Zi; + G_Texture2DRef gpu_drynesses = Zi; + G_Texture2DRef gpu_occluders = Zi; { G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); { @@ -433,7 +433,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Particles") ); - gpu_particles = G_PushRWStructuredBufferRef(gpu_perm, gpu_particles_res, V_Particle); + gpu_particles = G_PushStructuredBufferRef(gpu_perm, gpu_particles_res, V_Particle); } //- Init particle textures for (V_ParticleLayer layer = 0; layer < V_ParticleLayer_COUNT; ++layer) @@ -447,7 +447,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = StringF(perm, "Particle cells - layer %F", FmtSint(layer)) ); - G_RWTexture2DRef cells = G_PushRWTexture2DRef(gpu_perm, cells_res); + G_Texture2DRef cells = G_PushTexture2DRef(gpu_perm, cells_res); gpu_particle_cell_resources[layer] = cells_res; gpu_particle_cells[layer] = cells; } @@ -460,7 +460,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = StringF(perm, "Particle densities - layer %F", FmtSint(layer)) ); - G_RWTexture2DRef densities = G_PushRWTexture2DRef(gpu_perm, densities_res); + G_Texture2DRef densities = G_PushTexture2DRef(gpu_perm, densities_res); gpu_particle_density_resources[layer] = densities_res; gpu_particle_densities[layer] = densities; } @@ -475,7 +475,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Stains") ); - gpu_stains = G_PushRWTexture2DRef(gpu_perm, gpu_stains_res); + gpu_stains = G_PushTexture2DRef(gpu_perm, gpu_stains_res); } //- Init dry stains texture { @@ -487,7 +487,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Dry stains") ); - gpu_dry_stains = G_PushRWTexture2DRef(gpu_perm, gpu_dry_stains_res); + gpu_dry_stains = G_PushTexture2DRef(gpu_perm, gpu_dry_stains_res); } //- Init dryness texture { @@ -499,7 +499,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Drynesses") ); - gpu_drynesses = G_PushRWTexture2DRef(gpu_perm, gpu_drynesses_res); + gpu_drynesses = G_PushTexture2DRef(gpu_perm, gpu_drynesses_res); } //- Init occluders texture { @@ -511,7 +511,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Occluders cells") ); - gpu_occluders = G_PushRWTexture2DRef(gpu_perm, gpu_occluders_res); + gpu_occluders = G_PushTexture2DRef(gpu_perm, gpu_occluders_res); } } G_CommitCommandList(cl); @@ -4811,8 +4811,7 @@ void V_TickForever(WaveLaneCtx *lane) ); Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); - frame->screen_ro = G_PushTexture2DRef(gpu_frame_arena, screen_target); - frame->screen_rw = G_PushRWTexture2DRef(gpu_frame_arena, screen_target); + frame->screen = G_PushTexture2DRef(gpu_frame_arena, screen_target); // Bloom texture G_ResourceHandle bloom_target = G_PushTexture2D( @@ -4826,8 +4825,7 @@ void V_TickForever(WaveLaneCtx *lane) ); for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) { - frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); - frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->bloom_mips[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); } // Albedo texture @@ -4839,7 +4837,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) ); - frame->albedo_ro = G_PushTexture2DRef(gpu_frame_arena, albedo_target); + frame->albedo = G_PushTexture2DRef(gpu_frame_arena, albedo_target); // Backdrop texture G_ResourceHandle backdrop_target = G_PushTexture2D( @@ -4853,8 +4851,7 @@ void V_TickForever(WaveLaneCtx *lane) ); for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) { - frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); - frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->backdrop_mips[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); } // Shade texture @@ -4868,8 +4865,7 @@ void V_TickForever(WaveLaneCtx *lane) ); Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1)); Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y)); - frame->shade_ro = G_PushTexture2DRef(gpu_frame_arena, shade_target); - frame->shade_rw = G_PushRWTexture2DRef(gpu_frame_arena, shade_target); + frame->shade = G_PushTexture2DRef(gpu_frame_arena, shade_target); // Quad buffers G_ResourceHandle quads_buff = G_PushBufferFromCpuCopy( @@ -4936,7 +4932,7 @@ void V_TickForever(WaveLaneCtx *lane) { // Prepare shade - G_Compute(frame->cl, V_PrepareShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims)); + // G_Compute(frame->cl, V_PrepareShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims)); // Prepare cells G_Compute(frame->cl, V_PrepareCellsCS, V_ThreadGroupSizeFromTexSize(cells_dims)); @@ -5104,7 +5100,7 @@ void V_TickForever(WaveLaneCtx *lane) uv.p0 = Vec2FromVec(screen_viewport.p0); uv.p1 = Vec2FromVec(screen_viewport.p1); uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims)); - UI_SetRawTexture(vis_box, frame->screen_ro, uv); + UI_SetRawTexture(vis_box, frame->screen, uv); } } diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 9c394e94..af8e9712 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -59,7 +59,7 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) ImplComputeShader2D(V_PrepareShadeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - RWTexture2D shade = G_Dereference(frame.shade_rw); + RWTexture2D shade = G_DereferenceRW(frame.shade); Vec2 shade_pos = SV_DispatchThreadID + 0.5; if (all(shade_pos < countof(shade))) { @@ -73,10 +73,10 @@ ImplComputeShader2D(V_PrepareCellsCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); - RWTexture2D stains = G_Dereference(frame.stains); - RWTexture2D dry_stains = G_Dereference(frame.dry_stains); - RWTexture2D drynesses = G_Dereference(frame.drynesses); - RWTexture2D occluders = G_Dereference(frame.occluders); + RWTexture2D stains = G_DereferenceRW(frame.stains); + RWTexture2D dry_stains = G_DereferenceRW(frame.dry_stains); + RWTexture2D drynesses = G_DereferenceRW(frame.drynesses); + RWTexture2D occluders = G_DereferenceRW(frame.occluders); Vec2 cell_pos = SV_DispatchThreadID + 0.5; if (all(cell_pos < P_WorldCellsDims)) @@ -102,8 +102,8 @@ ImplComputeShader2D(V_PrepareCellsCS) Vec4 over_dry_stain = 0; for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) { - RWTexture2D cells = G_Dereference(frame.particle_cells[layer]); - RWTexture2D densities = G_Dereference(frame.particle_densities[layer]); + RWTexture2D cells = G_DereferenceRW(frame.particle_cells[layer]); + RWTexture2D densities = G_DereferenceRW(frame.particle_densities[layer]); u32 packed = cells[cell_pos]; if (packed & (1 << 31)) { @@ -161,7 +161,7 @@ ImplComputeShader2D(V_PrepareCellsCS) ImplComputeShader(V_ClearParticlesCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - RWStructuredBuffer particles = G_Dereference(frame.particles); + RWStructuredBuffer particles = G_DereferenceRW(frame.particles); u32 particle_idx = SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) { @@ -191,9 +191,9 @@ ImplComputeShader2D(V_BackdropDownCS) } else { - bd_up = G_Dereference(frame.backdrop_mips_ro[mip_idx - 1]); + bd_up = G_Dereference(frame.backdrop_mips[mip_idx - 1]); } - RWTexture2D bd_down = G_Dereference(frame.backdrop_mips_rw[mip_idx]); + RWTexture2D bd_down = G_DereferenceRW(frame.backdrop_mips[mip_idx]); Vec2 down_dims = countof(bd_down); @@ -248,8 +248,8 @@ ImplComputeShader2D(V_BackdropUpCS) i32 mip_idx = V_GpuConst_MipIdx; V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - Texture2D bd_down = G_Dereference(frame.backdrop_mips_ro[mip_idx + 1]); - RWTexture2D bd_up = G_Dereference(frame.backdrop_mips_rw[mip_idx]); + Texture2D bd_down = G_Dereference(frame.backdrop_mips[mip_idx + 1]); + RWTexture2D bd_up = G_DereferenceRW(frame.backdrop_mips[mip_idx]); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); Vec2 down_dims = countof(bd_down); @@ -332,7 +332,7 @@ ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer quads = G_Dereference(frame.quads); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - RWTexture2D occluders = G_Dereference(frame.occluders); + RWTexture2D occluders = G_DereferenceRW(frame.occluders); V_Quad quad = quads[input.quad_idx]; Texture2D tex = G_Dereference(quad.tex); @@ -368,7 +368,7 @@ ImplComputeShader(V_EmitParticlesCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer emitters = G_Dereference(frame.emitters); - RWStructuredBuffer particles = G_Dereference(frame.particles); + RWStructuredBuffer particles = G_DereferenceRW(frame.particles); u32 emitter_idx = SV_DispatchThreadID; if (emitter_idx < frame.emitters_count) @@ -399,8 +399,8 @@ ImplComputeShader(V_SimParticlesCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); - RWStructuredBuffer particles = G_Dereference(frame.particles); - RWTexture2D occluders = G_Dereference(frame.occluders); + RWStructuredBuffer particles = G_DereferenceRW(frame.particles); + RWTexture2D occluders = G_DereferenceRW(frame.occluders); u32 particle_idx = SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) @@ -440,8 +440,8 @@ ImplComputeShader(V_SimParticlesCS) if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) { V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); - RWTexture2D cells = G_Dereference(frame.particle_cells[desc.layer]); - RWTexture2D densities = G_Dereference(frame.particle_densities[desc.layer]); + RWTexture2D cells = G_DereferenceRW(frame.particle_cells[desc.layer]); + RWTexture2D densities = G_DereferenceRW(frame.particle_densities[desc.layer]); u32 packed = 0; packed |= (particle_idx & ((1 >> 24) - 1)) << 0; @@ -677,9 +677,9 @@ ImplComputeShader2D(V_ShadeCS) V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); Texture2D tiles = G_Dereference(frame.tiles); - Texture2D albedo_tex = G_Dereference(frame.albedo_ro); - RWTexture2D shade_tex = G_Dereference(frame.shade_rw); - RWTexture2D drynesses = G_Dereference(frame.drynesses); + Texture2D albedo_tex = G_Dereference(frame.albedo); + RWTexture2D shade_tex = G_DereferenceRW(frame.shade); + RWTexture2D drynesses = G_DereferenceRW(frame.drynesses); Vec2 shade_pos = SV_DispatchThreadID + 0.5; Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1)); @@ -711,17 +711,17 @@ ImplComputeShader2D(V_ShadeCS) ImplComputeShader2D(V_CompositeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - // Texture2D shade_tex = G_Dereference(frame.shade_ro); + // Texture2D shade_tex = G_Dereference(frame.shade); SamplerState point_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D albedo_tex = G_Dereference(frame.albedo_ro); - RWTexture2D screen_tex = G_Dereference(frame.screen_rw); - RWTexture2D stains = G_Dereference(frame.stains); - RWTexture2D dry_stains = G_Dereference(frame.dry_stains); - RWTexture2D drynesses = G_Dereference(frame.drynesses); + Texture2D albedo_tex = G_Dereference(frame.albedo); + RWTexture2D screen_tex = G_DereferenceRW(frame.screen); + RWTexture2D stains = G_DereferenceRW(frame.stains); + RWTexture2D dry_stains = G_DereferenceRW(frame.dry_stains); + RWTexture2D drynesses = G_DereferenceRW(frame.drynesses); Texture2D tiles = G_Dereference(frame.tiles); - Texture2D backdrop = G_Dereference(frame.backdrop_mips_ro[0]); - RWStructuredBuffer particles = G_Dereference(frame.particles); + Texture2D backdrop = G_Dereference(frame.backdrop_mips[0]); + RWStructuredBuffer particles = G_DereferenceRW(frame.particles); Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5; Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); @@ -858,8 +858,8 @@ ImplComputeShader2D(V_CompositeCS) for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) { - RWTexture2D cells = G_Dereference(frame.particle_cells[layer]); - RWTexture2D densities = G_Dereference(frame.particle_densities[layer]); + RWTexture2D cells = G_DereferenceRW(frame.particle_cells[layer]); + RWTexture2D densities = G_DereferenceRW(frame.particle_densities[layer]); u32 packed = cells[cell_pos]; V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F); if (particle_kind != V_ParticleKind_None) @@ -1115,17 +1115,17 @@ ImplComputeShader2D(V_BloomDownCS) V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - RWTexture2D bloom_down = G_Dereference(frame.bloom_mips_rw[mip_idx - 1]); + RWTexture2D bloom_down = G_DereferenceRW(frame.bloom_mips[mip_idx - 1]); Texture2D bloom_up; b32 is_first_pass = mip_idx == 1; if (is_first_pass) { - bloom_up = G_Dereference(frame.screen_ro); + bloom_up = G_Dereference(frame.screen); } else { - bloom_up = G_Dereference(frame.bloom_mips_ro[mip_idx - 2]); + bloom_up = G_Dereference(frame.bloom_mips[mip_idx - 2]); } Vec2 down_dims = countof(bloom_down); @@ -1190,17 +1190,17 @@ ImplComputeShader2D(V_BloomUpCS) V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D bloom_down = G_Dereference(frame.bloom_mips_ro[mip_idx]); + Texture2D bloom_down = G_Dereference(frame.bloom_mips[mip_idx]); b32 is_last_pass = mip_idx == 0; RWTexture2D bloom_up; if (is_last_pass) { - bloom_up = G_Dereference(frame.screen_rw); + bloom_up = G_DereferenceRW(frame.screen); } else { - bloom_up = G_Dereference(frame.bloom_mips_rw[mip_idx - 1]); + bloom_up = G_DereferenceRW(frame.bloom_mips[mip_idx - 1]); } Vec2 down_dims = countof(bloom_down); @@ -1255,8 +1255,8 @@ ImplComputeShader2D(V_FinalizeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D bloom_tex = G_Dereference(frame.bloom_mips_ro[0]); - RWTexture2D screen_tex = G_Dereference(frame.screen_rw); + Texture2D bloom_tex = G_Dereference(frame.bloom_mips[0]); + RWTexture2D screen_tex = G_DereferenceRW(frame.screen); Vec2 screen_pos = SV_DispatchThreadID + 0.5; b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 11d5b1fa..092bbfcb 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -350,30 +350,25 @@ Struct(V_SharedFrame) f32 backdrop_parallax; G_Texture2DRef backdrop_src; - G_Texture2DRef backdrop_mips_ro[G_MaxMips]; - G_RWTexture2DRef backdrop_mips_rw[G_MaxMips]; + G_Texture2DRef backdrop_mips[G_MaxMips]; - G_Texture2DRef screen_ro; - G_RWTexture2DRef screen_rw; - G_Texture2DRef shade_ro; - G_RWTexture2DRef shade_rw; - G_Texture2DRef albedo_ro; - G_RWTexture2DRef albedo_rw; + G_Texture2DRef screen; + G_Texture2DRef shade; + G_Texture2DRef albedo; - G_Texture2DRef bloom_mips_ro[G_MaxMips]; - G_RWTexture2DRef bloom_mips_rw[G_MaxMips]; + G_Texture2DRef bloom_mips[G_MaxMips]; u32 emitters_count; G_StructuredBufferRef emitters; - G_RWStructuredBufferRef particles; + G_StructuredBufferRef particles; - G_RWTexture2DRef stains; - G_RWTexture2DRef dry_stains; - G_RWTexture2DRef drynesses; - G_RWTexture2DRef occluders; + G_Texture2DRef stains; + G_Texture2DRef dry_stains; + G_Texture2DRef drynesses; + G_Texture2DRef occluders; - G_RWTexture2DRef particle_cells[V_ParticleLayer_COUNT]; - G_RWTexture2DRef particle_densities[V_ParticleLayer_COUNT]; + G_Texture2DRef particle_cells[V_ParticleLayer_COUNT]; + G_Texture2DRef particle_densities[V_ParticleLayer_COUNT]; G_StructuredBufferRef dverts; G_StructuredBufferRef quads; diff --git a/src/proto/proto.c b/src/proto/proto.c index 324d5a73..703e8543 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -18,26 +18,22 @@ void PT_RunForever(WaveLaneCtx *lane) { G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); { - // Push resources Vec2I32 final_target_size = window_frame.draw_size; - G_ResourceHandle final_target = G_PushTexture2D( + G_ResourceHandle final_target_res = G_PushTexture2D( gpu_frame_arena, cl, G_Format_R16G16B16A16_Float, final_target_size, - G_Layout_DirectQueue_ShaderReadWrite, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_AllowShaderReadWrite ); - - // Push resource handles - G_Texture2DRef final_target_rhandle = G_PushTexture2DRef(gpu_frame_arena, final_target); - G_RWTexture2DRef final_target_rwhandle = G_PushRWTexture2DRef(gpu_frame_arena, final_target); + G_Texture2DRef final_target = G_PushTexture2DRef(gpu_frame_arena, final_target_res); // Prep test pass { - G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle); + G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target); G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp)); - G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle); + G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target); G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture()); } @@ -45,12 +41,11 @@ void PT_RunForever(WaveLaneCtx *lane) { G_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); } - G_DumbMemorySync(cl, final_target); + G_DumbMemorySync(cl, final_target_res); // Prep blit pass { - G_DumbMemoryLayoutSync(cl, final_target, G_Layout_DirectQueue_ShaderRead); - G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_DirectQueue_RenderTargetWrite); + G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_DirectQueue_RenderTarget); } // Blit pass @@ -67,7 +62,7 @@ void PT_RunForever(WaveLaneCtx *lane) // Finalize backbuffer layout { - G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); + G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_Common); } // Reset diff --git a/src/proto/proto_shaders.g b/src/proto/proto_shaders.g index 089365ab..c86d6aaa 100644 --- a/src/proto/proto_shaders.g +++ b/src/proto/proto_shaders.g @@ -5,7 +5,7 @@ ImplComputeShader2D(PT_TestCS) { StructuredBuffer sb = G_Dereference(PT_ShaderConst_TestBuff); - RWTexture2D target_tex = G_Dereference(PT_ShaderConst_TestTarget); + RWTexture2D target_tex = G_DereferenceRW(PT_ShaderConst_TestTarget); Vec2U32 target_tex_size = countof(target_tex); Vec2I32 id = SV_DispatchThreadID; diff --git a/src/proto/proto_shared.cgh b/src/proto/proto_shared.cgh index b6a98fd1..8b2f6085 100644 --- a/src/proto/proto_shared.cgh +++ b/src/proto/proto_shared.cgh @@ -4,7 +4,7 @@ G_DeclConstant(G_Texture3DRef, PT_ShaderConst_NoiseTex, 0); // Test shader -G_DeclConstant(G_RWTexture2DRef, PT_ShaderConst_TestTarget, 1); +G_DeclConstant(G_Texture2DRef, PT_ShaderConst_TestTarget, 1); G_DeclConstant(G_StructuredBufferRef, PT_ShaderConst_TestBuff, 2); G_DeclConstant(f32, PT_ShaderConst_TestConst, 3);