Remove RW GPU reference types; each ref's read-write descriptor is instead stored at its read-only index + 1

This commit is contained in:
jacob 2026-02-24 10:25:02 -06:00
parent 9ad6b856c5
commit e9176147f3
10 changed files with 245 additions and 268 deletions

View File

@ -619,13 +619,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
) \
}
#define G_PushRWStructuredBufferRef(arena, resource, type, ...) (G_RWStructuredBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_RWStructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \
) \
}
#define G_PushByteAddressBufferRef(arena, resource, ...) (G_ByteAddressBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
@ -633,13 +626,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
) \
}
#define G_PushRWByteAddressBufferRef(arena, resource, ...) (G_RWByteAddressBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_RWByteAddressBuffer, __VA_ARGS__ } \
) \
}
#define G_PushTexture1DRef(arena, resource, ...) (G_Texture1DRef) { \
.v = G_PushRef( \
(arena), (resource), \
@ -647,13 +633,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
) \
}
#define G_PushRWTexture1DRef(arena, resource, ...) (G_RWTexture1DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_RWTexture1D, .mips.max = 64, __VA_ARGS__ } \
) \
}
#define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \
.v = G_PushRef( \
(arena), (resource), \
@ -661,13 +640,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
) \
}
#define G_PushRWTexture2DRef(arena, resource, ...) (G_RWTexture2DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_RWTexture2D, .mips.max = 64, __VA_ARGS__ } \
) \
}
#define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \
.v = G_PushRef( \
(arena), (resource), \
@ -675,13 +647,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
) \
}
#define G_PushRWTexture3DRef(arena, resource, ...) (G_RWTexture3DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_RWTexture3D, .mips.max = 64, __VA_ARGS__ } \
) \
}
#define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \
.v = G_PushRef( \
(arena), (resource), \

View File

@ -333,24 +333,34 @@ void G_Bootstrap(void)
//- Initialize descriptor heaps
{
Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; String name; };
// Creation parameters for a single descriptor heap.
Struct(Dx12HeapDesc)
{
// D3D12 heap type; also used to query the descriptor handle increment size.
D3D12_DESCRIPTOR_HEAP_TYPE type;
// D3D12 heap flags (shader-visible or none in the table that uses this struct).
D3D12_DESCRIPTOR_HEAP_FLAGS flags;
// Copied into the heap's max_count.
u64 max;
// Copied into the heap's per_batch_count. G_D12_PushDescriptor multiplies a freshly
// allocated descriptor's index by this value, so each descriptor owns this many
// consecutive heap slots.
u64 per_batch_count;
String name;
};
// Heap creation parameters, indexed by G_D12_DescriptorHeapKind.
// Only the CbvSrvUav heap reserves two slots per descriptor (the read-only view
// followed by the read-write view); the RTV and sampler heaps reserve one.
Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = {
[G_D12_DescriptorHeapKind_CbvSrvUav] = {
.type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
.max = G_D12_MaxCbvSrvUavDescriptors,
.per_batch_count = 2, // 1 read, 1 write per ref
.name = Lit("Primary Resource Descriptor Heap"),
},
[G_D12_DescriptorHeapKind_Rtv] = {
.type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
.max = G_D12_MaxRtvDescriptors,
.per_batch_count = 1,
.name = Lit("Primary RTV Descriptor Heap"),
},
[G_D12_DescriptorHeapKind_Sampler] = {
.type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
.max = G_D12_MaxSamplerDescriptors,
.per_batch_count = 1,
.name = Lit("Primary Sampler Descriptor Heap"),
},
};
@ -362,6 +372,7 @@ void G_Bootstrap(void)
heap->kind = kind;
heap->type = desc.type;
heap->per_batch_count = desc.per_batch_count;
heap->max_count = desc.max;
heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(G_D12.device, desc.type);
@ -483,7 +494,7 @@ void G_Bootstrap(void)
.flags = G_ResourceFlag_HostMemory,
.name = Lit("Debug print readback buffer")
);
queue->print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->print_buffer);
queue->print_buffer_ref = G_PushByteAddressBufferRef(gpu_perm, queue->print_buffer);
}
G_CommitCommandList(cl);
}
@ -1031,9 +1042,9 @@ G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind)
if (SUCCEEDED(hr) && queue_kind == G_QueueKind_Direct)
{
G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena());
for (u32 i = 0; i < countof(cl->rtv_descriptors); ++i)
for (u32 rtv_idx = 0; rtv_idx < countof(cl->rtv_descriptors); ++rtv_idx)
{
cl->rtv_descriptors[i] = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv);
cl->rtv_descriptors[rtv_idx] = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv);
}
cl->rtv_clear_descriptor = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv);
}
@ -1563,6 +1574,7 @@ G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind,
G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind)
{
G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind];
u64 per_batch_count = heap->per_batch_count;
G_D12_Descriptor *descriptor = 0;
u32 index = 0;
@ -1578,7 +1590,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
{
// Descriptor no longer in use by gpu, reuse it
DllQueueRemove(descriptors->first, descriptors->last, descriptor);
--descriptors->count;
descriptors->count -= 1;
index = descriptor->index;
}
else
@ -1607,7 +1619,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
Panic(Lit("Max descriptors reached in heap"));
}
descriptor = PushStructNoZero(heap->descriptors_arena, G_D12_Descriptor);
index = descriptors_count;
index = descriptors_count * per_batch_count;
}
}
Unlock(&lock);
@ -1617,11 +1629,11 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
ZeroStruct(descriptor);
descriptor->gpu_arena = gpu_arena;
descriptor->index = index;
descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);
descriptor->first_handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);
descriptor->heap = heap;
DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor);
++gpu_arena->descriptors.count;
gpu_arena->descriptors.count += 1;
return descriptor;
}
@ -1638,31 +1650,32 @@ u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_Re
G_RefKind kind = ref_desc.kind;
b32 is_buffer = (
kind == G_RefKind_StructuredBuffer ||
kind == G_RefKind_RWStructuredBuffer ||
kind == G_RefKind_ByteAddressBuffer ||
kind == G_RefKind_RWByteAddressBuffer
kind == G_RefKind_ByteAddressBuffer
);
b32 is_sampler = kind == G_RefKind_SamplerState;
b32 is_texture = !is_buffer && !is_sampler;
b32 is_raw = (
kind == G_RefKind_ByteAddressBuffer ||
kind == G_RefKind_RWByteAddressBuffer
);
b32 is_uav = (
kind == G_RefKind_RWStructuredBuffer ||
kind == G_RefKind_RWByteAddressBuffer ||
kind == G_RefKind_RWTexture1D ||
kind == G_RefKind_RWTexture2D ||
kind == G_RefKind_RWTexture3D
);
b32 is_raw = kind == G_RefKind_ByteAddressBuffer;
b32 is_writable = resource->flags & G_ResourceFlag_AllowShaderReadWrite;
if (is_uav)
{
// RW refs must be allowed on this resource
Assert(resource->flags & G_ResourceFlag_AllowShaderReadWrite);
}
b32 ok = 1;
G_D12_Descriptor *descriptor = 0;
if (is_buffer || is_texture)
{
descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav);
G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav];
Assert(heap->per_batch_count >= 2);
D3D12_CPU_DESCRIPTOR_HANDLE readonly_handle = descriptor->first_handle;
D3D12_CPU_DESCRIPTOR_HANDLE readwrite_handle = descriptor->first_handle;
readwrite_handle.ptr += heap->descriptor_size;
b32 srv_ok = 0;
b32 uav_ok = 0;
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = Zi;
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = Zi;
if (is_buffer)
{
if (is_raw)
@ -1671,111 +1684,133 @@ u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_Re
ref_desc.element_offset /= 4;
}
descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav);
u64 buffer_size_actual = resource->buffer_size_actual;
u64 num_elements_in_buffer = buffer_size_actual / ref_desc.element_size;
u64 num_elements_after_offset = num_elements_in_buffer > ref_desc.element_offset ? num_elements_in_buffer - ref_desc.element_offset : 0;
//- Create buffer SRV
{
{
srv_desc.Format = DXGI_FORMAT_UNKNOWN;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Buffer.FirstElement = ref_desc.element_offset;
srv_desc.Buffer.NumElements = num_elements_after_offset;
srv_desc.Buffer.StructureByteStride = ref_desc.element_size;
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
}
if (is_raw)
{
srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
srv_desc.Buffer.StructureByteStride = 0;
}
srv_ok = 1;
}
//- Create buffer UAV
{
{
uav_desc.Format = DXGI_FORMAT_UNKNOWN;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer.FirstElement = ref_desc.element_offset;
uav_desc.Buffer.NumElements = num_elements_after_offset;
uav_desc.Buffer.StructureByteStride = ref_desc.element_size;
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
}
if (is_raw)
{
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
uav_desc.Buffer.StructureByteStride = 0;
}
}
if (num_elements_after_offset > 0)
{
if (is_uav)
srv_ok = 1;
if (is_writable)
{
D3D12_UNORDERED_ACCESS_VIEW_DESC desc = Zi;
{
desc.Format = DXGI_FORMAT_UNKNOWN;
desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
desc.Buffer.FirstElement = ref_desc.element_offset;
desc.Buffer.NumElements = num_elements_after_offset;
desc.Buffer.StructureByteStride = ref_desc.element_size;
desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
}
if (is_raw)
{
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
desc.Buffer.StructureByteStride = 0;
}
ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &desc, descriptor->handle);
}
else
{
D3D12_SHADER_RESOURCE_VIEW_DESC desc = Zi;
{
desc.Format = DXGI_FORMAT_UNKNOWN;
desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
desc.Buffer.FirstElement = ref_desc.element_offset;
desc.Buffer.NumElements = num_elements_after_offset;
desc.Buffer.StructureByteStride = ref_desc.element_size;
desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
}
if (is_raw)
{
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
desc.Buffer.StructureByteStride = 0;
}
ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &desc, descriptor->handle);
uav_ok = 1;
}
}
}
else if (is_texture)
{
descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav);
DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format);
// DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format);
RngI32 mips = ref_desc.mips;
mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1);
mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1);
if (is_uav)
//- Create texture SRV
{
D3D12_UNORDERED_ACCESS_VIEW_DESC desc = Zi;
{
desc.Format = DXGI_FORMAT_UNKNOWN;
if (ref_desc.kind == G_RefKind_RWTexture1D)
{
desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D;
desc.Texture1D.MipSlice = mips.min;
}
else if (ref_desc.kind == G_RefKind_RWTexture2D)
{
desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
desc.Texture2D.MipSlice = mips.min;
}
else if (ref_desc.kind == G_RefKind_RWTexture3D)
{
desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
desc.Texture3D.MipSlice = mips.min;
desc.Texture3D.WSize = U32Max;
}
}
ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &desc, descriptor->handle);
}
else
{
D3D12_SHADER_RESOURCE_VIEW_DESC desc = Zi;
{
desc.Format = DXGI_FORMAT_UNKNOWN;
desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Format = DXGI_FORMAT_UNKNOWN;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
if (ref_desc.kind == G_RefKind_Texture1D)
{
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
desc.Texture1D.MostDetailedMip = mips.min;
desc.Texture1D.MipLevels = mips.max - mips.min + 1;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
srv_desc.Texture1D.MostDetailedMip = mips.min;
srv_desc.Texture1D.MipLevels = mips.max - mips.min + 1;
}
else if (ref_desc.kind == G_RefKind_Texture2D)
{
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
desc.Texture2D.MostDetailedMip = mips.min;
desc.Texture2D.MipLevels = mips.max - mips.min + 1;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srv_desc.Texture2D.MostDetailedMip = mips.min;
srv_desc.Texture2D.MipLevels = mips.max - mips.min + 1;
}
else if (ref_desc.kind == G_RefKind_Texture3D)
{
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
desc.Texture3D.MostDetailedMip = mips.min;
desc.Texture3D.MipLevels = mips.max - mips.min + 1;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
srv_desc.Texture3D.MostDetailedMip = mips.min;
srv_desc.Texture3D.MipLevels = mips.max - mips.min + 1;
}
}
ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &desc, descriptor->handle);
//- Create texture UAV
{
uav_desc.Format = DXGI_FORMAT_UNKNOWN;
if (ref_desc.kind == G_RefKind_Texture1D)
{
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D;
uav_desc.Texture1D.MipSlice = mips.min;
}
else if (ref_desc.kind == G_RefKind_Texture2D)
{
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
uav_desc.Texture2D.MipSlice = mips.min;
}
else if (ref_desc.kind == G_RefKind_Texture3D)
{
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
uav_desc.Texture3D.MipSlice = mips.min;
uav_desc.Texture3D.WSize = U32Max;
}
}
srv_ok = 1;
if (is_writable)
{
uav_ok = 1;
}
if (!uav_ok)
{
uav_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
}
}
if (srv_ok)
{
ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &srv_desc, readonly_handle);
}
else
{
ID3D12Device_CreateShaderResourceView(G_D12.device, 0, &srv_desc, readonly_handle);
}
if (uav_ok)
{
ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &uav_desc, readwrite_handle);
}
else
{
ID3D12Device_CreateUnorderedAccessView(G_D12.device, 0, 0, &uav_desc, readwrite_handle);
}
}
else if (is_sampler)
@ -1805,7 +1840,7 @@ u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_Re
{
d3d_desc.MaxLOD = D3D12_FLOAT32_MAX;
}
ID3D12Device_CreateSampler(G_D12.device, &d3d_desc, descriptor->handle);
ID3D12Device_CreateSampler(G_D12.device, &d3d_desc, descriptor->first_handle);
}
return descriptor->index;
@ -2757,7 +2792,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
if (bound_render_target_uids[i] != rt->uid + desc.mip)
{
G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i];
G_D12_InitRtv(rt, rtv_descriptor->handle, desc.mip);
G_D12_InitRtv(rt, rtv_descriptor->first_handle, desc.mip);
bound_render_target_uids[i] = rt->uid + desc.mip;
om_dirty = 1;
}
@ -2773,7 +2808,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi;
for (u32 i = 0; i < rtvs_count; ++i)
{
rtv_handles[i] = rcl->rtv_descriptors[i]->handle;
rtv_handles[i] = rcl->rtv_descriptors[i]->first_handle;
}
ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0);
}
@ -2798,7 +2833,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
clear_color[2] = cmd->clear_rtv.color.z;
clear_color[3] = cmd->clear_rtv.color.w;
}
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->first_handle;
if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip)
{
G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip);

View File

@ -123,6 +123,7 @@ Struct(G_D12_DescriptorHeap)
G_D12_DescriptorHeapKind kind;
D3D12_DESCRIPTOR_HEAP_TYPE type;
u32 per_batch_count;
u32 descriptor_size;
ID3D12DescriptorHeap *d3d_heap;
D3D12_CPU_DESCRIPTOR_HANDLE start_handle;
@ -142,7 +143,7 @@ Struct(G_D12_Descriptor)
i64 completion_queue_target;
G_D12_DescriptorHeap *heap;
D3D12_CPU_DESCRIPTOR_HANDLE handle;
D3D12_CPU_DESCRIPTOR_HANDLE first_handle;
u32 index;
};
@ -238,7 +239,7 @@ Struct(G_D12_Queue)
u64 print_buffer_size;
G_ResourceHandle print_buffer;
G_ResourceHandle print_readback_buffer;
G_RWByteAddressBufferRef print_buffer_ref;
G_ByteAddressBufferRef print_buffer_ref;
// Raw command lists
struct G_D12_RawCommandList *first_committed_cl;

View File

@ -4,28 +4,18 @@
Enum(G_RefKind)
{
G_RefKind_StructuredBuffer,
G_RefKind_RWStructuredBuffer,
G_RefKind_ByteAddressBuffer,
G_RefKind_RWByteAddressBuffer,
G_RefKind_Texture1D,
G_RefKind_RWTexture1D,
G_RefKind_Texture2D,
G_RefKind_RWTexture2D,
G_RefKind_Texture3D,
G_RefKind_RWTexture3D,
G_RefKind_SamplerState,
};
Struct(G_StructuredBufferRef) { u32 v; };
Struct(G_RWStructuredBufferRef) { u32 v; };
Struct(G_ByteAddressBufferRef) { u32 v; };
Struct(G_RWByteAddressBufferRef) { u32 v; };
Struct(G_Texture1DRef) { u32 v; };
Struct(G_RWTexture1DRef) { u32 v; };
Struct(G_Texture2DRef) { u32 v; };
Struct(G_RWTexture2DRef) { u32 v; };
Struct(G_Texture3DRef) { u32 v; };
Struct(G_RWTexture3DRef) { u32 v; };
Struct(G_SamplerStateRef) { u32 v; };
#define G_IsRefNil(r) ((r).v == 0)
@ -36,7 +26,7 @@ Struct(G_SamplerStateRef) { u32 v; };
//
// D3D12 exposes 64 root constants and Vulkan exposes 32 push constants.
// Supposedly AMD hardware will start spilling constants once more than
// 12 are used - https://gpuopen.com/learn/rdna-performance-guide/
// 12 are in use - https://gpuopen.com/learn/rdna-performance-guide/
//
#define G_NumGeneralPurposeConstants (24) // Constants available for any usage
#define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer
@ -62,7 +52,7 @@ Struct(G_SamplerStateRef) { u32 v; };
StaticAssert(G_NumGeneralPurposeConstants == 24);
StaticAssert(G_NumReservedConstants >= 3);
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24);
G_ForceDeclConstant(G_ByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24);
G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25);
G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26);
@ -98,17 +88,17 @@ Enum(G_BasicSamplerKind)
// optimization on AMD hardware in the future.
// Ref -> resource lookups through the bindless descriptor heaps.
// G_Dereference indexes the heap with r.v directly. G_DereferenceRW takes the same
// read-only ref type and indexes slot r.v + 1, returning the RW resource type.
// Sampler refs index SamplerDescriptorHeap instead of ResourceDescriptorHeap.
template<typename T> StructuredBuffer<T> G_Dereference(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWStructuredBuffer<T> G_Dereference(G_RWStructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWStructuredBuffer<T> G_DereferenceRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
ByteAddressBuffer G_Dereference(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
RWByteAddressBuffer G_Dereference(G_RWByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
RWByteAddressBuffer G_DereferenceRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> Texture1D<T> G_Dereference(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture2D<T> G_Dereference(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture3D<T> G_Dereference(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWTexture1D<T> G_Dereference(G_RWTexture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWTexture2D<T> G_Dereference(G_RWTexture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWTexture3D<T> G_Dereference(G_RWTexture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWTexture1D<T> G_DereferenceRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture2D<T> G_DereferenceRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture3D<T> G_DereferenceRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
SamplerState G_Dereference(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
#endif
@ -226,7 +216,7 @@ Struct(G_FmtArg)
void G_CommitPrint(G_TempPrintBuffer buff)
{
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
RWByteAddressBuffer rw = G_DereferenceRW(G_ShaderConst_PrintBufferRef);
if (buff.overflowed)
{

View File

@ -402,13 +402,13 @@ void V_TickForever(WaveLaneCtx *lane)
G_ResourceHandle gpu_occluders_res = Zi;
G_Texture2DRef gpu_tiles = Zi;
G_RWStructuredBufferRef gpu_particles = Zi;
G_RWTexture2DRef gpu_particle_cells[V_ParticleLayer_COUNT];
G_RWTexture2DRef gpu_particle_densities[V_ParticleLayer_COUNT];
G_RWTexture2DRef gpu_stains = Zi;
G_RWTexture2DRef gpu_dry_stains = Zi;
G_RWTexture2DRef gpu_drynesses = Zi;
G_RWTexture2DRef gpu_occluders = Zi;
G_StructuredBufferRef gpu_particles = Zi;
G_Texture2DRef gpu_particle_cells[V_ParticleLayer_COUNT];
G_Texture2DRef gpu_particle_densities[V_ParticleLayer_COUNT];
G_Texture2DRef gpu_stains = Zi;
G_Texture2DRef gpu_dry_stains = Zi;
G_Texture2DRef gpu_drynesses = Zi;
G_Texture2DRef gpu_occluders = Zi;
{
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{
@ -433,7 +433,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Particles")
);
gpu_particles = G_PushRWStructuredBufferRef(gpu_perm, gpu_particles_res, V_Particle);
gpu_particles = G_PushStructuredBufferRef(gpu_perm, gpu_particles_res, V_Particle);
}
//- Init particle textures
for (V_ParticleLayer layer = 0; layer < V_ParticleLayer_COUNT; ++layer)
@ -447,7 +447,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
);
G_RWTexture2DRef cells = G_PushRWTexture2DRef(gpu_perm, cells_res);
G_Texture2DRef cells = G_PushTexture2DRef(gpu_perm, cells_res);
gpu_particle_cell_resources[layer] = cells_res;
gpu_particle_cells[layer] = cells;
}
@ -460,7 +460,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
);
G_RWTexture2DRef densities = G_PushRWTexture2DRef(gpu_perm, densities_res);
G_Texture2DRef densities = G_PushTexture2DRef(gpu_perm, densities_res);
gpu_particle_density_resources[layer] = densities_res;
gpu_particle_densities[layer] = densities;
}
@ -475,7 +475,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Stains")
);
gpu_stains = G_PushRWTexture2DRef(gpu_perm, gpu_stains_res);
gpu_stains = G_PushTexture2DRef(gpu_perm, gpu_stains_res);
}
//- Init dry stains texture
{
@ -487,7 +487,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Dry stains")
);
gpu_dry_stains = G_PushRWTexture2DRef(gpu_perm, gpu_dry_stains_res);
gpu_dry_stains = G_PushTexture2DRef(gpu_perm, gpu_dry_stains_res);
}
//- Init dryness texture
{
@ -499,7 +499,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Drynesses")
);
gpu_drynesses = G_PushRWTexture2DRef(gpu_perm, gpu_drynesses_res);
gpu_drynesses = G_PushTexture2DRef(gpu_perm, gpu_drynesses_res);
}
//- Init occluders texture
{
@ -511,7 +511,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Occluders cells")
);
gpu_occluders = G_PushRWTexture2DRef(gpu_perm, gpu_occluders_res);
gpu_occluders = G_PushTexture2DRef(gpu_perm, gpu_occluders_res);
}
}
G_CommitCommandList(cl);
@ -4811,8 +4811,7 @@ void V_TickForever(WaveLaneCtx *lane)
);
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
frame->screen_ro = G_PushTexture2DRef(gpu_frame_arena, screen_target);
frame->screen_rw = G_PushRWTexture2DRef(gpu_frame_arena, screen_target);
frame->screen = G_PushTexture2DRef(gpu_frame_arena, screen_target);
// Bloom texture
G_ResourceHandle bloom_target = G_PushTexture2D(
@ -4826,8 +4825,7 @@ void V_TickForever(WaveLaneCtx *lane)
);
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{
frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
frame->bloom_mips[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
}
// Albedo texture
@ -4839,7 +4837,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick))
);
frame->albedo_ro = G_PushTexture2DRef(gpu_frame_arena, albedo_target);
frame->albedo = G_PushTexture2DRef(gpu_frame_arena, albedo_target);
// Backdrop texture
G_ResourceHandle backdrop_target = G_PushTexture2D(
@ -4853,8 +4851,7 @@ void V_TickForever(WaveLaneCtx *lane)
);
// Push one single-mip ref per mip of the backdrop texture. The loop bound is taken
// from backdrop_target itself rather than bloom_target, so the number of refs
// always matches the resource being referenced.
for (i32 mip_idx = 0; mip_idx < G_CountMips(backdrop_target); ++mip_idx)
{
frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
frame->backdrop_mips[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
}
// Shade texture
@ -4868,8 +4865,7 @@ void V_TickForever(WaveLaneCtx *lane)
);
Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1));
Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y));
frame->shade_ro = G_PushTexture2DRef(gpu_frame_arena, shade_target);
frame->shade_rw = G_PushRWTexture2DRef(gpu_frame_arena, shade_target);
frame->shade = G_PushTexture2DRef(gpu_frame_arena, shade_target);
// Quad buffers
G_ResourceHandle quads_buff = G_PushBufferFromCpuCopy(
@ -4936,7 +4932,7 @@ void V_TickForever(WaveLaneCtx *lane)
{
// Prepare shade
G_Compute(frame->cl, V_PrepareShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));
// G_Compute(frame->cl, V_PrepareShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));
// Prepare cells
G_Compute(frame->cl, V_PrepareCellsCS, V_ThreadGroupSizeFromTexSize(cells_dims));
@ -5104,7 +5100,7 @@ void V_TickForever(WaveLaneCtx *lane)
uv.p0 = Vec2FromVec(screen_viewport.p0);
uv.p1 = Vec2FromVec(screen_viewport.p1);
uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims));
UI_SetRawTexture(vis_box, frame->screen_ro, uv);
UI_SetRawTexture(vis_box, frame->screen, uv);
}
}

View File

@ -59,7 +59,7 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
ImplComputeShader2D(V_PrepareShadeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWTexture2D<Vec4> shade = G_Dereference<Vec4>(frame.shade_rw);
RWTexture2D<Vec4> shade = G_DereferenceRW<Vec4>(frame.shade);
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
if (all(shade_pos < countof(shade)))
{
@ -73,10 +73,10 @@ ImplComputeShader2D(V_PrepareCellsCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
RWTexture2D<Vec4> dry_stains = G_Dereference<Vec4>(frame.dry_stains);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);
RWTexture2D<Vec4> stains = G_DereferenceRW<Vec4>(frame.stains);
RWTexture2D<Vec4> dry_stains = G_DereferenceRW<Vec4>(frame.dry_stains);
RWTexture2D<f32> drynesses = G_DereferenceRW<f32>(frame.drynesses);
RWTexture2D<u32> occluders = G_DereferenceRW<u32>(frame.occluders);
Vec2 cell_pos = SV_DispatchThreadID + 0.5;
if (all(cell_pos < P_WorldCellsDims))
@ -102,8 +102,8 @@ ImplComputeShader2D(V_PrepareCellsCS)
Vec4 over_dry_stain = 0;
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[layer]);
RWTexture2D<u32> cells = G_DereferenceRW<u32>(frame.particle_cells[layer]);
RWTexture2D<u32> densities = G_DereferenceRW<u32>(frame.particle_densities[layer]);
u32 packed = cells[cell_pos];
if (packed & (1 << 31))
{
@ -161,7 +161,7 @@ ImplComputeShader2D(V_PrepareCellsCS)
ImplComputeShader(V_ClearParticlesCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap)
{
@ -191,9 +191,9 @@ ImplComputeShader2D(V_BackdropDownCS)
}
else
{
bd_up = G_Dereference<Vec4>(frame.backdrop_mips_ro[mip_idx - 1]);
bd_up = G_Dereference<Vec4>(frame.backdrop_mips[mip_idx - 1]);
}
RWTexture2D<Vec4> bd_down = G_Dereference<Vec4>(frame.backdrop_mips_rw[mip_idx]);
RWTexture2D<Vec4> bd_down = G_DereferenceRW<Vec4>(frame.backdrop_mips[mip_idx]);
Vec2 down_dims = countof(bd_down);
@ -248,8 +248,8 @@ ImplComputeShader2D(V_BackdropUpCS)
i32 mip_idx = V_GpuConst_MipIdx;
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<Vec4> bd_down = G_Dereference<Vec4>(frame.backdrop_mips_ro[mip_idx + 1]);
RWTexture2D<Vec4> bd_up = G_Dereference<Vec4>(frame.backdrop_mips_rw[mip_idx]);
Texture2D<Vec4> bd_down = G_Dereference<Vec4>(frame.backdrop_mips[mip_idx + 1]);
RWTexture2D<Vec4> bd_up = G_DereferenceRW<Vec4>(frame.backdrop_mips[mip_idx]);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
Vec2 down_dims = countof(bd_down);
@ -332,7 +332,7 @@ ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);
RWTexture2D<u32> occluders = G_DereferenceRW<u32>(frame.occluders);
V_Quad quad = quads[input.quad_idx];
Texture2D<Vec4> tex = G_Dereference<Vec4>(quad.tex);
@ -368,7 +368,7 @@ ImplComputeShader(V_EmitParticlesCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
u32 emitter_idx = SV_DispatchThreadID;
if (emitter_idx < frame.emitters_count)
@ -399,8 +399,8 @@ ImplComputeShader(V_SimParticlesCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
RWTexture2D<u32> occluders = G_DereferenceRW<u32>(frame.occluders);
u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap)
@ -440,8 +440,8 @@ ImplComputeShader(V_SimParticlesCS)
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[desc.layer]);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[desc.layer]);
RWTexture2D<u32> cells = G_DereferenceRW<u32>(frame.particle_cells[desc.layer]);
RWTexture2D<u32> densities = G_DereferenceRW<u32>(frame.particle_densities[desc.layer]);
u32 packed = 0;
// The particle index occupies the low 24 bits of the packed cell value (the
// kind is read back from bits 24 and up). The mask must therefore be
// (1 << 24) - 1; the previous (1 >> 24) - 1 evaluated to 0 - 1, i.e. all
// ones, so the index was never limited to its 24-bit field.
packed |= (particle_idx & ((1 << 24) - 1)) << 0;
@ -677,9 +677,9 @@ ImplComputeShader2D(V_ShadeCS)
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_rw);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo);
RWTexture2D<Vec4> shade_tex = G_DereferenceRW<Vec4>(frame.shade);
RWTexture2D<f32> drynesses = G_DereferenceRW<f32>(frame.drynesses);
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
@ -711,17 +711,17 @@ ImplComputeShader2D(V_ShadeCS)
ImplComputeShader2D(V_CompositeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade);
SamplerState point_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
RWTexture2D<Vec4> dry_stains = G_Dereference<Vec4>(frame.dry_stains);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo);
RWTexture2D<Vec4> screen_tex = G_DereferenceRW<Vec4>(frame.screen);
RWTexture2D<Vec4> stains = G_DereferenceRW<Vec4>(frame.stains);
RWTexture2D<Vec4> dry_stains = G_DereferenceRW<Vec4>(frame.dry_stains);
RWTexture2D<f32> drynesses = G_DereferenceRW<f32>(frame.drynesses);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
Texture2D<Vec4> backdrop = G_Dereference<Vec4>(frame.backdrop_mips_ro[0]);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
Texture2D<Vec4> backdrop = G_Dereference<Vec4>(frame.backdrop_mips[0]);
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
@ -858,8 +858,8 @@ ImplComputeShader2D(V_CompositeCS)
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[layer]);
RWTexture2D<u32> cells = G_DereferenceRW<u32>(frame.particle_cells[layer]);
RWTexture2D<u32> densities = G_DereferenceRW<u32>(frame.particle_densities[layer]);
u32 packed = cells[cell_pos];
V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
if (particle_kind != V_ParticleKind_None)
@ -1115,17 +1115,17 @@ ImplComputeShader2D(V_BloomDownCS)
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
RWTexture2D<Vec4> bloom_down = G_DereferenceRW<Vec4>(frame.bloom_mips[mip_idx - 1]);
Texture2D<Vec4> bloom_up;
b32 is_first_pass = mip_idx == 1;
if (is_first_pass)
{
bloom_up = G_Dereference<Vec4>(frame.screen_ro);
bloom_up = G_Dereference<Vec4>(frame.screen);
}
else
{
bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);
bloom_up = G_Dereference<Vec4>(frame.bloom_mips[mip_idx - 2]);
}
Vec2 down_dims = countof(bloom_down);
@ -1190,17 +1190,17 @@ ImplComputeShader2D(V_BloomUpCS)
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips[mip_idx]);
b32 is_last_pass = mip_idx == 0;
RWTexture2D<Vec4> bloom_up;
if (is_last_pass)
{
bloom_up = G_Dereference<Vec4>(frame.screen_rw);
bloom_up = G_DereferenceRW<Vec4>(frame.screen);
}
else
{
bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
bloom_up = G_DereferenceRW<Vec4>(frame.bloom_mips[mip_idx - 1]);
}
Vec2 down_dims = countof(bloom_down);
@ -1255,8 +1255,8 @@ ImplComputeShader2D(V_FinalizeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Texture2D<Vec4> bloom_tex = G_Dereference<Vec4>(frame.bloom_mips_ro[0]);
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
Texture2D<Vec4> bloom_tex = G_Dereference<Vec4>(frame.bloom_mips[0]);
RWTexture2D<Vec4> screen_tex = G_DereferenceRW<Vec4>(frame.screen);
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);

View File

@ -350,30 +350,25 @@ Struct(V_SharedFrame)
f32 backdrop_parallax;
G_Texture2DRef backdrop_src;
G_Texture2DRef backdrop_mips_ro[G_MaxMips];
G_RWTexture2DRef backdrop_mips_rw[G_MaxMips];
G_Texture2DRef backdrop_mips[G_MaxMips];
G_Texture2DRef screen_ro;
G_RWTexture2DRef screen_rw;
G_Texture2DRef shade_ro;
G_RWTexture2DRef shade_rw;
G_Texture2DRef albedo_ro;
G_RWTexture2DRef albedo_rw;
G_Texture2DRef screen;
G_Texture2DRef shade;
G_Texture2DRef albedo;
G_Texture2DRef bloom_mips_ro[G_MaxMips];
G_RWTexture2DRef bloom_mips_rw[G_MaxMips];
G_Texture2DRef bloom_mips[G_MaxMips];
u32 emitters_count;
G_StructuredBufferRef emitters;
G_RWStructuredBufferRef particles;
G_StructuredBufferRef particles;
G_RWTexture2DRef stains;
G_RWTexture2DRef dry_stains;
G_RWTexture2DRef drynesses;
G_RWTexture2DRef occluders;
G_Texture2DRef stains;
G_Texture2DRef dry_stains;
G_Texture2DRef drynesses;
G_Texture2DRef occluders;
G_RWTexture2DRef particle_cells[V_ParticleLayer_COUNT];
G_RWTexture2DRef particle_densities[V_ParticleLayer_COUNT];
G_Texture2DRef particle_cells[V_ParticleLayer_COUNT];
G_Texture2DRef particle_densities[V_ParticleLayer_COUNT];
G_StructuredBufferRef dverts;
G_StructuredBufferRef quads;

View File

@ -18,26 +18,22 @@ void PT_RunForever(WaveLaneCtx *lane)
{
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{
// Push resources
Vec2I32 final_target_size = window_frame.draw_size;
G_ResourceHandle final_target = G_PushTexture2D(
G_ResourceHandle final_target_res = G_PushTexture2D(
gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float,
final_target_size,
G_Layout_DirectQueue_ShaderReadWrite,
G_Layout_DirectQueue_General,
.flags = G_ResourceFlag_AllowShaderReadWrite
);
// Push resource handles
G_Texture2DRef final_target_rhandle = G_PushTexture2DRef(gpu_frame_arena, final_target);
G_RWTexture2DRef final_target_rwhandle = G_PushRWTexture2DRef(gpu_frame_arena, final_target);
G_Texture2DRef final_target = G_PushTexture2DRef(gpu_frame_arena, final_target_res);
// Prep test pass
{
G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle);
G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target);
G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp));
G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle);
G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target);
G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture());
}
@ -45,12 +41,11 @@ void PT_RunForever(WaveLaneCtx *lane)
{
G_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1));
}
G_DumbMemorySync(cl, final_target);
G_DumbMemorySync(cl, final_target_res);
// Prep blit pass
{
G_DumbMemoryLayoutSync(cl, final_target, G_Layout_DirectQueue_ShaderRead);
G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_DirectQueue_RenderTargetWrite);
G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_DirectQueue_RenderTarget);
}
// Blit pass
@ -67,7 +62,7 @@ void PT_RunForever(WaveLaneCtx *lane)
// Finalize backbuffer layout
{
G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present);
G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_Common);
}
// Reset

View File

@ -5,7 +5,7 @@ ImplComputeShader2D(PT_TestCS)
{
StructuredBuffer<TestStruct> sb = G_Dereference<TestStruct>(PT_ShaderConst_TestBuff);
RWTexture2D<Vec4> target_tex = G_Dereference<Vec4>(PT_ShaderConst_TestTarget);
RWTexture2D<Vec4> target_tex = G_DereferenceRW<Vec4>(PT_ShaderConst_TestTarget);
Vec2U32 target_tex_size = countof(target_tex);
Vec2I32 id = SV_DispatchThreadID;

View File

@ -4,7 +4,7 @@
G_DeclConstant(G_Texture3DRef, PT_ShaderConst_NoiseTex, 0);
// Test shader
G_DeclConstant(G_RWTexture2DRef, PT_ShaderConst_TestTarget, 1);
G_DeclConstant(G_Texture2DRef, PT_ShaderConst_TestTarget, 1);
G_DeclConstant(G_StructuredBufferRef, PT_ShaderConst_TestBuff, 2);
G_DeclConstant(f32, PT_ShaderConst_TestConst, 3);