diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 21038680..b4ca57fa 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -24,45 +24,52 @@ Enum(G_Family) G_Family_Copy }; -Enum(G_QueueKind) -{ - G_QueueKind_Direct = 0, #if G_IsMultiQueueEnabled - G_QueueKind_AsyncCompute = 1, - G_QueueKind_AsyncCopy = 2, + Enum(G_QueueKind) + { + G_QueueKind_Direct = 0, + G_QueueKind_AsyncCompute = 1, + G_QueueKind_AsyncCopy = 2, + G_QueueKind_COUNT + }; + Enum(G_QueueMask) + { + G_QueueMask_None = 0, + G_QueueMask_Direct = (1 << 0), + G_QueueMask_AsyncCompute = (1 << 1), + G_QueueMask_AsyncCopy = (1 << 2), + G_QueueMask_All = (0xFFFFFFFF >> (32 - G_QueueKind_COUNT)) + }; #else - G_QueueKind_AsyncCompute = G_QueueKind_Direct, - G_QueueKind_AsyncCopy = G_QueueKind_Direct, + Enum(G_QueueKind) + { + G_QueueKind_Direct = 0, + G_QueueKind_AsyncCompute = G_QueueKind_Direct, + G_QueueKind_AsyncCopy = G_QueueKind_Direct, + G_QueueKind_COUNT + }; + Enum(G_QueueMask) + { + G_QueueMask_None = 0, + G_QueueMask_Direct = (1 << 0), + G_QueueMask_AsyncCompute = G_QueueMask_Direct, + G_QueueMask_AsyncCopy = G_QueueMask_Direct, + G_QueueMask_All = (0xFFFFFFFF >> (32 - G_QueueKind_COUNT)) + }; #endif - G_QueueKind_COUNT -}; -Enum(G_QueueMask) -{ - G_QueueMask_None = 0, - G_QueueMask_Direct = (1 << 0), -#if G_IsMultiQueueEnabled - G_QueueMask_AsyncCompute = (1 << 1), - G_QueueMask_AsyncCopy = (1 << 2), -#else - G_QueueMask_AsyncCompute = G_QueueMask_Direct, - G_QueueMask_AsyncCopy = G_QueueMask_Direct, -#endif - G_QueueMask_All = (0xFFFFFFFF >> (32 - G_QueueKind_COUNT)) -}; #define G_QueueMaskFromKind(queue_kind) (1 << queue_kind) Struct(G_QueueCompletions) { - i64 v[G_QueueKind_COUNT]; // Array of completions indexed by queue kind + i64 v[G_QueueKind_COUNT]; }; -// All waiters will wait until specified queues reach their value in the `completions` array Struct(G_QueueBarrierDesc) { - G_QueueCompletions completions; // Completions that waiters should wait for - G_QueueMask wait_queues; 
// Mask of queues that will wait for completions - b32 wait_cpu; // Will the cpu wait for completion + G_QueueCompletions completions; // Completions to be waited on + G_QueueMask wait_queues; // Mask of queues that should wait + b32 wait_cpu; // Should the cpu wait }; //////////////////////////////////////////////////////////// @@ -384,6 +391,7 @@ Enum(G_BlendMode) G_BlendMode_CompositePremultipliedAlpha, }; +#define G_Rt(_tex, _blend_mode, ...) ((G_RenderTargetDesc) { .texture = (_tex), .blend = (_blend_mode), __VA_ARGS__ }) Struct(G_RenderTargetDesc) { G_TextureRef texture; @@ -509,86 +517,11 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl, G_ArenaHandle gpu_are } \ )}) - - - - - - -//- Resource creation - -// G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_ResourceDesc desc); - -// #define G_PushBuffer(arena, cl, _type, _count, ...) G_PushResource((arena), (cl), \ -// (G_ResourceDesc) { \ -// .kind = G_ResourceKind_Buffer, \ -// .buffer = { \ -// .size = sizeof(_type) * (_count), \ -// __VA_ARGS__ \ -// } \ -// } \ -// ) - -// #define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ -// (G_ResourceDesc) { \ -// .kind = G_ResourceKind_Texture1D, \ -// .texture = { \ -// .format = (_format), \ -// .dims = VEC3I32((_size), 1, 1), \ -// .initial_layout = (_initial_layout), \ -// __VA_ARGS__ \ -// } \ -// } \ -// ) - -// #define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ -// (G_ResourceDesc) { \ -// .kind = G_ResourceKind_Texture2D, \ -// .texture = { \ -// .format = (_format), \ -// .dims = VEC3I32((_size).x, (_size).y, 1), \ -// .initial_layout = (_initial_layout), \ -// __VA_ARGS__ \ -// } \ -// } \ -// ) - -// #define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) 
G_PushResource((arena), (cl), \ -// (G_ResourceDesc) { \ -// .kind = G_ResourceKind_Texture3D, \ -// .texture = { \ -// .format = (_format), \ -// .dims = (_size), \ -// .initial_layout = (_initial_layout), \ -// __VA_ARGS__ \ -// } \ -// } \ -// ) - -// #define G_PushSampler(arena, cl, ...) G_PushResource((arena), (cl), \ -// (G_ResourceDesc) { \ -// .kind = G_ResourceKind_Sampler, \ -// .sampler = { \ -// .filter = G_Filter_MinMagMipPoint, \ -// __VA_ARGS__ \ -// } \ -// } \ -// ) - -//- Index buffer helpers - -// #define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) }) -// #define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) }) - -//- Render target helpers - -#define G_Rt(_tex, _blend_mode) ((G_RenderTargetDesc) { .texture = (_tex), .blend = (_blend_mode) }) - //- Count -u64 G_GetStride(G_BufferRef buffer); u64 G_CountBuffer(G_BufferRef buffer); u64 G_CountBufferBytes(G_BufferRef buffer); +u64 G_CountBufferStride(G_BufferRef buffer); i32 G_Count1D(G_TextureRef texture); Vec2I32 G_Count2D(G_TextureRef texture); Vec3I32 G_Count3D(G_TextureRef texture); @@ -602,53 +535,6 @@ i32 G_CountMips(G_TextureRef texture); void *G_CpuPointerFromBuffer(G_BufferRef buffer); #define G_Deref(buffer, type) ((type *)G_CpuPointerFromBuffer(buffer)) -//////////////////////////////////////////////////////////// -//~ @hookdecl Shader resource reference - -// u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); - -// #define G_PushStructuredBufferRef(arena, resource, type, ...) (G_StructuredBufferRef) { \ -// .v = G_PushRef( \ -// (arena), (resource), \ -// (G_RefDesc) { .kind = G_RefKind_StructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \ -// ) \ -// } - -// #define G_PushByteAddressBufferRef(arena, resource, ...) 
(G_ByteAddressBufferRef) { \ -// .v = G_PushRef( \ -// (arena), (resource), \ -// (G_RefDesc) { .kind = G_RefKind_ByteAddressBuffer, __VA_ARGS__ } \ -// ) \ -// } - -// #define G_PushTexture1DRef(arena, resource, ...) (G_Texture1DRef) { \ -// .v = G_PushRef( \ -// (arena), (resource), \ -// (G_RefDesc) { .kind = G_RefKind_Texture1D, .mips.max = G_MaxMips, __VA_ARGS__ } \ -// ) \ -// } - -// #define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \ -// .v = G_PushRef( \ -// (arena), (resource), \ -// (G_RefDesc) { .kind = G_RefKind_Texture2D, .mips.max = G_MaxMips, __VA_ARGS__ } \ -// ) \ -// } - -// #define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \ -// .v = G_PushRef( \ -// (arena), (resource), \ -// (G_RefDesc) { .kind = G_RefKind_Texture3D, .mips.max = G_MaxMips, __VA_ARGS__ } \ -// ) \ -// } - -// #define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \ -// .v = G_PushRef( \ -// (arena), (resource), \ -// (G_RefDesc) { .kind = G_RefKind_SamplerState, __VA_ARGS__ } \ -// ) \ -// } - //////////////////////////////////////////////////////////// //~ @hookdecl Command diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index eb735188..18aab93f 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -442,25 +442,25 @@ void G_Bootstrap(void) .capacity = G_D12_MaxCbvSrvUavDescriptors, .name = Lit("Primary Resource Descriptor Heap"), }, - [G_D12_DescriptorHeapKind_Rtv] = { - .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, - .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, - .capacity = G_D12_MaxRtvDescriptors, - .name = Lit("Primary RTV Descriptor Heap"), - }, [G_D12_DescriptorHeapKind_Sampler] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .capacity = G_D12_MaxSamplerDescriptors, .name = Lit("Primary Sampler Descriptor Heap"), }, + [G_D12_DescriptorHeapKind_Rtv] = { + .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + .flags = 
D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + .capacity = G_D12_MaxRtvDescriptors, + .name = Lit("Primary RTV Descriptor Heap"), + }, }; for (G_D12_DescriptorHeapKind heap_kind = 0; heap_kind < countof(descs); ++heap_kind) { Dx12HeapDesc desc = descs[heap_kind]; G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; heap->arena = AcquireArena(Gibi(64)); - heap->descriptor_indices_arena = AcquireArena(Gibi(64)); + heap->indices_to_descriptors_arena = AcquireArena(Gibi(64)); heap->kind = heap_kind; heap->type = desc.type; @@ -489,7 +489,7 @@ void G_Bootstrap(void) // Push nil descriptor at index 0 { G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); - G_D12_Descriptor *nil_descriptor = G_D12_PushDescriptor(gpu_perm, 1, heap_kind); + G_D12_Descriptor *nil_descriptor = G_D12_AcquireDescriptor(heap_kind, G_D12_MaxDescriptorBundleCount); Assert(nil_descriptor->base_index == 0); } G_D12_SetObjectName((ID3D12Object *)heap->d3d_heap, desc.name); @@ -1018,9 +1018,9 @@ G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind) G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); for (u32 rtv_idx = 0; rtv_idx < countof(cl->rtv_descriptors); ++rtv_idx) { - cl->rtv_descriptors[rtv_idx] = G_D12_PushDescriptor(gpu_perm, 1, G_D12_DescriptorHeapKind_Rtv); + cl->rtv_descriptors[rtv_idx] = G_D12_AcquireDescriptor(G_D12_DescriptorHeapKind_Rtv, 1); } - cl->rtv_clear_descriptor = G_D12_PushDescriptor(gpu_perm, 1, G_D12_DescriptorHeapKind_Rtv); + cl->rtv_clear_descriptor = G_D12_AcquireDescriptor(G_D12_DescriptorHeapKind_Rtv, 1); } } @@ -1307,52 +1307,108 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) -G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, u64 bundle_count, G_D12_DescriptorHeapKind heap_kind) +G_D12_Descriptor *G_D12_AcquireDescriptor(G_D12_DescriptorHeapKind heap_kind, u64 bundle_count) { - // FIXME: Impl - G_D12_Descriptor *result = 0; - return result; + G_D12_DescriptorHeap *heap = 
&G_D12.descriptor_heaps[heap_kind]; + G_D12_DescriptorList *free_descriptors = &heap->free_descriptors_table.descriptors_by_bundle_count[bundle_count]; + + b32 is_new = 0; + G_D12_Descriptor *descriptor = 0; + { + Lock lock = LockE(&heap->mutex); + descriptor = free_descriptors->first; + if (descriptor) + { + DllQueueRemove(free_descriptors->first, free_descriptors->last, descriptor); + } + else + { + Arena *perm = PermArena(); + if (heap->count + bundle_count > heap->capacity) + { + Panic(StringF(perm, "Maximum D3D12 descriptors reached in heap (%F)", FmtSint(heap->capacity))); + } + descriptor = PushStruct(perm, G_D12_Descriptor); + descriptor->heap = heap; + descriptor->base_index = heap->count; + descriptor->d3d_handle.ptr = heap->start_handle.ptr + (descriptor->base_index * heap->stride); + descriptor->bundle_count = bundle_count; + heap->count += bundle_count; + is_new = 1; + } + Unlock(&lock); + } + ZeroStruct(&descriptor->info); + + // Fill indices + if (is_new) + { + G_D12_Descriptor **indices_map_base = PushStructsNoZero(heap->indices_to_descriptors_arena, G_D12_Descriptor *, bundle_count); + for (u64 base_offset_idx = 0; base_offset_idx < bundle_count; ++base_offset_idx) + { + indices_map_base[base_offset_idx] = descriptor; + } + } + + return descriptor; } -void G_D12_InitRtvDescriptorFromResource(G_D12_Descriptor *descriptor, G_D12_Resource *resource, i32 mip) +void G_D12_ReleaseDescriptor(G_D12_Descriptor *descriptor) { - // FIXME: Impl + // TODO } + + + + + + + + + +void G_D12_InitResourceDescriptor(G_D12_Descriptor *descriptor, G_D12_Resource *resource, RngI32 mips) +{ +} + +void G_D12_InitRtvDescriptor(G_D12_Descriptor *descriptor, G_D12_Resource *resource, i32 mip) +{ + // FIXME: Impl +} + G_D12_Descriptor *G_D12_DescriptorFromBufferRef(G_BufferRef ref) { - // FIXME: Impl - G_D12_Descriptor *result = 0; - return result; + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav]; + return 
ArenaFirst(heap->indices_to_descriptors_arena, G_D12_Descriptor *)[ref.v]; } G_D12_Descriptor *G_D12_DescriptorFromTextureRef(G_TextureRef ref) { - G_D12_Descriptor *result = 0; - return result; + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav]; + return ArenaFirst(heap->indices_to_descriptors_arena, G_D12_Descriptor *)[ref.v]; } G_D12_Descriptor *G_D12_DescriptorFromSamplerRef(G_SamplerRef ref) { - G_D12_Descriptor *result = 0; - return result; + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler]; + return ArenaFirst(heap->indices_to_descriptors_arena, G_D12_Descriptor *)[ref.v]; } G_D12_Resource *G_D12_ResourceFromBufferRef(G_BufferRef ref) { - return G_D12_DescriptorFromBufferRef(ref)->resource; + return G_D12_DescriptorFromBufferRef(ref)->info.resource; } G_D12_Resource *G_D12_ResourceFromTextureRef(G_TextureRef ref) { - return G_D12_DescriptorFromTextureRef(ref)->resource; + return G_D12_DescriptorFromTextureRef(ref)->info.resource; } G_D12_Resource *G_D12_ResourceFromSamplerRef(G_SamplerRef ref) { - return G_D12_DescriptorFromSamplerRef(ref)->resource; + return G_D12_DescriptorFromSamplerRef(ref)->info.resource; } @@ -1376,6 +1432,9 @@ G_D12_Resource *G_D12_ResourceFromSamplerRef(G_SamplerRef ref) +// TODO: Return nil descriptor when size is 0 + + @@ -1394,6 +1453,11 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle memory_desc.kind == G_MemoryKind_Texture3D ); b32 is_sampler = memory_desc.kind == G_MemoryKind_Sampler; + + b32 is_1d = is_texture && memory_desc.kind == G_MemoryKind_Texture1D; + b32 is_2d = is_texture && memory_desc.kind == G_MemoryKind_Texture2D; + b32 is_3d = is_texture && memory_desc.kind == G_MemoryKind_Texture3D; + G_MemoryFlag flags = ( is_buffer ? memory_desc.buffer.flags : is_texture ? 
memory_desc.texture.flags : @@ -1681,11 +1745,10 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle } ////////////////////////////// - //- Push ref + //- Push descriptor - G_BaseDescriptorIndex descriptor_idx = 0; // G_D12_RefBundle *bundle = 0; @@ -1707,11 +1770,252 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle // } + G_D12_Descriptor *descriptor = 0; + { + G_QueueCompletions completions = G_CompletionValuesFromQueues(G_QueueMask_All); + G_D12_DescriptorHeapKind descriptor_heap_kind = 0; + u64 bundle_count = 0; + if (is_buffer) + { + bundle_count = 4; + descriptor_heap_kind = G_D12_DescriptorHeapKind_CbvSrvUav; + } + else if (is_texture) + { + // 0: SRV, 1: UAV + bundle_count = resource->texture_mips * 2; + descriptor_heap_kind = G_D12_DescriptorHeapKind_CbvSrvUav; + } + else + { + bundle_count = 1; + descriptor_heap_kind = G_D12_DescriptorHeapKind_Sampler; + } + G_D12_DescriptorTable *table = &gpu_arena->reset_descriptor_tables_by_heap[descriptor_heap_kind]; + G_D12_DescriptorList *reset_descriptors = &table->descriptors_by_bundle_count[bundle_count]; + descriptor = reset_descriptors->first; + if (descriptor && completions.v[descriptor->info.completion_queue_kind] >= descriptor->info.completion_queue_target) + { + DllQueueRemove(reset_descriptors->first, reset_descriptors->last, descriptor); + --reset_descriptors->count; + } + else + { + descriptor = G_D12_AcquireDescriptor(descriptor_heap_kind, bundle_count); + DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); + ++gpu_arena->descriptors.count; + descriptor->info.gpu_arena = gpu_arena; + } + } + + ////////////////////////////// + //- Create views + + // TODO: Descriptor reuse + b32 can_reuse_descriptor = 0; + + if (!can_reuse_descriptor) + { + descriptor->info.resource = resource; + if (is_texture) + { + descriptor->info.mips = RNGI32(0, resource->texture_mips - 1); + } + else if (is_buffer) + { + 
descriptor->info.buffer_element_offset = 0; + descriptor->info.buffer_element_count = memory_desc.buffer.count; + descriptor->info.buffer_element_stride = memory_desc.buffer.stride; + } + + { + //- Buffer views + if (is_buffer) + { + // base index + 0: Structured SRV + // base index + 1: Structured UAV + // base index + 2: Raw SRV + // base index + 3: Raw UAV + for (u32 descriptor_idx_offset = 0; descriptor_idx_offset < 4; ++descriptor_idx_offset) + { + D3D12_CPU_DESCRIPTOR_HANDLE d3d_handle = { .ptr = descriptor->d3d_handle.ptr + descriptor_idx_offset * descriptor->heap->stride }; + if (descriptor_idx_offset % 2 == 0) + { + //- Buffer SRV + b32 ok = 1; + D3D12_SHADER_RESOURCE_VIEW_DESC srv = Zi; + srv.Format = DXGI_FORMAT_UNKNOWN; + srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Buffer.FirstElement = descriptor->info.buffer_element_offset; + srv.Buffer.NumElements = descriptor->info.buffer_element_count; + srv.Buffer.StructureByteStride = descriptor->info.buffer_element_stride; + if (descriptor_idx_offset >= 2) + { + // Raw + srv.Format = DXGI_FORMAT_R32_TYPELESS; + srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + srv.Buffer.StructureByteStride = 0; + } + else if (srv.Buffer.StructureByteStride < 4) + { + ok = 0; + } + if (ok) + { + ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &srv, d3d_handle); + } + else + { + ID3D12Device_CreateShaderResourceView(G_D12.device, 0, 0, d3d_handle); + } + } + else + { + //- Buffer UAV + b32 ok = 1; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav = Zi; + uav.Format = DXGI_FORMAT_UNKNOWN; + uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav.Buffer.FirstElement = descriptor->info.buffer_element_offset; + uav.Buffer.NumElements = descriptor->info.buffer_element_count; + uav.Buffer.StructureByteStride = descriptor->info.buffer_element_stride; + if (descriptor_idx_offset >= 2) + { + // Raw + uav.Format = DXGI_FORMAT_R32_TYPELESS; + 
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uav.Buffer.StructureByteStride = 0; + } + else if (uav.Buffer.StructureByteStride < 4) + { + ok = 0; + } + if (ok) + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &uav, d3d_handle); + } + else + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, 0, 0, 0, d3d_handle); + } + } + } + } + + //- Texture views + if (is_texture) + { + // base index + (mip * 2) + 0: mip SRV + // base index + (mip * 2) + 1: mip UAV + // etc... + for (u32 descriptor_idx_offset = 0; descriptor_idx_offset < (u32)resource->texture_mips * 2; ++descriptor_idx_offset) + { + D3D12_CPU_DESCRIPTOR_HANDLE d3d_handle = { .ptr = descriptor->d3d_handle.ptr + descriptor_idx_offset * descriptor->heap->stride }; + + RngI32 mips = RNGI32(descriptor_idx_offset / 2, resource->texture_mips - 1); + + if (descriptor_idx_offset % 2 == 0) + { + //- Texture SRV + D3D12_SHADER_RESOURCE_VIEW_DESC srv = Zi; + srv.Format = DXGI_FORMAT_UNKNOWN; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + if (is_1d) + { + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srv.Texture1D.MostDetailedMip = mips.min; + srv.Texture1D.MipLevels = mips.max - mips.min + 1; + } + else if (is_2d) + { + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Texture2D.MostDetailedMip = mips.min; + srv.Texture2D.MipLevels = mips.max - mips.min + 1; + } + else if (is_3d) + { + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srv.Texture3D.MostDetailedMip = mips.min; + srv.Texture3D.MipLevels = mips.max - mips.min + 1; + } + ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &srv, d3d_handle); + } + else + { + //- Texture UAV + D3D12_UNORDERED_ACCESS_VIEW_DESC uav = Zi; + uav.Format = DXGI_FORMAT_UNKNOWN; + if (is_1d) + { + uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + uav.Texture1D.MipSlice = mips.min; + } + else if (is_2d) + { + uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + 
uav.Texture2D.MipSlice = mips.min; + } + else if (is_3d) + { + uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uav.Texture3D.MipSlice = mips.min; + uav.Texture3D.WSize = U32Max; + } + if (resource->d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &uav, d3d_handle); + } + else + { + uav.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + ID3D12Device_CreateUnorderedAccessView(G_D12.device, 0, 0, &uav, d3d_handle); + } + } + } + } + + //- Sampler view + if (is_sampler) + { + G_SamplerDesc sampler_desc = resource->sampler_desc; + D3D12_SAMPLER_DESC sampler = Zi; + { + sampler.Filter = (D3D12_FILTER)sampler_desc.filter; + sampler.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.x; + sampler.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.y; + sampler.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.z; + sampler.MipLODBias = sampler_desc.mip_lod_bias; + sampler.MaxAnisotropy = MaxU32(sampler_desc.max_anisotropy, 1); + sampler.ComparisonFunc = (D3D12_COMPARISON_FUNC)sampler_desc.comparison; + sampler.BorderColor[0] = sampler_desc.border_color.x; + sampler.BorderColor[1] = sampler_desc.border_color.y; + sampler.BorderColor[2] = sampler_desc.border_color.z; + sampler.BorderColor[3] = sampler_desc.border_color.w; + sampler.MinLOD = sampler_desc.min_lod; + sampler.MaxLOD = sampler_desc.max_lod; + } + if (sampler.AddressU == 0) sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + if (sampler.AddressV == 0) sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + if (sampler.AddressW == 0) sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + if (sampler.MaxLOD >= Inf) + { + sampler.MaxLOD = D3D12_FLOAT32_MAX; + } + ID3D12Device_CreateSampler(G_D12.device, &sampler, descriptor->d3d_handle); + } + } + } + + + + + ////////////////////////////// - //- Transition reused texture to common if necessary + //- Transition initial layout @@ -1727,24 +2031,23 @@ G_BaseDescriptorIndex 
G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle queue_kind != G_QueueKind_AsyncCopy ) { - - G_SyncLayout(cl_handle, G_MakeTextureRef(descriptor_idx), G_TextureLayout_Common); + G_SyncLayout(cl_handle, G_MakeTextureRef(descriptor->base_index), G_TextureLayout_Common); } ////////////////////////////// - //- Upload initial data if present + //- Upload initial data if (is_buffer && memory_desc.buffer.cpu_src) { G_CopyCpuToBuffer( cl_handle, - G_MakeBufferRef(descriptor_idx), 0, + G_MakeBufferRef(descriptor->base_index), 0, memory_desc.buffer.cpu_src, RNGU64(0, memory_desc.buffer.stride * memory_desc.buffer.count) ); } - return descriptor_idx; + return descriptor->base_index; } @@ -2293,73 +2596,67 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle //- Count -u64 G_GetBufferStride(G_BufferRef buffer) +u64 G_CountBufferStride(G_BufferRef buffer) { - // FIXME: Impl - u64 result = Zi; - return result; + return G_D12_DescriptorFromBufferRef(buffer)->info.buffer_element_stride; } u64 G_CountBuffer(G_BufferRef buffer) { - // FIXME: Impl - u64 result = Zi; - return result; + return G_D12_DescriptorFromBufferRef(buffer)->info.buffer_element_count; } u64 G_CountBufferBytes(G_BufferRef buffer) { - // FIXME: Impl - u64 result = Zi; - return result; + return G_D12_DescriptorFromBufferRef(buffer)->info.buffer_element_count * G_D12_DescriptorFromBufferRef(buffer)->info.buffer_element_stride; } i32 G_Count1D(G_TextureRef texture) { - // FIXME: Impl - i32 result = Zi; + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + i32 result = resource->texture_dims.x; return result; } Vec2I32 G_Count2D(G_TextureRef texture) { - // FIXME: Impl - Vec2I32 result = Zi; + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + Vec2I32 result = VEC2I32(resource->texture_dims.x, resource->texture_dims.y); return result; } Vec3I32 G_Count3D(G_TextureRef texture) { - // FIXME: Impl - Vec3I32 result = Zi; + 
G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + Vec3I32 result = resource->texture_dims; return result; } i32 G_CountWidth(G_TextureRef texture) { - // FIXME: Impl - i32 result = Zi; + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + i32 result = resource->texture_dims.x; return result; } i32 G_CountHeight(G_TextureRef texture) { - // FIXME: Impl - i32 result = Zi; + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + i32 result = resource->texture_dims.y; return result; } i32 G_CountDepth(G_TextureRef texture) { - // FIXME: Impl - i32 result = Zi; + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + i32 result = resource->texture_dims.z; return result; } i32 G_CountMips(G_TextureRef texture) { - // FIXME: Impl - i32 result = Zi; + G_D12_Descriptor *descriptor = G_D12_DescriptorFromTextureRef(texture); + i32 result = descriptor->info.mips.max - descriptor->info.mips.min + 1; return result; } @@ -2367,8 +2664,8 @@ i32 G_CountMips(G_TextureRef texture) void *G_CpuPointerFromBuffer(G_BufferRef buffer) { - // FIXME: Impl - void *result = 0; + G_D12_Resource *resource = G_D12_ResourceFromBufferRef(buffer); + void *result = resource->mapped; return result; } @@ -3377,7 +3674,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) if (indices_count > 0) { G_D12_Resource *indices_resource = G_D12_ResourceFromBufferRef(cmd->draw.indices); - u32 stride = G_GetStride(cmd->draw.indices); + u32 stride = G_CountBufferStride(cmd->draw.indices); ibv.BufferLocation = indices_resource->buffer_gpu_address; ibv.SizeInBytes = indices_count * stride; if (stride == 2) @@ -3508,7 +3805,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) if (bound_render_target_uids[i] != rt->uid + desc.mip) { G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; - G_D12_InitRtvDescriptorFromResource(rtv_descriptor, rt, desc.mip); + G_D12_InitRtvDescriptor(rtv_descriptor, rt, desc.mip); bound_render_target_uids[i] = 
rt->uid + desc.mip; om_dirty = 1; } @@ -3595,9 +3892,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { G_D12_Descriptor *next = descriptor->next; { - G_D12_Arena *gpu_arena = descriptor->gpu_arena; - descriptor->completion_queue_kind = queue_kind; - descriptor->completion_queue_target = completion_target; + G_D12_Arena *gpu_arena = descriptor->info.gpu_arena; + descriptor->info.completion_queue_kind = queue_kind; + descriptor->info.completion_queue_target = completion_target; G_D12_DescriptorTable *dst_table = &gpu_arena->reset_descriptor_tables_by_heap[descriptor->heap->kind]; G_D12_DescriptorList *dst_list = &dst_table->descriptors_by_bundle_count[descriptor->bundle_count]; DllQueuePush(dst_list->first, dst_list->last, descriptor); diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 17ff8b7a..d3bf0ed7 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -118,25 +118,6 @@ Enum(G_D12_DescriptorHeapKind) G_D12_DescriptorHeapKind_COUNT }; -Struct(G_D12_DescriptorHeap) -{ - G_D12_DescriptorHeapKind kind; - - Mutex mutex; - Arena *arena; - Arena *descriptor_indices_arena; - - D3D12_DESCRIPTOR_HEAP_TYPE type; - ID3D12DescriptorHeap *d3d_heap; - D3D12_CPU_DESCRIPTOR_HANDLE start_handle; - - u32 stride; - u32 count; - u32 capacity; - - struct G_D12_Descriptor *first_free_descriptor_by_count[G_D12_MaxDescriptorBundleCount]; -}; - Struct(G_D12_Descriptor) { G_D12_Descriptor *next; @@ -144,25 +125,27 @@ Struct(G_D12_Descriptor) // Persistent data - G_D12_DescriptorHeap *heap; + struct G_D12_DescriptorHeap *heap; D3D12_CPU_DESCRIPTOR_HANDLE d3d_handle; u32 base_index; - - // Per-lifetime data - - struct G_D12_Arena *gpu_arena; - G_D12_Resource *resource; - u64 bundle_count; - u64 buffer_element_offset; - u64 buffer_element_count; - u64 buffer_element_stride; + // Per-resource data - RngI32 texture_mips; + struct + { + struct G_D12_Arena *gpu_arena; + struct G_D12_Resource *resource; - 
G_QueueKind completion_queue_kind; - i64 completion_queue_target; + u64 buffer_element_offset; + u64 buffer_element_count; + u64 buffer_element_stride; + + RngI32 mips; + + G_QueueKind completion_queue_kind; + i64 completion_queue_target; + } info; }; @@ -178,6 +161,25 @@ Struct(G_D12_DescriptorTable) G_D12_DescriptorList descriptors_by_bundle_count[G_D12_MaxDescriptorBundleCount]; }; +Struct(G_D12_DescriptorHeap) +{ + G_D12_DescriptorHeapKind kind; + + Mutex mutex; + Arena *arena; + Arena *indices_to_descriptors_arena; + + D3D12_DESCRIPTOR_HEAP_TYPE type; + ID3D12DescriptorHeap *d3d_heap; + D3D12_CPU_DESCRIPTOR_HANDLE start_handle; + + u32 stride; + u32 count; + u32 capacity; + + G_D12_DescriptorTable free_descriptors_table; +}; + //////////////////////////////////////////////////////////// //~ Arena types @@ -754,7 +756,7 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena); // G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index); // G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind); -G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, u64 bundle_count, G_D12_DescriptorHeapKind heap_kind); +G_D12_Descriptor *G_D12_AcquireDescriptor(G_D12_DescriptorHeapKind heap_kind, u64 bundle_count); void G_D12_InitRtvDescriptor(G_D12_Descriptor *descriptor, G_D12_Resource *resource, i32 mip); diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index d04cb36d..d05128b7 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -91,31 +91,31 @@ Enum(G_BasicSamplerKind) - //- Scalar/Uniform dereference - template StructuredBuffer G_SDeref(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 0]; } - template RWStructuredBuffer G_SDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 1]; } - ByteAddressBuffer G_SDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 2]; } - RWByteAddressBuffer G_SDerefRawRW(G_BufferRef r) { return 
ResourceDescriptorHeap[r.v + 3]; } - template Texture1D G_SDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } - template Texture2D G_SDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } - template Texture3D G_SDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } - template RWTexture1D G_SDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } - template RWTexture2D G_SDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } - template RWTexture3D G_SDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } - SamplerState G_SDeref(G_SamplerRef r) { return SamplerDescriptorHeap[r.v]; } + //- Scalar/Uniform dereference (faster on AMD hardware) + template StructuredBuffer G_UniformDeref(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 0]; } + template RWStructuredBuffer G_UniformDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 1]; } + ByteAddressBuffer G_UniformDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 2]; } + RWByteAddressBuffer G_UniformDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 3]; } + template Texture1D G_UniformDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } + template Texture2D G_UniformDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } + template Texture3D G_UniformDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } + template RWTexture1D G_UniformDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } + template RWTexture2D G_UniformDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } + template RWTexture3D G_UniformDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + 
(mip * 2) + 1]; } + SamplerState G_UniformDeref(G_SamplerRef r) { return SamplerDescriptorHeap[r.v]; } //- Vector/Non-Uniform dereference - template StructuredBuffer G_VDeref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; } - template RWStructuredBuffer G_VDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - ByteAddressBuffer G_VDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } - RWByteAddressBuffer G_VDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; } - template Texture1D G_VDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } - template Texture2D G_VDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } - template Texture3D G_VDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } - template RWTexture1D G_VDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } - template RWTexture2D G_VDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } - template RWTexture3D G_VDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } - SamplerState G_VDeref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template StructuredBuffer G_DynamicDeref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; } + template RWStructuredBuffer G_DynamicDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + ByteAddressBuffer G_DynamicDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } + RWByteAddressBuffer 
G_DynamicDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; } + template Texture1D G_DynamicDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } + template Texture2D G_DynamicDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } + template Texture3D G_DynamicDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } + template RWTexture1D G_DynamicDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } + template RWTexture2D G_DynamicDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } + template RWTexture3D G_DynamicDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } + SamplerState G_DynamicDeref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } @@ -170,8 +170,6 @@ Enum(G_BasicSamplerKind) //////////////////////////////////////////////////////////// //~ Debug printf -// This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/ - Enum(G_FmtArgKind) { G_FmtArgKind_None, @@ -261,7 +259,7 @@ Struct(G_FmtArg) void G_CommitPrint(G_TempPrintBuffer buff) { - RWByteAddressBuffer rw = G_SDerefRawRW(G_ShaderConst_PrintBuffer); + RWByteAddressBuffer rw = G_UniformDerefRawRW(G_ShaderConst_PrintBuffer); if (buff.overflowed) { diff --git a/src/proto/proto_gpu.g b/src/proto/proto_gpu.g index 10411b51..0b60494f 100644 --- a/src/proto/proto_gpu.g +++ b/src/proto/proto_gpu.g @@ -3,8 +3,8 @@ ComputeShader(PT_TestCS) { - PT_SharedFrame frame = G_SDeref(PT_ShaderConst_Frame)[0]; - RWTexture2D target_tex = G_SDerefRW2D(frame.compute_target); + PT_SharedFrame frame = G_UniformDeref(PT_ShaderConst_Frame)[0]; + 
RWTexture2D target_tex = G_UniformDerefRW2D(frame.compute_target); Vec2U32 target_tex_size = countof(target_tex); @@ -35,10 +35,10 @@ VertexShader(PT_BlitVS, PT_BlitPSInput) PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) { - PT_SharedFrame frame = G_SDeref(PT_ShaderConst_Frame)[0]; - SamplerState sampler = G_SDeref(frame.sampler); - Texture2D src = G_SDeref2D(frame.compute_target); - Texture3D noise = G_SDeref3D(frame.noise_tex); + PT_SharedFrame frame = G_UniformDeref(PT_ShaderConst_Frame)[0]; + SamplerState sampler = G_UniformDeref(frame.sampler); + Texture2D src = G_UniformDeref2D(frame.compute_target); + Texture3D noise = G_UniformDeref3D(frame.noise_tex); Vec2 uv = input.src_uv; Vec4 tex_col = src.Sample(sampler, uv);