diff --git a/src/base/base.h b/src/base/base.h index 0650680d..ae9c7544 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -607,6 +607,7 @@ ForceInline void LockTicketMutex(TicketMutex *tm) ForceInline void UnlockTicketMutex(TicketMutex *tm) { + /* TODO: Atomic set w/ known ticket + 1 */ Atomic64FetchAdd(&tm->serving.v, 1); } #endif diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index d7b662e7..9dfba581 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -39,10 +39,32 @@ GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl) return cmd; } -u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc) +u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc, u64 buffer_size) { - /* TODO */ - u64 result = 1; + u64 result = RandU64FromSeeds(desc.kind, desc.flags); + switch(desc.kind) + { + default: break; + case GPU_ResourceKind_Texture1D: + case GPU_ResourceKind_Texture2D: + case GPU_ResourceKind_Texture3D: + { + result = RandU64FromSeeds(result, desc.texture.format); + result = RandU64FromSeeds(result, desc.texture.mip_levels); + result = RandU64FromSeeds(result, desc.clear_color.x); + result = RandU64FromSeeds(result, desc.clear_color.y); + result = RandU64FromSeeds(result, desc.clear_color.z); + result = RandU64FromSeeds(result, desc.clear_color.w); + result = RandU64FromSeeds(result, desc.texture.size.x); + result = RandU64FromSeeds(result, desc.texture.size.y); + result = RandU64FromSeeds(result, desc.texture.size.z); + } break; + case GPU_ResourceKind_Buffer: + { + result = RandU64FromSeeds(result, desc.buffer.heap_kind); + result = RandU64FromSeeds(result, buffer_size); + } break; + } return result; } @@ -919,26 +941,35 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); } - u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc); -#if 0 + u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); /* Grab reusable */ { - u64 bin_index = hash % countof(g->reuse_bins); - GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index]; + u64 bin_index = reuse_hash % countof(g->resource_reuse_bins); + GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index]; { Lock lock = LockE(&bin->mutex); - for (r = bin->first; r; r = r->next_reuse) { - if (r->reuse_hash == hash) + GPU_D12_ResourceReuseList *list = bin->first; + for (; list; list = list->next) { - DllRemoveNP(bin->first, bin->last, r, next_reuse, prev_reuse); - break; + if (list->hash == reuse_hash) break; + } + if (list) + { + r = list->first; + list->first = r->next_free; + if (!list->first) + { + DllRemove(bin->first, bin->last, list); + StackPush(bin->first_free, list); + list->prev = 0; + } + r->next_free = 0; } } Unlock(&lock); } } -#endif /* Grab from free list */ if (!r) @@ -967,10 +998,9 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) PushAlign(perm, CachelineSize); } - if (r->reuse_hash == 0) + /* Create d3d resource */ + if (!r->d3d_resource) { - r->reuse_hash = reuse_hash; - switch (desc.kind) { case GPU_ResourceKind_Sampler: break; @@ -1044,79 +1074,95 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) } } break; } + } - /* Create texture srv descriptor */ - if (desc.kind == GPU_ResourceKind_Texture1D - || desc.kind == GPU_ResourceKind_Texture2D - || desc.kind == GPU_ResourceKind_Texture3D) + /* Create texture srv descriptor */ + if (desc.kind == GPU_ResourceKind_Texture1D + || desc.kind == GPU_ResourceKind_Texture2D + || desc.kind == GPU_ResourceKind_Texture3D) + { + if (!r->srv_descriptor) { r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); - ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, 0, r->srv_descriptor->handle); } + ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, 0, r->srv_descriptor->handle); + } - /* Create buffer srv descriptor */ - if (desc.kind == GPU_ResourceKind_Buffer - && desc.buffer.heap_kind != GPU_HeapKind_Download - && desc.buffer.count > 0) + /* Create buffer srv descriptor */ + if (desc.kind == GPU_ResourceKind_Buffer + && desc.buffer.heap_kind != GPU_HeapKind_Download + && desc.buffer.count > 0) + { + if (!r->srv_descriptor) { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI; - srv_desc.Format = DXGI_FORMAT_UNKNOWN; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Buffer.FirstElement = 0; - srv_desc.Buffer.NumElements = desc.buffer.count; - srv_desc.Buffer.StructureByteStride = desc.buffer.stride; - srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); - ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle); } + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI; + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Buffer.FirstElement = 0; + srv_desc.Buffer.NumElements = desc.buffer.count; + srv_desc.Buffer.StructureByteStride = desc.buffer.stride; + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle); + } - /* Create uav descriptor */ - if (desc.flags & GPU_ResourceFlag_Writable) + /* Create uav descriptor */ + if (desc.flags & GPU_ResourceFlag_Writable) + { + if (!r->uav_descriptor) { r->uav_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); - ID3D12Device_CreateUnorderedAccessView(g->device, r->d3d_resource, 0, 0, r->uav_descriptor->handle); } + ID3D12Device_CreateUnorderedAccessView(g->device, r->d3d_resource, 0, 0, r->uav_descriptor->handle); + } - /* Create rtv descriptor */ - if (desc.flags & GPU_ResourceFlag_Renderable) + /* Create rtv descriptor */ + if (desc.flags & GPU_ResourceFlag_Renderable) + { + if (!r->rtv_descriptor) { r->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap); - ID3D12Device_CreateRenderTargetView(g->device, r->d3d_resource, 0, r->rtv_descriptor->handle); } + ID3D12Device_CreateRenderTargetView(g->device, r->d3d_resource, 0, r->rtv_descriptor->handle); + } - /* Create sampler descriptor */ - if (desc.kind == GPU_ResourceKind_Sampler) + /* Create sampler descriptor */ + if (desc.kind == GPU_ResourceKind_Sampler) + { + if (!r->sampler_descriptor) { - D3D12_SAMPLER_DESC d3d_desc = ZI; - d3d_desc.Filter = (D3D12_FILTER)desc.sampler.filter; - d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.x; - d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.y; - d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.z; - d3d_desc.MipLODBias = desc.sampler.mip_lod_bias; - d3d_desc.MaxAnisotropy = MaxU32(desc.sampler.max_anisotropy, 1); - d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)desc.sampler.comparison; - d3d_desc.BorderColor[0] = desc.sampler.border_color.x; - d3d_desc.BorderColor[1] = desc.sampler.border_color.y; - d3d_desc.BorderColor[2] = desc.sampler.border_color.z; - d3d_desc.BorderColor[3] = desc.sampler.border_color.w; - d3d_desc.MinLOD = desc.sampler.min_lod; - d3d_desc.MaxLOD = desc.sampler.max_lod; - - /* Defaults */ - if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; - if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; - if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; - if (d3d_desc.MaxLOD >= F32Infinity) - { - d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; - } r->sampler_descriptor = GPU_D12_AcquireDescriptor(g->sampler_heap); - ID3D12Device_CreateSampler(g->device, &d3d_desc, r->sampler_descriptor->handle); } + D3D12_SAMPLER_DESC d3d_desc = ZI; + d3d_desc.Filter = (D3D12_FILTER)desc.sampler.filter; + d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.x; + d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.y; + d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.z; + d3d_desc.MipLODBias = desc.sampler.mip_lod_bias; + d3d_desc.MaxAnisotropy = MaxU32(desc.sampler.max_anisotropy, 1); + d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)desc.sampler.comparison; + d3d_desc.BorderColor[0] = desc.sampler.border_color.x; + d3d_desc.BorderColor[1] = desc.sampler.border_color.y; + d3d_desc.BorderColor[2] = desc.sampler.border_color.z; + d3d_desc.BorderColor[3] = desc.sampler.border_color.w; + d3d_desc.MinLOD = desc.sampler.min_lod; + d3d_desc.MaxLOD = desc.sampler.max_lod; + + /* Defaults */ + if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; + if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; + if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; + if (d3d_desc.MaxLOD >= F32Infinity) + { + d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; + } + ID3D12Device_CreateSampler(g->device, &d3d_desc, r->sampler_descriptor->handle); } r->desc = desc; + r->buffer_size = buffer_size; return (GPU_Resource *)r; } @@ -1126,39 +1172,82 @@ void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags) GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource; - /* TODO: Reuse */ - switch (r->desc.kind) - { - case GPU_ResourceKind_Buffer: - case GPU_ResourceKind_Texture1D: - case GPU_ResourceKind_Texture2D: - case GPU_ResourceKind_Texture3D: - { - ID3D12Resource_Release(r->d3d_resource); - } - } - if (r->srv_descriptor) { GPU_D12_ReleaseDescriptor(r->srv_descriptor); + r->srv_descriptor = 0; } if (r->uav_descriptor) { GPU_D12_ReleaseDescriptor(r->uav_descriptor); + r->uav_descriptor = 0; } if (r->rtv_descriptor) { GPU_D12_ReleaseDescriptor(r->rtv_descriptor); + r->rtv_descriptor = 0; } if (r->sampler_descriptor) { GPU_D12_ReleaseDescriptor(r->sampler_descriptor); + r->sampler_descriptor = 0; + } + + if (flags & GPU_ReleaseFlag_Reuse) + { + GPU_ResourceDesc desc = r->desc; + u64 buffer_size = r->buffer_size; + u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); + u64 bin_index = reuse_hash % countof(g->resource_reuse_bins); + GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index]; + { + Lock lock = LockE(&bin->mutex); + { + GPU_D12_ResourceReuseList *list = bin->first; + for (; list; list = list->next) + { + if (list->hash == reuse_hash) break; + } + if (!list) + { + list = bin->first_free; + if (list) + { + bin->first_free = list->next; + } + else + { + Arena *perm = PermArena(); + PushAlign(perm, CachelineSize); + list = PushStruct(perm, GPU_D12_ResourceReuseList); + PushAlign(perm, CachelineSize); + } + list->hash = reuse_hash; + DllPushBack(bin->first, bin->last, list); + } + StackPushN(list->first, r, next_free); + } + Unlock(&lock); + } + } + else + { + switch (r->desc.kind) + { + case GPU_ResourceKind_Buffer: + case GPU_ResourceKind_Texture1D: + case GPU_ResourceKind_Texture2D: + case GPU_ResourceKind_Texture3D: + { + ID3D12Resource_Release(r->d3d_resource); + } + } + Lock lock = LockE(&g->free_resources_mutex); + r->next_free = g->first_free_resource; + g->first_free_resource = r; + Unlock(&lock); } - Lock lock = LockE(&g->free_resources_mutex); - r->next_free = g->first_free_resource; - g->first_free_resource = r; - Unlock(&lock); } u32 GPU_GetReadableId(GPU_Resource *resource) diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 42e570a7..1a8fe58f 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -90,7 +90,8 @@ Struct(GPU_D12_Resource) ID3D12Resource *d3d_resource; D3D12_RESOURCE_STATES state; - u64 reuse_hash; + + u64 buffer_size; /* Actual size of buffer in GPU memory */ GPU_D12_Descriptor *srv_descriptor; GPU_D12_Descriptor *uav_descriptor; @@ -104,6 +105,22 @@ Struct(GPU_D12_Resource) D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; }; +Struct(GPU_D12_ResourceReuseList) +{ + u64 hash; + GPU_D12_ResourceReuseList *next; + GPU_D12_ResourceReuseList *prev; + GPU_D12_Resource *first; +}; + +Struct(GPU_D12_ResourceReuseListBin) +{ + Mutex mutex; + GPU_D12_ResourceReuseList *first; + GPU_D12_ResourceReuseList *last; + GPU_D12_ResourceReuseList *first_free; +}; + //////////////////////////////// //~ Queue types @@ -253,6 +270,8 @@ Struct(GPU_D12_Swapchain) //////////////////////////////// //~ State types +#define GPU_D12_NumResourceReuseBins 1024 + Struct(GPU_D12_FiberState) { GPU_D12_CommandList *first_free_command_list; @@ -282,6 +301,7 @@ Struct(GPU_D12_SharedState) /* Resources */ Mutex free_resources_mutex; GPU_D12_Resource *first_free_resource; + GPU_D12_ResourceReuseListBin resource_reuse_bins[GPU_D12_NumResourceReuseBins]; /* Swapchains */ Mutex free_swapchains_mutex; @@ -299,7 +319,7 @@ Struct(GPU_D12_SharedState) GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id); DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl); -u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc); +u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc, u64 buffer_size); //////////////////////////////// //~ Startup