From cbf0961f4cd6963968ddb7609df682d2d2e62331 Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 24 Feb 2026 03:05:19 -0600 Subject: [PATCH] add mirror-mode basic samplers --- src/gpu/gpu_common.c | 33 ++++++++++++++++++++++++++------ src/gpu/gpu_core.h | 16 ++++++++-------- src/gpu/gpu_dx12/gpu_dx12_core.c | 22 ++++++++++++++------- src/gpu/gpu_dx12/gpu_dx12_core.h | 1 + src/gpu/gpu_shared.cgh | 3 +++ 5 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 3fef7737..f2779408 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -23,10 +23,11 @@ void G_BootstrapCommon(void) { G_ResourceHandle blank_tex = G_PushTexture2D( gpu_perm, cl, - G_Format_R8G8B8A8_Uint, + G_Format_R8G8B8A8_Unorm, VEC2I32(8, 8), G_Layout_Common, - .flags = G_ResourceFlag_ZeroMemory + .flags = G_ResourceFlag_ZeroMemory, + .name = Lit("Blank texture") ); G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex); } @@ -44,7 +45,8 @@ void G_BootstrapCommon(void) gpu_perm, cl, G_Format_R16_Uint, noise_dims, - G_Layout_Common + G_Layout_Common, + .name = Lit("Noise texture") ); G_CopyCpuToTexture( cl, @@ -80,6 +82,13 @@ void G_BootstrapCommon(void) G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); } break; + case G_BasicSamplerKind_PointMirror: + { + G_Filter filter = G_Filter_MinMagMipPoint; + G_AddressMode address_mode = G_AddressMode_Mirror; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; case G_BasicSamplerKind_BilinearClamp: { G_Filter filter = G_Filter_MinMagLinearMipPoint; @@ -94,6 +103,13 @@ void G_BootstrapCommon(void) G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); } break; + case G_BasicSamplerKind_BilinearMirror: + { + G_Filter filter = G_Filter_MinMagLinearMipPoint; + G_AddressMode address_mode = G_AddressMode_Mirror; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; case G_BasicSamplerKind_TrilinearClamp: { G_Filter filter = G_Filter_MinMagMipLinear; @@ -108,14 +124,19 @@ void G_BootstrapCommon(void) G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); } break; + case G_BasicSamplerKind_TrilinearMirror: + { + G_Filter filter = G_Filter_MinMagMipLinear; + G_AddressMode address_mode = G_AddressMode_Mirror; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; } G.basic_samplers[sampler_kind] = sampler; } } G_CommitCommandList(cl); - - // Barrier all queues until direct queue finishes initializing resources - G_Sync(G_QueueMask_Direct, G_QueueMask_All); + G_QueueSync(G_QueueMask_Direct, G_QueueMask_All); } //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 8186f548..635ad786 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -284,7 +284,7 @@ Enum(G_Layout) // Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`. // When barrier executes: // - Necessary resource flushes will occur based on `access_prev` & `access_next` -// - Resource layout will transition based on `layout` (if specified) +// - Texture layout will transition based on `layout` (if specified) Struct(G_MemoryBarrierDesc) { G_ResourceHandle resource; @@ -294,7 +294,7 @@ Struct(G_MemoryBarrierDesc) G_Access access_prev; G_Access access_next; G_Layout layout; - RngI32 mips; // Inclusive range of texture mip levels to sync + RngI32 mips; // Inclusive range of texture mip levels to sync }; //////////////////////////////////////////////////////////// @@ -794,23 +794,23 @@ void G_DiscardRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_targe void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource); //////////////////////////////////////////////////////////// -//~ @hookdecl Synchronization +//~ @hookdecl Queue synchronization i64 G_CompletionValueFromQueue(G_QueueKind queue_kind); i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind); G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask); G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask); -void G_SyncEx(G_QueueBarrierDesc desc); +void G_QueueSyncEx(G_QueueBarrierDesc desc); -#define G_Sync(completion_mask, ...) \ - G_SyncEx((G_QueueBarrierDesc) { \ +#define G_QueueSync(completion_mask, ...) \ + G_QueueSyncEx((G_QueueBarrierDesc) { \ .completions = G_CompletionTargetsFromQueues(completion_mask), \ __VA_ARGS__ \ }) -#define G_SyncGpu(completion_mask, wait_mask) G_Sync((completion_mask), .wait_queues = (wait_mask)) -#define G_SyncCpu(completion_mask) G_Sync((completion_mask), .wait_cpu = 1); +#define G_QueueSyncGpu(completion_mask, wait_mask) G_QueueSync((completion_mask), .wait_queues = (wait_mask)) +#define G_QueueSyncCpu(completion_mask) G_QueueSync((completion_mask), .wait_cpu = 1); //////////////////////////////////////////////////////////// //~ @hookdecl Statistics diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 73c7af4f..05edb15c 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -10,8 +10,8 @@ void G_Bootstrap(void) Arena *perm = PermArena(); // NOTE: Nsight seems to have trouble attaching when independent devices are enabled - b32 independent_devices_enabled = !CommandlineArgFromName(Lit("no-independent-d3d12-device")).exists; - LogInfoF("D3D12 independent devices enabled: %F", FmtSint(independent_devices_enabled)); + G_D12.independent_devices_enabled = !CommandlineArgFromName(Lit("no-independent-d3d12-device")).exists; + LogInfoF("D3D12 independent devices enabled: %F", FmtSint(G_D12.independent_devices_enabled)); ////////////////////////////// //- Extract agility SDK @@ -131,7 +131,7 @@ void G_Bootstrap(void) } if (skips <= 0) { - if (independent_devices_enabled) + if (G_D12.independent_devices_enabled) { hr = ID3D12DeviceFactory_CreateDevice(G_D12.device_factory, (IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); } @@ -276,6 +276,7 @@ void G_Bootstrap(void) } // Log device configuration + if (G_D12.independent_devices_enabled) { D3D12_DEVICE_CONFIGURATION_DESC desc = Zi; ID3D12DeviceConfiguration_GetDesc(G_D12.device_config, &desc); @@ -424,7 +425,14 @@ void G_Bootstrap(void) desc.Desc_1_1.NumStaticSamplers = 0; desc.Desc_1_1.pStaticSamplers = 0; desc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; - hr = ID3D12DeviceConfiguration_SerializeVersionedRootSignature(G_D12.device_config, &desc, &blob, 0); + if (G_D12.independent_devices_enabled) + { + hr = ID3D12DeviceConfiguration_SerializeVersionedRootSignature(G_D12.device_config, &desc, &blob, 0); + } + else + { + hr = D3D12SerializeVersionedRootSignature(&desc, &blob, 0); + } } // Create root signature @@ -3187,7 +3195,7 @@ void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_hand } //////////////////////////////////////////////////////////// -//~ @hookimpl Synchronization +//~ @hookimpl Queue synchronization i64 G_CompletionValueFromQueue(G_QueueKind queue_kind) { @@ -3233,7 +3241,7 @@ G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask) return completions; } -void G_SyncEx(G_QueueBarrierDesc desc) +void G_QueueSyncEx(G_QueueBarrierDesc desc) { u64 fences_count = 0; ID3D12Fence *fences[G_QueueKind_COUNT] = Zi; @@ -3569,7 +3577,7 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) } // TODO: Collect asynchronously - G_SyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute); + G_QueueSyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute); for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) { diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 20670a47..51938e27 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -442,6 +442,7 @@ Struct(G_D12_AsyncCtx) Struct(G_D12_Ctx) { + b32 independent_devices_enabled; IsolatedAtomic64 resource_creation_gen; // Stats diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index 9936832e..c1420f46 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -78,10 +78,13 @@ Enum(G_BasicSamplerKind) { G_BasicSamplerKind_PointClamp, G_BasicSamplerKind_PointWrap, + G_BasicSamplerKind_PointMirror, G_BasicSamplerKind_BilinearClamp, G_BasicSamplerKind_BilinearWrap, + G_BasicSamplerKind_BilinearMirror, G_BasicSamplerKind_TrilinearClamp, G_BasicSamplerKind_TrilinearWrap, + G_BasicSamplerKind_TrilinearMirror, G_BasicSamplerKind_COUNT };