From 8e9f7b4945cdf3e1d32fe7d98250de2e2efbfdde Mon Sep 17 00:00:00 2001 From: jacob Date: Mon, 2 Mar 2026 17:03:47 -0800 Subject: [PATCH] explicit uniform/non-uniform resource dereference --- src/base/base_shader.gh | 2 +- src/gpu/gpu_shared.cgh | 43 +++++----- src/pp/pp_vis/pp_vis_gpu.g | 157 ++++++++++++++++++------------------- src/ui/ui_gpu.g | 18 ++--- 4 files changed, 113 insertions(+), 107 deletions(-) diff --git a/src/base/base_shader.gh b/src/base/base_shader.gh index bd7ce248..4dbde723 100644 --- a/src/base/base_shader.gh +++ b/src/base/base_shader.gh @@ -149,7 +149,7 @@ Inline f64 Norm53(u64 v) //~ Comparison #define MatchFloor(a, b) all(floor(a) == floor(b)) -#define IsInside(pos, dims) (all(pos >= 0) && all(pos <= (dims))) +#define IsInside(pos, dims) all(and((pos) >= 0, (pos) < (dims))) //////////////////////////////////////////////////////////// //~ Rotation diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index e07a6062..374212ff 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -83,24 +83,31 @@ Enum(G_BasicSamplerKind) //~ Resource dereference #if IsGpu - // TODO: Non-uniform resource access currently is assumed as the default - // behavior. We may want to add explicit "uniform" variants for - // optimization on AMD hardware in the future. + //- Scalar/Uniform dereference + SamplerState G_SDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[idx]; } + template StructuredBuffer G_SDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; } + ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; } + template Texture1D G_SDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; } + template Texture2D G_SDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; } + template Texture3D G_SDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; } + template RWStructuredBuffer G_SDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; } + RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; } + template RWTexture1D G_SDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; } + template RWTexture2D G_SDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; } + template RWTexture3D G_SDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; } - template StructuredBuffer G_Dereference(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template RWStructuredBuffer G_DereferenceRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - - ByteAddressBuffer G_Dereference(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - RWByteAddressBuffer G_DereferenceRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - - template Texture1D G_Dereference(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template Texture2D G_Dereference(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template Texture3D G_Dereference(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } - template RWTexture1D G_DereferenceRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - template RWTexture2D G_DereferenceRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - template RWTexture3D G_DereferenceRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - - SamplerState G_Dereference(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } + //- Vector/Non-Uniform dereference (slower on AMD) + SamplerState G_VDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[NonUniformResourceIndex(idx)]; } + template StructuredBuffer G_VDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template Texture1D G_VDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template Texture2D G_VDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template Texture3D G_VDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template RWStructuredBuffer G_VDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template RWTexture1D G_VDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template RWTexture2D G_VDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } + template RWTexture3D G_VDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } #endif //////////////////////////////////////////////////////////// @@ -216,7 +223,7 @@ Struct(G_FmtArg) void G_CommitPrint(G_TempPrintBuffer buff) { - RWByteAddressBuffer rw = G_DereferenceRW(G_ShaderConst_PrintBufferRef); + RWByteAddressBuffer rw = G_SDerefRW(G_ShaderConst_PrintBufferRef); if (buff.overflowed) { diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 8e540f88..16c81621 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -3,7 +3,7 @@ f32 V_RandFromPos(Vec3 pos) { - Texture3D noise3d = G_Dereference(V_GpuConst_NoiseTex); + Texture3D noise3d = G_SDeref(V_GpuConst_NoiseTex); // TODO: Compile-time noise dims u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)]; f32 rand = Norm16(noise); @@ -58,8 +58,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) //- Prepare shade ImplComputeShader2D(V_PrepareShadeCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - RWTexture2D shade = G_DereferenceRW(frame.shade); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + RWTexture2D shade = G_SDerefRW(frame.shade); Vec2 shade_pos = SV_DispatchThreadID + 0.5; if (all(shade_pos < countof(shade))) { @@ -71,12 +71,12 @@ ImplComputeShader2D(V_PrepareShadeCS) //- Prepare cells ImplComputeShader2D(V_PrepareCellsCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - Texture2D tiles = G_Dereference(frame.tiles); - RWTexture2D stains = G_DereferenceRW(frame.stains); - RWTexture2D dry_stains = G_DereferenceRW(frame.dry_stains); - RWTexture2D drynesses = G_DereferenceRW(frame.drynesses); - RWTexture2D occluders = G_DereferenceRW(frame.occluders); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + Texture2D tiles = G_SDeref(frame.tiles); + RWTexture2D stains = G_SDerefRW(frame.stains); + RWTexture2D dry_stains = G_SDerefRW(frame.dry_stains); + RWTexture2D drynesses = G_SDerefRW(frame.drynesses); + RWTexture2D occluders = G_SDerefRW(frame.occluders); Vec2 cell_pos = SV_DispatchThreadID + 0.5; if (all(cell_pos < P_WorldCellsDims)) @@ -102,8 +102,8 @@ ImplComputeShader2D(V_PrepareCellsCS) Vec4 over_dry_stain = 0; for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) { - RWTexture2D cells = G_DereferenceRW(frame.particle_cells[layer]); - RWTexture2D densities = G_DereferenceRW(frame.particle_densities[layer]); + RWTexture2D cells = G_VDerefRW(frame.particle_cells[layer]); + RWTexture2D densities = G_VDerefRW(frame.particle_densities[layer]); u32 packed = cells[cell_pos]; if (packed & (1 << 31)) { @@ -160,8 +160,8 @@ ImplComputeShader2D(V_PrepareCellsCS) //- Clear particles ImplComputeShader(V_ClearParticlesCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - RWStructuredBuffer particles = G_DereferenceRW(frame.particles); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + RWStructuredBuffer particles = G_SDerefRW(frame.particles); u32 particle_idx = SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) { @@ -180,20 +180,19 @@ ImplComputeShader2D(V_BackdropDownCS) i32 mip_idx = V_GpuConst_MipIdx; b32 is_first_pass = mip_idx == 0; - - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); Texture2D bd_up; if (is_first_pass) { - bd_up = G_Dereference(frame.backdrop_src); + bd_up = G_SDeref(frame.backdrop_src); } else { - bd_up = G_Dereference(frame.backdrop_mips[mip_idx - 1]); + bd_up = G_SDeref(frame.backdrop_mips[mip_idx - 1]); } - RWTexture2D bd_down = G_DereferenceRW(frame.backdrop_mips[mip_idx]); + RWTexture2D bd_down = G_VDerefRW(frame.backdrop_mips[mip_idx]); Vec2 down_dims = countof(bd_down); @@ -233,7 +232,7 @@ ImplComputeShader2D(V_BackdropDownCS) } } - if (IsInside(bd_pos, down_dims)) + if (all(bd_pos < down_dims)) { bd_down[bd_pos] = result; } @@ -246,10 +245,10 @@ ImplComputeShader2D(V_BackdropUpCS) { i32 mip_idx = V_GpuConst_MipIdx; - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - Texture2D bd_down = G_Dereference(frame.backdrop_mips[mip_idx + 1]); - RWTexture2D bd_up = G_DereferenceRW(frame.backdrop_mips[mip_idx]); - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + Texture2D bd_down = G_SDeref(frame.backdrop_mips[mip_idx + 1]); + RWTexture2D bd_up = G_SDerefRW(frame.backdrop_mips[mip_idx]); + SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); Vec2 down_dims = countof(bd_down); Vec2 up_dims = countof(bd_up); @@ -290,7 +289,7 @@ ImplComputeShader2D(V_BackdropUpCS) ) * 1.0f / 41.0f; } - if (IsInside(bd_pos, up_dims)) + if (all(bd_pos < up_dims)) { bd_up[bd_pos] = result; } @@ -304,8 +303,8 @@ ImplComputeShader2D(V_BackdropUpCS) ImplVertexShader(V_QuadVS, V_QuadPSInput) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - StructuredBuffer quads = G_Dereference(frame.quads); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + StructuredBuffer quads = G_SDeref(frame.quads); V_Quad quad = quads[SV_InstanceID]; @@ -328,13 +327,13 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput) ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - StructuredBuffer quads = G_Dereference(frame.quads); - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - RWTexture2D occluders = G_DereferenceRW(frame.occluders); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + StructuredBuffer quads = G_SDeref(frame.quads); + SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + RWTexture2D occluders = G_SDerefRW(frame.occluders); V_Quad quad = quads[input.quad_idx]; - Texture2D tex = G_Dereference(quad.tex); + Texture2D tex = G_VDeref(quad.tex); Vec2 world_pos = input.world_pos; Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1)); @@ -365,9 +364,9 @@ ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ImplComputeShader(V_EmitParticlesCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - StructuredBuffer emitters = G_Dereference(frame.emitters); - RWStructuredBuffer particles = G_DereferenceRW(frame.particles); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + StructuredBuffer emitters = G_SDeref(frame.emitters); + RWStructuredBuffer particles = G_SDerefRW(frame.particles); u32 emitter_idx = SV_DispatchThreadID; if (emitter_idx < frame.emitters_count) @@ -396,10 +395,10 @@ ImplComputeShader(V_EmitParticlesCS) ImplComputeShader(V_SimParticlesCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - Texture2D tiles = G_Dereference(frame.tiles); - RWStructuredBuffer particles = G_DereferenceRW(frame.particles); - Texture2D occluders = G_Dereference(frame.occluders); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + Texture2D tiles = G_SDeref(frame.tiles); + RWStructuredBuffer particles = G_SDerefRW(frame.particles); + Texture2D occluders = G_SDeref(frame.occluders); u32 particle_idx = SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) @@ -424,7 +423,7 @@ ImplComputeShader(V_SimParticlesCS) if (particle.kind < 0) { u32 emitter_idx = -particle.kind - 1; - V_Emitter emitter = G_Dereference(frame.emitters)[emitter_idx]; + V_Emitter emitter = G_SDeref(frame.emitters)[emitter_idx]; f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle); f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed); @@ -439,8 +438,8 @@ ImplComputeShader(V_SimParticlesCS) if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) { V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); - RWTexture2D cells = G_DereferenceRW(frame.particle_cells[desc.layer]); - RWTexture2D densities = G_DereferenceRW(frame.particle_densities[desc.layer]); + RWTexture2D cells = G_VDerefRW(frame.particle_cells[desc.layer]); + RWTexture2D densities = G_VDerefRW(frame.particle_densities[desc.layer]); u32 packed = 0; packed |= (particle_idx & ((1 >> 24) - 1)) << 0; @@ -673,12 +672,12 @@ ImplComputeShader(V_SimParticlesCS) ImplComputeShader2D(V_ShadeCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - Texture2D tiles = G_Dereference(frame.tiles); - Texture2D albedo_tex = G_Dereference(frame.albedo); - RWTexture2D shade_tex = G_DereferenceRW(frame.shade); - Texture2D drynesses = G_Dereference(frame.drynesses); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + Texture2D tiles = G_SDeref(frame.tiles); + Texture2D albedo_tex = G_SDeref(frame.albedo); + RWTexture2D shade_tex = G_SDerefRW(frame.shade); + Texture2D drynesses = G_SDeref(frame.drynesses); Vec2 shade_pos = SV_DispatchThreadID + 0.5; Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1)); @@ -709,18 +708,18 @@ ImplComputeShader2D(V_ShadeCS) ImplComputeShader2D(V_CompositeCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - // Texture2D shade_tex = G_Dereference(frame.shade); - SamplerState point_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D albedo_tex = G_Dereference(frame.albedo); - RWTexture2D screen_tex = G_DereferenceRW(frame.screen); - Texture2D stains = G_Dereference(frame.stains); - Texture2D dry_stains = G_Dereference(frame.dry_stains); - Texture2D drynesses = G_Dereference(frame.drynesses); - Texture2D tiles = G_Dereference(frame.tiles); - Texture2D backdrop = G_Dereference(frame.backdrop_mips[0]); - StructuredBuffer particles = G_Dereference(frame.particles); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + // Texture2D shade_tex = G_SDeref(frame.shade); + SamplerState point_sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + SamplerState bilinear_sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + Texture2D albedo_tex = G_SDeref(frame.albedo); + RWTexture2D screen_tex = G_SDerefRW(frame.screen); + Texture2D stains = G_SDeref(frame.stains); + Texture2D dry_stains = G_SDeref(frame.dry_stains); + Texture2D drynesses = G_SDeref(frame.drynesses); + Texture2D tiles = G_SDeref(frame.tiles); + Texture2D backdrop = G_SDeref(frame.backdrop_mips[0]); + StructuredBuffer particles = G_SDeref(frame.particles); Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5; Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); @@ -828,7 +827,7 @@ ImplComputeShader2D(V_CompositeCS) else if (tile != P_TileKind_Empty) { V_TileDesc tile_desc = frame.tile_descs[tile]; - Texture2D tile_tex = G_Dereference(tile_desc.tex); + Texture2D tile_tex = G_VDeref(tile_desc.tex); Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001); Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t); tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0); @@ -857,8 +856,8 @@ ImplComputeShader2D(V_CompositeCS) for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) { - Texture2D cells = G_Dereference(frame.particle_cells[layer]); - Texture2D densities = G_Dereference(frame.particle_densities[layer]); + Texture2D cells = G_SDeref(frame.particle_cells[layer]); + Texture2D densities = G_SDeref(frame.particle_densities[layer]); u32 packed = cells[cell_pos]; V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F); if (particle_kind != V_ParticleKind_None) @@ -1112,19 +1111,19 @@ ImplComputeShader2D(V_BloomDownCS) { i32 mip_idx = V_GpuConst_MipIdx; - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - RWTexture2D bloom_down = G_DereferenceRW(frame.bloom_mips[mip_idx - 1]); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + RWTexture2D bloom_down = G_SDerefRW(frame.bloom_mips[mip_idx - 1]); Texture2D bloom_up; b32 is_first_pass = mip_idx == 1; if (is_first_pass) { - bloom_up = G_Dereference(frame.screen); + bloom_up = G_SDeref(frame.screen); } else { - bloom_up = G_Dereference(frame.bloom_mips[mip_idx - 2]); + bloom_up = G_SDeref(frame.bloom_mips[mip_idx - 2]); } Vec2 down_dims = countof(bloom_down); @@ -1187,19 +1186,19 @@ ImplComputeShader2D(V_BloomUpCS) { i32 mip_idx = V_GpuConst_MipIdx; - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D bloom_down = G_Dereference(frame.bloom_mips[mip_idx]); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + Texture2D bloom_down = G_SDeref(frame.bloom_mips[mip_idx]); b32 is_last_pass = mip_idx == 0; RWTexture2D bloom_up; if (is_last_pass) { - bloom_up = G_DereferenceRW(frame.screen); + bloom_up = G_SDerefRW(frame.screen); } else { - bloom_up = G_DereferenceRW(frame.bloom_mips[mip_idx - 1]); + bloom_up = G_SDerefRW(frame.bloom_mips[mip_idx - 1]); } Vec2 down_dims = countof(bloom_down); @@ -1252,10 +1251,10 @@ ImplComputeShader2D(V_BloomUpCS) ImplComputeShader2D(V_FinalizeCS) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D bloom_tex = G_Dereference(frame.bloom_mips[0]); - RWTexture2D screen_tex = G_DereferenceRW(frame.screen); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + SamplerState bilinear_sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + Texture2D bloom_tex = G_SDeref(frame.bloom_mips[0]); + RWTexture2D screen_tex = G_SDerefRW(frame.screen); Vec2 screen_pos = SV_DispatchThreadID + 0.5; b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); @@ -1285,8 +1284,8 @@ ImplComputeShader2D(V_FinalizeCS) ImplVertexShader(V_DVertVS, V_DVertPSInput) { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - StructuredBuffer verts = G_Dereference(frame.dverts); + V_SharedFrame frame = G_SDeref(V_GpuConst_Frame)[0]; + StructuredBuffer verts = G_SDeref(frame.dverts); V_DVert vert = verts[SV_VertexID]; diff --git a/src/ui/ui_gpu.g b/src/ui/ui_gpu.g index f7811ccf..2ad2bfa7 100644 --- a/src/ui/ui_gpu.g +++ b/src/ui/ui_gpu.g @@ -6,8 +6,8 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput) { - UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; - StructuredBuffer rects = G_Dereference(params.rects); + UI_GpuParams params = G_SDeref(UI_GpuConst_Params)[0]; + StructuredBuffer rects = G_SDeref(params.rects); UI_GpuRect rect = rects[SV_InstanceID]; Vec2 rect_uv = RectUvFromIdx(SV_VertexID); @@ -35,9 +35,9 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput) ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) { - UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; - StructuredBuffer rects = G_Dereference(params.rects); - SamplerState sampler = G_Dereference(params.sampler); + UI_GpuParams params = G_SDeref(UI_GpuConst_Params)[0]; + StructuredBuffer rects = G_SDeref(params.rects); + SamplerState sampler = G_SDeref(params.sampler); UI_GpuRect rect = rects[input.rect_idx]; @@ -77,7 +77,7 @@ ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) } else { - Texture2D tex = G_Dereference(rect.tex); + Texture2D tex = G_VDeref(rect.tex); background_premul = tex.SampleLevel(sampler, input.tex_uv, 0); background_premul.rgb *= background_premul.a; } @@ -135,9 +135,9 @@ ImplVertexShader(UI_BlitVS, UI_BlitPSInput) ImplPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) { - UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; - Texture2D tex = G_Dereference(params.target_ro); - SamplerState sampler = G_Dereference(params.sampler); + UI_GpuParams params = G_SDeref(UI_GpuConst_Params)[0]; + Texture2D tex = G_SDeref(params.target_ro); + SamplerState sampler = G_SDeref(params.sampler); Vec2 uv = input.src_uv; Vec4 result = tex.Sample(sampler, uv);