diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index 9401224b..facbd52e 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -95,31 +95,25 @@ Struct(G_IndexBufferDesc) //~ Resource dereference #if IsGpu - //- Scalar/Uniform dereference (faster on AMD hardware) - template StructuredBuffer G_UniformDeref(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 0]; } - template RWStructuredBuffer G_UniformDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 1]; } - ByteAddressBuffer G_UniformDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 2]; } - RWByteAddressBuffer G_UniformDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 3]; } - template Texture1D G_UniformDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } - template Texture2D G_UniformDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } - template Texture3D G_UniformDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; } - template RWTexture1D G_UniformDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } - template RWTexture2D G_UniformDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } - template RWTexture3D G_UniformDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; } - SamplerState G_UniformDeref(G_SamplerRef r) { return SamplerDescriptorHeap[r.v]; } + // TODO: Add explicit uniform dereference functions, since on AMD hardware + // non-uniform is slower and there are some shader-compilation issues in older + // driver versions - //- Vector/Non-Uniform dereference - template StructuredBuffer G_DynamicDeref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; } - template RWStructuredBuffer G_DynamicDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } - 
ByteAddressBuffer G_DynamicDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } - RWByteAddressBuffer G_DynamicDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; } - template Texture1D G_DynamicDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } - template Texture2D G_DynamicDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } - template Texture3D G_DynamicDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } - template RWTexture1D G_DynamicDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } - template RWTexture2D G_DynamicDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } - template RWTexture3D G_DynamicDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } - SamplerState G_DynamicDeref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template struct G_DerefImpl; + template<> struct G_DerefImpl< SamplerState > { static SamplerState Deref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } }; // Samplers are indexed from SamplerDescriptorHeap, not the resource heap + template struct G_DerefImpl< StructuredBuffer > { static StructuredBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; } }; + template struct G_DerefImpl< RWStructuredBuffer > { static RWStructuredBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } }; + template<> struct G_DerefImpl< ByteAddressBuffer > { static ByteAddressBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } }; // Read-only raw view is slot +2 (RW raw view is +3) + template<> struct G_DerefImpl< 
RWByteAddressBuffer > { static RWByteAddressBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; } }; + template struct G_DerefImpl< Texture1D > { static Texture1D Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } }; + template struct G_DerefImpl< Texture2D > { static Texture2D Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } }; + template struct G_DerefImpl< Texture3D > { static Texture3D Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } }; + template struct G_DerefImpl< RWTexture1D > { static RWTexture1D Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } }; + template struct G_DerefImpl< RWTexture2D > { static RWTexture2D Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } }; + template struct G_DerefImpl< RWTexture3D > { static RWTexture3D Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } }; + + // Wrap since HLSL can't handle template double angle bracket '>>' + #define G_Deref(ref, type, ...) 
(G_DerefImpl< type >::Deref((ref), ##__VA_ARGS__)) #endif //////////////////////////////////////////////////////////// @@ -229,7 +223,7 @@ Struct(G_FmtArg) void G_CommitPrint(G_TempPrintBuffer buff) { - RWByteAddressBuffer rw = G_UniformDerefRawRW(G_ShaderConst_PrintBuffer); + RWByteAddressBuffer rw = G_Deref(G_ShaderConst_PrintBuffer, RWByteAddressBuffer); if (buff.overflowed) { diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 08b27a99..5fe0d69d 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -534,7 +534,7 @@ void V_TickForever(WaveLaneCtx *lane) while (!shutdown) { shutdown = Atomic32Fetch(&V.shutdown); - P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 1); + P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 0); ////////////////////////////// //- Begin frame @@ -5369,8 +5369,6 @@ void V_TickForever(WaveLaneCtx *lane) } } - G_SyncReleaseFamilyLayout(cl, frame->bloom_chain); - ////////////////////////////// //- Finalization pass diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index a22dd613..3eb38d7d 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -3,7 +3,7 @@ f32 V_RandFromPos(Vec3 pos) { - Texture3D noise3d = G_UniformDeref3D(V_GpuConst_NoiseTex); + Texture3D noise3d = G_Deref(V_GpuConst_NoiseTex, Texture3D); u32 noise = noise3d[(Vec3U32)pos % G_BasicNoiseDims]; f32 rand = Norm16(noise); return rand; @@ -57,8 +57,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) //- Prepare shade ComputeShader(V_PrepareShadeCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - RWTexture2D shade = G_UniformDerefRW2D(frame.shade); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + RWTexture2D shade = G_Deref(frame.shade, RWTexture2D); Vec2 shade_pos = SV_DispatchThreadID + 0.5; if (all(shade_pos < G_Count2D(shade))) { @@ -70,12 +70,12 @@ ComputeShader(V_PrepareShadeCS) //- Prepare cells 
ComputeShader(V_PrepareCellsCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - Texture2D tiles = G_UniformDeref2D(frame.tiles); - RWTexture2D stains = G_UniformDerefRW2D(frame.stains); - RWTexture2D dry_stains = G_UniformDerefRW2D(frame.dry_stains); - RWTexture2D drynesses = G_UniformDerefRW2D(frame.drynesses); - RWTexture2D occluders = G_UniformDerefRW2D(frame.occluders); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + Texture2D tiles = G_Deref(frame.tiles, Texture2D); + RWTexture2D stains = G_Deref(frame.stains, RWTexture2D); + RWTexture2D dry_stains = G_Deref(frame.dry_stains, RWTexture2D); + RWTexture2D drynesses = G_Deref(frame.drynesses, RWTexture2D); + RWTexture2D occluders = G_Deref(frame.occluders, RWTexture2D); Vec2 cell_pos = SV_DispatchThreadID + 0.5; if (all(cell_pos < P_WorldCellsDims)) @@ -101,8 +101,8 @@ ComputeShader(V_PrepareCellsCS) Vec4 over_dry_stain = 0; for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) { - RWTexture2D cells = G_DynamicDerefRW2D(frame.particle_cells[layer]); - RWTexture2D densities = G_DynamicDerefRW2D(frame.particle_densities[layer]); + RWTexture2D cells = G_Deref(frame.particle_cells[layer], RWTexture2D); + RWTexture2D densities = G_Deref(frame.particle_densities[layer], RWTexture2D); u32 packed = cells[cell_pos]; if (packed & (1 << 31)) { @@ -159,8 +159,8 @@ ComputeShader(V_PrepareCellsCS) //- Clear particles ComputeShader(V_ClearParticlesCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - RWStructuredBuffer particles = G_UniformDerefRW(frame.particles); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + RWStructuredBuffer particles = G_Deref(frame.particles, RWStructuredBuffer); u32 particle_idx = SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) { @@ -179,14 +179,14 @@ ComputeShader(V_BackdropDownCS) i32 mip_idx = V_GpuConst_MipIdx; b32 is_first_pass = mip_idx == 0; - 
V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror], SamplerState); Texture2D bd_up = ( is_first_pass ? - G_UniformDeref2D(frame.backdrop_src) : - G_UniformDeref2D(frame.backdrop_chain, mip_idx - 1) + G_Deref(frame.backdrop_src, Texture2D) : + G_Deref(frame.backdrop_chain, Texture2D, mip_idx - 1) ); - RWTexture2D bd_down = G_UniformDerefRW2D(frame.backdrop_chain, mip_idx); + RWTexture2D bd_down = G_Deref(frame.backdrop_chain, RWTexture2D, mip_idx); Vec2 down_dims = G_Count2D(bd_down); Vec2 bd_pos = SV_DispatchThreadID + 0.5; @@ -238,10 +238,10 @@ ComputeShader(V_BackdropUpCS) { i32 mip_idx = V_GpuConst_MipIdx; - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - Texture2D bd_down = G_UniformDeref2D(frame.backdrop_chain, mip_idx + 1); - RWTexture2D bd_up = G_UniformDerefRW2D(frame.backdrop_chain, mip_idx); - SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + Texture2D bd_down = G_Deref(frame.backdrop_chain, Texture2D, mip_idx + 1); + RWTexture2D bd_up = G_Deref(frame.backdrop_chain, RWTexture2D, mip_idx); + SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror], SamplerState); Vec2 down_dims = G_Count2D(bd_down); Vec2 up_dims = G_Count2D(bd_up); @@ -296,8 +296,8 @@ ComputeShader(V_BackdropUpCS) VertexShader(V_QuadVS, V_QuadPSInput) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - StructuredBuffer quads = G_UniformDeref(frame.quads); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + StructuredBuffer quads = G_Deref(frame.quads, StructuredBuffer); V_Quad quad = quads[SV_InstanceID]; @@ -320,12 +320,12 @@ 
VertexShader(V_QuadVS, V_QuadPSInput) PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - RWTexture2D occluders = G_UniformDerefRW2D(frame.occluders); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState); + RWTexture2D occluders = G_Deref(frame.occluders, RWTexture2D); V_Quad quad = input.quad; - Texture2D tex = G_DynamicDeref2D(quad.tex); + Texture2D tex = G_Deref(quad.tex, Texture2D); Vec2 world_pos = input.world_pos; Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1)); @@ -356,9 +356,9 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ComputeShader(V_EmitParticlesCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - StructuredBuffer emitters = G_UniformDeref(frame.emitters); - RWStructuredBuffer particles = G_UniformDerefRW(frame.particles); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + StructuredBuffer emitters = G_Deref(frame.emitters, StructuredBuffer); + RWStructuredBuffer particles = G_Deref(frame.particles, RWStructuredBuffer); u32 emitter_idx = SV_DispatchThreadID; if (emitter_idx < frame.emitters_count) @@ -387,10 +387,10 @@ ComputeShader(V_EmitParticlesCS) ComputeShader(V_SimParticlesCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - Texture2D tiles = G_UniformDeref2D(frame.tiles); - RWStructuredBuffer particles = G_UniformDerefRW(frame.particles); - Texture2D occluders = G_UniformDeref2D(frame.occluders); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + Texture2D tiles = G_Deref(frame.tiles, Texture2D); + RWStructuredBuffer particles = G_Deref(frame.particles, RWStructuredBuffer); + Texture2D occluders = G_Deref(frame.occluders, Texture2D); u32 particle_idx = 
SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) @@ -415,7 +415,7 @@ ComputeShader(V_SimParticlesCS) if (particle.kind < 0) { u32 emitter_idx = -particle.kind - 1; - V_Emitter emitter = G_UniformDeref(frame.emitters)[emitter_idx]; + V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer)[emitter_idx]; f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle); f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed); @@ -430,8 +430,8 @@ ComputeShader(V_SimParticlesCS) if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) { V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); - RWTexture2D cells = G_DynamicDerefRW2D(frame.particle_cells[desc.layer]); - RWTexture2D densities = G_DynamicDerefRW2D(frame.particle_densities[desc.layer]); + RWTexture2D cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D); + RWTexture2D densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D); u32 packed = 0; packed |= (particle_idx & ((1 >> 24) - 1)) << 0; @@ -664,12 +664,12 @@ ComputeShader(V_SimParticlesCS) ComputeShader(V_ShadeCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - Texture2D tiles = G_UniformDeref2D(frame.tiles); - Texture2D albedo_tex = G_UniformDeref2D(frame.albedo); - RWTexture2D shade_tex = G_UniformDerefRW2D(frame.shade); - Texture2D drynesses = G_UniformDeref2D(frame.drynesses); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState); + Texture2D tiles = G_Deref(frame.tiles, Texture2D); + Texture2D albedo_tex = G_Deref(frame.albedo, Texture2D); + RWTexture2D shade_tex = G_Deref(frame.shade, RWTexture2D); + Texture2D drynesses = G_Deref(frame.drynesses, Texture2D); Vec2 shade_pos = SV_DispatchThreadID + 0.5; Vec2 
world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1)); @@ -700,18 +700,18 @@ ComputeShader(V_ShadeCS) ComputeShader(V_CompositeCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - // Texture2D shade_tex = G_UniformDeref2D(frame.shade); - SamplerState point_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); - SamplerState bilinear_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D albedo_tex = G_UniformDeref2D(frame.albedo); - RWTexture2D screen_tex = G_UniformDerefRW2D(frame.screen); - Texture2D stains = G_UniformDeref2D(frame.stains); - Texture2D dry_stains = G_UniformDeref2D(frame.dry_stains); - Texture2D drynesses = G_UniformDeref2D(frame.drynesses); - Texture2D tiles = G_UniformDeref2D(frame.tiles); - Texture2D backdrop = G_UniformDeref2D(frame.backdrop_chain); - StructuredBuffer particles = G_UniformDeref(frame.particles); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + // Texture2D shade_tex = G_Deref(frame.shade, Texture2D); + SamplerState point_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState); + SamplerState bilinear_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState); + Texture2D albedo_tex = G_Deref(frame.albedo, Texture2D); + RWTexture2D screen_tex = G_Deref(frame.screen, RWTexture2D); + Texture2D stains = G_Deref(frame.stains, Texture2D); + Texture2D dry_stains = G_Deref(frame.dry_stains, Texture2D); + Texture2D drynesses = G_Deref(frame.drynesses, Texture2D); + Texture2D tiles = G_Deref(frame.tiles, Texture2D); + Texture2D backdrop = G_Deref(frame.backdrop_chain, Texture2D); + StructuredBuffer particles = G_Deref(frame.particles, StructuredBuffer); Vec2 screen_pos = SV_DispatchThreadID + 0.5; Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); @@ -819,7 +819,7 @@ ComputeShader(V_CompositeCS) else if (tile != P_TileKind_Empty) { V_TileDesc 
tile_desc = frame.tile_descs[tile]; - Texture2D tile_tex = G_DynamicDeref2D(tile_desc.tex); + Texture2D tile_tex = G_Deref(tile_desc.tex, Texture2D); Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001); Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t); tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0); @@ -848,8 +848,8 @@ ComputeShader(V_CompositeCS) for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) { - Texture2D cells = G_UniformDeref2D(frame.particle_cells[layer]); - Texture2D densities = G_UniformDeref2D(frame.particle_densities[layer]); + Texture2D cells = G_Deref(frame.particle_cells[layer], Texture2D); + Texture2D densities = G_Deref(frame.particle_densities[layer], Texture2D); u32 packed = cells[cell_pos]; V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F); if (particle_kind != V_ParticleKind_None) @@ -1104,14 +1104,14 @@ ComputeShader(V_BloomDownCS) i32 mip_idx = V_GpuConst_MipIdx; b32 is_first_pass = mip_idx == 1; - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - RWTexture2D bloom_down = G_UniformDerefRW2D(frame.bloom_chain, mip_idx - 1); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState); + RWTexture2D bloom_down = G_Deref(frame.bloom_chain, RWTexture2D, mip_idx - 1); Texture2D bloom_up = ( is_first_pass ? 
- G_UniformDeref2D(frame.screen) : - G_UniformDeref2D(frame.bloom_chain, mip_idx - 2) + G_Deref(frame.screen, Texture2D) : + G_Deref(frame.bloom_chain, Texture2D, mip_idx - 2) ); Vec2 down_dims = G_Count2D(bloom_down); @@ -1173,20 +1173,16 @@ ComputeShader(V_BloomUpCS) { i32 mip_idx = V_GpuConst_MipIdx; - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D bloom_down = G_UniformDeref2D(frame.bloom_chain, mip_idx); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState); + Texture2D bloom_down = G_Deref(frame.bloom_chain, Texture2D, mip_idx); b32 is_last_pass = mip_idx == 0; - RWTexture2D bloom_up; - if (is_last_pass) - { - bloom_up = G_UniformDerefRW2D(frame.screen); - } - else - { - bloom_up = G_UniformDerefRW2D(frame.bloom_chain, mip_idx - 1); - } + RWTexture2D bloom_up = ( + is_last_pass ? 
+ G_Deref(frame.screen, RWTexture2D) : + G_Deref(frame.bloom_chain, RWTexture2D, mip_idx - 1) + ); Vec2 down_dims = G_Count2D(bloom_down); Vec2 up_dims = G_Count2D(bloom_up); @@ -1238,10 +1234,10 @@ ComputeShader(V_BloomUpCS) ComputeShader(V_FinalizeCS) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - SamplerState bilinear_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Texture2D bloom_tex = G_UniformDeref2D(frame.bloom_chain, 0); - RWTexture2D screen_tex = G_UniformDerefRW2D(frame.screen); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + SamplerState bilinear_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState); + Texture2D bloom_tex = G_Deref(frame.bloom_chain, Texture2D); + RWTexture2D screen_tex = G_Deref(frame.screen, RWTexture2D); Vec2 screen_pos = SV_DispatchThreadID + 0.5; b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); @@ -1271,8 +1267,8 @@ ComputeShader(V_FinalizeCS) VertexShader(V_DVertVS, V_DVertPSInput) { - V_SharedFrame frame = G_UniformDeref(V_GpuConst_Frame)[0]; - StructuredBuffer verts = G_UniformDeref(frame.dverts); + V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer)[0]; + StructuredBuffer verts = G_Deref(frame.dverts, StructuredBuffer); V_DVert vert = verts[SV_VertexID]; diff --git a/src/proto/proto_gpu.g b/src/proto/proto_gpu.g index 8deaa97f..6b8fd989 100644 --- a/src/proto/proto_gpu.g +++ b/src/proto/proto_gpu.g @@ -3,8 +3,8 @@ ComputeShader(PT_TestCS) { - PT_SharedFrame frame = G_UniformDeref(PT_ShaderConst_Frame)[0]; - RWTexture2D target_tex = G_UniformDerefRW2D(frame.compute_target); + PT_SharedFrame frame = G_Deref(PT_ShaderConst_Frame, StructuredBuffer)[0]; + RWTexture2D target_tex = G_Deref(frame.compute_target, RWTexture2D); Vec2U32 target_tex_size = G_Count2D(target_tex); @@ -35,10 +35,10 @@ VertexShader(PT_BlitVS, PT_BlitPSInput) PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) { - 
PT_SharedFrame frame = G_UniformDeref(PT_ShaderConst_Frame)[0]; - SamplerState sampler = G_UniformDeref(frame.sampler); - Texture2D src = G_UniformDeref2D(frame.compute_target); - Texture3D noise = G_UniformDeref3D(frame.noise_tex); + PT_SharedFrame frame = G_Deref(PT_ShaderConst_Frame, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(frame.sampler, SamplerState); + Texture2D src = G_Deref(frame.compute_target, Texture2D); + Texture3D noise = G_Deref(frame.noise_tex, Texture3D); Vec2 uv = input.src_uv; Vec4 tex_col = src.Sample(sampler, uv); diff --git a/src/ui/ui_gpu.g b/src/ui/ui_gpu.g index 4c730574..b1118e7c 100644 --- a/src/ui/ui_gpu.g +++ b/src/ui/ui_gpu.g @@ -6,8 +6,8 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) { - UI_GpuParams params = G_UniformDeref(UI_GpuConst_Params)[0]; - StructuredBuffer rects = G_UniformDeref(params.rects); + UI_GpuParams params = G_Deref(UI_GpuConst_Params, StructuredBuffer)[0]; + StructuredBuffer rects = G_Deref(params.rects, StructuredBuffer); UI_GpuRect rect = rects[SV_InstanceID]; Vec2 rect_uv = RectUvFromIdx(SV_VertexID); @@ -33,8 +33,8 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) { - UI_GpuParams params = G_UniformDeref(UI_GpuConst_Params)[0]; - SamplerState sampler = G_UniformDeref(params.sampler); + UI_GpuParams params = G_Deref(UI_GpuConst_Params, StructuredBuffer)[0]; + SamplerState sampler = G_Deref(params.sampler, SamplerState); UI_GpuRect rect = input.rect; Vec2 rect_uv = input.rect_uv; @@ -73,7 +73,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) } else { - Texture2D tex = G_DynamicDeref2D(rect.tex); + Texture2D tex = G_Deref(rect.tex, Texture2D); background_premul = tex.SampleLevel(sampler, input.tex_uv, 0); background_premul.rgb *= background_premul.a; }