From 65ae383d75c443633407765448831fedf548934b Mon Sep 17 00:00:00 2001 From: jacob Date: Thu, 19 Feb 2026 17:07:27 -0600 Subject: [PATCH] specify compute group size only in declaration --- src/base/base.cgh | 15 ++++++--- src/gpu/gpu_shared.cgh | 2 +- src/pp/pp_vis/pp_vis_gpu.g | 29 ++++++++--------- src/pp/pp_vis/pp_vis_gpu.gh | 29 ++++++++--------- src/proto/proto.c | 2 +- src/proto/proto.lay | 5 +-- src/proto/proto_shaders.g | 22 ++----------- src/proto/proto_shaders.gh | 31 +++++++++++++++++++ .../{proto_shaders.cgh => proto_shared.cgh} | 0 src/ui/ui_gpu.g | 8 ++--- src/ui/ui_gpu.gh | 8 ++--- 11 files changed, 87 insertions(+), 64 deletions(-) create mode 100644 src/proto/proto_shaders.gh rename src/proto/{proto_shaders.cgh => proto_shared.cgh} (100%) diff --git a/src/base/base.cgh b/src/base/base.cgh index 33bd2c0c..1b0e4065 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -727,11 +727,16 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; } Struct(ComputeShader) { ResourceKey resource; }; #elif IsGpu #define Semantic(t, n) t n : n - #define ComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID)) - #define ComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID)) - #define ComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) - #define VertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID)) - #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) + #define DeclComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID)) + #define DeclComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID)) + #define DeclComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) + #define DeclVertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID)) + #define DeclPixelShader(name, return_type, ...) return_type name(__VA_ARGS__) + #define ImplComputeShader(name) void name(Semantic(u32, SV_DispatchThreadID)) + #define ImplComputeShader2D(name) void name(Semantic(Vec2U32, SV_DispatchThreadID)) + #define ImplComputeShader3D(name) void name(Semantic(Vec3U32, SV_DispatchThreadID)) + #define ImplVertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID)) + #define ImplPixelShader(name, return_type, ...) return_type name(__VA_ARGS__) #endif //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index ce528aaa..2dff069e 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -44,7 +44,7 @@ Struct(G_SamplerStateRef) { u32 v; }; #if IsCpu #define G_ForceDeclConstant(type, name, slot) \ - Enum(name##__shaderconstantenum) { name = slot }; \ + enum { name = slot }; \ Struct(name##__shaderconstanttype) { type v; } #define G_DeclConstant(type, name, slot) \ StaticAssert(sizeof(type) <= 4); \ diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 7b300512..53db5044 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -56,7 +56,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) //////////////////////////////////////////////////////////// //~ Prepare frame -ComputeShader2D(V_PrepareShadeCS, 16, 16) +//- Prepare shade +ImplComputeShader2D(V_PrepareShadeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; RWTexture2D shade = G_Dereference(frame.shade_rw); @@ -69,7 +70,7 @@ ComputeShader2D(V_PrepareShadeCS, 16, 16) } //- Prepare cells -ComputeShader2D(V_PrepareCellsCS, 16, 16) +ImplComputeShader2D(V_PrepareCellsCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); @@ -158,7 +159,7 @@ ComputeShader2D(V_PrepareCellsCS, 16, 16) } //- Clear particles -ComputeShader(V_ClearParticlesCS, 256) +ImplComputeShader(V_ClearParticlesCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; RWStructuredBuffer particles = G_Dereference(frame.particles); @@ -175,7 +176,7 @@ ComputeShader(V_ClearParticlesCS, 256) ////////////////////////////// //- Vertex shader -VertexShader(V_QuadVS, V_QuadPSInput) +ImplVertexShader(V_QuadVS, V_QuadPSInput) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer quads = G_Dereference(frame.quads); @@ -199,7 +200,7 @@ VertexShader(V_QuadVS, V_QuadPSInput) ////////////////////////////// //- Pixel shader -PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) +ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer quads = G_Dereference(frame.quads); @@ -236,7 +237,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ////////////////////////////// //- Particle emitter shader -ComputeShader(V_EmitParticlesCS, 256) +ImplComputeShader(V_EmitParticlesCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer emitters = G_Dereference(frame.emitters); @@ -267,7 +268,7 @@ ComputeShader(V_EmitParticlesCS, 256) ////////////////////////////// //- Particle sim shader -ComputeShader(V_SimParticlesCS, 256) +ImplComputeShader(V_SimParticlesCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); @@ -544,7 +545,7 @@ ComputeShader(V_SimParticlesCS, 256) // TODO: Remove this -ComputeShader2D(V_ShadeCS, 16, 16) +ImplComputeShader2D(V_ShadeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); @@ -580,7 +581,7 @@ ComputeShader2D(V_ShadeCS, 16, 16) //////////////////////////////////////////////////////////// //~ Composite -ComputeShader2D(V_CompositeCS, 16, 16) +ImplComputeShader2D(V_CompositeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; // Texture2D shade_tex = G_Dereference(frame.shade_ro); @@ -959,7 +960,7 @@ ComputeShader2D(V_CompositeCS, 16, 16) ////////////////////////////// //- Downsample -ComputeShader2D(V_BloomDownCS, 16, 16) +ImplComputeShader2D(V_BloomDownCS) { i32 mips_count = V_GpuConst_MipsCount; i32 mip_idx = V_GpuConst_MipIdx; @@ -1035,7 +1036,7 @@ ComputeShader2D(V_BloomDownCS, 16, 16) ////////////////////////////// //- Upsample -ComputeShader2D(V_BloomUpCS, 16, 16) +ImplComputeShader2D(V_BloomUpCS) { i32 mips_count = V_GpuConst_MipsCount; i32 mip_idx = V_GpuConst_MipIdx; @@ -1103,7 +1104,7 @@ ComputeShader2D(V_BloomUpCS, 16, 16) //////////////////////////////////////////////////////////// //~ Finalize -ComputeShader2D(V_FinalizeCS, 16, 16) +ImplComputeShader2D(V_FinalizeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); @@ -1136,7 +1137,7 @@ ComputeShader2D(V_FinalizeCS, 16, 16) ////////////////////////////// //- Vertex shader -VertexShader(V_DVertVS, V_DVertPSInput) +ImplVertexShader(V_DVertVS, V_DVertPSInput) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer verts = G_Dereference(frame.dverts); @@ -1154,7 +1155,7 @@ VertexShader(V_DVertVS, V_DVertPSInput) ////////////////////////////// //- Pixel shader -PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input) +ImplPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input) { V_DVertPSOutput output; output.sv_target0 = input.color_lin; diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index 971053b9..c0afd030 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -50,31 +50,32 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density); //////////////////////////////////////////////////////////// //~ Shaders -//- Utility shaders -ComputeShader2D(V_PrepareCellsCS, 16, 16); -ComputeShader(V_ClearParticlesCS, 256); +//- Prepare frame +DeclComputeShader2D(V_PrepareShadeCS, 16, 16); +DeclComputeShader2D(V_PrepareCellsCS, 16, 16); +DeclComputeShader(V_ClearParticlesCS, 256); //- Quads -VertexShader(V_QuadVS, V_QuadPSInput); -PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input); +DeclVertexShader(V_QuadVS, V_QuadPSInput); +DeclPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input); //- Particle simulation -ComputeShader(V_EmitParticlesCS, 256); -ComputeShader(V_SimParticlesCS, 256); +DeclComputeShader(V_EmitParticlesCS, 256); +DeclComputeShader(V_SimParticlesCS, 256); //- Shade -ComputeShader2D(V_ShadeCS, 16, 16); +DeclComputeShader2D(V_ShadeCS, 16, 16); //- Composite -ComputeShader2D(V_CompositeCS, 16, 16); +DeclComputeShader2D(V_CompositeCS, 16, 16); //- Bloom -ComputeShader2D(V_BloomDownCS, 16, 16); -ComputeShader2D(V_BloomUpCS, 16, 16); +DeclComputeShader2D(V_BloomDownCS, 16, 16); +DeclComputeShader2D(V_BloomUpCS, 16, 16); //- Finalize -ComputeShader2D(V_FinalizeCS, 16, 16); +DeclComputeShader2D(V_FinalizeCS, 16, 16); //- Debug shapes -VertexShader(V_DVertVS, V_DVertPSInput); -PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input); +DeclVertexShader(V_DVertVS, V_DVertPSInput); +DeclPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input); diff --git a/src/proto/proto.c b/src/proto/proto.c index 51d55fd5..324d5a73 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -36,7 +36,7 @@ void PT_RunForever(WaveLaneCtx *lane) { G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle); G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); - G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicPointClampSampler()); + G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp)); G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle); G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture()); } diff --git a/src/proto/proto.lay b/src/proto/proto.lay index c9c49d6e..474b5569 100644 --- a/src/proto/proto.lay +++ b/src/proto/proto.lay @@ -16,8 +16,8 @@ ////////////////////////////// //- Api -@IncludeC proto_shaders.cgh -@IncludeG proto_shaders.cgh +@IncludeC proto_shared.cgh +@IncludeG proto_shared.cgh @Bootstrap PT_Bootstrap @@ -26,4 +26,5 @@ @IncludeC proto.c +@IncludeG proto_shaders.gh @IncludeG proto_shaders.g diff --git a/src/proto/proto_shaders.g b/src/proto/proto_shaders.g index 93714896..089365ab 100644 --- a/src/proto/proto_shaders.g +++ b/src/proto/proto_shaders.g @@ -1,12 +1,7 @@ //////////////////////////////////////////////////////////// //~ Test shader -Struct(TestStruct) -{ - i32 i; -}; - -ComputeShader2D(PT_TestCS, 8, 8) +ImplComputeShader2D(PT_TestCS) { StructuredBuffer sb = G_Dereference(PT_ShaderConst_TestBuff); @@ -23,21 +18,10 @@ ComputeShader2D(PT_TestCS, 8, 8) //////////////////////////////////////////////////////////// //~ Blit shader -Struct(PT_BlitPSInput) -{ - Semantic(Vec4, sv_position); - Semantic(Vec2, src_uv); -}; - -Struct(PT_BlitPSOutput) -{ - Semantic(Vec4, sv_target0); -}; - ////////////////////////////// //- Vertex shader -VertexShader(PT_BlitVS, PT_BlitPSInput) +ImplVertexShader(PT_BlitVS, PT_BlitPSInput) { Vec2 uv = RectUvFromIdx(SV_VertexID); PT_BlitPSInput result; @@ -49,7 +33,7 @@ VertexShader(PT_BlitVS, PT_BlitPSInput) ////////////////////////////// //- Pixel shader -PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) +ImplPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) { SamplerState sampler = G_Dereference(PT_ShaderConst_BlitSampler); Texture2D tex = G_Dereference(PT_ShaderConst_BlitSrc); diff --git a/src/proto/proto_shaders.gh b/src/proto/proto_shaders.gh new file mode 100644 index 00000000..805636f6 --- /dev/null +++ b/src/proto/proto_shaders.gh @@ -0,0 +1,31 @@ +//////////////////////////////////////////////////////////// +//~ Test shader types + +Struct(TestStruct) +{ + i32 i; +}; + +//////////////////////////////////////////////////////////// +//~ Blit shader types + +Struct(PT_BlitPSInput) +{ + Semantic(Vec4, sv_position); + Semantic(Vec2, src_uv); +}; + +Struct(PT_BlitPSOutput) +{ + Semantic(Vec4, sv_target0); +}; + +//////////////////////////////////////////////////////////// +//~ Shaders + +//- Test +DeclComputeShader2D(PT_TestCS, 8, 8); + +//- Blit +DeclVertexShader(PT_BlitVS, PT_BlitPSInput); +DeclPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input); diff --git a/src/proto/proto_shaders.cgh b/src/proto/proto_shared.cgh similarity index 100% rename from src/proto/proto_shaders.cgh rename to src/proto/proto_shared.cgh diff --git a/src/ui/ui_gpu.g b/src/ui/ui_gpu.g index 7609ac52..f7811ccf 100644 --- a/src/ui/ui_gpu.g +++ b/src/ui/ui_gpu.g @@ -4,7 +4,7 @@ ////////////////////////////// //- Vertex shader -VertexShader(UI_DRectVS, UI_DRectPSInput) +ImplVertexShader(UI_DRectVS, UI_DRectPSInput) { UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; StructuredBuffer rects = G_Dereference(params.rects); @@ -33,7 +33,7 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) ////////////////////////////// //- Pixel shader -PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) +ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) { UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; StructuredBuffer rects = G_Dereference(params.rects); @@ -120,7 +120,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) ////////////////////////////// //- Vertex shader -VertexShader(UI_BlitVS, UI_BlitPSInput) +ImplVertexShader(UI_BlitVS, UI_BlitPSInput) { Vec2 uv = RectUvFromIdx(SV_VertexID); UI_BlitPSInput result; @@ -133,7 +133,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput) ////////////////////////////// //- Pixel shader -PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) +ImplPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) { UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; Texture2D tex = G_Dereference(params.target_ro); diff --git a/src/ui/ui_gpu.gh b/src/ui/ui_gpu.gh index 2295f36c..6c3a201d 100644 --- a/src/ui/ui_gpu.gh +++ b/src/ui/ui_gpu.gh @@ -36,9 +36,9 @@ Struct(UI_BlitPSOutput) //~ Shaders //- Rects -VertexShader(UI_DRectVS, UI_DRectPSInput); -PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input); +DeclVertexShader(UI_DRectVS, UI_DRectPSInput); +DeclPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input); //- Blit -VertexShader(UI_BlitVS, UI_BlitPSInput); -PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input); +DeclVertexShader(UI_BlitVS, UI_BlitPSInput); +DeclPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input);