diff --git a/src/base/base.cgh b/src/base/base.cgh index 5c2eb705..5e0c3e37 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -252,8 +252,8 @@ #endif //- Preprocessor concatenation -#define Cat1(a, b) a ## b -#define Cat(a, b) Cat1(a, b) +#define CAT1(a, b) a ## b +#define CAT(a, b) CAT1(a, b) //- Preprocessor stringization #define Stringize1(x) #x @@ -461,10 +461,10 @@ #define IsFixedArray(a) (IsIndexable(a) && (((void *)&a) == ((void *)a))) //- struct region -#define BeginFieldRegion(name) i8 __begfieldreg__##name -#define EndFieldRegion(name) i8 __endfieldreg__##name -#define CopyFieldRegion(dst, src, r) CopyBytes(&dst->__begfieldreg__##r, &src->__begfieldreg__##r, (u8 *)&dst->__endfieldreg__##r - (u8 *)&dst->__begfieldreg__##r) -#define ZeroFieldRegion(dst, src, r) ZeroBytes(&dst->__begfieldreg__##r, &src->__begfieldreg__##r, (u8 *)&dst->__endfieldreg__##r - (u8 *)&dst->__begfieldreg__##r) +#define BeginFieldRegion(name) i8 CAT(__begfieldreg__, name) +#define EndFieldRegion(name) i8 CAT(__endfieldreg__, name) +#define CopyFieldRegion(dst, src, r) CopyBytes(&dst->CAT(__begfieldreg__, r), &src->CAT(__begfieldreg__, r), (u8 *)&dst->CAT(__endfieldreg__, r) - (u8 *)&dst->CAT(__begfieldreg__, r)) +#define ZeroFieldRegion(dst, src, r) ZeroBytes(&dst->CAT(__begfieldreg__, r), (u8 *)&dst->CAT(__endfieldreg__, r) - (u8 *)&dst->CAT(__begfieldreg__, r)) /* src is kept for signature compatibility but unused; assumes ZeroBytes(ptr, size) - TODO confirm */ //- Packed #if IsCompilerMsvc @@ -736,28 +736,28 @@ Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; }; -#define GetGroupSize(name) VEC3U32(name##__GroupSize_X, name##__GroupSize_Y, name##__GroupSize_Z) +#define GroupSize(name) VEC3U32(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z)) #if IsGpu #define Semantic(name) name : name #define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 
Semantic(SV_VertexID)) #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) - #define ComputeShader(name) \ - [numthreads(name##__GroupSize_X, name##__GroupSize_Y, name##__GroupSize_Z)] \ - void name( \ - u32 Semantic(SV_GroupIndex), \ - Vec3U32 Semantic(SV_GroupID), \ - Vec3U32 Semantic(SV_GroupThreadID), \ - Vec3U32 Semantic(SV_DispatchThreadID) \ + #define ComputeShader(name) \ + [numthreads(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))] \ + void name( \ + u32 Semantic(SV_GroupIndex), \ + Vec3U32 Semantic(SV_GroupID), \ + Vec3U32 Semantic(SV_GroupThreadID), \ + Vec3U32 Semantic(SV_DispatchThreadID) \ ) #endif #if IsCpu - #define DeclComputeShader(name, resource_hash, x, y, z) enum { name##__GroupSize_X = x, name##__GroupSize_Y = y, name##__GroupSize_Z = z }; static ComputeShaderDesc name = { resource_hash, x, y, z } + #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z } #define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 } #define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 } -#else - #define DeclComputeShader(name, resource_hash, x, y, z) enum { name##__GroupSize_X = x, name##__GroupSize_Y = y, name##__GroupSize_Z = z }; +#elif IsGpu + #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; #define DeclVertexShader(name, resource_hash) #define DeclPixelShader(name, resource_hash) #endif diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index f2779408..9c732b11 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -215,6 +215,17 @@ Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip) return result; } +//- Thread count + +Vec3I32 
G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads) +{ /* Per-axis ceil(threads / group size); an axis yields 0 when its thread count or group size is not positive */ + return VEC3I32( + (threads.x > 0 && cs.x > 0) ? (threads.x - 1) / (i32)cs.x + 1 : 0, + (threads.y > 0 && cs.y > 0) ? (threads.y - 1) / (i32)cs.y + 1 : 0, + (threads.z > 0 && cs.z > 0) ? (threads.z - 1) / (i32)cs.z + 1 : 0 + ); +} + //- Viewport / scissor Rng3 G_ViewportFromTexture(G_ResourceHandle texture) diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 03927040..7af854b0 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -39,6 +39,9 @@ i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip); Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip); Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip); +//- Thread count +Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads); + //- Viewport / scissor Rng3 G_ViewportFromTexture(G_ResourceHandle texture); Rng2 G_ScissorFromTexture(G_ResourceHandle texture); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 00a1d519..ccee6c07 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -679,7 +679,7 @@ void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size); #define G_SetConstant(cl, name, value) do { \ - name##__shaderconstanttype __src; \ + CAT(name, __shaderconstanttype) __src; \ __src.v = value; \ G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \ } while (0) @@ -733,7 +733,11 @@ void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); //- Compute -void G_Compute(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 groups); +void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads); + +#define G_Compute(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads), 1, 1)) +#define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1)) +#define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z)) //- Rasterize diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c 
b/src/gpu/gpu_dx12/gpu_dx12_core.c index ad1e0776..385a47a6 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -884,7 +884,7 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc) raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; - raster_desc.DepthClipEnable = 1; + raster_desc.DepthClipEnable = 0; raster_desc.MultisampleEnable = 0; raster_desc.AntialiasedLineEnable = 0; raster_desc.ForcedSampleCount = 0; @@ -3240,15 +3240,15 @@ void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc) //- Compute -void G_Compute(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 groups) +void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 threads) { - if (groups.x > 0 && groups.y > 0 && groups.z > 0) + if (threads.x > 0 && threads.y > 0 && threads.z > 0) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); cmd->kind = G_D12_CmdKind_Compute; cmd->compute.cs = cs; - cmd->compute.groups = groups; + cmd->compute.groups = G_GroupCountFromThreadCount(cs, threads); } } diff --git a/src/meta/meta.c b/src/meta/meta.c index fe952be8..87e23221 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -1087,13 +1087,22 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) e->kind == ShaderEntryKind_PS ? 
Lit("ps_6_6") : Lit("cs_6_6") ); + + StringList local_defs = Zi; + { + PushStringToList(perm, &local_defs, StringF(perm, "-DShaderName=%F", FmtString(shader_name))); + PushStringToList(perm, &local_defs, StringF(perm, "-DShaderDef_%F=1", FmtString(shader_name))); + PushStringToList(perm, &local_defs, StringF(perm, "-DShaderTarget=%F", FmtString(target))); + } + String cmd = StringF( perm, - "dxc.exe -T %F -E %F -Fo %F %F %F %F %F", + "dxc.exe -T %F -E %F -Fo %F %F %F %F %F %F", FmtString(target), FmtString(e->name), FmtString(out_file), FmtString(gpu_out_file), + FmtString(StringFromList(perm, local_defs, Lit(" "))), FmtString(StringFromList(perm, cp.defs, Lit(" "))), FmtString(StringFromList(perm, cp.flags_dxc, Lit(" "))), FmtString(StringFromList(perm, cp.warnings_dxc, Lit(" "))) @@ -1229,19 +1238,12 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) if (output.len > 0) { String msg = output; - if (!StringContains(msg, Lit("In file"))) - { - // If error message is missing "In file" then it may have - // failed to even find the entry point, meaning we should - // include the name of the shader in the error message for - // clarification. 
- msg = StringF( - perm, - "Error compiling shader \"%F\"\n%F", - FmtString(gpu_obj->name), - FmtString(output) - ); - } + msg = StringF( + perm, + "%F\n%F", + FmtString(gpu_obj->name), + FmtString(output) + ); if (obj_errored) { if (error_gpu_obj_outputs.count == 0) diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index b33175f4..a2a924f4 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -5281,15 +5281,15 @@ void V_TickForever(WaveLaneCtx *lane) { // Prepare shade - G_Compute(cl, V_PrepareShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims)); + G_Compute2D(cl, V_PrepareShadeCS, frame->shade_dims); // Prepare cells - G_Compute(cl, V_PrepareCellsCS, V_ThreadGroupSizeFromTexSize(cells_dims)); + G_Compute2D(cl, V_PrepareCellsCS, cells_dims); // Clear particles if (frame->should_clear_particles) { - G_Compute(cl, V_ClearParticlesCS, V_ThreadGroupSizeFromBufferSize(V_ParticlesCap)); + G_Compute(cl, V_ClearParticlesCS, V_ParticlesCap); V.particle_seq = 0; } @@ -5304,7 +5304,7 @@ void V_TickForever(WaveLaneCtx *lane) Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(backdrop_target), mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); - G_Compute(cl, V_BackdropDownCS, V_ThreadGroupSizeFromTexSize(down_dims)); + G_Compute2D(cl, V_BackdropDownCS, down_dims); G_DumbGlobalMemorySync(cl); } @@ -5315,7 +5315,7 @@ void V_TickForever(WaveLaneCtx *lane) Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(backdrop_target), mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); - G_Compute(cl, V_BackdropUpCS, V_ThreadGroupSizeFromTexSize(up_dims)); + G_Compute2D(cl, V_BackdropUpCS, up_dims); G_DumbGlobalMemorySync(cl); } @@ -5341,7 +5341,7 @@ void V_TickForever(WaveLaneCtx *lane) ); // Emit particles - G_Compute(cl, V_EmitParticlesCS, V_ThreadGroupSizeFromBufferSize(frame->emitters_count)); + G_Compute(cl, V_EmitParticlesCS, frame->emitters_count); // Sync particles, occluders, & albedo G_DumbGlobalMemorySync(cl); @@ -5353,7 +5353,7 
@@ void V_TickForever(WaveLaneCtx *lane) { // Simulate particles - G_Compute(cl, V_SimParticlesCS, V_ThreadGroupSizeFromBufferSize(V_ParticlesCap)); + G_Compute(cl, V_SimParticlesCS, V_ParticlesCap); // Sync cells G_DumbGlobalMemorySync(cl); @@ -5366,7 +5366,7 @@ void V_TickForever(WaveLaneCtx *lane) if (0) { - G_Compute(cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims)); + G_Compute2D(cl, V_ShadeCS, frame->shade_dims); G_DumbGlobalMemorySync(cl); } @@ -5375,7 +5375,7 @@ void V_TickForever(WaveLaneCtx *lane) //- Composite pass { - G_Compute(cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); + G_Compute2D(cl, V_CompositeCS, frame->screen_dims); // Sync screen tex G_DumbGlobalMemorySync(cl); @@ -5398,7 +5398,7 @@ void V_TickForever(WaveLaneCtx *lane) Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); - G_Compute(cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims)); + G_Compute2D(cl, V_BloomDownCS, down_dims); G_DumbGlobalMemorySync(cl); } @@ -5409,7 +5409,7 @@ void V_TickForever(WaveLaneCtx *lane) Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); - G_Compute(cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims)); + G_Compute2D(cl, V_BloomUpCS, up_dims); G_DumbGlobalMemorySync(cl); } @@ -5419,7 +5419,7 @@ void V_TickForever(WaveLaneCtx *lane) //- Finalization pass { - G_Compute(cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); + G_Compute2D(cl, V_FinalizeCS, frame->screen_dims); G_DumbGlobalMemorySync(cl); } diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 9e1b4587..a77260a5 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -377,7 +377,4 @@ Struct(V_SharedFrame) //////////////////////////////////////////////////////////// //~ Helpers -#define V_ThreadGroupSizeFromBufferSize(buffer_size) 
VEC3I32((((buffer_size) + 255) / 256), 1, 1) -#define V_ThreadGroupSizeFromTexSize(tex_size) VEC3I32(((tex_size).x + 15) / 16, ((tex_size).y + 15) / 16, 1) - V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind); diff --git a/src/proto/proto.c b/src/proto/proto.c index 703e8543..a2145ba9 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -39,7 +39,7 @@ void PT_RunForever(WaveLaneCtx *lane) // Test pass { - G_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); + G_Compute2D(cl, PT_TestCS, final_target_size); } G_DumbMemorySync(cl, final_target_res); diff --git a/src/proto/proto_gpu.gh b/src/proto/proto_gpu.gh index 7ec277b4..8330c376 100644 --- a/src/proto/proto_gpu.gh +++ b/src/proto/proto_gpu.gh @@ -27,5 +27,5 @@ Struct(PT_BlitPSOutput) ComputeShader(PT_TestCS); //- Blit -DeclVertexShader(PT_BlitVS, PT_BlitPSInput); -DeclPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input); +VertexShader(PT_BlitVS, PT_BlitPSInput); +PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input);