IsAsyncCompute constant

This commit is contained in:
jacob 2025-12-08 20:06:50 -06:00
parent 9772e4ce4c
commit d22f97debf
11 changed files with 69 additions and 52 deletions

View File

@ -759,19 +759,23 @@ Struct(SamplerStateHandle) { u32 v; };
//- Shader constants
/* D3D12: 64 maximum root constants
 * Vulkan: 32 maximum push constants
 *
 * MaxShaderConstants is set to the smaller of the two limits.
 */
/* Upper bound on shader constant slots */
#define MaxShaderConstants (32)
#define MaxDeclarableShaderConstants (MaxShaderConstants - 1) /* 1 constant reserved for generic async compute queue check */
/* ShaderConstant(type, name, slot): declares shader constant `name` of `type` at `slot`.
 *   C:    statically checks that sizeof(type) <= 4 and that `slot` is below the reserved
 *         last slot, then declares an enum giving `name` the value `slot` and a struct
 *         wrapping a single `type` member `v`.
 *   HLSL: declares a cbuffer bound to register b<slot> holding one `type` member `name`.
 * `slot` must be a plain integer literal, since the HLSL variant pastes it onto `b`.
 */
#if IsLanguageC
#define ShaderConstant(type, name, slot) \
StaticAssert(sizeof(type) <= 4); \
StaticAssert((slot) < MaxDeclarableShaderConstants); \
Enum(name##__shaderconstantenum) { name = slot }; \
Struct(name##__shaderconstanttype) { type v; }
#elif IsLanguageHlsl
#define ShaderConstant(type, name, slot) cbuffer name : register(b##slot) { type name; }
/* Reserved constant in the last slot: b31 must stay equal to MaxShaderConstants - 1 */
cbuffer IsAsyncCompute : register(b31) { b32 IsAsyncCompute; }
#endif
////////////////////////////////////////////////////////////

View File

@ -13,20 +13,27 @@ void GPU_StartupExtra(void)
GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct);
{
/* Init quad index buffer */
GPU_ResourceHandle quad_indices = ZI;
{
GPU_ResourceHandle quad_indices = ZI;
u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data));
GPU_CopyCpuToBuffer(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data)));
}
g->quad_indices = GPU_IdxBuff16(quad_indices);
}
/* TODO: Init debug print queues */
{
}
/* Init point sampler */
{
GPU_ResourceHandle pt_sampler = GPU_PushSamplerResource(gpu_perm, (GPU_SamplerResourceDesc) { .filter = GPU_Filter_MinMagMipPoint });
GPU_PushSamplerStateHandle(gpu_perm, pt_sampler, .forced = GPU_BasicPointSampler.v);
}
/* Init noise texture */
GPU_ResourceHandle noise_tex = ZI;
{
GPU_ResourceHandle noise_tex = ZI;
String noise_data = DataFromResource(ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat")));
Vec3I32 noise_dims = VEC3I32(128, 128, 64);
if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2)
@ -41,15 +48,14 @@ void GPU_StartupExtra(void)
noise_tex, VEC3I32(0, 0, 0),
noise_data.text, noise_dims,
RNG3I32(VEC3I32(0, 0, 0), noise_dims));
GPU_PushTexture3DHandle(gpu_perm, noise_tex, .forced = GPU_BasicNoiseTexture.v);
}
/* Initialize static shader handles */
GPU_PushSamplerStateHandle(gpu_perm, pt_sampler, .forced = GPU_BasicPointSampler.v);
GPU_PushTexture3DHandle(gpu_perm, noise_tex, .forced = GPU_BasicNoiseTexture.v);
}
GPU_CommitCommandList(cl);
GPU_SyncAllQueues(GPU_QueueKind_Direct);
GPU_SyncOtherQueues(GPU_QueueKind_Direct);
}
////////////////////////////////////////////////////////////

View File

@ -730,13 +730,13 @@ void GPU_Rasterize(GPU_CommandListHandle cl,
void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_target, Vec4 color);
////////////////////////////////////////////////////////////
//~ @hookdecl Synchronization
//~ @hookdecl Queue synchronization
/* `waiter_queue` will block until `completion_queue` completes all submitted commands */
void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue);
/* All queues will block until `completion_queue` completes all submitted commands.
 * NOTE(review): this operation is declared under two names below (`GPU_SyncAllQueues` and
 * `GPU_SyncOtherQueues`). Presumably the waiters are every queue other than
 * `completion_queue` - confirm against the implementation and retire the unused name. */
void GPU_SyncAllQueues(GPU_QueueKind completion_queue);
void GPU_SyncOtherQueues(GPU_QueueKind completion_queue);
////////////////////////////////////////////////////////////
//~ @hookdecl Statistics

View File

@ -286,12 +286,6 @@ void GPU_Startup(void)
}
}
//////////////////////////////
//- Initialize queue sync worker
// JobPoolId sync_pool = InitJobPool(1, Lit("Dx12 queue sync"), JobPoolPriority_Critical);
// RunJob(GPU_D12_StartQueueSync, .pool = sync_pool);
EndScratch(scratch);
}
@ -1527,6 +1521,8 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_coun
for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; }
for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; }
/* IsAsyncCompute constant: stored in the reserved last slot, 1 only when this command list targets the async compute queue.
 * Must be a single comparison: `a == a == b` parses as `(a == a) == b`, i.e. `1 == b`, which ignores the actual queue. */
slotted_constants[MaxShaderConstants - 1] = (queue_kind == GPU_QueueKind_AsyncCompute);
/* Rasterizer state */
D3D12_VIEWPORT bound_viewport = ZI;
D3D12_RECT bound_scissor = ZI;
@ -2360,7 +2356,7 @@ void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, GPU_ResourceHandle r
}
////////////////////////////////////////////////////////////
//~ @hookimpl Synchronization
//~ @hookimpl Queue synchronization
void GPU_SyncQueue(GPU_QueueKind completion_queue_kind, GPU_QueueKind waiter_queue_kind)
{
@ -2382,7 +2378,7 @@ void GPU_SyncQueue(GPU_QueueKind completion_queue_kind, GPU_QueueKind waiter_que
}
}
void GPU_SyncAllQueues(GPU_QueueKind completion_queue_kind)
void GPU_SyncOtherQueues(GPU_QueueKind completion_queue_kind)
{
if (GPU_IsMultiQueueEnabled)
{

View File

@ -7,6 +7,9 @@
#define GPU_SharedHandle(type, v) (type(v))
#endif
/* Fixed handle indices, each defined exactly once.
 * NOTE(review): GPU_ShaderPrintBuffer uses the same index (1) as GPU_DirectQueueDebugPrintBuffer;
 * presumably it is a legacy name kept for remaining users - confirm whether it can be retired. */
#define GPU_ShaderPrintBuffer GPU_SharedHandle(RWByteAddressBufferHandle, 1)
#define GPU_DebugPrintBufferSize Mebi(128)
#define GPU_DirectQueueDebugPrintBuffer GPU_SharedHandle(RWByteAddressBufferHandle, 1)
#define GPU_AsyncComputeQueueDebugPrintBuffer GPU_SharedHandle(RWByteAddressBufferHandle, 2)
#define GPU_BasicPointSampler GPU_SharedHandle(SamplerStateHandle, 3)
#define GPU_BasicNoiseTexture GPU_SharedHandle(Texture3DHandle, 4)

View File

@ -204,7 +204,15 @@ template<typename T> u32 U32FromChar(in T c)
#define DebugPrint(fmt_cstr) do { \
u32 __strlen = 0; \
for (;;) { if (U32FromChar(fmt_cstr[__strlen]) == 0) { break; } ++__strlen; } \
RWByteAddressBuffer __print_buff = RWByteAddressBufferFromHandle(GPU_ShaderPrintBuffer); \
RWByteAddressBuffer __print_buff; \
if (IsAsyncCompute) \
{ \
__print_buff = RWByteAddressBufferFromHandle(GPU_AsyncComputeQueueDebugPrintBuffer); \
} \
else \
{ \
__print_buff = RWByteAddressBufferFromHandle(GPU_DirectQueueDebugPrintBuffer); \
} \
u32 __pos; \
__print_buff.InterlockedAdd(0, __strlen, __pos); \
if (__pos < countof(__print_buff)) \

View File

@ -260,7 +260,7 @@ void BuildEntryPoint(WaveLaneCtx *lane)
OS_Rm(Lit("metahash.dat"));
}
/* Calculate new metahash */
/* Compute new metahash */
u64 new_metahash = 0;
{
StringList check_files = ZI;

View File

@ -405,7 +405,7 @@ MIX_PcmF32 MIX_MixAllTracks(Arena *arena, u64 frame_count)
Vec2 sound_rel = SubVec2(pos, listener_pos);
Vec2 sound_rel_dir = NormVec2(sound_rel);
/* Calculate volume */
/* Compute volume */
f32 volume_start = effect_data->spatial_volume;
f32 volume_end;
{
@ -419,7 +419,7 @@ MIX_PcmF32 MIX_MixAllTracks(Arena *arena, u64 frame_count)
}
effect_data->spatial_volume = volume_end;
/* Calculate pan */
/* Compute pan */
f32 pan_start = effect_data->spatial_pan;
f32 pan_end = WedgeVec2(listener_dir, sound_rel_dir) * pan_scale;
effect_data->spatial_pan = pan_end;

View File

@ -195,7 +195,7 @@ void P_W32_SyncTimerForever(WaveLaneCtx *lane)
i64 period_ns = last_cycle_ns == 0 ? P_W32_DefaultTimerPeriodNs : now_ns - last_cycle_ns;
last_cycle_ns = now_ns;
/* Calculate mean period */
/* Compute mean period */
{
periods[periods_index++] = period_ns;
if (periods_index == countof(periods))

View File

@ -884,7 +884,7 @@ void UI_EndFrame(UI_Frame *frame)
Assert(post_index == boxes_count);
}
/* Calculate independent sizes */
/* Compute independent sizes */
for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index)
{
UI_Box *box = boxes_pre[pre_index];
@ -914,7 +914,7 @@ void UI_EndFrame(UI_Frame *frame)
}
}
/* Calculate upwards-dependent sizes along layout axis */
/* Compute upwards-dependent sizes along layout axis */
for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index)
{
UI_Box *box = boxes_pre[pre_index];
@ -941,7 +941,7 @@ void UI_EndFrame(UI_Frame *frame)
}
}
/* Calculate downwards-dependent sizes */
/* Compute downwards-dependent sizes */
for (u64 post_index = 0; post_index < boxes_count; ++post_index)
{
UI_Box *box = boxes_post[post_index];
@ -970,7 +970,7 @@ void UI_EndFrame(UI_Frame *frame)
}
}
/* Calculate upwards-dependent sizes along non-layout axis */
/* Compute upwards-dependent sizes along non-layout axis */
for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index)
{
UI_Box *box = boxes_pre[pre_index];
@ -1064,7 +1064,7 @@ void UI_EndFrame(UI_Frame *frame)
}
}
/* Calculate final positions */
/* Compute final positions */
for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index)
{
UI_Box *box = boxes_pre[pre_index];
@ -1118,12 +1118,12 @@ void UI_EndFrame(UI_Frame *frame)
{
f32 layout_cursor = parent->layout_cursor;
f32 offset[2] = ZI;
/* Calculate offset in layout direction */
/* Compute offset in layout direction */
{
Axis axis = parent->desc.child_layout_axis;
offset[axis] = layout_cursor;
}
/* Calculate offset in non-layout direction (based on alignment) */
/* Compute offset in non-layout direction (based on alignment) */
{
Axis axis = !parent->desc.child_layout_axis;
UI_AxisAlignment alignment = parent->desc.child_alignment[axis];
@ -1298,7 +1298,7 @@ void UI_EndFrame(UI_Frame *frame)
x_alignment = UI_AxisAlignment_Start;
}
/* Calculate baseline */
/* Compute baseline */
f32 ascent = raw_run->ascent;
f32 descent = raw_run->descent;
f32 cap = raw_run->cap;

View File

@ -58,7 +58,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
Vec2 p0 = rect.p0;
Vec2 p1 = rect.p1;
/* Calculate rect sdf (negative means pixel is inside of rect) */
/* Compute rect sdf (negative means pixel is inside of rect) */
f32 rect_dist = min(min(p.x - p0.x, p1.x - p.x), min(p.y - p0.y, p1.y - p.y));
{
f32 tl_radius = rect.tl_rounding;
@ -76,7 +76,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
}
rect_dist = -rect_dist;
/* Calculate border sdf (negative means pixel is inside of border) */
/* Compute border sdf (negative means pixel is inside of border) */
f32 border_width = 0;
f32 border_dist = 0;
Vec4 border_color = 0;