diff --git a/src/base/base.h b/src/base/base.h index 68e0d0cb..c9a2bb6a 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -755,23 +755,27 @@ Struct(Texture3DHandle) { u32 v; }; Struct(RWTexture3DHandle) { u32 v; }; Struct(SamplerStateHandle) { u32 v; }; -#define IsShaderHandleNil(h) ((h).v == 0) +#define IsShaderHandleNil(h) ((h).v == 0) //- Shader constants -/* D3D12 - 64 maximum root constants - * Vulkan - 32 maximum push constants +/* D3D12: 64 maximum root constants + * Vulkan: 32 maximum push constants + * */ -#define MaxShaderConstants (32) +#define MaxShaderConstants (32) +#define MaxDeclarableShaderConstants (MaxShaderConstants - 1) /* 1 constant reserved for generic async compute queue check */ #if IsLanguageC #define ShaderConstant(type, name, slot) \ StaticAssert(sizeof(type) <= 4); \ - StaticAssert(slot < MaxShaderConstants); \ + StaticAssert(slot < MaxDeclarableShaderConstants); \ Enum(name##__shaderconstantenum) { name = slot }; \ Struct(name##__shaderconstanttype) { type v; } #elif IsLanguageHlsl #define ShaderConstant(type, name, slot) cbuffer name : register(b##slot) { type name; } + + cbuffer IsAsyncCompute : register(b31) { b32 IsAsyncCompute; } #endif //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 370a9504..c9cede75 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -13,20 +13,27 @@ void GPU_StartupExtra(void) GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct); { /* Init quad index buffer */ - GPU_ResourceHandle quad_indices = ZI; { + GPU_ResourceHandle quad_indices = ZI; u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data)); GPU_CopyCpuToBuffer(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data))); + g->quad_indices = GPU_IdxBuff16(quad_indices); + } + + /* TODO: Init debug print queues */ + { } - g->quad_indices = GPU_IdxBuff16(quad_indices); /* Init point sampler */ - 
GPU_ResourceHandle pt_sampler = GPU_PushSamplerResource(gpu_perm, (GPU_SamplerResourceDesc) { .filter = GPU_Filter_MinMagMipPoint }); + { + GPU_ResourceHandle pt_sampler = GPU_PushSamplerResource(gpu_perm, (GPU_SamplerResourceDesc) { .filter = GPU_Filter_MinMagMipPoint }); + GPU_PushSamplerStateHandle(gpu_perm, pt_sampler, .forced = GPU_BasicPointSampler.v); + } /* Init noise texture */ - GPU_ResourceHandle noise_tex = ZI; { + GPU_ResourceHandle noise_tex = ZI; String noise_data = DataFromResource(ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat"))); Vec3I32 noise_dims = VEC3I32(128, 128, 64); if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2) @@ -41,15 +48,14 @@ void GPU_StartupExtra(void) noise_tex, VEC3I32(0, 0, 0), noise_data.text, noise_dims, RNG3I32(VEC3I32(0, 0, 0), noise_dims)); + + GPU_PushTexture3DHandle(gpu_perm, noise_tex, .forced = GPU_BasicNoiseTexture.v); } - /* Initialize static shader handles */ - GPU_PushSamplerStateHandle(gpu_perm, pt_sampler, .forced = GPU_BasicPointSampler.v); - GPU_PushTexture3DHandle(gpu_perm, noise_tex, .forced = GPU_BasicNoiseTexture.v); } GPU_CommitCommandList(cl); - GPU_SyncAllQueues(GPU_QueueKind_Direct); + GPU_SyncOtherQueues(GPU_QueueKind_Direct); } //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 432c53c4..178a8389 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -730,13 +730,13 @@ void GPU_Rasterize(GPU_CommandListHandle cl, void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_target, Vec4 color); //////////////////////////////////////////////////////////// -//~ @hookdecl Synchronization +//~ @hookdecl Queue synchronization /* `waiter_queue` will block until `completion_queue` completes all submitted commands */ void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue); /* All queues will block until `completion_queue` completes all submitted commands */ 
-void GPU_SyncAllQueues(GPU_QueueKind completion_queue); +void GPU_SyncOtherQueues(GPU_QueueKind completion_queue); //////////////////////////////////////////////////////////// //~ @hookdecl Statistics diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 4f43239e..65a96a6e 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -286,12 +286,6 @@ void GPU_Startup(void) } } - ////////////////////////////// - //- Initialize queue sync worker - - // JobPoolId sync_pool = InitJobPool(1, Lit("Dx12 queue sync"), JobPoolPriority_Critical); - // RunJob(GPU_D12_StartQueueSync, .pool = sync_pool); - EndScratch(scratch); } @@ -1527,6 +1521,8 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_coun for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } + slotted_constants[MaxShaderConstants - 1] = queue_kind == GPU_QueueKind_AsyncCompute; /* Reserved last slot (b31 in HLSL): IsAsyncCompute flag */ + /* Rasterizer state */ D3D12_VIEWPORT bound_viewport = ZI; D3D12_RECT bound_scissor = ZI; @@ -2360,7 +2356,7 @@ void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, GPU_ResourceHandle r } //////////////////////////////////////////////////////////// -//~ @hookimpl Synchronization +//~ @hookimpl Queue synchronization void GPU_SyncQueue(GPU_QueueKind completion_queue_kind, GPU_QueueKind waiter_queue_kind) { @@ -2382,7 +2378,7 @@ void GPU_SyncQueue(GPU_QueueKind completion_queue_kind, GPU_QueueKind waiter_que } } -void GPU_SyncAllQueues(GPU_QueueKind completion_queue_kind) +void GPU_SyncOtherQueues(GPU_QueueKind completion_queue_kind) { if (GPU_IsMultiQueueEnabled) { diff --git a/src/gpu/gpu_shader_extras.h b/src/gpu/gpu_shader_extras.h index 243cf727..225c9e1b 100644 --- a/src/gpu/gpu_shader_extras.h +++ b/src/gpu/gpu_shader_extras.h @@ -7,6 +7,9 @@ #define 
GPU_SharedHandle(type, v) (type(v)) #endif -#define GPU_ShaderPrintBuffer GPU_SharedHandle(RWByteAddressBufferHandle, 1) -#define GPU_BasicPointSampler GPU_SharedHandle(SamplerStateHandle, 2) -#define GPU_BasicNoiseTexture GPU_SharedHandle(Texture3DHandle, 3) +#define GPU_DebugPrintBufferSize Mebi(128) + +#define GPU_DirectQueueDebugPrintBuffer GPU_SharedHandle(RWByteAddressBufferHandle, 1) +#define GPU_AsyncComputeQueueDebugPrintBuffer GPU_SharedHandle(RWByteAddressBufferHandle, 2) +#define GPU_BasicPointSampler GPU_SharedHandle(SamplerStateHandle, 3) +#define GPU_BasicNoiseTexture GPU_SharedHandle(Texture3DHandle, 4) diff --git a/src/gpu/gpu_shader_extras.hlsl b/src/gpu/gpu_shader_extras.hlsl index 5ed6e3d1..b1a2c7b8 100644 --- a/src/gpu/gpu_shader_extras.hlsl +++ b/src/gpu/gpu_shader_extras.hlsl @@ -201,18 +201,26 @@ template u32 U32FromChar(in T c) } #if GPU_DEBUG - #define DebugPrint(fmt_cstr) do { \ - u32 __strlen = 0; \ - for (;;) { if (U32FromChar(fmt_cstr[__strlen]) == 0) { break; } ++__strlen; } \ - RWByteAddressBuffer __print_buff = RWByteAddressBufferFromHandle(GPU_ShaderPrintBuffer); \ - u32 __pos; \ - __print_buff.InterlockedAdd(0, __strlen, __pos); \ - if (__pos < countof(__print_buff)) \ - { \ - for (u32 char_idx = 0; char_idx < __strlen; ++char_idx) \ - { \ - __print_buff.Store(__pos + char_idx, U32FromChar(fmt_cstr[char_idx])); \ - } \ + #define DebugPrint(fmt_cstr) do { \ + u32 __strlen = 0; \ + for (;;) { if (U32FromChar(fmt_cstr[__strlen]) == 0) { break; } ++__strlen; } \ + RWByteAddressBuffer __print_buff; \ + if (IsAsyncCompute) \ + { \ + __print_buff = RWByteAddressBufferFromHandle(GPU_AsyncComputeQueueDebugPrintBuffer); \ + } \ + else \ + { \ + __print_buff = RWByteAddressBufferFromHandle(GPU_DirectQueueDebugPrintBuffer); \ + } \ + u32 __pos; \ + __print_buff.InterlockedAdd(0, __strlen, __pos); \ + if (__pos < countof(__print_buff)) \ + { \ + for (u32 char_idx = 0; char_idx < __strlen; ++char_idx) \ + { \ + __print_buff.Store(__pos + 
char_idx, U32FromChar(fmt_cstr[char_idx])); \ + } \ } \ } while (0) #else diff --git a/src/meta/meta.c b/src/meta/meta.c index 58b3d60c..55b0d054 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -260,7 +260,7 @@ void BuildEntryPoint(WaveLaneCtx *lane) OS_Rm(Lit("metahash.dat")); } - /* Calculate new metahash */ + /* Compute new metahash */ u64 new_metahash = 0; { StringList check_files = ZI; diff --git a/src/mixer/mixer.c b/src/mixer/mixer.c index 9066437e..08e8e248 100644 --- a/src/mixer/mixer.c +++ b/src/mixer/mixer.c @@ -405,7 +405,7 @@ MIX_PcmF32 MIX_MixAllTracks(Arena *arena, u64 frame_count) Vec2 sound_rel = SubVec2(pos, listener_pos); Vec2 sound_rel_dir = NormVec2(sound_rel); - /* Calculate volume */ + /* Compute volume */ f32 volume_start = effect_data->spatial_volume; f32 volume_end; { @@ -419,7 +419,7 @@ MIX_PcmF32 MIX_MixAllTracks(Arena *arena, u64 frame_count) } effect_data->spatial_volume = volume_end; - /* Calculate pan */ + /* Compute pan */ f32 pan_start = effect_data->spatial_pan; f32 pan_end = WedgeVec2(listener_dir, sound_rel_dir) * pan_scale; effect_data->spatial_pan = pan_end; diff --git a/src/platform/platform_win32/platform_win32.c b/src/platform/platform_win32/platform_win32.c index 06e117a1..c1a664cb 100644 --- a/src/platform/platform_win32/platform_win32.c +++ b/src/platform/platform_win32/platform_win32.c @@ -195,7 +195,7 @@ void P_W32_SyncTimerForever(WaveLaneCtx *lane) i64 period_ns = last_cycle_ns == 0 ? 
P_W32_DefaultTimerPeriodNs : now_ns - last_cycle_ns; last_cycle_ns = now_ns; - /* Calculate mean period */ + /* Compute mean period */ { periods[periods_index++] = period_ns; if (periods_index == countof(periods)) diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index a0c98cab..4d3365ba 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -884,7 +884,7 @@ void UI_EndFrame(UI_Frame *frame) Assert(post_index == boxes_count); } - /* Calculate independent sizes */ + /* Compute independent sizes */ for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) { UI_Box *box = boxes_pre[pre_index]; @@ -914,7 +914,7 @@ void UI_EndFrame(UI_Frame *frame) } } - /* Calculate upwards-dependent sizes along layout axis */ + /* Compute upwards-dependent sizes along layout axis */ for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) { UI_Box *box = boxes_pre[pre_index]; @@ -941,7 +941,7 @@ void UI_EndFrame(UI_Frame *frame) } } - /* Calculate downwards-dependent sizes */ + /* Compute downwards-dependent sizes */ for (u64 post_index = 0; post_index < boxes_count; ++post_index) { UI_Box *box = boxes_post[post_index]; @@ -970,7 +970,7 @@ void UI_EndFrame(UI_Frame *frame) } } - /* Calculate upwards-dependent sizes along non-layout axis */ + /* Compute upwards-dependent sizes along non-layout axis */ for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) { UI_Box *box = boxes_pre[pre_index]; @@ -1064,7 +1064,7 @@ void UI_EndFrame(UI_Frame *frame) } } - /* Calculate final positions */ + /* Compute final positions */ for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) { UI_Box *box = boxes_pre[pre_index]; @@ -1118,12 +1118,12 @@ void UI_EndFrame(UI_Frame *frame) { f32 layout_cursor = parent->layout_cursor; f32 offset[2] = ZI; - /* Calculate offset in layout direction */ + /* Compute offset in layout direction */ { Axis axis = parent->desc.child_layout_axis; offset[axis] = layout_cursor; } - /* Calculate offset in non-layout direction (based on alignment) */ + 
/* Compute offset in non-layout direction (based on alignment) */ { Axis axis = !parent->desc.child_layout_axis; UI_AxisAlignment alignment = parent->desc.child_alignment[axis]; @@ -1298,7 +1298,7 @@ void UI_EndFrame(UI_Frame *frame) x_alignment = UI_AxisAlignment_Start; } - /* Calculate baseline */ + /* Compute baseline */ f32 ascent = raw_run->ascent; f32 descent = raw_run->descent; f32 cap = raw_run->cap; diff --git a/src/ui/ui_shaders.hlsl b/src/ui/ui_shaders.hlsl index f2762f76..4eeb5512 100644 --- a/src/ui/ui_shaders.hlsl +++ b/src/ui/ui_shaders.hlsl @@ -58,7 +58,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) Vec2 p0 = rect.p0; Vec2 p1 = rect.p1; - /* Calculate rect sdf (negative means pixel is inside of rect) */ + /* Compute rect sdf (negative means pixel is inside of rect) */ f32 rect_dist = min(min(p.x - p0.x, p1.x - p.x), min(p.y - p0.y, p1.y - p.y)); { f32 tl_radius = rect.tl_rounding; @@ -76,7 +76,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) } rect_dist = -rect_dist; - /* Calculate border sdf (negative means pixel is inside of border) */ + /* Compute border sdf (negative means pixel is inside of border) */ f32 border_width = 0; f32 border_dist = 0; Vec4 border_color = 0;