diff --git a/res/sh/flood.hlsl b/res/sh/flood.hlsl index bee3e6ce..d2cd039e 100644 --- a/res/sh/flood.hlsl +++ b/res/sh/flood.hlsl @@ -19,7 +19,7 @@ ConstantBuffer g_constants : register(b0); Texture2D g_emittance_textures[] : register(t0, space0); -RWTexture2D g_flood_textures[]: register(u0, space1); +RWTexture2D g_flood_textures[]: register(u0, space1); SamplerState g_sampler : register(s0); @@ -44,51 +44,39 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) if (step_len == -1) { /* Seed */ float4 emittance = g_emittance_textures[g_constants.emittance_tex_urid][id]; - int2 seed = int2(-1, -1); + uint seed = 0xFFFFFFFF; if (emittance.a > 0) { - seed = id.xy; + seed = (id.x << 16) | id.y; } - g_flood_textures[g_constants.flood_read_tex_urid][id] = seed; - g_flood_textures[g_constants.flood_write_tex_urid][id] = seed; + g_flood_textures[g_constants.write_flood_tex_urid][id] = seed; } else { /* Flood */ - int2 seed = g_flood_textures[g_constants.flood_read_tex_urid][id]; - if (seed.x >= 0 && seed.y >= 0) { - int2 flood_coords[2] = { - int2((int)id.x - step_len, (int)id.y ), /* cl */ - int2((int)id.x + step_len, (int)id.y ), /* cr */ - }; - for (int i = 0; i < 2; ++i) { - int2 coord = flood_coords[i]; - if (coord.x >= 0 && coord.x < (int)tex_size.x && coord.y >= 0 && coord.y < (int)tex_size.y) { - int2 old_flood = g_flood_textures[g_constants.flood_read_tex_urid][coord]; - // if (old_flood.x < 0 || old_flood.y < 0) { - g_flood_textures[g_constants.flood_write_tex_urid][coord] = seed; - // } + int2 read_coords[9] = { + (int2)id + int2(-step_len, -step_len), /* top left */ + (int2)id + int2(0 , -step_len), /* top center */ + (int2)id + int2(+step_len, -step_len), /* top right */ + (int2)id + int2(-step_len, 0 ), /* center left */ + (int2)id + int2(0 , 0 ), /* center center */ + (int2)id + int2(+step_len, 0 ), /* center right */ + (int2)id + int2(-step_len, +step_len), /* bottom left */ + (int2)id + int2(0 , +step_len), /* bottom center */ + (int2)id + int2(+step_len, +step_len) /* bottom right */ + }; + uint closest_seed = 0xFFFFFFFF; + uint closest_seed_len_sq = 0xFFFFFFFF; + for (int i = 0; i < 9; ++i) { + int2 coord = read_coords[i]; + if (coord.x >= 0 && coord.x < (int)tex_size.x && coord.y >= 0 && coord.y < (int)tex_size.y) { + uint seed = g_flood_textures[g_constants.read_flood_tex_urid][coord]; + int2 seed_coord = int2((seed >> 16) & 0xFFFF, seed & 0xFFFF); + int2 dist_vec = (int2)id - seed_coord; + uint dist_len_sq = dot(dist_vec, dist_vec); + if (dist_len_sq < closest_seed_len_sq) { + closest_seed = seed; + closest_seed_len_sq = dist_len_sq; } } } - // if (seed.x >= 0 && seed.y >= 0) { - // int2 flood_coords[8] = { - // int2((int)id.x - step_len, (int)id.y - step_len), /* tl */ - // int2((int)id.x , (int)id.y - step_len), /* tc */ - // int2((int)id.x + step_len, (int)id.y - step_len), /* tr */ - // int2((int)id.x - step_len, (int)id.y ), /* cl */ - // int2((int)id.x + step_len, (int)id.y ), /* cr */ - // int2((int)id.x - step_len, (int)id.y + step_len), /* bl */ - // int2((int)id.x , (int)id.y + step_len), /* bc */ - // int2((int)id.x + step_len, (int)id.y + step_len) /* br */ - // }; - // for (int i = 0; i < 8; ++i) { - // int2 coord = flood_coords[i]; - // if (coord.x >= 0 && coord.x < (int)tex_size.x && coord.y >= 0 && coord.y < (int)tex_size.y) { - // int2 old_flood = g_flood_textures[g_constants.flood_read_tex_urid][coord]; - // if (old_flood.x < 0 || old_flood.y < 0) { - // g_flood_textures[g_constants.flood_write_tex_urid][coord] = seed; - // } - // } - // } - // } + g_flood_textures[g_constants.write_flood_tex_urid][id] = closest_seed; } - } diff --git a/res/sh/sh_common.h b/res/sh/sh_common.h index e5ccf37e..879e9f90 100644 --- a/res/sh/sh_common.h +++ b/res/sh/sh_common.h @@ -106,8 +106,8 @@ SH_STRUCT(sh_material_grid { SH_STRUCT(sh_flood_constants { SH_DECL(int, step_len); SH_DECL(uint, emittance_tex_urid); - SH_DECL(uint, flood_read_tex_urid); - SH_DECL(uint, flood_write_tex_urid); + SH_DECL(uint, read_flood_tex_urid); + SH_DECL(uint, write_flood_tex_urid); SH_DECL(uint, tex_width); SH_DECL(uint, tex_height); }); diff --git a/res/sh/shade.hlsl b/res/sh/shade.hlsl index 7052d458..91879e24 100644 --- a/res/sh/shade.hlsl +++ b/res/sh/shade.hlsl @@ -20,7 +20,7 @@ ConstantBuffer g_constants : register(b0); Texture2D g_gbuff_textures[] : register(t0, space0); -Texture2D g_emittance_flood_textures[] : register(t0, space1); +Texture2D g_emittance_flood_textures[] : register(t0, space1); RWTexture2D g_write_textures[]: register(u0, space2); SamplerState g_sampler : register(s0); @@ -44,8 +44,9 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) float4 albedo = g_gbuff_textures[g_constants.albedo_tex_urid][id]; float4 emittance = g_gbuff_textures[g_constants.emittance_tex_urid][id]; - int2 emittance_flood = g_emittance_flood_textures[g_constants.emittance_flood_tex_urid][id]; - emittance_flood *= (emittance_flood.x >= 0 && emittance_flood.y >= 0); + uint emittance_flood_packed = g_emittance_flood_textures[g_constants.emittance_flood_tex_urid][id]; + uint2 emittance_flood = uint2(emittance_flood_packed >> 16, emittance_flood_packed & 0xFFFF); + emittance_flood *= emittance_flood_packed < 0xFFFFFFFF; float4 final_color = old_color + albedo + float4(float(emittance_flood.x) / float(g_constants.tex_width), float(emittance_flood.y) / float(g_constants.tex_height), 0, 1); diff --git a/src/collider.c b/src/collider.c index 5cb0a2f4..74edd732 100644 --- a/src/collider.c +++ b/src/collider.c @@ -1,6 +1,7 @@ #include "collider.h" #include "math.h" #include "arena.h" +#include "gstat.h" /* How close can non-overlapping shapes be before collision is considered */ #define COLLISION_TOLERANCE 0.005f @@ -12,10 +13,6 @@ #define MAX_EPA_ITERATIONS 64 #if COLLIDER_DEBUG -u32 collider_debug_steps = U32_MAX; -//u32 collider_debug_steps = 1000000; -//u32 collider_debug_steps = 50; - INTERNAL void _dbgbreakable(void) { #if RTC @@ -25,9 +22,9 @@ INTERNAL void _dbgbreakable(void) #define DBGSTEP \ dbg_step++; \ - if (dbg_step >= collider_debug_steps) { \ + if (dbg_step >= gstat_get(GSTAT_DEBUG_STEPS)) { \ goto abort; \ - } else if (dbg_step >= collider_debug_steps - 1) { \ + } else if (dbg_step >= gstat_get(GSTAT_DEBUG_STEPS) - 1) { \ _dbgbreakable(); \ } (void)0 #else diff --git a/src/collider.h b/src/collider.h index b7c7acdf..95d0f058 100644 --- a/src/collider.h +++ b/src/collider.h @@ -1,10 +1,6 @@ #ifndef COLLIDER_H #define COLLIDER_H -#if COLLIDER_DEBUG -extern u32 collider_debug_steps; -#endif - struct collider_support_point { struct v2 p; u32 i; /* Index of original point in shape */ diff --git a/src/config.h b/src/config.h index 9787e1ec..cc18976c 100644 --- a/src/config.h +++ b/src/config.h @@ -69,6 +69,8 @@ #define COLLIDER_DEBUG_DETAILED 1 #define COLLIDER_DEBUG_DETAILED_DRAW_MENKOWSKI 1 +#define FLOOD_DEBUG 1 + /* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */ #define BITBUFF_DEBUG 0 #define BITBUFF_TEST RTC diff --git a/src/gp_dx12.c b/src/gp_dx12.c index 691c0a37..fe494ead 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -2661,28 +2661,19 @@ void gp_run(struct gp_run_params params) if (!v2i32_eq(sig->old_size, final_target_size)) { __profn("Allocate buffers"); - /* Allocate albedo buffer */ + /* Release buffers */ + /* TODO: Batch release */ if (sig->albedo) { fenced_release(sig->albedo, FENCED_RELEASE_KIND_RESOURCE); - } - sig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); - - - /* Allocate emittance buffer */ - if (sig->emittance) { fenced_release(sig->emittance, FENCED_RELEASE_KIND_RESOURCE); - } - sig->emittance = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); - - /* Allocate emittance field buffers */ - if (sig->emittance_flood_a) { fenced_release(sig->emittance_flood_a, FENCED_RELEASE_KIND_RESOURCE); - } - if (sig->emittance_flood_b) { fenced_release(sig->emittance_flood_b, FENCED_RELEASE_KIND_RESOURCE); } - sig->emittance_flood_a = gbuff_alloc(DXGI_FORMAT_R16G16_SINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - sig->emittance_flood_b = gbuff_alloc(DXGI_FORMAT_R16G16_SINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + /* Alloc buffers */ + sig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); + sig->emittance = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); + sig->emittance_flood_a = gbuff_alloc(DXGI_FORMAT_R32_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + sig->emittance_flood_b = gbuff_alloc(DXGI_FORMAT_R32_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } struct sprite_scope *sprite_scope = sprite_scope_begin(); @@ -2829,9 +2820,10 @@ void gp_run(struct gp_run_params params) } } + /* Flood pass */ struct dx12_resource *emittance_flood_read = sig->emittance_flood_a; - struct dx12_resource *emittance_flood_write = sig->emittance_flood_b; + struct dx12_resource *emittance_flood_write = sig->emittance_flood_a; { __profn("Flood pass"); __profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", RGB32_F(0.5, 0.2, 0.2)); @@ -2840,21 +2832,12 @@ void gp_run(struct gp_run_params params) { struct dx12_resource_barrier_desc barriers[] = { { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_write, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }, + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_write, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } }; dx12_resource_barriers(cl->cl, countof(barriers), barriers); } - /* Clear emittance floods */ - if (params.clear_target) { - __profn("Clear emittance floods"); - __profnc_dx12(cl->cq->prof, cl->cl, "Clear emittance floods", RGB32_F(0.5, 0.2, 0.2)); - f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(emittance_flood_read->uav_descriptor, descriptor_heap), emittance_flood_read->uav_descriptor->handle, emittance_flood_read->resource, clear_color, 0, 0); - ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(emittance_flood_write->uav_descriptor, descriptor_heap), emittance_flood_write->uav_descriptor->handle, emittance_flood_write->resource, clear_color, 0, 0); - } - /* Dispatch */ if (flood_pipeline->success) { /* Bind pipeline */ @@ -2862,16 +2845,18 @@ void gp_run(struct gp_run_params params) ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, flood_pipeline->rootsig); i32 step_length = -1; - while (step_length != 0) { + + /* TODO: Remove this */ + u64 max_steps = gstat_get(GSTAT_DEBUG_STEPS); + u64 step = 0; + while (step_length != 0 && step < max_steps) { __profn("Flood step"); __profnc_dx12(cl->cq->prof, cl->cl, "Flood step", RGB32_F(0.5, 0.2, 0.2)); /* UAV barrier */ { struct dx12_resource_barrier_desc barriers[] = { - { D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 }, - /* TODO: Remove this barrier */ - { D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_write, 0 } + { D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 } }; dx12_resource_barriers(cl->cl, countof(barriers), barriers); } @@ -2880,8 +2865,8 @@ void gp_run(struct gp_run_params params) struct sh_flood_constants constants = ZI; constants.step_len = sh_int_from_i32(step_length); constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index); - constants.flood_read_tex_urid = sh_uint_from_u32(emittance_flood_read->uav_descriptor->index); - constants.flood_write_tex_urid = sh_uint_from_u32(emittance_flood_write->uav_descriptor->index); + constants.read_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->uav_descriptor->index); + constants.write_flood_tex_urid = sh_uint_from_u32(emittance_flood_write->uav_descriptor->index); constants.tex_width = sh_uint_from_u32(final_target_size.x); constants.tex_height = sh_uint_from_u32(final_target_size.y); @@ -2893,15 +2878,19 @@ void gp_run(struct gp_run_params params) /* Dispatch */ ID3D12GraphicsCommandList_Dispatch(cl->cl, (final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1); - /* Swap & increment */ + /* Swap buffers */ struct dx12_resource *swp = emittance_flood_read; emittance_flood_read = emittance_flood_write; emittance_flood_write = swp; + + /* Update step */ if (step_length == -1) { step_length = max_i32(final_target_size.x, final_target_size.y) / 2; + //step_length = 16; } else { step_length /= 2; } + ++step; } } } diff --git a/src/gstat.h b/src/gstat.h index c55499d0..be62c443 100644 --- a/src/gstat.h +++ b/src/gstat.h @@ -13,6 +13,7 @@ struct _gstats { struct atomic64_padded GSTAT_MEMORY_COMMITTED; struct atomic64_padded GSTAT_MEMORY_RESERVED; struct atomic64_padded GSTAT_NUM_ARENAS; + struct atomic64_padded GSTAT_DEBUG_STEPS; }; extern struct _gstats _g_gstats; diff --git a/src/prof_tracy.h b/src/prof_tracy.h index 69430c21..d68f31d9 100644 --- a/src/prof_tracy.h +++ b/src/prof_tracy.h @@ -11,7 +11,8 @@ #define PROFILING_CAPTURE_FRAME_IMAGE 0 #define PROFILING_LOCKS 0 #define PROFILING_D3D 1 -#define PROFILER_THREAD_AFFINITY_MASK 0x000000000000F000ull +//#define PROFILER_THREAD_AFFINITY_MASK 0x000000000000F000ull +#define PROFILER_THREAD_AFFINITY_MASK 0 #define PROFILER_THREAD_PREFIX_WSTR L"Tracy" #define PROFILING_FILE_WSTR L".tracy" #define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy" diff --git a/src/sys_win32.c b/src/sys_win32.c index c7954830..dd837e62 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -1066,6 +1066,7 @@ INTERNAL THREAD_DEF(job_worker_entry, worker_ctx_arg) (UNUSED)success; } +#if 0 if (pool->thread_affinity_mask) { __profn("Set affinity"); b32 success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; @@ -1084,6 +1085,7 @@ INTERNAL THREAD_DEF(job_worker_entry, worker_ctx_arg) ASSERT(success); (UNUSED)success; } +#endif if (pool->thread_is_audio) { /* https://learn.microsoft.com/en-us/windows/win32/procthread/multimedia-class-scheduler-service#registry-settings */ diff --git a/src/user.c b/src/user.c index 83403360..06e1b494 100644 --- a/src/user.c +++ b/src/user.c @@ -179,11 +179,11 @@ GLOBAL READONLY enum user_bind_kind g_binds[SYS_BTN_COUNT] = { [SYS_BTN_MWHEELDOWN] = USER_BIND_KIND_ZOOM_OUT, [SYS_BTN_M3] = USER_BIND_KIND_PAN, -#if COLLIDER_DEBUG +#if RTC /* Debug */ - [SYS_BTN_FORWARD_SLASH] = USER_BIND_KIND_RESET_COLLIDER_GJK_STEPS, - [SYS_BTN_COMMA] = USER_BIND_KIND_DECR_COLLIDER_GJK_STEPS, - [SYS_BTN_PERIOD] = USER_BIND_KIND_INCR_COLLIDER_GJK_STEPS + [SYS_BTN_FORWARD_SLASH] = USER_BIND_KIND_RESET_DEBUG_STEPS, + [SYS_BTN_COMMA] = USER_BIND_KIND_DECR_DEBUG_STEPS, + [SYS_BTN_PERIOD] = USER_BIND_KIND_INCR_DEBUG_STEPS #endif }; @@ -216,6 +216,8 @@ struct user_startup_receipt user_startup(struct font_startup_receipt *font_sr, (UNUSED)host_sr; (UNUSED)sim_sr; + gstat_set(GSTAT_DEBUG_STEPS, U64_MAX); + G.arena = arena_alloc(GIBI(64)); G.real_time_ns = sys_time_ns(); @@ -1828,14 +1830,18 @@ INTERNAL void user_update(struct sys_window *window) } } -#if COLLIDER_DEBUG +#if RTC /* Gjk steps */ { - i64 new_steps = collider_debug_steps; - new_steps += G.bind_states[USER_BIND_KIND_INCR_COLLIDER_GJK_STEPS].num_presses_and_repeats; - new_steps -= G.bind_states[USER_BIND_KIND_DECR_COLLIDER_GJK_STEPS].num_presses_and_repeats; - if (G.bind_states[USER_BIND_KIND_RESET_COLLIDER_GJK_STEPS].num_presses_and_repeats > 0) new_steps = 0; - collider_debug_steps = (u32)clamp_i64(new_steps, 0, U32_MAX); + if (G.bind_states[USER_BIND_KIND_RESET_DEBUG_STEPS].num_presses_and_repeats > 0) { + gstat_set(GSTAT_DEBUG_STEPS, 0); + } + i32 add_steps = 0; + add_steps += G.bind_states[USER_BIND_KIND_INCR_DEBUG_STEPS].num_presses_and_repeats; + add_steps -= G.bind_states[USER_BIND_KIND_DECR_DEBUG_STEPS].num_presses_and_repeats; + if (add_steps != 0) { + gstat_add(GSTAT_DEBUG_STEPS, add_steps); + } } #endif } @@ -1977,9 +1983,11 @@ INTERNAL void user_update(struct sys_window *window) //text.len += string_copy(temp.arena, LIT("\n")).len; //text.len += string_copy(temp.arena, LIT("\n")).len; -#if COLLIDER_DEBUG - draw_text(G.ui_gp_sig, font, pos, string_format(temp.arena, LIT("collider gjk steps: %F"), FMT_UINT(collider_debug_steps))); - pos.y += spacing; +#if RTC + text.len += string_copy(temp.arena, LIT("\n")).len; + text.len += string_copy(temp.arena, LIT("\n")).len; + text.len += string_format(temp.arena, LIT("Debug steps: %F"), FMT_UINT(gstat_get(GSTAT_DEBUG_STEPS))).len; + //text.len += string_copy(temp.arena, LIT("\n")).len; #endif //draw_text(G.ui_gp_sig, font, pos, string_format(temp.arena, LIT("blended world entities: %F/%F"), FMT_UINT(G.ss_blended->num_ents_allocated), FMT_UINT(G.ss_blended->num_ents_reserved))); diff --git a/src/user.h b/src/user.h index 39a59930..2d650ba0 100644 --- a/src/user.h +++ b/src/user.h @@ -52,9 +52,9 @@ enum user_bind_kind { #if RTC /* Debug */ - USER_BIND_KIND_RESET_COLLIDER_GJK_STEPS, - USER_BIND_KIND_INCR_COLLIDER_GJK_STEPS, - USER_BIND_KIND_DECR_COLLIDER_GJK_STEPS, + USER_BIND_KIND_RESET_DEBUG_STEPS, + USER_BIND_KIND_INCR_DEBUG_STEPS, + USER_BIND_KIND_DECR_DEBUG_STEPS, #endif USER_BIND_KIND_COUNT