From c6f500ebf42fed042737ea6585eca20feb2357aa Mon Sep 17 00:00:00 2001 From: jacob Date: Sun, 20 Jul 2025 10:40:53 -0500 Subject: [PATCH] shade & blit to final target separately --- res/sh/blit.hlsl | 23 +++++ res/sh/flood.hlsl | 6 +- res/sh/sh_common.h | 100 ++++++++++-------- res/sh/shade.hlsl | 24 +---- src/gp.h | 1 - src/gp_dx12.c | 253 +++++++++++++++++++++++++++++---------------- src/sys_win32.c | 34 +++--- src/user.c | 1 - 8 files changed, 266 insertions(+), 176 deletions(-) diff --git a/res/sh/blit.hlsl b/res/sh/blit.hlsl index b51d78e6..be20b1a2 100644 --- a/res/sh/blit.hlsl +++ b/res/sh/blit.hlsl @@ -50,6 +50,17 @@ SH_ENTRY(ROOTSIG) struct vs_output vs(struct vs_input input) return output; } +/* ========================== * + * Tone map + * ========================== */ + +/* ACES approximation by Krzysztof Narkowicz + * https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ */ +INLINE float3 tone_map(float3 v) +{ + return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f)); +} + /* ========================== * * Pixel shader * ========================== */ @@ -66,6 +77,18 @@ SH_ENTRY(ROOTSIG) struct ps_output ps(struct ps_input input) { struct ps_output output; float4 color = g_textures[g_constants.tex_urid].Sample(g_sampler, input.vs.uv); + + /* Apply exposure, then tone map (exposure must scale the linear color before the saturating curve, otherwise the result clips) */ + if (g_constants.flags & SH_BLIT_FLAG_TONE_MAP) { + /* TODO: Dynamic exposure based on average scene luminance */ + color.rgb = tone_map(color.rgb * g_constants.exposure); + } + + /* Apply gamma correction to the color channels only (alpha is left untouched) */ + if (g_constants.flags & SH_BLIT_FLAG_GAMMA_CORRECT) { + color.rgb = pow(abs(color.rgb), 1/g_constants.gamma); + } + output.SV_Target = color; return output; } diff --git a/res/sh/flood.hlsl b/res/sh/flood.hlsl index 14dd35e6..80b3a578 100644 --- a/res/sh/flood.hlsl +++ b/res/sh/flood.hlsl @@ -39,7 +39,7 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) if (id.x < tex_size.x && id.y < tex_size.y) { Texture2D emittance_tex =
g_emittance_textures[g_constants.emittance_tex_urid]; RWTexture2D read_flood_tex = g_flood_textures[g_constants.read_flood_tex_urid]; - RWTexture2D write_flood_tex = g_flood_textures[g_constants.write_flood_tex_urid]; + RWTexture2D target_flood_tex = g_flood_textures[g_constants.target_flood_tex_urid]; int step_len = g_constants.step_len; if (step_len == -1) { /* Seed */ @@ -48,7 +48,7 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) if (emittance.a > 0) { seed = id; } - write_flood_tex[id] = seed; + target_flood_tex[id] = seed; } else { /* Flood */ int2 read_coords[9] = { @@ -76,7 +76,7 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) } } } - write_flood_tex[id] = closest_seed; + target_flood_tex[id] = closest_seed; } } } diff --git a/res/sh/sh_common.h b/res/sh/sh_common.h index b6440701..8ba06f2c 100644 --- a/res/sh/sh_common.h +++ b/res/sh/sh_common.h @@ -4,8 +4,9 @@ #define SH_DECL(t, n) struct CAT(sh_, t) n #define SH_DECLS(t, n) SH_DECL(t, n) #define SH_ENTRY(rootsig) static -#define SH_ASSERT_32BIT(s, n) STATIC_ASSERT(sizeof(s) % 16 == 0); /* Root constant structs should pad to 16 byte alignment */ \ - STATIC_ASSERT((sizeof(s) / 4) == n) /* Verify that struct size matches supplied 32 bit count */ +#define SH_ASSERT_ROOT_CONST(s, n) STATIC_ASSERT(sizeof(s) % 16 == 0); /* Root constant struct should pad to 16 byte alignment */ \ + STATIC_ASSERT((sizeof(s) / 4) == n); /* Root constant struct size should match the specified 32-bit-constant count */ \ + STATIC_ASSERT((sizeof(s) <= 256)) /* Root constant struct can only fit 64 DWORDS */ struct sh_uint { u32 v; }; INLINE struct sh_uint sh_uint_from_u32(u32 v) @@ -20,15 +21,21 @@ INLINE struct sh_int sh_int_from_i32(i32 v) } struct sh_uint2 { u32 v[2]; }; -INLINE struct sh_uint2 sh_uint2_from_u32(u32 a, u32 b) +INLINE struct sh_uint2 sh_uint2_from_u32(u32 x, u32 y) { - return (struct sh_uint2) { .v[0] = a, .v[1] = b }; + return (struct sh_uint2) { .v[0] = x, .v[1] = y }; } struct sh_uint3 { u32 v[3]; }; 
-INLINE struct sh_uint3 sh_uint3_from_u32(u32 a, u32 b, u32 c) +INLINE struct sh_uint3 sh_uint3_from_u32(u32 x, u32 y, u32 z) { - return (struct sh_uint3) { .v[0] = a, .v[1] = b, .v[2] = c }; + return (struct sh_uint3) { .v[0] = x, .v[1] = y, .v[2] = z }; +} + +struct sh_uint4 { u32 v[4]; }; +INLINE struct sh_uint4 sh_uint4_from_u32(u32 x, u32 y, u32 z, u32 w) +{ + return (struct sh_uint4) { .v[0] = x, .v[1] = y, .v[2] = z, .v[3] = w }; } struct sh_float { f32 v; }; @@ -73,7 +80,7 @@ INLINE struct sh_float2x3 sh_float2x3_from_xform(struct xform v) #define SH_DECL(t, n) t n #define SH_DECLS(t, n) t n : n #define SH_ENTRY(rootsig) [RootSignature(rootsig)] -#define SH_ASSERT_32BIT(s, n) +#define SH_ASSERT_ROOT_CONST(s, n) #endif @@ -92,11 +99,11 @@ INLINE struct sh_float2x3 sh_float2x3_from_xform(struct xform v) * ========================== */ SH_STRUCT(sh_material_constants { - /* ==================================================== */ + /* ---------------------------------------------------- */ SH_DECL(float4x4, projection); /* 16 consts */ - /* ==================================================== */ + /* ---------------------------------------------------- */ }); -SH_ASSERT_32BIT(struct sh_material_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ +SH_ASSERT_ROOT_CONST(struct sh_material_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ SH_STRUCT(sh_material_instance { SH_DECL(int, tex_nurid); @@ -125,55 +132,52 @@ SH_STRUCT(sh_material_grid { * ========================== */ SH_STRUCT(sh_flood_constants { - /* ==================================================== */ - SH_DECL(int, step_len); /* 04 consts */ - SH_DECL(uint, emittance_tex_urid); /* 04 consts */ - SH_DECL(uint, read_flood_tex_urid); /* 04 consts */ - SH_DECL(uint, write_flood_tex_urid); /* 04 consts */ - /* ==================================================== */ - SH_DECL(uint, tex_width); /* 04 consts */ - SH_DECL(uint, tex_height); 
/* 04 consts */ - SH_DECL(uint2, _pad0); /* 08 consts (padding) */ - /* ==================================================== */ + /* ---------------------------------------------------- */ + SH_DECL(int, step_len); /* 01 consts */ + SH_DECL(uint, emittance_tex_urid); /* 01 consts */ + SH_DECL(uint, read_flood_tex_urid); /* 01 consts */ + SH_DECL(uint, target_flood_tex_urid); /* 01 consts */ + /* ---------------------------------------------------- */ + SH_DECL(uint, tex_width); /* 01 consts */ + SH_DECL(uint, tex_height); /* 01 consts */ + SH_DECL(uint2, _pad0); /* 02 consts (padding) */ + /* ---------------------------------------------------- */ }); -SH_ASSERT_32BIT(struct sh_flood_constants, 8); /* Expected to match num32BitConstants in shader's root signature */ +SH_ASSERT_ROOT_CONST(struct sh_flood_constants, 8); /* Expected to match num32BitConstants in shader's root signature */ /* ========================== * * Shade shader structures * ========================== */ SH_STRUCT(sh_shade_constants { - /* ==================================================== */ - SH_DECL(uint, frame_index); /* 01 consts */ - SH_DECL(uint3, frame_seed); /* 03 consts */ - /* ==================================================== */ + /* ---------------------------------------------------- */ + SH_DECL(uint4, frame_seed); /* 04 consts */ + /* ---------------------------------------------------- */ SH_DECL(float2, camera_offset); /* 02 consts */ + SH_DECL(uint, frame_index); /* 01 consts */ SH_DECL(uint, albedo_tex_urid); /* 01 consts */ - SH_DECL(uint, _pad0); /* 01 consts (padding) */ - /* ==================================================== */ + /* ---------------------------------------------------- */ SH_DECL(uint, emittance_tex_urid); /* 01 consts */ SH_DECL(uint, emittance_flood_tex_urid); /* 01 consts */ - SH_DECL(uint, write_tex_urid); /* 01 consts */ + SH_DECL(uint, target_tex_urid); /* 01 consts */ SH_DECL(uint, tex_width); /* 01 consts */ - /* 
==================================================== */ + /* ---------------------------------------------------- */ SH_DECL(uint, tex_height); /* 01 consts */ - SH_DECL(float, exposure); /* 01 consts */ - SH_DECL(float, gamma); /* 01 consts */ - SH_DECL(uint, _pad1); /* 01 consts (padding) */ - /* ==================================================== */ + SH_DECL(uint3, _pad0); /* 03 consts (padding) */ + /* ---------------------------------------------------- */ }); -SH_ASSERT_32BIT(struct sh_shade_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ +SH_ASSERT_ROOT_CONST(struct sh_shade_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ /* ========================== * * Shape shader structures * ========================== */ SH_STRUCT(sh_shape_constants { - /* ==================================================== */ + /* ---------------------------------------------------- */ SH_DECL(float4x4, projection); /* 16 consts */ - /* ==================================================== */ + /* ---------------------------------------------------- */ }); -SH_ASSERT_32BIT(struct sh_shape_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ +SH_ASSERT_ROOT_CONST(struct sh_shape_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ SH_STRUCT(sh_shape_vert { SH_DECLS(float2, pos); @@ -185,11 +189,11 @@ SH_STRUCT(sh_shape_vert { * ========================== */ SH_STRUCT(sh_ui_constants { - /* ==================================================== */ + /* ---------------------------------------------------- */ SH_DECL(float4x4, projection); /* 16 consts */ - /* ==================================================== */ + /* ---------------------------------------------------- */ }); -SH_ASSERT_32BIT(struct sh_ui_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ +SH_ASSERT_ROOT_CONST(struct sh_ui_constants, 16); /* Expected 
to match num32BitConstants in shader's root signature */ SH_STRUCT(sh_ui_instance { SH_DECL(int, tex_nurid); @@ -204,12 +208,18 @@ SH_STRUCT(sh_ui_instance { * Blit shader structures * ========================== */ + #define SH_BLIT_FLAG_NONE (0 << 0) + #define SH_BLIT_FLAG_TONE_MAP (1 << 0) + #define SH_BLIT_FLAG_GAMMA_CORRECT (1 << 1) + SH_STRUCT(sh_blit_constants { - /* ==================================================== */ + /* ---------------------------------------------------- */ SH_DECL(float4x4, projection); /* 16 consts */ - /* ==================================================== */ + /* ---------------------------------------------------- */ + SH_DECL(uint, flags); /* 01 consts */ SH_DECL(uint, tex_urid); /* 01 consts */ - SH_DECL(uint3, _pad0); /* 03 consts (padding) */ - /* ==================================================== */ + SH_DECL(float, exposure); /* 01 consts */ + SH_DECL(float, gamma); /* 01 consts */ + /* ---------------------------------------------------- */ }); -SH_ASSERT_32BIT(struct sh_blit_constants, 20); /* Expected to match num32BitConstants in shader's root signature */ +SH_ASSERT_ROOT_CONST(struct sh_blit_constants, 20); /* Expected to match num32BitConstants in shader's root signature */ diff --git a/res/sh/shade.hlsl b/res/sh/shade.hlsl index 53aeef2a..fedebe17 100644 --- a/res/sh/shade.hlsl +++ b/res/sh/shade.hlsl @@ -15,7 +15,7 @@ ConstantBuffer g_constants : register(b0); Texture2D g_textures_float4[] : register(t0, space0); Texture2D g_textures_uint2[] : register(t0, space1); Texture3D g_noise_textures[] : register(t0, space2); -RWTexture2D g_write_textures[]: register(u0, space3); +RWTexture2D g_target_textures[]: register(u0, space3); struct cs_input { DECLS(uint3, SV_DispatchThreadID); @@ -82,17 +82,6 @@ INLINE float4 get_light_at_pos(int2 pos) return result; } -/* ========================== * - * Tone map - * ========================== */ - -/* ACES approximation by Krzysztof Narkowicz - * 
https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ */ -INLINE float3 tone_map(float3 v) -{ - return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f)); -} - /* ========================== * * Entry point * ========================== */ @@ -103,7 +92,7 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) uint2 id = input.SV_DispatchThreadID.xy; if (id.x < g_constants.tex_width && id.y < g_constants.tex_height) { Texture2D albedo_tex = g_textures_float4[g_constants.albedo_tex_urid]; - RWTexture2D write_tex = g_write_textures[g_constants.write_tex_urid]; + RWTexture2D target_tex = g_target_textures[g_constants.target_tex_urid]; float4 color = float4(1, 1, 1, 1); /* Apply albedo */ @@ -112,13 +101,6 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) /* Apply lighting */ color *= get_light_at_pos(id); - /* Apply tone map */ - /* TODO: Dynamic exposure based on average scene luminance */ - color.rgb = tone_map(color.rgb) * g_constants.exposure; - - /* Apply gamma correction */ - color = pow(abs(color), 1/g_constants.gamma); - - write_tex[id] = color; + target_tex[id] = color; } } diff --git a/src/gp.h b/src/gp.h index 3eb8546f..047b2c30 100644 --- a/src/gp.h +++ b/src/gp.h @@ -107,7 +107,6 @@ struct gp_render_params { struct v2i32 draw_target_size; struct rect draw_target_viewport; struct xform draw_target_view; - b32 clear_target; }; struct gp_render_sig *gp_render_sig_alloc(void); diff --git a/src/gp_dx12.c b/src/gp_dx12.c index 45d7c2c8..06f07471 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -1869,14 +1869,14 @@ struct dx12_resource_barrier_desc { enum D3D12_RESOURCE_STATES new_state; /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */ }; -INTERNAL void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_barriers, struct dx12_resource_barrier_desc *descs) +INTERNAL void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs) { __prof; struct 
arena_temp scratch = scratch_begin_no_conflict(); i32 num_rbs = 0; - struct D3D12_RESOURCE_BARRIER *rbs = arena_push_array_no_zero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_barriers); - for (i32 i = 0; i < num_barriers; ++i) { + struct D3D12_RESOURCE_BARRIER *rbs = arena_push_array_no_zero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_descs); + for (i32 i = 0; i < num_descs; ++i) { struct dx12_resource_barrier_desc *desc = &descs[i]; struct dx12_resource *resource = desc->resource; enum D3D12_RESOURCE_BARRIER_TYPE type = desc->type; @@ -1901,7 +1901,7 @@ INTERNAL void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_barr rb->Flags = 0; rb->UAV.pResource = resource->resource; } else { - /* Unknown barrier */ + /* Unknown barrier type */ ASSERT(0); } } @@ -2787,11 +2787,13 @@ struct render_sig { struct arena *material_grid_descs_arena; /* Resources */ - struct dx12_resource *final_target; struct dx12_resource *albedo; struct dx12_resource *emittance; - struct dx12_resource *emittance_flood_a; - struct dx12_resource *emittance_flood_b; + struct dx12_resource *emittance_flood_read; + struct dx12_resource *emittance_flood_target; + struct dx12_resource *shade_read; + struct dx12_resource *shade_target; + struct dx12_resource *final_target; }; struct material_instance_desc { @@ -2947,32 +2949,35 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re __prof; struct arena_temp scratch = scratch_begin_no_conflict(); struct render_sig *sig = (struct render_sig *)render_sig; + ++sig->frame_index; struct v2i32 final_target_size = params.draw_target_size; final_target_size.x = max_i32(final_target_size.x, 1); final_target_size.y = max_i32(final_target_size.y, 1); - ++sig->frame_index; - - /* Release sig resources if size changed */ + /* Release buffers if size changed */ if (sig->final_target && !v2i32_eq(final_target_size, sig->final_target->texture_size)) { __profn("Release sig resources"); - 
fenced_release(sig->final_target, FENCED_RELEASE_KIND_RESOURCE); - fenced_release(sig->albedo, FENCED_RELEASE_KIND_RESOURCE); - fenced_release(sig->emittance, FENCED_RELEASE_KIND_RESOURCE); - fenced_release(sig->emittance_flood_a, FENCED_RELEASE_KIND_RESOURCE); - fenced_release(sig->emittance_flood_b, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->albedo, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->emittance, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->emittance_flood_read, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->emittance_flood_target, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->shade_read, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->shade_target, FENCED_RELEASE_KIND_RESOURCE); + fenced_release(sig->final_target, FENCED_RELEASE_KIND_RESOURCE); sig->final_target = 0; } - /* Allocate sig resources */ + /* Allocate buffers */ if (!sig->final_target) { __profn("Allocate sig resources"); - sig->final_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); - sig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); - sig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); - sig->emittance_flood_a = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - sig->emittance_flood_b = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + sig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); + sig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); + sig->emittance_flood_read = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + sig->emittance_flood_target = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, final_target_size, 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + sig->shade_read = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + sig->shade_target = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + sig->final_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET); } struct sprite_scope *sprite_scope = sprite_scope_begin(); @@ -2980,6 +2985,7 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re struct pipeline *material_pipeline = pipeline_from_name(pipeline_scope, LIT("material")); struct pipeline *flood_pipeline = pipeline_from_name(pipeline_scope, LIT("flood")); struct pipeline *shade_pipeline = pipeline_from_name(pipeline_scope, LIT("shade")); + struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, LIT("blit")); struct pipeline *ui_pipeline = pipeline_from_name(pipeline_scope, LIT("ui")); struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, LIT("shape")); struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; @@ -2988,6 +2994,7 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re __profn("Run render"); __profnc_dx12(cl->cq->prof, cl->cl, "Run render", RGB32_F(0.5, 0.2, 0.2)); struct mat4x4 world_vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height); + struct mat4x4 blit_vp_matrix = calculate_vp(XFORM_TRS(.t = v2_mul(V2(final_target_size.x, final_target_size.y), 0.5), .s = V2(final_target_size.x, final_target_size.y)), params.draw_target_viewport.width, params.draw_target_viewport.height); struct mat4x4 ui_vp_matrix = calculate_vp(XFORM_IDENT, params.draw_target_viewport.width, params.draw_target_viewport.height); /* Upload dummmy vert & index buffer */ @@ -3085,27 +3092,29 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re 
ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); - /* Bind gbuffers */ + /* Prep for material pass */ { - struct dx12_resource_barrier_desc barriers[] = { - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->albedo, D3D12_RESOURCE_STATE_RENDER_TARGET }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_RENDER_TARGET } - }; - D3D12_CPU_DESCRIPTOR_HANDLE rtvs[] = { - sig->albedo->rtv_descriptor->handle, - sig->emittance->rtv_descriptor->handle, - }; - dx12_resource_barriers(cl->cl, countof(barriers), barriers); - ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, countof(rtvs), rtvs, 0, 0); - } - - /* Clear gbuffers */ - { - __profn("Clear gbuffers"); - __profnc_dx12(cl->cq->prof, cl->cl, "Clear gbuffers", RGB32_F(0.5, 0.2, 0.2)); - f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->albedo->rtv_descriptor->handle, clear_color, 0, 0); - ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->emittance->rtv_descriptor->handle, clear_color, 0, 0); + /* Barrier */ + { + struct dx12_resource_barrier_desc barriers[] = { + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->albedo, D3D12_RESOURCE_STATE_RENDER_TARGET }, + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_RENDER_TARGET } + }; + D3D12_CPU_DESCRIPTOR_HANDLE rtvs[] = { + sig->albedo->rtv_descriptor->handle, + sig->emittance->rtv_descriptor->handle, + }; + dx12_resource_barriers(cl->cl, countof(barriers), barriers); + ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, countof(rtvs), rtvs, 0, 0); + } + /* Clear */ + { + __profn("Clear gbuffers"); + __profnc_dx12(cl->cq->prof, cl->cl, "Clear gbuffers", RGB32_F(0.5, 0.2, 0.2)); + f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->albedo->rtv_descriptor->handle, clear_color, 0, 0); + 
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->emittance->rtv_descriptor->handle, clear_color, 0, 0); + } } /* Material pass */ @@ -3143,16 +3152,20 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0); } - /* Transition emittance & emittance flood */ - struct dx12_resource *emittance_flood_read = sig->emittance_flood_a; - struct dx12_resource *emittance_flood_write = sig->emittance_flood_a; + /* Prep for flood pass */ { - struct dx12_resource_barrier_desc barriers[] = { - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_write, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } - }; - dx12_resource_barriers(cl->cl, countof(barriers), barriers); + /* Barrier */ + { + struct dx12_resource_barrier_desc barriers[] = { + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, + + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }, + { D3D12_RESOURCE_BARRIER_TYPE_UAV, sig->emittance_flood_read, 0 }, + + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance_flood_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } + }; + dx12_resource_barriers(cl->cl, countof(barriers), barriers); + } } /* Flood pass */ @@ -3176,7 +3189,7 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re /* UAV barrier */ { struct dx12_resource_barrier_desc barriers[] = { - { D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 } + { D3D12_RESOURCE_BARRIER_TYPE_UAV, sig->emittance_flood_read, 0 } }; dx12_resource_barriers(cl->cl, countof(barriers), barriers); } @@ -3185,8 +3198,8 @@ struct gp_resource *gp_run_render(struct gp_render_sig 
*render_sig, struct gp_re struct sh_flood_constants constants = ZI; constants.step_len = sh_int_from_i32(step_length); constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index); - constants.read_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->uav_descriptor->index); - constants.write_flood_tex_urid = sh_uint_from_u32(emittance_flood_write->uav_descriptor->index); + constants.read_flood_tex_urid = sh_uint_from_u32(sig->emittance_flood_read->uav_descriptor->index); + constants.target_flood_tex_urid = sh_uint_from_u32(sig->emittance_flood_target->uav_descriptor->index); constants.tex_width = sh_uint_from_u32(final_target_size.x); constants.tex_height = sh_uint_from_u32(final_target_size.y); @@ -3199,14 +3212,13 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re ID3D12GraphicsCommandList_Dispatch(cl->cl, (final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1); /* Swap buffers */ - struct dx12_resource *swp = emittance_flood_read; - emittance_flood_read = emittance_flood_write; - emittance_flood_write = swp; + struct dx12_resource *swp = sig->emittance_flood_read; + sig->emittance_flood_read = sig->emittance_flood_target; + sig->emittance_flood_target = swp; /* Update step */ if (step_length == -1) { step_length = max_i32(final_target_size.x, final_target_size.y) / 2; - //step_length = 16; } else { step_length /= 2; } @@ -3214,24 +3226,31 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re } } - /* Transition gbuffers & final target */ + /* Prep for shade pass */ { - struct dx12_resource_barrier_desc barriers[] = { - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->albedo, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - { D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, 
D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->final_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } - }; - dx12_resource_barriers(cl->cl, countof(barriers), barriers); - } + /* Barrier */ + { + struct dx12_resource_barrier_desc barriers[] = { + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->albedo, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - /* Clear final target */ - if (params.clear_target) { - __profn("Clear target"); - __profnc_dx12(cl->cq->prof, cl->cl, "Clear target", RGB32_F(0.5, 0.2, 0.2)); - f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(sig->final_target->uav_descriptor, descriptor_heap), sig->final_target->uav_descriptor->handle, sig->final_target->resource, clear_color, 0, 0); + { D3D12_RESOURCE_BARRIER_TYPE_UAV, sig->emittance_flood_read, 0 }, /* Wait for the last flood step's writes */ + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance_flood_read, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, /* Shade pass binds the flood result by its SRV index */ + + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }, + { D3D12_RESOURCE_BARRIER_TYPE_UAV, sig->shade_read, 0 }, + + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->shade_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } + }; + dx12_resource_barriers(cl->cl, countof(barriers), barriers); + } + /* Clear */ + { + __profn("Clear shade target"); + __profnc_dx12(cl->cq->prof, cl->cl, "Clear shade target", RGB32_F(0.5, 0.2, 0.2)); + f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(sig->shade_target->uav_descriptor, descriptor_heap), sig->shade_target->uav_descriptor->handle, sig->shade_target->resource, clear_color, 0, 0); + } } /* Shade pass */ @@ -3245,18 +3264,18 @@ struct gp_resource *gp_run_render(struct gp_render_sig
*render_sig, struct gp_re /* Set constants */ struct sh_shade_constants constants = ZI; - /* TODO: Remove this */ + constants.frame_seed = sh_uint4_from_u32((u32)rand_u64_from_state(&sig->rand), + (u32)rand_u64_from_state(&sig->rand), + (u32)rand_u64_from_state(&sig->rand), + (u32)rand_u64_from_state(&sig->rand)); constants.frame_index = sh_uint_from_u32(sig->frame_index); - constants.frame_seed = sh_uint3_from_u32((u32)rand_u64_from_state(&sig->rand), (u32)rand_u64_from_state(&sig->rand), (u32)rand_u64_from_state(&sig->rand)); constants.camera_offset = sh_float2_from_v2(params.draw_target_view.og); constants.albedo_tex_urid = sh_uint_from_u32(sig->albedo->srv_descriptor->index); constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index); - constants.emittance_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->srv_descriptor->index); - constants.write_tex_urid = sh_uint_from_u32(sig->final_target->uav_descriptor->index); + constants.emittance_flood_tex_urid = sh_uint_from_u32(sig->emittance_flood_read->srv_descriptor->index); + constants.target_tex_urid = sh_uint_from_u32(sig->shade_target->uav_descriptor->index); constants.tex_width = sh_uint_from_u32(final_target_size.x); constants.tex_height = sh_uint_from_u32(final_target_size.y); - constants.exposure = sh_float_from_f32(1.0); - constants.gamma = sh_float_from_f32(2.2); /* Set parameters */ command_list_set_compute_root_constant(cl, &constants, sizeof(constants)); @@ -3267,16 +3286,73 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re /* Dispatch */ ID3D12GraphicsCommandList_Dispatch(cl->cl, (final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1); + + /* Swap */ + struct dx12_resource *swp = sig->shade_read; + sig->shade_read = sig->shade_target; + sig->shade_target = swp; } - /* Bind final target as RTV */ + /* Prep for UI pass */ { - struct dx12_resource_barrier_desc barriers[] = { - { D3D12_RESOURCE_BARRIER_TYPE_UAV, 
sig->final_target, 0}, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->final_target, D3D12_RESOURCE_STATE_RENDER_TARGET }, - }; - dx12_resource_barriers(cl->cl, countof(barriers), barriers); - ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &sig->final_target->rtv_descriptor->handle, 0, 0); + /* Barrier */ + { + struct dx12_resource_barrier_desc barriers[] = { + { D3D12_RESOURCE_BARRIER_TYPE_UAV, sig->shade_read, 0 }, /* Wait for the shade pass writes */ + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->shade_read, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, /* Blit pass samples the shade result */ + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->final_target, D3D12_RESOURCE_STATE_RENDER_TARGET } + }; + dx12_resource_barriers(cl->cl, countof(barriers), barriers); + ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &sig->final_target->rtv_descriptor->handle, 0, 0); + } + /* Clear */ + { + __profn("Clear final target"); + __profnc_dx12(cl->cq->prof, cl->cl, "Clear final target", RGB32_F(0.5, 0.2, 0.2)); + f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->final_target->rtv_descriptor->handle, clear_color, 0, 0); + } + } + + /* UI blit pass */ + if (blit_pipeline->success) { + __profn("UI blit pass"); + __profnc_dx12(cl->cq->prof, cl->cl, "UI blit pass", RGB32_F(0.5, 0.2, 0.2)); + + /* Bind pipeline */ + ID3D12GraphicsCommandList_SetPipelineState(cl->cl, blit_pipeline->pso); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, blit_pipeline->rootsig); + + /* Set Rasterizer State */ +#if 1 + D3D12_VIEWPORT viewport = viewport_from_rect(params.draw_target_viewport); + D3D12_RECT scissor = scissor_from_rect(params.draw_target_viewport); +#else + D3D12_VIEWPORT viewport = viewport_from_rect(RECT_FROM_V2(V2(0, 0), V2(final_target_size.x, final_target_size.y))); + D3D12_RECT scissor = scissor_from_rect(params.draw_target_viewport); +#endif + ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor); +
+ /* Set constants */ + struct sh_blit_constants constants = ZI; + constants.projection = sh_float4x4_from_mat4x4(blit_vp_matrix); + constants.flags = sh_uint_from_u32(SH_BLIT_FLAG_TONE_MAP | SH_BLIT_FLAG_GAMMA_CORRECT); + constants.exposure = sh_float_from_f32(2.0); + constants.gamma = sh_float_from_f32(2.2); + constants.tex_urid = sh_uint_from_u32(sig->shade_read->srv_descriptor->index); /* Sampled by the blit shader, so pass the SRV index (same as present_blit) */ + + /* Set parameters */ + command_list_set_graphics_root_constant(cl, &constants, sizeof(constants)); + ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl->cl, 1, descriptor_heap->start_gpu_handle); + + /* Draw */ + D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0); + D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv); + ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv); + ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0); } /* UI rect pass */ @@ -3552,8 +3628,8 @@ INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; struct command_list *cl = command_list_open(cq->cl_pool); { - __profn("Blit"); - __profnc_dx12(cl->cq->prof, cl->cl, "Blit", RGB32_F(0.5, 0.2, 0.2)); + __profn("Present blit"); + __profnc_dx12(cl->cq->prof, cl->cl, "Present blit", RGB32_F(0.5, 0.2, 0.2)); struct swapchain *swapchain = dst->swapchain; /* Upload dummmy vert & index buffer */ @@ -3604,6 +3680,7 @@ INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s /* Set constants */ struct sh_blit_constants constants = ZI; constants.projection = sh_float4x4_from_mat4x4(vp_matrix); + constants.flags = sh_uint_from_u32(SH_BLIT_FLAG_NONE); constants.tex_urid = sh_uint_from_u32(src->srv_descriptor->index); /* Set parameters */ diff
--git a/src/sys_win32.c b/src/sys_win32.c index 6cd6e296..8b85c137 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -178,44 +178,44 @@ struct yield_param { }; struct alignas(64) fiber { - /* ==================================================== */ + /* ---------------------------------------------------- */ void *addr; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ char *name_cstr; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ struct atomic32 wake_lock; /* 04 bytes (4 byte alignment) */ i16 id; /* 02 bytes */ i16 parent_id; /* 02 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ u64 wait_addr; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ u64 wait_time; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ i16 next_addr_waiter; /* 02 bytes */ i16 prev_addr_waiter; /* 02 bytes */ i16 next_time_waiter; /* 02 bytes */ i16 prev_time_waiter; /* 02 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ u8 _pad1[8]; /* 08 bytes (padding) */ - /* ==================================================== */ + /* ---------------------------------------------------- */ u8 _pad2[8]; /* 08 bytes (padding) */ - /* ==================================================== */ - /* ==================== Cache line ==================== */ - /* ==================================================== */ + /* ---------------------------------------------------- */ + /* -------------------- Cache line -------------------- */ + /* ---------------------------------------------------- */ struct 
sys_scratch_ctx scratch_ctx; /* 16 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ sys_job_func *job_func; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ void *job_sig; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ i32 job_id; /* 04 bytes */ i16 job_pool; /* 02 bytes */ i16 job_priority; /* 02 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ struct snc_counter *job_counter; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ struct yield_param *yield_param; /* 08 bytes */ - /* ==================================================== */ + /* ---------------------------------------------------- */ u8 _pad3[8]; /* 08 bytes (padding) */ }; diff --git a/src/user.c b/src/user.c index 0f5ae100..e990e25e 100644 --- a/src/user.c +++ b/src/user.c @@ -2027,7 +2027,6 @@ INTERNAL void user_update(struct sys_window *window) params.draw_target_size = user_resolution; params.draw_target_viewport = user_viewport; params.draw_target_view = G.world_to_user_xf; - params.clear_target = 1; render_texture = gp_run_render(G.render_sig, params); }