diff --git a/res/noise_128x128x64_16.dat b/res/noise_128x128x64_16.dat new file mode 100644 index 00000000..bfbdffc9 --- /dev/null +++ b/res/noise_128x128x64_16.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c141664e6879a3a336816112a8fbabe72067d5dcdd57c130d836de6dda5b2e +size 2097152 diff --git a/res/sh/blit.hlsl b/res/sh/blit.hlsl index 5750c535..b51d78e6 100644 --- a/res/sh/blit.hlsl +++ b/res/sh/blit.hlsl @@ -5,7 +5,7 @@ * ========================== */ #define ROOTSIG \ - "RootConstants(num32BitConstants = 17, b0), " \ + "RootConstants(num32BitConstants = 20, b0), " \ "DescriptorTable(SRV(t0, space = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ \ "StaticSampler(s0, " \ diff --git a/res/sh/flood.hlsl b/res/sh/flood.hlsl index cc4f71df..14dd35e6 100644 --- a/res/sh/flood.hlsl +++ b/res/sh/flood.hlsl @@ -5,7 +5,7 @@ * ========================== */ #define ROOTSIG \ - "RootConstants(num32BitConstants = 6, b0), " \ + "RootConstants(num32BitConstants = 8, b0), " \ "DescriptorTable(SRV(t0, space = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ "DescriptorTable(UAV(u0, space = 1, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ \ @@ -36,50 +36,47 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) { uint2 id = input.SV_DispatchThreadID.xy; uint2 tex_size = uint2(g_constants.tex_width, g_constants.tex_height); - if (id.x >= tex_size.x || id.y >= tex_size.y) { - return; /* Overflow */ - } - - Texture2D emittance_tex = g_emittance_textures[g_constants.emittance_tex_urid]; - RWTexture2D read_flood_tex = g_flood_textures[g_constants.read_flood_tex_urid]; - RWTexture2D write_flood_tex = g_flood_textures[g_constants.write_flood_tex_urid]; - - int step_len = g_constants.step_len; - if (step_len == -1) { - /* Seed */ - float4 emittance = emittance_tex[id]; - uint2 seed = uint2(0xFFFF, 0xFFFF); - if (emittance.a > 0) { - seed = id; - } - write_flood_tex[id] = seed; - } else { - /* Flood */ - int2 read_coords[9] = { - (int2)id + int2(-step_len, -step_len), /* top left */ - (int2)id + int2(0 , -step_len), /* top center */ - (int2)id + int2(+step_len, -step_len), /* top right */ - (int2)id + int2(-step_len, 0 ), /* center left */ - (int2)id + int2(0 , 0 ), /* center center */ - (int2)id + int2(+step_len, 0 ), /* center right */ - (int2)id + int2(-step_len, +step_len), /* bottom left */ - (int2)id + int2(0 , +step_len), /* bottom center */ - (int2)id + int2(+step_len, +step_len) /* bottom right */ - }; - uint2 closest_seed = uint2(0xFFFF, 0xFFFF); - uint closest_seed_len_sq = 0xFFFFFFFF; - for (int i = 0; i < 9; ++i) { - int2 coord = read_coords[i]; - if (coord.x >= 0 && coord.x < (int)tex_size.x && coord.y >= 0 && coord.y < (int)tex_size.y) { - uint2 seed = read_flood_tex[coord]; - int2 dist_vec = (int2)id - (int2)seed; - uint dist_len_sq = dot(dist_vec, dist_vec); - if (dist_len_sq < closest_seed_len_sq) { - closest_seed = seed; - closest_seed_len_sq = dist_len_sq; + if (id.x < tex_size.x && id.y < tex_size.y) { + Texture2D emittance_tex = g_emittance_textures[g_constants.emittance_tex_urid]; + RWTexture2D read_flood_tex = g_flood_textures[g_constants.read_flood_tex_urid]; + RWTexture2D write_flood_tex = g_flood_textures[g_constants.write_flood_tex_urid]; + int step_len = g_constants.step_len; + if (step_len == -1) { + /* Seed */ + float4 emittance = emittance_tex[id]; + uint2 seed = uint2(0xFFFF, 0xFFFF); + if (emittance.a > 0) { + seed = id; + } + write_flood_tex[id] = seed; + } else { + /* Flood */ + int2 read_coords[9] = { + (int2)id + int2(-step_len, -step_len), /* top left */ + (int2)id + int2(0 , -step_len), /* top center */ + (int2)id + int2(+step_len, -step_len), /* top right */ + (int2)id + int2(-step_len, 0 ), /* center left */ + (int2)id + int2(0 , 0 ), /* center center */ + (int2)id + int2(+step_len, 0 ), /* center right */ + (int2)id + int2(-step_len, +step_len), /* bottom left */ + (int2)id + int2(0 , +step_len), /* bottom center */ + (int2)id + int2(+step_len, +step_len) /* bottom right */ + }; + uint2 closest_seed = uint2(0xFFFF, 0xFFFF); + uint closest_seed_len_sq = 0xFFFFFFFF; + for (int i = 0; i < 9; ++i) { + int2 coord = read_coords[i]; + if (coord.x >= 0 && coord.x < (int)tex_size.x && coord.y >= 0 && coord.y < (int)tex_size.y) { + uint2 seed = read_flood_tex[coord]; + int2 dist_vec = (int2)id - (int2)seed; + uint dist_len_sq = dot(dist_vec, dist_vec); + if (dist_len_sq < closest_seed_len_sq) { + closest_seed = seed; + closest_seed_len_sq = dist_len_sq; + } } } + write_flood_tex[id] = closest_seed; } - write_flood_tex[id] = closest_seed; } } diff --git a/res/sh/sh_common.h b/res/sh/sh_common.h index 1863ccde..b6440701 100644 --- a/res/sh/sh_common.h +++ b/res/sh/sh_common.h @@ -4,7 +4,8 @@ #define SH_DECL(t, n) struct CAT(sh_, t) n #define SH_DECLS(t, n) SH_DECL(t, n) #define SH_ENTRY(rootsig) static -#define SH_ASSERT_32BIT(s, n) STATIC_ASSERT((sizeof(s) / 4) == n) +#define SH_ASSERT_32BIT(s, n) STATIC_ASSERT(sizeof(s) % 16 == 0); /* Root constant structs should pad to 16 byte alignment */ \ + STATIC_ASSERT((sizeof(s) / 4) == n) /* Verify that struct size matches supplied 32 bit count */ struct sh_uint { u32 v; }; INLINE struct sh_uint sh_uint_from_u32(u32 v) @@ -24,6 +25,12 @@ INLINE struct sh_uint2 sh_uint2_from_u32(u32 a, u32 b) return (struct sh_uint2) { .v[0] = a, .v[1] = b }; } +struct sh_uint3 { u32 v[3]; }; +INLINE struct sh_uint3 sh_uint3_from_u32(u32 a, u32 b, u32 c) +{ + return (struct sh_uint3) { .v[0] = a, .v[1] = b, .v[2] = c }; +} + struct sh_float { f32 v; }; INLINE struct sh_float sh_float_from_f32(f32 v) { @@ -76,17 +83,20 @@ INLINE struct sh_float2x3 sh_float2x3_from_xform(struct xform v) /* Blue noise */ #define SH_BLUE_NOISE_TEX_ID 0 -#define SH_BLUE_NOISE_TEX_WIDTH 1024 -#define SH_BLUE_NOISE_TEX_HEIGHT 1024 +#define SH_BLUE_NOISE_TEX_WIDTH 128 +#define SH_BLUE_NOISE_TEX_HEIGHT 128 +#define SH_BLUE_NOISE_TEX_DEPTH 64 /* ========================== * * Material shader structures * ========================== */ SH_STRUCT(sh_material_constants { - SH_DECL(float4x4, projection); + /* ==================================================== */ + SH_DECL(float4x4, projection); /* 16 consts */ + /* ==================================================== */ }); -SH_ASSERT_32BIT(struct sh_material_constants, 16); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_material_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ SH_STRUCT(sh_material_instance { SH_DECL(int, tex_nurid); @@ -115,41 +125,55 @@ SH_STRUCT(sh_material_grid { * ========================== */ SH_STRUCT(sh_flood_constants { - SH_DECL(int, step_len); - SH_DECL(uint, emittance_tex_urid); - SH_DECL(uint, read_flood_tex_urid); - SH_DECL(uint, write_flood_tex_urid); - SH_DECL(uint, tex_width); - SH_DECL(uint, tex_height); + /* ==================================================== */ + SH_DECL(int, step_len); /* 04 consts */ + SH_DECL(uint, emittance_tex_urid); /* 04 consts */ + SH_DECL(uint, read_flood_tex_urid); /* 04 consts */ + SH_DECL(uint, write_flood_tex_urid); /* 04 consts */ + /* ==================================================== */ + SH_DECL(uint, tex_width); /* 04 consts */ + SH_DECL(uint, tex_height); /* 04 consts */ + SH_DECL(uint2, _pad0); /* 08 consts (padding) */ + /* ==================================================== */ }); -SH_ASSERT_32BIT(struct sh_flood_constants, 6); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_flood_constants, 8); /* Expected to match num32BitConstants in shader's root signature */ /* ========================== * * Shade shader structures * ========================== */ SH_STRUCT(sh_shade_constants { - SH_DECL(float2, camera_offset); - SH_DECL(uint2, seed); - SH_DECL(uint, albedo_tex_urid); - SH_DECL(uint, emittance_tex_urid); - SH_DECL(uint, emittance_flood_tex_urid); - SH_DECL(uint, write_tex_urid); - SH_DECL(uint, tex_width); - SH_DECL(uint, tex_height); - SH_DECL(float, exposure); - SH_DECL(float, gamma); + /* ==================================================== */ + SH_DECL(uint, frame_index); /* 01 consts */ + SH_DECL(uint3, frame_seed); /* 03 consts */ + /* ==================================================== */ + SH_DECL(float2, camera_offset); /* 02 consts */ + SH_DECL(uint, albedo_tex_urid); /* 01 consts */ + SH_DECL(uint, _pad0); /* 01 consts (padding) */ + /* ==================================================== */ + SH_DECL(uint, emittance_tex_urid); /* 01 consts */ + SH_DECL(uint, emittance_flood_tex_urid); /* 01 consts */ + SH_DECL(uint, write_tex_urid); /* 01 consts */ + SH_DECL(uint, tex_width); /* 01 consts */ + /* ==================================================== */ + SH_DECL(uint, tex_height); /* 01 consts */ + SH_DECL(float, exposure); /* 01 consts */ + SH_DECL(float, gamma); /* 01 consts */ + SH_DECL(uint, _pad1); /* 01 consts (padding) */ + /* ==================================================== */ }); -SH_ASSERT_32BIT(struct sh_shade_constants, 12); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_shade_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ /* ========================== * * Shape shader structures * ========================== */ SH_STRUCT(sh_shape_constants { - SH_DECL(float4x4, projection); + /* ==================================================== */ + SH_DECL(float4x4, projection); /* 16 consts */ + /* ==================================================== */ }); -SH_ASSERT_32BIT(struct sh_shape_constants, 16); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_shape_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ SH_STRUCT(sh_shape_vert { SH_DECLS(float2, pos); @@ -161,9 +185,11 @@ SH_STRUCT(sh_shape_vert { * ========================== */ SH_STRUCT(sh_ui_constants { - SH_DECL(float4x4, projection); + /* ==================================================== */ + SH_DECL(float4x4, projection); /* 16 consts */ + /* ==================================================== */ }); -SH_ASSERT_32BIT(struct sh_ui_constants, 16); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_ui_constants, 16); /* Expected to match num32BitConstants in shader's root signature */ SH_STRUCT(sh_ui_instance { SH_DECL(int, tex_nurid); @@ -179,7 +205,11 @@ SH_STRUCT(sh_ui_instance { * ========================== */ SH_STRUCT(sh_blit_constants { - SH_DECL(float4x4, projection); - SH_DECL(uint, tex_urid); + /* ==================================================== */ + SH_DECL(float4x4, projection); /* 16 consts */ + /* ==================================================== */ + SH_DECL(uint, tex_urid); /* 01 consts */ + SH_DECL(uint3, _pad0); /* 03 consts (padding) */ + /* ==================================================== */ }); -SH_ASSERT_32BIT(struct sh_blit_constants, 17); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_blit_constants, 20); /* Expected to match num32BitConstants in shader's root signature */ diff --git a/res/sh/shade.hlsl b/res/sh/shade.hlsl index fd0fd497..b25a0edc 100644 --- a/res/sh/shade.hlsl +++ b/res/sh/shade.hlsl @@ -5,24 +5,17 @@ * ========================== */ #define ROOTSIG \ - "RootConstants(num32BitConstants = 12, b0), " \ + "RootConstants(num32BitConstants = 16, b0), " \ "DescriptorTable(SRV(t0, space = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ "DescriptorTable(SRV(t0, space = 1, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ - "DescriptorTable(UAV(u0, space = 2, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ - \ - "StaticSampler(s0, " \ - "filter = FILTER_MIN_MAG_MIP_POINT, " \ - "addressU = TEXTURE_ADDRESS_WRAP, " \ - "addressV = TEXTURE_ADDRESS_WRAP, " \ - "addressW = TEXTURE_ADDRESS_WRAP, " \ - "maxAnisotropy = 1)" + "DescriptorTable(SRV(t0, space = 2, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ + "DescriptorTable(UAV(u0, space = 3, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " ConstantBuffer g_constants : register(b0); Texture2D g_textures_float4[] : register(t0, space0); Texture2D g_textures_uint2[] : register(t0, space1); -RWTexture2D g_write_textures[]: register(u0, space2); - -SamplerState g_noise_sampler : register(s0); +Texture3D g_noise_textures[] : register(t0, space2); +RWTexture2D g_write_textures[]: register(u0, space3); struct cs_input { DECLS(uint3, SV_DispatchThreadID); @@ -32,16 +25,27 @@ struct cs_input { * Lighting * ========================== */ -#define SAMPLES 8 +#define SAMPLES 4 #define MARCHES 16 #define AMBIENT float4(0, 0, 0, 0) float rand_float_from_float2(float2 pos) { - // pos += uint2(g_constants.seed.x % g_constants.tex_width, g_constants.seed.x % g_constants.tex_height); - Texture2D noise_tex = g_textures_float4[SH_BLUE_NOISE_TEX_ID]; - float2 uv = pos / float2(SH_BLUE_NOISE_TEX_WIDTH, SH_BLUE_NOISE_TEX_HEIGHT); - float4 v = noise_tex.SampleLevel(g_noise_sampler, uv, 0); - return v.r; + Texture3D noise_tex = g_noise_textures[SH_BLUE_NOISE_TEX_ID]; + + // pos += uint2(g_constants.frame_seed.x % g_constants.tex_width, g_constants.frame_seed.x % g_constants.tex_height); + + // pos -= g_constants.camera_offset; + + uint3 noise_coord = uint3(pos.xy, 0); + // noise_coord.xy += g_constants.frame_seed.xy; + // noise_coord.z += g_constants.frame_index; + // noise_coord.z += g_constants.frame_index; + + + + uint3 noise_size = uint3(SH_BLUE_NOISE_TEX_WIDTH, SH_BLUE_NOISE_TEX_HEIGHT, SH_BLUE_NOISE_TEX_DEPTH); + uint noise = noise_tex[noise_coord % noise_size]; + return (float)noise / (float)0xFFFF; } INLINE float4 get_light_in_dir(uint2 ray_start, float2 ray_dir) @@ -76,6 +80,7 @@ INLINE float4 get_light_at_pos(uint2 pos) float4 result = 0; for (uint i = 0; i < SAMPLES; ++i) { float angle = ((((float)i + rand_float_from_float2((float2)pos + (float)i)) / SAMPLES)) * TAU; + // float angle = (rand_float_from_float2(pos)) * TAU; // float angle = (rand_float_from_float2(pos)) * TAU; // float angle = (((float)i / SAMPLES)) * TAU; @@ -108,27 +113,24 @@ INLINE float3 tone_map(float3 v) SH_ENTRY(ROOTSIG) void cs(struct cs_input input) { uint2 id = input.SV_DispatchThreadID.xy; - if (id.x >= g_constants.tex_width || id.y >= g_constants.tex_height) { - return; /* Overflow */ + if (id.x < g_constants.tex_width && id.y < g_constants.tex_height) { + Texture2D albedo_tex = g_textures_float4[g_constants.albedo_tex_urid]; + RWTexture2D write_tex = g_write_textures[g_constants.write_tex_urid]; + float4 color = float4(1, 1, 1, 1); + + /* Apply albedo */ + color *= albedo_tex[id]; + + /* Apply lighting */ + color *= get_light_at_pos(id); + + /* Apply tone map */ + /* TODO: Dynamic exposure based on average scene luminance */ + color.rgb = tone_map(color.rgb) * g_constants.exposure; + + /* Apply gamma correction */ + color = pow(abs(color), 1/g_constants.gamma); + + write_tex[id] = color; } - - Texture2D albedo_tex = g_textures_float4[g_constants.albedo_tex_urid]; - RWTexture2D write_tex = g_write_textures[g_constants.write_tex_urid]; - - float4 color = float4(1, 1, 1, 1); - - /* Apply albedo */ - color *= albedo_tex[id]; - - /* Apply lighting */ - color *= get_light_at_pos(id); - - /* Apply tone map */ - /* TODO: Dynamic exposure based on average scene luminance */ - color.rgb = tone_map(color.rgb) * g_constants.exposure; - - /* Apply gamma correction */ - color = pow(abs(color), 1/g_constants.gamma); - - write_tex[id] = color; } diff --git a/res/sprite/noise.ase b/res/sprite/noise.ase deleted file mode 100644 index 63c5db51..00000000 --- a/res/sprite/noise.ase +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa3d0deedb329f2e1f1fca1d3cba1e76bc2a61913d3d572e036c55ea2c6fec4d -size 4197289 diff --git a/src/config.h b/src/config.h index cc18976c..bc21094c 100644 --- a/src/config.h +++ b/src/config.h @@ -84,6 +84,6 @@ /* TODO: Move these to user-configurable settings */ +#define VSYNC 0 #define AUDIO_ENABLED 0 -#define VSYNC 1 -#define USER_FPS_LIMIT 300 +#define FPS_LIMIT 300 diff --git a/src/gp_dx12.c b/src/gp_dx12.c index 53574634..865a8570 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -42,7 +42,7 @@ #define DX12_ALLOW_TEARING 1 #define DX12_WAIT_FRAME_LATENCY 1 -#define DX12_SWAPCHAIN_FLAGS ((DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | (DX12_WAIT_FRAME_LATENCY * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) +#define DX12_SWAPCHAIN_FLAGS (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) #define DX12_SWAPCHAIN_BUFFER_COUNT (4) /* Arbitrary limits */ @@ -224,6 +224,14 @@ struct descriptor { struct descriptor *next_free; }; +enum dx12_resource_view_flags { + DX12_RESOURCE_VIEW_FLAG_NONE = 0, + DX12_RESOURCE_VIEW_FLAG_CBV = (1 << 1), + DX12_RESOURCE_VIEW_FLAG_SRV = (1 << 2), + DX12_RESOURCE_VIEW_FLAG_UAV = (1 << 3), + DX12_RESOURCE_VIEW_FLAG_RTV = (1 << 4) +}; + struct dx12_resource { enum D3D12_RESOURCE_STATES state; ID3D12Resource *resource; @@ -303,6 +311,8 @@ INTERNAL SYS_JOB_DEF(dx12_evictor_job, _); INTERNAL void fenced_release(void *data, enum fenced_release_kind kind); +INTERNAL struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state, i32 view_flags); + struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; }; INTERNAL SYS_JOB_DEF(command_queue_alloc_job, job); @@ -803,35 +813,60 @@ INTERNAL void dx12_init_noise(void) { struct arena_temp scratch = scratch_begin_no_conflict(); - /* Decode */ - struct ase_decode_image_result decoded = ZI; { - struct resource texture_rs = resource_open(LIT("sprite/noise.ase")); - if (resource_exists(&texture_rs)) { - decoded = ase_decode_image(scratch.arena, resource_get_data(&texture_rs)); - } else { - sys_panic(LIT("Noise texture not found")); - } - resource_close(&texture_rs); - } - if (decoded.success) { - /* Initialize */ - if (decoded.image.width != SH_BLUE_NOISE_TEX_WIDTH || decoded.image.height != SH_BLUE_NOISE_TEX_HEIGHT) { - sys_panic(string_format(scratch.arena, - LIT("Noise texture has unexpected dimensions (expected %Fx%F, got %Fx%F)"), - FMT_UINT(SH_BLUE_NOISE_TEX_WIDTH), FMT_UINT(SH_BLUE_NOISE_TEX_HEIGHT), - FMT_UINT(decoded.image.width), FMT_UINT(decoded.image.height))); + struct string noise_res_name = LIT("noise_128x128x64_16.dat"); + struct resource noise_res = resource_open(noise_res_name); + DXGI_FORMAT format = DXGI_FORMAT_R16_UINT; + //u32 expected_size = SH_BLUE_NOISE_TEX_WIDTH * SH_BLUE_NOISE_TEX_HEIGHT * SH_BLUE_NOISE_TEX_DEPTH * 2; + u32 expected_size = SH_BLUE_NOISE_TEX_WIDTH * SH_BLUE_NOISE_TEX_HEIGHT * SH_BLUE_NOISE_TEX_DEPTH * 2; + if (resource_exists(&noise_res)) { + struct string data = resource_get_data(&noise_res); + if (data.len != expected_size) { + sys_panic(string_format(scratch.arena, + LIT("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"), + FMT_UINT(SH_BLUE_NOISE_TEX_WIDTH), FMT_UINT(SH_BLUE_NOISE_TEX_HEIGHT), FMT_UINT(SH_BLUE_NOISE_TEX_DEPTH), + FMT_UINT(expected_size), FMT_UINT(data.len))); + } + { + enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV; + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + + D3D12_RESOURCE_DESC desc = ZI; + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + + desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + desc.Format = format; + desc.Alignment = 0; + desc.Width = SH_BLUE_NOISE_TEX_WIDTH; + desc.Height = SH_BLUE_NOISE_TEX_HEIGHT; + desc.DepthOrArraySize = SH_BLUE_NOISE_TEX_DEPTH; + desc.MipLevels = 1; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + + struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, D3D12_RESOURCE_STATE_COPY_DEST, view_flags); + + /* Upload texture */ + { + struct snc_counter counter = ZI; + struct dx12_upload_job_sig sig = ZI; + sig.resource = r; + sig.data = data.text; + sys_run(1, dx12_upload_job, &sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter); + snc_counter_wait(&counter); + } + } + } else { + sys_panic(string_format(scratch.arena, LIT("Noise resource \"%F\" not found"), FMT_STR(noise_res_name))); } - struct dx12_resource *r = (struct dx12_resource *)gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, V2I32(decoded.image.width, decoded.image.height), decoded.image.pixels); - if (r->srv_descriptor->index != SH_BLUE_NOISE_TEX_ID) { - sys_panic(string_format(scratch.arena, - LIT("Noise texture has unexpected descriptor index (expected %F, got %F)"), - FMT_UINT(SH_BLUE_NOISE_TEX_ID), FMT_UINT(r->srv_descriptor->index))); - } - } else { - sys_panic(LIT("Failed to decode noise texture")); + resource_close(&noise_res); } + scratch_end(scratch); } @@ -1687,14 +1722,6 @@ INTERNAL void fenced_release(void *data, enum fenced_release_kind kind) * Resource * ========================== */ -enum dx12_resource_view_flags { - DX12_RESOURCE_VIEW_FLAG_NONE = 0, - DX12_RESOURCE_VIEW_FLAG_CBV = (1 << 1), - DX12_RESOURCE_VIEW_FLAG_SRV = (1 << 2), - DX12_RESOURCE_VIEW_FLAG_UAV = (1 << 3), - DX12_RESOURCE_VIEW_FLAG_RTV = (1 << 4) -}; - INTERNAL struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state, i32 view_flags) { __prof; @@ -2311,21 +2338,16 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s if (size.x <= 0 || size.y <= 0) { sys_panic(LIT("Tried to create texture with dimension <= 0")); } - struct dxgi_format_info { DXGI_FORMAT format; u32 size; }; - LOCAL_PERSIST const struct dxgi_format_info formats[] = { - [GP_TEXTURE_FORMAT_R8_UNORM] = { DXGI_FORMAT_R8_UNORM, 1 }, - [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = { DXGI_FORMAT_R8G8B8A8_UNORM, 4 }, - [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 4 }, - [GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = { DXGI_FORMAT_R16G16B16A16_FLOAT, 8 } + LOCAL_PERSIST const DXGI_FORMAT formats[] = { + [GP_TEXTURE_FORMAT_R8_UNORM] = DXGI_FORMAT_R8_UNORM, + [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM, + [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + [GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = DXGI_FORMAT_R16G16B16A16_FLOAT }; DXGI_FORMAT dxgi_format = ZI; - u32 pixel_size = 0; - if (format < (i32)countof(formats)) { - dxgi_format = formats[format].format; - pixel_size = formats[format].size; - ASSERT(dxgi_format != 0); - ASSERT(pixel_size != 0); + if (format >= 0 && format < (i32)countof(formats)) { + dxgi_format = formats[format]; } if (format == 0) { sys_panic(LIT("Tried to create texture with unknown format")); @@ -2395,12 +2417,104 @@ INTERNAL SYS_JOB_DEF(dx12_upload_job, job) D3D12_RESOURCE_DESC desc = ZI; ID3D12Resource_GetDesc(r->resource, &desc); - u64 upload_size = 0; - u64 upload_row_size = 0; - u32 upload_num_rows = 0; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; - ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &footprint, &upload_num_rows, &upload_row_size, &upload_size); +#if 1 + { + u64 upload_size = 0; + u64 upload_row_size = 0; + u32 upload_num_rows = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; + ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); + D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; + + /* Create upload heap */ + struct dx12_resource *upload = 0; + { + enum dx12_resource_view_flags upload_view_flags = DX12_RESOURCE_VIEW_FLAG_NONE; + + D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + + D3D12_RESOURCE_DESC upload_desc = ZI; + upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + upload_desc.Format = DXGI_FORMAT_UNKNOWN; + upload_desc.Alignment = 0; + upload_desc.Width = upload_size; + upload_desc.Height = 1; + upload_desc.DepthOrArraySize = 1; + upload_desc.MipLevels = 1; + upload_desc.SampleDesc.Count = 1; + upload_desc.SampleDesc.Quality = 0; + D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; + + upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state, upload_view_flags); + } + + struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND]; + struct command_list *cl = command_list_open(cq->cl_pool); + { + /* Copy to upload heap */ + { + D3D12_RANGE read_range = ZI; + void *mapped = 0; + HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped); + if (FAILED(hr) || !mapped) { + /* TODO: Don't panic */ + sys_panic(LIT("Failed to map texture upload resource")); + } + u8 *dst = (u8 *)mapped + placed_footprint.Offset; + u8 *src = data; + + u32 z_size = upload_row_size * upload_num_rows; + + for (u32 z = 0; z < desc.DepthOrArraySize; ++z) { + u32 z_offset = z * z_size; + for (u32 y = 0; y < upload_num_rows; ++y) { + MEMCPY(dst + y * footprint.RowPitch + z_offset, src + y * upload_row_size + z_offset, upload_row_size); + } + } + ID3D12Resource_Unmap(upload->resource, 0, 0); + } + + /* Copy from upload heap to texture */ + { + __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2)); + D3D12_TEXTURE_COPY_LOCATION dst_loc = { + .pResource = r->resource, + .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + .SubresourceIndex = 0, + }; + + D3D12_TEXTURE_COPY_LOCATION src_loc = { + .pResource = upload->resource, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = placed_footprint, + }; + + ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0); + } + } u64 fence_target = command_list_close(cl); + + /* Wait on fence so we know it's safe to release upload heap */ + if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) { + struct dx12_wait_fence_job_sig wait_sig = ZI; + wait_sig.fence = cq->submit_fence; + wait_sig.target = fence_target; + struct snc_counter counter = ZI; + sys_run(1, dx12_wait_fence_job, &wait_sig, SYS_POOL_FLOATING, SYS_PRIORITY_LOW, &counter); + snc_counter_wait(&counter); + } + + /* Release upload heap now */ + dx12_resource_release_now(upload); + + } + +#else /* Create temp upload heap */ struct dx12_resource *upload = 0; { @@ -2478,6 +2592,8 @@ INTERNAL SYS_JOB_DEF(dx12_upload_job, job) /* Release upload heap now */ dx12_resource_release_now(upload); +#endif + } /* ========================== * @@ -2605,6 +2721,7 @@ INTERNAL D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descripto struct render_sig { struct arena *arena; struct rand_state rand; + u32 frame_index; /* Material instances */ u32 num_material_instance_descs; @@ -2783,10 +2900,13 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re __prof; struct arena_temp scratch = scratch_begin_no_conflict(); struct render_sig *sig = (struct render_sig *)render_sig; + struct v2i32 final_target_size = params.draw_target_size; final_target_size.x = max_i32(final_target_size.x, 1); final_target_size.y = max_i32(final_target_size.y, 1); + ++sig->frame_index; + /* Release sig resources if size changed */ if (sig->final_target && !v2i32_eq(final_target_size, sig->final_target->texture_size)) { __profn("Release sig resources"); @@ -3079,8 +3199,9 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re /* Set constants */ struct sh_shade_constants constants = ZI; /* TODO: Remove this */ + constants.frame_index = sh_uint_from_u32(sig->frame_index); + constants.frame_seed = sh_uint3_from_u32((u32)rand_u64_from_state(&sig->rand), (u32)rand_u64_from_state(&sig->rand), (u32)rand_u64_from_state(&sig->rand)); constants.camera_offset = sh_float2_from_v2(params.draw_target_view.og); - constants.seed = sh_uint2_from_u32((u32)rand_u64_from_state(&sig->rand), (u32)rand_u64_from_state(&sig->rand)); constants.albedo_tex_urid = sh_uint_from_u32(sig->albedo->srv_descriptor->index); constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index); constants.emittance_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->srv_descriptor->index); @@ -3095,6 +3216,7 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 1, descriptor_heap->start_gpu_handle); ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 2, descriptor_heap->start_gpu_handle); ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 3, descriptor_heap->start_gpu_handle); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 4, descriptor_heap->start_gpu_handle); /* Dispatch */ ID3D12GraphicsCommandList_Dispatch(cl->cl, (final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1); @@ -3286,8 +3408,8 @@ struct gp_swapchain *gp_swapchain_alloc(struct sys_window *window, struct v2i32 } /* Create waitable object */ -#if DX12_WAIT_FRAME_LATENCY - IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, 1); +#if DX12_WAIT_FRAME_LATENCY > 0 + IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, DX12_WAIT_FRAME_LATENCY); swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain); ASSERT(swapchain->waitable); #endif @@ -3311,10 +3433,14 @@ void gp_swapchain_release(struct gp_swapchain *gp_swapchain) void gp_swapchain_wait(struct gp_swapchain *gp_swapchain) { +#if DX12_WAIT_FRAME_LATENCY > 0 struct swapchain *swapchain = (struct swapchain *)gp_swapchain; if (swapchain->waitable) { WaitForSingleObjectEx(swapchain->waitable, 1000, 1); } +#else + (UNUSED)gp_swapchain; +#endif } INTERNAL struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, struct v2i32 resolution) diff --git a/src/user.c b/src/user.c index 45327bd1..0f5ae100 100644 --- a/src/user.c +++ b/src/user.c @@ -258,11 +258,9 @@ struct user_startup_receipt user_startup(struct font_startup_receipt *font_sr, INTERNAL SYS_EXIT_FUNC(user_shutdown) { __prof; - sys_window_release(G.window); - /* Signal shutdown */ atomic32_fetch_set(&G.shutdown, 1); - /* Wait for jobs shutdown */ snc_counter_wait(&G.shutdown_job_counters); + sys_window_release(G.window); } /* ========================== * @@ -2049,11 +2047,20 @@ INTERNAL void user_update(struct sys_window *window) INTERNAL SYS_JOB_DEF(user_update_job, _) { (UNUSED)_; + i64 time_ns = sys_time_ns(); while (!atomic32_fetch(&G.shutdown)) { struct sys_window *window = G.window; { - __profn("Swapchain wait"); - gp_swapchain_wait(G.swapchain); + __profn("User sleep"); + { + __profn("Swapchain wait"); + gp_swapchain_wait(G.swapchain); + } + { + __profn("Frame limiter wait"); + sleep_frame(time_ns, 1000000000 / FPS_LIMIT); + time_ns = sys_time_ns(); + } } user_update(window); }