diff --git a/res/sh/material.hlsl b/res/sh/material.hlsl index cf14d2a5..0067b8ec 100644 --- a/res/sh/material.hlsl +++ b/res/sh/material.hlsl @@ -86,7 +86,8 @@ SH_ENTRY(ROOTSIG) struct ps_output ps(struct ps_input input) /* Texture */ if (input.vs.tex_nurid >= 0) { - albedo *= g_textures[NURID(input.vs.tex_nurid)].Sample(g_sampler, input.vs.uv); + Texture2D tex = g_textures[NURID(input.vs.tex_nurid)]; + albedo *= tex.Sample(g_sampler, input.vs.uv); } /* Grid */ diff --git a/res/sh/sh_common.h b/res/sh/sh_common.h index 1ce40d48..1863ccde 100644 --- a/res/sh/sh_common.h +++ b/res/sh/sh_common.h @@ -18,6 +18,12 @@ INLINE struct sh_int sh_int_from_i32(i32 v) return (struct sh_int) { .v = v }; } +struct sh_uint2 { u32 v[2]; }; +INLINE struct sh_uint2 sh_uint2_from_u32(u32 a, u32 b) +{ + return (struct sh_uint2) { .v[0] = a, .v[1] = b }; +} + struct sh_float { f32 v; }; INLINE struct sh_float sh_float_from_f32(f32 v) { @@ -64,6 +70,15 @@ INLINE struct sh_float2x3 sh_float2x3_from_xform(struct xform v) #endif +/* ========================== * + * Global textures + * ========================== */ + +/* Blue noise */ +#define SH_BLUE_NOISE_TEX_ID 0 +#define SH_BLUE_NOISE_TEX_WIDTH 1024 +#define SH_BLUE_NOISE_TEX_HEIGHT 1024 + /* ========================== * * Material shader structures * ========================== */ @@ -114,7 +129,8 @@ SH_ASSERT_32BIT(struct sh_flood_constants, 6); /* Expected 32bit root constant * ========================== */ SH_STRUCT(sh_shade_constants { - SH_DECL(uint, tick); + SH_DECL(float2, camera_offset); + SH_DECL(uint2, seed); SH_DECL(uint, albedo_tex_urid); SH_DECL(uint, emittance_tex_urid); SH_DECL(uint, emittance_flood_tex_urid); @@ -124,7 +140,7 @@ SH_STRUCT(sh_shade_constants { SH_DECL(float, exposure); SH_DECL(float, gamma); }); -SH_ASSERT_32BIT(struct sh_shade_constants, 9); /* Expected 32bit root constant size in shader */ +SH_ASSERT_32BIT(struct sh_shade_constants, 12); /* Expected 32bit root constant size in shader */ /* ========================== * * Shape shader structures diff --git a/res/sh/shade.hlsl b/res/sh/shade.hlsl index 8a272c33..fd0fd497 100644 --- a/res/sh/shade.hlsl +++ b/res/sh/shade.hlsl @@ -5,25 +5,24 @@ * ========================== */ #define ROOTSIG \ - "RootConstants(num32BitConstants = 9, b0), " \ + "RootConstants(num32BitConstants = 12, b0), " \ "DescriptorTable(SRV(t0, space = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ "DescriptorTable(SRV(t0, space = 1, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ "DescriptorTable(UAV(u0, space = 2, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE)), " \ \ - \ "StaticSampler(s0, " \ "filter = FILTER_MIN_MAG_MIP_POINT, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "addressU = TEXTURE_ADDRESS_WRAP, " \ + "addressV = TEXTURE_ADDRESS_WRAP, " \ + "addressW = TEXTURE_ADDRESS_WRAP, " \ "maxAnisotropy = 1)" ConstantBuffer g_constants : register(b0); -Texture2D g_gbuff_textures[] : register(t0, space0); -Texture2D g_emittance_flood_textures[] : register(t0, space1); +Texture2D g_textures_float4[] : register(t0, space0); +Texture2D g_textures_uint2[] : register(t0, space1); RWTexture2D g_write_textures[]: register(u0, space2); -SamplerState g_sampler : register(s0); +SamplerState g_noise_sampler : register(s0); struct cs_input { DECLS(uint3, SV_DispatchThreadID); @@ -33,14 +32,22 @@ struct cs_input { * Lighting * ========================== */ -#define SAMPLES 4 +#define SAMPLES 8 #define MARCHES 16 #define AMBIENT float4(0, 0, 0, 0) +float rand_float_from_float2(float2 pos) { + // pos += uint2(g_constants.seed.x % g_constants.tex_width, g_constants.seed.x % g_constants.tex_height); + Texture2D noise_tex = g_textures_float4[SH_BLUE_NOISE_TEX_ID]; + float2 uv = pos / float2(SH_BLUE_NOISE_TEX_WIDTH, SH_BLUE_NOISE_TEX_HEIGHT); + float4 v = noise_tex.SampleLevel(g_noise_sampler, uv, 0); + return v.r; +} + INLINE float4 get_light_in_dir(uint2 ray_start, float2 ray_dir) { - Texture2D flood_tex = g_emittance_flood_textures[g_constants.emittance_flood_tex_urid]; - Texture2D emittance_tex = g_gbuff_textures[g_constants.emittance_tex_urid]; + Texture2D flood_tex = g_textures_uint2[g_constants.emittance_flood_tex_urid]; + Texture2D emittance_tex = g_textures_float4[g_constants.emittance_tex_urid]; float4 result = AMBIENT; float2 at_float = ray_start; @@ -49,7 +56,7 @@ INLINE float4 get_light_in_dir(uint2 ray_start, float2 ray_dir) uint2 flood = flood_tex[at_uint]; float2 dist_vec = at_float - (float2)flood; float dist = length(dist_vec); - if (dist <= 1) { + if (dist < 1) { result = emittance_tex[flood]; break; } else { @@ -64,15 +71,15 @@ INLINE float4 get_light_in_dir(uint2 ray_start, float2 ray_dir) return result; } -float rand_float_from_float2(float2 pos) { - return frac(sin(dot(pos.xy, float2(12.9898,78.233))) * 43758.5453123); -} - INLINE float4 get_light_at_pos(uint2 pos) { float4 result = 0; - for (int i = 0; i < SAMPLES; ++i) { - float angle = TAU * (((float)i + rand_float_from_float2(pos + (float)i)) / ((float)SAMPLES - 1)); + for (uint i = 0; i < SAMPLES; ++i) { + float angle = ((((float)i + rand_float_from_float2((float2)pos + (float)i)) / SAMPLES)) * TAU; + + // float angle = (rand_float_from_float2(pos)) * TAU; + // float angle = (((float)i / SAMPLES)) * TAU; + // float angle = (rand_float_from_float2(pos)) * TAU; float2 dir = float2(cos(angle), sin(angle)); float4 light_in_dir = get_light_in_dir(pos, dir); result += light_in_dir; @@ -105,19 +112,22 @@ SH_ENTRY(ROOTSIG) void cs(struct cs_input input) return; /* Overflow */ } - Texture2D albedo_tex = g_gbuff_textures[g_constants.albedo_tex_urid]; + Texture2D albedo_tex = g_textures_float4[g_constants.albedo_tex_urid]; RWTexture2D write_tex = g_write_textures[g_constants.write_tex_urid]; - float4 albedo = albedo_tex[id]; - float4 lighting = get_light_at_pos(id); - float4 color = albedo * lighting; + float4 color = float4(1, 1, 1, 1); - /* Tone map */ + /* Apply albedo */ + color *= albedo_tex[id]; + + /* Apply lighting */ + color *= get_light_at_pos(id); + + /* Apply tone map */ /* TODO: Dynamic exposure based on average scene luminance */ - color *= g_constants.exposure; - color.rgb = tone_map(color.rgb); + color.rgb = tone_map(color.rgb) * g_constants.exposure; - /* Gamma correct */ + /* Apply gamma correction */ color = pow(abs(color), 1/g_constants.gamma); write_tex[id] = color; diff --git a/res/sprite/noise.ase b/res/sprite/noise.ase new file mode 100644 index 00000000..63c5db51 --- /dev/null +++ b/res/sprite/noise.ase @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa3d0deedb329f2e1f1fca1d3cba1e76bc2a61913d3d572e036c55ea2c6fec4d +size 4197289 diff --git a/res/sprite/tile.ase b/res/sprite/tile.ase index 41422f8b..3fd5ed3d 100644 --- a/res/sprite/tile.ase +++ b/res/sprite/tile.ase @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e21a8acdf81686d7beadc8bec5544e8fdaa55830789713a69c049750b2ceff01 -size 672 +oid sha256:278c0ed087590d89f0b50c51373861c45d1ab87f9241f8c3ea2942333baf294a +size 673 diff --git a/src/ase.c b/src/ase.c index 1c5d85af..844660db 100644 --- a/src/ase.c +++ b/src/ase.c @@ -269,7 +269,14 @@ INTERNAL void inflate(u8 *dst, u8 *encoded) u8 btype = consume_bits(&bb, 2); switch (btype) { case BLOCK_TYPE_UNCOMPRESSED: { - sys_panic(LIT("Unsupported block type while inflating ase: BLOCK_TYPE_UNCOMPRESSED")); + skip_bits(&bb, (8 - (bb.cur_bit % 8)) % 8); + i16 len = consume_bits(&bb, 16); + i16 nlen = consume_bits(&bb, 16); + ASSERT(len == ~nlen); /* Validation */ + (UNUSED)nlen; + while (len-- > 0) { + *dst++ = consume_bits(&bb, 8); + } } break; case BLOCK_TYPE_COMPRESSED_FIXED: @@ -361,9 +368,9 @@ INTERNAL void inflate(u8 *dst, u8 *encoded) u32 extra_bits = consume_bits(&bb, dist_entry.bits_used); distance += extra_bits; } - u8 *source = dst - distance; + u8 *src = dst - distance; while (length--) { - *dst++ = *source++; + *dst++ = *src++; } } else { break; diff --git a/src/common.h b/src/common.h index d44ca8d0..cad50ec5 100644 --- a/src/common.h +++ b/src/common.h @@ -189,15 +189,15 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); #define INLINE static inline #if COMPILER_MSVC -# define FORCE_INLINE static inline __forceinline +# define FORCE_INLINE inline __forceinline #else -# define FORCE_INLINE static inline __attribute((always_inline)) +# define FORCE_INLINE inline __attribute((always_inline)) #endif #if COMPILER_MSVC -# define NO_INLINE __declspec(noinline) +# define FORCE_NO_INLINE __declspec(noinline) #else -# define NO_INLINE __attribute__((noinline)) +# define FORCE_NO_INLINE __attribute__((noinline)) #endif /* Separate `static` usage into different keywords for easier grepping */ @@ -578,6 +578,11 @@ struct v2i32 { i32 x, y; }; +#define V3I32(x, y, z) CPPCOMPAT_INITLIST_TYPE(struct v3i32) { (x), (y), (z) } +struct v3i32 { + i32 x, y, z; +}; + struct xform { struct v2 bx; /* X basis vector (x axis) */ struct v2 by; /* Y basis vector (y axis)*/ diff --git a/src/gp.h b/src/gp.h index 40870054..3eb8546f 100644 --- a/src/gp.h +++ b/src/gp.h @@ -34,6 +34,7 @@ void gp_resource_release(struct gp_resource *resource); enum gp_texture_format { GP_TEXTURE_FORMAT_NONE, + GP_TEXTURE_FORMAT_R8_UNORM, GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB, GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT, diff --git a/src/gp_dx12.c b/src/gp_dx12.c index cbd65980..53574634 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -12,6 +12,7 @@ #include "sprite.h" #include "gstat.h" #include "snc.h" +#include "ase.h" /* Include common shader types */ #define SH_CPU 1 @@ -72,6 +73,10 @@ # define DX12_SHADER_DEBUG 0 #endif +/* ========================== * + * Internal structs + * ========================== */ + struct shader_desc { struct string file; struct string func; @@ -276,6 +281,41 @@ struct fenced_release_data { void *ptr; }; +/* ========================== * + * Internal procs + * ========================== */ + +INTERNAL SYS_EXIT_FUNC(gp_shutdown); + +INTERNAL void dx12_init_device(void); + +INTERNAL void dx12_init_objects(void); + +INTERNAL void dx12_init_pipelines(void); + +INTERNAL void dx12_init_noise(void); + +INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); + +INTERNAL void command_queue_release(struct command_queue *cq); + +INTERNAL SYS_JOB_DEF(dx12_evictor_job, _); + +INTERNAL void fenced_release(void *data, enum fenced_release_kind kind); + +struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; }; +INTERNAL SYS_JOB_DEF(command_queue_alloc_job, job); + +struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; }; +INTERNAL SYS_JOB_DEF(pipeline_alloc_job, job); + +struct dx12_upload_job_sig { struct dx12_resource *resource; void *data; }; +INTERNAL SYS_JOB_DEF(dx12_upload_job, job); + +#if RESOURCE_RELOADING +INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name); +#endif + /* ========================== * * Global state * ========================== */ @@ -351,25 +391,6 @@ GLOBAL struct { * Startup * ========================== */ -INTERNAL SYS_EXIT_FUNC(gp_shutdown); -INTERNAL void dx12_init_device(void); -INTERNAL void dx12_init_objects(void); -INTERNAL void dx12_init_pipelines(void); -INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); -INTERNAL void command_queue_release(struct command_queue *cq); -INTERNAL SYS_JOB_DEF(dx12_evictor_job, _); -INTERNAL void fenced_release(void *data, enum fenced_release_kind kind); - -struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; }; -INTERNAL SYS_JOB_DEF(command_queue_alloc_job, job); - -struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; }; -INTERNAL SYS_JOB_DEF(pipeline_alloc_job, job); - -#if RESOURCE_RELOADING -INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name); -#endif - void gp_startup(void) { __prof; @@ -400,9 +421,11 @@ void gp_startup(void) G.fenced_releases_arena = arena_alloc(GIBI(64)); /* Initialize dx12 */ + /* TODO: Parallelize phases */ dx12_init_device(); dx12_init_objects(); dx12_init_pipelines(); + dx12_init_noise(); /* Register callbacks */ #if RESOURCE_RELOADING @@ -772,6 +795,46 @@ INTERNAL void dx12_init_pipelines(void) scratch_end(scratch); } +/* ========================== * + * Noise texture initialization + * ========================== */ + +INTERNAL void dx12_init_noise(void) +{ + struct arena_temp scratch = scratch_begin_no_conflict(); + + /* Decode */ + struct ase_decode_image_result decoded = ZI; + { + struct resource texture_rs = resource_open(LIT("sprite/noise.ase")); + if (resource_exists(&texture_rs)) { + decoded = ase_decode_image(scratch.arena, resource_get_data(&texture_rs)); + } else { + sys_panic(LIT("Noise texture not found")); + } + resource_close(&texture_rs); + } + if (decoded.success) { + /* Initialize */ + if (decoded.image.width != SH_BLUE_NOISE_TEX_WIDTH || decoded.image.height != SH_BLUE_NOISE_TEX_HEIGHT) { + sys_panic(string_format(scratch.arena, + LIT("Noise texture has unexpected dimensions (expected %Fx%F, got %Fx%F)"), + FMT_UINT(SH_BLUE_NOISE_TEX_WIDTH), FMT_UINT(SH_BLUE_NOISE_TEX_HEIGHT), + FMT_UINT(decoded.image.width), FMT_UINT(decoded.image.height))); + + } + struct dx12_resource *r = (struct dx12_resource *)gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, V2I32(decoded.image.width, decoded.image.height), decoded.image.pixels); + if (r->srv_descriptor->index != SH_BLUE_NOISE_TEX_ID) { + sys_panic(string_format(scratch.arena, + LIT("Noise texture has unexpected descriptor index (expected %F, got %F)"), + FMT_UINT(SH_BLUE_NOISE_TEX_ID), FMT_UINT(r->srv_descriptor->index))); + } + } else { + sys_panic(LIT("Failed to decode noise texture")); + } + scratch_end(scratch); +} + /* ========================== * * Shader compilation * ========================== */ @@ -2250,9 +2313,10 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s } struct dxgi_format_info { DXGI_FORMAT format; u32 size; }; LOCAL_PERSIST const struct dxgi_format_info formats[] = { - [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = { DXGI_FORMAT_R8G8B8A8_UNORM, 4 }, - [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 4 }, - [GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = { DXGI_FORMAT_R16G16B16A16_FLOAT, 8 } + [GP_TEXTURE_FORMAT_R8_UNORM] = { DXGI_FORMAT_R8_UNORM, 1 }, + [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = { DXGI_FORMAT_R8G8B8A8_UNORM, 4 }, + [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 4 }, + [GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = { DXGI_FORMAT_R16G16B16A16_FLOAT, 8 } }; DXGI_FORMAT dxgi_format = ZI; @@ -2298,89 +2362,13 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s /* Upload texture */ if (initial_data) { - u64 upload_size = 0; - u64 upload_row_size = 0; - u32 upload_num_rows = 0; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; - ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &footprint, &upload_num_rows, &upload_row_size, &upload_size); - - /* Create temp upload heap */ - struct dx12_resource *upload = 0; - { - enum dx12_resource_view_flags upload_view_flags = DX12_RESOURCE_VIEW_FLAG_NONE; - - D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; - upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - - D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - - D3D12_RESOURCE_DESC upload_desc = ZI; - upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - upload_desc.Format = DXGI_FORMAT_UNKNOWN; - upload_desc.Alignment = 0; - upload_desc.Width = upload_size; - upload_desc.Height = 1; - upload_desc.DepthOrArraySize = 1; - upload_desc.MipLevels = 1; - upload_desc.SampleDesc.Count = 1; - upload_desc.SampleDesc.Quality = 0; - D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; - - /* FIXME: Release */ - upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state, upload_view_flags); - - /* Copy to upload heap */ - /* FIXME: Copy based on footprint */ - { - D3D12_RANGE read_range = ZI; - void *mapped = 0; - HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped); - if (FAILED(hr) || !mapped) { - /* TODO: Don't panic */ - sys_panic(LIT("Failed to map texture upload resource")); - } - u8 *dst = (u8 *)mapped + footprint.Offset; - u8 *src = initial_data; - for (u32 y = 0; y < upload_num_rows; ++y) { - memcpy(dst + y * footprint.Footprint.RowPitch, src + y * size.x * pixel_size, size.x * pixel_size); - } - ID3D12Resource_Unmap(upload->resource, 0, 0); - } - } - - /* Copy from upload heap to texture */ - struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND]; - struct command_list *cl = command_list_open(cq->cl_pool); - { - __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2)); - D3D12_TEXTURE_COPY_LOCATION dst_loc = { - .pResource = r->resource, - .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, - .SubresourceIndex = 0, - }; - - D3D12_TEXTURE_COPY_LOCATION src_loc = { - .pResource = upload->resource, - .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, - .PlacedFootprint = footprint, - }; - - ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0); - } - u64 fence_target = command_list_close(cl); - - /* Submit wait job */ /* TODO: Make wait optional */ - if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) { - struct dx12_wait_fence_job_sig sig = ZI; - sig.fence = cq->submit_fence; - sig.target = fence_target; - struct snc_counter counter = ZI; - sys_run(1, dx12_wait_fence_job, &sig, SYS_POOL_FLOATING, SYS_PRIORITY_LOW, &counter); - snc_counter_wait(&counter); - } + struct snc_counter counter = ZI; + struct dx12_upload_job_sig sig = ZI; + sig.resource = r; + sig.data = initial_data; + sys_run(1, dx12_upload_job, &sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter); + snc_counter_wait(&counter); } return (struct gp_resource *)r; @@ -2392,6 +2380,106 @@ struct v2i32 gp_texture_get_size(struct gp_resource *resource) return r->texture_size; } +/* ========================== * + * Upload + * ========================== */ + +INTERNAL SYS_JOB_DEF(dx12_upload_job, job) +{ + struct dx12_upload_job_sig *sig = job.sig; + struct dx12_resource *r = sig->resource; + void *data = sig->data; + + ASSERT(r->state == D3D12_RESOURCE_STATE_COPY_DEST); + + D3D12_RESOURCE_DESC desc = ZI; + ID3D12Resource_GetDesc(r->resource, &desc); + + u64 upload_size = 0; + u64 upload_row_size = 0; + u32 upload_num_rows = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; + ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &footprint, &upload_num_rows, &upload_row_size, &upload_size); + + /* Create temp upload heap */ + struct dx12_resource *upload = 0; + { + enum dx12_resource_view_flags upload_view_flags = DX12_RESOURCE_VIEW_FLAG_NONE; + + D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + + D3D12_RESOURCE_DESC upload_desc = ZI; + upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + upload_desc.Format = DXGI_FORMAT_UNKNOWN; + upload_desc.Alignment = 0; + upload_desc.Width = upload_size; + upload_desc.Height = 1; + upload_desc.DepthOrArraySize = 1; + upload_desc.MipLevels = 1; + upload_desc.SampleDesc.Count = 1; + upload_desc.SampleDesc.Quality = 0; + D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; + + upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state, upload_view_flags); + + /* Copy to upload heap */ + { + D3D12_RANGE read_range = ZI; + void *mapped = 0; + HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped); + if (FAILED(hr) || !mapped) { + /* TODO: Don't panic */ + sys_panic(LIT("Failed to map texture upload resource")); + } + u8 *dst = (u8 *)mapped + footprint.Offset; + u8 *src = data; + for (u32 y = 0; y < upload_num_rows; ++y) { + MEMCPY(dst + y * footprint.Footprint.RowPitch, src + y * upload_row_size, upload_row_size); + } + ID3D12Resource_Unmap(upload->resource, 0, 0); + } + } + + /* Copy from upload heap to texture */ + struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND]; + struct command_list *cl = command_list_open(cq->cl_pool); + { + __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2)); + D3D12_TEXTURE_COPY_LOCATION dst_loc = { + .pResource = r->resource, + .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + .SubresourceIndex = 0, + }; + + D3D12_TEXTURE_COPY_LOCATION src_loc = { + .pResource = upload->resource, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = footprint, + }; + + ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0); + } + u64 fence_target = command_list_close(cl); + + /* Wait on fence so we know it's safe to release upload heap */ + if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) { + struct dx12_wait_fence_job_sig wait_sig = ZI; + wait_sig.fence = cq->submit_fence; + wait_sig.target = fence_target; + struct snc_counter counter = ZI; + sys_run(1, dx12_wait_fence_job, &wait_sig, SYS_POOL_FLOATING, SYS_PRIORITY_LOW, &counter); + snc_counter_wait(&counter); + } + + /* Release upload heap now */ + dx12_resource_release_now(upload); +} + /* ========================== * * Run utils * ========================== */ @@ -2516,6 +2604,7 @@ INTERNAL D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descripto struct render_sig { struct arena *arena; + struct rand_state rand; /* Material instances */ u32 num_material_instance_descs; @@ -2539,8 +2628,6 @@ struct render_sig { struct dx12_resource *emittance; struct dx12_resource *emittance_flood_a; struct dx12_resource *emittance_flood_b; - - u32 tick; }; struct material_instance_desc { @@ -2588,7 +2675,6 @@ INTERNAL struct render_sig *render_sig_alloc(void) sig->ui_rect_instance_descs_arena = arena_alloc(GIBI(1)); sig->ui_shape_verts_arena = arena_alloc(GIBI(1)); sig->ui_shape_indices_arena = arena_alloc(GIBI(1)); - sig->tick = 1; return sig; } @@ -2612,8 +2698,6 @@ INTERNAL void render_sig_reset(struct render_sig *sig) /* Reset grids */ sig->num_material_grid_descs = 0; arena_reset(sig->material_grid_descs_arena); - - ++sig->tick; } struct gp_render_sig *gp_render_sig_alloc(void) @@ -2970,7 +3054,7 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, { D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 }, { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE }, - { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->final_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } + { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->final_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS } }; dx12_resource_barriers(cl->cl, countof(barriers), barriers); } @@ -2995,7 +3079,8 @@ struct gp_resource *gp_run_render(struct gp_render_sig *render_sig, struct gp_re /* Set constants */ struct sh_shade_constants constants = ZI; /* TODO: Remove this */ - constants.tick = sh_uint_from_u32(sig->tick); + constants.camera_offset = sh_float2_from_v2(params.draw_target_view.og); + constants.seed = sh_uint2_from_u32((u32)rand_u64_from_state(&sig->rand), (u32)rand_u64_from_state(&sig->rand)); constants.albedo_tex_urid = sh_uint_from_u32(sig->albedo->srv_descriptor->index); constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index); constants.emittance_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->srv_descriptor->index); diff --git a/src/prof_tracy.h b/src/prof_tracy.h index 2ce88309..febf2c1c 100644 --- a/src/prof_tracy.h +++ b/src/prof_tracy.h @@ -145,10 +145,10 @@ INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3 #endif /* PROFILING_CAPTURE_FRAME_IMAGE */ #ifdef TRACY_FIBERS -/* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues can arise +/* Tracy fiber methods are wrapped in FORCE_NO_INLINE because otherwise issues can arise * accross fiber context boundaries during optimization */ -NO_INLINE INLINE void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); } -NO_INLINE INLINE void __prof_fiber_leave(void) { TracyCFiberLeave; } +FORCE_NO_INLINE INLINE void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); } +FORCE_NO_INLINE INLINE void __prof_fiber_leave(void) { TracyCFiberLeave; } #else # define __prof_fiber_enter(fiber_name, profiler_group) # define __prof_fiber_leave() diff --git a/src/sim_step.c b/src/sim_step.c index f7b09b97..1ffd303f 100644 --- a/src/sim_step.c +++ b/src/sim_step.c @@ -255,14 +255,15 @@ INTERNAL void test_spawn_entities2(struct sim_ent *parent, struct v2 pos) struct sim_ent *e = sim_ent_alloc_sync_src(parent); f32 rot = 0; - struct v2 size = V2(1, 0.5); + struct v2 size = V2(1, 1); struct xform xf = XFORM_TRS(.t = pos, .r = rot, .s = size); sim_ent_set_xform(e, xf); - e->sprite = sprite_tag_from_path(LIT("sprite/box.ase")); + e->sprite = sprite_tag_from_path(LIT("sprite/tile.ase")); e->layer = SIM_LAYER_SHOULDERS; - e->sprite_tint = ALPHA32_F(COLOR_BLUE, 0.75); + //e->sprite_tint = ALPHA32_F(COLOR_BLUE, 0.75); + e->sprite_tint = ALPHA32_F(COLOR_WHITE, 1); sim_ent_enable_prop(e, SEPROP_SOLID); struct quad collider_quad = quad_from_rect(RECT(-0.5, -0.5, 1, 1)); diff --git a/src/sys_win32.c b/src/sys_win32.c index dd837e62..6cd6e296 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -927,7 +927,7 @@ INTERNAL void fiber_release(struct job_pool *pool, struct fiber *fiber) tm_unlock(&pool->free_fibers_lock); } -FORCE_INLINE struct fiber *fiber_from_id(i16 id) +INTERNAL FORCE_INLINE struct fiber *fiber_from_id(i16 id) { if (id <= 0) { return 0; @@ -936,6 +936,13 @@ FORCE_INLINE struct fiber *fiber_from_id(i16 id) } } +INTERNAL FORCE_NO_INLINE void fiber_resume(struct fiber *fiber) +{ + MemoryBarrier(); + SwitchToFiber(fiber->addr); + MemoryBarrier(); +} + i16 sys_current_fiber_id(void) { return (i16)(i64)GetFiberData(); @@ -953,29 +960,20 @@ INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber) ASSERT(parent_fiber->id > 0); { __prof_fiber_leave(); - MemoryBarrier(); - SwitchToFiber(parent_fiber->addr); - MemoryBarrier(); + fiber_resume(parent_fiber); __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - MEBI(fiber->job_pool) + KIBI(1) + fiber->id); } } -INTERNAL void job_fiber_resume(struct fiber *fiber) -{ - MemoryBarrier(); - SwitchToFiber(fiber->addr); - MemoryBarrier(); -} - INTERNAL void job_fiber_entry(void *id_ptr) { i16 id = (i32)(i64)id_ptr; - struct fiber *fiber = fiber_from_id(id); + volatile struct fiber *fiber = fiber_from_id(id); __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - MEBI(fiber->job_pool) + KIBI(1) + fiber->id); for (;;) { /* Run job */ { - volatile struct yield_param *yield_param = fiber->yield_param; + struct yield_param *yield_param = fiber->yield_param; yield_param->kind = YIELD_KIND_NONE; struct sys_job_data data = ZI; data.id = fiber->job_id; @@ -986,17 +984,21 @@ INTERNAL void job_fiber_entry(void *id_ptr) MemoryBarrier(); } } - /* Yield */ + /* Job completed, yield */ { - volatile struct yield_param *yield_param = fiber->yield_param; - yield_param->kind = YIELD_KIND_DONE; + /* Decrement job counter */ + struct snc_counter *job_counter = fiber->job_counter; + if (job_counter) { + snc_counter_add(job_counter, -1); + } + /* Yield to worker */ + fiber->yield_param->kind = YIELD_KIND_DONE; struct fiber *parent_fiber = fiber_from_id(fiber->parent_id); - job_fiber_yield(fiber, parent_fiber); + job_fiber_yield((struct fiber *)fiber, parent_fiber); } } } - void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_pool pool_kind, enum sys_priority priority, struct snc_counter *counter) { if (count > 0) { @@ -1189,7 +1191,7 @@ INTERNAL THREAD_DEF(job_worker_entry, worker_ctx_arg) job_fiber->yield_param = &yield; b32 done = 0; while (!done) { - job_fiber_resume(job_fiber); + fiber_resume(job_fiber); switch (yield.kind) { default: { @@ -1330,9 +1332,6 @@ INTERNAL THREAD_DEF(job_worker_entry, worker_ctx_arg) case YIELD_KIND_DONE: { - if (job_counter) { - snc_counter_add(job_counter, -1); - } done = 1; } break; }