diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index acf947f1..a9686d87 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -19,30 +19,6 @@ void G_BootstrapCommon(void) G.quad_indices = G_IdxBuff16(quad_indices); } - // Init point clamp sampler - { - G_ResourceHandle pt_sampler = G_PushSampler( - gpu_perm, cl, - .filter = G_Filter_MinMagMipPoint, - .x = G_AddressMode_Clamp, - .y = G_AddressMode_Clamp, - .z = G_AddressMode_Clamp, - ); - G.basic_point_clamp_sampler = G_PushSamplerStateRef(gpu_perm, pt_sampler); - } - - // Init point wrap sampler - { - G_ResourceHandle pt_sampler = G_PushSampler( - gpu_perm, cl, - .filter = G_Filter_MinMagMipPoint, - .x = G_AddressMode_Wrap, - .y = G_AddressMode_Wrap, - .z = G_AddressMode_Wrap, - ); - G.basic_point_wrap_sampler = G_PushSamplerStateRef(gpu_perm, pt_sampler); - } - // Init blank texture { G_ResourceHandle blank_tex = G_PushTexture2D( @@ -79,6 +55,62 @@ void G_BootstrapCommon(void) G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex); } + // Init basic samplers + for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind) + { + G_SamplerStateRef sampler = Zi; + switch (sampler_kind) + { + default: + { + // Sampler unspecified + Assert(0); + } FALLTHROUGH; + case G_BasicSamplerKind_PointClamp: + { + G_Filter filter = G_Filter_MinMagMipPoint; + G_AddressMode address_mode = G_AddressMode_Clamp; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_PointWrap: + { + G_Filter filter = G_Filter_MinMagMipPoint; + G_AddressMode address_mode = G_AddressMode_Wrap; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case 
G_BasicSamplerKind_BilinearClamp: + { + G_Filter filter = G_Filter_MinMagLinearMipPoint; + G_AddressMode address_mode = G_AddressMode_Clamp; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_BilinearWrap: + { + G_Filter filter = G_Filter_MinMagLinearMipPoint; + G_AddressMode address_mode = G_AddressMode_Wrap; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_TrilinearClamp: + { + G_Filter filter = G_Filter_MinMagMipLinear; + G_AddressMode address_mode = G_AddressMode_Clamp; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_TrilinearWrap: + { + G_Filter filter = G_Filter_MinMagMipLinear; + G_AddressMode address_mode = G_AddressMode_Wrap; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + } + G.basic_samplers[sampler_kind] = sampler; + } } G_CommitCommandList(cl); @@ -109,6 +141,35 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList return buffer; } +//- Mip + +i32 G_DimsFromMip1D(i32 texture_dims, i32 mip) +{ + mip = ClampI32(mip, 0, 31); + i32 result = 0; + result = MaxI32(texture_dims >> mip, 1); // Shift the input extent by the mip index (as the 2D/3D variants do); floor at 1 + return result; +} + +Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip) +{ + mip = ClampI32(mip, 0, 31); + Vec2I32 result = Zi; + result.x = MaxI32(texture_dims.x >> mip, 1); + result.y = MaxI32(texture_dims.y >> mip, 1); + return result; +} + 
+Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip) +{ + mip = ClampI32(mip, 0, 31); + Vec3I32 result = Zi; + result.x = MaxI32(texture_dims.x >> mip, 1); + result.y = MaxI32(texture_dims.y >> mip, 1); + result.z = MaxI32(texture_dims.z >> mip, 1); + return result; +} + //- Viewport / scissor Rng3 G_ViewportFromTexture(G_ResourceHandle texture) @@ -125,21 +186,17 @@ Rng2 G_ScissorFromTexture(G_ResourceHandle texture) //- Shared resources + +G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind) +{ + return G.basic_samplers[kind]; +} + G_IndexBufferDesc G_QuadIndices(void) { return G.quad_indices; } -G_SamplerStateRef G_BasicPointClampSampler(void) -{ - return G.basic_point_clamp_sampler; -} - -G_SamplerStateRef G_BasicPointWrapSampler(void) -{ - return G.basic_point_wrap_sampler; -} - G_Texture2DRef G_BlankTexture2D(void) { return G.blank_tex; diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 3a791fe8..eb3ee6d2 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -5,10 +5,9 @@ Struct(G_Ctx) { // Common shared resources G_IndexBufferDesc quad_indices; - G_SamplerStateRef basic_point_clamp_sampler; - G_SamplerStateRef basic_point_wrap_sampler; G_Texture2DRef blank_tex; G_Texture3DRef basic_noise; + G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT]; }; Struct(G_ThreadLocalCtx) @@ -35,13 +34,17 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList #define G_PushBufferFromCpuCopy(_arena, _cl, _src, ...) 
\ G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ }) +//- Mip +i32 G_DimsFromMip1D(i32 texture_dims, i32 mip); +Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip); +Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip); + //- Viewport / scissor Rng3 G_ViewportFromTexture(G_ResourceHandle texture); Rng2 G_ScissorFromTexture(G_ResourceHandle texture); //- Shared resources +G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind); G_IndexBufferDesc G_QuadIndices(void); -G_SamplerStateRef G_BasicPointClampSampler(void); -G_SamplerStateRef G_BasicPointWrapSampler(void); G_Texture2DRef G_BlankTexture2D(void); G_Texture3DRef G_BasicNoiseTexture(void); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index ad79a53c..cc3d741e 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -432,8 +432,8 @@ Struct(G_TextureDesc) G_Format format; Vec3I32 dims; G_Layout initial_layout; - i32 mips; // Will be clamped to range [1, max mips] Vec4 clear_color; + i32 max_mips; // Will be clamped to range [1, max mips] String name; }; @@ -778,14 +778,14 @@ void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); __VA_ARGS__ \ }) -#define G_DumbMemorySync(cl, resource) \ - G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All) +#define G_DumbMemorySync(cl, resource, ...) \ + G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) -#define G_DumbMemoryLayoutSync(cl, resource, layout) \ - G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout)) +#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \ + G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__) -#define G_DumbGlobalMemorySync(cl) \ - G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All) +#define G_DumbGlobalMemorySync(cl, ...) 
\ + G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) //- Compute diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index c9991768..c2b6ed4d 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -241,7 +241,7 @@ void G_Bootstrap(void) } ////////////////////////////// - //- Initialize bindless root signature + //- Initialize global root signature { HRESULT hr = 0; @@ -1128,8 +1128,8 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle } else if (is_texture) { - i32 max_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); - i32 max_mips = FloorF32(Log2F32(max_dim)) + 1; + i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); + i32 max_mips = FloorF32(Log2F32(largest_dim)) + 1; d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); d3d_desc.Dimension = ( desc.kind == G_ResourceKind_Texture1D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE1D : @@ -1141,7 +1141,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle d3d_desc.Width = MaxI32(desc.texture.dims.x, 1); d3d_desc.Height = MaxI32(desc.texture.dims.y, 1); d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1); - d3d_desc.MipLevels = ClampF32(desc.texture.mips, 1, max_mips); + d3d_desc.MipLevels = ClampF32(desc.texture.max_mips, 1, max_mips); d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); @@ -1278,9 +1278,9 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle ); Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); - if (is_texture) + for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) { - resource->cmdlist_texture_layout = d3d_initial_layout; + resource->cmdlist_texture_layouts[mip_idx] = d3d_initial_layout; } if (!SUCCEEDED(hr)) @@ -2021,6 +2021,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } // Batch barrier cmds + i64 max_buffer_barriers = 0; + i64 max_texture_barriers = 0; + i64 max_global_barriers = 0; { u64 cmd_idx = 0; u64 batch_gen = 0; @@ -2056,6 +2059,27 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } cmd->barrier.batch_gen = batch_gen; prev_barrier_cmd = cmd; + + if (cmd->barrier.desc.is_global) + { + max_global_barriers += 1; + } + else + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(cmd->barrier.desc.resource); + if (resource->is_texture) + { + RngI32 mips = cmd->barrier.desc.mips; + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + max_texture_barriers += mips.max - mips.min + 1; + } + else + { + max_buffer_barriers += 1; + } + } + cmd_idx += 1; } break; } @@ -2109,9 +2133,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) u64 buffer_barriers_count = 0; u64 
texture_barriers_count = 0; u64 global_barriers_count = 0; - D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); - D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); - D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); + D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, max_buffer_barriers); + D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, max_texture_barriers); + D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, max_global_barriers); for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx) { G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; @@ -2123,20 +2147,11 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next); D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev); D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next); - D3D12_BARRIER_LAYOUT layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED; - D3D12_BARRIER_LAYOUT layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED; D3D12_BARRIER_TYPE barrier_type = D3D12_BARRIER_TYPE_GLOBAL; if (!desc.is_global) { G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); barrier_type = resource->is_texture ? 
D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; - layout_before = resource->cmdlist_texture_layout; - layout_after = resource->cmdlist_texture_layout; - if (desc.layout != G_Layout_NoChange) - { - layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); - resource->cmdlist_texture_layout = layout_after; - } } // Build barrier @@ -2159,20 +2174,37 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); RngI32 mips = barrier_cmd->barrier.desc.mips; - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = access_after; - barrier->LayoutBefore = layout_before; - barrier->LayoutAfter = layout_after; - barrier->pResource = resource->d3d_resource; - barrier->Subresources.IndexOrFirstMipLevel = mips.min; - barrier->Subresources.NumMipLevels = mips.max - mips.min + 1; - barrier->Subresources.NumArraySlices = 1; - barrier->Subresources.NumPlanes = 1; + { + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + } + // Create a barrier for each contiguous span of mips with matching layout + D3D12_TEXTURE_BARRIER *barrier = 0; + for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) + { + D3D12_BARRIER_LAYOUT layout_before = resource->cmdlist_texture_layouts[mip_idx]; + D3D12_BARRIER_LAYOUT layout_after = layout_before; + if (desc.layout != G_Layout_NoChange) + { + layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); + } + if (barrier == 0 || barrier->LayoutBefore != layout_before) + { + barrier = &texture_barriers[texture_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + 
barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + barrier->LayoutBefore = layout_before; + barrier->LayoutAfter = layout_after; + barrier->pResource = resource->d3d_resource; + barrier->Subresources.IndexOrFirstMipLevel = mip_idx; + barrier->Subresources.NumArraySlices = 1; + barrier->Subresources.NumPlanes = 1; + } + barrier->Subresources.NumMipLevels += 1; + resource->cmdlist_texture_layouts[mip_idx] = layout_after; + } } break; case D3D12_BARRIER_TYPE_GLOBAL: @@ -2592,13 +2624,25 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { G_D12_Resource *resource = cmd->log.resource; String resource_name = STRING(resource->name_len, resource->name_text); - String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layout); + + String layouts_str = Zi; + { + StringList layout_names = Zi; + for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) + { + String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); + String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); + PushStringToList(scratch.arena, &layout_names, layout_str); + } + layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); + } + String msg = StringF( scratch.arena, - "[Gpu command list resource log] uid: %F, name: \"%F\", layout: %F", + "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", FmtUint(resource->uid), FmtString(resource_name), - FmtString(layout_name) + FmtString(layouts_str) ); LogDebug(msg); cmd_idx += 1; @@ -3282,7 +3326,7 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma backbuffer->texture_format = format; backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); backbuffer->texture_mips = 1; - backbuffer->cmdlist_texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; + backbuffer->cmdlist_texture_layouts[0] = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; } } diff --git 
a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 3b967fed..87515748 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -25,6 +25,7 @@ #define G_D12_MaxSamplerDescriptors (1024 * 1) #define G_D12_MaxRtvDescriptors (1024 * 64) +#define G_D12_MaxMips 16 #define G_D12_MaxNameLen 64 //////////////////////////////////////////////////////////// @@ -85,7 +86,7 @@ Struct(G_D12_Resource) G_Format texture_format; Vec3I32 texture_dims; i32 texture_mips; - D3D12_BARRIER_LAYOUT cmdlist_texture_layout; + D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips]; // Sampler info G_SamplerDesc sampler_desc; diff --git a/src/gpu/gpu_shader_core.cgh b/src/gpu/gpu_shader_core.cgh index 2d0aab1d..111e1fbc 100644 --- a/src/gpu/gpu_shader_core.cgh +++ b/src/gpu/gpu_shader_core.cgh @@ -71,6 +71,21 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10 #define G_TweakFloat G_ShaderConst_TweakF32 #endif +//////////////////////////////////////////////////////////// +//~ Basic samplers + +Enum(G_BasicSamplerKind) +{ + G_BasicSamplerKind_PointClamp, + G_BasicSamplerKind_PointWrap, + G_BasicSamplerKind_BilinearClamp, + G_BasicSamplerKind_BilinearWrap, + G_BasicSamplerKind_TrilinearClamp, + G_BasicSamplerKind_TrilinearWrap, + + G_BasicSamplerKind_COUNT +}; + //////////////////////////////////////////////////////////// //~ Resource dereference diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay index ccf71049..0477727d 100644 --- a/src/pp/pp_vis/pp_vis.lay +++ b/src/pp/pp_vis/pp_vis.lay @@ -24,6 +24,7 @@ @ComputeShader V_SimParticlesCS @ComputeShader V_ShadeCS @ComputeShader V_CompositeCS +@ComputeShader V_BlurDownCS @VertexShader V_DVertVS @PixelShader V_DVertPS diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 5880ce5d..999bdde2 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -646,9 +646,11 @@ void V_TickForever(WaveLaneCtx *lane) { // Persistent resources + 
for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < G_BasicSamplerKind_COUNT; ++sampler_kind) + { + frame->basic_samplers[sampler_kind] = G_BasicSamplerFromKind(sampler_kind); + } frame->tiles = gpu_tiles; - frame->pt_clamp_sampler = G_BasicPointClampSampler(); - frame->pt_wrap_sampler = G_BasicPointWrapSampler(); frame->particles = gpu_particles; frame->stain_cells = gpu_stain_cells; frame->ground_cells = gpu_ground_cells; @@ -4847,12 +4849,16 @@ void V_TickForever(WaveLaneCtx *lane) frame->screen_dims, G_Layout_DirectQueue_ShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, - .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) + .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)), + .max_mips = countof(frame->screen_mips_ro) // For blur pyramid ); - frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target); - frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target); Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); + for (i32 mip_idx = 0; mip_idx < G_CountMips(screen_target); ++mip_idx) + { + frame->screen_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->screen_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx)); + } // Albedo texture G_ResourceHandle albedo_target = G_PushTexture2D( @@ -4874,10 +4880,10 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_ResourceFlag_AllowShaderReadWrite, .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) ); - frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target); - frame->shade_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target); Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), 
VEC3(frame->shade_dims.x, frame->shade_dims.y, 1)); Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y)); + frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target); + frame->shade_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target); // Quad buffers G_ResourceHandle quads_buff = G_PushBufferFromCpuCopy( @@ -4933,8 +4939,8 @@ void V_TickForever(WaveLaneCtx *lane) G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame); // Set constants - G_SetConstant(frame->cl, V_ShaderConst_Frame, gpu_frame); - G_SetConstant(frame->cl, V_ShaderConst_NoiseTex, G_BasicNoiseTexture()); + G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame); + G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); } // Sync @@ -5021,15 +5027,42 @@ void V_TickForever(WaveLaneCtx *lane) } ////////////////////////////// - //- Bloom pass + //- Blur passes { + // TODO: Limit passes + i32 mips_count = G_CountMips(screen_target); + + // Downsample + blur passes + G_LogResource(frame->cl, screen_target); + for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx) + { + Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); + + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1)); + // G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); + + G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); + G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims)); + } + + // Upsample passes + // for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) + // { + // Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); + + // G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); + // G_DumbMemoryLayoutSync(frame->cl, 
screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); + + // G_SetConstant(frame->cl, V_GpuConst_Mip, mip_idx); + // G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims)); + // } } ////////////////////////////// //- Debug shapes pass - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite, .mips = RNGI32(0, 0)); { G_Rasterize( @@ -5045,13 +5078,13 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Finalize screen target - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0)); { Rng2 uv = Zi; uv.p0 = Vec2FromVec(screen_viewport.p0); uv.p1 = Vec2FromVec(screen_viewport.p1); uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims)); - UI_SetRawTexture(vis_box, frame->screen_ro, uv); + UI_SetRawTexture(vis_box, frame->screen_mips_ro[0], uv); } } diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index f5162940..7aaefdea 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -3,7 +3,7 @@ f32 V_RandFromPos(Vec3 pos) { - Texture3D noise3d = G_Dereference(V_ShaderConst_NoiseTex); + Texture3D noise3d = G_Dereference(V_GpuConst_NoiseTex); // TODO: Compile-time noise dims u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)]; f32 rand = Norm16(noise); @@ -77,7 +77,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den ComputeShader2D(V_PrepareShadeCS, 8, 8) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; RWTexture2D shade = G_Dereference(frame.shade_rw); Vec2 shade_pos = SV_DispatchThreadID + 0.5; if (all(shade_pos < countof(shade))) @@ -90,7 +90,7 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8) //- Prepare 
cells ComputeShader2D(V_PrepareCellsCS, 8, 8) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); RWTexture2D stain_cells = G_Dereference(frame.stain_cells); RWTexture2D ground_cells = G_Dereference(frame.ground_cells); @@ -151,7 +151,7 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8) //- Clear particles ComputeShader(V_ClearParticlesCS, 64) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; RWStructuredBuffer particles = G_Dereference(frame.particles); u32 particle_idx = SV_DispatchThreadID; if (particle_idx < V_ParticlesCap) @@ -168,7 +168,7 @@ ComputeShader(V_ClearParticlesCS, 64) VertexShader(V_QuadVS, V_QuadPSInput) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer quads = G_Dereference(frame.quads); V_Quad quad = quads[SV_InstanceID]; @@ -192,10 +192,10 @@ VertexShader(V_QuadVS, V_QuadPSInput) PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; - RWTexture2D occluders = G_Dereference(frame.occluders); + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer quads = G_Dereference(frame.quads); - SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler); + SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + RWTexture2D occluders = G_Dereference(frame.occluders); V_Quad quad = quads[input.quad_idx]; Texture2D tex = G_Dereference(quad.tex); @@ -229,7 +229,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ComputeShader(V_EmitParticlesCS, 64) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer emitters = G_Dereference(frame.emitters); 
RWStructuredBuffer particles = G_Dereference(frame.particles); @@ -260,7 +260,7 @@ ComputeShader(V_EmitParticlesCS, 64) ComputeShader(V_SimParticlesCS, 64) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); RWStructuredBuffer particles = G_Dereference(frame.particles); RWTexture2D stain_cells = G_Dereference(frame.stain_cells); @@ -528,13 +528,13 @@ ComputeShader(V_SimParticlesCS, 64) ComputeShader2D(V_ShadeCS, 8, 8) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; - RWTexture2D shade_tex = G_Dereference(frame.shade_rw); - Texture2D albedo_tex = G_Dereference(frame.albedo_ro); + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; + SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); Texture2D tiles = G_Dereference(frame.tiles); + Texture2D albedo_tex = G_Dereference(frame.albedo_ro); + RWTexture2D shade_tex = G_Dereference(frame.shade_rw); RWTexture2D stain_cells = G_Dereference(frame.stain_cells); RWTexture2D drynesses = G_Dereference(frame.drynesses); - SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler); Vec2 shade_pos = SV_DispatchThreadID + 0.5; Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1)); @@ -565,10 +565,11 @@ ComputeShader2D(V_ShadeCS, 8, 8) ComputeShader2D(V_CompositeCS, 8, 8) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; // Texture2D shade_tex = G_Dereference(frame.shade_ro); + SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); Texture2D albedo_tex = G_Dereference(frame.albedo_ro); - RWTexture2D screen_tex = G_Dereference(frame.screen_rw); + RWTexture2D screen_tex = G_Dereference(frame.screen_mips_rw[0]); RWTexture2D stain_cells = G_Dereference(frame.stain_cells); RWTexture2D ground_cells = 
G_Dereference(frame.ground_cells); RWTexture2D stain_densities = G_Dereference(frame.stain_densities); @@ -577,7 +578,6 @@ ComputeShader2D(V_CompositeCS, 8, 8) RWTexture2D air_densities = G_Dereference(frame.air_densities); RWTexture2D drynesses = G_Dereference(frame.drynesses); Texture2D tiles = G_Dereference(frame.tiles); - SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler); RWStructuredBuffer particles = G_Dereference(frame.particles); Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5; @@ -934,6 +934,28 @@ ComputeShader2D(V_CompositeCS, 8, 8) } } +//////////////////////////////////////////////////////////// +//~ Blur + +ComputeShader2D(V_BlurDownCS, 8, 8) +{ + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; + Texture2D screen_up = G_Dereference(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]); + RWTexture2D screen_down = G_Dereference(frame.screen_mips_rw[V_GpuConst_MipIdx]); + + // V_GpuConst_MipIdx + + Vec2 blur_pos = SV_DispatchThreadID + 0.5; + + + + Vec4 result = 0; + if (all(blur_pos >= 0) && all(blur_pos < countof(screen_down))) + { + screen_down[blur_pos] = result; + } +} + //////////////////////////////////////////////////////////// //~ Debug shapes @@ -942,7 +964,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) VertexShader(V_DVertVS, V_DVertPSInput) { - V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer verts = G_Dereference(frame.dverts); V_DVert vert = verts[SV_VertexID]; diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index 2262ede5..7b6bbf64 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -66,10 +66,11 @@ ComputeShader(V_SimParticlesCS, 64); ComputeShader2D(V_ShadeCS, 8, 8); //- Composite -VertexShader(V_CompositeVS, V_CompositePSInput); -PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input); ComputeShader2D(V_CompositeCS, 8, 8); +//- Blur +ComputeShader2D(V_BlurDownCS, 8, 8); + 
//- Debug shapes VertexShader(V_DVertVS, V_DVertPSInput); PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input); diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 4d400712..c9c1d40f 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -6,8 +6,9 @@ //////////////////////////////////////////////////////////// //~ State types -G_DeclConstant(G_StructuredBufferRef, V_ShaderConst_Frame, 0); -G_DeclConstant(G_Texture3DRef, V_ShaderConst_NoiseTex, 1); +G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 0); +G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 1); +G_DeclConstant(u32, V_GpuConst_MipIdx, 2); Struct(V_TileDesc) { @@ -130,14 +131,13 @@ Struct(V_SharedFrame) //- Gpu data - G_SamplerStateRef pt_clamp_sampler; - G_SamplerStateRef pt_wrap_sampler; + G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT]; V_TileDesc tile_descs[P_TileKind_COUNT]; G_Texture2DRef tiles; - G_Texture2DRef screen_ro; - G_RWTexture2DRef screen_rw; + G_Texture2DRef screen_mips_ro[16]; + G_RWTexture2DRef screen_mips_rw[16]; G_Texture2DRef shade_ro; G_RWTexture2DRef shade_rw; G_Texture2DRef albedo_ro; diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index 99431481..d2f6767b 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1724,7 +1724,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) params.target_size = draw_size; params.target_ro = draw_target_ro; params.rects = rects_ro; - params.sampler = G_BasicPointClampSampler(); + params.sampler = G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp); params.cursor_pos = frame->cursor_pos; params.aa = TweakFloat("UI anti-aliasing", 1, 0, 1); } @@ -1736,7 +1736,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams); // Constants - G_SetConstant(frame->cl, UI_ShaderConst_Params, params_ro); + G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro); // Sync 
G_DumbGlobalMemorySync(frame->cl); @@ -1767,7 +1767,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) // Render rect wireframes if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug)) { - G_SetConstant(frame->cl, UI_ShaderConst_DebugDraw, 1); + G_SetConstant(frame->cl, UI_GpuConst_DebugDraw, 1); G_Rasterize( frame->cl, UI_DRectVS, UI_DRectPS, diff --git a/src/ui/ui_gpu.g b/src/ui/ui_gpu.g index 354f7562..7609ac52 100644 --- a/src/ui/ui_gpu.g +++ b/src/ui/ui_gpu.g @@ -6,7 +6,7 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) { - UI_GpuParams params = G_Dereference(UI_ShaderConst_Params)[0]; + UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; StructuredBuffer rects = G_Dereference(params.rects); UI_GpuRect rect = rects[SV_InstanceID]; @@ -35,7 +35,7 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) { - UI_GpuParams params = G_Dereference(UI_ShaderConst_Params)[0]; + UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; StructuredBuffer rects = G_Dereference(params.rects); SamplerState sampler = G_Dereference(params.sampler); @@ -104,7 +104,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) //- Finalize Vec4 result = composite_premul * input.tint_premul; - if (UI_ShaderConst_DebugDraw) + if (UI_GpuConst_DebugDraw) { result = input.debug_premul; } @@ -135,7 +135,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput) PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) { - UI_GpuParams params = G_Dereference(UI_ShaderConst_Params)[0]; + UI_GpuParams params = G_Dereference(UI_GpuConst_Params)[0]; Texture2D tex = G_Dereference(params.target_ro); SamplerState sampler = G_Dereference(params.sampler); diff --git a/src/ui/ui_shared.cgh b/src/ui/ui_shared.cgh index 0c9855e6..98a7704a 100644 --- a/src/ui/ui_shared.cgh +++ b/src/ui/ui_shared.cgh @@ -1,8 +1,8 @@ //////////////////////////////////////////////////////////// //~ Constant types -G_DeclConstant(G_StructuredBufferRef, 
UI_ShaderConst_Params, 0); -G_DeclConstant(b32, UI_ShaderConst_DebugDraw, 1); +G_DeclConstant(G_StructuredBufferRef, UI_GpuConst_Params, 0); +G_DeclConstant(b32, UI_GpuConst_DebugDraw, 1); Struct(UI_GpuParams) {