basic bilinear & trilinear samplers. fix mip resource barrier.

This commit is contained in:
jacob 2026-02-15 10:42:21 -06:00
parent e49bcfbe2c
commit a6796c594c
14 changed files with 307 additions and 130 deletions

View File

@ -19,30 +19,6 @@ void G_BootstrapCommon(void)
G.quad_indices = G_IdxBuff16(quad_indices);
}
// Init point clamp sampler
{
G_ResourceHandle pt_sampler = G_PushSampler(
gpu_perm, cl,
.filter = G_Filter_MinMagMipPoint,
.x = G_AddressMode_Clamp,
.y = G_AddressMode_Clamp,
.z = G_AddressMode_Clamp,
);
G.basic_point_clamp_sampler = G_PushSamplerStateRef(gpu_perm, pt_sampler);
}
// Init point wrap sampler
{
G_ResourceHandle pt_sampler = G_PushSampler(
gpu_perm, cl,
.filter = G_Filter_MinMagMipPoint,
.x = G_AddressMode_Wrap,
.y = G_AddressMode_Wrap,
.z = G_AddressMode_Wrap,
);
G.basic_point_wrap_sampler = G_PushSamplerStateRef(gpu_perm, pt_sampler);
}
// Init blank texture
{
G_ResourceHandle blank_tex = G_PushTexture2D(
@ -79,6 +55,62 @@ void G_BootstrapCommon(void)
G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex);
}
// Init basic samplers
//
// Each basic sampler kind is a (filter, address mode) pair, with the address
// mode applied to all three axes. The switch only selects that pair; the
// sampler resource and its state ref are then created once, after the switch,
// so there is a single creation path for every kind.
for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind)
{
  G_Filter filter = G_Filter_MinMagMipPoint;
  G_AddressMode address_mode = G_AddressMode_Clamp;
  switch (sampler_kind)
  {
    default:
    {
      // Sampler unspecified: assert, then fall back to point/clamp
      Assert(0);
    } FALLTHROUGH;
    case G_BasicSamplerKind_PointClamp:
    {
      filter = G_Filter_MinMagMipPoint;
      address_mode = G_AddressMode_Clamp;
    } break;
    case G_BasicSamplerKind_PointWrap:
    {
      filter = G_Filter_MinMagMipPoint;
      address_mode = G_AddressMode_Wrap;
    } break;
    case G_BasicSamplerKind_BilinearClamp:
    {
      filter = G_Filter_MinMagLinearMipPoint;
      address_mode = G_AddressMode_Clamp;
    } break;
    case G_BasicSamplerKind_BilinearWrap:
    {
      filter = G_Filter_MinMagLinearMipPoint;
      address_mode = G_AddressMode_Wrap;
    } break;
    case G_BasicSamplerKind_TrilinearClamp:
    {
      filter = G_Filter_MinMagMipLinear;
      address_mode = G_AddressMode_Clamp;
    } break;
    case G_BasicSamplerKind_TrilinearWrap:
    {
      filter = G_Filter_MinMagMipLinear;
      address_mode = G_AddressMode_Wrap;
    } break;
  }
  G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
  G.basic_samplers[sampler_kind] = G_PushSamplerStateRef(gpu_perm, sampler_res);
}
}
G_CommitCommandList(cl);
@ -109,6 +141,35 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
return buffer;
}
//- Mip
// Returns the extent of a 1D texture at mip level `mip`.
//
// texture_dims: extent of the texture at mip 0.
// mip:          mip level; clamped to [0, 31] before it is used as a shift amount.
//
// The extent is halved (right-shifted) once per mip level and never drops below 1.
i32 G_DimsFromMip1D(i32 texture_dims, i32 mip)
{
  mip = ClampI32(mip, 0, 31);
  // Shift the mip 0 extent itself. Shifting a zero-initialized local instead
  // would make every mip report an extent of 1.
  i32 result = MaxI32(texture_dims >> mip, 1);
  return result;
}
// Returns the extents of a 2D texture at mip level `mip`.
//
// texture_dims: extents of the texture at mip 0.
// mip:          mip level; clamped to [0, 31] before it is used as a shift amount.
//
// Each axis is right-shifted by the clamped mip level and floored at 1.
Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip)
{
  i32 shift = ClampI32(mip, 0, 31);
  i32 mip_width = MaxI32(texture_dims.x >> shift, 1);
  i32 mip_height = MaxI32(texture_dims.y >> shift, 1);

  Vec2I32 mip_dims = Zi;
  mip_dims.x = mip_width;
  mip_dims.y = mip_height;
  return mip_dims;
}
// Returns the extents of a 3D texture at mip level `mip`.
//
// texture_dims: extents of the texture at mip 0.
// mip:          mip level; clamped to [0, 31] before it is used as a shift amount.
//
// Each axis is right-shifted by the clamped mip level and floored at 1.
Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip)
{
  i32 shift = ClampI32(mip, 0, 31);
  i32 mip_width = MaxI32(texture_dims.x >> shift, 1);
  i32 mip_height = MaxI32(texture_dims.y >> shift, 1);
  i32 mip_depth = MaxI32(texture_dims.z >> shift, 1);

  Vec3I32 mip_dims = Zi;
  mip_dims.x = mip_width;
  mip_dims.y = mip_height;
  mip_dims.z = mip_depth;
  return mip_dims;
}
//- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture)
@ -125,21 +186,17 @@ Rng2 G_ScissorFromTexture(G_ResourceHandle texture)
//- Shared resources
// Looks up the shared sampler state ref stored for `kind` in G.basic_samplers.
// `kind` is used directly as the array index; no range check is performed here.
G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind)
{
  G_SamplerStateRef sampler = G.basic_samplers[kind];
  return sampler;
}
// Returns the shared quad index buffer descriptor stored in G.quad_indices.
G_IndexBufferDesc G_QuadIndices(void)
{
  G_IndexBufferDesc quad_indices = G.quad_indices;
  return quad_indices;
}
// Returns the sampler state ref stored in G.basic_point_clamp_sampler.
G_SamplerStateRef G_BasicPointClampSampler(void)
{
  G_SamplerStateRef sampler = G.basic_point_clamp_sampler;
  return sampler;
}
// Returns the sampler state ref stored in G.basic_point_wrap_sampler.
G_SamplerStateRef G_BasicPointWrapSampler(void)
{
  G_SamplerStateRef sampler = G.basic_point_wrap_sampler;
  return sampler;
}
G_Texture2DRef G_BlankTexture2D(void)
{
return G.blank_tex;

View File

@ -5,10 +5,9 @@ Struct(G_Ctx)
{
// Common shared resources
G_IndexBufferDesc quad_indices;
G_SamplerStateRef basic_point_clamp_sampler;
G_SamplerStateRef basic_point_wrap_sampler;
G_Texture2DRef blank_tex;
G_Texture3DRef basic_noise;
G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT];
};
Struct(G_ThreadLocalCtx)
@ -35,13 +34,17 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
#define G_PushBufferFromCpuCopy(_arena, _cl, _src, ...) \
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })
//- Mip
i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);
Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);
Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);
//- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);
Rng2 G_ScissorFromTexture(G_ResourceHandle texture);
//- Shared resources
G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind);
G_IndexBufferDesc G_QuadIndices(void);
G_SamplerStateRef G_BasicPointClampSampler(void);
G_SamplerStateRef G_BasicPointWrapSampler(void);
G_Texture2DRef G_BlankTexture2D(void);
G_Texture3DRef G_BasicNoiseTexture(void);

View File

@ -432,8 +432,8 @@ Struct(G_TextureDesc)
G_Format format;
Vec3I32 dims;
G_Layout initial_layout;
i32 mips; // Will be clamped to range [1, max mips]
Vec4 clear_color;
i32 max_mips; // Will be clamped to range [1, max mips]
String name;
};
@ -778,14 +778,14 @@ void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
__VA_ARGS__ \
})
#define G_DumbMemorySync(cl, resource) \
G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All)
#define G_DumbMemorySync(cl, resource, ...) \
G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
#define G_DumbMemoryLayoutSync(cl, resource, layout) \
G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout))
#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \
G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__)
#define G_DumbGlobalMemorySync(cl) \
G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All)
#define G_DumbGlobalMemorySync(cl, ...) \
G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
//- Compute

View File

@ -241,7 +241,7 @@ void G_Bootstrap(void)
}
//////////////////////////////
//- Initialize bindless root signature
//- Initialize global root signature
{
HRESULT hr = 0;
@ -1128,8 +1128,8 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
}
else if (is_texture)
{
i32 max_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z);
i32 max_mips = FloorF32(Log2F32(max_dim)) + 1;
i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z);
i32 max_mips = FloorF32(Log2F32(largest_dim)) + 1;
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
d3d_desc.Dimension = (
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
@ -1141,7 +1141,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
d3d_desc.Width = MaxI32(desc.texture.dims.x, 1);
d3d_desc.Height = MaxI32(desc.texture.dims.y, 1);
d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1);
d3d_desc.MipLevels = ClampF32(desc.texture.mips, 1, max_mips);
d3d_desc.MipLevels = ClampF32(desc.texture.max_mips, 1, max_mips);
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite);
@ -1278,9 +1278,9 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
);
Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1);
if (is_texture)
for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx)
{
resource->cmdlist_texture_layout = d3d_initial_layout;
resource->cmdlist_texture_layouts[mip_idx] = d3d_initial_layout;
}
if (!SUCCEEDED(hr))
@ -2021,6 +2021,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
}
// Batch barrier cmds
i64 max_buffer_barriers = 0;
i64 max_texture_barriers = 0;
i64 max_global_barriers = 0;
{
u64 cmd_idx = 0;
u64 batch_gen = 0;
@ -2056,6 +2059,27 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
}
cmd->barrier.batch_gen = batch_gen;
prev_barrier_cmd = cmd;
if (cmd->barrier.desc.is_global)
{
max_global_barriers += 1;
}
else
{
G_D12_Resource *resource = G_D12_ResourceFromHandle(cmd->barrier.desc.resource);
if (resource->is_texture)
{
RngI32 mips = cmd->barrier.desc.mips;
mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1);
mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1);
max_texture_barriers += mips.max - mips.min + 1;
}
else
{
max_buffer_barriers += 1;
}
}
cmd_idx += 1;
} break;
}
@ -2109,9 +2133,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
u64 buffer_barriers_count = 0;
u64 texture_barriers_count = 0;
u64 global_barriers_count = 0;
D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, max_buffer_barriers);
D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, max_texture_barriers);
D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, max_global_barriers);
for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx)
{
G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
@ -2123,20 +2147,11 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next);
D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev);
D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next);
D3D12_BARRIER_LAYOUT layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED;
D3D12_BARRIER_LAYOUT layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED;
D3D12_BARRIER_TYPE barrier_type = D3D12_BARRIER_TYPE_GLOBAL;
if (!desc.is_global)
{
G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource);
barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
layout_before = resource->cmdlist_texture_layout;
layout_after = resource->cmdlist_texture_layout;
if (desc.layout != G_Layout_NoChange)
{
layout_after = G_D12_BarrierLayoutFromLayout(desc.layout);
resource->cmdlist_texture_layout = layout_after;
}
}
// Build barrier
@ -2159,20 +2174,37 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource);
RngI32 mips = barrier_cmd->barrier.desc.mips;
mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1);
mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1);
D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++];
barrier->SyncBefore = sync_before;
barrier->SyncAfter = sync_after;
barrier->AccessBefore = access_before;
barrier->AccessAfter = access_after;
barrier->LayoutBefore = layout_before;
barrier->LayoutAfter = layout_after;
barrier->pResource = resource->d3d_resource;
barrier->Subresources.IndexOrFirstMipLevel = mips.min;
barrier->Subresources.NumMipLevels = mips.max - mips.min + 1;
barrier->Subresources.NumArraySlices = 1;
barrier->Subresources.NumPlanes = 1;
{
mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1);
mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1);
}
// Create a barrier for each contiguous span of mips with matching layout
D3D12_TEXTURE_BARRIER *barrier = 0;
for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx)
{
D3D12_BARRIER_LAYOUT layout_before = resource->cmdlist_texture_layouts[mip_idx];
D3D12_BARRIER_LAYOUT layout_after = layout_before;
if (desc.layout != G_Layout_NoChange)
{
layout_after = G_D12_BarrierLayoutFromLayout(desc.layout);
}
if (barrier == 0 || barrier->LayoutBefore != layout_before)
{
barrier = &texture_barriers[texture_barriers_count++];
barrier->SyncBefore = sync_before;
barrier->SyncAfter = sync_after;
barrier->AccessBefore = access_before;
barrier->AccessAfter = access_after;
barrier->LayoutBefore = layout_before;
barrier->LayoutAfter = layout_after;
barrier->pResource = resource->d3d_resource;
barrier->Subresources.IndexOrFirstMipLevel = mip_idx;
barrier->Subresources.NumArraySlices = 1;
barrier->Subresources.NumPlanes = 1;
}
barrier->Subresources.NumMipLevels += 1;
resource->cmdlist_texture_layouts[mip_idx] = layout_after;
}
} break;
case D3D12_BARRIER_TYPE_GLOBAL:
@ -2592,13 +2624,25 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
G_D12_Resource *resource = cmd->log.resource;
String resource_name = STRING(resource->name_len, resource->name_text);
String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layout);
String layouts_str = Zi;
{
StringList layout_names = Zi;
for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx)
{
String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]);
String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name));
PushStringToList(scratch.arena, &layout_names, layout_str);
}
layouts_str = StringFromList(scratch.arena, layout_names, Lit(", "));
}
String msg = StringF(
scratch.arena,
"[Gpu command list resource log] uid: %F, name: \"%F\", layout: %F",
"[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }",
FmtUint(resource->uid),
FmtString(resource_name),
FmtString(layout_name)
FmtString(layouts_str)
);
LogDebug(msg);
cmd_idx += 1;
@ -3282,7 +3326,7 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma
backbuffer->texture_format = format;
backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
backbuffer->texture_mips = 1;
backbuffer->cmdlist_texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
backbuffer->cmdlist_texture_layouts[0] = D3D12_BARRIER_LAYOUT_PRESENT;
backbuffer->swapchain = swapchain;
}
}

View File

@ -25,6 +25,7 @@
#define G_D12_MaxSamplerDescriptors (1024 * 1)
#define G_D12_MaxRtvDescriptors (1024 * 64)
#define G_D12_MaxMips 16
#define G_D12_MaxNameLen 64
////////////////////////////////////////////////////////////
@ -85,7 +86,7 @@ Struct(G_D12_Resource)
G_Format texture_format;
Vec3I32 texture_dims;
i32 texture_mips;
D3D12_BARRIER_LAYOUT cmdlist_texture_layout;
D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips];
// Sampler info
G_SamplerDesc sampler_desc;

View File

@ -71,6 +71,21 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10
#define G_TweakFloat G_ShaderConst_TweakF32
#endif
////////////////////////////////////////////////////////////
//~ Basic samplers
// Identifies one of the shared "basic" samplers. Values are used directly as
// indices into per-kind sampler arrays (e.g. `basic_samplers[kind]`), so the
// enumerators must stay contiguous starting at 0, with COUNT last.
// Each kind pairs a filter with an address mode applied to x, y and z.
Enum(G_BasicSamplerKind)
{
G_BasicSamplerKind_PointClamp, // G_Filter_MinMagMipPoint, G_AddressMode_Clamp
G_BasicSamplerKind_PointWrap, // G_Filter_MinMagMipPoint, G_AddressMode_Wrap
G_BasicSamplerKind_BilinearClamp, // G_Filter_MinMagLinearMipPoint, G_AddressMode_Clamp
G_BasicSamplerKind_BilinearWrap, // G_Filter_MinMagLinearMipPoint, G_AddressMode_Wrap
G_BasicSamplerKind_TrilinearClamp, // G_Filter_MinMagMipLinear, G_AddressMode_Clamp
G_BasicSamplerKind_TrilinearWrap, // G_Filter_MinMagMipLinear, G_AddressMode_Wrap
G_BasicSamplerKind_COUNT // Number of kinds; used to size sampler arrays
};
////////////////////////////////////////////////////////////
//~ Resource dereference

View File

@ -24,6 +24,7 @@
@ComputeShader V_SimParticlesCS
@ComputeShader V_ShadeCS
@ComputeShader V_CompositeCS
@ComputeShader V_BlurDownCS
@VertexShader V_DVertVS
@PixelShader V_DVertPS

View File

@ -646,9 +646,11 @@ void V_TickForever(WaveLaneCtx *lane)
{
// Persistent resources
for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < G_BasicSamplerKind_COUNT; ++sampler_kind)
{
frame->basic_samplers[sampler_kind] = G_BasicSamplerFromKind(sampler_kind);
}
frame->tiles = gpu_tiles;
frame->pt_clamp_sampler = G_BasicPointClampSampler();
frame->pt_wrap_sampler = G_BasicPointWrapSampler();
frame->particles = gpu_particles;
frame->stain_cells = gpu_stain_cells;
frame->ground_cells = gpu_ground_cells;
@ -4847,12 +4849,16 @@ void V_TickForever(WaveLaneCtx *lane)
frame->screen_dims,
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)),
.max_mips = countof(frame->screen_mips_ro) // For blur pyramid
);
frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target);
frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target);
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
for (i32 mip_idx = 0; mip_idx < G_CountMips(screen_target); ++mip_idx)
{
frame->screen_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx));
frame->screen_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx));
}
// Albedo texture
G_ResourceHandle albedo_target = G_PushTexture2D(
@ -4874,10 +4880,10 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
);
frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target);
frame->shade_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target);
Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1));
Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y));
frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target);
frame->shade_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target);
// Quad buffers
G_ResourceHandle quads_buff = G_PushBufferFromCpuCopy(
@ -4933,8 +4939,8 @@ void V_TickForever(WaveLaneCtx *lane)
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
// Set constants
G_SetConstant(frame->cl, V_ShaderConst_Frame, gpu_frame);
G_SetConstant(frame->cl, V_ShaderConst_NoiseTex, G_BasicNoiseTexture());
G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame);
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
}
// Sync
@ -5021,15 +5027,42 @@ void V_TickForever(WaveLaneCtx *lane)
}
//////////////////////////////
//- Bloom pass
//- Blur passes
{
// TODO: Limit passes
i32 mips_count = G_CountMips(screen_target);
// Downsample + blur passes
G_LogResource(frame->cl, screen_target);
for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
{
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
// G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims));
}
// Upsample passes
// for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
// {
// Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
// G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
// G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
// G_SetConstant(frame->cl, V_GpuConst_Mip, mip_idx);
// G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims));
// }
}
//////////////////////////////
//- Debug shapes pass
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite, .mips = RNGI32(0, 0));
{
G_Rasterize(
@ -5045,13 +5078,13 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Finalize screen target
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
{
Rng2 uv = Zi;
uv.p0 = Vec2FromVec(screen_viewport.p0);
uv.p1 = Vec2FromVec(screen_viewport.p1);
uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims));
UI_SetRawTexture(vis_box, frame->screen_ro, uv);
UI_SetRawTexture(vis_box, frame->screen_mips_ro[0], uv);
}
}

View File

@ -3,7 +3,7 @@
f32 V_RandFromPos(Vec3 pos)
{
Texture3D<u32> noise3d = G_Dereference<u32>(V_ShaderConst_NoiseTex);
Texture3D<u32> noise3d = G_Dereference<u32>(V_GpuConst_NoiseTex);
// TODO: Compile-time noise dims
u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)];
f32 rand = Norm16(noise);
@ -77,7 +77,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
ComputeShader2D(V_PrepareShadeCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWTexture2D<Vec4> shade = G_Dereference<Vec4>(frame.shade_rw);
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
if (all(shade_pos < countof(shade)))
@ -90,7 +90,7 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8)
//- Prepare cells
ComputeShader2D(V_PrepareCellsCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
@ -151,7 +151,7 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
//- Clear particles
ComputeShader(V_ClearParticlesCS, 64)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap)
@ -168,7 +168,7 @@ ComputeShader(V_ClearParticlesCS, 64)
VertexShader(V_QuadVS, V_QuadPSInput)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
V_Quad quad = quads[SV_InstanceID];
@ -192,10 +192,10 @@ VertexShader(V_QuadVS, V_QuadPSInput)
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler);
SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);
V_Quad quad = quads[input.quad_idx];
Texture2D<Vec4> tex = G_Dereference<Vec4>(quad.tex);
@ -229,7 +229,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
ComputeShader(V_EmitParticlesCS, 64)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
@ -260,7 +260,7 @@ ComputeShader(V_EmitParticlesCS, 64)
ComputeShader(V_SimParticlesCS, 64)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
@ -528,13 +528,13 @@ ComputeShader(V_SimParticlesCS, 64)
ComputeShader2D(V_ShadeCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
RWTexture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_rw);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_rw);
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler);
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
@ -565,10 +565,11 @@ ComputeShader2D(V_ShadeCS, 8, 8)
ComputeShader2D(V_CompositeCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_mips_rw[0]);
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities);
@ -577,7 +578,6 @@ ComputeShader2D(V_CompositeCS, 8, 8)
RWTexture2D<u32> air_densities = G_Dereference<u32>(frame.air_densities);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
@ -934,6 +934,28 @@ ComputeShader2D(V_CompositeCS, 8, 8)
}
}
////////////////////////////////////////////////////////////
//~ Blur
// Downsample step of the screen blur passes.
//
// V_GpuConst_MipIdx selects the destination mip of the screen target; the
// source is the mip directly above it (MipIdx - 1).
//
// NOTE(review): the source mip is dereferenced but not sampled yet, and the
// value written is always zero, so this pass currently just clears the
// destination mip. Looks like a work-in-progress placeholder for the actual
// blur kernel -- confirm.
ComputeShader2D(V_BlurDownCS, 8, 8)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  Texture2D<Vec4> src_mip = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]);
  RWTexture2D<Vec4> dst_mip = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]);

  // Texel center of the destination texel handled by this thread
  Vec2 texel_center = SV_DispatchThreadID + 0.5;

  // Threads that land outside the destination mip write nothing
  bool in_bounds = all(texel_center >= 0) && all(texel_center < countof(dst_mip));
  if (in_bounds)
  {
    Vec4 blurred = 0;
    dst_mip[texel_center] = blurred;
  }
}
////////////////////////////////////////////////////////////
//~ Debug shapes
@ -942,7 +964,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
VertexShader(V_DVertVS, V_DVertPSInput)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_DVert> verts = G_Dereference<V_DVert>(frame.dverts);
V_DVert vert = verts[SV_VertexID];

View File

@ -66,10 +66,11 @@ ComputeShader(V_SimParticlesCS, 64);
ComputeShader2D(V_ShadeCS, 8, 8);
//- Composite
VertexShader(V_CompositeVS, V_CompositePSInput);
PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input);
ComputeShader2D(V_CompositeCS, 8, 8);
//- Blur
ComputeShader2D(V_BlurDownCS, 8, 8);
//- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput);
PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input);

View File

@ -6,8 +6,9 @@
////////////////////////////////////////////////////////////
//~ State types
G_DeclConstant(G_StructuredBufferRef, V_ShaderConst_Frame, 0);
G_DeclConstant(G_Texture3DRef, V_ShaderConst_NoiseTex, 1);
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 0);
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 1);
G_DeclConstant(u32, V_GpuConst_MipIdx, 2);
Struct(V_TileDesc)
{
@ -130,14 +131,13 @@ Struct(V_SharedFrame)
//- Gpu data
G_SamplerStateRef pt_clamp_sampler;
G_SamplerStateRef pt_wrap_sampler;
G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT];
V_TileDesc tile_descs[P_TileKind_COUNT];
G_Texture2DRef tiles;
G_Texture2DRef screen_ro;
G_RWTexture2DRef screen_rw;
G_Texture2DRef screen_mips_ro[16];
G_RWTexture2DRef screen_mips_rw[16];
G_Texture2DRef shade_ro;
G_RWTexture2DRef shade_rw;
G_Texture2DRef albedo_ro;

View File

@ -1724,7 +1724,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
params.target_size = draw_size;
params.target_ro = draw_target_ro;
params.rects = rects_ro;
params.sampler = G_BasicPointClampSampler();
params.sampler = G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp);
params.cursor_pos = frame->cursor_pos;
params.aa = TweakFloat("UI anti-aliasing", 1, 0, 1);
}
@ -1736,7 +1736,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams);
// Constants
G_SetConstant(frame->cl, UI_ShaderConst_Params, params_ro);
G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro);
// Sync
G_DumbGlobalMemorySync(frame->cl);
@ -1767,7 +1767,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
// Render rect wireframes
if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug))
{
G_SetConstant(frame->cl, UI_ShaderConst_DebugDraw, 1);
G_SetConstant(frame->cl, UI_GpuConst_DebugDraw, 1);
G_Rasterize(
frame->cl,
UI_DRectVS, UI_DRectPS,

View File

@ -6,7 +6,7 @@
VertexShader(UI_DRectVS, UI_DRectPSInput)
{
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_ShaderConst_Params)[0];
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
UI_GpuRect rect = rects[SV_InstanceID];
@ -35,7 +35,7 @@ VertexShader(UI_DRectVS, UI_DRectPSInput)
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
{
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_ShaderConst_Params)[0];
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
SamplerState sampler = G_Dereference(params.sampler);
@ -104,7 +104,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
//- Finalize
Vec4 result = composite_premul * input.tint_premul;
if (UI_ShaderConst_DebugDraw)
if (UI_GpuConst_DebugDraw)
{
result = input.debug_premul;
}
@ -135,7 +135,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput)
PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
{
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_ShaderConst_Params)[0];
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
Texture2D<Vec4> tex = G_Dereference<Vec4>(params.target_ro);
SamplerState sampler = G_Dereference(params.sampler);

View File

@ -1,8 +1,8 @@
////////////////////////////////////////////////////////////
//~ Constant types
G_DeclConstant(G_StructuredBufferRef, UI_ShaderConst_Params, 0);
G_DeclConstant(b32, UI_ShaderConst_DebugDraw, 1);
G_DeclConstant(G_StructuredBufferRef, UI_GpuConst_Params, 0);
G_DeclConstant(b32, UI_GpuConst_DebugDraw, 1);
Struct(UI_GpuParams)
{