shader PSO creation

This commit is contained in:
jacob 2025-06-07 20:20:21 -05:00
parent 0551148ae8
commit 74609cdb3c
7 changed files with 229 additions and 139 deletions

View File

@ -28,12 +28,12 @@ struct ps_input {
* Globals
* ========================== */
StructuredBuffer<vs_instance> G_instance_buffer : register(t0);
StructuredBuffer<vs_instance> g_instance_buffer : register(t0);
cbuffer constants : register(b0)
{
float4x4 G_projection;
uint G_instance_offset;
float4x4 g_projection;
uint g_instance_offset;
};
/* ========================== *
@ -49,12 +49,12 @@ static const float2 G_quad_verts[4] = {
ps_input vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID)
{
vs_instance instance = G_instance_buffer[G_instance_offset + instance_id];
vs_instance instance = g_instance_buffer[g_instance_offset + instance_id];
float2 vert = G_quad_verts[vertex_id];
float2 world_pos = mul(instance.xf, float3(vert, 1)).xy;
ps_input output;
output.screen_pos = mul(G_projection, float4(world_pos, 0, 1));
output.screen_pos = mul(g_projection, float4(world_pos, 0, 1));
output.line_thickness = instance.line_thickness;
output.line_spacing = instance.line_spacing;
output.offset = instance.offset;

View File

@ -16,7 +16,7 @@ struct ps_input {
cbuffer constants : register(b0)
{
float4x4 G_projection;
float4x4 g_projection;
};
/* ========================== *
@ -26,7 +26,7 @@ cbuffer constants : register(b0)
ps_input vs_main(vs_input input)
{
ps_input output;
output.screen_pos = mul(G_projection, float4(input.pos.xy, 0.f, 1.f));
output.screen_pos = mul(g_projection, float4(input.pos.xy, 0.f, 1.f));
output.color_lin = linear_from_srgb(input.color_srgb);
return output;

View File

@ -12,12 +12,12 @@ struct ps_input {
* Globals
* ========================== */
StructuredBuffer<vs_instance> G_instance_buffer : register(t0);
StructuredBuffer<vs_instance> g_instance_buffer : register(t0);
cbuffer constants : register(b0)
{
float4x4 G_projection;
uint G_instance_offset;
float4x4 g_projection;
uint g_instance_offset;
};
/* ========================== *
@ -33,11 +33,11 @@ static const float2 G_quad_verts[4] = {
ps_input vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID)
{
vs_instance instance = G_instance_buffer[G_instance_offset + instance_id];
vs_instance instance = g_instance_buffer[g_instance_offset + instance_id];
float2 vert = G_quad_verts[vertex_id];
float2 world_pos = mul(instance.xf, float3(vert, 1)).xy;
ps_input output;
output.screen_pos = mul(G_projection, float4(world_pos, 0, 1));
output.screen_pos = mul(g_projection, float4(world_pos, 0, 1));
return output;
}

View File

@ -1,6 +1,6 @@
#include "shaders/common.hlsl"
struct vs_instance {
struct instance {
float2x3 xf;
float2 uv0;
float2 uv1;
@ -8,55 +8,65 @@ struct vs_instance {
float emittance;
};
struct ps_input {
DESV(float4, screen_pos, SV_POSITION);
DECL(float2, uv);
DECL(float4, tint_lin);
struct constants {
float4x4 projection;
uint instance_offset;
};
/* ========================== *
* Globals
* Root Signature
* ========================== */
StructuredBuffer<vs_instance> G_instance_buffer : register(t0);
#define SIG \
"CBV(b0), " \
"DescriptorTable(SRV(t0), SRV(t1)), " \
"DescriptorTable(Sampler(s0))"
Texture2D G_texture : register(t1);
SamplerState G_sampler : register(s0);
cbuffer constants : register(b0)
cbuffer c : register(b0)
{
float4x4 G_projection;
uint G_instance_offset;
struct constants g_constants;
};
StructuredBuffer<instance> g_instance_buffer : register(t0);
Texture2D g_texture : register(t1);
SamplerState g_sampler : register(s0);
/* ========================== *
* Vertex shader
* ========================== */
static const float2 G_quad_verts[4] = {
static const float2 g_quad_verts[4] = {
float2(-0.5f, -0.5f),
float2( 0.5f, -0.5f),
float2( 0.5f, 0.5f),
float2(-0.5f, 0.5f)
};
static const int2 G_uv_factors[4] = {
static const int2 g_uv_factors[4] = {
int2(0, 0),
int2(1, 0),
int2(1, 1),
int2(0, 1)
};
ps_input vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID)
struct vs_output {
DESV(float4, screen_pos, SV_POSITION);
DECL(float2, uv);
DECL(float4, tint_lin);
};
[RootSignature(SIG)]
vs_output vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID)
{
vs_instance instance = G_instance_buffer[G_instance_offset + instance_id];
float2 vert = G_quad_verts[vertex_id];
float2 uv_factor = G_uv_factors[vertex_id];
instance instance = g_instance_buffer[g_constants.instance_offset + instance_id];
float2 vert = g_quad_verts[vertex_id];
float2 uv_factor = g_uv_factors[vertex_id];
float2 world_pos = mul(instance.xf, float3(vert, 1)).xy;
ps_input output;
output.screen_pos = mul(G_projection, float4(world_pos, 0, 1));
vs_output output;
output.screen_pos = mul(g_constants.projection, float4(world_pos, 0, 1));
output.uv = instance.uv0 + uv_factor * (instance.uv1 - instance.uv0);
output.tint_lin = linear_from_srgb32(instance.tint_srgb);
@ -67,8 +77,9 @@ ps_input vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID)
* Pixel shader
* ========================== */
float4 ps_main(ps_input input) : SV_TARGET
[RootSignature(SIG)]
float4 ps_main(vs_output input) : SV_TARGET
{
float4 color = G_texture.Sample(G_sampler, input.uv) * input.tint_lin;
float4 color = g_texture.Sample(g_sampler, input.uv) * input.tint_lin;
return color;
}

View File

@ -248,13 +248,16 @@ void app_entry_point(struct string args_str)
/* Ideally these layers should have cores "reserved" for them
* 1. User thread
* 2. Sim thread
* 3. Audio mixing/playback thread
* 3. Audio mixing / playback thread
* 4. Networking thread
*/
i32 num_reserved_cores = 3;
i32 num_reserved_cores = 4;
i32 num_logical_cores = (i32)sys_num_logical_processors();
//num_logical_cores = min(num_logical_cores, 8) + (max(num_logical_cores - 8, 0) / 2); /* Dumb heuristic to try and lessen e-core usage */
i32 min_worker_count = 2;
i32 max_worker_count = 128;
i32 target_worker_count = (i32)sys_num_logical_processors() - num_reserved_cores;
i32 target_worker_count = num_logical_cores - num_reserved_cores;
worker_count = (u32)clamp_i32(target_worker_count, min_worker_count, max_worker_count);
#endif
}

View File

@ -56,6 +56,7 @@ struct dx12_shader_desc {
struct dx12_shader {
struct dx12_shader_desc desc;
ID3D12PipelineState *pso;
};
struct dx12_shader_result {
@ -162,6 +163,90 @@ INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown)
#endif
}
/* ========================== *
* Handle
* ========================== */
INTERNAL void dx12_texture_release(struct dx12_texture *t);
INTERNAL struct gpu_handle handle_alloc(enum dx12_handle_kind kind, void *data)
{
u64 old_gen = 0;
u64 idx = 0;
struct dx12_handle_entry *entry = NULL;
{
struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex);
if (G.first_free_handle_entry) {
entry = G.first_free_handle_entry;
G.first_free_handle_entry = entry->next_free;
old_gen = entry->gen;
idx = entry->idx;
} else {
entry = arena_push_no_zero(&G.handle_entries_arena, struct dx12_handle_entry);
idx = G.num_handle_entries_reserved++;
}
sys_mutex_unlock(&lock);
}
MEMZERO_STRUCT(entry);
entry->kind = kind;
entry->gen = old_gen + 1;
entry->idx = idx;
entry->data = data;
struct gpu_handle res = ZI;
res.gen = entry->gen;
res.idx = entry->idx;
return res;
}
INTERNAL struct dx12_handle_entry *handle_get_entry(struct gpu_handle handle, struct sys_lock *lock)
{
sys_assert_locked_e_or_s(lock, &G.handle_entries_mutex);
struct dx12_handle_entry *res = NULL;
if (handle.idx > 0 && handle.idx < G.num_handle_entries_reserved) {
struct dx12_handle_entry *tmp = &((struct dx12_handle_entry *)G.handle_entries_arena.base)[handle.idx];
if (tmp->gen == handle.gen) {
res = tmp;
}
}
return res;
}
/* TODO: The GPU api should ensure that resources freed by the caller will not cause issues on the GPU (via fencing),
* however the caller is responsible for managing resource lifetimes on the CPU side (e.g. using sprites w/ sprite scopes
* to ensure freed textures aren't being used in pending command lists. */
void gpu_release(struct gpu_handle handle)
{
enum dx12_handle_kind kind = NULL;
void *data = NULL;
/* Release handle entry */
struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex);
{
struct dx12_handle_entry *entry = handle_get_entry(handle, &lock);
if (entry) {
kind = entry->kind;
data = entry->data;
}
++entry->gen;
entry->next_free = G.first_free_handle_entry;
G.first_free_handle_entry = entry;
}
sys_mutex_unlock(&lock);
/* Release data */
if (data) {
switch (kind) {
default: break;
case DX12_HANDLE_KIND_TEXTURE:
{
dx12_texture_release(data);
} break;
}
}
}
/* ========================== *
* Dx12 base initialization
* ========================== */
@ -433,7 +518,7 @@ PACK(struct dx12_texture_shader_instance {
/* Init shaders */
INTERNAL struct dx12_shader_result *shader_alloc_from_descs(struct arena *arena, u64 num_shaders, struct dx12_shader_desc *descs);
INTERNAL void shader_release(struct dx12_shader *shader);
INTERNAL void dx12_shader_release(struct dx12_shader *shader);
INTERNAL void dx12_init_shaders(void)
{
@ -442,18 +527,21 @@ INTERNAL void dx12_init_shaders(void)
struct dx12_shader_desc shader_descs[] = {
/* Texture shader */
{
.name = "shaders/texture.hlsl",
.flags = DX12_SHADER_DESC_FLAG_VS | DX12_SHADER_DESC_FLAG_PS
.name = "shaders/texture.hlsl",
.flags = DX12_SHADER_DESC_FLAG_VS |
DX12_SHADER_DESC_FLAG_PS
}
};
struct dx12_shader_result *results = shader_alloc_from_descs(scratch.arena, ARRAY_COUNT(shader_descs), shader_descs);
for (u64 i = 0; i < ARRAY_COUNT(shader_descs); ++i) {
struct dx12_shader_result *result = &results[i];
if (result->errors_text_len <= 0) {
/* TODO */
if (result->errors_text_len > 0) {
struct string msg = STRING(result->errors_text_len, result->errors_text);
sys_panic(msg);
dx12_shader_release(&result->shader);
} else {
shader_release(&result->shader);
/* TODO */
}
}
@ -538,7 +626,8 @@ INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler)
* Shader compilation
* ========================== */
/* TODO: Compile shaders offline w/ dxc */
/* TODO: Compile shaders offline w/ dxc.
* Will also allow for some hlsl language features like static_assert */
enum shader_compile_task_kind {
SHADER_COMPILE_TASK_KIND_VS,
@ -673,7 +762,7 @@ INTERNAL WORK_TASK_FUNC_DEF(shader_load_task, load_arg_raw)
struct string shader_name = string_from_cstr_no_limit(desc.name);
logf_info("Loading shader '%F'", FMT_STR(shader_name));
struct resource src_res = resource_open(shader_name);
(UNUSED)result;
struct string error_str = LIT("Unknown error");
struct shader_compile_task_arg vs = ZI;
vs.kind = SHADER_COMPILE_TASK_KIND_VS;
@ -696,9 +785,77 @@ INTERNAL WORK_TASK_FUNC_DEF(shader_load_task, load_arg_raw)
work_wait(work);
b32 success = vs.success && ps.success;
/* FIXME: Validate root signature blob exists in bytecode */
/* Create PSO */
ID3D12PipelineState *pso = NULL;
if (success) {
} else {
struct string error_str = LIT("Unknown error");
/* Default rasterizer state */
D3D12_RASTERIZER_DESC raster_desc = {
.FillMode = D3D12_FILL_MODE_SOLID,
.CullMode = D3D12_CULL_MODE_BACK,
.FrontCounterClockwise = FALSE,
.DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
.DepthClipEnable = TRUE,
.MultisampleEnable = FALSE,
.AntialiasedLineEnable = FALSE,
.ForcedSampleCount = 0,
.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF
};
/* No input layout */
D3D12_INPUT_LAYOUT_DESC input_layout_desc = {
.pInputElementDescs = NULL,
.NumElements = 0
};
/* Opaque blend state */
D3D12_BLEND_DESC blend_desc = {
.AlphaToCoverageEnable = FALSE,
.IndependentBlendEnable = FALSE
};
blend_desc.RenderTarget[0].BlendEnable = FALSE;
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
/* Disable depth stencil */
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = {
.DepthEnable = FALSE,
.StencilEnable = FALSE
};
/* PSO */
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 };
pso_desc.pRootSignature = NULL; /* Use embedded root signature */
if (vs.success) {
pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs.blob);
pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs.blob);
}
if (ps.success) {
pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps.blob);
pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps.blob);
}
pso_desc.BlendState = blend_desc;
pso_desc.SampleMask = UINT_MAX;
pso_desc.RasterizerState = raster_desc;
pso_desc.DepthStencilState = depth_stencil_desc;
pso_desc.InputLayout = input_layout_desc;
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pso_desc.NumRenderTargets = 1;
pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
pso_desc.SampleDesc.Count = 1;
HRESULT hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
if (FAILED(hr)) {
error_str = LIT("Failed to create pipeline state object");
success = false;
ASSERT(false);
}
}
/* Copy error */
if (!success) {
ID3D10Blob *error_blob = vs.error_blob ? vs.error_blob : ps.error_blob;
if (error_blob) {
u64 error_blob_cstr_len = ID3D10Blob_GetBufferSize(error_blob);
@ -712,10 +869,12 @@ INTERNAL WORK_TASK_FUNC_DEF(shader_load_task, load_arg_raw)
error_str = error_blob_str;
}
}
result->errors_text_len = max_u64(error_str.len, ARRAY_COUNT(result->errors_text));
result->errors_text_len = min_u64(error_str.len, ARRAY_COUNT(result->errors_text));
MEMCPY(result->errors_text, error_str.text, result->errors_text_len);
}
shader->pso = pso;
if (vs.blob) {
ID3D10Blob_Release(vs.blob);
}
@ -761,94 +920,11 @@ INTERNAL struct dx12_shader_result *shader_alloc_from_descs(struct arena *arena,
return results;
}
INTERNAL void shader_release(struct dx12_shader *shader)
INTERNAL void dx12_shader_release(struct dx12_shader *shader)
{
__prof;
/* TODO */
(UNUSED)shader;
}
/* ========================== *
* Handle
* ========================== */
INTERNAL void dx12_texture_release(struct dx12_texture *t);
INTERNAL struct gpu_handle handle_alloc(enum dx12_handle_kind kind, void *data)
{
u64 old_gen = 0;
u64 idx = 0;
struct dx12_handle_entry *entry = NULL;
{
struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex);
if (G.first_free_handle_entry) {
entry = G.first_free_handle_entry;
G.first_free_handle_entry = entry->next_free;
old_gen = entry->gen;
idx = entry->idx;
} else {
entry = arena_push_no_zero(&G.handle_entries_arena, struct dx12_handle_entry);
idx = G.num_handle_entries_reserved++;
}
sys_mutex_unlock(&lock);
}
MEMZERO_STRUCT(entry);
entry->kind = kind;
entry->gen = old_gen + 1;
entry->idx = idx;
entry->data = data;
struct gpu_handle res = ZI;
res.gen = entry->gen;
res.idx = entry->idx;
return res;
}
INTERNAL struct dx12_handle_entry *handle_get_entry(struct gpu_handle handle, struct sys_lock *lock)
{
sys_assert_locked_e_or_s(lock, &G.handle_entries_mutex);
struct dx12_handle_entry *res = NULL;
if (handle.idx > 0 && handle.idx < G.num_handle_entries_reserved) {
struct dx12_handle_entry *tmp = &((struct dx12_handle_entry *)G.handle_entries_arena.base)[handle.idx];
if (tmp->gen == handle.gen) {
res = tmp;
}
}
return res;
}
/* TODO: The GPU api should ensure that resources freed by the caller will not cause issues on the GPU (via fencing),
* however the caller is responsible for managing resource lifetimes on the CPU side (e.g. using sprites w/ sprite scopes
* to ensure freed textures aren't being used in pending command lists. */
void gpu_release(struct gpu_handle handle)
{
enum dx12_handle_kind kind = NULL;
void *data = NULL;
/* Release handle entry */
struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex);
{
struct dx12_handle_entry *entry = handle_get_entry(handle, &lock);
if (entry) {
kind = entry->kind;
data = entry->data;
}
++entry->gen;
entry->next_free = G.first_free_handle_entry;
G.first_free_handle_entry = entry;
}
sys_mutex_unlock(&lock);
/* Release data */
if (data) {
switch (kind) {
default: break;
case DX12_HANDLE_KIND_TEXTURE:
{
dx12_texture_release(data);
} break;
}
if (shader->pso) {
ID3D12PipelineState_Release(shader->pso);
}
}

View File

@ -448,9 +448,9 @@ INTERNAL struct work_handle work_push_from_slate_locked(struct sys_lock *lock, s
* there would be no remaining workers to complete the child work, meaning
* there is a deadlock.
*
* By forcing workers to do their own child work, we can guarantee that this
* does not occur. However it is not ideal since it creates situations in
* which work is not done asynchronously.
* By forcing workers to do their own child work in this scenario, we can
* guarantee that this does not occur. However it is not ideal since it
* creates situations in which work is not done asynchronously.
*/
struct worker_ctx *ctx = thread_local_var_eval(&tl_worker_ctx);
if (ctx->is_worker) {