meta shader compilation

This commit is contained in:
jacob 2025-09-08 17:26:43 -05:00
parent cf7ae04abb
commit 43a82bd540
36 changed files with 1477 additions and 774 deletions

View File

@ -16,7 +16,7 @@ Struct(AppArgList)
};
////////////////////////////////
//~ Shared state
//~ State
Struct(SharedAppState)
{

View File

@ -38,7 +38,7 @@ Struct(AC_Store)
};
////////////////////////////////
//~ Shared state
//~ State
#define AC_MaxAssets 1024
#define AC_AssetLookupTableCapacity (AC_MaxAssets * 4)

View File

@ -88,6 +88,9 @@
# elif defined(_M_ARM64) || defined(__aarch64__)
# define ArchIsX64 0
# define ArchIsArm64 1
# elif LanguageIsGpu
# define ArchIsX64 0
# define ArchIsArm64 0
# else
# error Unknown architecture
# endif
@ -104,13 +107,23 @@
#endif
#endif
//- Windows defines
////////////////////////////////
//~ Platform headers
//- Windows headers
#if PlatformIsWindows
# define COBJMACROS
# define WIN32_LEAN_AND_MEAN
# define UNICODE
# pragma warning(push, 0)
# include <Windows.h>
# include <combaseapi.h>
# include <dcommon.h>
# include <initguid.h>
# include <unknwn.h>
# include <objbase.h>
# include <uuids.h>
# include <Knownfolders.h>
# pragma warning(pop)
#endif
@ -118,13 +131,9 @@
//~ Debug
//- Static assert
#if LanguageIsC
# define StaticAssert2(cond, line, counter) struct STATIC_ASSERT_____##line##counter {int foo[(cond) ? 1 : -1];}
# define StaticAssert1(cond, line, counter) StaticAssert2(cond, line, counter)
# define StaticAssert(cond) StaticAssert1(cond, __LINE__, __COUNTER__)
#else
# define StaticAssert(cond) static_assert(cond, "")
#endif
#define StaticAssert2(cond, line, counter) struct STATIC_ASSERT_____##line##counter {int foo[(cond) ? 1 : -1];}
#define StaticAssert1(cond, line, counter) StaticAssert2(cond, line, counter)
#define StaticAssert(cond) StaticAssert1(cond, __LINE__, __COUNTER__)
//- Debug assert
#if RtcIsEnabled
@ -142,7 +151,7 @@
//- Root constant assert
#define AssertRootConst(s, n) StaticAssert((sizeof(s) % 16 == 0) && /* Root constant struct should pad to 16 byte alignment */ \
((sizeof(s) / 4) == n) && /* Root constant struct size should match the specified 32-bit-constant count */ \
((sizeof(s) / 4) == (n)) && /* Root constant struct size should match the specified 32-bit-constant count */ \
(sizeof(s) <= 256)) /* Root constant struct can only fit 64 DWORDS */
//- Debug alias
@ -363,8 +372,10 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t);
#if LanguageIsGpu
//- Resource heap index
# define GpuResourceFromUrid(urid) ResourceDescriptorHeap[urid]
# define GpuResourceFromNurid(nurid) ResourceDescriptorHeap[NonUniformResourceIndex(nurid)]
# define GpuResourceFromUrid(urid) ResourceDescriptorHeap[(urid)]
# define GpuResourceFromNurid(nurid) ResourceDescriptorHeap[NonUniformResourceIndex((nurid))]
# define GpuSamplerFromUrid(urid) SamplerDescriptorHeap[(urid)]
# define GpuSamplerFromNurid(nurid) SamplerDescriptorHeap[NonUniformResourceIndex((nurid))]
//- Semantic declaration
# define Semantic(t, n) t n : n
@ -589,6 +600,8 @@ ForceInline void UnlockTicketMutex(TicketMutex *tm)
////////////////////////////////
//~ String types
#if LanguageIsC
#define STRING(size, data) ((String) { (size), (data) })
#define Lit(cstr_lit) (String) { (sizeof((cstr_lit)) - 1), (u8 *)(cstr_lit) }
#define LitNoCast(cstr_lit) { .len = (sizeof((cstr_lit)) - 1), .text = (u8 *)(cstr_lit) }
@ -641,27 +654,80 @@ Struct(StringList)
u64 count;
};
#endif
////////////////////////////////
//~ Resource types
#if LanguageIsC
#define ResourceEmbeddedMagic 0xfc060937194f4406
Struct(ResourceStore)
{
u64 hash;
};
Struct(Resource)
{
u64 hash;
};
#endif
////////////////////////////////
//~ Shader types
#if LanguageIsC
Struct(VertexShader) { Resource resource; };
Struct(PixelShader) { Resource resource; };
Struct(ComputeShader) { Resource resource; };
# define VSDecl(name) extern VertexShader name;
# define PSDecl(name) extern PixelShader name;
# define CSDecl(name) extern ComputeShader name;
#elif LanguageIsGpu
# define VSDecl(name)
# define PSDecl(name)
# define CSDecl(name)
# define VSDef(name, ...) name(__VA_ARGS__)
# define PSDef(name, ...) name(__VA_ARGS__)
# define CSDef(name, ...) name(__VA_ARGS__)
#endif
////////////////////////////////
//~ Fiber id
#if PlatformIsWindows
# define FiberId() (*(i16 *)(void *)(volatile u64)__readgsqword(32))
#else
# error FiberId not implemented
#if LanguageIsC
# if PlatformIsWindows
# define FiberId() (*(i16 *)(void *)(volatile u64)__readgsqword(32))
# else
# error FiberId not implemented
# endif
# define MaxFibers 1024
StaticAssert(MaxFibers < I16Max); /* FiberId type should fit MaxFibers */
#endif
#define MaxFibers 1024
StaticAssert(MaxFibers < I16Max); /* FiberId type should fit MaxFibers */
////////////////////////////////
//~ Exit callback types
#define ExitFuncDef(name) void name(void)
#if LanguageIsC
# define ExitFuncDef(name) void name(void)
typedef ExitFuncDef(ExitFunc);
#endif
////////////////////////////////
//~ @hookdecl Core hooks
//~ @hookdecl Api hooks
#if LanguageIsC
//- Core hooks
StringList GetCommandLineArgs(void);
b32 Panic(String msg);
b32 IsRunningInDebugger(void);
@ -670,15 +736,10 @@ void OnExit(ExitFunc *func);
void SignalExit(i32 code);
void ExitNow(i32 code);
////////////////////////////////
//~ @hookdecl Meta hooks
//- Meta hooks
void StartupLayers(void);
////////////////////////////////
//~ Prof
#include "../prof/prof_tracy.h"
#endif
////////////////////////////////
//~ Config

View File

@ -25,7 +25,7 @@ Struct(TempArena)
};
////////////////////////////////
//~ Per-fiber arena ctx types
//~ State
#define ScratchArenasPerCtx 2
@ -35,9 +35,6 @@ Struct(FiberArenaCtx)
Arena *scratch_arenas[ScratchArenasPerCtx];
};
////////////////////////////////
//~ Shared state
Struct(SharedArenaCtx)
{
FiberArenaCtx arena_contexts[MaxFibers];

View File

@ -3,38 +3,41 @@ SharedResourceState shared_resource_state = ZI;
////////////////////////////////
//~ Startup
void InitBaseResources(String archive)
void InitBaseResources(u64 archive_strings_count, String *archive_strings)
{
if (archive.len > 0)
SharedResourceState *g = &shared_resource_state;
Arena *perm = PermArena();
for (u64 archive_string_index = 0; archive_string_index < archive_strings_count; ++archive_string_index)
{
SharedResourceState *g = &shared_resource_state;
Arena *perm = PermArena();
BB_Buff bb = BB_BuffFromString(archive);
BB_Reader br = BB_ReaderFromBuff(&bb);
u64 magic = BB_ReadUBits(&br, 64);
Assert(magic == ResourceEmbeddedMagic);
/* Create & insert entries */
u64 num_entries = BB_ReadUBits(&br, 64);
for (u64 i = 0; i < num_entries; ++i)
String archive = archive_strings[archive_string_index];
if (archive.len > 0)
{
u64 name_start = BB_ReadUBits(&br, 64);
u64 name_len = BB_ReadUBits(&br, 64);
u64 data_start = BB_ReadUBits(&br, 64);
u64 data_len = BB_ReadUBits(&br, 64);
BB_Buff bb = BB_BuffFromString(archive);
BB_Reader br = BB_ReaderFromBuff(&bb);
ResourceEntry *entry = PushStruct(perm, ResourceEntry);
entry->name = STRING(name_len, archive.text + name_start);
entry->data = STRING(data_len, archive.text + data_start);
entry->hash = HashFnv64(Fnv64Basis, entry->name);
u64 magic = BB_ReadUBits(&br, 64);
Assert(magic == ResourceEmbeddedMagic);
ResourceEntryBin *bin = &g->bins[entry->hash % NumResourceEntryBins];
QueuePushN(bin->first, bin->last, entry, next_in_bin);
QueuePushN(g->first_entry, g->last_entry, entry, next);
/* Create & insert entries */
u64 num_entries = BB_ReadUBits(&br, 64);
for (u64 i = 0; i < num_entries; ++i)
{
u64 name_start = BB_ReadUBits(&br, 64);
u64 name_len = BB_ReadUBits(&br, 64);
u64 data_start = BB_ReadUBits(&br, 64);
u64 data_len = BB_ReadUBits(&br, 64);
ResourceEntry *entry = PushStruct(perm, ResourceEntry);
entry->name = STRING(name_len, archive.text + name_start);
entry->data = STRING(data_len, archive.text + data_start);
entry->hash = HashFnv64(Fnv64Basis, entry->name);
ResourceEntryBin *bin = &g->bins[entry->hash % NumResourceEntryBins];
QueuePushN(bin->first, bin->last, entry, next_in_bin);
QueuePushN(g->first_entry, g->last_entry, entry, next);
}
g->entries_count += num_entries;
}
g->entries_count = num_entries;
}
}

View File

@ -1,20 +1,3 @@
////////////////////////////////
//~ Resource types
#define ResourceEmbeddedMagic 0xfc060937194f4406
#define DeclResourceStore(name) extern ResourceStore name
Struct(ResourceStore)
{
u64 hash;
};
Struct(Resource)
{
u64 hash;
};
////////////////////////////////
//~ Resource cache types
@ -35,7 +18,7 @@ Struct(ResourceEntryBin)
};
////////////////////////////////
//~ Shared state
//~ State
#define NumResourceEntryBins 4096
@ -52,7 +35,7 @@ extern SharedResourceState shared_resource_state;
////////////////////////////////
//~ Startup
void InitBaseResources(String archive);
void InitBaseResources(u64 archive_strings_count, String *archive_strings);
////////////////////////////////
//~ Resource operations

View File

@ -3,15 +3,13 @@ W32_SharedState W32_shared_state = ZI;
////////////////////////////////
//~ Win32 embedded data
/* Find first resource with `type` and return the data in `udata`. */
BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name, LONG_PTR udata)
{
W32_SharedState *g = &W32_shared_state;
W32_FindEmbeddedDataCtx *ctx = (W32_FindEmbeddedDataCtx *)udata;
TempArena scratch = BeginScratchNoConflict();
String *out = (String *)udata;
b32 found = 0;
String entry_name_lower = LowerString(scratch.arena, StringFromWstrNoLimit(scratch.arena, (LPWSTR)wstr_entry_name));
if (EqString(entry_name_lower, Lit(Stringize(W32_EmbeddedDataName))))
String entry_name = StringFromWstrNoLimit(scratch.arena, (LPWSTR)wstr_entry_name);
String embedded_data_prefix = Lit(Stringize(W32_EmbeddedDataPrefix));
if (StringStartsWith(entry_name, embedded_data_prefix))
{
HRSRC hres = FindResourceW(module, wstr_entry_name, type);
if (hres)
@ -19,14 +17,22 @@ BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name
HGLOBAL hg = LoadResource(module, hres);
if (hg)
{
found = 1;
out->len = SizeofResource(module, hres);
out->text = LockResource(hg);
if (ctx->embedded_strings_count < countof(ctx->embedded_strings))
{
String embedded = ZI;
embedded.len = SizeofResource(module, hres);
embedded.text = LockResource(hg);
ctx->embedded_strings[ctx->embedded_strings_count++] = embedded;
}
else
{
Panic(Lit("Maximum number of embedded resource entries exceeded"));
}
}
}
}
EndScratch(scratch);
return !found;
return 1;
}
////////////////////////////////
@ -221,9 +227,9 @@ i32 W32_Main(void)
/* Init resources */
{
String embedded = ZI;
EnumResourceNamesW(0, RT_RCDATA, &W32_FindEmbeddedRcData, (LONG_PTR)&embedded);
InitBaseResources(embedded);
W32_FindEmbeddedDataCtx ctx = ZI;
EnumResourceNamesW(0, RT_RCDATA, &W32_FindEmbeddedRcData, (LONG_PTR)&ctx);
InitBaseResources(ctx.embedded_strings_count, ctx.embedded_strings);
}
//- App startup

View File

@ -11,7 +11,16 @@ u32 BCryptGenRandom(void *algorithm, u8 *buffer, u32 buffer_size, u32 flags);
#pragma comment(lib, "bcrypt")
////////////////////////////////
//~ Shared state
//~ Embedded data iter types
/* Accumulator passed (as the LONG_PTR user-data argument) to W32_FindEmbeddedRcData
 * while enumerating RT_RCDATA resources. Each matching embedded resource is appended
 * as one String; the filled array is then handed to InitBaseResources. */
Struct(W32_FindEmbeddedDataCtx)
{
/* Number of valid entries at the front of `embedded_strings` */
u64 embedded_strings_count;
/* Fixed-capacity storage; W32_FindEmbeddedRcData panics once all 64 slots are used */
String embedded_strings[64];
};
////////////////////////////////
//~ State
#define W32_MaxOnExitFuncs 4096
@ -40,7 +49,7 @@ extern W32_SharedState W32_shared_state;
////////////////////////////////
//~ Embedded data initialization
#define W32_EmbeddedDataName embedded_resource_data
#define W32_EmbeddedDataPrefix EMBEDDED_RESOURCE_DATA__
BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name, LONG_PTR udata);
////////////////////////////////

View File

@ -101,14 +101,14 @@ void InitJobWorkers(void)
case JobPool_Blocking:
{
name_fmt = Lit("Floating worker #%F");
name_fmt = Lit("Blocking worker #%F");
pool->num_worker_threads = 8;
pool->thread_priority = THREAD_PRIORITY_NORMAL;
} break;
case JobPool_Hyper:
{
name_fmt = Lit("Floating worker #%F");
name_fmt = Lit("Hyper worker #%F");
pool->num_worker_threads = 8;
pool->thread_priority = THREAD_PRIORITY_HIGHEST;
} break;

View File

@ -199,7 +199,7 @@ AlignedStruct(W32_JobPool, 64)
};
////////////////////////////////
//~ Shared state
//~ State
/* Assume scheduler cycle is 20hz at start to be conservative */
#define W32_DefaultSchedulerPeriodNs 50000000

View File

@ -1,3 +1,15 @@
////////////////////////////////
//~ Tweakable defines
/* How close can non-overlapping shapes be before collision is considered */
#define CLD_CollisionTolerance 0.005f
/* NOTE: Should always be less than tolerance, since colliding = 1 if origin is within this distance. */
#define CLD_MinUniquePtDistSq (0.001f * 0.001f)
/* To prevent extremely large prototypes when origin is in exact center of rounded feature */
#define CLD_MaxEpaIterations 64
////////////////////////////////
//~ Shape types
@ -120,18 +132,6 @@ Struct(CLD_EpaData)
#endif
};
////////////////////////////////
//~ Shared state
/* How close can non-overlapping shapes be before collision is considered */
#define CLD_CollisionTolerance 0.005f
/* NOTE: Should always be less than tolerance, since colliding = 1 if origin is within this distance. */
#define CLD_MinUniquePtDistSq (0.001f * 0.001f)
/* To prevent extremely large prototypes when origin is in exact center of rounded feature */
#define CLD_MaxEpaIterations 64
////////////////////////////////
//~ Debug helpers

View File

@ -99,7 +99,7 @@ Struct(D_TextParams)
})
////////////////////////////////
//~ Shared state
//~ State
Struct(D_SharedState)
{

View File

@ -55,7 +55,7 @@ JobDef(F_LoadJob, sig, _)
desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
desc.texture.size = VEC3I32(64, 64, 1);
texture = GPU_AcquireResource(desc);
GPU_PushString(0, texture, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels));
GPU_CopyString(0, texture, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels));
}
/* Acquire store memory */

View File

@ -3,9 +3,6 @@
Struct(GPU_Resource);
Struct(GPU_CommandList);
Struct(GPU_VertexShader);
Struct(GPU_PixelShader);
Struct(GPU_ComputeShader);
Struct(GPU_Swapchain);
////////////////////////////////
@ -218,16 +215,6 @@ Struct(GPU_ResourceDesc)
};
};
////////////////////////////////
//~ Shader types
Struct(GPU_Shader)
{
char *shader_resource_name;
};
#define GPU_ShaderDecl(name) static GPU_Shader name = { .shader_resource_name = #name }
////////////////////////////////
//~ Rasterizer types
@ -313,7 +300,7 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *cl);
void GPU_ProfN(GPU_CommandList *cl, String name);
////////////////////////////////
//~ @hookdecl Resource transition operations
//~ @hookdecl Resource barrier operations
void GPU_TransitionToSrv(GPU_CommandList *cl, GPU_Resource *resource);
void GPU_TransitionToUav(GPU_CommandList *cl, GPU_Resource *resource);
@ -323,34 +310,39 @@ void GPU_FlushUav(GPU_CommandList *cl, GPU_Resource *resource);
////////////////////////////////
//~ @hookdecl Dispatch operations
void GPU_ClearResource(GPU_CommandList *cl, GPU_Resource *resource);
void GPU_ClearResource(GPU_CommandList *cl, GPU_Resource *resource, Vec4 clear_value);
void GPU_Rasterize(GPU_CommandList *gpu_cl,
u32 sig_size,
void *sig,
GPU_Shader vs,
GPU_Shader ps,
u32 rts_count,
GPU_Resource **rts,
GPU_Viewport viewport,
GPU_Scissor scissor,
u32 instances_count,
GPU_Resource *index_buffer,
GPU_RasterizeMode mode);
#define GPU_Rasterize(cl, sig_ptr, vs, ps, rts_count, rts, viewport, scissor, instances_count, index_buffer, mode) \
GPU_Rasterize_((cl), sizeof(*(sig_ptr)), (sig_ptr), (vs), (ps), (rts_count), (rts), (viewport), (scissor), (instances_count), (index_buffer), (mode))
void GPU_Compute(GPU_CommandList *cl,
u32 sig_size,
void *sig,
GPU_Shader cs,
u32 num_threads_x,
u32 num_threads_y,
u32 num_threads_z);
#define GPU_Compute(cl, sig_ptr, cs, x, y, z) GPU_Compute_((cl), sizeof(*(sig_ptr)), (sig_ptr), (cs), (x), (y), (z))
void GPU_Rasterize_(GPU_CommandList *cl,
u32 sig_size,
void *sig,
VertexShader vs,
PixelShader ps,
u32 rts_count,
GPU_Resource **rts,
GPU_Viewport viewport,
GPU_Scissor scissor,
u32 instances_count,
GPU_Resource *index_buffer,
GPU_RasterizeMode mode);
void GPU_Compute_(GPU_CommandList *cl,
u32 sig_size,
void *sig,
ComputeShader cs,
u32 num_threads_x,
u32 num_threads_y,
u32 num_threads_z);
////////////////////////////////
//~ @hookdecl Resource copy operations
void GPU_PushResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src);
void GPU_PushString(GPU_CommandList *cl, GPU_Resource *dst, String src);
void GPU_CopyResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src);
void GPU_CopyString(GPU_CommandList *cl, GPU_Resource *dst, String src);
////////////////////////////////
//~ @hookdecl Memory info operations

View File

@ -1,7 +1,7 @@
GPU_D12_SharedState GPU_D12_shared_state = ZI;
////////////////////////////////
//~ Raw fiber state
//~ State operations
GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id)
{
@ -16,6 +16,42 @@ GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id)
return result;
}
////////////////////////////////
//~ Helpers
/* Converts a GPU_Format into a DXGI_FORMAT by reinterpreting its numeric value.
 * NOTE(review): this assumes GPU_Format enumerators share DXGI_FORMAT's numbering - confirm. */
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
    DXGI_FORMAT dxgi_format = (DXGI_FORMAT)format;
    return dxgi_format;
}
/* Appends a fresh command to the tail of `cl` and returns it.
 * A command is recycled from the calling fiber's free list when one is available
 * (and zeroed before reuse); otherwise a new one is pushed onto the perm arena. */
GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl)
{
    GPU_D12_FiberState *fiber = GPU_D12_FiberStateFromId(FiberId());
    Arena *perm = PermArena();

    GPU_D12_Command *command = fiber->first_free_command;
    if (!command)
    {
        /* Free list is empty: allocate */
        command = PushStruct(perm, GPU_D12_Command);
    }
    else
    {
        /* Recycle the head of the free list */
        StackPop(fiber->first_free_command);
        ZeroStruct(command);
    }

    QueuePush(cl->first, cl->last, command);
    cl->count += 1;
    return command;
}
////////////////////////////////
//~ Pipeline operations
/* Looks up / creates the pipeline matching `desc`.
 * TODO: not implemented yet - currently always returns 0, and `desc` is ignored. */
GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc)
{
    GPU_D12_Pipeline *pipeline = 0;
    return pipeline;
}
////////////////////////////////
//~ Raw command list
@ -92,8 +128,12 @@ GPU_CommandList *GPU_BeginCommandList(void)
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
Arena *perm = PermArena();
GPU_D12_CommandList *cl = f->first_free_command_list;
StackPop(f->first_free_command_list);
if (!cl)
if (cl)
{
StackPop(f->first_free_command_list);
ZeroStruct(cl);
}
else
{
cl = PushStruct(perm, GPU_D12_CommandList);
}
@ -103,7 +143,7 @@ GPU_CommandList *GPU_BeginCommandList(void)
GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl)
{
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
GPU_D12_CommandList *cl = gpu_cl;
GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
/* Determine queue kind */
GPU_QueueKind queue_kind = GPU_QueueKind_BackgroundCopy;
@ -117,7 +157,6 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl)
/* Process gpu commands into dx12 commands */
{
for (GPU_D12_Command *cmd = cl->first; cmd; cmd = cmd->next)
{
GPU_D12_CommandKind kind = cmd->kind;
@ -125,27 +164,49 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl)
{
default: break;
//- Resource barrier
case GPU_D12_CommandKind_TransitionToSrv:
case GPU_D12_CommandKind_TransitionToUav:
case GPU_D12_CommandKind_TransitionToRtv:
case GPU_D12_CommandKind_FlushUav:
{
/* TODO */
} break;
//- Clear resource
case GPU_D12_CommandKind_Clear:
{
/* TODO */
} break;
//- Dispatch Vs/Ps shader
case GPU_D12_CommandKind_Rasterize:
{
GPU_D12_RawPipeline *pipeline = 0;
GPU_D12_Pipeline *pipeline = 0;
{
GPU_D12_RawPipelineDesc pipeline_desc = ZI;
pipeline_desc.vs = cmd->rasterise.vs;
pipeline_desc.ps = cmd->rasterise.ps;
pipeline_desc.render_targets_count = rts_count;
for (u32 i = 0; i < rts_count && i < GPU_MaxRenderTargets; ++i)
GPU_D12_PipelineDesc pipeline_desc = ZI;
pipeline_desc.vs = cmd->rasterize.vs;
pipeline_desc.ps = cmd->rasterize.ps;
for (u32 i = 0; i < countof(cmd->rasterize.rts); ++i)
{
pipeline_desc.render_target_formats[i] = rts[i]->format;
GPU_D12_Resource *r = cmd->rasterize.rts[i];
if (r)
{
pipeline_desc.render_target_formats[i] = r->format;
}
else
{
break;
}
}
pipeline = GPU_D12_RawPipelineFromDesc(pipeline_desc);
pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
}
if (pipeline)
{
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(rc, pipeline->raw);
ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->raw);
/* Fill signature */
{
@ -157,7 +218,7 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl)
/* Set rasterizer state */
{
D3D12_RECT scissor = GPU_D12_ScissorRectFromRect(ui_viewport);
D3D12_RECT scissor = ZI;
scissor.left = cmd->rasterize.scissor.left;
scissor.top = cmd->rasterize.scissor.top;
scissor.right = cmd->rasterize.scissor.right;
@ -191,32 +252,55 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl)
/* Set index buffer */
u32 indices_count = 0;
{
GPU_D12_Resource *indices = cmd->rasterizer.index_buffer;
GPU_D12_Resource *indices = cmd->rasterize.index_buffer;
if (indices)
{
D3D12_INDEX_BUFFER_VIEW ibv = ZI;
ibv.buffer_location = indices->gpu_address;
ibv.format = indices->format;
ibv.BufferLocation = indices->gpu_address;
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(indices->format);
ibv.SizeInBytes = indices->size;
indices_count = indices->count;
}
}
/* Dispatch */
ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, cmd->rasterize.instance_count, indices_count, 0, 0, 0);
ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, cmd->rasterize.instances_count, indices_count, 0, 0, 0);
}
} break;
//- Dispatch compute shader
case GPU_D12_CommandKind_Compute:
{
GPU_D12_Pipeline *pipeline = 0;
{
GPU_D12_PipelineDesc pipeline_desc = ZI;
pipeline_desc.cs = cmd->compute.cs;
pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
}
if (pipeline)
{
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->raw);
/* Fill signature */
{
u32 sig_size = cmd->compute.sig_size;
void *sig = cmd->compute.sig;
u32 num32bit = sig_size / 4;
ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(rcl, 0, num32bit, sig, 0);
}
/* Dispatch */
ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.num_threads_x, cmd->compute.num_threads_y, cmd->compute.num_threads_z);
}
} break;
}
}
}
/* End dx12 command list */
GPU_D12_EndRawCommandList(rcl);
GPU_D12_EndRawCommandList(dx12_cl);
/* Free commands */
if (cl->last)
@ -232,7 +316,7 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl)
}
////////////////////////////////
//~ @hookdef Profiling helpers
//~ @hookdef Profiling helper hooks
void GPU_ProfN(GPU_CommandList *cl, String name)
{
@ -240,74 +324,127 @@ void GPU_ProfN(GPU_CommandList *cl, String name)
}
////////////////////////////////
//~ @hookdef Resource transition hooks
//~ @hookdef Resource barrier hooks
void GPU_TransitionToSrv(GPU_CommandList *cl, GPU_Resource *resource)
/* Records a transition-to-SRV barrier command for `resource` on the command list.
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_TransitionToSrv(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{
    GPU_D12_Command *barrier_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    barrier_cmd->barrier.resource = (GPU_D12_Resource *)resource;
    barrier_cmd->kind = GPU_D12_CommandKind_TransitionToSrv;
}
void GPU_TransitionToUav(GPU_CommandList *cl, GPU_Resource *resource)
/* Records a transition-to-UAV barrier command for `resource` on the command list.
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_TransitionToUav(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{
    GPU_D12_Command *barrier_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    barrier_cmd->barrier.resource = (GPU_D12_Resource *)resource;
    barrier_cmd->kind = GPU_D12_CommandKind_TransitionToUav;
}
void GPU_TransitionToRtv(GPU_CommandList *cl, GPU_Resource *resource)
/* Records a transition-to-RTV barrier command for `resource` on the command list.
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_TransitionToRtv(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{
    GPU_D12_Command *barrier_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    barrier_cmd->barrier.resource = (GPU_D12_Resource *)resource;
    barrier_cmd->kind = GPU_D12_CommandKind_TransitionToRtv;
}
void GPU_FlushUav(GPU_CommandList *cl, GPU_Resource *resource)
/* Records a UAV flush command for `resource` on the command list.
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_FlushUav(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{
    GPU_D12_Command *barrier_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    barrier_cmd->barrier.resource = (GPU_D12_Resource *)resource;
    barrier_cmd->kind = GPU_D12_CommandKind_FlushUav;
}
////////////////////////////////
//~ @hookdef Dispatch hooks
void GPU_ClearResource(GPU_CommandList *cl, GPU_Resource *resource)
/* Records a clear command that stores `resource` and the `clear_value` to clear it with.
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_ClearResource(GPU_CommandList *gpu_cl, GPU_Resource *resource, Vec4 clear_value)
{
    GPU_D12_Command *clear_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    clear_cmd->kind = GPU_D12_CommandKind_Clear;
    clear_cmd->clear.value = clear_value;
    clear_cmd->clear.resource = (GPU_D12_Resource *)resource;
}
void GPU_Rasterize(GPU_CommandList *gpu_cl,
GPU_Shader vs,
GPU_Shader ps,
u32 sig_size,
void *sig,
u32 rts_count,
GPU_Resource **rts,
GPU_Viewport viewport,
GPU_Scissor scissor,
u32 instances_count,
GPU_Resource *index_buffer,
GPU_RasterizeMode mode)
/* Records a rasterize (vertex + pixel shader) command on the command list.
 *
 * sig_size / sig:   Shader signature bytes. Copied into the command, so the caller's
 *                   memory does not need to outlive this call. Sizes larger than the
 *                   command's inline storage assert and are truncated.
 * vs / ps:          Shaders to dispatch with.
 * rts_count / rts:  Render targets. At most GPU_MaxRenderTargets are recorded; larger
 *                   counts assert and are truncated. Slots past `rts_count` are left
 *                   exactly as GPU_D12_PushCmd returned them.
 * viewport, scissor, instances_count, index_buffer, mode: stored verbatim.
 *
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_Rasterize_(GPU_CommandList *gpu_cl,
                    u32 sig_size,
                    void *sig,
                    VertexShader vs,
                    PixelShader ps,
                    u32 rts_count,
                    GPU_Resource **rts,
                    GPU_Viewport viewport,
                    GPU_Scissor scissor,
                    u32 instances_count,
                    GPU_Resource *index_buffer,
                    GPU_RasterizeMode mode)
{
    GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
    GPU_D12_Command *cmd = GPU_D12_PushCmd(cl);
    cmd->kind = GPU_D12_CommandKind_Rasterize;

    /* Copy signature into the command's inline storage */
    Assert(sig_size <= sizeof(cmd->rasterize.sig));
    cmd->rasterize.sig_size = MinU32(sizeof(cmd->rasterize.sig), sig_size);
    CopyBytes(cmd->rasterize.sig, sig, cmd->rasterize.sig_size);

    cmd->rasterize.vs = vs;
    cmd->rasterize.ps = ps;

    /* Copy render targets. Exactly GPU_MaxRenderTargets is a valid count (the array
     * has that many slots), so the check is inclusive like the signature check above. */
    Assert(rts_count <= GPU_MaxRenderTargets);
    u32 recorded_rts_count = MinU32(rts_count, GPU_MaxRenderTargets);
    for (u32 i = 0; i < recorded_rts_count; ++i)
    {
        cmd->rasterize.rts[i] = (GPU_D12_Resource *)rts[i];
    }

    cmd->rasterize.viewport = viewport;
    cmd->rasterize.scissor = scissor;
    cmd->rasterize.instances_count = instances_count;
    cmd->rasterize.index_buffer = (GPU_D12_Resource *)index_buffer;
    cmd->rasterize.mode = mode;
}
void GPU_Compute(GPU_CommandList *cl,
GPU_Shader cs,
u32 sig_size,
void *sig,
u32 num_threads_x,
u32 num_threads_y,
u32 num_threads_z)
/* Records a compute dispatch command on the command list.
 * The `sig_size` bytes at `sig` are copied into the command (oversized signatures
 * assert and are truncated to the inline storage), then the shader and the three
 * thread-count values are stored. Nothing is issued to D3D12 here. */
void GPU_Compute_(GPU_CommandList *gpu_cl,
                  u32 sig_size,
                  void *sig,
                  ComputeShader cs,
                  u32 num_threads_x,
                  u32 num_threads_y,
                  u32 num_threads_z)
{
    GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    cmd->kind = GPU_D12_CommandKind_Compute;

    /* Signature */
    Assert(sig_size <= sizeof(cmd->compute.sig));
    u32 stored_sig_size = MinU32(sizeof(cmd->compute.sig), sig_size);
    cmd->compute.sig_size = stored_sig_size;
    CopyBytes(cmd->compute.sig, sig, stored_sig_size);

    /* Dispatch parameters */
    cmd->compute.num_threads_x = num_threads_x;
    cmd->compute.num_threads_y = num_threads_y;
    cmd->compute.num_threads_z = num_threads_z;
    cmd->compute.cs = cs;
}
////////////////////////////////
//~ @hookdef Copy hooks
void GPU_PushResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src)
/* Records a resource-to-resource copy command (`gpu_src` into `gpu_dst`).
 * Nothing is issued to D3D12 here; the command is only queued. */
void GPU_CopyResource(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, GPU_Resource *gpu_src)
{
    GPU_D12_Command *copy_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    copy_cmd->kind = GPU_D12_CommandKind_Copy;
    copy_cmd->copy.src_resource = (GPU_D12_Resource *)gpu_src;
    copy_cmd->copy.dst = (GPU_D12_Resource *)gpu_dst;
}
void GPU_PushString(GPU_CommandList *cl, GPU_Resource *dst, String src)
/* Records a copy command whose source is the CPU-side String `src` and whose
 * destination is `gpu_dst`. Only the String value (length + pointer) is stored;
 * the bytes it points at are not duplicated here. */
void GPU_CopyString(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, String src)
{
    GPU_D12_Command *copy_cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    copy_cmd->kind = GPU_D12_CommandKind_Copy;
    copy_cmd->copy.src_string = src;
    copy_cmd->copy.dst = (GPU_D12_Resource *)gpu_dst;
}
////////////////////////////////
@ -338,7 +475,28 @@ void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain)
/* TODO */
}
void GPU_PresentSwapchain(GPU_Swapchain *swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
/* Presents the swapchain.
 * `vsync` is forwarded as the present sync interval. Tearing is requested only when
 * GPU_D12_TearingIsAllowed is set and `vsync` is 0. A failed present asserts.
 * `backbuffer_resolution`, `texture` and `texture_xf` are currently unused because
 * the blit step below is still disabled. */
void GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
{
    GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain;

    /* Blit (disabled) */
    // GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_resolution);
    // GPU_D12_Resource *texture_resource = (GPU_D12_Resource *)texture;
    // GPU_D12_BlitToSwapchain(swapchain_buffer, texture_resource, texture_xf);

    u32 present_flags = (GPU_D12_TearingIsAllowed && vsync == 0) ? DXGI_PRESENT_ALLOW_TEARING : 0;

    /* Present */
    {
        __profn("Present");
        HRESULT hr = IDXGISwapChain3_Present(swapchain->raw, vsync, present_flags);
        if (FAILED(hr))
        {
            Assert(0);
        }
    }
}

View File

@ -1,3 +1,49 @@
////////////////////////////////
//~ DirectX12 headers
#include <d3d12.h>
#include <dxgidebug.h>
#include <dxgi1_6.h>
////////////////////////////////
//~ Tweakable defines
/* Non-zero: request DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING on the swapchain, and let
 * GPU_PresentSwapchain pass DXGI_PRESENT_ALLOW_TEARING when vsync is 0. */
#define GPU_D12_TearingIsAllowed 1
/* Non-zero: request DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT on the swapchain.
 * NOTE(review): presumably also the maximum frame latency value - confirm at the use site. */
#define GPU_D12_FrameLatency 1
/* Swapchain creation flags derived from the two switches above
 * (each flag is multiplied by 0 or 1 so it drops out when its switch is off). */
#define GPU_D12_SwapchainFlags (((GPU_D12_TearingIsAllowed != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) \
| ((GPU_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
/* NOTE(review): presumably the number of swapchain back buffers - confirm at the use site. */
#define GPU_D12_SwapchainBufferCount (4)
////////////////////////////////
//~ Pipeline types
/* Describes the pipeline requested from GPU_D12_PipelineFromDesc.
 * Rasterize commands fill `vs`, `ps` and `render_target_formats`;
 * compute commands fill only `cs`. Unused members stay zero-initialized. */
Struct(GPU_D12_PipelineDesc)
{
/* Vertex shader (rasterize pipelines) */
VertexShader vs;
/* Pixel shader (rasterize pipelines) */
PixelShader ps;
/* Compute shader (compute pipelines) */
ComputeShader cs;
/* Format of each bound render target, in slot order; trailing unused slots are left zeroed */
GPU_Format render_target_formats[GPU_MaxRenderTargets];
};
/* A created D3D12 pipeline, as returned by GPU_D12_PipelineFromDesc. */
Struct(GPU_D12_Pipeline)
{
/* Pipeline state object; bound with ID3D12GraphicsCommandList_SetPipelineState */
ID3D12PipelineState *raw;
/* NOTE(review): presumably the root signature the pipeline was created with - confirm */
ID3D12RootSignature *rootsig;
};
////////////////////////////////
//~ Resource types
/* D3D12 backing for a GPU_Resource (GPU_Resource pointers are cast to this type). */
Struct(GPU_D12_Resource)
{
/* Underlying D3D12 resource */
ID3D12Resource *raw;
/* Resource format; used as the render-target format and as the index-buffer view format */
GPU_Format format;
/* NOTE(review): presumably the size in bytes of one element - confirm */
u32 element_size;
/* Size used as the index-buffer view's SizeInBytes */
u32 size;
/* Element count; for index buffers this is the index count passed to the draw call */
u32 count;
/* GPU virtual address; used as the index-buffer view's BufferLocation */
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
};
////////////////////////////////
//~ Raw command list types
@ -8,12 +54,28 @@ Struct(GPU_D12_RawCommandList)
};
////////////////////////////////
//~ Command types
//~ Command list types
/* Discriminator for GPU_D12_Command: tells which recorded operation a command
 * represents. The GPU_* recording functions set it on each command they push. */
Enum(GPU_D12_CommandKind)
{
/* Zero value; no recording function assigns it */
GPU_D12_CommandKind_None,
/* Barrier */
GPU_D12_CommandKind_TransitionToSrv,
GPU_D12_CommandKind_TransitionToUav,
GPU_D12_CommandKind_TransitionToRtv,
GPU_D12_CommandKind_FlushUav,
/* Copy */
GPU_D12_CommandKind_Copy,
/* Clear */
GPU_D12_CommandKind_Clear,
/* Rasterize */
GPU_D12_CommandKind_Rasterize,
/* Compute */
GPU_D12_CommandKind_Compute,
};
@ -21,28 +83,44 @@ Struct(GPU_D12_Command)
{
GPU_D12_Command *next;
GPU_D12_CommandKind kind;
union
{
struct
{
GPU_Shader vs;
GPU_Shader ps;
GPU_D12_Resource *resource;
} barrier;
struct
{
GPU_D12_Resource *dst;
GPU_D12_Resource *src_resource;
String src_string;
} copy;
struct
{
GPU_D12_Resource *resource;
Vec4 value;
} clear;
struct
{
u32 sig_size;
void *sig;
u32 rts_count;
GPU_Resource **rts;
u32 viewports_count;
GPU_Viewport *viewports;
u32 scissors_count;
GPU_Scissor *scissors;
u8 sig[256];
VertexShader vs;
PixelShader ps;
GPU_D12_Resource *rts[GPU_MaxRenderTargets];
GPU_Viewport viewport;
GPU_Scissor scissor;
u32 instances_count;
GPU_Resource *index_buffer;
GPU_D12_Resource *index_buffer;
GPU_RasterizeMode mode;
} rasterize;
struct
{
i32 _;
u32 sig_size;
u8 sig[256];
ComputeShader cs;
u32 num_threads_x;
u32 num_threads_y;
u32 num_threads_z;
} compute;
};
};
@ -56,7 +134,18 @@ Struct(GPU_D12_CommandList)
};
////////////////////////////////
//~ Fiber state
//~ Swapchain types
/* D3D12 backing for a GPU_Swapchain (GPU_Swapchain pointers are cast to this type). */
Struct(GPU_D12_Swapchain)
{
/* DXGI swapchain; presented with IDXGISwapChain3_Present */
IDXGISwapChain3 *raw;
/* NOTE(review): presumably the window the swapchain was created for - confirm */
HWND window_hwnd;
/* NOTE(review): presumably the frame-latency waitable object handle - confirm */
HANDLE waitable;
/* NOTE(review): Vec3I32 here, while GPU_PresentSwapchain takes a Vec2I32 backbuffer resolution - confirm intended type */
Vec3I32 resolution;
};
////////////////////////////////
//~ State
Struct(GPU_D12_FiberState)
{
@ -64,9 +153,6 @@ Struct(GPU_D12_FiberState)
GPU_D12_Command *first_free_command;
};
////////////////////////////////
//~ Shared state
Struct(GPU_D12_SharedState)
{
i32 _;
@ -75,10 +161,21 @@ Struct(GPU_D12_SharedState)
extern GPU_D12_SharedState GPU_D12_shared_state;
////////////////////////////////
//~ Fiber state operations
//~ State operations
GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id);
////////////////////////////////
//~ Helpers
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format);
GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl);
////////////////////////////////
//~ Pipeline operations
GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc);
////////////////////////////////
//~ Raw command list operations

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@

View File

@ -21,7 +21,7 @@ String F_GetFullCrossPlatform(Arena *arena, String path)
String F_GetFileName(String path)
{
String result = ZI;
String result = path;
u64 start = path.len;
for (u64 i = path.len; i-- > 0;)
{

View File

@ -433,9 +433,9 @@ M_Layer M_GetFlattenedEntries(Arena *arena, M_LayerList unflattened, StringList
while (stack)
{
StackNode *stack_node = stack;
StackPop(stack);
IterState *state = stack_node->state;
M_Layer *layer = state->layer;
StackPop(stack);
if (stack_node->exit)
{

View File

@ -78,6 +78,9 @@ Enum(M_EntryKind)
M_EntryKind_IncludeGpu,
M_EntryKind_DefaultWindowsImpl,
M_EntryKind_Startup,
M_EntryKind_VertexShader,
M_EntryKind_PixelShader,
M_EntryKind_ComputeShader,
M_EntryKind_EmbedDir,
};
@ -88,6 +91,9 @@ Global Readonly char *M_entry_kind_rules[] = {
[M_EntryKind_IncludeGpu] = "@IncludeGpu",
[M_EntryKind_DefaultWindowsImpl] = "@DefaultWindowsImpl",
[M_EntryKind_Startup] = "@Startup",
[M_EntryKind_VertexShader] = "@VertexShader",
[M_EntryKind_PixelShader] = "@PixelShader",
[M_EntryKind_ComputeShader] = "@ComputeShader",
[M_EntryKind_EmbedDir] = "@EmbedDir",
};

View File

@ -75,7 +75,7 @@ Struct(MIX_Track){
};
////////////////////////////////
//~ Shared state
//~ State
Struct(MIX_SharedState)
{

View File

@ -2,7 +2,6 @@
//~ Windows headers
#pragma warning(push, 0)
# include <uuids.h>
# include <mfapi.h>
# include <mfidl.h>
# include <mfreadwrite.h>

View File

@ -1,5 +1,5 @@
////////////////////////////////
//~ Shared state
//~ State
P_SharedLogState P_shared_log_state = ZI;

View File

@ -56,7 +56,7 @@ Struct(LogEventCallback)
#define P_LogLevel_Count 6
////////////////////////////////
//~ Shared state
//~ State
//- Shared context
Struct(P_SharedLogState)

View File

@ -110,7 +110,7 @@ Struct(P_W32_Sock)
};
////////////////////////////////
//~ Shared state
//~ State
#define P_W32_WindowClassName L"power_play_window_class"

View File

@ -2,9 +2,6 @@
//~ Win32 libs
#pragma warning(push, 0)
# include <initguid.h>
# include <objbase.h>
# include <uuids.h>
# include <Audioclient.h>
# include <mmdeviceapi.h>
#pragma warning(pop)
@ -25,7 +22,7 @@ Struct(PB_WSP_Buff)
};
////////////////////////////////
//~ Shared state
//~ State
Struct(PB_WSP_SharedState)
{

View File

@ -417,7 +417,7 @@ GPU_Resource *AcquireTransferBuffer(u32 element_count, u32 element_size, void *s
GPU_Resource *r = GPU_AcquireResource(desc);
{
__profn("Copy to transfer buffer");
GPU_PushString(0, r, STRING(size, src));
GPU_CopyString(0, r, STRING(size, src));
}
return r;
}
@ -2209,8 +2209,8 @@ void UpdateUser(P_Window *window)
GPU_ProfN(cl, Lit("Clear gbuffers"));
GPU_TransitionToRtv(cl, g->albedo);
GPU_TransitionToRtv(cl, g->emittance);
GPU_ClearResource(cl, g->albedo);
GPU_ClearResource(cl, g->emittance);
GPU_ClearResource(cl, g->albedo, VEC4(0, 0, 0, 0));
GPU_ClearResource(cl, g->emittance, VEC4(0, 0, 0, 0));
}
//- Material pass
@ -2226,12 +2226,13 @@ void UpdateUser(P_Window *window)
GPU_Scissor scissor = GPU_ScissorFromRect(render_viewport);
MaterialSig sig = ZI;
/* FIXME: Set sig.tex_sampler_urid here (MaterialPS samples with it) */
sig.projection = world_to_render_vp_matrix;
sig.instances_urid = GPU_GetResourceId(material_instance_buffer);
sig.grids_urid = GPU_GetResourceId(grids_buffer);
GPU_Rasterize(cl,
MaterialVS, MaterialPS,
&sig,
MaterialVS, MaterialPS,
countof(rts), rts,
viewport,
scissor,
@ -2272,7 +2273,7 @@ void UpdateUser(P_Window *window)
sig.target_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_target);
sig.tex_width = g->render_size.x;
sig.tex_height = g->render_size.y;
GPU_Compute(cl, FloodCS, &sig, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1);
GPU_Compute(cl, &sig, FloodCS, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1);
/* Swap buffers */
GPU_Resource *swp = g->emittance_flood_read;
@ -2301,7 +2302,7 @@ void UpdateUser(P_Window *window)
GPU_TransitionToUav(cl, g->shade_target);
GPU_FlushUav(cl, g->emittance_flood_read);
GPU_FlushUav(cl, g->shade_read);
GPU_ClearResource(cl, g->shade_target);
GPU_ClearResource(cl, g->shade_target, VEC4(0, 0, 0, 0));
}
//- Shade pass
@ -2329,7 +2330,7 @@ void UpdateUser(P_Window *window)
sig.emittance_flood_tex_urid = GPU_GetResourceId(g->emittance_flood_read);
sig.read_tex_urid = GPU_GetResourceId(g->shade_read);
sig.target_tex_urid = GPU_GetResourceId(g->shade_target);
GPU_Compute(cl, ShadeCS, &sig, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1);
GPU_Compute(cl, &sig, ShadeCS, (g->render_size.x + 7) / 8, (g->render_size.y + 7) / 8, 1);
/* Swap */
GPU_Resource *swp = g->shade_read;
@ -2343,7 +2344,7 @@ void UpdateUser(P_Window *window)
GPU_ProfN(cl, Lit("Clear ui target"));
GPU_TransitionToRtv(cl, g->ui_target);
GPU_FlushUav(cl, g->shade_read);
GPU_ClearResource(cl, g->ui_target);
GPU_ClearResource(cl, g->ui_target, VEC4(0, 0, 0, 0));
}
//- Ui blit pass
@ -2361,8 +2362,8 @@ void UpdateUser(P_Window *window)
sig.gamma = (f32)2.2;
sig.tex_urid = GPU_GetResourceId(g->shade_read);
GPU_Rasterize(cl,
UiBlitVS, UiBlitPS,
&sig,
UiBlitVS, UiBlitPS,
1, &g->ui_target,
viewport,
scissor,
@ -2383,8 +2384,8 @@ void UpdateUser(P_Window *window)
sig.projection = ui_vp_matrix;
sig.instances_urid = GPU_GetResourceId(ui_rect_instance_buffer);
GPU_Rasterize(cl,
UiRectVS, UiRectPS,
&sig,
UiRectVS, UiRectPS,
1, &g->ui_target,
viewport,
scissor,
@ -2405,8 +2406,8 @@ void UpdateUser(P_Window *window)
sig.projection = ui_vp_matrix;
sig.verts_urid = GPU_GetResourceId(ui_shape_verts_buffer);
GPU_Rasterize(cl,
UiShapeVS, UiShapePS,
&sig,
UiShapeVS, UiShapePS,
1, &g->ui_target,
viewport,
scissor,

View File

@ -139,7 +139,7 @@ Struct(DecodeQueue)
};
////////////////////////////////
//~ Shared state
//~ State
Struct(BindState)
{
@ -270,11 +270,6 @@ Struct(SharedUserState)
extern SharedUserState shared_user_state;
////////////////////////////////
//~ Resources
DeclResourceStore(GameResources);
////////////////////////////////
//~ Startup

View File

@ -32,6 +32,18 @@
//- Embeds
@EmbedDir GameResources pp_res
//- Shaders
@VertexShader MaterialVS
@PixelShader MaterialPS
@ComputeShader FloodCS
@ComputeShader ShadeCS
@VertexShader UiBlitVS
@PixelShader UiBlitPS
@VertexShader UiRectVS
@PixelShader UiRectPS
@VertexShader UiShapeVS
@PixelShader UiShapePS
//- Startup
@Startup StartupSim
@Startup StartupUser

View File

@ -1,12 +1,9 @@
////////////////////////////////
//~ Signatures
ConstantBuffer<BlitSig> blit_sig : register(b0);
ConstantBuffer<FloodSig> flood_sig : register(b0);
ConstantBuffer<MaterialSig> mat_sig : register(b0);
ConstantBuffer<ShadeSig> shade_sig : register(b0);
ConstantBuffer<UiSig> ui_sig : register(b0);
ConstantBuffer<ShapeSig> shape_sig : register(b0);
ConstantBuffer<MaterialSig> g_mat_sig : register (b0);
ConstantBuffer<FloodSig> g_flood_sig : register (b0);
ConstantBuffer<ShadeSig> g_shade_sig : register (b0);
ConstantBuffer<UiBlitSig> g_ui_blit_sig : register (b0);
ConstantBuffer<UiRectSig> g_ui_rect_sig : register (b0);
ConstantBuffer<UiShapeSig> g_ui_shape_sig : register (b0);
////////////////////////////////
//~ Material
@ -29,18 +26,22 @@ Struct(MaterialPS_Output)
//- Vertex shader
MaterialPS_Input GPU_VertexShaderDef(MaterialVS)(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
MaterialPS_Input VSDef(MaterialVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
{
ConstantBuffer<MaterialSig> sig = g_mat_sig;
static const Vec2 unit_quad_verts[4] = {
Vec2(-0.5f, -0.5f),
Vec2(0.5f, -0.5f),
Vec2(0.5f, 0.5f),
Vec2(-0.5f, 0.5f)
};
StructuredBuffer<K_MaterialInstance> instances = GpuResourceFromUrid(sig.instances_urid);
K_MaterialInstance instance = instances[SV_InstanceID];
StructuredBuffer<MaterialInstance> instances = GpuResourceFromUrid(sig.instances_urid);
Vec2 vert = unit_quad_verts[SV_VertexID];
MaterialInstance instance = instances[SV_InstanceID];
Vec2 world_pos = mul(instance.xf, Vec3(vert, 1)).xy;
MaterialPS_Input output;
output.SV_Position = mul(sig.projection, Vec4(world_pos, 0, 1));
output.tex_nurid = instance.tex_nurid;
@ -53,23 +54,26 @@ MaterialPS_Input GPU_VertexShaderDef(MaterialVS)(Semantic(u32, SV_InstanceID), S
//- Pixel shader
MaterialPS_Output GPU_PixelShaderDef(MaterialPS)(MaterialPS_Input input)
MaterialPS_Output PSDef(MaterialPS, MaterialPS_Input input)
{
ConstantBuffer<MaterialSig> sig = g_mat_sig;
MaterialPS_Output output;
Vec4 albedo = input.tint_lin;
/* Texture */
if (input.tex_nurid < 0xFFFFFFFF)
{
SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid);
Texture2D<Vec4> tex = GpuResourceFromNurid(input.tex_nurid);
albedo *= tex.Sample(s_point_clamp, input.uv);
albedo *= tex.Sample(sampler, input.uv);
}
/* Grid */
if (input.grid_id < 0xFFFFFFFF)
{
StructuredBuffer<K_MaterialGrid> grids = GpuResourceFromUrid(sig.grids_urid);
K_MaterialGrid grid = grids[input.grid_id];
StructuredBuffer<MaterialGrid> grids = GpuResourceFromUrid(sig.grids_urid);
MaterialGrid grid = grids[input.grid_id];
Vec2 grid_pos = input.SV_Position.xy + grid.offset;
float half_thickness = grid.line_thickness / 2;
float spacing = grid.line_spacing;
@ -122,8 +126,10 @@ MaterialPS_Output GPU_PixelShaderDef(MaterialPS)(MaterialPS_Input input)
//- Compute shader
[numthreads(8, 8, 1)]
void GPU_ComputeShaderDef(FloodCS)(Semantic(uint3, SV_DispatchThreadID))
void CSDef(FloodCS, Semantic(uint3, SV_DispatchThreadID))
{
ConstantBuffer<FloodSig> sig = g_flood_sig;
uint2 id = SV_DispatchThreadID.xy;
uint2 tex_size = uint2(sig.tex_width, sig.tex_height);
if (id.x < tex_size.x && id.y < tex_size.y)
@ -186,23 +192,26 @@ void GPU_ComputeShaderDef(FloodCS)(Semantic(uint3, SV_DispatchThreadID))
#define LightMarches 16
#define LightEdgeFalloff 100
float rand_angle(uint2 pos, u32 ray_index)
float RandAngle(uint2 pos, u32 ray_index)
{
Texture3D<u32> noise_tex = GpuResourceFromUrid(K_BLUE_NOISE_TEX_ID);
ConstantBuffer<ShadeSig> sig = g_shade_sig;
Texture3D<u32> noise_tex = GpuResourceFromUrid(sig.noise_tex_urid);
Vec3I32 noise_coord = Vec3I32(1, 1, 1);
noise_coord += Vec3I32(pos.xy, ray_index);
noise_coord.xyz += sig.frame_seed.xyz;
// noise_coord.xy -= sig.camera_offset;
u32 noise = noise_tex[noise_coord % uint3(K_BLUE_NOISE_TEX_WIDTH, K_BLUE_NOISE_TEX_HEIGHT, K_BLUE_NOISE_TEX_DEPTH)];
u32 noise = noise_tex[noise_coord % uint3(sig.noise_tex_width, sig.noise_tex_height, sig.noise_tex_depth)];
return ((float)noise / (float)0xFFFF) * Tau;
}
Vec3 get_light_in_dir(uint2 ray_start, Vec2 ray_dir)
Vec3 ColorFromDir(uint2 ray_start, Vec2 ray_dir)
{
ConstantBuffer<ShadeSig> sig = g_shade_sig;
Texture2D<uint2> flood_tex = GpuResourceFromUrid(sig.emittance_flood_tex_urid);
Texture2D<Vec4> emittance_tex = GpuResourceFromUrid(sig.emittance_tex_urid);
Texture3D<u32> noise_tex = GpuResourceFromUrid(sig.noise_tex_urid);
Vec3 result = Vec3(0, 0, 0);
Vec2 at_float = ray_start;
@ -235,14 +244,14 @@ Vec3 get_light_in_dir(uint2 ray_start, Vec2 ray_dir)
return result;
}
Vec3 get_light_at_pos(uint2 pos)
Vec3 ColorFromPos(uint2 pos)
{
Vec3 result = 0;
for (u32 i = 0; i < LightSamples; ++i)
{
float angle = rand_angle(pos, i);
float angle = RandAngle(pos, i);
Vec2 dir = Vec2(cos(angle), sin(angle));
Vec3 light_in_dir = get_light_in_dir(pos, dir);
Vec3 light_in_dir = ColorFromDir(pos, dir);
result += light_in_dir;
}
result /= LightSamples;
@ -252,8 +261,10 @@ Vec3 get_light_at_pos(uint2 pos)
//- Compute shader
[numthreads(8, 8, 1)]
void GPU_ComputeShaderDef(ShadeCS)(Semantic(uint3, SV_DispatchThreadID))
void CSDef(ShadeCS, Semantic(uint3, SV_DispatchThreadID))
{
ConstantBuffer<ShadeSig> sig = g_shade_sig;
uint2 id = SV_DispatchThreadID.xy;
if (id.x < sig.tex_width && id.y < sig.tex_height)
{
@ -268,7 +279,7 @@ void GPU_ComputeShaderDef(ShadeCS)(Semantic(uint3, SV_DispatchThreadID))
/* Apply lighting */
if (!(sig.flags & K_SHADE_FLAG_DISABLE_EFFECTS))
{
color.rgb *= get_light_at_pos(id);
color.rgb *= ColorFromPos(id);
}
/* Apply temporal accumulation */
@ -301,22 +312,22 @@ Struct(UiBlitPS_Output)
/* ACES approximation by Krzysztof Narkowicz
* https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ */
Vec3 tone_map(Vec3 v)
Vec3 ToneMap(Vec3 v)
{
return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
}
//- Vertex shader
UiBlitPS_Input GPUVertexShaderDef(UiBlitVS)(Semantic(u32, SV_VertexID))
UiBlitPS_Input VSDef(UiBlitVS, Semantic(u32, SV_VertexID))
{
ConstantBuffer<UiBlitSig> sig = g_ui_blit_sig;
static const Vec2 unit_quad_verts[4] = {
Vec2(-0.5f, -0.5f),
Vec2(0.5f, -0.5f),
Vec2(0.5f, 0.5f),
Vec2(-0.5f, 0.5f)
};
Vec2 vert = unit_quad_verts[SV_VertexID];
UiBlitPS_Input output;
@ -327,22 +338,25 @@ UiBlitPS_Input GPUVertexShaderDef(UiBlitVS)(Semantic(u32, SV_VertexID))
//- Pixel shader
UiBlitPS_Output GPUPixelShaderDef(UiBlitPS)(UiBlitPS_Input input)
UiBlitPS_Output PSDef(UiBlitPS, UiBlitPS_Input input)
{
ConstantBuffer<UiBlitSig> sig = g_ui_blit_sig;
SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid);
UiBlitPS_Output output;
Texture2D<Vec4> tex = GpuResourceFromUrid(sig.tex_urid);
Vec4 color = tex.Sample(s_point_clamp, input.uv);
Vec4 color = tex.Sample(sampler, input.uv);
/* Apply tone map */
if (sig.flags & K_BLIT_FLAG_TONE_MAP)
if (sig.flags & UiBlitFlag_ToneMap)
{
/* TODO: Dynamic exposure based on average scene luminance */
color.rgb *= sig.exposure;
color.rgb = tone_map(color.rgb);
color.rgb = ToneMap(color.rgb);
}
/* Apply gamma correction */
if (sig.flags & K_BLIT_FLAG_GAMMA_CORRECT)
if (sig.flags & UiBlitFlag_GammaCorrect)
{
color = pow(abs(color), 1/sig.gamma);
}
@ -369,8 +383,9 @@ Struct(UiRectPS_Output)
//- Vertex shader
UiRectPS_Input GPUVertexShaderDef(UiRectVS)(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
UiRectPS_Input VSDef(UiRectVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
{
ConstantBuffer<UiRectSig> sig = g_ui_rect_sig;
static const Vec2 unit_quad_verts[4] = {
Vec2(-0.5f, -0.5f),
Vec2(0.5f, -0.5f),
@ -378,8 +393,8 @@ UiRectPS_Input GPUVertexShaderDef(UiRectVS)(Semantic(u32, SV_InstanceID), Semant
Vec2(-0.5f, 0.5f)
};
StructuredBuffer<K_UiInstance> instances = GpuResourceFromUrid(sig.instances_urid);
K_UiInstance instance = instances[SV_InstanceID];
StructuredBuffer<UiRectInstance> instances = GpuResourceFromUrid(sig.instances_urid);
UiRectInstance instance = instances[SV_InstanceID];
Vec2 vert = unit_quad_verts[SV_VertexID];
Vec2 world_pos = mul(instance.xf, Vec3(vert, 1)).xy;
@ -393,8 +408,9 @@ UiRectPS_Input GPUVertexShaderDef(UiRectVS)(Semantic(u32, SV_InstanceID), Semant
//- Pixel shader
UiRectPS_Output GPUPixelShaderDef(UiRectPS)(PSInput input)
UiRectPS_Output PSDef(UiRectPS, UiRectPS_Input input)
{
ConstantBuffer<UiRectSig> sig = g_ui_rect_sig;
UiRectPS_Output output;
Vec4 color = input.tint_srgb;
@ -402,7 +418,8 @@ UiRectPS_Output GPUPixelShaderDef(UiRectPS)(PSInput input)
if (input.tex_nurid < 0xFFFFFFFF)
{
Texture2D<Vec4> tex = GpuResourceFromNurid(input.tex_nurid);
color *= tex.Sample(s_point_clamp, input.uv);
SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid);
color *= tex.Sample(sampler, input.uv);
}
output.SV_Target0 = color;
@ -425,8 +442,9 @@ Struct(UiShapePS_Output)
//- Vertex shader
UiShapePS_Input GPUVertexShaderDef(UiShapeVS)(Semantic(u32, SV_VertexID))
UiShapePS_Input VSDef(UiShapeVS, Semantic(u32, SV_VertexID))
{
ConstantBuffer<UiShapeSig> sig = g_ui_shape_sig;
StructuredBuffer<UiShapeVert> verts = GpuResourceFromUrid(sig.verts_urid);
UiShapeVert vert = verts[SV_VertexID];
UiShapePS_Input output;
@ -437,7 +455,7 @@ UiShapePS_Input GPUVertexShaderDef(UiShapeVS)(Semantic(u32, SV_VertexID))
//- Pixel shader
UiShapePS_Output GPUPixelShaderDef(UiShapePS)(PSInput input)
UiShapePS_Output PSDef(UiShapePS, UiShapePS_Input input)
{
UiShapePS_Output output;
output.SV_Target = input.color_srgb;

View File

@ -6,10 +6,10 @@ Struct(MaterialSig)
/* ----------------------------------------------------- */
Mat4x4 projection; /* 16 consts */
/* ----------------------------------------------------- */
u32 tex_sampler_urid; /* 01 consts */
u32 instances_urid; /* 01 consts */
u32 grids_urid; /* 01 consts */
u32 _pad0; /* 01 consts (padding) */
u32 _pad1; /* 01 consts (padding) */
/* ----------------------------------------------------- */
};
AssertRootConst(MaterialSig, 20);
@ -82,8 +82,13 @@ Struct(ShadeSig)
u32 read_tex_urid; /* 01 consts */
u32 target_tex_urid; /* 01 consts */
/* ----------------------------------------------------- */
u32 noise_tex_urid; /* 01 consts */
u32 noise_tex_width; /* 01 consts */
u32 noise_tex_height; /* 01 consts */
u32 noise_tex_depth; /* 01 consts */
/* ----------------------------------------------------- */
};
AssertRootConst(ShadeSig, 16);
AssertRootConst(ShadeSig, 20);
////////////////////////////////
//~ Ui blit types
@ -102,8 +107,13 @@ Struct(UiBlitSig)
f32 exposure; /* 01 consts */
f32 gamma; /* 01 consts */
/* ----------------------------------------------------- */
u32 tex_sampler_urid; /* 01 consts */
u32 _pad0; /* 01 consts (padding) */
u32 _pad1; /* 01 consts (padding) */
u32 _pad2; /* 01 consts (padding) */
/* ----------------------------------------------------- */
};
AssertRootConst(UiBlitSig, 20);
AssertRootConst(UiBlitSig, 24);
////////////////////////////////
@ -115,9 +125,9 @@ Struct(UiRectSig)
Mat4x4 projection; /* 16 consts */
/* ----------------------------------------------------- */
u32 instances_urid; /* 01 consts */
u32 tex_sampler_urid; /* 01 consts */
u32 _pad0; /* 01 consts (padding) */
u32 _pad1; /* 01 consts (padding) */
u32 _pad2; /* 01 consts (padding) */
/* ----------------------------------------------------- */
};
AssertRootConst(UiRectSig, 20);
@ -153,28 +163,3 @@ Struct(UiShapeVert)
Vec2 pos;
u32 color_srgb;
};
////////////////////////////////
//~ Shaders
//- Material
GPU_ShaderDecl(MaterialVS);
GPU_ShaderDecl(MaterialPS);
//- Flood
GPU_ShaderDecl(FloodCS);
//- Shade
GPU_ShaderDecl(ShadeCS);
//- Ui blit
GPU_ShaderDecl(UiBlitVS);
GPU_ShaderDecl(UiBlitPS);
//- Ui rect
GPU_ShaderDecl(UiRectVS);
GPU_ShaderDecl(UiRectPS);
//- Ui shape
GPU_ShaderDecl(UiShapeVS);
GPU_ShaderDecl(UiShapePS);

View File

@ -227,7 +227,7 @@ Inline Snapshot *NilSnapshot(void)
}
////////////////////////////////
//~ Shared state
//~ State
#define ClientLookupBinsCount 127
#define TickLookupBinsCount 127

1
src/prof/prof_inc.h Normal file
View File

@ -0,0 +1 @@
#include "prof_tracy.h"

View File

@ -121,7 +121,7 @@ Struct(S_EntryBin)
};
////////////////////////////////
//~ Shared state
//~ State
#define S_EntryBinsCount 1024

View File

@ -1,11 +1,6 @@
////////////////////////////////
//~ Win32 libs
#include <combaseapi.h>
#include <dcommon.h>
#include <initguid.h>
#include <unknwn.h>
#pragma comment(lib, "dwrite")
#pragma comment(lib, "gdi32")
@ -145,7 +140,7 @@ static inline UINT32 IDWriteGdiInterop_Release
EXTERN_C HRESULT DECLSPEC_IMPORT WINAPI DWriteCreateFactory (DWRITE_FACTORY_TYPE factoryType, const GUID* iid, void** factory) WIN_NOEXCEPT;
////////////////////////////////
//~ Shared state
//~ State
/* TODO: Determine font dpi dynamically */
#define TTF_DW_Dpi (96.0f)