From 0ac32845119032f62c45bdceea353501a1f794cd Mon Sep 17 00:00:00 2001 From: jacob Date: Thu, 20 Nov 2025 01:25:37 -0600 Subject: [PATCH] more refactoring to support gpu pointers --- src/asset_cache/asset_cache.c | 2 +- src/asset_cache/asset_cache.h | 2 +- src/base/base.h | 815 ++++--- src/base/base_inc.h | 8 +- src/base/base_job.h | 2 +- src/base/base_log.h | 2 +- src/base/base_memory.c | 8 +- src/base/base_memory.h | 2 +- src/base/base_snc.c | 4 +- src/base/base_snc.h | 4 +- src/base/base_string.c | 2 +- src/base/base_win32/base_win32.c | 24 +- src/base/base_win32/base_win32_job.c | 11 +- src/base/base_win32/base_win32_log.c | 4 +- src/base/base_win32/base_win32_time.c | 2 +- src/collider/collider.c | 2 +- src/config.h | 8 +- src/font/font.c | 2 +- src/gpu/gpu_common.c | 56 +- src/gpu/gpu_common.h | 21 +- src/gpu/gpu_core.h | 243 +- src/gpu/gpu_dx12/gpu_dx12.c | 2083 +++++++++-------- src/gpu/gpu_dx12/gpu_dx12.h | 221 +- src/meta/meta.c | 68 +- src/meta/meta_lay.c | 2 +- src/meta/meta_os/meta_os_inc.h | 2 +- .../meta_os/meta_os_win32/meta_os_win32.c | 8 +- src/platform/platform_win32/platform_win32.c | 22 +- .../playback_wasapi/playback_wasapi.c | 2 +- src/pp/pp.lay | 48 +- src/{proto => pp}/pp_sim/pp_sim.lay | 0 src/{proto => pp}/pp_sim/pp_sim_core.c | 2 +- src/{proto => pp}/pp_sim/pp_sim_core.h | 0 src/{proto => pp}/pp_vis/pp_vis.lay | 0 src/{proto => pp}/pp_vis/pp_vis_core.c | 6 +- src/{proto => pp}/pp_vis/pp_vis_core.h | 0 src/{proto => pp}/pp_vis/pp_vis_draw.c | 0 src/{proto => pp}/pp_vis/pp_vis_draw.h | 0 src/{proto => pp}/pp_vis/pp_vis_gpu.gpu | 4 +- src/{proto => pp}/pp_vis/pp_vis_gpu.h | 0 .../pp_vis_res}/font/fixedsys.ttf | 0 .../pp_vis_res}/font/roboto-med.ttf | 0 src/{proto => pp}/pp_vis/pp_vis_widgets.c | 0 src/{proto => pp}/pp_vis/pp_vis_widgets.h | 0 src/{pp => pp_old}/pp.c | 10 +- src/{pp => pp_old}/pp.h | 4 +- src/pp_old/pp.lay | 48 + src/{pp => pp_old}/pp_draw.gpu | 0 src/{pp => pp_old}/pp_draw.h | 0 src/{pp => pp_old}/pp_ent.c | 2 +- src/{pp => pp_old}/pp_ent.h | 0 src/{pp => pp_old}/pp_phys.c | 2 +- src/{pp => pp_old}/pp_phys.h | 2 +- .../pp_res}/font/fixedsys.ttf | 0 .../pp_res}/font/roboto-med.ttf | 0 src/{pp => pp_old}/pp_res/icon.ico | Bin src/{pp => pp_old}/pp_res/sprite/blood.ase | 0 src/{pp => pp_old}/pp_res/sprite/box.ase | 0 .../pp_res/sprite/box_rounded.ase | 0 src/{pp => pp_old}/pp_res/sprite/bullet.ase | 0 .../pp_res/sprite/crosshair.ase | 0 src/{pp => pp_old}/pp_res/sprite/gun.ase | 0 src/{pp => pp_old}/pp_res/sprite/tile.ase | 0 src/{pp => pp_old}/pp_res/sprite/tim.ase | 0 src/{pp => pp_old}/pp_sim.c | 0 src/{pp => pp_old}/pp_sim.h | 0 src/{pp => pp_old}/pp_space.c | 0 src/{pp => pp_old}/pp_space.h | 0 src/{pp => pp_old}/pp_step.c | 0 src/{pp => pp_old}/pp_step.h | 0 src/{pp => pp_old}/pp_widgets.c | 0 src/{pp => pp_old}/pp_widgets.h | 0 src/prof/prof_tracy.cpp | 2 +- src/prof/prof_tracy.h | 8 +- src/proto/pp.lay | 4 - src/proto/proto.c | 3 + src/proto/proto.lay | 13 + src/sound/sound.c | 2 +- src/sprite/sprite.c | 4 +- src/ttf/ttf_dwrite/ttf_dwrite.c | 8 +- src/window/window_win32/window_win32.c | 8 +- 81 files changed, 2098 insertions(+), 1714 deletions(-) rename src/{proto => pp}/pp_sim/pp_sim.lay (100%) rename src/{proto => pp}/pp_sim/pp_sim_core.c (99%) rename src/{proto => pp}/pp_sim/pp_sim_core.h (100%) rename src/{proto => pp}/pp_vis/pp_vis.lay (100%) rename src/{proto => pp}/pp_vis/pp_vis_core.c (99%) rename src/{proto => pp}/pp_vis/pp_vis_core.h (100%) rename src/{proto => pp}/pp_vis/pp_vis_draw.c (100%) rename src/{proto => pp}/pp_vis/pp_vis_draw.h (100%) rename src/{proto => pp}/pp_vis/pp_vis_gpu.gpu (97%) rename src/{proto => pp}/pp_vis/pp_vis_gpu.h (100%) rename src/pp/{pp_res => pp_vis/pp_vis_res}/font/fixedsys.ttf (100%) rename src/pp/{pp_res => pp_vis/pp_vis_res}/font/roboto-med.ttf (100%) rename src/{proto => pp}/pp_vis/pp_vis_widgets.c (100%) rename src/{proto => pp}/pp_vis/pp_vis_widgets.h (100%) rename src/{pp => pp_old}/pp.c (99%) rename src/{pp => pp_old}/pp.h (99%) create mode 100644 src/pp_old/pp.lay rename src/{pp => pp_old}/pp_draw.gpu (100%) rename src/{pp => pp_old}/pp_draw.h (100%) rename src/{pp => pp_old}/pp_ent.c (99%) rename src/{pp => pp_old}/pp_ent.h (100%) rename src/{pp => pp_old}/pp_phys.c (99%) rename src/{pp => pp_old}/pp_phys.h (99%) rename src/{proto/pp_vis/pp_vis_res => pp_old/pp_res}/font/fixedsys.ttf (100%) rename src/{proto/pp_vis/pp_vis_res => pp_old/pp_res}/font/roboto-med.ttf (100%) rename src/{pp => pp_old}/pp_res/icon.ico (100%) rename src/{pp => pp_old}/pp_res/sprite/blood.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/box.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/box_rounded.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/bullet.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/crosshair.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/gun.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/tile.ase (100%) rename src/{pp => pp_old}/pp_res/sprite/tim.ase (100%) rename src/{pp => pp_old}/pp_sim.c (100%) rename src/{pp => pp_old}/pp_sim.h (100%) rename src/{pp => pp_old}/pp_space.c (100%) rename src/{pp => pp_old}/pp_space.h (100%) rename src/{pp => pp_old}/pp_step.c (100%) rename src/{pp => pp_old}/pp_step.h (100%) rename src/{pp => pp_old}/pp_widgets.c (100%) rename src/{pp => pp_old}/pp_widgets.h (100%) delete mode 100644 src/proto/pp.lay create mode 100644 src/proto/proto.c create mode 100644 src/proto/proto.lay diff --git a/src/asset_cache/asset_cache.c b/src/asset_cache/asset_cache.c index c69f020b..d82c6699 100644 --- a/src/asset_cache/asset_cache.c +++ b/src/asset_cache/asset_cache.c @@ -26,7 +26,7 @@ u64 AC_HashFromKey(String key) void AC_RefreshDebugTable(void) { -#if RtcIsEnabled +#if IsRtcEnabled AC_SharedState *g = &AC_shared_state; Lock lock = LockE(&g->dbg_table_mutex); ZeroArray(g->dbg_table); diff --git a/src/asset_cache/asset_cache.h b/src/asset_cache/asset_cache.h index 2e30de54..156abeef 100644 --- a/src/asset_cache/asset_cache.h +++ b/src/asset_cache/asset_cache.h @@ -53,7 +53,7 @@ Struct(AC_SharedState) Mutex store_mutex; Arena *store_arena; -#if RtcIsEnabled +#if IsRtcEnabled /* Array of len `num_assets` pointing into populated entries of `lookup`. */ AC_Asset *dbg_table[AC_AssetLookupTableCapacity]; u64 dbg_table_count; diff --git a/src/base/base.h b/src/base/base.h index 76295b8c..a53aa2a3 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -2,43 +2,43 @@ //~ Compiler flag checks #ifndef IsConsoleApp -# error Missing compile time definition for 'IsConsoleApp' + #error Missing compile time definition for 'IsConsoleApp' #endif -#ifndef RtcIsEnabled -# error Missing compile time definition for 'RtcIsEnabled' +#ifndef IsRtcEnabled + #error Missing compile time definition for 'IsRtcEnabled' #endif -#ifndef AsanIsEnabled -# error Missing compile time definition for 'AsanIsEnabled' +#ifndef IsAsanEnabled + #error Missing compile time definition for 'IsAsanEnabled' #endif -#ifndef CrtlibIsEnabled -# error Missing compile time definition for 'CrtlibIsEnabled' +#ifndef IsCrtlibEnabled + #error Missing compile time definition for 'IsCrtlibEnabled' #endif -#ifndef DebinfoEnabled -# error Missing compile time definition for 'DebinfoEnabled' +#ifndef IsDebinfoEnabled + #error Missing compile time definition for 'IsDebinfoEnabled' #endif -#ifndef DeveloperIsEnabled -# error Missing compile time definition for 'DeveloperIsEnabled' +#ifndef IsDeveloperModeEnabled + #error Missing compile time definition for 'IsDeveloperModeEnabled' #endif -#ifndef ProfilingIsEnabled -# error Missing compile time definition for 'ProfilingIsEnabled' +#ifndef IsProfilingEnabled + #error Missing compile time definition for 'IsProfilingEnabled' #endif -#ifndef UnoptimizedIsEnabled -# error Missing compile time definition for 'UnoptimizedIsEnabled' +#ifndef IsUnoptimized + #error Missing compile time definition for 'IsUnoptimized' #endif -#ifndef TestsAreEnabled -# error Missing compile time definition for 'TestsAreEnabled' +#ifndef IsTestingEnabled + #error Missing compile time definition for 'IsTestingEnabled' #endif -#ifndef HotSwappingIsEnabled -# error Missing compile time definition for 'HotSwappingIsEnabled' +#ifndef IsHotSwappingEnabled + #error Missing compile time definition for 'IsHotSwappingEnabled' #endif //////////////////////////////////////////////////////////// @@ -46,58 +46,58 @@ //- Compiler #if defined(__clang__) -# define CompilerIsClang 1 -# define CompilerIsMsvc 0 + #define IsCompilerClang 1 + #define IsCompilerMsvc 0 #elif defined(_MSC_VER) -# define CompilerIsClang 0 -# define CompilerIsMsvc 1 + #define IsCompilerClang 0 + #define IsCompilerMsvc 1 #else -# error Unknown compiler + #error Unknown compiler #endif //- Language #if defined(__HLSL_VERSION) -# define LanguageIsC 0 -# define LanguageIsGpu 1 + #define IsLanguageC 0 + #define IsLanguageGpu 1 #else -# define LanguageIsC 1 -# define LanguageIsGpu 0 + #define IsLanguageC 1 + #define IsLanguageGpu 0 #endif //- Platform system #if defined(_WIN32) -# define PlatformIsWindows 1 -# define PlatformIsMac 0 -# define PlatformIsLinux 0 + #define IsPlatformWindows 1 + #define IsPlatformMac 0 + #define IsPlatformLinux 0 #elif defined(__APPLE__) && defined(__MACH__) -# define PlatformIsWindows 0 -# define PlatformIsMac 1 -# define PlatformIsLinux 0 + #define IsPlatformWindows 0 + #define IsPlatformMac 1 + #define IsPlatformLinux 0 #elif defined(__gnu_linux__) -# define PlatformIsWindows 0 -# define PlatformIsMac 0 -# define PlatformIsLinux 1 -#elif LanguageIsGpu -# define PlatformIsWindows 0 -# define PlatformIsMac 0 -# define PlatformIsLinux 0 + #define IsPlatformWindows 0 + #define IsPlatformMac 0 + #define IsPlatformLinux 1 +#elif IsLanguageGpu + #define IsPlatformWindows 0 + #define IsPlatformMac 0 + #define IsPlatformLinux 0 #else -# error Unknown platform + #error Unknown platform #endif //- Architecture -# if defined(_M_AMD64) || defined(__amd64__) -# define ArchIsX64 1 -# define ArchIsArm64 0 -# elif defined(_M_ARM64) || defined(__aarch64__) -# define ArchIsX64 0 -# define ArchIsArm64 1 -# elif LanguageIsGpu -# define ArchIsX64 0 -# define ArchIsArm64 0 -# else -# error Unknown architecture -# endif +#if defined(_M_AMD64) || defined(__amd64__) + #define IsArchX64 1 + #define IsArchArm64 0 +#elif defined(_M_ARM64) || defined(__aarch64__) + #define IsArchX64 0 + #define IsArchArm64 1 +#elif IsLanguageGpu + #define IsArchX64 0 + #define IsArchArm64 0 +#else + #error Unknown architecture +#endif //- Cache line size /* TODO: Just hard-code to something like 128 or 256 if Apple silicon is ever supported */ @@ -106,33 +106,33 @@ //- Windows NTDDI version /* TODO: Remove this */ #if 0 -#if CompilerIsMsvc -# define NTDDI_WIN11_DT 0x0C0A0000 -# define NTDDI_VERSION 0x0A000000 -# if RtcIsEnabled -# define _ALLOW_RTCc_IN_STL 1 -# endif -#endif + #if IsCompilerMsvc + #define NTDDI_WIN11_DT 0x0C0A0000 + #define NTDDI_VERSION 0x0A000000 + #if IsRtcEnabled + #define _ALLOW_RTCc_IN_STL 1 + #endif + #endif #endif //////////////////////////////////////////////////////////// //~ Platform headers //- Windows headers -#if PlatformIsWindows -# define COBJMACROS -# define WIN32_LEAN_AND_MEAN -# define UNICODE -# pragma warning(push, 0) -# include -# include -# include -# include -# include -# include -# include -# include -# pragma warning(pop) +#if IsPlatformWindows + #define COBJMACROS + #define WIN32_LEAN_AND_MEAN + #define UNICODE + #pragma warning(push, 0) + #include + #include + #include + #include + #include + #include + #include + #include + #pragma warning(pop) #endif //////////////////////////////////////////////////////////// @@ -144,74 +144,53 @@ #define StaticAssert(cond) StaticAssert1(cond, __LINE__, __COUNTER__) //- Debug assert -#if RtcIsEnabled -# if CompilerIsMsvc -# define Assert(cond) ((cond) ? 1 : (IsRunningInDebugger() ? (*(volatile i32 *)0 = 0) : Panic(Lit(__FILE__ ":" Stringize(__LINE__) ":0: assertion failed: "#cond"")))) -# define DEBUGBREAK __debugbreak() -# else -# define Assert(cond) ((cond) ? 1 : (__builtin_trap(), 0)) -# define DEBUGBREAK __builtin_debugtrap() -# endif -# define DEBUGBREAKABLE { volatile i32 __DEBUGBREAKABLE_VAR = 0; LAX __DEBUGBREAKABLE_VAR; } (void)0 +#if IsRtcEnabled + #if IsCompilerMsvc + #define Assert(cond) ((cond) ? 1 : (IsRunningInDebugger() ? (*(volatile i32 *)0 = 0) : Panic(Lit(__FILE__ ":" Stringize(__LINE__) ":0: assertion failed: "#cond"")))) + #define DEBUGBREAK __debugbreak() + #else + #define Assert(cond) ((cond) ? 1 : (__builtin_trap(), 0)) + #define DEBUGBREAK __builtin_debugtrap() + #endif + #define DEBUGBREAKABLE { volatile i32 __DEBUGBREAKABLE_VAR = 0; LAX __DEBUGBREAKABLE_VAR; } (void)0 #else -# define Assert(cond) (void)(0) -#endif - -//- Root constant assert -#define AssertRootConst(s, n) StaticAssert((sizeof(s) % 16 == 0) && /* Root constant struct should pad to 16 byte alignment */ \ - ((sizeof(s) / 4) == (n)) && /* Root constant struct size should match the specified 32-bit-constant count */ \ - (sizeof(s) <= 256)) /* Root constant struct can only fit 64 DWORDS */ - -//- Debug alias -/* TODO: Remove this */ -#if CompilerIsMsvc -# if DebinfoEnabled -# define DebugAlias(var, alias) *(alias) = &(var) -# else -# define DebugAlias(var, alias) *(alias) = &(var) -# endif -#else -# if DebinfoEnabled -# define DebugAlias(var, alias) __attribute((used)) *(alias) = &(var) -# else -# define DebugAlias(var, alias) __attribute((unused)) *(alias) = &(var) -# endif + #define Assert(cond) (void)(0) #endif //- Address sanitization -#if AsanIsEnabled -void __asan_poison_memory_region(void const volatile *, size_t); -void __asan_unpoison_memory_region(void const volatile *add, size_t); -# define AsanPoison(addr, size) __asan_poison_memory_region((addr), (size)) -# define AsanUnpoison(addr, size) __asan_unpoison_memory_region((addr), (size)) +#if IsAsanEnabled + void __asan_poison_memory_region(void const volatile *, size_t); + void __asan_unpoison_memory_region(void const volatile *add, size_t); + #define AsanPoison(addr, size) __asan_poison_memory_region((addr), (size)) + #define AsanUnpoison(addr, size) __asan_unpoison_memory_region((addr), (size)) #else -# define AsanPoison(addr, size) -# define AsanUnpoison(addr, size) + #define AsanPoison(addr, size) + #define AsanUnpoison(addr, size) #endif //////////////////////////////////////////////////////////// //~ Common utility macros //- ZeroStruct initialization macro -#if LanguageIsC -# define ZI { 0 } +#if IsLanguageC + #define ZI { 0 } #else -# define ZI { } + #define ZI { } #endif //- Inline #define Inline static inline -#if CompilerIsMsvc -# define ForceInline Inline __forceinline +#if IsCompilerMsvc + #define ForceInline Inline __forceinline #else -# define ForceInline Inline __attribute((always_inline)) + #define ForceInline Inline __attribute((always_inline)) #endif -#if CompilerIsMsvc -# define ForceNoInline __declspec(noinline) +#if IsCompilerMsvc + #define ForceNoInline __declspec(noinline) #else -# define ForceNoInline __attribute__((noinline)) + #define ForceNoInline __attribute__((noinline)) #endif //- Static @@ -219,49 +198,49 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); #define Global static //- Read-only -#if PlatformIsWindows -# if CompilerIsMsvc -# pragma section(".rdata$", read) -# define Readonly __declspec(allocate(".rdata$")) -# else -# define Readonly __declspec(allocate(".rdata")) -# endif -#elif PlatformIsMac -# define Readonly __attribute((section("__TEXT,__const"))) +#if IsPlatformWindows + #if IsCompilerMsvc + # pragma section(".rdata$", read) + # define Readonly __declspec(allocate(".rdata$")) + #else + #define Readonly __declspec(allocate(".rdata")) + #endif +#elif IsPlatformMac + #define Readonly __attribute((section("__TEXT,__const"))) #else -# define Readonly __attribute((section(".rodata"))) + #define Readonly __attribute((section(".rodata"))) #endif //- Barriers -#if CompilerIsMsvc -# define WriteBarrier() _WriteBarrier() -# define ReadBarrier() _ReadBarrier() +#if IsCompilerMsvc + #define WriteBarrier() _WriteBarrier() + #define ReadBarrier() _ReadBarrier() #elif defined(__x86_64) || defined(__i386__) -# define WriteBarrier() __asm__ volatile("" ::: "memory") -# define ReadBarrier() __asm__ volatile("" ::: "memory") -#elif LanguageIsGpu -# define WriteBarrier() -# define ReadBarrier() + #define WriteBarrier() __asm__ volatile("" ::: "memory") + #define ReadBarrier() __asm__ volatile("" ::: "memory") +#elif IsLanguageGpu + #define WriteBarrier() + #define ReadBarrier() #else -# error Memory barriers not implemented + #error Memory barriers not implemented #endif //- Unused markup /* Strict unused markup */ -#if CompilerIsClang -# define UNUSED __attribute((unused)) +#if IsCompilerClang + #define UNUSED __attribute((unused)) #else -# define UNUSED + #define UNUSED #endif /* Relaxed unused markup */ #define LAX (void) //- Fallthrough -#if CompilerIsClang -# define FALLTHROUGH __attribute((fallthrough)) +#if IsCompilerClang + #define FALLTHROUGH __attribute((fallthrough)) #else -# define FALLTHROUGH + #define FALLTHROUGH #endif //- Preprocessor concatenation @@ -433,21 +412,21 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); //////////////////////////////////////////////////////////// //~ Intrinsic headers -#if LanguageIsC -/* Intrinsic header info: - * mmintrin.h MMX - * xmmintrin.h SSE - * emmintrin.h SSE2 - * pmmintrin.h SSE3 - * tmmintrin.h SSSE3 - * smmintrin.h SSE4.1 - * nmmintrin.h SSE4.2 - * ammintrin.h SSE4A - * wmmintrin.h AES - * immintrin.h AVX, AVX2, FMA -*/ -#include -#include /* SSE4.2 */ +#if IsLanguageC + /* Intrinsic header info: + * mmintrin.h MMX + * xmmintrin.h SSE + * emmintrin.h SSE2 + * pmmintrin.h SSE3 + * tmmintrin.h SSSE3 + * smmintrin.h SSE4.1 + * nmmintrin.h SSE4.2 + * ammintrin.h SSE4A + * wmmintrin.h AES + * immintrin.h AVX, AVX2, FMA + */ + #include + #include /* SSE4.2 */ #endif //////////////////////////////////////////////////////////// @@ -459,15 +438,15 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); #define AlignedBlock(n) struct alignas(n) //- Enum -#if LanguageIsC -# define Enum(name) typedef enum name name; enum name +#if IsLanguageC + #define Enum(name) typedef enum name name; enum name #else -# define Enum(name) enum name + #define Enum(name) enum name #endif //- alignof -#if LanguageIsC && (CompilerIsMsvc || __STDC_VERSION__ < 202311L) -# define alignof(type) __alignof(type) +#if IsLanguageC && (IsCompilerMsvc || __STDC_VERSION__ < 202311L) + #define alignof(type) __alignof(type) #endif //- field sizeof @@ -481,12 +460,12 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); #define IsArray(a) (IsIndexable(a) && (((void *)&a) == ((void *)a))) //- offsetof -#if !CompilerIsMsvc -# ifdef _CRT_USE_BUILTIN_OFFSETOF -# define offsetof(type, field) __builtin_offsetof(type, field) -# else -# define offsetof(type, field) ((u64)&(((type *)0)->field)) -# endif +#if !IsCompilerMsvc + #ifdef _CRT_USE_BUILTIN_OFFSETOF + #define offsetof(type, field) __builtin_offsetof(type, field) + #else + #define offsetof(type, field) ((u64)&(((type *)0)->field)) + #endif #endif //- struct region @@ -496,54 +475,48 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); #define ZeroFieldRegion(dst, src, r) ZeroBytes(&dst->__begfieldreg__##r, &src->__begfieldreg__##r, (u8 *)&dst->__endfieldreg__##r - (u8 *)&dst->__begfieldreg__##r) //- Packed -#if CompilerIsMsvc -# define Packed(s) __pragma(pack(push, 1)) s __pragma(pack(pop)) -#elif CompilerIsClang -# define Packed(s) s __attribute((__packed__)) -#elif LanguageIsGpu -# define Packed(s) s +#if IsCompilerMsvc + #define Packed(s) __pragma(pack(push, 1)) s __pragma(pack(pop)) +#elif IsCompilerClang + #define Packed(s) s __attribute((__packed__)) +#elif IsLanguageGpu + #define Packed(s) s #endif //- alignas -#if (CompilerIsMsvc && LanguageIsC) || (LanguageIsC && __STDC_VERSION__ < 202311L) -# if CompilerIsMsvc -# define alignas(n) __declspec(align(n)) -# else -# define alignas(n) __attribute__((aligned(n))) -# endif +#if (IsCompilerMsvc && IsLanguageC) || (IsLanguageC && __STDC_VERSION__ < 202311L) + #if IsCompilerMsvc + #define alignas(n) __declspec(align(n)) + #else + #define alignas(n) __attribute__((aligned(n))) + #endif #endif //////////////////////////////////////////////////////////// //~ Scalar types -#if LanguageIsC - -//- Cpu scalar types -#include -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; -typedef float f32; -typedef double f64; -typedef i8 b8; -typedef u32 b32; - -#elif LanguageIsGpu - -//- Gpu scalar types -typedef int i32; -typedef int2 i64; -typedef uint u32; -typedef uint2 u64; -typedef float f32; -typedef uint b32; - +#if IsLanguageC + #include + typedef int8_t i8; + typedef int16_t i16; + typedef int32_t i32; + typedef int64_t i64; + typedef uint8_t u8; + typedef uint16_t u16; + typedef uint32_t u32; + typedef uint64_t u64; + typedef float f32; + typedef double f64; + typedef i8 b8; + typedef u32 b32; +#elif IsLanguageGpu + typedef int i32; + typedef int2 i64; + typedef uint u32; + typedef uint2 u64; + typedef float f32; + typedef uint b32; #endif //- Min / max constants @@ -563,170 +536,166 @@ typedef uint b32; #define I64Min ((i64)0x8000000000000000LL) //- Float infinity / nan constants -#if LanguageIsC -Global const u32 _f32_infinity_u32 = 0x7f800000; -Global const f32 *_f32_infinity = (f32 *)&_f32_infinity_u32; -#define F32Infinity (*_f32_infinity) +#if IsLanguageC + Global const u32 _f32_infinity_u32 = 0x7f800000; + Global const f32 *_f32_infinity = (f32 *)&_f32_infinity_u32; + #define F32Infinity (*_f32_infinity) -Global const u64 _f64_infinity_u64 = 0x7ff0000000000000ULL; -Global const f64 *_f64_infinity = (f64 *)&_f64_infinity_u64; -#define F64Infinity (*_f64_infinity) + Global const u64 _f64_infinity_u64 = 0x7ff0000000000000ULL; + Global const f64 *_f64_infinity = (f64 *)&_f64_infinity_u64; + #define F64Infinity (*_f64_infinity) -Global const u32 _f32_nan_u32 = 0x7f800001; -Global const f32 *_f32_nan = (f32 *)&_f32_nan_u32; -#define F32Nan (*_f32_nan) + Global const u32 _f32_nan_u32 = 0x7f800001; + Global const f32 *_f32_nan = (f32 *)&_f32_nan_u32; + #define F32Nan (*_f32_nan) -Global const u64 _f64_nan_u64 = 0x7ff8000000000001; -Global const f64 *_f64_nan = (f64 *)&_f64_nan_u64; -#define F64Nan (*_f64_nan) + Global const u64 _f64_nan_u64 = 0x7ff8000000000001; + Global const f64 *_f64_nan = (f64 *)&_f64_nan_u64; + #define F64Nan (*_f64_nan) -#define IsF32Nan(x) (x != x) -#define IsF64Nan(x) (x != x) + #define IsF32Nan(x) (x != x) + #define IsF64Nan(x) (x != x) #endif //////////////////////////////////////////////////////////// //~ Atomics -#if LanguageIsC +#if IsLanguageC + //- Atomic types + Struct(Atomic8) { volatile i8 _v; }; + Struct(Atomic16) { volatile i16 _v; }; + Struct(Atomic32) { volatile i32 _v; }; + Struct(Atomic64) { volatile i64 _v; }; -//- Atomic types -Struct(Atomic8) { volatile i8 _v; }; -Struct(Atomic16) { volatile i16 _v; }; -Struct(Atomic32) { volatile i32 _v; }; -Struct(Atomic64) { volatile i64 _v; }; - -//- Cache-line isolated aligned atomic types -AlignedStruct(Atomic8Padded, CachelineSize) { Atomic8 v; }; -AlignedStruct(Atomic16Padded, CachelineSize) { Atomic16 v; }; -AlignedStruct(Atomic32Padded, CachelineSize) { Atomic32 v; }; -AlignedStruct(Atomic64Padded, CachelineSize) { Atomic64 v; }; -StaticAssert(alignof(Atomic8Padded) == CachelineSize && sizeof(Atomic8Padded) % CachelineSize == 0); -StaticAssert(alignof(Atomic16Padded) == CachelineSize && sizeof(Atomic16Padded) % CachelineSize == 0); -StaticAssert(alignof(Atomic32Padded) == CachelineSize && sizeof(Atomic32Padded) % CachelineSize == 0); -StaticAssert(alignof(Atomic64Padded) == CachelineSize && sizeof(Atomic64Padded) % CachelineSize == 0); - -#if PlatformIsWindows && ArchIsX64 -//- Memory barriers -# define CompilerMemoryBarrier() _ReadWriteBarrier() -# define HardwareMemoryBarrier() MemoryBarrier() -//- 8 bit atomic operations -ForceInline i8 Atomic8Fetch (Atomic8 *x) { i8 result = (x)->_v; CompilerMemoryBarrier(); return result; } -ForceInline void Atomic8Set (Atomic8 *x, i8 e) { CompilerMemoryBarrier(); (x)->_v = e; } -ForceInline i8 Atomic8FetchSet (Atomic8 *x, i8 e) { return (i8)_InterlockedExchange8((volatile char *)&(x)->_v, (e)); } -ForceInline i8 Atomic8FetchTestSet (Atomic8 *x, i8 c, i8 e) { return (i8)_InterlockedCompareExchange8((volatile char *)&(x)->_v, (e), (c)); } -ForceInline i8 Atomic8FetchXor (Atomic8 *x, i8 c) { return (i8)_InterlockedXor8((volatile char *)&(x)->_v, (c)); } -ForceInline i8 Atomic8FetchAdd (Atomic8 *x, i8 a) { return (i8)_InterlockedExchangeAdd8((volatile char *)&(x)->_v, (a)); } -//- 16 bit atomic operations -ForceInline i16 Atomic16Fetch (Atomic16 *x) { i16 result = (x)->_v; CompilerMemoryBarrier(); return result; } -ForceInline void Atomic16Set (Atomic16 *x, i16 e) { CompilerMemoryBarrier(); (x)->_v = e; } -ForceInline i16 Atomic16FetchSet (Atomic16 *x, i16 e) { return (i16)_InterlockedExchange16(&(x)->_v, (e)); } -ForceInline i16 Atomic16FetchTestSet (Atomic16 *x, i16 c, i16 e) { return (i16)_InterlockedCompareExchange16(&(x)->_v, (e), (c)); } -ForceInline i16 Atomic16FetchXor (Atomic16 *x, i16 c) { return (i16)_InterlockedXor16(&(x)->_v, (c)); } -ForceInline i16 Atomic16FetchAdd (Atomic16 *x, i16 a) { return (i16)_InterlockedExchangeAdd16(&(x)->_v, (a)); } -//- 32 bit atomic operations -ForceInline i32 Atomic32Fetch (Atomic32 *x) { i32 result = (x)->_v; CompilerMemoryBarrier(); return result; } -ForceInline void Atomic32Set (Atomic32 *x, i32 e) { CompilerMemoryBarrier(); (x)->_v = e; } -ForceInline i32 Atomic32FetchSet (Atomic32 *x, i32 e) { return (i32)_InterlockedExchange((volatile long *)&(x)->_v, (e)); } -ForceInline i32 Atomic32FetchTestSet (Atomic32 *x, i32 c, i32 e) { return (i32)_InterlockedCompareExchange((volatile long *)&(x)->_v, (e), (c)); } -ForceInline i32 Atomic32FetchXor (Atomic32 *x, i32 c) { return (i32)_InterlockedXor((volatile long *)&(x)->_v, (c)); } -ForceInline i32 Atomic32FetchAdd (Atomic32 *x, i32 a) { return (i32)_InterlockedExchangeAdd((volatile long *)&(x)->_v, (a)); } -//- 64 bit atomic operations -ForceInline i64 Atomic64Fetch (Atomic64 *x) { i64 result = (x)->_v; CompilerMemoryBarrier(); return result; } -ForceInline void Atomic64Set (Atomic64 *x, i64 e) { CompilerMemoryBarrier(); (x)->_v = e; } -ForceInline i64 Atomic64FetchSet (Atomic64 *x, i64 e) { return (i64)_InterlockedExchange64(&(x)->_v, (e)); } -ForceInline i64 Atomic64FetchTestSet (Atomic64 *x, i64 c, i64 e) { return (i64)_InterlockedCompareExchange64(&(x)->_v, (e), (c)); } -ForceInline i64 Atomic64FetchXor (Atomic64 *x, i64 c) { return (i64)_InterlockedXor64(&(x)->_v, (c)); } -ForceInline i64 Atomic64FetchAdd (Atomic64 *x, i64 a) { return (i64)_InterlockedExchangeAdd64(&(x)->_v, (a)); } -#else -# error Atomics not implemented -#endif + //- Cache-line isolated aligned atomic types + AlignedStruct(Atomic8Padded, CachelineSize) { Atomic8 v; }; + AlignedStruct(Atomic16Padded, CachelineSize) { Atomic16 v; }; + AlignedStruct(Atomic32Padded, CachelineSize) { Atomic32 v; }; + AlignedStruct(Atomic64Padded, CachelineSize) { Atomic64 v; }; + StaticAssert(alignof(Atomic8Padded) == CachelineSize && sizeof(Atomic8Padded) % CachelineSize == 0); + StaticAssert(alignof(Atomic16Padded) == CachelineSize && sizeof(Atomic16Padded) % CachelineSize == 0); + StaticAssert(alignof(Atomic32Padded) == CachelineSize && sizeof(Atomic32Padded) % CachelineSize == 0); + StaticAssert(alignof(Atomic64Padded) == CachelineSize && sizeof(Atomic64Padded) % CachelineSize == 0); + #if IsPlatformWindows && IsArchX64 + //- Memory barriers + # define CompilerMemoryBarrier() _ReadWriteBarrier() + # define HardwareMemoryBarrier() MemoryBarrier() + //- 8 bit atomic operations + ForceInline i8 Atomic8Fetch (Atomic8 *x) { i8 result = (x)->_v; CompilerMemoryBarrier(); return result; } + ForceInline void Atomic8Set (Atomic8 *x, i8 e) { CompilerMemoryBarrier(); (x)->_v = e; } + ForceInline i8 Atomic8FetchSet (Atomic8 *x, i8 e) { return (i8)_InterlockedExchange8((volatile char *)&(x)->_v, (e)); } + ForceInline i8 Atomic8FetchTestSet (Atomic8 *x, i8 c, i8 e) { return (i8)_InterlockedCompareExchange8((volatile char *)&(x)->_v, (e), (c)); } + ForceInline i8 Atomic8FetchXor (Atomic8 *x, i8 c) { return (i8)_InterlockedXor8((volatile char *)&(x)->_v, (c)); } + ForceInline i8 Atomic8FetchAdd (Atomic8 *x, i8 a) { return (i8)_InterlockedExchangeAdd8((volatile char *)&(x)->_v, (a)); } + //- 16 bit atomic operations + ForceInline i16 Atomic16Fetch (Atomic16 *x) { i16 result = (x)->_v; CompilerMemoryBarrier(); return result; } + ForceInline void Atomic16Set (Atomic16 *x, i16 e) { CompilerMemoryBarrier(); (x)->_v = e; } + ForceInline i16 Atomic16FetchSet (Atomic16 *x, i16 e) { return (i16)_InterlockedExchange16(&(x)->_v, (e)); } + ForceInline i16 Atomic16FetchTestSet (Atomic16 *x, i16 c, i16 e) { return (i16)_InterlockedCompareExchange16(&(x)->_v, (e), (c)); } + ForceInline i16 Atomic16FetchXor (Atomic16 *x, i16 c) { return (i16)_InterlockedXor16(&(x)->_v, (c)); } + ForceInline i16 Atomic16FetchAdd (Atomic16 *x, i16 a) { return (i16)_InterlockedExchangeAdd16(&(x)->_v, (a)); } + //- 32 bit atomic operations + ForceInline i32 Atomic32Fetch (Atomic32 *x) { i32 result = (x)->_v; CompilerMemoryBarrier(); return result; } + ForceInline void Atomic32Set (Atomic32 *x, i32 e) { CompilerMemoryBarrier(); (x)->_v = e; } + ForceInline i32 Atomic32FetchSet (Atomic32 *x, i32 e) { return (i32)_InterlockedExchange((volatile long *)&(x)->_v, (e)); } + ForceInline i32 Atomic32FetchTestSet (Atomic32 *x, i32 c, i32 e) { return (i32)_InterlockedCompareExchange((volatile long *)&(x)->_v, (e), (c)); } + ForceInline i32 Atomic32FetchXor (Atomic32 *x, i32 c) { return (i32)_InterlockedXor((volatile long *)&(x)->_v, (c)); } + ForceInline i32 Atomic32FetchAdd (Atomic32 *x, i32 a) { return (i32)_InterlockedExchangeAdd((volatile long *)&(x)->_v, (a)); } + //- 64 bit atomic operations + ForceInline i64 Atomic64Fetch (Atomic64 *x) { i64 result = (x)->_v; CompilerMemoryBarrier(); return result; } + ForceInline void Atomic64Set (Atomic64 *x, i64 e) { CompilerMemoryBarrier(); (x)->_v = e; } + ForceInline i64 Atomic64FetchSet (Atomic64 *x, i64 e) { return (i64)_InterlockedExchange64(&(x)->_v, (e)); } + ForceInline i64 Atomic64FetchTestSet (Atomic64 *x, i64 c, i64 e) { return (i64)_InterlockedCompareExchange64(&(x)->_v, (e), (c)); } + ForceInline i64 Atomic64FetchXor (Atomic64 *x, i64 c) { return (i64)_InterlockedXor64(&(x)->_v, (c)); } + ForceInline i64 Atomic64FetchAdd (Atomic64 *x, i64 a) { return (i64)_InterlockedExchangeAdd64(&(x)->_v, (a)); } + #else + #error Atomics not implemented + #endif #endif //////////////////////////////////////////////////////////// //~ Ticket mutex -#if LanguageIsC -Struct(TicketMutex) -{ - Atomic64Padded ticket; - Atomic64Padded serving; -}; - -ForceInline void LockTicketMutex(TicketMutex *tm) -{ - i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1); - while (Atomic64Fetch(&tm->serving.v) != ticket) +#if IsLanguageC + Struct(TicketMutex) { - _mm_pause(); - } -} + Atomic64Padded ticket; + Atomic64Padded serving; + }; -ForceInline void UnlockTicketMutex(TicketMutex *tm) -{ - /* TODO: Atomic set w/ known ticket + 1 */ - Atomic64FetchAdd(&tm->serving.v, 1); -} + ForceInline void LockTicketMutex(TicketMutex *tm) + { + i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1); + while (Atomic64Fetch(&tm->serving.v) != ticket) + { + _mm_pause(); + } + } + + ForceInline void UnlockTicketMutex(TicketMutex *tm) + { + /* TODO: Atomic set w/ known ticket + 1 */ + Atomic64FetchAdd(&tm->serving.v, 1); + } #endif //////////////////////////////////////////////////////////// //~ String types -#if LanguageIsC +#if IsLanguageC + #define STRING(size, data) ((String) { (size), (data) }) + #define Zstr ((String) { 0, 0}) + #define Lit(cstr_lit) (String) { (sizeof((cstr_lit)) - 1), (u8 *)(cstr_lit) } + #define CompLit(cstr_lit) { .len = (sizeof((cstr_lit)) - 1), .text = (u8 *)(cstr_lit) } + #define StringFromPointers(p0, p1) ((String) { (u8 *)(p1) - (u8 *)(p0), (u8 *)p0 }) + #define StringFromStruct(ptr) ((String) { sizeof(*(ptr)), (u8 *)(ptr) }) + #define StringFromArena(arena) (STRING((arena)->pos, ArenaBase(arena))) -#define STRING(size, data) ((String) { (size), (data) }) -#define Zstr ((String) { 0, 0}) -#define Lit(cstr_lit) (String) { (sizeof((cstr_lit)) - 1), (u8 *)(cstr_lit) } -#define CompLit(cstr_lit) { .len = (sizeof((cstr_lit)) - 1), .text = (u8 *)(cstr_lit) } -#define StringFromPointers(p0, p1) ((String) { (u8 *)(p1) - (u8 *)(p0), (u8 *)p0 }) -#define StringFromStruct(ptr) ((String) { sizeof(*(ptr)), (u8 *)(ptr) }) -#define StringFromArena(arena) (STRING((arena)->pos, ArenaBase(arena))) + /* String from static array */ + #define StringFromArray(a) \ + ( \ + Assert(IsArray(a)), \ + ((String) { .len = sizeof(a), .text = (u8 *)(a) }) \ + ) -/* String from static array */ -#define StringFromArray(a) \ - ( \ - Assert(IsArray(a)), \ - ((String) { .len = sizeof(a), .text = (u8 *)(a) }) \ - ) + Struct(String) + { + u64 len; + u8 *text; + }; -Struct(String) -{ - u64 len; - u8 *text; -}; + Struct(String16) + { + u64 len; + u16 *text; + }; -Struct(String16) -{ - u64 len; - u16 *text; -}; + Struct(String32) + { + u64 len; + u32 *text; + }; -Struct(String32) -{ - u64 len; - u32 *text; -}; + Struct(StringArray) + { + u64 count; + String *strings; + }; -Struct(StringArray) -{ - u64 count; - String *strings; -}; - -Struct(StringListNode) -{ - String s; - StringListNode *next; - StringListNode *prev; -}; - -Struct(StringList) -{ - StringListNode *first; - StringListNode *last; - u64 count; -}; + Struct(StringListNode) + { + String s; + StringListNode *next; + StringListNode *prev; + }; + Struct(StringList) + { + StringListNode *first; + StringListNode *last; + u64 count; + }; #endif //////////////////////////////////////////////////////////// @@ -741,111 +710,125 @@ Struct(U128) //////////////////////////////////////////////////////////// //~ Resource types -#if LanguageIsC +#if IsLanguageC + #define ResourceEmbeddedMagic 0xfc060937194f4406 -#define ResourceEmbeddedMagic 0xfc060937194f4406 - -Struct(ResourceStore) -{ - u64 hash; -}; - -Struct(ResourceKey) -{ - u64 hash; -}; + Struct(ResourceStore) + { + u64 hash; + }; + Struct(ResourceKey) + { + u64 hash; + }; #endif //////////////////////////////////////////////////////////// //~ Shader types -#if LanguageIsC +#if IsLanguageC + //- Shader linkage -Struct(GpuPointer) { u32 v; }; -Struct(GpuBufferPos) { GpuPointer p; u64 byte_offset; }; + Struct(VertexShader) { ResourceKey resource; }; + Struct(PixelShader) { ResourceKey resource; }; + Struct(ComputeShader) { ResourceKey resource; }; -Inline b32 IsGpuPointerNil(GpuPointer p) { return p.v == 0; } + //- Pointers -Struct(VertexShader) { ResourceKey resource; }; -Struct(PixelShader) { ResourceKey resource; }; -Struct(ComputeShader) { ResourceKey resource; }; + Struct(GpuBufferPtr) { u32 v; }; + Struct(GpuRWBufferPtr) { u32 v; }; + Struct(GpuIndexBufferPtr) { u32 v; }; + Struct(GpuTexture1DPtr) { u32 v; }; + Struct(GpuRWTexture1DPtr) { u32 v; }; + Struct(GpuTexture2DPtr) { u32 v; }; + Struct(GpuRWTexture2DPtr) { u32 v; }; + Struct(GpuTexture3DPtr) { u32 v; }; + Struct(GpuRWTexture3DPtr) { u32 v; }; + Struct(GpuRasterTargetPtr) { u32 v; }; + Struct(GpuSamplerPtr) { u32 v; }; -//- Resource descriptor index types -Struct(StructuredBufferRid) { u32 v; }; -Struct(RWStructuredBufferRid) { u32 v; }; -Struct(Texture1DRid) { u32 v; }; -Struct(Texture2DRid) { u32 v; }; -Struct(Texture3DRid) { u32 v; }; -Struct(RWTexture1DRid) { u32 v; }; -Struct(RWTexture2DRid) { u32 v; }; -Struct(RWTexture3DRid) { u32 v; }; -Struct(SamplerStateRid) { u32 v; }; + #define IsGpuPtrNil(p) ((p).v == 0) +#elif IsLanguageGpu + //- Shader declaration -#elif LanguageIsGpu + #define ComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID)) + #define ComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID)) + #define ComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) + #define VertexShader(name, return_type) return_type name(Semantic(u32, SV_VertexID), Semantic(u32, SV_InstanceID)) + #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) -typedef u32 GpuBuffer; -typedef u32 GpuTexture; -typedef u32 GpuSampler; + //- Semantic declaration -//- Shader declaration -# define ComputeShader(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) -# define VertexShader(name, return_type) return_type name(Semantic(u32, SV_VertexID), Semantic(u32, SV_InstanceID)) -# define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) + # define Semantic(t, n) t n : n -//- Semantic declaration -# define Semantic(t, n) t n : n + //- Pointers -//- Descriptor heap index -# define UniformResourceFromRid(rid) ResourceDescriptorHeap[rid] -# define UniformSamplerFromRid(rid) SamplerDescriptorHeap[rid] -# define NonUniformResourceFromRid(rid) ResourceDescriptorHeap[NonUniformResourceIndex(rid)] -# define NonUniformSamplerFromRid(rid) SamplerDescriptorHeap[NonUniformResourceIndex(rid)] + typedef GpuBufferPtr u32; + typedef GpuRWBufferPtr u32; + typedef GpuIndexBufferPtr u32; + typedef GpuTexture1DPtr u32; + typedef GpuRWTexture1DPtr u32; + typedef GpuTexture2DPtr u32; + typedef GpuRWTexture2DPtr u32; + typedef GpuTexture3DPtr u32; + typedef GpuRWTexture3DPtr u32; + typedef GpuRasterTargetPtr u32; + typedef GpuSamplerPtr u32; + #define IsGpuPtrNil(p) ((p) == 0) + + //- Pointer dereference + + #define DerefUniformBuffer(p) ResourceDescriptorHeap[p] + #define DerefUniformTexture(p) ResourceDescriptorHeap[p] + #define DerefUniformSampler(p) SamplerDescriptorHeap[p] + + #define DerefNonUniformBuffer(p) ResourceDescriptorHeap[NonUniformResourceIndex(p)] + #define DerefNonUniformTexture(p) ResourceDescriptorHeap[NonUniformResourceIndex(p)] + #define DerefNonUniformSampler(p) SamplerDescriptorHeap[NonUniformResourceIndex(p)] #endif //////////////////////////////////////////////////////////// //~ Fibers -# define MaxFibers 4096 +#define MaxFibers 4096 StaticAssert(MaxFibers < I16Max); /* MaxFibers should fit in FiberId */ -#if LanguageIsC -# if PlatformIsWindows -# define FiberId() (*(volatile i16 *)__readgsqword(0x20)) -# else -# error FiberId not implemented -# endif +#if IsLanguageC + #if IsPlatformWindows + #define FiberId() (*(volatile i16 *)__readgsqword(0x20)) + #else + #error FiberId not implemented + #endif #endif //////////////////////////////////////////////////////////// //~ Exit callback types -#if LanguageIsC -# define ExitFuncDef(name) void name(void) -typedef ExitFuncDef(ExitFunc); +#if IsLanguageC + #define ExitFuncDef(name) void name(void) + typedef ExitFuncDef(ExitFunc); #endif //////////////////////////////////////////////////////////// //~ @hookdecl Api hooks -#if LanguageIsC - -//- Core hooks -StringList GetRawCommandline(void); -void Echo(String msg); -b32 Panic(String msg); -b32 IsRunningInDebugger(void); -i64 TimeNs(void); -u32 GetNumHardwareThreads(void); -void TrueRand(String buffer); -void OnExit(ExitFunc *func); -void SignalExit(i32 code); -void ExitNow(i32 code); - -//- Meta hooks -void StartupLayers(void); +#if IsLanguageC + //- Core hooks + StringList GetRawCommandline(void); + void Echo(String msg); + b32 Panic(String msg); + b32 IsRunningInDebugger(void); + i64 TimeNs(void); + u32 GetNumHardwareThreads(void); + void TrueRand(String buffer); + void OnExit(ExitFunc *func); + void SignalExit(i32 code); + void ExitNow(i32 code); + //- Meta hooks + void StartupLayers(void); #endif //////////////////////////////////////////////////////////// diff --git a/src/base/base_inc.h b/src/base/base_inc.h index ff6d3bfe..4f471bad 100644 --- a/src/base/base_inc.h +++ b/src/base/base_inc.h @@ -4,7 +4,7 @@ //- Api #include "base.h" -#if LanguageIsC +#if IsLanguageC # include "base_intrinsics.h" # include "base_memory.h" # include "base_arena.h" @@ -26,12 +26,12 @@ # include "base_bitbuff.h" # include "base_resource.h" # include "base_controller.h" -#elif LanguageIsGpu +#elif IsLanguageGpu # include "base_math_gpu.h" #endif //- Impl -#if LanguageIsC +#if IsLanguageC # include "base_memory.c" # include "base_arena.c" # include "base_futex.c" @@ -50,6 +50,6 @@ #endif //- Include base_win32 -#if LanguageIsC && PlatformIsWindows +#if IsLanguageC && IsPlatformWindows # include "base_win32/base_win32_inc.h" #endif diff --git a/src/base/base_job.h b/src/base/base_job.h index a996e01b..6f5fa219 100644 --- a/src/base/base_job.h +++ b/src/base/base_job.h @@ -85,7 +85,7 @@ JobPoolId AsyncPool(void); void job(job##_Sig *, i32); \ StaticAssert(1) -#define JobDef(job, sig_arg, id_arg) void job(job##_Sig *sig_arg, i32 id_arg) +#define JobImpl(job, sig_arg, id_arg) void job(job##_Sig *sig_arg, i32 id_arg) //////////////////////////////////////////////////////////// //~ @hookdecl Job dispatch operations diff --git a/src/base/base_log.h b/src/base/base_log.h index 5a62e371..de37874c 100644 --- a/src/base/base_log.h +++ b/src/base/base_log.h @@ -29,7 +29,7 @@ Struct(LogEventsArray) /* Log level configuration */ #ifndef LogLevel_CompTime -# if RtcIsEnabled || ProfilingIsEnabled +# if IsRtcEnabled || IsProfilingEnabled # define LogLevel_CompTime LogLevel_Debug # else # define LogLevel_CompTime LogLevel_Info diff --git a/src/base/base_memory.c b/src/base/base_memory.c index 1f2d9d46..c58e725a 100644 --- a/src/base/base_memory.c +++ b/src/base/base_memory.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////// //~ Win32 memory allocation -#if PlatformIsWindows +#if IsPlatformWindows //- Reserve void *ReserveMemory(u64 size) @@ -42,12 +42,12 @@ void SetMemoryReadWrite(void *address, u64 size) #else # error Memory allocation not implemented for this platform -#endif /* PlatformIsWindows */ +#endif /* IsPlatformWindows */ //////////////////////////////////////////////////////////// //~ Crtlib mem op stubs -#if !CrtlibIsEnabled +#if !IsCrtlibEnabled //- memcpy __attribute((section(".text.memcpy"))) @@ -92,4 +92,4 @@ i32 memcmp(const void *p1, const void *p2, u64 count) return result; } -#endif /* !CrtlibIsEnabled */ +#endif /* !IsCrtlibEnabled */ diff --git a/src/base/base_memory.h b/src/base/base_memory.h index 8019f29c..aa244d41 100644 --- a/src/base/base_memory.h +++ b/src/base/base_memory.h @@ -33,7 +33,7 @@ void SetMemoryReadWrite(void *address, u64 size); //////////////////////////////////////////////////////////// //~ Crtlib stubs -#if CrtlibIsEnabled +#if IsCrtlibEnabled # include #else void *memcpy(void *__restrict dst, const void *__restrict src, u64 n); diff --git a/src/base/base_snc.c b/src/base/base_snc.c index 26a4450d..9b3748c3 100644 --- a/src/base/base_snc.c +++ b/src/base/base_snc.c @@ -55,7 +55,7 @@ Lock LockSpinE(Mutex *m, i32 spin) } } -#if RtcIsEnabled +#if IsRtcEnabled Atomic32Set(&m->exclusive_fiber_id, FiberId()); #endif @@ -122,7 +122,7 @@ void Unlock(Lock *l) Mutex *m = l->mutex; if (l->exclusive) { -#if RtcIsEnabled +#if IsRtcEnabled Atomic32Set(&m->exclusive_fiber_id, 0); #endif Atomic32Set(&m->v, 0); diff --git a/src/base/base_snc.h b/src/base/base_snc.h index 12ad09ec..a765995f 100644 --- a/src/base/base_snc.h +++ b/src/base/base_snc.h @@ -11,7 +11,7 @@ AlignedStruct(Mutex, CachelineSize) */ Atomic32 v; -#if RtcIsEnabled +#if IsRtcEnabled Atomic32 exclusive_fiber_id; #endif }; @@ -52,7 +52,7 @@ Lock LockS(Mutex *m); void Unlock(Lock *lock); //- Lock assertion -#if RtcIsEnabled +#if IsRtcEnabled # define AssertLockedE(l, m) Assert((l)->mutex == (m) && (l)->exclusive == 1) # define AssertLockedES(l, m) Assert((l)->mutex == (m)) #else diff --git a/src/base/base_string.c b/src/base/base_string.c index 9196cbf3..54603234 100644 --- a/src/base/base_string.c +++ b/src/base/base_string.c @@ -640,7 +640,7 @@ String FormatStringV(Arena *arena, String fmt, va_list args) } } -#if RtcIsEnabled +#if IsRtcEnabled if (!no_more_args) { FmtArg last_arg = va_arg(args, FmtArg); diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index 891c9469..9dada622 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -36,7 +36,7 @@ BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name } //////////////////////////////////////////////////////////// -//~ @hookdef Core hooks +//~ @hookimpl Core hooks StringList GetRawCommandline(void) { @@ -110,16 +110,16 @@ void TrueRand(String buffer) } //////////////////////////////////////////////////////////// -//~ @hookdef Swap hooks +//~ @hookimpl Swap hooks b32 IsSwappedIn(void) { - return HotSwappingIsEnabled; + return IsHotSwappingEnabled; } b32 IsSwappingOut(void) { - return HotSwappingIsEnabled; + return IsHotSwappingEnabled; } String SwappedStateFromName(Arena *arena, String name) @@ -171,7 +171,7 @@ void WriteSwappedState(String name, String data) } //////////////////////////////////////////////////////////// -//~ @hookdef Exit hooks +//~ @hookimpl Exit hooks void OnExit(ExitFunc *func) { @@ -199,7 +199,7 @@ void ExitNow(i32 code) //////////////////////////////////////////////////////////// //~ Startup / shutdown jobs -JobDef(W32_StartupLayers, UNUSED sig, UNUSED id) +JobImpl(W32_StartupLayers, UNUSED sig, UNUSED id) { W32_SharedState *g = &W32_shared_state; TempArena scratch = BeginScratchNoConflict(); @@ -210,7 +210,7 @@ JobDef(W32_StartupLayers, UNUSED sig, UNUSED id) EndScratch(scratch); } -JobDef(W32_ShutdownLayers, UNUSED sig, UNUSED id) +JobImpl(W32_ShutdownLayers, UNUSED sig, UNUSED id) { __prof; W32_SharedState *g = &W32_shared_state; @@ -231,7 +231,7 @@ i32 W32_Main(void) __profthread("Main thread", PROF_THREAD_GROUP_MAIN); W32_SharedState *g = &W32_shared_state; -#if ProfilingIsEnabled +#if IsProfilingEnabled /* Start profiler */ { __profn("Launch profiler"); @@ -436,7 +436,7 @@ i32 W32_Main(void) //////////////////////////////////////////////////////////// //~ Crt main -#if CrtlibIsEnabled +#if IsCrtlibEnabled # if IsConsoleApp int main(UNUSED char **argc, UNUSED int argv) { @@ -448,12 +448,12 @@ int CALLBACK wWinMain(UNUSED _In_ HINSTANCE instance, UNUSED _In_opt_ HINSTANCE return W32_Main(); } # endif /* IsConsoleApp */ -#endif /* CrtlibIsEnabled */ +#endif /* IsCrtlibEnabled */ //////////////////////////////////////////////////////////// //~ Crt stub -#if !CrtlibIsEnabled +#if !IsCrtlibEnabled #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wmissing-variable-declarations" @@ -472,4 +472,4 @@ void __stdcall wWinMainCRTStartup(void) #pragma clang diagnostic pop -#endif /* !CrtlibIsEnabled */ +#endif /* !IsCrtlibEnabled */ diff --git a/src/base/base_win32/base_win32_job.c b/src/base/base_win32/base_win32_job.c index 8e00afd9..7e947d62 100644 --- a/src/base/base_win32/base_win32_job.c +++ b/src/base/base_win32/base_win32_job.c @@ -1,7 +1,7 @@ W32_SharedJobState W32_shared_job_state = ZI; //////////////////////////////////////////////////////////// -//~ @hookdef Startup +//~ @hookimpl Startup void InitJobSystem(void) { @@ -23,7 +23,7 @@ void InitJobSystem(void) //////////////////////////////////////////////////////////// //~ Win32 thread -JobDef(W32_DummyJob, sig, id) +JobImpl(W32_DummyJob, sig, id) { } @@ -456,7 +456,7 @@ W32_ThreadDef(W32_JobWorkerEntryPoint, worker_ctx_arg) } //////////////////////////////////////////////////////////// -//~ @hookdef Fiber suspend/resume operations +//~ @hookimpl Fiber suspend/resume operations void SuspendFiber(void) { @@ -556,7 +556,7 @@ void ResumeFibers(i16 fiber_ids_count, i16 *fiber_ids) } //////////////////////////////////////////////////////////// -//~ @hookdef Job pool operations +//~ @hookimpl Job pool operations JobPoolId InitJobPool(u32 thread_count, String name, JobPoolPriority priority) { @@ -602,7 +602,7 @@ JobPoolId HyperPool(void) } //////////////////////////////////////////////////////////// -//~ @hookdef Job operations +//~ @hookimpl Job operations Job *OpenJob(JobFunc *func, JobPoolId pool_id) { @@ -649,7 +649,6 @@ u32 CloseJob(Job *job) if (num_tasks == 0) { - Assert(0); job->func = W32_DummyJob; num_tasks = 1; } diff --git a/src/base/base_win32/base_win32_log.c b/src/base/base_win32/base_win32_log.c index 66b2055e..37bd5507 100644 --- a/src/base/base_win32/base_win32_log.c +++ b/src/base/base_win32/base_win32_log.c @@ -1,7 +1,7 @@ W32_SharedLogState W32_shared_log_state = ZI; //////////////////////////////////////////////////////////// -//~ @hookdef Init hooks +//~ @hookimpl Init hooks void InitLogSystem(String logfile_path) { @@ -101,7 +101,7 @@ void W32_Log(i32 level, String msg) } //////////////////////////////////////////////////////////// -//~ @hookdef Log hooks +//~ @hookimpl Log hooks /* Panic log function is separate to enforce zero side effects other than * immediately writing to log file. */ diff --git a/src/base/base_win32/base_win32_time.c b/src/base/base_win32/base_win32_time.c index 890cfd8c..dee0d8e0 100644 --- a/src/base/base_win32/base_win32_time.c +++ b/src/base/base_win32/base_win32_time.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////// -//~ @hookdef DateTime hooks +//~ @hookimpl DateTime hooks DateTime LocalDateTime(void) { diff --git a/src/collider/collider.c b/src/collider/collider.c index 14cdde2b..ff5d250b 100644 --- a/src/collider/collider.c +++ b/src/collider/collider.c @@ -4,7 +4,7 @@ #if COLLIDER_DEBUG void CLD_DebugBreakable(void) { -#if RtcIsEnabled +#if IsRtcEnabled DEBUGBREAKABLE; #endif } diff --git a/src/config.h b/src/config.h index 38c4da58..b25f9992 100644 --- a/src/config.h +++ b/src/config.h @@ -3,14 +3,14 @@ #define WRITE_DIR "power_play" /* Window title */ -#if RtcIsEnabled -# if DeveloperIsEnabled +#if IsRtcEnabled +# if IsDeveloperModeEnabled # define WINDOW_TITLE "Debug (Developer Build)" # else # define WINDOW_TITLE "Debug" # endif #else -# if DeveloperIsEnabled +# if IsDeveloperModeEnabled # define WINDOW_TITLE "Power Play (Developer Build)" # else # define WINDOW_TITLE "Power Play" @@ -80,7 +80,7 @@ /* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */ #define BITBUFF_DEBUG 0 -#define BITBUFF_TEST RtcIsEnabled +#define BITBUFF_TEST IsRtcEnabled /* If enabled, things like network writes & memory allocations will be tracked in a global statistics struct */ #define GstatIsEnabled 1 diff --git a/src/font/font.c b/src/font/font.c index 129ab370..b3ec35fa 100644 --- a/src/font/font.c +++ b/src/font/font.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////// //~ Font load job -JobDef(F_Load, sig, _) +JobImpl(F_Load, sig, _) { __prof; TempArena scratch = BeginScratchNoConflict(); diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 8dd3644c..6fb76cb2 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -7,51 +7,53 @@ void GPU_StartupCommon(void) { GPU_SharedUtilState *g = &GPU_shared_util_state; - GPU_Arena *gpu_perm = GPU_Perm(); + GPU_ArenaHandle gpu_perm = GPU_PermArena(); - /* Upload data to gpu */ - GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct); + /* Init point sampler */ + GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); + g->pt_sampler = GPU_PushSamplerPtr(gpu_perm, pt_sampler); + + GPU_CommandListHandle cl = GPU_OpenCommandList(GPU_QueueKind_Direct); { /* Init noise texture */ String noise_data = DataFromResource(ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat"))); Vec3I32 noise_dims = VEC3I32(128, 128, 64); - GpuPointer noise_tex = ZI; + GPU_ResourceHandle noise_tex = ZI; { - GPU_TextureDesc noise_desc = ZI; - noise_desc.format = GPU_Format_R16_Uint; - noise_desc.size = noise_dims; if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2) { Panic(Lit("Unexpected noise texture size")); } - noise_tex = GPU_PushTexture(gpu_perm, GPU_TextureKind_2D, GPU_Format_R16_Uint, noise_dims, GPU_TextureFlag_None); - GPU_CopyFromCpu(cl, noise_tex, noise_data); + noise_tex = GPU_PushTexture3D(gpu_perm, noise_dims, GPU_Format_R16_Uint, GPU_AccessKind_CopyWrite); + GPU_CopyResourceFromCpu(cl, noise_tex, noise_data); } - g->noise_tex = noise_tex; /* Init quad index buffer */ - GpuPointer quad_indices = ZI; + GPU_ResourceHandle quad_indices = ZI; { u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; - quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data), GPU_BufferFlag_None); - GPU_CopyFromCpu(cl, quad_indices, StringFromArray(quad_data)); + quad_indices = GPU_PushBuffer(gpu_perm, u16, GPU_AccessKind_CopyWrite); + GPU_CopyResourceFromCpu(cl, quad_indices, StringFromArray(quad_data)); } - g->quad_indices = quad_indices; + + g->noise_tex = GPU_PushTexture3DPtr(gpu_perm, noise_tex); + g->quad_indices = GPU_PushIndexBufferPtr(gpu_perm, quad_indices, u16); + + /* FIXME: Block other queues until common startup finishes here */ + GPU_SetAccess(cl, noise_tex, GPU_AccessKind_AnyRead); + GPU_SetAccess(cl, quad_indices, GPU_AccessKind_AnyRead); } GPU_CloseCommandList(cl); - - /* Init point sampler */ - g->pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); } //////////////////////////////////////////////////////////// //~ Arena helpers -GPU_Arena *GPU_Perm(void) +GPU_ArenaHandle GPU_PermArena(void) { i16 fiber_id = FiberId(); - GPU_Arena *perm = GPU_shared_util_state.perm_arenas[fiber_id]; - if (!perm) + GPU_ArenaHandle perm = GPU_shared_util_state.perm_arenas[fiber_id]; + if (IsGpuPtrNil(perm)) { GPU_shared_util_state.perm_arenas[fiber_id] = GPU_AcquireArena(); perm = GPU_shared_util_state.perm_arenas[fiber_id]; @@ -59,20 +61,28 @@ GPU_Arena *GPU_Perm(void) return perm; } +//////////////////////////////////////////////////////////// +//~ Cpu -> Gpu copy helpers + +void GPU_CopyResourceFromCpu(GPU_CommandListHandle cl, GPU_ResourceHandle dst, String src) +{ + /* TODO */ +} + //////////////////////////////////////////////////////////// //~ Common resource helpers -GpuPointer GPU_GetCommonPointSampler(void) +GpuSamplerPtr GPU_GetCommonPointSampler(void) { return GPU_shared_util_state.pt_sampler; } -GpuPointer GPU_GetCommonQuadIndices(void) +GpuIndexBufferPtr GPU_GetCommonQuadIndices(void) { return GPU_shared_util_state.quad_indices; } -GpuPointer GPU_GetCommonNoise(void) +GpuTexture3DPtr GPU_GetCommonNoise(void) { return GPU_shared_util_state.noise_tex; } diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 5ec9da41..3dd69503 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -4,11 +4,11 @@ Struct(GPU_SharedUtilState) { /* Common shared resources */ - GpuPointer pt_sampler; - GpuPointer quad_indices; - GpuPointer noise_tex; + GpuSamplerPtr pt_sampler; + GpuIndexBufferPtr quad_indices; + GpuTexture3DPtr noise_tex; - GPU_Arena *perm_arenas[MaxFibers]; + GPU_ArenaHandle perm_arenas[MaxFibers]; } extern GPU_shared_util_state; //////////////////////////////////////////////////////////// @@ -19,11 +19,16 @@ void GPU_StartupCommon(void); //////////////////////////////////////////////////////////// //~ Arena helpers -GPU_Arena *GPU_Perm(void); +GPU_ArenaHandle GPU_PermArena(void); + +//////////////////////////////////////////////////////////// +//~ Cpu -> Gpu copy helpers + +void GPU_CopyResourceFromCpu(GPU_CommandListHandle cl, GPU_ResourceHandle dst, String src); //////////////////////////////////////////////////////////// //~ Common resource helpers -GpuPointer GPU_GetCommonPointSampler(void); -GpuPointer GPU_GetCommonQuadIndices(void); -GpuPointer GPU_GetCommonNoise(void); +GpuSamplerPtr GPU_GetCommonPointSampler(void); +GpuIndexBufferPtr GPU_GetCommonQuadIndices(void); +GpuTexture3DPtr GPU_GetCommonNoise(void); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 7798c1eb..a3a05732 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -1,17 +1,22 @@ //////////////////////////////////////////////////////////// -//~ Opaque types +//~ Handle types -Struct(GPU_Arena); -Struct(GPU_CommandList); -Struct(GPU_Swapchain); +Struct(GPU_ArenaHandle) { u64 v; }; + +Struct(GPU_CommandListHandle) { u64 v; }; + +Struct(GPU_ResourceHandle) { u64 v; }; + +Struct(GPU_SwapchainHandle) { u64 v; }; //////////////////////////////////////////////////////////// //~ Queue types -#define GPU_MultiQueueEnabled !ProfilingIsEnabled +#define GPU_MultiQueueIsEnabled (!IsProfilingEnabled) + Enum(GPU_QueueKind) { -#if GPU_MultiQueueEnabled +#if GPU_MultiQueueIsEnabled GPU_QueueKind_Direct = 0, GPU_QueueKind_AsyncCompute = 1, GPU_QueueKind_AsyncCopy = 2, @@ -156,22 +161,36 @@ Enum(GPU_Format) }; //////////////////////////////////////////////////////////// -//~ Shader access types +//~ Access types -Enum(GPU_ShaderAccessKind) +Enum(GPU_AccessKind) { - GPU_ShaderAccessKind_Readonly, /* Default state for all resources */ - GPU_ShaderAccessKind_ReadWrite, - GPU_ShaderAccessKind_RasterTarget, -}; + GPU_AccessKind_AnyRead, -//////////////////////////////////////////////////////////// -//~ Arena types + GPU_AccessKind_AnyReadWrite, -Struct(GPU_TempArena) -{ - GPU_Arena *arena; - u64 start_pos; + GPU_AccessKind_CopyRead, + GPU_AccessKind_CopyWrite, + + GPU_AccessKind_AnyShaderRead, + GPU_AccessKind_AnyShaderReadWrite, + + GPU_AccessKind_ComputeRead, + GPU_AccessKind_ComputeReadWrite, + + GPU_AccessKind_VertexPixelRead, + GPU_AccessKind_VertexPixelReadWrite, + + GPU_AccessKind_VertexRead, + GPU_AccessKind_VertexReadWrite, + + GPU_AccessKind_PixelRead, + GPU_AccessKind_PixelReadWrite, + + GPU_AccessKind_DepthStencilRead, + GPU_AccessKind_DepthStencilReadWrite, + + GPU_AccessKind_RasterTargetWrite, }; //////////////////////////////////////////////////////////// @@ -183,6 +202,13 @@ Enum(GPU_BufferFlag) GPU_BufferFlag_Writable = (1 << 0), }; +Struct(GPU_BufferDesc) +{ + u64 size; + GPU_BufferFlag flags; + GPU_AccessKind initial_access; +}; + //////////////////////////////////////////////////////////// //~ Texture types @@ -191,8 +217,8 @@ Enum(GPU_BufferFlag) Enum(GPU_TextureFlag) { GPU_TextureFlag_None = 0, - GPU_TextureFlag_Writable = (1 << 0), - GPU_TextureFlag_Rasterizable = (1 << 1), + GPU_TextureFlag_AllowWritable = (1 << 0), + GPU_TextureFlag_AllowRasterTarget = (1 << 1), }; Enum(GPU_TextureKind) @@ -204,11 +230,13 @@ Enum(GPU_TextureKind) Struct(GPU_TextureDesc) { - GPU_TextureFlag flags; + GPU_TextureKind kind; GPU_Format format; - Vec3I32 size; + Vec3I32 dims; + GPU_TextureFlag flags; + GPU_AccessKind initial_access; + i32 mip_levels; /* Will be clamped to range [1, max] */ Vec4 clear_color; - i32 mip_levels; }; //////////////////////////////////////////////////////////// @@ -314,8 +342,8 @@ Enum(GPU_RasterMode) GPU_RasterMode_LineList, GPU_RasterMode_LineStrip, GPU_RasterMode_TriangleList, - GPU_RasterMode_WireTriangleList, GPU_RasterMode_TriangleStrip, + GPU_RasterMode_WireTriangleList, GPU_RasterMode_WireTriangleStrip, }; @@ -333,6 +361,8 @@ Struct(GPU_Stats) /* Resources */ u64 driver_resources_allocated; u64 driver_descriptors_allocated; + + /* TODO: Arena stats (committed, reserved, etc) */ }; //////////////////////////////////////////////////////////// @@ -341,66 +371,133 @@ Struct(GPU_Stats) void GPU_Startup(void); //////////////////////////////////////////////////////////// -//~ @hookdecl Arenas +//~ @hookdecl Arena -GPU_Arena *GPU_AcquireArena(void); -void GPU_ReleaseArena(GPU_Arena *arena); +GPU_ArenaHandle GPU_AcquireArena(void); +void GPU_ReleaseArena(GPU_ArenaHandle arena); //////////////////////////////////////////////////////////// -//~ @hookdecl Resource creation +//~ @hookdecl Resource -GpuPointer GPU_PushBufferEx(GPU_Arena *arena, i32 element_size, i32 element_align, i32 element_count, GPU_BufferFlag flags); -#define GPU_PushBuffer(arena, type, count, flags) GPU_PushBufferEx((arena), sizeof(type), alignof(type), (count), (flags)) +//- Resource creation -GpuPointer GPU_PushTextureEx(GPU_Arena *arena, GPU_TextureDesc desc); -GpuPointer GPU_PushTexture(GPU_Arena *arena, GPU_TextureKind kind, GPU_Format format, Vec3I32 size, GPU_TextureFlag flags); +GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc); +GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc); +GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); -GpuPointer GPU_PushSampler(GPU_Arena *arena, GPU_SamplerDesc desc); +#define GPU_PushBuffer(arena, type, count, ...) GPU_PushBufferEx((arena), \ + (GPU_BufferDesc) { \ + .initial_access = GPU_AccessKind_AnyReadWrite, \ + .size = sizeof(type) * (count), \ + __VA_ARGS__ \ + } \ +) + +#define GPU_PushTexture1D(arena, _size, _format, _initial_access) GPU_PushTextureEx((arena), \ + (GPU_TextureDesc) { \ + .kind = GPU_TextureKind_1D, \ + .format = (_format), \ + .dims = Vec3I32((_size), 1, 1), \ + .initial_access = (_initial_access), \ + __VA_ARGS__ \ + } \ +) + +#define GPU_PushTexture2D(arena, _size, _format, _initial_access) GPU_PushTextureEx((arena), \ + (GPU_TextureDesc) { \ + .kind = GPU_TextureKind_2D, \ + .format = (_format), \ + .dims = Vec3I32((_size).x, (_size).y, 1), \ + .initial_access = (_initial_access), \ + __VA_ARGS__ \ + } \ +) + +#define GPU_PushTexture3D(arena, _size, _format, _initial_access) GPU_PushTextureEx((arena), \ + (GPU_TextureDesc) { \ + .kind = GPU_TextureKind_3D, \ + .format = (_format), \ + .dims = (_size), \ + .initial_access = (_initial_access), \ + __VA_ARGS__ \ + } \ +) + +//- Pointer creation + +GpuBufferPtr GPU_PushBufferPtrEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); +GpuRWBufferPtr GPU_PushRWBufferPtrEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); +GpuIndexBufferPtr GPU_PushIndexBufferPtrEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); +GpuTexture1DPtr GPU_PushTexture1DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuRWTexture1DPtr GPU_PushRWTexture1DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuTexture2DPtr GPU_PushTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuRWTexture2DPtr GPU_PushRWTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuTexture3DPtr GPU_PushTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuRWTexture3DPtr GPU_PushRWTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuRasterTargetPtr GPU_PushRasterTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +GpuSamplerPtr GPU_PushSamplerPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); + +#define GPU_PushBufferPtr(arena, resource, type) GPU_PushBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) +#define GPU_PushRWBufferPtr(arena, resource, type) GPU_PushRWBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) +#define GPU_PushIndexBufferPtr(arena, resource, type) GPU_PushIndexBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) + +//- Count + +u64 GPU_CountBufferEx(GPU_ResourceHandle buffer, u64 element_size); +u64 GPU_Count1D(GPU_ResourceHandle texture1d); +u64 GPU_Count2D(GPU_ResourceHandle texture2d); +u64 GPU_Count3D(GPU_ResourceHandle texture3d); + +#define GPU_CountBuffer(buffer, type) GPU_CountBufferEx((buffer), sizeof(type)) //////////////////////////////////////////////////////////// -//~ @hookdecl Commands +//~ @hookdecl Command -//- Command list creation -GPU_CommandList *GPU_OpenCommandList(GPU_QueueKind queue); -void GPU_CloseCommandList(GPU_CommandList *cl); +//- Command list +GPU_CommandListHandle GPU_OpenCommandList(GPU_QueueKind queue); +void GPU_CloseCommandList(GPU_CommandListHandle cl); -//- Cpu -> Gpu -void GPU_CopyBytesFromCpu(GPU_CommandList *cl, GpuPointer dst, RngU64 dst_range, void *src); -void GPU_CopyTexelsFromCpu(GPU_CommandList *cl, GpuPointer dst, Rng3U64 dst_range, void *src); -void GPU_CopyFromCpu(GPU_CommandList *cl, GpuPointer dst, String src); +//- Arena +void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena); -//- Gpu -> Cpu -void GPU_AddCpuFence(GPU_CommandList *cl, Fence *fence, i64 v); -void GPU_SetCpuFence(GPU_CommandList *cl, Fence *fence, i64 v); +//- Copy +void GPU_CopyBuffer(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, u64 src_offset, u64 size); +void GPU_CopyTexture(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Vec3I32 src_offset, Vec3I32 dims); -//- Implicit state -void GPU_SetShaderAccess(GPU_CommandList *cl, GpuPointer ptr, GPU_ShaderAccessKind access_kind); -void GPU_SetRasterizeMode(GPU_CommandList *cl, GPU_RasterMode mode); -void GPU_SetConstantU32(GPU_CommandList *cl, i32 slot, u32 v); -void GPU_SetConstantF32(GPU_CommandList *cl, i32 slot, f32 v); -void GPU_SetConstantPtr(GPU_CommandList *cl, i32 slot, GpuPointer v); +//- Constants +void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v); +void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v); +void GPU_SetConstBufferPtr (GPU_CommandListHandle cl, i32 slot, GpuBufferPtr v); +void GPU_SetConstRWBufferPtr (GPU_CommandListHandle cl, i32 slot, GpuRWBufferPtr v); +void GPU_SetConstIndexBufferPtr (GPU_CommandListHandle cl, i32 slot, GpuIndexBufferPtr v); +void GPU_SetConstTexture1DPtr (GPU_CommandListHandle cl, i32 slot, GpuTexture1DPtr v); +void GPU_SetConstRWTexture1DPtr (GPU_CommandListHandle cl, i32 slot, GpuRWTexture1DPtr v); +void GPU_SetConstTexture2DPtr (GPU_CommandListHandle cl, i32 slot, GpuTexture2DPtr v); +void GPU_SetConstRWTexture2DPtr (GPU_CommandListHandle cl, i32 slot, GpuRWTexture2DPtr v); +void GPU_SetConstTexture3DPtr (GPU_CommandListHandle cl, i32 slot, GpuTexture3DPtr v); +void GPU_SetConstRWTexture3DPtr (GPU_CommandListHandle cl, i32 slot, GpuRWTexture3DPtr v); +void GPU_SetConstRasterTargetPtr (GPU_CommandListHandle cl, i32 slot, GpuRasterTargetPtr v); +void GPU_SetConstSamplerPtr (GPU_CommandListHandle cl, i32 slot, GpuSamplerPtr v); -//- Clear -void GPU_ClearRasterTarget(GPU_CommandList *cl, GpuPointer target); +//- Access +void GPU_SetAccess(GPU_CommandListHandle cl, GPU_ResourceHandle resource, GPU_AccessKind kind); //- Compute -void GPU_Compute(GPU_CommandList *cl, ComputeShader cs, Vec3U32 threads); +void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); //- Rasterize -void GPU_RasterizeEx(GPU_CommandList *cl, - VertexShader vs, PixelShader ps, - u32 instances_count, - GpuPointer idx_buff, RngU64 idx_buff_range, - u32 raster_targets_count, GpuPointer *raster_targets, - Rng3 viewport, Rng2 scissor); - -void GPU_Rasterize(GPU_CommandList *cl, +void GPU_Rasterize(GPU_CommandListHandle cl, VertexShader vs, PixelShader ps, - u32 instances_count, GpuPointer idx_buff, - u32 raster_targets_count, GpuPointer *raster_targets); + u32 instances_count, GpuIndexBufferPtr idx_buff, + u32 raster_targets_count, GpuRasterTargetPtr *raster_targets, + Rng3 viewport, Rng2 scissor, + GPU_RasterMode mode); -//- Profiling -void GPU_ProfN(GPU_CommandList *cl, String name); +//- Clear +void GPU_ClearRasterTarget(GPU_CommandListHandle cl, GpuRasterTargetPtr ptr); + +//- Profile +void GPU_ProfN(GPU_CommandListHandle cl, String name); //////////////////////////////////////////////////////////// //~ @hookdecl Statistics @@ -410,15 +507,19 @@ GPU_Stats GPU_QueryStats(void); //////////////////////////////////////////////////////////// //~ @hookdecl Swapchain -GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size); -void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain); +GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size); +void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain); /* Waits until a new backbuffer is ready to be written to. * This should be called before rendering for minimum latency. */ -void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain); +void GPU_YieldOnSwapchain(GPU_SwapchainHandle swapchain); /* 1. Recreates backbuffer at desired size if necessary - * 2. Blits the source texture into the backbuffer - * 3. Presents the backbuffer + * 2. Clears the backbuffer using clear color + * 3. Blits the source texture into the backbuffer + * 4. Presents the backbuffer */ -void GPU_PresentSwapchain(GPU_Swapchain *swapchain, Vec2I32 dst_size, Rng2I32 dst_range, GpuPointer src, Vec2I32 src_pos, i32 vsync, Vec4 clear_color); +void GPU_PresentSwapchain(GPU_SwapchainHandle swapchain, Vec4 dst_clear_color, + Vec2U32 dst_size, Vec2U32 dst_offset, + GpuTexture2DPtr src, Vec2U32 src_offset, + i32 vsync); diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index d04183ef..bf124f0c 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -15,7 +15,7 @@ void GPU_D12_Startup(void) { HRESULT hr = 0; - //- Enable debug layer + /* Enable debug layer */ u32 dxgi_factory_flags = 0; #if GPU_DEBUG { @@ -48,7 +48,7 @@ void GPU_D12_Startup(void) } #endif - //- Create factory + /* Create factory */ { __profn("Create factory"); hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory); @@ -58,7 +58,7 @@ void GPU_D12_Startup(void) } } - //- Create device + /* Create device */ { __profn("Create device"); IDXGIAdapter3 *adapter = 0; @@ -118,7 +118,7 @@ void GPU_D12_Startup(void) g->device = device; } - //- Enable debug layer breaks + /* Enable debug layer breaks */ { #if GPU_DEBUG /* Enable D3D12 Debug break */ @@ -152,71 +152,120 @@ void GPU_D12_Startup(void) } ////////////////////////////// - //- Initialize queues + //- Initialize command queues { - GPU_D12_Queue *direct = PushStruct(perm, GPU_D12_Queue); - GPU_D12_Queue *async_compute = PushStruct(perm, GPU_D12_Queue); - GPU_D12_Queue *async_copy = PushStruct(perm, GPU_D12_Queue); - g->queues[GPU_QueueKind_Direct] = direct; - g->queues[GPU_QueueKind_AsyncCompute] = async_compute; - g->queues[GPU_QueueKind_AsyncCopy] = async_copy; - b32 ok = 1; + GPU_D12_CommandQueueDesc descs [] = { + { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH }, + { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, + { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, + }; + for (u32 i = 0; i < MinU32(countof(descs), countof(g->queues)); ++i) { - if (ok) + GPU_D12_CommandQueueDesc desc = descs[i]; + D3D12_COMMAND_QUEUE_DESC d3d_desc = { .Type = desc.type, .Priority = desc.priority }; + GPU_D12_Queue *queue = &g->queues[i]; + queue->desc = desc; + HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue); + if (SUCCEEDED(hr)) { - D3D12_COMMAND_QUEUE_DESC desc = { .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, .Priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH }; - ok = SUCCEEDED(ID3D12Device_CreateCommandQueue(g->device, &desc, &IID_ID3D12CommandQueue, (void **)&direct->d3d_queue)); - if (ok) - { - ok = SUCCEEDED(ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&direct->submit_fence)); - } + hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->submit_fence); } - if (ok) + if (FAILED(hr)) { - D3D12_COMMAND_QUEUE_DESC desc = { .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }; - ok = SUCCEEDED(ID3D12Device_CreateCommandQueue(g->device, &desc, &IID_ID3D12CommandQueue, (void **)&async_compute->d3d_queue)); - if (ok) - { - ok = SUCCEEDED(ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&async_compute->submit_fence)); - } + Panic(Lit("Failed to create GPU Command Queue")); } - if (ok) - { - D3D12_COMMAND_QUEUE_DESC desc = { .Type = D3D12_COMMAND_LIST_TYPE_COPY, .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }; - ok = SUCCEEDED(ID3D12Device_CreateCommandQueue(g->device, &desc, &IID_ID3D12CommandQueue, (void **)&async_copy->d3d_queue)); - if (ok) - { - ok = SUCCEEDED(ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&async_copy->submit_fence)); - } - } - } - if (!ok) - { - Panic(Lit("Failed to create GPU Command Queues")); } } - /* Init descriptor heaps */ - g->cbv_srv_uav_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - GPU_D12_MaxCbvSrvUavDescriptors, - ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); + ////////////////////////////// + //- Initialize descriptor heaps - g->sampler_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - GPU_D12_MaxSamplerDescriptors, - ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); + { + Struct(Dx12HeapDesc) { GPU_D12_DescriptorHeap *dst; D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; }; + Dx12HeapDesc descs[] = { + { .dst = &g->cbv_srv_uav_heap, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxCbvSrvUavDescriptors, }, + { .dst = &g->sampler_heap, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxSamplerDescriptors, }, + { .dst = &g->rtv_heap, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, .max = GPU_D12_MaxRtvDescriptors, }, + }; + for (u32 i = 0; i < countof(descs); ++i) + { + Dx12HeapDesc desc = descs[i]; - g->rtv_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE, - GPU_D12_MaxRtvDescriptors, - ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV)); - /* Init rootsig */ - GPU_D12_InitRootsig(); + Arena *arena = AcquireArena(Gibi(64)); + GPU_D12_DescriptorHeap *heap = PushStruct(arena, GPU_D12_DescriptorHeap); + heap->arena = arena; + + heap->type = desc.type; + heap->max_count = desc.max; + heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, desc.type); + + D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI; + d3d_desc.Type = desc.type; + d3d_desc.Flags = desc.flags; + d3d_desc.NumDescriptors = desc.max; + HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); + if (FAILED(hr)) + { + Panic(Lit("Failed to create CPU descriptor heap")); + } + ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); + + } + } + + ////////////////////////////// + //- Initialize bindless root signature + + { + HRESULT hr = 0; + + /* Serialize root signature */ + ID3D10Blob *blob = 0; + if (SUCCEEDED(hr)) + { + __profn("Serialize root signature"); + + D3D12_ROOT_PARAMETER param = ZI; + param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + param.Constants.ShaderRegister = 0; + param.Constants.RegisterSpace = 0; + param.Constants.Num32BitValues = 64; + + D3D12_ROOT_SIGNATURE_DESC desc = ZI; + desc.NumParameters = 1; + desc.pParameters = ¶m; + desc.NumStaticSamplers = 0; + desc.pStaticSamplers = 0; + desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; + + hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0); + } + + /* Create root signature */ + ID3D12RootSignature *rootsig = 0; + if (SUCCEEDED(hr)) + { + __profn("Create root signature"); + hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); + } + g->bindless_rootsig = rootsig; + + if (blob) + { + ID3D10Blob_Release(blob); + } + if (FAILED(hr)) + { + Panic(Lit("Failed to create root signature")); + } + } + + ////////////////////////////// + //- Initialize queue sync worker - /* Start queue sync job */ JobPoolId sync_pool = InitJobPool(1, Lit("Dx12 queue sync"), JobPoolPriority_Critical); RunJob(GPU_D12_StartQueueSync, .pool = sync_pool); @@ -224,102 +273,37 @@ void GPU_D12_Startup(void) } //////////////////////////////////////////////////////////// -//~ Initialization +//~ Helpers -//- Heap initialization - -GPU_D12_DescriptorHeap *GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, u32 max_descs, u32 desc_size) +DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format) { - GPU_D12_SharedState *g = &GPU_D12_shared_state; - Arena *arena = AcquireArena(Gibi(64)); - GPU_D12_DescriptorHeap *heap = PushStruct(arena, GPU_D12_DescriptorHeap); - heap->arena = arena; - - heap->type = type; - heap->max_count = max_descs; - heap->descriptor_size = desc_size; - - D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI; - d3d_desc.Type = type; - d3d_desc.Flags = flags; - d3d_desc.NumDescriptors = max_descs; - HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); - if (FAILED(hr)) - { - Panic(Lit("Failed to create CPU descriptor heap")); - } - ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); - - return heap; + return (DXGI_FORMAT)format; } -//- Rootsig initialization - -void GPU_D12_InitRootsig(void) +GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle) { - GPU_D12_SharedState *g = &GPU_D12_shared_state; - b32 ok = 1; - HRESULT hr = 0; - String error_str = ZI; + return (GPU_D12_Arena *)handle.v; +} - /* Serialize root signature */ - ID3D10Blob *blob = 0; - if (ok) - { - __profn("Create root signature"); +GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle) +{ + return (GPU_D12_CmdList *)handle.v; +} - D3D12_ROOT_PARAMETER param = ZI; - param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param.Constants.ShaderRegister = 0; - param.Constants.RegisterSpace = 0; - param.Constants.Num32BitValues = 64; +GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle) +{ + return (GPU_D12_Resource *)handle.v; +} - D3D12_ROOT_SIGNATURE_DESC desc = ZI; - desc.NumParameters = 1; - desc.pParameters = ¶m; - desc.NumStaticSamplers = 0; - desc.pStaticSamplers = 0; - desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; - - hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0); - if (FAILED(hr)) - { - error_str = Lit("Failed to serialize root signature"); - ok = 0; - } - } - - /* Create root signature */ - ID3D12RootSignature *rootsig = 0; - if (ok) - { - __profn("Create root signature"); - - hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); - if (FAILED(hr)) - { - error_str = Lit("Failed to create root signature"); - ok = 0; - } - } - - if (blob) - { - ID3D10Blob_Release(blob); - } - - g->bindless_rootsig = rootsig; - if (!ok) - { - Panic(error_str); - } +GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle) +{ + return (GPU_D12_Swapchain *)handle.v; } //////////////////////////////////////////////////////////// //~ Pipeline operations -JobDef(GPU_D12_LoadPipeline, sig, _) +JobImpl(GPU_D12_LoadPipeline, sig, _) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Pipeline *pipeline = sig->pipeline; @@ -480,7 +464,7 @@ GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; - return g->queues[kind]; + return &g->queues[kind]; } //////////////////////////////////////////////////////////// @@ -532,6 +516,19 @@ void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor) Unlock(&lock); } +GPU_D12_Descriptor *GPU_D12_RtvDescriptorFromPtr(GpuRasterTargetPtr ptr) +{ + /* TODO */ + return 0; +} + +D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromPtr(GpuIndexBufferPtr ptr) +{ + /* TODO */ + D3D12_INDEX_BUFFER_VIEW result = ZI; + return result; +} + //////////////////////////////////////////////////////////// //~ Raw command list @@ -570,13 +567,13 @@ GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind) } cl->queue = queue; - HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.d3d_type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); + HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); if (FAILED(hr)) { Panic(Lit("Failed to create command allocator")); } - hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.d3d_type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl); + hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl); if (FAILED(hr)) { Panic(Lit("Failed to create command list")); @@ -645,7 +642,7 @@ u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl) //////////////////////////////////////////////////////////// //~ Queue sync job -JobDef(GPU_D12_StartQueueSync, _, __) +JobImpl(GPU_D12_StartQueueSync, _, __) { GPU_D12_SharedState *g = &GPU_D12_shared_state; HANDLE queue_fences_events[GPU_NumQueues] = ZI; @@ -674,7 +671,7 @@ JobDef(GPU_D12_StartQueueSync, _, __) } //////////////////////////////////////////////////////////// -//~ @hookdef Startup hook +//~ @hookimpl Startup hook void GPU_Startup(void) { @@ -682,7 +679,7 @@ void GPU_Startup(void) } //////////////////////////////////////////////////////////// -//~ @hookdef Fence hooks +//~ @hookimpl Fence hooks Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) { @@ -699,7 +696,9 @@ void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) } //////////////////////////////////////////////////////////// -//~ @hookdef Resource hooks +//~ @hookimpl Resource hooks + +#if 0 GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) { @@ -1073,445 +1072,479 @@ u64 GPU_GetBufferCount(GPU_Resource *gpu_resource) return resource->desc.buffer.count; } -//////////////////////////////////////////////////////////// -//~ @hookdef Resource index hooks - -StructuredBufferRid GPU_StructuredBufferRidFromResource(GPU_Resource *resource) { return (StructuredBufferRid) { ((GPU_D12_Resource *)resource)->srv_descriptor->index }; } -RWStructuredBufferRid GPU_RWStructuredBufferRidFromResource(GPU_Resource *resource) { return (RWStructuredBufferRid) { ((GPU_D12_Resource *)resource)->uav_descriptor->index }; } -Texture1DRid GPU_Texture1DRidFromResource(GPU_Resource *resource) { return (Texture1DRid) { ((GPU_D12_Resource *)resource)->srv_descriptor->index }; } -Texture2DRid GPU_Texture2DRidFromResource(GPU_Resource *resource) { return (Texture2DRid) { ((GPU_D12_Resource *)resource)->srv_descriptor->index }; } -Texture3DRid GPU_Texture3DRidFromResource(GPU_Resource *resource) { return (Texture3DRid) { ((GPU_D12_Resource *)resource)->srv_descriptor->index }; } -RWTexture1DRid GPU_RWTexture1DRidFromResource(GPU_Resource *resource) { return (RWTexture1DRid) { ((GPU_D12_Resource *)resource)->uav_descriptor->index }; } -RWTexture2DRid GPU_RWTexture2DRidFromResource(GPU_Resource *resource) { return (RWTexture2DRid) { ((GPU_D12_Resource *)resource)->uav_descriptor->index }; } -RWTexture3DRid GPU_RWTexture3DRidFromResource(GPU_Resource *resource) { return (RWTexture3DRid) { ((GPU_D12_Resource *)resource)->uav_descriptor->index }; } -SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource) { return (SamplerStateRid) { ((GPU_D12_Resource *)resource)->sampler_descriptor->index }; } +#endif //////////////////////////////////////////////////////////// -//~ @hookdef Command list hooks +//~ @hookimpl Arena -GPU_CommandList *GPU_OpenCommnadList(GPU_QueueKind queue_kind) +GPU_ArenaHandle GPU_AcquireArena(void) { - GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); - Arena *perm = PermArena(); - GPU_D12_CommandList *cl = f->first_free_command_list; - if (cl) - { - SllStackPop(f->first_free_command_list); - ZeroStruct(cl); - } - else - { - cl = PushStruct(perm, GPU_D12_CommandList); - } - cl->queue_kind = queue_kind; - return (GPU_CommandList *)cl; + /* TODO */ + return (GPU_ArenaHandle) { 0 }; } -i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl) +void GPU_ReleaseArena(GPU_ArenaHandle arena) +{ + /* TODO */ +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Resource + +//- Resource creation + +GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc) +{ + /* TODO */ + return (GPU_ResourceHandle) { 0 }; +} + +GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc) +{ + /* TODO */ + return (GPU_ResourceHandle) { 0 }; +} + +GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc) +{ + /* TODO */ + return (GPU_ResourceHandle) { 0 }; +} + +//- Pointer creation + +GpuBufferPtr GPU_PushBufferPtrEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) +{ + /* TODO */ + return (GpuBufferPtr) { 0 }; +} + +GpuRWBufferPtr GPU_PushRWBufferPtrEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) +{ + /* TODO */ + return (GpuRWBufferPtr) { 0 }; +} + +GpuIndexBufferPtr GPU_PushIndexBufferPtrEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) +{ + /* TODO */ + return (GpuIndexBufferPtr) { 0 }; +} + +GpuTexture1DPtr GPU_PushTexture1DPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuTexture1DPtr) { 0 }; +} + +GpuRWTexture1DPtr GPU_PushRWTexture1DPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuRWTexture1DPtr) { 0 }; +} + +GpuTexture2DPtr GPU_PushTexture2DPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuTexture2DPtr) { 0 }; +} + +GpuRWTexture2DPtr GPU_PushRWTexture2DPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuRWTexture2DPtr) { 0 }; +} + +GpuTexture3DPtr GPU_PushTexture3DPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuTexture3DPtr) { 0 }; +} + +GpuRWTexture3DPtr GPU_PushRWTexture3DPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuRWTexture3DPtr) { 0 }; +} + +GpuRasterTargetPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuRasterTargetPtr) { 0 }; +} + +GpuSamplerPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +{ + /* TODO */ + return (GpuSamplerPtr) { 0 }; +} + +//- Count + +u64 GPU_CountBufferEx(GPU_ResourceHandle buffer, u64 element_size) +{ + /* TODO */ + return 0; +} + +u64 GPU_Count1D(GPU_ResourceHandle texture1d) +{ + /* TODO */ + return 0; +} + +u64 GPU_Count2D(GPU_ResourceHandle texture2d) +{ + /* TODO */ + return 0; +} + +u64 GPU_Count3D(GPU_ResourceHandle texture3d) +{ + /* TODO */ + return 0; +} + +//////////////////////////////////////////////////////////// +//~ Command helpers + +GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl) { GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; + + /* Grab chunk */ + GPU_D12_CmdChunk *chunk = cl->last_cmd_chunk; + { + if (chunk && chunk->cmds_count >= GPU_D12_CmdsPerChunk) + { + chunk = 0; + } + if (!chunk) + { + Lock lock = LockE(&g->free_cmd_chunks_mutex); + { + chunk = g->first_free_cmd_chunk; + if (chunk) + { + g->first_free_cmd_chunk = chunk->next; + } + } + Unlock(&lock); + if (chunk) + { + GPU_D12_Cmd *cmds = chunk->cmds; + ZeroStruct(chunk); + chunk->cmds = cmds; + } + } + if (!chunk) + { + Arena *perm = PermArena(); + chunk = PushStruct(perm, GPU_D12_CmdChunk); + chunk->cmds = PushStructsNoZero(perm, GPU_D12_Cmd, GPU_D12_CmdsPerChunk); + } + if (chunk != cl->last_cmd_chunk) + { + SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk); + } + } + + /* Push cmd to chunk */ + GPU_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++]; + return cmd; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Command + +//- Command list + +GPU_CommandListHandle GPU_OpenCommandList(GPU_QueueKind queue_kind) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_CmdList *cl = 0; + Lock lock = LockE(&g->free_cmd_lists_mutex); + { + cl = g->first_free_cmd_list; + if (cl) + { + g->first_free_cmd_list = cl->next; + ZeroStruct(cl); + } + else + { + Arena *perm = PermArena(); + cl = PushStruct(perm, GPU_D12_CmdList); + } + } + Unlock(&lock); + return (GPU_CommandListHandle) { .v = (u64)cl }; +} + +void GPU_CloseCommandList(GPU_CommandListHandle cl_handle) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_QueueKind queue_kind = cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); - GPU_D12_Resource *slotted_render_targets[GPU_MaxRasterTargets] = ZI; - GPU_D12_Resource *bound_render_targets[GPU_MaxRasterTargets] = ZI; - /* Begin dx12 command list */ GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind); ID3D12GraphicsCommandList *rcl = dx12_cl->cl; + /* Pipeline state */ b32 graphics_rootsig_set = 0; b32 compute_rootsig_set = 0; b32 descriptor_heaps_set = 0; GPU_D12_Pipeline *bound_pipeline = 0; + /* Rasterizer state */ + D3D12_VIEWPORT bound_viewport = ZI; + D3D12_RECT bound_scissor = ZI; + D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; + D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; + D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRasterTargets] = ZI; + + u64 cmds_count = cl->cmds_count; + GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cmds_count); + { + /* Flatten command chunks */ + { + u64 flattened_idx = 0; + for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) + { + for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) + { + cmds[flattened_idx] = chunk->cmds[cmd_chunk_idx]; + ++flattened_idx; + } + } + } + /* Free command chunks */ + { + Lock lock = LockE(&g->free_cmd_chunks_mutex); + { + for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) + { + chunk->next = g->first_free_cmd_chunk; + g->first_free_cmd_chunk = chunk; + } + } + Unlock(&lock); + } + } + /* Process gpu commands into dx12 commands */ { - GPU_D12_Command *cmd = cl->first; - while (cmd) + u64 cmd_idx = 0; + while (cmd_idx < cmds_count) { + GPU_D12_Cmd *cmd = &cmds[cmd_idx]; switch (cmd->kind) { default: break; //- Resource barrier - case GPU_D12_CommandKind_TransitionToSrv: - case GPU_D12_CommandKind_TransitionToUav: - case GPU_D12_CommandKind_TransitionToRtv: - case GPU_D12_CommandKind_TransitionToCopySrc: - case GPU_D12_CommandKind_TransitionToCopyDst: - case GPU_D12_CommandKind_FlushUav: - { - u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1); + // case GPU_D12_CmdKind_TransitionToSrv: + // case GPU_D12_CmdKind_TransitionToUav: + // case GPU_D12_CmdKind_TransitionToRtv: + // case GPU_D12_CmdKind_TransitionToCopySrc: + // case GPU_D12_CmdKind_TransitionToCopyDst: + // case GPU_D12_CmdKind_FlushUav: + // { + // u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1); - /* Build barriers batch list */ - Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; }; - u32 max_barriers_count = 0; - TmpBarrier *first_barrier = 0; - TmpBarrier *last_barrier = 0; - while (cmd && (cmd->kind == GPU_D12_CommandKind_TransitionToSrv - || cmd->kind == GPU_D12_CommandKind_TransitionToUav - || cmd->kind == GPU_D12_CommandKind_TransitionToRtv - || cmd->kind == GPU_D12_CommandKind_TransitionToCopySrc - || cmd->kind == GPU_D12_CommandKind_TransitionToCopyDst - || cmd->kind == GPU_D12_CommandKind_FlushUav)) - { - D3D12_RESOURCE_BARRIER_TYPE type = ZI; - D3D12_RESOURCE_STATES state_after = ZI; - GPU_D12_Resource *resource = cmd->barrier.resource; + // /* Build barriers batch list */ + // Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; }; + // u32 max_barriers_count = 0; + // TmpBarrier *first_barrier = 0; + // TmpBarrier *last_barrier = 0; + // while (cmd && (cmd->kind == GPU_D12_CmdKind_TransitionToSrv + // || cmd->kind == GPU_D12_CmdKind_TransitionToUav + // || cmd->kind == GPU_D12_CmdKind_TransitionToRtv + // || cmd->kind == GPU_D12_CmdKind_TransitionToCopySrc + // || cmd->kind == GPU_D12_CmdKind_TransitionToCopyDst + // || cmd->kind == GPU_D12_CmdKind_FlushUav)) + // { + // D3D12_RESOURCE_BARRIER_TYPE type = ZI; + // D3D12_RESOURCE_STATES state_after = ZI; + // GPU_D12_Resource *resource = cmd->barrier.resource; - switch (cmd->kind) - { - default: break; - case GPU_D12_CommandKind_TransitionToSrv: - { + // switch (cmd->kind) + // { + // default: break; + // case GPU_D12_CmdKind_TransitionToSrv: + // { - type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; - } break; - case GPU_D12_CommandKind_TransitionToUav: - { - type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - } break; - case GPU_D12_CommandKind_TransitionToRtv: - { - type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - state_after = D3D12_RESOURCE_STATE_RENDER_TARGET; - i32 slot = cmd->barrier.rt_slot; - if (slot >= 0 && slot < countof(slotted_render_targets)) - { - slotted_render_targets[slot] = resource; - } - } break; - case GPU_D12_CommandKind_TransitionToCopySrc: - { - type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - state_after = D3D12_RESOURCE_STATE_COPY_SOURCE; - } break; - case GPU_D12_CommandKind_TransitionToCopyDst: - { - type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - state_after = D3D12_RESOURCE_STATE_COPY_DEST; - } break; - case GPU_D12_CommandKind_FlushUav: - { - type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - } break; - } + // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; + // } break; + // case GPU_D12_CmdKind_TransitionToUav: + // { + // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + // } break; + // case GPU_D12_CmdKind_TransitionToRtv: + // { + // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // state_after = D3D12_RESOURCE_STATE_RENDER_TARGET; + // i32 slot = cmd->barrier.rt_slot; + // if (slot >= 0 && slot < countof(slotted_raster_targets)) + // { + // slotted_raster_targets[slot] = resource; + // } + // } break; + // case GPU_D12_CmdKind_TransitionToCopySrc: + // { + // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // state_after = D3D12_RESOURCE_STATE_COPY_SOURCE; + // } break; + // case GPU_D12_CmdKind_TransitionToCopyDst: + // { + // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // state_after = D3D12_RESOURCE_STATE_COPY_DEST; + // } break; + // case GPU_D12_CmdKind_FlushUav: + // { + // type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + // } break; + // } - b32 skip = 0; - /* Skip UAV transitions on resources that already have transition in the batch */ - if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen) - { - skip = 1; - } - /* Skip redundant transitions */ - if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && ((resource->barrier_state_after & state_after) == state_after)) - { - skip = 1; - } - /* Skip transitions that will occur via implicit promotion */ - if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->state == D3D12_RESOURCE_STATE_COMMON && - (state_after != D3D12_RESOURCE_STATE_RENDER_TARGET && - state_after != D3D12_RESOURCE_STATE_DEPTH_WRITE && - state_after != D3D12_RESOURCE_STATE_UNORDERED_ACCESS && - state_after != D3D12_RESOURCE_STATE_RESOLVE_DEST && - state_after != D3D12_RESOURCE_STATE_PRESENT)) - { - /* Skip transitions into existing state */ - skip = 1; - } + // b32 skip = 0; + // /* Skip UAV transitions on resources that already have transition in the batch */ + // if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen) + // { + // skip = 1; + // } + // /* Skip redundant transitions */ + // if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && ((resource->barrier_state_after & state_after) == state_after)) + // { + // skip = 1; + // } + // /* Skip transitions that will occur via implicit promotion */ + // if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->state == D3D12_RESOURCE_STATE_COMMON && + // (state_after != D3D12_RESOURCE_STATE_RENDER_TARGET && + // state_after != D3D12_RESOURCE_STATE_DEPTH_WRITE && + // state_after != D3D12_RESOURCE_STATE_UNORDERED_ACCESS && + // state_after != D3D12_RESOURCE_STATE_RESOLVE_DEST && + // state_after != D3D12_RESOURCE_STATE_PRESENT)) + // { + // /* Skip transitions into existing state */ + // skip = 1; + // } - if (!skip) - { - resource->barrier_type = type; - resource->barrier_state_after = state_after; - if (resource->barrier_gen != barrier_gen) - { - TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier); - resource->barrier_gen = barrier_gen; - b->r = resource; - SllQueuePush(first_barrier, last_barrier, b); - ++max_barriers_count; - } - } + // if (!skip) + // { + // resource->barrier_type = type; + // resource->barrier_state_after = state_after; + // if (resource->barrier_gen != barrier_gen) + // { + // TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier); + // resource->barrier_gen = barrier_gen; + // b->r = resource; + // SllQueuePush(first_barrier, last_barrier, b); + // ++max_barriers_count; + // } + // } - cmd = cmd->next; - } + // cmd = cmd->next; + // } - /* Submit batched barriers */ - /* FIXME: Transitions from UAV -> UAV should insert UAV barrier */ - u32 barriers_count = 0; - D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count); - for (TmpBarrier *b = first_barrier; b; b = b->next) - { - GPU_D12_Resource *resource = b->r; - D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type; - D3D12_RESOURCE_STATES state_before = resource->state; - D3D12_RESOURCE_STATES state_after = resource->barrier_state_after; - if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after)) - { - D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - rb->Type = resource->barrier_type; - if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) - { - rb->Transition.pResource = resource->d3d_resource; - rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb->Transition.StateBefore = state_before; - rb->Transition.StateAfter = state_after; - resource->state = state_after; - } - else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) - { - rb->UAV.pResource = resource->d3d_resource; - } - } - } - if (barriers_count > 0) - { - ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); - } - } break; - - //- Clear rtv - case GPU_D12_CommandKind_ClearRtv: - { - GPU_D12_Resource *resource = cmd->clear.resource; - Assert(resource->state == D3D12_RESOURCE_STATE_RENDER_TARGET); - f32 clear_color[4] = ZI; - clear_color[0] = resource->desc.clear_color.x; - clear_color[1] = resource->desc.clear_color.y; - clear_color[2] = resource->desc.clear_color.z; - clear_color[3] = resource->desc.clear_color.w; - ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, resource->rtv_descriptor->handle, clear_color, 0, 0); - cmd = cmd->next; - } break; + // /* Submit batched barriers */ + // /* FIXME: Transitions from UAV -> UAV should insert UAV barrier */ + // u32 barriers_count = 0; + // D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count); + // for (TmpBarrier *b = first_barrier; b; b = b->next) + // { + // GPU_D12_Resource *resource = b->r; + // D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type; + // D3D12_RESOURCE_STATES state_before = resource->state; + // D3D12_RESOURCE_STATES state_after = resource->barrier_state_after; + // if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after)) + // { + // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; + // rb->Type = resource->barrier_type; + // if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) + // { + // rb->Transition.pResource = resource->d3d_resource; + // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + // rb->Transition.StateBefore = state_before; + // rb->Transition.StateAfter = state_after; + // resource->state = state_after; + // } + // else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) + // { + // rb->UAV.pResource = resource->d3d_resource; + // } + // } + // } + // if (barriers_count > 0) + // { + // ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); + // } + // } break; //- Copy resource - case GPU_D12_CommandKind_Copy: - { - GPU_D12_Resource *dst = cmd->copy.dst; - GPU_D12_Resource *src = cmd->copy.src; + // case GPU_D12_CmdKind_Copy: + // { + // GPU_D12_Resource *dst = cmd->copy.dst; + // GPU_D12_Resource *src = cmd->copy.src; - D3D12_RESOURCE_DESC dst_desc = ZI; - D3D12_RESOURCE_DESC src_desc = ZI; - ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); - ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); + // D3D12_RESOURCE_DESC dst_desc = ZI; + // D3D12_RESOURCE_DESC src_desc = ZI; + // ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); + // ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); - if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - { /* Copy buffer -> buffer */ - u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride; - u64 src_len = src->desc.buffer.count * src->desc.buffer.stride; - u64 cpy_len = MinU64(dst_len, src_len); - if (cpy_len > 0) - { - ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); - /* Implicit promotion */ - if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; - if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; - } - } - else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - { /* Copy buffer -> texture */ - D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI; - ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0); + // if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy buffer -> buffer */ + // u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride; + // u64 src_len = src->desc.buffer.count * src->desc.buffer.stride; + // u64 cpy_len = MinU64(dst_len, src_len); + // if (cpy_len > 0) + // { + // ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); + // /* Implicit promotion */ + // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; + // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; + // } + // } + // else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy buffer -> texture */ + // D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI; + // ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0); - D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; - dst_loc.pResource = dst->d3d_resource; - dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_loc.SubresourceIndex = 0; + // D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; + // dst_loc.pResource = dst->d3d_resource; + // dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + // dst_loc.SubresourceIndex = 0; - D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; - src_loc.pResource = src->d3d_resource; - src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src_loc.PlacedFootprint = dst_placed_footprint; + // D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; + // src_loc.pResource = src->d3d_resource; + // src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + // src_loc.PlacedFootprint = dst_placed_footprint; - ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); - /* Implicit promotion */ - if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; - if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; - } - else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - { /* Copy texture -> buffer */ - /* TODO */ - Assert(0); - } - else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) - { /* Copy texture -> texture */ - /* TODO */ - Assert(0); - } + // ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); + // /* Implicit promotion */ + // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; + // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; + // } + // else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy texture -> buffer */ + // /* TODO */ + // Assert(0); + // } + // else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy texture -> texture */ + // /* TODO */ + // Assert(0); + // } - cmd = cmd->next; - } break; - - //- Dispatch Vs/Ps shader - case GPU_D12_CommandKind_Rasterize: - { - GPU_D12_Pipeline *pipeline = 0; - { - GPU_D12_PipelineDesc pipeline_desc = ZI; - pipeline_desc.vs = cmd->rasterize.vs; - pipeline_desc.ps = cmd->rasterize.ps; - { - pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - switch (cmd->rasterize.mode) - { - default: Assert(0); break; - case GPU_RasterizeMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - case GPU_RasterizeMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case GPU_RasterizeMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case GPU_RasterizeMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case GPU_RasterizeMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case GPU_RasterizeMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case GPU_RasterizeMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - } - } - if (cmd->rasterize.mode == GPU_RasterizeMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterizeMode_WireTriangleStrip) - { - pipeline_desc.is_wireframe = 1; - } - for (u32 i = 0; i < cmd->rasterize.rts_count; ++i) - { - GPU_D12_Resource *r = slotted_render_targets[i]; - if (r) - { - pipeline_desc.render_target_formats[i] = r->desc.texture.format; - } - else - { - Assert(0); /* No bound render target in slot */ - pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; - } - } - pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); - } - - if (pipeline - && cmd->rasterize.index_buffer->desc.buffer.count > 0) - { - /* Set descriptor heaps */ - if (!descriptor_heaps_set) - { - ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } - - /* Bind rootsig */ - if (!graphics_rootsig_set) - { - ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); - graphics_rootsig_set = 1; - } - - /* Bind pipeline */ - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); - bound_pipeline = pipeline; - } - - /* Fill signature */ - /* TODO: Only upload dirty */ - { - u32 sig_size = cmd->rasterize.sig_size; - void *sig = cmd->rasterize.sig; - u32 num32bit = sig_size / 4; - ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); - } - - /* Set rasterizer state */ - /* TODO: Only set dirty */ - { - D3D12_RECT scissor = ZI; - scissor.left = cmd->rasterize.scissor.left; - scissor.top = cmd->rasterize.scissor.top; - scissor.right = cmd->rasterize.scissor.right; - scissor.bottom = cmd->rasterize.scissor.bottom; - D3D12_VIEWPORT viewport = ZI; - viewport.TopLeftX = cmd->rasterize.viewport.top_left_x; - viewport.TopLeftY = cmd->rasterize.viewport.top_left_y; - viewport.Width = cmd->rasterize.viewport.width; - viewport.Height = cmd->rasterize.viewport.height; - viewport.MinDepth = cmd->rasterize.viewport.min_depth; - viewport.MaxDepth = cmd->rasterize.viewport.max_depth; - ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); - ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); - } - - /* Set topology */ - /* TODO: Only set dirty */ - { - D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - switch (cmd->rasterize.mode) - { - default: Assert(0); break; - case GPU_RasterizeMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; - case GPU_RasterizeMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - case GPU_RasterizeMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; - case GPU_RasterizeMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case GPU_RasterizeMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case GPU_RasterizeMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - case GPU_RasterizeMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - } - ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); - } - - /* Set index buffer */ - /* TODO: Only set dirty */ - u32 indices_count = 0; - { - GPU_D12_Resource *indices = cmd->rasterize.index_buffer; - D3D12_INDEX_BUFFER_VIEW ibv = ZI; - ibv.BufferLocation = indices->buffer_gpu_address; - if (indices->desc.buffer.stride == 2) - { - ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R16_UINT); - } - else - { - Assert(indices->desc.buffer.stride == 4); - ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT); - } - ibv.SizeInBytes = indices->desc.buffer.count * indices->desc.buffer.stride; - indices_count = indices->desc.buffer.count; - ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); - } - - /* Bind render targets */ - { - b32 om_dirty = 0; - D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_render_targets)] = ZI; - for (u32 i = 0; i < cmd->rasterize.rts_count; ++i) - { - GPU_D12_Resource *target = slotted_render_targets[i]; - if (bound_render_targets[i] != target) - { - bound_render_targets[i] = target; - om_dirty = 1; - } - rtvs[i] = target->rtv_descriptor->handle; - } - if (om_dirty) - { - ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, cmd->rasterize.rts_count, rtvs, 0, 0); - } - } - - /* Dispatch */ - ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); - } - - cmd = cmd->next; - } break; + // cmd_idx += 1; + // } break; //- Dispatch compute shader - case GPU_D12_CommandKind_Compute: + case GPU_D12_CmdKind_Compute: { GPU_D12_Pipeline *pipeline = 0; { @@ -1525,7 +1558,7 @@ i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl) /* Set descriptor heaps */ if (!descriptor_heaps_set) { - ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap }; + ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap.d3d_heap, g->sampler_heap.d3d_heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); descriptor_heaps_set = 1; } @@ -1544,20 +1577,208 @@ i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl) bound_pipeline = pipeline; } - /* Fill signature */ - /* TODO: Only upload dirty */ + /* Dispatch */ + ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); + } + + cmd_idx += 1; + } break; + + //- Dispatch Vs/Ps shader + case GPU_D12_CmdKind_Rasterize: + { + GPU_D12_Pipeline *pipeline = 0; + { + GPU_D12_PipelineDesc pipeline_desc = ZI; + pipeline_desc.vs = cmd->rasterize.vs; + pipeline_desc.ps = cmd->rasterize.ps; { - u32 sig_size = cmd->compute.sig_size; - void *sig = cmd->compute.sig; - u32 num32bit = sig_size / 4; - ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(rcl, 0, num32bit, sig, 0); + pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + switch (cmd->rasterize.mode) + { + default: Assert(0); break; + case GPU_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; + case GPU_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case GPU_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case GPU_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + } + } + if (cmd->rasterize.mode == GPU_RasterMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterMode_WireTriangleStrip) + { + pipeline_desc.is_wireframe = 1; + } + for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) + { + GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; + if (rtv_descriptor != 0) + { + pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_desc.format; + } + else + { + pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; + } + } + pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); + } + + /* Calculate IBV count */ + u32 indices_count = 0; + D3D12_INDEX_BUFFER_VIEW ibv = cmd->rasterize.ibv; + if (ibv.Format == DXGI_FORMAT_R16_UINT) + { + indices_count = ibv.SizeInBytes / 2; + } + else if (ibv.Format == DXGI_FORMAT_R32_UINT) + { + indices_count = ibv.SizeInBytes / 4; + } + + /* Prepare & dispatch */ + if (pipeline && indices_count > 0) + { + /* Set descriptor heaps */ + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap.d3d_heap, g->sampler_heap.d3d_heap }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } + + /* Bind rootsig */ + if (!graphics_rootsig_set) + { + ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); + graphics_rootsig_set = 1; + } + + /* Bind pipeline */ + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); + bound_pipeline = pipeline; + } + + // /* Fill signature */ + // /* TODO: Only upload dirty */ + // { + // u32 sig_size = cmd->rasterize.sig_size; + // void *sig = cmd->rasterize.sig; + // u32 num32bit = sig_size / 4; + // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); + // } + + /* Set viewport */ + { + D3D12_VIEWPORT viewport = ZI; + { + Rng3 range = cmd->rasterize.viewport; + viewport.TopLeftX = range.p0.x; + viewport.TopLeftY = range.p0.y; + viewport.Width = range.p1.x - range.p0.x; + viewport.Height = range.p1.y - range.p0.y; + viewport.MinDepth = range.p0.z; + viewport.MaxDepth = range.p1.z; + } + if (!MatchStruct(&viewport, &bound_viewport)) + { + bound_viewport = viewport; + ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); + } + } + + /* Set scissor */ + { + D3D12_RECT scissor = ZI; + { + Rng2 range = cmd->rasterize.scissor; + scissor.left = range.p0.x; + scissor.top = range.p0.y; + scissor.right = range.p1.x; + scissor.bottom = range.p1.y; + } + if (!MatchStruct(&scissor, &bound_scissor)) + { + bound_scissor = scissor; + ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); + } + } + + /* Set topology */ + { + D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + switch (cmd->rasterize.mode) + { + default: Assert(0); break; + case GPU_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; + case GPU_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; + case GPU_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; + case GPU_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case GPU_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + case GPU_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case GPU_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + } + if (topology != bound_primitive_topology) + { + ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); + } + } + + /* Set index buffer */ + if (!MatchStruct(&ibv, &bound_ibv)) + { + ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); + bound_ibv = ibv; + } + + /* Bind render targets */ + { + b32 om_dirty = 0; + u32 rtvs_count = 0; + D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_raster_targets)] = ZI; + for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) + { + GPU_D12_Descriptor *rtv_desc = cmd->rasterize.rtv_descriptors[i]; + if (rtv_desc != 0) + { + om_dirty = om_dirty || (bound_raster_targets[i].ptr != rtv_desc->handle.ptr); + rtvs[rtvs_count++] = rtv_desc->handle; + } + else + { + break; + } + } + if (om_dirty) + { + CopyStructs(bound_raster_targets, rtvs, rtvs_count); + ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, rtvs_count, rtvs, 0, 0); + } } /* Dispatch */ - ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.num_threads_x, cmd->compute.num_threads_y, cmd->compute.num_threads_z); + ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); } - cmd = cmd->next; + cmd_idx += 1; + } break; + + //- Clear rtv + case GPU_D12_CmdKind_ClearRtv: + { + GPU_D12_Descriptor *descriptor = cmd->clear_rtv.rtv_descriptor; + GPU_D12_Resource *resource = descriptor->resource; + Assert(resource->layout == D3D12_BARRIER_LAYOUT_RENDER_TARGET); + f32 clear_color[4] = ZI; + clear_color[0] = resource->texture_desc.clear_color.x; + clear_color[1] = resource->texture_desc.clear_color.y; + clear_color[2] = resource->texture_desc.clear_color.z; + clear_color[3] = resource->texture_desc.clear_color.w; + ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, descriptor->handle, clear_color, 0, 0); + cmd_idx += 1; } break; } } @@ -1566,205 +1787,226 @@ i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl) /* End dx12 command list */ u64 fence_target = GPU_D12_EndRawCommandList(dx12_cl); - /* Free commands */ - if (cl->last) + /* Free command list */ { - cl->last->next = f->first_free_command; - f->first_free_command = cl->first; + Lock lock = LockE(&g->free_cmd_lists_mutex); + { + cl->next = g->first_free_cmd_list; + g->first_free_cmd_list = cl; + } + Unlock(&lock); } - /* Free command list */ - SllStackPush(f->first_free_command_list, cl); - EndScratch(scratch); - return fence_target; } -//////////////////////////////////////////////////////////// -//~ @hookdef Profiling helper hooks +//- Arena -void GPU_ProfN(GPU_CommandList *cl, String name) +void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena) +{ + /* TODO */ +} + +//- Copy + +void GPU_CopyBuffer(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, u64 src_offset, u64 size) +{ + /* TODO */ +} + +void GPU_CopyTexture(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Vec3I32 src_offset, Vec3I32 dims) +{ + /* TODO */ +} + +//- Constants + +void GPU_SetConstU32(GPU_CommandListHandle cl, i32 slot, u32 v) +{ + /* TODO */ +} + +void GPU_SetConstF32(GPU_CommandListHandle cl, i32 slot, f32 v) +{ + /* TODO */ +} + +void GPU_SetConstBufferPtr(GPU_CommandListHandle cl, i32 slot, GpuBufferPtr v) +{ + /* TODO */ +} + +void GPU_SetConstRWBufferPtr(GPU_CommandListHandle cl, i32 slot, GpuRWBufferPtr v) +{ + /* TODO */ +} + +void GPU_SetConstIndexBufferPtr(GPU_CommandListHandle cl, i32 slot, GpuIndexBufferPtr v) +{ + /* TODO */ +} + +void GPU_SetConstTexture1DPtr(GPU_CommandListHandle cl, i32 slot, GpuTexture1DPtr v) +{ + /* TODO */ +} + +void GPU_SetConstRWTexture1DPtr(GPU_CommandListHandle cl, i32 slot, GpuRWTexture1DPtr v) +{ + /* TODO */ +} + +void GPU_SetConstTexture2DPtr(GPU_CommandListHandle cl, i32 slot, GpuTexture2DPtr v) +{ + /* TODO */ +} + +void GPU_SetConstRWTexture2DPtr(GPU_CommandListHandle cl, i32 slot, GpuRWTexture2DPtr v) +{ + /* TODO */ +} + +void GPU_SetConstTexture3DPtr(GPU_CommandListHandle cl, i32 slot, GpuTexture3DPtr v) +{ + /* TODO */ +} + +void GPU_SetConstRWTexture3DPtr(GPU_CommandListHandle cl, i32 slot, GpuRWTexture3DPtr v) +{ + /* TODO */ +} + +void GPU_SetConstRasterTargetPtr(GPU_CommandListHandle cl, i32 slot, GpuRasterTargetPtr v) +{ + /* TODO */ +} + +void GPU_SetConstSamplerPtr(GPU_CommandListHandle cl, i32 slot, GpuSamplerPtr v) +{ + /* TODO */ +} + +//- Access + +void GPU_SetAccess(GPU_CommandListHandle cl, GPU_ResourceHandle handle, GPU_AccessKind kind) +{ + /* TODO */ +} + +//- Compute + +void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 groups) +{ + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); + cmd->kind = GPU_D12_CmdKind_Compute; + cmd->compute.cs = cs; + cmd->compute.groups = groups; +} + +//- Rasterize + +void GPU_Rasterize(GPU_CommandListHandle cl_handle, + VertexShader vs, PixelShader ps, + u32 instances_count, GpuIndexBufferPtr idx_buff, + u32 raster_targets_count, GpuRasterTargetPtr *raster_targets, + Rng3 viewport, Rng2 scissor, + GPU_RasterMode mode) +{ + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); + cmd->kind = GPU_D12_CmdKind_Rasterize; + cmd->rasterize.vs = vs; + cmd->rasterize.ps = ps; + cmd->rasterize.instances_count = instances_count; + cmd->rasterize.ibv = GPU_D12_IbvFromPtr(idx_buff); + for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRasterTargets); ++i) + { + cmd->rasterize.rtv_descriptors[i] = GPU_D12_RtvDescriptorFromPtr(raster_targets[i]); + } + cmd->rasterize.viewport = viewport; + cmd->rasterize.scissor = scissor; + cmd->rasterize.mode = mode; +} + +//- Clear + +void GPU_ClearRasterTarget(GPU_CommandListHandle cl_handle, GpuRasterTargetPtr ptr) +{ + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); + cmd->kind = GPU_D12_CmdKind_ClearRtv; + cmd->clear_rtv.rtv_descriptor = GPU_D12_RtvDescriptorFromPtr(ptr); +} + +//- Profile + +void GPU_ProfN(GPU_CommandListHandle cl, String name) { /* TODO */ } //////////////////////////////////////////////////////////// -//~ @hookdef Barrier hooks +//~ @hookimpl Map hooks -void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource) -{ - GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); - cmd->kind = GPU_D12_CommandKind_TransitionToSrv; - cmd->barrier.resource = (GPU_D12_Resource *)resource; -} +// GPU_Mapped GPU_Map(GPU_Resource *gpu_r) +// { +// GPU_Mapped result = ZI; +// result.resource = gpu_r; +// GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r; +// D3D12_RANGE read_range = ZI; +// HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem); +// if (FAILED(hr) || !result.mem) +// { +// /* TODO: Don't panic */ +// Panic(Lit("Failed to map command buffer resource")); +// } +// return result; +// } -void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource) -{ - GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); - cmd->kind = GPU_D12_CommandKind_TransitionToUav; - cmd->barrier.resource = (GPU_D12_Resource *)resource; -} +// void GPU_Unmap(GPU_Mapped m) +// { +// GPU_D12_Resource *r = (GPU_D12_Resource *)m.resource; +// ID3D12Resource_Unmap(r->d3d_resource, 0, 0); +// } -void GPU_TransitionToRasterizable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot) -{ - GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); - cmd->kind = GPU_D12_CommandKind_TransitionToRtv; - cmd->barrier.resource = (GPU_D12_Resource *)resource; - cmd->barrier.rt_slot = slot; -} +// void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference) +// { +// GPU_D12_SharedState *g = &GPU_D12_shared_state; -void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource) -{ - GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); - cmd->kind = GPU_D12_CommandKind_TransitionToCopySrc; - cmd->barrier.resource = (GPU_D12_Resource *)resource; -} +// D3D12_RESOURCE_DESC desc = ZI; +// ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); -void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource) -{ - GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); - cmd->kind = GPU_D12_CommandKind_TransitionToCopyDst; - cmd->barrier.resource = (GPU_D12_Resource *)resource; -} +// u64 upload_size = 0; +// u64 upload_row_size = 0; +// u32 upload_num_rows = 0; +// D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; +// ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); +// D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; -void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource) -{ - GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); - cmd->kind = GPU_D12_CommandKind_FlushUav; - cmd->barrier.resource = (GPU_D12_Resource *)resource; -} +// { +// D3D12_RANGE read_range = ZI; +// u8 *dst_base = (u8 *)dst + placed_footprint.Offset; +// u8 *src_base = src; + +// u32 z_size = upload_row_size * upload_num_rows; + +// b32 src_overflow = 0; +// for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) +// { +// u32 z_offset = z * z_size; +// for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) +// { +// u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; +// u8 *src_row = src_base + y * upload_row_size + z_offset; +// CopyBytes(dst_row, src_row, upload_row_size); +// } +// } +// } +// } //////////////////////////////////////////////////////////// -//~ @hookdef Dispatch hooks - -void GPU_ClearRasterizable(GPU_CommandList *gpu_cl, GPU_Resource *resource) -{ - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CommandKind_ClearRtv; - cmd->clear.resource = (GPU_D12_Resource *)resource; -} - -void GPU_RasterizeEx(GPU_CommandList *gpu_cl, - u32 sig_size, - void *sig, - VertexShader vs, - PixelShader ps, - u32 rts_count, - u32 instances_count, - GPU_Resource *index_buffer, - GPU_RasterizeMode mode) -{ - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CommandKind_Rasterize; - Assert(sig_size <= sizeof(cmd->rasterize.sig)); - cmd->rasterize.sig_size = MinU32(sizeof(cmd->rasterize.sig), sig_size); - CopyBytes(cmd->rasterize.sig, sig, cmd->rasterize.sig_size); - cmd->rasterize.vs = vs; - cmd->rasterize.ps = ps; - cmd->rasterize.rts_count = rts_count; - Assert(rts_count < GPU_MaxRasterTargets); - cmd->rasterize.viewport = viewport; - cmd->rasterize.scissor = scissor; - cmd->rasterize.instances_count = instances_count; - cmd->rasterize.index_buffer = (GPU_D12_Resource *)index_buffer; - cmd->rasterize.mode = mode; -} - -void GPU_ComputeEx(GPU_CommandList *gpu_cl, - u32 sig_size, - void *sig, - ComputeShader cs, - Vec3U32 threads_count) -{ - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CommandKind_Compute; - Assert(sig_size <= sizeof(cmd->compute.sig)); - cmd->compute.sig_size = MinU32(sizeof(cmd->compute.sig), sig_size); - CopyBytes(cmd->compute.sig, sig, cmd->compute.sig_size); - cmd->compute.cs = cs; - cmd->compute.num_threads_x = threads_count.x; - cmd->compute.num_threads_y = threads_count.y; - cmd->compute.num_threads_z = threads_count.z; -} - -//////////////////////////////////////////////////////////// -//~ @hookdef Copy hooks - -void GPU_CopyResource(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, GPU_Resource *gpu_src) -{ - GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; - GPU_D12_Resource *dst = (GPU_D12_Resource *)gpu_dst; - GPU_D12_Resource *src = (GPU_D12_Resource *)gpu_src; - GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CommandKind_Copy; - cmd->copy.dst = dst; - cmd->copy.src = src; -} - -//////////////////////////////////////////////////////////// -//~ @hookdef Map hooks - -GPU_Mapped GPU_Map(GPU_Resource *gpu_r) -{ - GPU_Mapped result = ZI; - result.resource = gpu_r; - GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r; - D3D12_RANGE read_range = ZI; - HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem); - if (FAILED(hr) || !result.mem) - { - /* TODO: Don't panic */ - Panic(Lit("Failed to map command buffer resource")); - } - return result; -} - -void GPU_Unmap(GPU_Mapped m) -{ - GPU_D12_Resource *r = (GPU_D12_Resource *)m.resource; - ID3D12Resource_Unmap(r->d3d_resource, 0, 0); -} - -void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference) -{ - GPU_D12_SharedState *g = &GPU_D12_shared_state; - - D3D12_RESOURCE_DESC desc = ZI; - ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); - - u64 upload_size = 0; - u64 upload_row_size = 0; - u32 upload_num_rows = 0; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; - ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); - D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; - - { - D3D12_RANGE read_range = ZI; - u8 *dst_base = (u8 *)dst + placed_footprint.Offset; - u8 *src_base = src; - - u32 z_size = upload_row_size * upload_num_rows; - - b32 src_overflow = 0; - for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) - { - u32 z_offset = z * z_size; - for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) - { - u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; - u8 *src_row = src_base + y * upload_row_size + z_offset; - CopyBytes(dst_row, src_row, upload_row_size); - } - } - } -} - -//////////////////////////////////////////////////////////// -//~ @hookdef Statistics +//~ @hookimpl Statistics GPU_Stats GPU_QueryStats(void) { @@ -1799,28 +2041,27 @@ GPU_Stats GPU_QuerySharedMemoryStats(void) void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain) { - GPU_D12_SharedState *g = &GPU_D12_shared_state; - for (u32 i = 0; i < countof(swapchain->buffers); ++i) - { - ID3D12Resource *resource = 0; - HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource); - if (FAILED(hr)) - { - /* TODO: Don't panic */ - Panic(Lit("Failed to get swapchain buffer")); - } - GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i]; - ZeroStruct(sb); - sb->swapchain = swapchain; - sb->d3d_resource = resource; - sb->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap); - sb->state = D3D12_RESOURCE_STATE_COMMON; - ID3D12Device_CreateRenderTargetView(g->device, sb->d3d_resource, 0, sb->rtv_descriptor->handle); - } + // GPU_D12_SharedState *g = &GPU_D12_shared_state; + // for (u32 i = 0; i < countof(swapchain->buffers); ++i) + // { + // ID3D12Resource *resource = 0; + // HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource); + // if (FAILED(hr)) + // { + // /* TODO: Don't panic */ + // Panic(Lit("Failed to get swapchain buffer")); + // } + // GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i]; + // ZeroStruct(sb); + // sb->swapchain = swapchain; + // sb->d3d_resource = resource; + // sb->state = D3D12_RESOURCE_STATE_COMMON; + // } } GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution) { +#if 0 __prof; GPU_D12_SharedState *g = &GPU_D12_shared_state; resolution.x = MaxI32(resolution.x, 1); @@ -1849,7 +2090,6 @@ GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, V for (u32 i = 0; i < countof(swapchain->buffers); ++i) { GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i]; - GPU_D12_ReleaseDescriptor(sb->rtv_descriptor); ID3D12Resource_Release(sb->d3d_resource); } @@ -1870,153 +2110,154 @@ GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, V u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain); return &swapchain->buffers[backbuffer_index]; +#else + return 0; +#endif } -i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color) +void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color) { - GPU_D12_SharedState *g = &GPU_D12_shared_state; + // GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_D12_Swapchain *swapchain = dst->swapchain; - GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(GPU_QueueKind_Direct); - ID3D12GraphicsCommandList *rcl = dx12_cl->cl; - D3D12_RESOURCE_STATES old_texture_state = texture->state; + // GPU_D12_Swapchain *swapchain = dst->swapchain; + // GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(GPU_QueueKind_Direct); + // ID3D12GraphicsCommandList *rcl = dx12_cl->cl; + // D3D12_RESOURCE_STATES old_texture_state = texture->state; - { - u32 barriers_count = 0; - D3D12_RESOURCE_BARRIER rbs[2] = ZI; - /* Transition backbuffer to RENDER_TARGET */ - { - D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition.pResource = dst->d3d_resource; - rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb->Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; - rb->Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - } - ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); - } + // { + // u32 barriers_count = 0; + // D3D12_RESOURCE_BARRIER rbs[2] = ZI; + // /* Transition backbuffer to RENDER_TARGET */ + // { + // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; + // rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // rb->Transition.pResource = dst->d3d_resource; + // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + // rb->Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + // rb->Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + // } + // ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); + // } - /* Clear */ - { - f32 clear_color_arr[4] = { clear_color.x, clear_color.y, clear_color.z, clear_color.w }; - ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, dst->rtv_descriptor->handle, clear_color_arr, 0, 0); - } + // /* Clear */ + // { + // f32 clear_color_arr[4] = { clear_color.x, clear_color.y, clear_color.z, clear_color.w }; + // ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, dst->rtv_descriptor->handle, clear_color_arr, 0, 0); + // } - { - u32 barriers_count = 0; - D3D12_RESOURCE_BARRIER rbs[2] = ZI; - /* Transition backbuffer to COPY_DEST */ - { - D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition.pResource = dst->d3d_resource; - rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - } - /* Transition texture to COPY_SRC */ - if (texture->state != D3D12_RESOURCE_STATE_COPY_SOURCE) - { - D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition.pResource = texture->d3d_resource; - rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb->Transition.StateBefore = texture->state; - rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - texture->state = rb->Transition.StateAfter; - } - ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); - } + // { + // u32 barriers_count = 0; + // D3D12_RESOURCE_BARRIER rbs[2] = ZI; + // /* Transition backbuffer to COPY_DEST */ + // { + // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; + // rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // rb->Transition.pResource = dst->d3d_resource; + // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + // rb->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + // rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + // } + // /* Transition texture to COPY_SRC */ + // if (texture->state != D3D12_RESOURCE_STATE_COPY_SOURCE) + // { + // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; + // rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // rb->Transition.pResource = texture->d3d_resource; + // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + // rb->Transition.StateBefore = texture->state; + // rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + // texture->state = rb->Transition.StateAfter; + // } + // ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); + // } - /* Copy */ - { - D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; - dst_loc.pResource = dst->d3d_resource; - dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_loc.SubresourceIndex = 0; + // /* Copy */ + // { + // D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; + // dst_loc.pResource = dst->d3d_resource; + // dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + // dst_loc.SubresourceIndex = 0; - D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; - src_loc.pResource = texture->d3d_resource; - src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_loc.SubresourceIndex = 0; + // D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; + // src_loc.pResource = texture->d3d_resource; + // src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + // src_loc.SubresourceIndex = 0; - Vec2I32 dst_size = swapchain->resolution; + // Vec2I32 dst_size = swapchain->resolution; - i32 dst_top = dst_p0.y; - i32 dst_left = dst_p0.x; + // i32 dst_top = dst_p0.y; + // i32 dst_left = dst_p0.x; - i32 src_left = src_p0.x; - i32 src_top = src_p0.y; - i32 src_right = src_p1.x; - i32 src_bottom = src_p1.y; + // i32 src_left = src_p0.x; + // i32 src_top = src_p0.y; + // i32 src_right = src_p1.x; + // i32 src_bottom = src_p1.y; - /* Clamp copy src & dst */ - if (dst_left < 0) - { - src_left -= dst_left; - dst_left = 0; - } - if (dst_top < 0) - { - src_top -= dst_top; - dst_top = 0; - } - if (dst_left + (src_left + src_right) > dst_size.x) - { - src_right -= (dst_left + (src_left + src_right)) - dst_size.x; - } - if (dst_top + (src_top + src_bottom) > dst_size.y) - { - src_bottom -= (dst_top + (src_top + src_bottom)) - dst_size.y; - } + // /* Clamp copy src & dst */ + // if (dst_left < 0) + // { + // src_left -= dst_left; + // dst_left = 0; + // } + // if (dst_top < 0) + // { + // src_top -= dst_top; + // dst_top = 0; + // } + // if (dst_left + (src_left + src_right) > dst_size.x) + // { + // src_right -= (dst_left + (src_left + src_right)) - dst_size.x; + // } + // if (dst_top + (src_top + src_bottom) > dst_size.y) + // { + // src_bottom -= (dst_top + (src_top + src_bottom)) - dst_size.y; + // } - if (src_left < src_right && src_bottom > src_top) - { - D3D12_BOX src_box = ZI; - src_box.left = src_left; - src_box.top = src_top; - src_box.right = src_right; - src_box.bottom = src_bottom; - src_box.back = 1; - ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, dst_left, dst_top, 0, &src_loc, &src_box); - } - } + // if (src_left < src_right && src_bottom > src_top) + // { + // D3D12_BOX src_box = ZI; + // src_box.left = src_left; + // src_box.top = src_top; + // src_box.right = src_right; + // src_box.bottom = src_bottom; + // src_box.back = 1; + // ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, dst_left, dst_top, 0, &src_loc, &src_box); + // } + // } - { - u32 barriers_count = 0; - D3D12_RESOURCE_BARRIER rbs[2] = ZI; - /* Transition backbuffer to PRESENT */ - { - D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition.pResource = dst->d3d_resource; - rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - rb->Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - } - /* Transition texture to original state */ - if (texture->state != old_texture_state) - { - D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - rb->Transition.pResource = texture->d3d_resource; - rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb->Transition.StateBefore = texture->state; - rb->Transition.StateAfter = old_texture_state; - texture->state = rb->Transition.StateAfter; - } - ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); - } - - i64 fence_target = GPU_D12_EndRawCommandList(dx12_cl); - return fence_target; + // { + // u32 barriers_count = 0; + // D3D12_RESOURCE_BARRIER rbs[2] = ZI; + // /* Transition backbuffer to PRESENT */ + // { + // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; + // rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // rb->Transition.pResource = dst->d3d_resource; + // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + // rb->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + // rb->Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; + // } + // /* Transition texture to original state */ + // if (texture->state != old_texture_state) + // { + // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; + // rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + // rb->Transition.pResource = texture->d3d_resource; + // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + // rb->Transition.StateBefore = texture->state; + // rb->Transition.StateAfter = old_texture_state; + // texture->state = rb->Transition.StateAfter; + // } + // ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); + // } } //////////////////////////////////////////////////////////// -//~ @hookdef Swapchain operations +//~ @hookimpl Swapchain operations -GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size) +GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size) { +#if 0 GPU_D12_SharedState *g = &GPU_D12_shared_state; HRESULT hr = 0; HWND hwnd = WND_W32_WindowFromHandle(window)->hwnd; @@ -2088,68 +2329,74 @@ GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I3 GPU_D12_InitSwapchainResources(swapchain); - return (GPU_Swapchain *)swapchain; + return (GPU_SwapchainHandle) { .v = (u64)swapchain }; +#else + return (GPU_SwapchainHandle) { 0 }; +#endif } -void GPU_ReleaseSwapchain(GPU_Swapchain *gpu_swapchain) +void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle) { /* TODO */ } -void GPU_YieldOnSwapchain(GPU_Swapchain *gpu_swapchain) +void GPU_YieldOnSwapchain(GPU_SwapchainHandle swapchain_handle) { /* TODO: Actually yield, don't block */ - GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain; - if (swapchain->waitable) - { - WaitForSingleObjectEx(swapchain->waitable, 1000, 1); - } + // GPU_D12_Swapchain *swapchain = GPU_D12_SwapchainFromHandle(swapchain_handle); + // if (swapchain->waitable) + // { + // WaitForSingleObjectEx(swapchain->waitable, 1000, 1); + // } } -i64 GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, GPU_Resource *gpu_texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color) +void GPU_PresentSwapchain(GPU_SwapchainHandle swapchain, Vec4 dst_clear_color, + Vec2U32 dst_size, Vec2U32 dst_offset, + GpuTexture2DPtr src_handle, Vec2U32 src_offset, + i32 vsync) { - GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain; - GPU_D12_Resource *texture = (GPU_D12_Resource *)gpu_texture; - GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_size); + // GPU_D12_Swapchain *swapchain = GPU_D12_SwapchainFromHandle(swapchain_handle); + // GPU_D12_Resource *src = ; + // GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_size); - D3D12_RESOURCE_DESC src_desc = ZI; - D3D12_RESOURCE_DESC dst_desc = ZI; - ID3D12Resource_GetDesc(texture->d3d_resource, &src_desc); - ID3D12Resource_GetDesc(swapchain_buffer->d3d_resource, &dst_desc); + // D3D12_RESOURCE_DESC src_desc = ZI; + // D3D12_RESOURCE_DESC dst_desc = ZI; + // ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); + // ID3D12Resource_GetDesc(swapchain_buffer->d3d_resource, &dst_desc); - b32 is_blitable = src_desc.Dimension == dst_desc.Dimension - && src_desc.SampleDesc.Count == dst_desc.SampleDesc.Count - && src_desc.SampleDesc.Quality == dst_desc.SampleDesc.Quality; - Assert(is_blitable == 1); /* Texture resource must be similar enough to backbuffer resource to blit */ + // b32 is_blitable = src_desc.Dimension == dst_desc.Dimension + // && src_desc.SampleDesc.Count == dst_desc.SampleDesc.Count + // && src_desc.SampleDesc.Quality == dst_desc.SampleDesc.Quality; + // Assert(is_blitable == 1); /* Texture resource must be similar enough to backbuffer resource to blit */ - i64 fence_target = 0; - if (is_blitable) - { - /* Blit */ - fence_target = GPU_D12_BlitToSwapchain(swapchain_buffer, texture, dst_p0, dst_p1, src_p0, src_p1, clear_color); + // i64 fence_target = 0; + // if (is_blitable) + // { + // /* Blit */ + // fence_target = GPU_D12_BlitToSwapchain(swapchain_buffer, src, dst_p0, dst_p1, src_p0, src_p1, clear_color); - u32 present_flags = 0; - if (GPU_D12_TearingIsAllowed && vsync == 0) - { - present_flags |= DXGI_PRESENT_ALLOW_TEARING; - } + // u32 present_flags = 0; + // if (GPU_D12_TearingIsAllowed && vsync == 0) + // { + // present_flags |= DXGI_PRESENT_ALLOW_TEARING; + // } - if (vsync != 0) - { - /* FIXME: Don't flush in fullscreen mode? */ - // DwmFlush(); - } + // if (vsync != 0) + // { + // /* FIXME: Don't flush in fullscreen mode? */ + // // DwmFlush(); + // } - /* Present */ - { - __profn("Present"); - HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags); - if (!SUCCEEDED(hr)) - { - Assert(0); - } - } - } + // /* Present */ + // { + // __profn("Present"); + // HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags); + // if (!SUCCEEDED(hr)) + // { + // Assert(0); + // } + // } + // } - return fence_target; + // return fence_target; } diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 903b7cf3..d2f4ad2c 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -20,9 +20,17 @@ | ((GPU_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) -#define GPU_D12_MaxCbvSrvUavDescriptors (1024 * 64) +#define GPU_D12_MaxCbvSrvUavDescriptors (1024 * 128) #define GPU_D12_MaxSamplerDescriptors (1024 * 1) -#define GPU_D12_MaxRtvDescriptors (1024 * 1) +#define GPU_D12_MaxRtvDescriptors (1024 * 64) + +//////////////////////////////////////////////////////////// +//~ Arena types + +Struct(GPU_D12_Arena) +{ + i32 _; +}; //////////////////////////////////////////////////////////// //~ Pipeline types @@ -62,7 +70,9 @@ Struct(GPU_D12_PipelineBin) Struct(GPU_D12_Descriptor) { GPU_D12_Descriptor *next_free; + struct GPU_D12_DescriptorHeap *heap; + struct GPU_D12_Resource *resource; b32 valid; u32 index; @@ -92,23 +102,18 @@ Struct(GPU_D12_DescriptorHeap) Struct(GPU_D12_Resource) { GPU_D12_Resource *next_free; - GPU_ResourceDesc desc; ID3D12Resource *d3d_resource; - D3D12_RESOURCE_STATES state; - u64 buffer_size; /* Actual size of buffer in GPU memory */ - - GPU_D12_Descriptor *srv_descriptor; - GPU_D12_Descriptor *uav_descriptor; - GPU_D12_Descriptor *rtv_descriptor; - GPU_D12_Descriptor *sampler_descriptor; - - u64 barrier_gen; - D3D12_RESOURCE_BARRIER_TYPE barrier_type; - D3D12_RESOURCE_STATES barrier_state_after; + D3D12_BARRIER_LAYOUT layout; + /* Buffer info */ + GPU_BufferDesc buffer_desc; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; + + /* Texture info */ + b32 is_texture; + GPU_TextureDesc texture_desc; }; Struct(GPU_D12_ResourceReuseList) @@ -130,10 +135,16 @@ Struct(GPU_D12_ResourceReuseListBin) //////////////////////////////////////////////////////////// //~ Queue types +Struct(GPU_D12_CommandQueueDesc) +{ + D3D12_COMMAND_LIST_TYPE type; + D3D12_COMMAND_QUEUE_PRIORITY priority; +}; + Struct(GPU_D12_Queue) { - GPU_D12_QueueDesc desc; ID3D12CommandQueue *d3d_queue; + GPU_D12_CommandQueueDesc desc; Mutex submit_mutex; ID3D12Fence *submit_fence; @@ -161,83 +172,107 @@ Struct(GPU_D12_RawCommandList) //////////////////////////////////////////////////////////// //~ Command list types -Enum(GPU_D12_CommandKind) -{ - GPU_D12_CommandKind_None, +#define GPU_D12_CmdsPerChunk 256 - /* Barrier */ - GPU_D12_CommandKind_TransitionToSrv, - GPU_D12_CommandKind_TransitionToUav, - GPU_D12_CommandKind_TransitionToRtv, - GPU_D12_CommandKind_TransitionToCopySrc, - GPU_D12_CommandKind_TransitionToCopyDst, - GPU_D12_CommandKind_FlushUav, +Enum(GPU_D12_CmdKind) +{ + GPU_D12_CmdKind_None, + + /* Access */ + GPU_D12_CmdKind_SetAccess, + + /* Constant */ + GPU_D12_CmdKind_SetConstant, /* Copy */ - GPU_D12_CommandKind_Copy, - - /* Clear */ - GPU_D12_CommandKind_ClearRtv, - - /* Rasterize */ - GPU_D12_CommandKind_Rasterize, + GPU_D12_CmdKind_Copy, /* Compute */ - GPU_D12_CommandKind_Compute, + GPU_D12_CmdKind_Compute, + + /* Rasterize */ + GPU_D12_CmdKind_Rasterize, + + /* Clear rtv */ + GPU_D12_CmdKind_ClearRtv, }; -Struct(GPU_D12_Command) +Struct(GPU_D12_Cmd) { - GPU_D12_Command *next; - GPU_D12_CommandKind kind; + GPU_D12_CmdKind kind; union { struct { GPU_D12_Resource *resource; - i32 rt_slot; - } barrier; + GPU_AccessKind access_kind; + } access; + + struct + { + i32 slot; + u32 value; + } constant; + struct { GPU_D12_Resource *dst; GPU_D12_Resource *src; - String src_string; - } copy; + u64 dst_offset; + u64 src_offset; + u64 size; + } copy_bytes; + struct { - GPU_D12_Resource *resource; - } clear; + GPU_D12_Resource *dst; + GPU_D12_Resource *src; + Vec3I32 dst_offset; + Vec3I32 src_offset; + Vec3I32 size; + } copy_texels; + + struct + { + ComputeShader cs; + Vec3I32 groups; + } compute; + struct { - u32 sig_size; - u8 sig[256]; VertexShader vs; PixelShader ps; - u32 rts_count; - GPU_Viewport viewport; - GPU_Scissor scissor; u32 instances_count; - GPU_D12_Resource *index_buffer; - GPU_RasterizeMode mode; + D3D12_INDEX_BUFFER_VIEW ibv; + GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRasterTargets]; + Rng3 viewport; + Rng2 scissor; + GPU_RasterMode mode; } rasterize; + struct { - u32 sig_size; - u8 sig[256]; - ComputeShader cs; - u32 num_threads_x; - u32 num_threads_y; - u32 num_threads_z; - } compute; + GPU_D12_Descriptor *rtv_descriptor; + } clear_rtv; }; }; -Struct(GPU_D12_CommandList) +Struct(GPU_D12_CmdChunk) { - GPU_D12_CommandList *next; - GPU_D12_Command *first; - GPU_D12_Command *last; - u64 count; + GPU_D12_CmdChunk *next; + struct GPU_D12_CmdList *cl; + GPU_D12_Cmd *cmds; + u64 cmds_count; +}; + +Struct(GPU_D12_CmdList) +{ + GPU_D12_CmdList *next; + + GPU_D12_CmdChunk *first_cmd_chunk; + GPU_D12_CmdChunk *last_cmd_chunk; + u64 chunks_count; + u64 cmds_count; GPU_QueueKind queue_kind; }; @@ -249,7 +284,6 @@ Struct(GPU_D12_SwapchainBuffer) { struct GPU_D12_Swapchain *swapchain; ID3D12Resource *d3d_resource; - GPU_D12_Descriptor *rtv_descriptor; D3D12_RESOURCE_STATES state; }; @@ -269,14 +303,6 @@ Struct(GPU_D12_Swapchain) //////////////////////////////////////////////////////////// //~ State types -#define GPU_D12_NumResourceReuseBins 1024 - -Struct(GPU_D12_FiberState) -{ - GPU_D12_CommandList *first_free_command_list; - GPU_D12_Command *first_free_command; -}; - Struct(GPU_D12_SharedState) { Atomic64Padded resource_barrier_gen; @@ -286,7 +312,7 @@ Struct(GPU_D12_SharedState) Atomic64 driver_descriptors_allocated; /* Queues */ - GPU_D12_Queue *queues[GPU_NumQueues]; + GPU_D12_Queue queues[GPU_NumQueues]; /* Rootsig */ ID3D12RootSignature *bindless_rootsig; @@ -295,14 +321,17 @@ Struct(GPU_D12_SharedState) GPU_D12_PipelineBin pipeline_bins[1024]; /* Descriptor heaps */ - GPU_D12_DescriptorHeap *cbv_srv_uav_heap; - GPU_D12_DescriptorHeap *sampler_heap; - GPU_D12_DescriptorHeap *rtv_heap; + GPU_D12_DescriptorHeap cbv_srv_uav_heap; + GPU_D12_DescriptorHeap sampler_heap; + GPU_D12_DescriptorHeap rtv_heap; - /* Resources */ - Mutex free_resources_mutex; - GPU_D12_Resource *first_free_resource; - GPU_D12_ResourceReuseListBin resource_reuse_bins[GPU_D12_NumResourceReuseBins]; + /* Command lists */ + Mutex free_cmd_lists_mutex; + GPU_D12_CmdList *first_free_cmd_list; + + /* Command chunks */ + Mutex free_cmd_chunks_mutex; + GPU_D12_CmdChunk *first_free_cmd_chunk; /* Swapchains */ Mutex free_swapchains_mutex; @@ -314,33 +343,19 @@ Struct(GPU_D12_SharedState) ID3D12Device *device; } extern GPU_D12_shared_state; -//////////////////////////////////////////////////////////// -//~ Helpers - -GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id); -DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); -GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl); -u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc, u64 buffer_size); - //////////////////////////////////////////////////////////// //~ Startup void GPU_D12_Startup(void); //////////////////////////////////////////////////////////// -//~ Initialization +//~ Helpers -//- Device initialization -void GPU_D12_InitDevice(void); - -//- Queue initialization -JobDecl(GPU_D12_InitQueue, { GPU_D12_QueueDesc *descs; }); - -//- Heap initialization -GPU_D12_DescriptorHeap *GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, u32 max_descs, u32 desc_size); - -//- Rootsig initialization -void GPU_D12_InitRootsig(void); +DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); +GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); +GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle); +GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); +GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle); //////////////////////////////////////////////////////////// //~ Pipeline operations @@ -359,18 +374,26 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind); GPU_D12_Descriptor *GPU_D12_AcquireDescriptor(GPU_D12_DescriptorHeap *heap); void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor); +GPU_D12_Descriptor *GPU_D12_RtvDescriptorFromPtr(GpuRasterTargetPtr ptr); +D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromPtr(GpuIndexBufferPtr ptr); + //////////////////////////////////////////////////////////// //~ Raw command list operations GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind); u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl); +//////////////////////////////////////////////////////////// +//~ Command helpers + +GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl); + //////////////////////////////////////////////////////////// //~ Swapchain helpers void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain); GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution); -i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color); +void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color); //////////////////////////////////////////////////////////// //~ Sync job diff --git a/src/meta/meta.c b/src/meta/meta.c index 5b1da4f9..69af812c 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -9,44 +9,44 @@ # define IsConsoleApp 1 #endif -#ifndef RtcIsEnabled -# define RtcIsEnabled 1 +#ifndef IsRtcEnabled +# define IsRtcEnabled 1 #endif -#ifndef UnoptimizedIsEnabled -# define UnoptimizedIsEnabled 1 +#ifndef IsUnoptimized +# define IsUnoptimized 1 #endif -#ifndef AsanIsEnabled -# define AsanIsEnabled 0 +#ifndef IsAsanEnabled +# define IsAsanEnabled 0 #endif -#ifndef CrtlibIsEnabled -# define CrtlibIsEnabled 1 +#ifndef IsCrtlibEnabled +# define IsCrtlibEnabled 1 #endif -#ifndef DebinfoEnabled -# define DebinfoEnabled 1 +#ifndef IsDebinfoEnabled +# define IsDebinfoEnabled 1 #endif -#ifndef DeveloperIsEnabled -# define DeveloperIsEnabled 1 +#ifndef IsDeveloperModeEnabled +# define IsDeveloperModeEnabled 1 #endif -#ifndef ProfilingIsEnabled -# define ProfilingIsEnabled 0 +#ifndef IsProfilingEnabled +# define IsProfilingEnabled 0 #endif -#ifndef UnoptimizedIsEnabled -# define UnoptimizedIsEnabled 1 +#ifndef IsUnoptimized +# define IsUnoptimized 1 #endif -#ifndef TestsAreEnabled -# define TestsAreEnabled 0 +#ifndef IsTestingEnabled +# define IsTestingEnabled 0 #endif -#ifndef HotSwappingIsEnabled -# define HotSwappingIsEnabled 0 +#ifndef IsHotSwappingEnabled +# define IsHotSwappingEnabled 0 #endif //////////////////////////////////////////////////////////// @@ -118,7 +118,7 @@ Struct(RunCommandResult) }; JobDecl(RunCommand, { String *cmds; RunCommandResult *results; }); -JobDef(RunCommand, sig, id) +JobImpl(RunCommand, sig, id) { i64 start_ns = TimeNs(); Arena *arena = PermArena(); @@ -204,7 +204,7 @@ void InheritStepResults(Arena *arena, StepResult *dst, u64 srcs_count, StepResul } JobDecl(Step, { StepParams *params; StepResult *results; }); -JobDef(Step, sig, id) +JobImpl(Step, sig, id) { StepParams *params = &sig->params[id]; StepParamsFlag flags = params->flags; @@ -788,7 +788,7 @@ JobDef(Step, sig, id) F_ClearWrite(arc_out_file, arc_contents); PushStringToList(arena, output, StringF(arena, "%F (%F mb)", FmtString(F_GetFileName(arc_out_file)), FmtFloatP((f32)arc_contents.len / 1024 / 1024, 3))); - if (PlatformIsWindows) + if (IsPlatformWindows) { //- Generate rc file String rc_out_file = StringF(arena, "%F.rc", FmtString(store)); @@ -833,7 +833,7 @@ JobDef(Step, sig, id) //~ Startup JobDecl(Build, EmptySig); -JobDef(Build, _, __) +JobImpl(Build, _, __) { Arena *arena = PermArena(); M_ErrorList errors = ZI; @@ -916,15 +916,15 @@ JobDef(Build, _, __) //- Common { PushStringToList(arena, &cp.defs, Lit("-DIsConsoleApp=0")); - PushStringToList(arena, &cp.defs, Lit("-DRtcIsEnabled=1")); - PushStringToList(arena, &cp.defs, Lit("-DAsanIsEnabled=0")); - PushStringToList(arena, &cp.defs, Lit("-DCrtlibIsEnabled=1")); - PushStringToList(arena, &cp.defs, Lit("-DDebinfoEnabled=1")); - PushStringToList(arena, &cp.defs, Lit("-DDeveloperIsEnabled=1")); - PushStringToList(arena, &cp.defs, Lit("-DProfilingIsEnabled=0")); - PushStringToList(arena, &cp.defs, Lit("-DUnoptimizedIsEnabled=1")); - PushStringToList(arena, &cp.defs, Lit("-DTestsAreEnabled=0")); - PushStringToList(arena, &cp.defs, Lit("-DHotSwappingIsEnabled=1")); + PushStringToList(arena, &cp.defs, Lit("-DIsRtcEnabled=1")); + PushStringToList(arena, &cp.defs, Lit("-DIsAsanEnabled=0")); + PushStringToList(arena, &cp.defs, Lit("-DIsCrtlibEnabled=1")); + PushStringToList(arena, &cp.defs, Lit("-DIsDebinfoEnabled=1")); + PushStringToList(arena, &cp.defs, Lit("-DIsDeveloperModeEnabled=1")); + PushStringToList(arena, &cp.defs, Lit("-DIsProfilingEnabled=0")); + PushStringToList(arena, &cp.defs, Lit("-DIsUnoptimized=1")); + PushStringToList(arena, &cp.defs, Lit("-DIsTestingEnabled=0")); + PushStringToList(arena, &cp.defs, Lit("-DIsHotSwappingEnabled=1")); } //- Msvc @@ -1168,7 +1168,7 @@ JobDef(Build, _, __) } //////////////////////////////////////////////////////////// -//~ @hookdef Startup +//~ @hookimpl Startup void StartupLayers(void) { diff --git a/src/meta/meta_lay.c b/src/meta/meta_lay.c index 86735dad..e4f91bf7 100644 --- a/src/meta/meta_lay.c +++ b/src/meta/meta_lay.c @@ -467,7 +467,7 @@ M_Layer M_GetFlattenedEntries(Arena *arena, M_LayerList unflattened, StringList /* Push downstream impl enters to stack */ for (M_Entry *entry = layer->first; entry->valid; entry = entry->next) { - b32 include = (PlatformIsWindows && entry->kind == M_EntryKind_DefaultWindowsImpl); + b32 include = (IsPlatformWindows && entry->kind == M_EntryKind_DefaultWindowsImpl); if (include) { M_Token *impl_token = entry->arg_tokens[0]; diff --git a/src/meta/meta_os/meta_os_inc.h b/src/meta/meta_os/meta_os_inc.h index d2fbcc39..84f72c30 100644 --- a/src/meta/meta_os/meta_os_inc.h +++ b/src/meta/meta_os/meta_os_inc.h @@ -1,5 +1,5 @@ #include "meta_os.h" -#if PlatformIsWindows +#if IsPlatformWindows # include "meta_os_win32/meta_os_win32_inc.h" #endif diff --git a/src/meta/meta_os/meta_os_win32/meta_os_win32.c b/src/meta/meta_os/meta_os_win32/meta_os_win32.c index 5356c3a8..35f0f8cc 100644 --- a/src/meta/meta_os/meta_os_win32/meta_os_win32.c +++ b/src/meta/meta_os/meta_os_win32/meta_os_win32.c @@ -26,14 +26,14 @@ String W32_StringFromError(Arena *arena, DWORD err) } //////////////////////////////////////////////////////////// -//~ @hookdef Startup hook +//~ @hookimpl Startup hook void OS_Startup(void) { } //////////////////////////////////////////////////////////// -//~ @hookdef File system hooks +//~ @hookimpl File system hooks OS_File OS_OpenFile(String path, OS_FileFlag flags, i64 timeout_ns) { @@ -151,7 +151,7 @@ u64 OS_LastWriteTimestampFromPath(String path) } //////////////////////////////////////////////////////////// -//~ @hookdef Directory helper hooks +//~ @hookimpl Directory helper hooks b32 OS_FileOrDirExists(String path) { @@ -197,7 +197,7 @@ void OS_Rm(String path) } //////////////////////////////////////////////////////////// -//~ @hookdef Shell hooks +//~ @hookimpl Shell hooks OS_CommandResult OS_RunCommand(Arena *arena, String cmd) { diff --git a/src/platform/platform_win32/platform_win32.c b/src/platform/platform_win32/platform_win32.c index 56ccf51c..c04534c0 100644 --- a/src/platform/platform_win32/platform_win32.c +++ b/src/platform/platform_win32/platform_win32.c @@ -1,7 +1,7 @@ P_W32_SharedState P_W32_shared_state = ZI; //////////////////////////////////////////////////////////// -//~ @hookdef Startup +//~ @hookimpl Startup void P_Startup(void) { @@ -158,7 +158,7 @@ P_Address P_W32_PlatformAddressFromWin32Address(P_W32_Address ws_addr) //////////////////////////////////////////////////////////// //~ Timer job -JobDef(P_W32_StartTimerSync, _, __) +JobImpl(P_W32_StartTimerSync, _, __) { P_W32_SharedState *g = &P_W32_shared_state; SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL); @@ -220,7 +220,7 @@ JobDef(P_W32_StartTimerSync, _, __) } //////////////////////////////////////////////////////////// -//~ @hookdef File system hooks +//~ @hookimpl File system hooks //- File system helpers String P_GetWritePath(Arena *arena) @@ -514,7 +514,7 @@ P_FileTime P_GetFileTime(P_File file) } //////////////////////////////////////////////////////////// -//~ @hookdef File map hooks +//~ @hookimpl File map hooks P_FileMap P_OpenFileMap(P_File file) { @@ -582,7 +582,7 @@ String P_GetFileMapData(P_FileMap map) } //////////////////////////////////////////////////////////// -//~ @hookdef Address helper hooks +//~ @hookimpl Address helper hooks P_Address P_AddressFromIpPortCstr(char *ip_cstr, char *port_cstr) { @@ -784,7 +784,7 @@ b32 P_MatchAddress(P_Address a, P_Address b) } //////////////////////////////////////////////////////////// -//~ @hookdef Sock hooks +//~ @hookimpl Sock hooks P_Sock *P_AcquireSock(u16 listen_port, u64 sndbuf_size, u64 rcvbuf_size) { @@ -864,7 +864,7 @@ P_SockReadResult P_ReadSock(Arena *arena, P_Sock *sock) } else { -#if RtcIsEnabled +#if IsRtcEnabled i32 err = WSAGetLastError(); if (err != WSAEWOULDBLOCK && err != WSAETIMEDOUT && err != WSAECONNRESET) { @@ -885,7 +885,7 @@ void P_WriteSock(P_Sock *sock, P_Address address, String data) { AddGstat(GSTAT_SOCK_BYTES_SENT, size); } -#if RtcIsEnabled +#if IsRtcEnabled if (size != (i32)data.len) { i32 err = WSAGetLastError(); @@ -896,7 +896,7 @@ void P_WriteSock(P_Sock *sock, P_Address address, String data) } //////////////////////////////////////////////////////////// -//~ @hookdef Utility hooks +//~ @hookimpl Utility hooks void P_MessageBox(P_MessageBoxKind kind, String message) { @@ -978,7 +978,7 @@ String P_GetClipboardText(Arena *arena) } //////////////////////////////////////////////////////////// -//~ @hookdef Timer hooks +//~ @hookimpl Timer hooks Fence *P_GetTimerFence(void) { @@ -993,7 +993,7 @@ i64 P_GetCurrentTimerPeriodNs(void) } //////////////////////////////////////////////////////////// -//~ @hookdef Sleep hooks +//~ @hookimpl Sleep hooks void P_SleepPrecise(i64 sleep_time_ns) { diff --git a/src/playback/playback_wasapi/playback_wasapi.c b/src/playback/playback_wasapi/playback_wasapi.c index 8ecd704a..de4b8f42 100644 --- a/src/playback/playback_wasapi/playback_wasapi.c +++ b/src/playback/playback_wasapi/playback_wasapi.c @@ -182,7 +182,7 @@ void PB_WSP_EndUpdate(PB_WSP_Buff *wspbuf, MIX_PcmF32 src) //////////////////////////////////////////////////////////// //~ Playback job -JobDef(PB_WSP_Playback, UNUSED sig, UNUSED id) +JobImpl(PB_WSP_Playback, UNUSED sig, UNUSED id) { __prof; PB_WSP_SharedState *g = &PB_WSP_shared_state; diff --git a/src/pp/pp.lay b/src/pp/pp.lay index aff673a5..580ce43a 100644 --- a/src/pp/pp.lay +++ b/src/pp/pp.lay @@ -1,48 +1,4 @@ @Layer pp -//- Dependencies -@Dep gpu -@Dep sprite -@Dep font -@Dep collider -@Dep net -@Dep mixer -@Dep rendertest -@Dep playback -@Dep platform -@Dep window -@Dep ui - -//- Api -@IncludeC pp_sim.h -@IncludeC pp_phys.h -@IncludeC pp_space.h -@IncludeC pp_ent.h -@IncludeC pp_step.h -@IncludeC pp_widgets.h -@IncludeC pp_draw.h -@IncludeC pp.h -@IncludeGpu pp_draw.h - -//- Impl -@IncludeC pp_sim.c -@IncludeC pp_phys.c -@IncludeC pp_space.c -@IncludeC pp_ent.c -@IncludeC pp_step.c -@IncludeC pp_widgets.c -@IncludeC pp.c -@IncludeGpu pp_draw.gpu - -//- Embeds -@EmbedDir PP_Resources pp_res - -//- Shaders -@VertexShader PP_MaterialVS -@PixelShader PP_MaterialPS -@ComputeShader PP_FloodCS -@ComputeShader PP_ShadeCS - -//- Startup -@Startup PP_StartupSim -@Startup PP_StartupUser +@Dep pp_sim +@Dep pp_vis diff --git a/src/proto/pp_sim/pp_sim.lay b/src/pp/pp_sim/pp_sim.lay similarity index 100% rename from src/proto/pp_sim/pp_sim.lay rename to src/pp/pp_sim/pp_sim.lay diff --git a/src/proto/pp_sim/pp_sim_core.c b/src/pp/pp_sim/pp_sim_core.c similarity index 99% rename from src/proto/pp_sim/pp_sim_core.c rename to src/pp/pp_sim/pp_sim_core.c index 0d75e9db..efc8839d 100644 --- a/src/proto/pp_sim/pp_sim_core.c +++ b/src/pp/pp_sim/pp_sim_core.c @@ -296,7 +296,7 @@ MergesortCompareFuncDef(S_SortEntsByKeyCmp, arg_a, arg_b, _) //////////////////////////////////////////////////////////// //~ Sim worker -JobDef(S_SimWorker, _, __) +JobImpl(S_SimWorker, _, __) { S_SharedState *shared = &S_shared_state; Arena *frame_arena = AcquireArena(Gibi(64)); diff --git a/src/proto/pp_sim/pp_sim_core.h b/src/pp/pp_sim/pp_sim_core.h similarity index 100% rename from src/proto/pp_sim/pp_sim_core.h rename to src/pp/pp_sim/pp_sim_core.h diff --git a/src/proto/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay similarity index 100% rename from src/proto/pp_vis/pp_vis.lay rename to src/pp/pp_vis/pp_vis.lay diff --git a/src/proto/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c similarity index 99% rename from src/proto/pp_vis/pp_vis_core.c rename to src/pp/pp_vis/pp_vis_core.c index e4c186dd..8204eb20 100644 --- a/src/proto/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -25,7 +25,7 @@ void V_Shutdown(void) //////////////////////////////////////////////////////////// //~ Vis worker -JobDef(V_VisWorker, _, __) +JobImpl(V_VisWorker, _, __) { V_SharedState *vis_shared = &V_shared_state; S_SharedState *sim_shared = &S_shared_state; @@ -581,13 +581,13 @@ JobDef(V_VisWorker, _, __) /* Backdrop pass */ { - GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_ReadWrite); + GPU_SyncAccess(cl, draw_target, GPU_AccessKind_ComputeReadWrite); GPU_Compute(cl, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size)); } /* Shapes pass */ { - GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_RasterTarget); + GPU_SyncAccess(cl, draw_target, GPU_AccessKind_RasterTarget) GPU_Rasterize(cl, V_DVertVS, V_DVertPS, 1, dvert_idxs_buffer, diff --git a/src/proto/pp_vis/pp_vis_core.h b/src/pp/pp_vis/pp_vis_core.h similarity index 100% rename from src/proto/pp_vis/pp_vis_core.h rename to src/pp/pp_vis/pp_vis_core.h diff --git a/src/proto/pp_vis/pp_vis_draw.c b/src/pp/pp_vis/pp_vis_draw.c similarity index 100% rename from src/proto/pp_vis/pp_vis_draw.c rename to src/pp/pp_vis/pp_vis_draw.c diff --git a/src/proto/pp_vis/pp_vis_draw.h b/src/pp/pp_vis/pp_vis_draw.h similarity index 100% rename from src/proto/pp_vis/pp_vis_draw.h rename to src/pp/pp_vis/pp_vis_draw.h diff --git a/src/proto/pp_vis/pp_vis_gpu.gpu b/src/pp/pp_vis/pp_vis_gpu.gpu similarity index 97% rename from src/proto/pp_vis/pp_vis_gpu.gpu rename to src/pp/pp_vis/pp_vis_gpu.gpu index d02daae1..3926b9d2 100644 --- a/src/proto/pp_vis/pp_vis_gpu.gpu +++ b/src/pp/pp_vis/pp_vis_gpu.gpu @@ -5,10 +5,10 @@ ConstantBuffer V_dvert_sig : register (b0); //////////////////////////////////////////////////////////// //~ Backdrop shader -ComputeShader(V_BackdropCS, 8, 8, 1) +ComputeShader2D(V_BackdropCS, 8, 8) { ConstantBuffer sig = V_backdrop_sig; - Vec2U32 target_pos = SV_DispatchThreadID.xy; + Vec2U32 target_pos = SV_DispatchThreadID; Vec2I32 target_size = sig.target_size; if (target_pos.x < target_size.x && target_pos.y < target_size.y) { diff --git a/src/proto/pp_vis/pp_vis_gpu.h b/src/pp/pp_vis/pp_vis_gpu.h similarity index 100% rename from src/proto/pp_vis/pp_vis_gpu.h rename to src/pp/pp_vis/pp_vis_gpu.h diff --git a/src/pp/pp_res/font/fixedsys.ttf b/src/pp/pp_vis/pp_vis_res/font/fixedsys.ttf similarity index 100% rename from src/pp/pp_res/font/fixedsys.ttf rename to src/pp/pp_vis/pp_vis_res/font/fixedsys.ttf diff --git a/src/pp/pp_res/font/roboto-med.ttf b/src/pp/pp_vis/pp_vis_res/font/roboto-med.ttf similarity index 100% rename from src/pp/pp_res/font/roboto-med.ttf rename to src/pp/pp_vis/pp_vis_res/font/roboto-med.ttf diff --git a/src/proto/pp_vis/pp_vis_widgets.c b/src/pp/pp_vis/pp_vis_widgets.c similarity index 100% rename from src/proto/pp_vis/pp_vis_widgets.c rename to src/pp/pp_vis/pp_vis_widgets.c diff --git a/src/proto/pp_vis/pp_vis_widgets.h b/src/pp/pp_vis/pp_vis_widgets.h similarity index 100% rename from src/proto/pp_vis/pp_vis_widgets.h rename to src/pp/pp_vis/pp_vis_widgets.h diff --git a/src/pp/pp.c b/src/pp_old/pp.c similarity index 99% rename from src/pp/pp.c rename to src/pp_old/pp.c index d61b46c4..6397a57d 100644 --- a/src/pp/pp.c +++ b/src/pp_old/pp.c @@ -1398,7 +1398,7 @@ void PP_UpdateUser(void) LAX e0; LAX e1; -#if DeveloperIsEnabled +#if IsDeveloperModeEnabled /* Draw contact points */ { f32 radius = 5; @@ -1893,7 +1893,7 @@ void PP_UpdateUser(void) } } -#if RtcIsEnabled +#if IsRtcEnabled /* Gjk steps */ { if (g->bind_states[PP_BindKind_ResetDebugSteps].num_presses_and_repeats > 0) @@ -2173,7 +2173,7 @@ void PP_UpdateUser(void) //UI_BuildLabelF(\n")); //UI_BuildLabelF(\n")); -#if RtcIsEnabled +#if IsRtcEnabled UI_BuildSpacer(UI_FNT(1, 0)); UI_BuildLabelF("Debug steps: %F", FmtUint(GetGstat(GSTAT_DEBUG_STEPS))); //UI_BuildLabelF(\n")); @@ -2410,7 +2410,7 @@ void PP_UpdateUser(void) //////////////////////////////////////////////////////////// //~ User update job -JobDef(PP_UpdateUserOrSleep, UNUSED sig, UNUSED key) +JobImpl(PP_UpdateUserOrSleep, UNUSED sig, UNUSED key) { PP_SharedUserState *g = &PP_shared_user_state; i64 time_ns = TimeNs(); @@ -2470,7 +2470,7 @@ void PP_GenerateuserInputCmds(PP_Client *user_input_client, u64 tick) //////////////////////////////////////////////////////////// //~ Sim update -JobDef(PP_UpdateSim, UNUSED sig, UNUSED key) +JobImpl(PP_UpdateSim, UNUSED sig, UNUSED key) { PP_SharedUserState *g = &PP_shared_user_state; #if 0 diff --git a/src/pp/pp.h b/src/pp_old/pp.h similarity index 99% rename from src/pp/pp.h rename to src/pp_old/pp.h index 0af2b11a..09b29215 100644 --- a/src/pp/pp.h +++ b/src/pp_old/pp.h @@ -40,7 +40,7 @@ Enum(PP_BindKind) PP_BindKind_ZoomOut, PP_BindKind_Pan, -#if RtcIsEnabled +#if IsRtcEnabled /* Debug */ PP_BindKind_ResetDebugSteps, @@ -92,7 +92,7 @@ Global Readonly PP_BindKind g_binds[Btn_Count] = { [Btn_MWheelDown] = PP_BindKind_ZoomOut, [Btn_M3] = PP_BindKind_Pan, -#if RtcIsEnabled +#if IsRtcEnabled [Btn_ForwardSlash] = PP_BindKind_ResetDebugSteps, [Btn_Comma] = PP_BindKind_DecrementDebugSteps, [Btn_Period] = PP_BindKind_IncrementDebugSteps diff --git a/src/pp_old/pp.lay b/src/pp_old/pp.lay new file mode 100644 index 00000000..a9153b7b --- /dev/null +++ b/src/pp_old/pp.lay @@ -0,0 +1,48 @@ +@Layer pp_old + +//- Dependencies +@Dep gpu +@Dep sprite +@Dep font +@Dep collider +@Dep net +@Dep mixer +@Dep rendertest +@Dep playback +@Dep platform +@Dep window +@Dep ui + +//- Api +@IncludeC pp_sim.h +@IncludeC pp_phys.h +@IncludeC pp_space.h +@IncludeC pp_ent.h +@IncludeC pp_step.h +@IncludeC pp_widgets.h +@IncludeC pp_draw.h +@IncludeC pp.h +@IncludeGpu pp_draw.h + +//- Impl +@IncludeC pp_sim.c +@IncludeC pp_phys.c +@IncludeC pp_space.c +@IncludeC pp_ent.c +@IncludeC pp_step.c +@IncludeC pp_widgets.c +@IncludeC pp.c +@IncludeGpu pp_draw.gpu + +//- Embeds +@EmbedDir PP_Resources pp_res + +//- Shaders +@VertexShader PP_MaterialVS +@PixelShader PP_MaterialPS +@ComputeShader PP_FloodCS +@ComputeShader PP_ShadeCS + +//- Startup +@Startup PP_StartupSim +@Startup PP_StartupUser diff --git a/src/pp/pp_draw.gpu b/src/pp_old/pp_draw.gpu similarity index 100% rename from src/pp/pp_draw.gpu rename to src/pp_old/pp_draw.gpu diff --git a/src/pp/pp_draw.h b/src/pp_old/pp_draw.h similarity index 100% rename from src/pp/pp_draw.h rename to src/pp_old/pp_draw.h diff --git a/src/pp/pp_ent.c b/src/pp_old/pp_ent.c similarity index 99% rename from src/pp/pp_ent.c rename to src/pp_old/pp_ent.c index db0b68f0..67fbeb0d 100644 --- a/src/pp/pp_ent.c +++ b/src/pp_old/pp_ent.c @@ -236,7 +236,7 @@ void PP_SetEntKey(PP_Ent *ent, PP_EntKey key) /* Insert new key into lookup */ if (!PP_IsNilEntKey(key)) { -#if RtcIsEnabled +#if IsRtcEnabled { PP_Ent *existing = PP_EntFromKey(ss, key); /* Collision should be extremely unlikely under normal circumstances, there's probably a logic error somewhere. */ diff --git a/src/pp/pp_ent.h b/src/pp_old/pp_ent.h similarity index 100% rename from src/pp/pp_ent.h rename to src/pp_old/pp_ent.h diff --git a/src/pp/pp_phys.c b/src/pp_old/pp_phys.c similarity index 99% rename from src/pp/pp_phys.c rename to src/pp_old/pp_phys.c index b9369b5b..8bf6d6c8 100644 --- a/src/pp/pp_phys.c +++ b/src/pp_old/pp_phys.c @@ -170,7 +170,7 @@ void PP_CreateAndUpdateContacts(PP_PhysStepCtx *ctx, f32 elapsed_dt, u64 phys_it contact->vcp1 = SubVec2(point, e1_xf.og); contact->starting_separation = sep; -#if DeveloperIsEnabled +#if IsDeveloperModeEnabled contact->dbg_pt = point; #endif } diff --git a/src/pp/pp_phys.h b/src/pp_old/pp_phys.h similarity index 99% rename from src/pp/pp_phys.h rename to src/pp_old/pp_phys.h index 1c10d06d..3a84f308 100644 --- a/src/pp/pp_phys.h +++ b/src/pp_old/pp_phys.h @@ -54,7 +54,7 @@ Struct(PP_ContactPoint) f32 inv_tangent_mass; /* Debugging */ -#if DeveloperIsEnabled +#if IsDeveloperModeEnabled Vec2 dbg_pt; #endif }; diff --git a/src/proto/pp_vis/pp_vis_res/font/fixedsys.ttf b/src/pp_old/pp_res/font/fixedsys.ttf similarity index 100% rename from src/proto/pp_vis/pp_vis_res/font/fixedsys.ttf rename to src/pp_old/pp_res/font/fixedsys.ttf diff --git a/src/proto/pp_vis/pp_vis_res/font/roboto-med.ttf b/src/pp_old/pp_res/font/roboto-med.ttf similarity index 100% rename from src/proto/pp_vis/pp_vis_res/font/roboto-med.ttf rename to src/pp_old/pp_res/font/roboto-med.ttf diff --git a/src/pp/pp_res/icon.ico b/src/pp_old/pp_res/icon.ico similarity index 100% rename from src/pp/pp_res/icon.ico rename to src/pp_old/pp_res/icon.ico diff --git a/src/pp/pp_res/sprite/blood.ase b/src/pp_old/pp_res/sprite/blood.ase similarity index 100% rename from src/pp/pp_res/sprite/blood.ase rename to src/pp_old/pp_res/sprite/blood.ase diff --git a/src/pp/pp_res/sprite/box.ase b/src/pp_old/pp_res/sprite/box.ase similarity index 100% rename from src/pp/pp_res/sprite/box.ase rename to src/pp_old/pp_res/sprite/box.ase diff --git a/src/pp/pp_res/sprite/box_rounded.ase b/src/pp_old/pp_res/sprite/box_rounded.ase similarity index 100% rename from src/pp/pp_res/sprite/box_rounded.ase rename to src/pp_old/pp_res/sprite/box_rounded.ase diff --git a/src/pp/pp_res/sprite/bullet.ase b/src/pp_old/pp_res/sprite/bullet.ase similarity index 100% rename from src/pp/pp_res/sprite/bullet.ase rename to src/pp_old/pp_res/sprite/bullet.ase diff --git a/src/pp/pp_res/sprite/crosshair.ase b/src/pp_old/pp_res/sprite/crosshair.ase similarity index 100% rename from src/pp/pp_res/sprite/crosshair.ase rename to src/pp_old/pp_res/sprite/crosshair.ase diff --git a/src/pp/pp_res/sprite/gun.ase b/src/pp_old/pp_res/sprite/gun.ase similarity index 100% rename from src/pp/pp_res/sprite/gun.ase rename to src/pp_old/pp_res/sprite/gun.ase diff --git a/src/pp/pp_res/sprite/tile.ase b/src/pp_old/pp_res/sprite/tile.ase similarity index 100% rename from src/pp/pp_res/sprite/tile.ase rename to src/pp_old/pp_res/sprite/tile.ase diff --git a/src/pp/pp_res/sprite/tim.ase b/src/pp_old/pp_res/sprite/tim.ase similarity index 100% rename from src/pp/pp_res/sprite/tim.ase rename to src/pp_old/pp_res/sprite/tim.ase diff --git a/src/pp/pp_sim.c b/src/pp_old/pp_sim.c similarity index 100% rename from src/pp/pp_sim.c rename to src/pp_old/pp_sim.c diff --git a/src/pp/pp_sim.h b/src/pp_old/pp_sim.h similarity index 100% rename from src/pp/pp_sim.h rename to src/pp_old/pp_sim.h diff --git a/src/pp/pp_space.c b/src/pp_old/pp_space.c similarity index 100% rename from src/pp/pp_space.c rename to src/pp_old/pp_space.c diff --git a/src/pp/pp_space.h b/src/pp_old/pp_space.h similarity index 100% rename from src/pp/pp_space.h rename to src/pp_old/pp_space.h diff --git a/src/pp/pp_step.c b/src/pp_old/pp_step.c similarity index 100% rename from src/pp/pp_step.c rename to src/pp_old/pp_step.c diff --git a/src/pp/pp_step.h b/src/pp_old/pp_step.h similarity index 100% rename from src/pp/pp_step.h rename to src/pp_old/pp_step.h diff --git a/src/pp/pp_widgets.c b/src/pp_old/pp_widgets.c similarity index 100% rename from src/pp/pp_widgets.c rename to src/pp_old/pp_widgets.c diff --git a/src/pp/pp_widgets.h b/src/pp_old/pp_widgets.h similarity index 100% rename from src/pp/pp_widgets.h rename to src/pp_old/pp_widgets.h diff --git a/src/prof/prof_tracy.cpp b/src/prof/prof_tracy.cpp index 3081822d..7a3b35ab 100644 --- a/src/prof/prof_tracy.cpp +++ b/src/prof/prof_tracy.cpp @@ -1,4 +1,4 @@ -#if defined(ProfilingIsEnabled) && ProfilingIsEnabled == 1 +#if defined(IsProfilingEnabled) && IsProfilingEnabled == 1 #pragma clang diagnostic push #pragma clang diagnostic ignored "-Weverything" diff --git a/src/prof/prof_tracy.h b/src/prof/prof_tracy.h index 1b399f82..2ddc5481 100644 --- a/src/prof/prof_tracy.h +++ b/src/prof/prof_tracy.h @@ -1,10 +1,10 @@ -#if defined(ProfilingIsEnabled) && ProfilingIsEnabled == 1 +#if defined(IsProfilingEnabled) && IsProfilingEnabled == 1 //////////////////////////////////////////////////////////// //~ Profiling enabled #ifndef __clang__ -# error Only clang is supported when compiling with ProfilingIsEnabled=1 (cleanup attributes are required for profiling markup) +# error Only clang is supported when compiling with IsProfilingEnabled=1 (cleanup attributes are required for profiling markup) #endif #define ProfilingSystemTrace 0 @@ -82,7 +82,7 @@ enum __prof_plot_type { #define __prof_plot_i(name, val) #define __prof_is_connected() 0 -#endif /* ProfilingIsEnabled */ +#endif /* IsProfilingEnabled */ #if ProfilingLocks # define __proflock_ctx(name) struct TracyCSharedLockCtx *name @@ -111,7 +111,7 @@ enum __prof_plot_type { # define __proflock_after_try_shared_lock(ctx, acquired) # define __proflock_mark(ctx) # define __proflock_custom_name(ctx, name, len) -#endif /* ProfilingIsEnabled && ProfilingLocks */ +#endif /* IsProfilingEnabled && ProfilingLocks */ #if ProfilingGpu /* Dx11 */ diff --git a/src/proto/pp.lay b/src/proto/pp.lay deleted file mode 100644 index 5da4b077..00000000 --- a/src/proto/pp.lay +++ /dev/null @@ -1,4 +0,0 @@ -@Layer proto - -@Dep pp_sim -@Dep pp_vis diff --git a/src/proto/proto.c b/src/proto/proto.c new file mode 100644 index 00000000..14819158 --- /dev/null +++ b/src/proto/proto.c @@ -0,0 +1,3 @@ +void PR_Startup(void) +{ +} diff --git a/src/proto/proto.lay b/src/proto/proto.lay new file mode 100644 index 00000000..e68376b9 --- /dev/null +++ b/src/proto/proto.lay @@ -0,0 +1,13 @@ +@Layer proto + +//- Dependencies + +@Dep gpu + +//- Impl + +@IncludeC proto.c + +//- Startup + +@Startup PR_Startup diff --git a/src/sound/sound.c b/src/sound/sound.c index 6789872d..54da66a9 100644 --- a/src/sound/sound.c +++ b/src/sound/sound.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////// //~ Load job -JobDef(SND_Load, sig, UNUSED id) +JobImpl(SND_Load, sig, UNUSED id) { __prof; TempArena scratch = BeginScratchNoConflict(); diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index fa4b2ce1..c1008b2a 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -5,7 +5,7 @@ SPR_SharedState SPR_shared_state = ZI; //////////////////////////////////////////////////////////// //~ Load jobs -JobDef(SPR_LoadTexture, sig, _) +JobImpl(SPR_LoadTexture, sig, _) { TempArena scratch = BeginScratchNoConflict(); SPR_Entry *entry = sig->entry; @@ -44,7 +44,7 @@ JobDef(SPR_LoadTexture, sig, _) EndScratch(scratch); } -JobDef(SPR_LoadSheet, sig, _) +JobImpl(SPR_LoadSheet, sig, _) { TempArena scratch = BeginScratchNoConflict(); Arena *perm = PermArena(); diff --git a/src/ttf/ttf_dwrite/ttf_dwrite.c b/src/ttf/ttf_dwrite/ttf_dwrite.c index 20516c3f..a06f70d7 100644 --- a/src/ttf/ttf_dwrite/ttf_dwrite.c +++ b/src/ttf/ttf_dwrite/ttf_dwrite.c @@ -4,7 +4,7 @@ TTF_DW_SharedState TTF_DW_shared_state = ZI; //////////////////////////////////////////////////////////// -//~ @hookdef Startup +//~ @hookimpl Startup /* Call this during font system startup */ void TTF_Startup(void) @@ -16,7 +16,7 @@ void TTF_Startup(void) * 10? Need to verify. Maybe should just use a custom loader. (We're only * using a factory5 since I think WriteInMemoryFileLoader wasn't * implemented until then) */ -#if CompilerIsClang +#if IsCompilerClang # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wlanguage-extension-token" /* for __uuidof */ #endif @@ -25,7 +25,7 @@ void TTF_Startup(void) &IID_IDWriteFactory5, (void **)&g->factory ); -#if CompilerIsClang +#if IsCompilerClang # pragma clang diagnostic pop #endif if (error != S_OK) @@ -36,7 +36,7 @@ void TTF_Startup(void) } //////////////////////////////////////////////////////////// -//~ @hookdef Decode +//~ @hookimpl Decode TTF_Decoded TTF_Decode(Arena *arena, String encoded, f32 em_size, u32 *cache_codes, u32 cache_codes_count) { diff --git a/src/window/window_win32/window_win32.c b/src/window/window_win32/window_win32.c index 6c251282..f339bc29 100644 --- a/src/window/window_win32/window_win32.c +++ b/src/window/window_win32/window_win32.c @@ -1,7 +1,7 @@ WND_W32_SharedState WND_W32_shared_state = ZI; //////////////////////////////////////////////////////////// -//~ @hookdef Startup +//~ @hookimpl Startup void WND_Startup(void) { @@ -102,7 +102,7 @@ WND_W32_Window *WND_W32_WindowFromHandle(WND_Handle handle) //~ Initialization /* Win32 limitation: Window must be initialized on same thread that processes events */ -JobDef(WND_W32_ProcessMessagesForever, sig, id) +JobImpl(WND_W32_ProcessMessagesForever, sig, id) { WND_W32_SharedState *g = &WND_W32_shared_state; WND_W32_Window *window = &g->window; @@ -385,7 +385,7 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l } //////////////////////////////////////////////////////////// -//~ @hookdef Cmds +//~ @hookimpl Cmds void WND_PushCmd_(WND_Frame frame, WND_Cmd desc) { @@ -400,7 +400,7 @@ void WND_PushCmd_(WND_Frame frame, WND_Cmd desc) } //////////////////////////////////////////////////////////// -//~ @hookdef Frame +//~ @hookimpl Frame WND_Frame WND_BeginFrame(void) {