revert d3d12 to enhanced barriers w/ explicit layouts. use independent-device agility sdk.
This commit is contained in:
parent
6f35da3fa6
commit
27885ead8a
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -17,6 +17,7 @@
|
||||
*.exe filter=lfs diff=lfs merge=lfs -text
|
||||
*.dll filter=lfs diff=lfs merge=lfs -text
|
||||
*.lib filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.tga filter=lfs diff=lfs merge=lfs -text
|
||||
*.ase filter=lfs diff=lfs merge=lfs -text
|
||||
*.ttf filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
@ -501,7 +501,7 @@ i32 W32_Main(void)
|
||||
// Create app dir
|
||||
{
|
||||
String path = PathFromString(perm, appdir_path, '\\');
|
||||
wchar_t *path_wstr = WstrFromString(perm, appdir_path);
|
||||
wchar_t *path_wstr = WstrFromString(perm, path);
|
||||
i32 err_code = SHCreateDirectoryExW(0, path_wstr, 0);
|
||||
String err = StringF(perm, "Error code %F", FmtSint(err_code));
|
||||
switch (err_code)
|
||||
@ -519,6 +519,10 @@ i32 W32_Main(void)
|
||||
{
|
||||
err = Lit("User canceled the operation");
|
||||
} break;
|
||||
case ERROR_PATH_NOT_FOUND:
|
||||
{
|
||||
err = Lit("The system cannot find the path specified.");
|
||||
} break;
|
||||
}
|
||||
if (err_code != ERROR_SUCCESS && err_code != ERROR_ALREADY_EXISTS && err_code != ERROR_FILE_EXISTS)
|
||||
{
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include <dwmapi.h>
|
||||
#include <avrt.h>
|
||||
#include <shellapi.h>
|
||||
#include <compressapi.h>
|
||||
// #pragma warning(pop)
|
||||
|
||||
#ifndef BCRYPT_RNG_ALG_HANDLE
|
||||
@ -37,6 +38,8 @@
|
||||
#pragma comment(lib, "kernel32")
|
||||
#pragma comment(lib, "user32")
|
||||
#pragma comment(lib, "bcrypt")
|
||||
#pragma comment(lib, "gdi32")
|
||||
#pragma comment(lib, "cabinet")
|
||||
#pragma comment(lib, "shell32")
|
||||
#pragma comment(lib, "ole32")
|
||||
#pragma comment(lib, "winmm")
|
||||
|
||||
@ -316,6 +316,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame)
|
||||
gpu_perm, cl,
|
||||
G_Format_R8G8B8A8_Unorm_Srgb,
|
||||
atlas->dims,
|
||||
G_Layout_Simultaneous,
|
||||
.name = Lit("Glyph atlas")
|
||||
);
|
||||
atlas->tex = G_PushTexture2DRef(gpu_perm, atlas->tex_res);
|
||||
|
||||
@ -25,6 +25,7 @@ void G_BootstrapCommon(void)
|
||||
gpu_perm, cl,
|
||||
G_Format_R8G8B8A8_Uint,
|
||||
VEC2I32(8, 8),
|
||||
G_Layout_Common,
|
||||
.flags = G_ResourceFlag_ZeroMemory
|
||||
);
|
||||
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
|
||||
@ -42,7 +43,8 @@ void G_BootstrapCommon(void)
|
||||
noise_tex = G_PushTexture3D(
|
||||
gpu_perm, cl,
|
||||
G_Format_R16_Uint,
|
||||
noise_dims
|
||||
noise_dims,
|
||||
G_Layout_Common
|
||||
);
|
||||
G_CopyCpuToTexture(
|
||||
cl,
|
||||
|
||||
@ -188,6 +188,117 @@ Enum(G_Format)
|
||||
G_Format_COUNT = 192
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Memory sync types
|
||||
|
||||
Enum(G_Stage)
|
||||
{
|
||||
G_Stage_None = 0,
|
||||
|
||||
// Compute stages
|
||||
G_Stage_ComputeShading = (1 << 1),
|
||||
|
||||
// Draw stages
|
||||
G_Stage_IndexAssembly = (1 << 2),
|
||||
G_Stage_VertexShading = (1 << 3),
|
||||
G_Stage_PixelShading = (1 << 4),
|
||||
G_Stage_DepthStencil = (1 << 5),
|
||||
G_Stage_RenderTarget = (1 << 6),
|
||||
|
||||
// Copy stages
|
||||
G_Stage_Copy = (1 << 7),
|
||||
|
||||
// Indirect stages
|
||||
G_Stage_Indirect = (1 << 8),
|
||||
|
||||
// Aggregate stages
|
||||
G_Stage_Drawing = G_Stage_IndexAssembly |
|
||||
G_Stage_VertexShading |
|
||||
G_Stage_PixelShading |
|
||||
G_Stage_DepthStencil |
|
||||
G_Stage_RenderTarget,
|
||||
|
||||
G_Stage_Shading = G_Stage_ComputeShading |
|
||||
G_Stage_VertexShading |
|
||||
G_Stage_PixelShading,
|
||||
|
||||
G_Stage_All = 0xFFFFFFFF
|
||||
};
|
||||
|
||||
Enum(G_Access)
|
||||
{
|
||||
G_Access_None = 0,
|
||||
|
||||
G_Access_ShaderReadWrite = (1 << 1),
|
||||
G_Access_ShaderRead = (1 << 2),
|
||||
|
||||
G_Access_CopyWrite = (1 << 3),
|
||||
G_Access_CopyRead = (1 << 4),
|
||||
|
||||
G_Access_DepthStencilRead = (1 << 5),
|
||||
G_Access_DepthStencilWrite = (1 << 6),
|
||||
G_Access_RenderTargetWrite = (1 << 7),
|
||||
|
||||
G_Access_IndexBuffer = (1 << 8),
|
||||
G_Access_IndirectArgument = (1 << 9),
|
||||
|
||||
G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier
|
||||
};
|
||||
|
||||
Enum(G_Layout)
|
||||
{
|
||||
G_Layout_NoChange,
|
||||
|
||||
G_Layout_Undefined, // No access <-- D3D12_BARRIER_LAYOUT_UNDEFINED
|
||||
|
||||
// Simultaneous layout allows a resource to be used on any queue with any
|
||||
// access type (except depth-stencil). Resources cannot transition to/from
|
||||
// this layout, they must be created with it. Allows concurrent reads
|
||||
// while up to 1 write is occurring to non-overlapping regions.
|
||||
|
||||
G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
|
||||
|
||||
|
||||
G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON
|
||||
|
||||
//////////////////////////////
|
||||
//- Direct queue
|
||||
|
||||
G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON
|
||||
G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ
|
||||
|
||||
G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE
|
||||
G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET
|
||||
|
||||
//////////////////////////////
|
||||
//- Compute queue
|
||||
|
||||
G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON
|
||||
|
||||
//////////////////////////////
|
||||
//- Direct & Compute queue
|
||||
|
||||
G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ
|
||||
G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS
|
||||
G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST
|
||||
};
|
||||
|
||||
// Barrier will execute after previous stages specified by `stage_prev`, and before next stages specified by `stage_next`.
|
||||
// When barrier executes:
|
||||
// - Necessary resource flushes will occur based on `access_prev` & `access_next`
|
||||
// - Texture layout will transition based on `layout` (if specified)
|
||||
Struct(G_MemoryBarrierDesc)
|
||||
{
|
||||
G_ResourceHandle resource;
|
||||
b32 is_global;
|
||||
G_Stage stage_prev;
|
||||
G_Stage stage_next;
|
||||
G_Access access_prev;
|
||||
G_Access access_next;
|
||||
G_Layout layout;
|
||||
RngI32 mips; // Inclusive range of texture mip indices to sync
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Filter types
|
||||
|
||||
@ -303,6 +414,7 @@ Struct(G_TextureDesc)
|
||||
G_ResourceFlag flags;
|
||||
G_Format format;
|
||||
Vec3I32 dims;
|
||||
G_Layout initial_layout;
|
||||
Vec4 clear_color;
|
||||
i32 max_mips; // Will be clamped to range [1, max mips]
|
||||
String name;
|
||||
@ -424,34 +536,37 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_R
|
||||
} \
|
||||
)
|
||||
|
||||
#define G_PushTexture1D(arena, cl, _format, _size, ...) G_PushResource((arena), (cl), \
|
||||
#define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
|
||||
(G_ResourceDesc) { \
|
||||
.kind = G_ResourceKind_Texture1D, \
|
||||
.texture = { \
|
||||
.format = (_format), \
|
||||
.dims = VEC3I32((_size), 1, 1), \
|
||||
.initial_layout = (_initial_layout), \
|
||||
__VA_ARGS__ \
|
||||
} \
|
||||
} \
|
||||
)
|
||||
|
||||
#define G_PushTexture2D(arena, cl, _format, _size, ...) G_PushResource((arena), (cl), \
|
||||
#define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
|
||||
(G_ResourceDesc) { \
|
||||
.kind = G_ResourceKind_Texture2D, \
|
||||
.texture = { \
|
||||
.format = (_format), \
|
||||
.dims = VEC3I32((_size).x, (_size).y, 1), \
|
||||
.initial_layout = (_initial_layout), \
|
||||
__VA_ARGS__ \
|
||||
} \
|
||||
} \
|
||||
)
|
||||
|
||||
#define G_PushTexture3D(arena, cl, _format, _size, ...) G_PushResource((arena), (cl), \
|
||||
#define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
|
||||
(G_ResourceDesc) { \
|
||||
.kind = G_ResourceKind_Texture3D, \
|
||||
.texture = { \
|
||||
.format = (_format), \
|
||||
.dims = (_size), \
|
||||
.initial_layout = (_initial_layout), \
|
||||
__VA_ARGS__ \
|
||||
} \
|
||||
} \
|
||||
@ -608,7 +723,50 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size
|
||||
|
||||
//- Memory sync
|
||||
|
||||
void G_Barrier(G_CommandListHandle cl);
|
||||
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
|
||||
|
||||
#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
|
||||
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
|
||||
.resource = (_resource), \
|
||||
.stage_prev = _stage_prev, \
|
||||
.access_prev = _access_prev, \
|
||||
.stage_next = _stage_next, \
|
||||
.access_next = _access_next, \
|
||||
.mips.max = 64, \
|
||||
__VA_ARGS__ \
|
||||
})
|
||||
|
||||
#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \
|
||||
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
|
||||
.resource = (_resource), \
|
||||
.stage_prev = _stage_prev, \
|
||||
.access_prev = _access_prev, \
|
||||
.stage_next = _stage_next, \
|
||||
.access_next = _access_next, \
|
||||
.layout = _layout, \
|
||||
.mips.max = 64, \
|
||||
__VA_ARGS__ \
|
||||
})
|
||||
|
||||
#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
|
||||
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
|
||||
.is_global = 1, \
|
||||
.stage_prev = _stage_prev, \
|
||||
.access_prev = _access_prev, \
|
||||
.stage_next = _stage_next, \
|
||||
.access_next = _access_next, \
|
||||
.mips.max = 64, \
|
||||
__VA_ARGS__ \
|
||||
})
|
||||
|
||||
#define G_DumbMemorySync(cl, resource, ...) \
|
||||
G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
|
||||
|
||||
#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \
|
||||
G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__)
|
||||
|
||||
#define G_DumbGlobalMemorySync(cl, ...) \
|
||||
G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
|
||||
|
||||
//- Compute
|
||||
|
||||
|
||||
5
src/gpu/gpu_dx12/gpu_dx12.lay
generated
5
src/gpu/gpu_dx12/gpu_dx12.lay
generated
@ -1,5 +1,10 @@
|
||||
@Layer gpu_dx12
|
||||
|
||||
//////////////////////////////
|
||||
//- Resources
|
||||
|
||||
@EmbedDir G_D12_Resources gpu_dx12_res
|
||||
|
||||
//////////////////////////////
|
||||
//- Api
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -72,7 +72,7 @@ Struct(G_D12_Resource)
|
||||
u64 uid;
|
||||
|
||||
// D3D12 resource
|
||||
D3D12_RESOURCE_DESC d3d_desc;
|
||||
D3D12_RESOURCE_DESC1 d3d_desc;
|
||||
ID3D12Resource *d3d_resource;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
|
||||
void *mapped;
|
||||
@ -86,6 +86,7 @@ Struct(G_D12_Resource)
|
||||
G_Format texture_format;
|
||||
Vec3I32 texture_dims;
|
||||
i32 texture_mips;
|
||||
D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips];
|
||||
|
||||
// Sampler info
|
||||
G_SamplerDesc sampler_desc;
|
||||
@ -261,7 +262,7 @@ Struct(G_D12_RawCommandList)
|
||||
u64 commit_fence_target;
|
||||
|
||||
ID3D12CommandAllocator *d3d_ca;
|
||||
ID3D12GraphicsCommandList *d3d_cl;
|
||||
ID3D12GraphicsCommandList7 *d3d_cl;
|
||||
|
||||
// Direct queue command lists keep a constant list of CPU-only descriptors
|
||||
G_D12_Descriptor *rtv_descriptors[G_MaxRenderTargets];
|
||||
@ -323,7 +324,10 @@ Struct(G_D12_Cmd)
|
||||
|
||||
struct
|
||||
{
|
||||
G_MemoryBarrierDesc desc;
|
||||
|
||||
// Post-batch data
|
||||
b32 is_end_of_batch;
|
||||
u64 batch_gen;
|
||||
} barrier;
|
||||
|
||||
@ -474,7 +478,7 @@ Struct(G_D12_Ctx)
|
||||
// Device
|
||||
IDXGIFactory6 *factory;
|
||||
IDXGIAdapter3 *adapter;
|
||||
ID3D12Device1 *device;
|
||||
ID3D12Device10 *device;
|
||||
|
||||
// Release-queue
|
||||
Mutex pending_releases_mutex;
|
||||
@ -505,7 +509,10 @@ G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle);
|
||||
G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle);
|
||||
|
||||
DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format);
|
||||
b32 G_D12_IsSimultaneous(G_D12_Resource *resource);
|
||||
D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages);
|
||||
D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses);
|
||||
D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout);
|
||||
String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout);
|
||||
|
||||
void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip);
|
||||
|
||||
|
||||
BIN
src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat
(Stored with Git LFS)
Normal file
BIN
src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat
(Stored with Git LFS)
Normal file
BIN
src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat
(Stored with Git LFS)
Normal file
Binary file not shown.
@ -26,6 +26,19 @@ Struct(PLT_FileMap)
|
||||
b32 valid;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Compression types
|
||||
|
||||
Enum(PLT_CompressionLevel)
|
||||
{
|
||||
PLT_CompressionLevel_0, // Fastest
|
||||
PLT_CompressionLevel_1,
|
||||
PLT_CompressionLevel_2,
|
||||
PLT_CompressionLevel_3,
|
||||
|
||||
PLT_CompressionLevel_COUNT
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Message box types
|
||||
|
||||
@ -74,6 +87,12 @@ PLT_FileMap PLT_OpenFileMap(PLT_File file);
|
||||
void PLT_CloseFileMap(PLT_FileMap map);
|
||||
String PLT_GetFileMapData(PLT_FileMap map);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Compression
|
||||
|
||||
String PLT_Compress(Arena *arena, String data, PLT_CompressionLevel level);
|
||||
String PLT_Decompress(Arena *arena, String data, PLT_CompressionLevel level);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Utils
|
||||
|
||||
|
||||
@ -12,6 +12,19 @@ void PLT_Bootstrap(void)
|
||||
DispatchWave(Lit("Win32 timer sync"), 1, PLT_W32_SyncTimerForever, 0);
|
||||
}
|
||||
|
||||
// Maps a platform-independent compression level to a Win32 Compression API
// algorithm identifier (a COMPRESS_ALGORITHM_* value).
//
// Levels that fall outside the table (negative values, or anything at or past
// PLT_CompressionLevel_COUNT) are clamped to the nearest valid entry rather
// than indexing out of bounds.
DWORD PLT_W32_CompressionAlgorithmFromLevel(PLT_CompressionLevel level)
{
  // Win32 compression algorithms from fastest -> slowest
  PERSIST Readonly DWORD algos[] = {
    COMPRESS_ALGORITHM_XPRESS,
    COMPRESS_ALGORITHM_XPRESS_HUFF,
    COMPRESS_ALGORITHM_MSZIP,
    COMPRESS_ALGORITHM_LZMS,
  };
  // The upper clamp bound must be the last valid index, not the element count,
  // otherwise `level == countof(algos)` reads one past the end of the table.
  i32 last_idx = (i32)countof(algos) - 1;
  i32 algo_idx = ClampI32(level, 0, last_idx);
  return algos[algo_idx];
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Time
|
||||
|
||||
@ -258,50 +271,41 @@ void PLT_CloseFile(PLT_File file)
|
||||
|
||||
String PLT_ReadFile(Arena *arena, PLT_File file)
|
||||
{
|
||||
i64 size = 0;
|
||||
GetFileSizeEx((HANDLE)file.handle, (PLARGE_INTEGER)&size);
|
||||
|
||||
String result;
|
||||
result.len = size;
|
||||
if (size > 0)
|
||||
String result = Zi;
|
||||
HANDLE handle = (HANDLE)file.handle;
|
||||
u32 chunk_size = Kibi(64);
|
||||
result.text = ArenaNext(arena, u8);
|
||||
for (;;)
|
||||
{
|
||||
// ReadFile returns non-zero on success
|
||||
// TODO: error checking
|
||||
result.text = PushStructsNoZero(arena, u8, size);
|
||||
ReadFile(
|
||||
(HANDLE)file.handle,
|
||||
result.text,
|
||||
(DWORD)result.len,
|
||||
0,
|
||||
0
|
||||
);
|
||||
u8 *chunk = PushStructsNoZero(arena, u8, chunk_size);
|
||||
DWORD chunk_bytes_read = 0;
|
||||
ReadFile(handle, chunk, chunk_size, &chunk_bytes_read, 0);
|
||||
result.len += chunk_bytes_read;
|
||||
if (chunk_bytes_read < chunk_size)
|
||||
{
|
||||
PopStructsNoCopy(arena, u8, chunk_size - chunk_bytes_read);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void PLT_WriteFile(PLT_File file, String data)
|
||||
{
|
||||
// TODO: Check what the real data limit is and chunk sequentially based on
|
||||
// that (rather than failing)
|
||||
if (data.len >= 0x7FFF)
|
||||
u32 chunk_size = Kibi(64);
|
||||
u32 pos = 0;
|
||||
while (pos < data.len)
|
||||
{
|
||||
TempArena scratch = BeginScratchNoConflict();
|
||||
Panic(StringF(
|
||||
scratch.arena,
|
||||
"Tried to write too many bytes to disk (%F)",
|
||||
FmtUint(data.len)
|
||||
));
|
||||
EndScratch(scratch);
|
||||
}
|
||||
|
||||
// WriteFile returns TRUE on success
|
||||
u32 part_size = MinU32(chunk_size, data.len - pos);
|
||||
WriteFile(
|
||||
(HANDLE)file.handle,
|
||||
data.text,
|
||||
(DWORD)data.len,
|
||||
data.text + pos,
|
||||
part_size,
|
||||
0,
|
||||
0
|
||||
);
|
||||
pos += part_size;
|
||||
}
|
||||
}
|
||||
|
||||
u64 PLT_GetFileSize(PLT_File file)
|
||||
@ -412,6 +416,97 @@ String PLT_GetFileMapData(PLT_FileMap map)
|
||||
return map.mapped_memory;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Compression
|
||||
|
||||
// Compresses `data` with the Win32 Compression API (buffer mode) and returns
// the compressed bytes, allocated from `arena`.
//
// - `level` selects the algorithm via PLT_W32_CompressionAlgorithmFromLevel;
//   the same level must be passed to PLT_Decompress to read the data back.
// - On any failure a zeroed String is returned and no allocation is left
//   behind on `arena`.
String PLT_Compress(Arena *arena, String data, PLT_CompressionLevel level)
{
  String result = Zi;
  b32 ok = 1;
  DWORD algo = PLT_W32_CompressionAlgorithmFromLevel(level);

  COMPRESSOR_HANDLE compressor = 0;
  if (ok)
  {
    ok = CreateCompressor(algo, 0, &compressor);
  }

  // Size query: calling Compress with a null output buffer is expected to
  // fail, but it reports the required capacity through `compressed_cap`.
  // The return value is therefore intentionally ignored.
  SIZE_T compressed_cap = data.len;
  if (ok)
  {
    Compress(compressor, data.text, data.len, 0, 0, &compressed_cap);
  }

  if (ok)
  {
    SIZE_T written_count = 0;
    u8 *compressed = PushStructsNoZero(arena, u8, compressed_cap);
    ok = Compress(compressor, data.text, data.len, compressed, compressed_cap, &written_count);
    if (ok)
    {
      result.text = compressed;
      result.len = written_count;
      // Give back the unused tail of the output buffer
      PopBytesNoCopy(arena, compressed_cap - written_count);
    }
    else
    {
      // Compression failed: release the whole output buffer so a failed call
      // does not leave dead bytes on the caller's arena
      PopBytesNoCopy(arena, compressed_cap);
    }
  }

  if (compressor)
  {
    CloseCompressor(compressor);
  }

  return result;
}
|
||||
|
||||
// Decompresses `data` with the Win32 Compression API (buffer mode) and returns
// the uncompressed bytes, allocated from `arena`.
//
// - `level` selects the algorithm via PLT_W32_CompressionAlgorithmFromLevel
//   and must match the level that was passed to PLT_Compress.
// - The output size is not known up front, so the function starts with a guess
//   (8x the input rounded up to a power of two, at least 4 KiB) and doubles it
//   whenever the API reports ERROR_INSUFFICIENT_BUFFER.
// - Empty input, or any failure other than an undersized buffer, yields a
//   zeroed String with nothing left allocated on `arena`.
String PLT_Decompress(Arena *arena, String data, PLT_CompressionLevel level)
{
  String result = Zi;
  b32 ok = data.len > 0;
  DWORD algo = PLT_W32_CompressionAlgorithmFromLevel(level);

  DECOMPRESSOR_HANDLE decompressor = 0;
  if (ok)
  {
    ok = CreateDecompressor(algo, 0, &decompressor);
  }

  SIZE_T out_cap = MaxI64(NextPow2U64(data.len * 8), Kibi(4));

  while (ok)
  {
    u8 *out = PushStructsNoZero(arena, u8, out_cap);

    SIZE_T written_count = 0;
    b32 decompress_ok = Decompress(decompressor, data.text, data.len, out, out_cap, &written_count);
    if (decompress_ok)
    {
      result.text = out;
      result.len = written_count;
      // Give back the unused tail of the output buffer
      PopBytesNoCopy(arena, out_cap - written_count);
      break;
    }
    else
    {
      // Read the error code before touching the arena, in case popping
      // performs a system call that overwrites the thread's last error
      DWORD err = GetLastError();

      // Release the failed attempt's buffer so retries (and hard failures)
      // don't strand progressively larger dead allocations on the arena
      PopBytesNoCopy(arena, out_cap);

      if (err == ERROR_INSUFFICIENT_BUFFER)
      {
        out_cap *= 2;
      }
      else
      {
        ok = 0;
      }
    }
  }

  if (decompressor)
  {
    CloseDecompressor(decompressor);
  }

  return result;
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Utils
|
||||
|
||||
|
||||
@ -29,6 +29,11 @@ Struct(PLT_W32_Ctx)
|
||||
|
||||
extern PLT_W32_Ctx PLT_W32;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Helpers
|
||||
|
||||
DWORD PLT_W32_CompressionAlgorithmFromLevel(PLT_CompressionLevel level);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Time
|
||||
|
||||
|
||||
@ -416,6 +416,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R8_Uint,
|
||||
tiles_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory,
|
||||
.name = Lit("Tiles")
|
||||
);
|
||||
@ -440,6 +441,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Uint,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
|
||||
);
|
||||
@ -452,6 +454,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Uint,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
|
||||
);
|
||||
@ -466,6 +469,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Stains")
|
||||
);
|
||||
@ -477,6 +481,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Dry stains")
|
||||
);
|
||||
@ -488,6 +493,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Float,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Drynesses")
|
||||
);
|
||||
@ -499,6 +505,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Uint,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Occluders cells")
|
||||
);
|
||||
@ -2494,9 +2501,9 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
//////////////////////////////
|
||||
//- Push test emitter
|
||||
|
||||
if (frame->held_buttons[Button_F])
|
||||
// if (frame->held_buttons[Button_F])
|
||||
// if (frame->held_buttons[Button_F] && !prev_frame->held_buttons[Button_F])
|
||||
// if (0)
|
||||
if (0)
|
||||
{
|
||||
{
|
||||
V_Emitter emitter = Zi;
|
||||
@ -2556,9 +2563,9 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
//////////////////////////////
|
||||
//- Push test explosion
|
||||
|
||||
if (frame->held_buttons[Button_G])
|
||||
// if (frame->held_buttons[Button_G])
|
||||
// if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G])
|
||||
// if (0)
|
||||
if (0)
|
||||
{
|
||||
// Fire
|
||||
{
|
||||
@ -4784,6 +4791,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
frame->screen_dims,
|
||||
G_Layout_DirectQueue_RenderTarget,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
@ -4797,6 +4805,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
G_DimsFromMip2D(G_Count2D(screen_target), 1),
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
|
||||
.max_mips = 64
|
||||
@ -4812,6 +4821,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
frame->screen_dims,
|
||||
G_Layout_DirectQueue_RenderTarget,
|
||||
.flags = G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
@ -4822,6 +4832,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
frame->shade_dims,
|
||||
G_Layout_DirectQueue_General,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
@ -4888,7 +4899,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
|
||||
|
||||
// Sync
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
|
||||
//////////////////////////////
|
||||
//- Initialization pass
|
||||
@ -4907,12 +4918,14 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
V.particle_seq = 0;
|
||||
}
|
||||
|
||||
// Prepare albedo RT
|
||||
// Prepare RTs
|
||||
G_DiscardRenderTarget(frame->cl, screen_target, 0);
|
||||
G_ClearRenderTarget(frame->cl, albedo_target, VEC4(0, 0, 0, 0), 0);
|
||||
}
|
||||
|
||||
// Sync
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_General);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Quads & emitters pass
|
||||
@ -4932,7 +4945,10 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_Compute(frame->cl, V_EmitParticlesCS, V_ThreadGroupSizeFromBufferSize(frame->emitters_count));
|
||||
|
||||
// Sync particles & occluders
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
|
||||
// Transition albedo
|
||||
G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_General);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
@ -4943,7 +4959,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_Compute(frame->cl, V_SimParticlesCS, V_ThreadGroupSizeFromBufferSize(V_ParticlesCap));
|
||||
|
||||
// Sync cells
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
@ -4962,7 +4978,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
|
||||
// Sync screen tex
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
@ -4984,7 +5000,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
||||
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));
|
||||
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//- Upsample passes
|
||||
@ -4995,7 +5011,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
||||
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));
|
||||
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5005,13 +5021,15 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
{
|
||||
G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Debug shapes pass
|
||||
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTarget);
|
||||
|
||||
G_Rasterize(
|
||||
frame->cl,
|
||||
V_DVertVS, V_DVertPS,
|
||||
@ -5021,7 +5039,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_RasterMode_TriangleList
|
||||
);
|
||||
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_General);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
|
||||
@ -439,6 +439,7 @@ void SPR_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame)
|
||||
gpu_perm, cl,
|
||||
G_Format_R8G8B8A8_Unorm_Srgb,
|
||||
atlas->dims,
|
||||
G_Layout_Simultaneous,
|
||||
.name = Lit("Sprite atlas")
|
||||
);
|
||||
atlas->tex = G_PushTexture2DRef(gpu_perm, atlas->tex_res);
|
||||
|
||||
@ -2,7 +2,6 @@
|
||||
//~ Win32 libs
|
||||
|
||||
#pragma comment(lib, "dwrite")
|
||||
#pragma comment(lib, "gdi32")
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ DirectWrite types
|
||||
|
||||
@ -1703,6 +1703,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
monitor_size,
|
||||
G_Layout_DirectQueue_RenderTarget,
|
||||
.flags = G_ResourceFlag_AllowRenderTarget,
|
||||
.name = Lit("UI draw target")
|
||||
);
|
||||
@ -1738,7 +1739,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro);
|
||||
|
||||
// Sync
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
|
||||
//////////////////////////////
|
||||
//- Dispatch shaders
|
||||
@ -1780,7 +1781,8 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
|
||||
//- Backbuffer blit pass
|
||||
|
||||
G_Barrier(frame->cl);
|
||||
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_General);
|
||||
G_DumbMemoryLayoutSync(frame->cl, backbuffer, G_Layout_DirectQueue_RenderTarget);
|
||||
|
||||
{
|
||||
G_Rasterize(
|
||||
@ -1792,6 +1794,8 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
G_RasterMode_TriangleList
|
||||
);
|
||||
}
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, backbuffer, G_Layout_Common);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
|
||||
@ -1,10 +1,5 @@
|
||||
WND_W32_Ctx WND_W32 = Zi;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Win32 libs
|
||||
|
||||
#pragma comment(lib, "gdi32")
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Bootstrap
|
||||
|
||||
|
||||
926
tatus
926
tatus
@ -1,926 +0,0 @@
|
||||
[1mdiff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c[m
|
||||
[1mindex a9686d87..43835793 100644[m
|
||||
[1m--- a/src/gpu/gpu_common.c[m
|
||||
[1m+++ b/src/gpu/gpu_common.c[m
|
||||
[36m@@ -25,7 +25,7 @@[m [mvoid G_BootstrapCommon(void)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R8G8B8A8_Uint,[m
|
||||
VEC2I32(8, 8),[m
|
||||
[31m- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,[m
|
||||
[32m+[m[32m G_Layout_Simultaneous,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory[m
|
||||
);[m
|
||||
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);[m
|
||||
[36m@@ -44,7 +44,7 @@[m [mvoid G_BootstrapCommon(void)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R16_Uint,[m
|
||||
noise_dims,[m
|
||||
[31m- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present[m
|
||||
[32m+[m[32m G_Layout_Simultaneous[m
|
||||
);[m
|
||||
G_CopyCpuToTexture([m
|
||||
cl,[m
|
||||
[36m@@ -143,30 +143,54 @@[m [mG_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList[m
|
||||
[m
|
||||
//- Mip[m
|
||||
[m
|
||||
[31m-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip)[m
|
||||
[32m+[m[32mi32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)[m
|
||||
{[m
|
||||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||||
i32 result = 0;[m
|
||||
[31m- result = MaxI32(result >> mip, 1);[m
|
||||
[32m+[m[32m if (mip >= 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result = MaxI32(result >> mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result = MaxI32(result << -mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[31m-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip)[m
|
||||
[32m+[m[32mVec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)[m
|
||||
{[m
|
||||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||||
Vec2I32 result = Zi;[m
|
||||
[31m- result.x = MaxI32(texture_dims.x >> mip, 1);[m
|
||||
[31m- result.y = MaxI32(texture_dims.y >> mip, 1);[m
|
||||
[32m+[m[32m if (mip >= 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x >> mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y >> mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x << -mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y << -mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[31m-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip)[m
|
||||
[32m+[m[32mVec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)[m
|
||||
{[m
|
||||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||||
Vec3I32 result = Zi;[m
|
||||
[31m- result.x = MaxI32(texture_dims.x >> mip, 1);[m
|
||||
[31m- result.y = MaxI32(texture_dims.y >> mip, 1);[m
|
||||
[31m- result.z = MaxI32(texture_dims.z >> mip, 1);[m
|
||||
[32m+[m[32m if (mip >= 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x >> mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y >> mip, 1);[m
|
||||
[32m+[m[32m result.z = MaxI32(mip0_dims.z >> mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x << -mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y << -mip, 1);[m
|
||||
[32m+[m[32m result.z = MaxI32(mip0_dims.z << -mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[1mdiff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h[m
|
||||
[1mindex eb3ee6d2..03927040 100644[m
|
||||
[1m--- a/src/gpu/gpu_common.h[m
|
||||
[1m+++ b/src/gpu/gpu_common.h[m
|
||||
[36m@@ -35,9 +35,9 @@[m [mG_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList[m
|
||||
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })[m
|
||||
[m
|
||||
//- Mip[m
|
||||
[31m-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);[m
|
||||
[31m-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);[m
|
||||
[31m-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);[m
|
||||
[32m+[m[32mi32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);[m
|
||||
[32m+[m[32mVec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);[m
|
||||
[32m+[m[32mVec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);[m
|
||||
[m
|
||||
//- Viewport / scissor[m
|
||||
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);[m
|
||||
[1mdiff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h[m
|
||||
[1mindex 7e1b329a..bed18c93 100644[m
|
||||
[1m--- a/src/gpu/gpu_core.h[m
|
||||
[1m+++ b/src/gpu/gpu_core.h[m
|
||||
[36m@@ -242,18 +242,16 @@[m [mEnum(G_Access)[m
|
||||
G_Access_IndexBuffer = (1 << 8),[m
|
||||
G_Access_IndirectArgument = (1 << 9),[m
|
||||
[m
|
||||
[31m- G_Access_All = 0xFFFFFFFF[m
|
||||
[32m+[m[32m G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage[m
|
||||
};[m
|
||||
[m
|
||||
Enum(G_Layout)[m
|
||||
{[m
|
||||
G_Layout_NoChange,[m
|
||||
[m
|
||||
[31m- // "Simultaneous" allows a resource to be used on any queue with any access[m
|
||||
[31m- // type, as long as there is only one writer at a time, and the writer is not[m
|
||||
[31m- // writing to any texels currently being read.[m
|
||||
[31m- // Resources cannot transition to/from this layout. They must be created[m
|
||||
[31m- // with it and are locked to it.[m
|
||||
[32m+[m[32m // Simultaneous layout allows a resource to be used on any queue with any[m
|
||||
[32m+[m[32m // access type (except depth-stencil). Resources cannot transition to/from[m
|
||||
[32m+[m[32m // this layout, they must be created with it.[m
|
||||
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS[m
|
||||
[m
|
||||
G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay[m
|
||||
[1mindex f72dc528..2d916376 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis.lay[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis.lay[m
|
||||
[36m@@ -26,7 +26,7 @@[m
|
||||
@ComputeShader V_CompositeCS[m
|
||||
@ComputeShader V_BloomDownCS[m
|
||||
@ComputeShader V_BloomUpCS[m
|
||||
[31m-@ComputeShader V_PostProcessCS[m
|
||||
[32m+[m[32m@ComputeShader V_FinalizeCS[m
|
||||
@VertexShader V_DVertVS[m
|
||||
@PixelShader V_DVertPS[m
|
||||
[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c[m
|
||||
[1mindex f2f5e6b5..338036ba 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_core.c[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_core.c[m
|
||||
[36m@@ -416,7 +416,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R8_Uint,[m
|
||||
tiles_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderRead,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory,[m
|
||||
.name = Lit("Tiles")[m
|
||||
);[m
|
||||
[36m@@ -441,7 +441,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Uint,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))[m
|
||||
);[m
|
||||
[36m@@ -454,7 +454,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Uint,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))[m
|
||||
);[m
|
||||
[36m@@ -469,7 +469,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Stains")[m
|
||||
);[m
|
||||
[36m@@ -481,7 +481,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Dry stains")[m
|
||||
);[m
|
||||
[36m@@ -493,7 +493,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Float,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Drynesses")[m
|
||||
);[m
|
||||
[36m@@ -505,7 +505,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Uint,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Occluders cells")[m
|
||||
);[m
|
||||
[36m@@ -614,6 +614,8 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->dt = SecondsFromNs(frame->dt_ns);[m
|
||||
frame->rand = prev_frame->rand;[m
|
||||
[m
|
||||
[32m+[m[32m frame->should_tone_map = TweakBool("Tone mapping enabled", 1);[m
|
||||
[32m+[m
|
||||
if (P_IsEntKeyNil(V.player_key))[m
|
||||
{[m
|
||||
TrueRand(StringFromStruct(&V.player_key));[m
|
||||
[36m@@ -4918,18 +4920,17 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->tile_descs[tile_kind] = tile_desc;[m
|
||||
}[m
|
||||
}[m
|
||||
[32m+[m
|
||||
// Upload tiles[m
|
||||
if (frame->tiles_dirty)[m
|
||||
{[m
|
||||
// LogDebugF("Uploading tiles to gpu");[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);[m
|
||||
G_CopyCpuToTexture([m
|
||||
frame->cl,[m
|
||||
gpu_tiles_res, VEC3I32(0, 0, 0),[m
|
||||
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),[m
|
||||
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))[m
|
||||
);[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);[m
|
||||
}[m
|
||||
[m
|
||||
// Screen texture[m
|
||||
[36m@@ -4937,7 +4938,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->gpu_arena, frame->cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
frame->screen_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,[m
|
||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))[m
|
||||
);[m
|
||||
[36m@@ -4951,11 +4952,10 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->gpu_arena, frame->cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
G_DimsFromMip2D(G_Count2D(screen_target), 1),[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,[m
|
||||
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),[m
|
||||
[31m- // .max_mips = 4[m
|
||||
[31m- .max_mips = 8[m
|
||||
[32m+[m[32m .max_mips = 64[m
|
||||
);[m
|
||||
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)[m
|
||||
{[m
|
||||
[36m@@ -4979,7 +4979,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->gpu_arena, frame->cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
frame->shade_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))[m
|
||||
);[m
|
||||
[36m@@ -5091,6 +5091,9 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
[m
|
||||
// Sync particles & occluders[m
|
||||
G_DumbGlobalMemorySync(frame->cl);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Transition albedo[m
|
||||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
[36m@@ -5113,83 +5116,63 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));[m
|
||||
}[m
|
||||
[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Transition G-buffers to readonly[m
|
||||
[31m-[m
|
||||
[31m- {[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
[31m- }[m
|
||||
[31m-[m
|
||||
//////////////////////////////[m
|
||||
//- Composite pass[m
|
||||
[m
|
||||
{[m
|
||||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||||
[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
[32m+[m[32m // Sync screen tex[m
|
||||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
//- Bloom passes[m
|
||||
[m
|
||||
{[m
|
||||
[31m- i32 mips_count = G_CountMips(bloom_target);[m
|
||||
[32m+[m[32m i32 mips_count = G_CountMips(bloom_target) + 1;[m
|
||||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // NOTE: Because bloom mip chain starts at half screen size, mip_idx 0[m
|
||||
[32m+[m[32m // actually represents the screen texture, while mip_idx - 1 represents[m
|
||||
[32m+[m[32m // the first mip index in the bloom mip chain[m
|
||||
[m
|
||||
//- Downsample + blur passes[m
|
||||
[31m- for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)[m
|
||||
[32m+[m[32m for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)[m
|
||||
{[m
|
||||
[31m- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);[m
|
||||
[31m- if (mip_idx == 0)[m
|
||||
[31m- {[m
|
||||
[31m- // Init bloom pyramid from screen target on first pass (prefilter)[m
|
||||
[31m- gpu_flags |= V_GpuFlag_InitBloom;[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);[m
|
||||
[31m- }[m
|
||||
[31m- else[m
|
||||
[31m- {[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);[m
|
||||
[31m- }[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);[m
|
||||
[31m- {[m
|
||||
[31m- G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));[m
|
||||
[31m- }[m
|
||||
[31m- gpu_flags &= ~V_GpuFlag_InitBloom;[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);[m
|
||||
[32m+[m[32m Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);[m
|
||||
[32m+[m[32m G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));[m
|
||||
[32m+[m
|
||||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||||
}[m
|
||||
[m
|
||||
//- Upsample passes[m
|
||||
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)[m
|
||||
{[m
|
||||
[31m- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);[m
|
||||
[31m-[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));[m
|
||||
[32m+[m[32m Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);[m
|
||||
[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);[m
|
||||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);[m
|
||||
[32m+[m[32m G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));[m
|
||||
[m
|
||||
[31m- G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));[m
|
||||
[31m- }[m
|
||||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||||
[32m+[m[32m }[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
[31m- //- Post process pass[m
|
||||
[32m+[m[32m //- Finalization pass[m
|
||||
[m
|
||||
{[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));[m
|
||||
[31m- G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||||
[32m+[m[32m G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
//- Debug shapes pass[m
|
||||
[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);[m
|
||||
[31m-[m
|
||||
{[m
|
||||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);[m
|
||||
[32m+[m
|
||||
G_Rasterize([m
|
||||
frame->cl,[m
|
||||
V_DVertVS, V_DVertPS,[m
|
||||
[36m@@ -5198,12 +5181,13 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
screen_viewport, screen_scissor,[m
|
||||
G_RasterMode_TriangleList[m
|
||||
);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
//- Finalize screen target[m
|
||||
[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
{[m
|
||||
Rng2 uv = Zi;[m
|
||||
uv.p0 = Vec2FromVec(screen_viewport.p0);[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g[m
|
||||
[1mindex f8a254de..c0a9e47d 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_gpu.g[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_gpu.g[m
|
||||
[36m@@ -53,13 +53,6 @@[m [mVec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[31m-// ACES approximation by Krzysztof Narkowicz[m
|
||||
[31m-// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/[m
|
||||
[31m-Vec3 V_ToneMap(Vec3 v)[m
|
||||
[31m-{[m
|
||||
[31m- return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));[m
|
||||
[31m-}[m
|
||||
[31m-[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Prepare frame[m
|
||||
[m
|
||||
[36m@@ -142,11 +135,11 @@[m [mComputeShader2D(V_PrepareCellsCS, 8, 8)[m
|
||||
}[m
|
||||
else if (over_stain.a > 0)[m
|
||||
{[m
|
||||
[31m- Vec4 stain = dry_stains[cell_pos];[m
|
||||
Vec4 dry_stain = max(dry_stains[cell_pos], 0);[m
|
||||
[32m+[m[32m Vec4 stain = dry_stain;[m
|
||||
[m
|
||||
[31m- stain = BlendPremul(over_stain, stain);[m
|
||||
dry_stain = BlendPremul(over_dry_stain, dry_stain);[m
|
||||
[32m+[m[32m stain = BlendPremul(over_stain, stain);[m
|
||||
[m
|
||||
stains[cell_pos] = stain;[m
|
||||
dry_stains[cell_pos] = dry_stain;[m
|
||||
[36m@@ -483,7 +476,7 @@[m [mComputeShader(V_SimParticlesCS, 64)[m
|
||||
particle.prev_occluder = occluder;[m
|
||||
}[m
|
||||
[m
|
||||
[31m- if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001)[m
|
||||
[32m+[m[32m if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))[m
|
||||
{[m
|
||||
prune = 1;[m
|
||||
}[m
|
||||
[36m@@ -723,7 +716,6 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||||
Vec4 ground_particle_color = 0;[m
|
||||
Vec4 air_particle_color = 0;[m
|
||||
[m
|
||||
[31m-[m
|
||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)[m
|
||||
{[m
|
||||
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);[m
|
||||
[36m@@ -752,9 +744,9 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||||
// Darken wall particles / stains[m
|
||||
if (tile == P_TileKind_Wall)[m
|
||||
{[m
|
||||
[31m- ground_particle_color *= 0.25;[m
|
||||
[31m- air_particle_color *= 0.25;[m
|
||||
[31m- stain_color *= 0.25;[m
|
||||
[32m+[m[32m ground_particle_color *= 0.5;[m
|
||||
[32m+[m[32m air_particle_color *= 0.5;[m
|
||||
[32m+[m[32m stain_color *= 0.5;[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
[36m@@ -972,57 +964,73 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Bloom[m
|
||||
[m
|
||||
[32m+[m[32m//////////////////////////////[m
|
||||
[32m+[m[32m//- Downsample[m
|
||||
[32m+[m
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8)[m
|
||||
{[m
|
||||
[32m+[m[32m i32 mips_count = V_GpuConst_MipsCount;[m
|
||||
[32m+[m[32m i32 mip_idx = V_GpuConst_MipIdx;[m
|
||||
[32m+[m
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||||
[31m- Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);[m
|
||||
[31m- RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);[m
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||||
[32m+[m[32m RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m Texture2D<Vec4> bloom_up;[m
|
||||
[32m+[m[32m b32 is_first_pass = mip_idx == 1;[m
|
||||
[32m+[m[32m if (is_first_pass)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.screen_ro);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);[m
|
||||
[32m+[m[32m }[m
|
||||
[m
|
||||
[31m- Vec2 up_dims = countof(bloom_up);[m
|
||||
Vec2 down_dims = countof(bloom_down);[m
|
||||
[m
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;[m
|
||||
Vec2 bloom_uv = bloom_pos / down_dims;[m
|
||||
Vec2 off_uv = 0.5 / down_dims;[m
|
||||
[31m- b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);[m
|
||||
[m
|
||||
[31m- Struct(SampleDesc) { Vec2 uv; f32 weight; };[m
|
||||
[31m- SampleDesc samples[] = {[m
|
||||
[31m- { bloom_uv + Vec2(0, 0), 0.5 },[m
|
||||
[31m- { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[31m- { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[31m- { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },[m
|
||||
[31m- { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },[m
|
||||
[31m- };[m
|
||||
[32m+[m[32m f32 threshold = 0.25;[m
|
||||
[32m+[m[32m f32 knee = 0.75;[m
|
||||
[m
|
||||
Vec4 result = 0;[m
|
||||
[31m- for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)[m
|
||||
{[m
|
||||
[31m- SampleDesc desc = samples[sample_idx];[m
|
||||
[31m- Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);[m
|
||||
[31m-[m
|
||||
[31m- f32 knee_weight = 1;[m
|
||||
[31m- if (is_first_pass)[m
|
||||
[32m+[m[32m Struct(SampleDesc) { Vec2 uv; f32 weight; };[m
|
||||
[32m+[m[32m SampleDesc samples[] = {[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(0, 0), 0.5 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m };[m
|
||||
[32m+[m[32m for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)[m
|
||||
{[m
|
||||
[31m- f32 luminance = LuminanceFromColor(src);[m
|
||||
[31m- f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance[m
|
||||
[31m- f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);[m
|
||||
[31m- if (bright > 0)[m
|
||||
[31m- {[m
|
||||
[31m- f32 threshold = 1.0;[m
|
||||
[31m- f32 knee = 0.5;[m
|
||||
[31m- f32 over_threshold = max(bright - threshold, 0.0);[m
|
||||
[31m- f32 ramp = saturate(over_threshold / knee);[m
|
||||
[31m- knee_weight = (over_threshold * ramp * ramp) / bright;[m
|
||||
[31m- }[m
|
||||
[31m- else[m
|
||||
[32m+[m[32m SampleDesc desc = samples[sample_idx];[m
|
||||
[32m+[m[32m Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m f32 knee_weight = 1;[m
|
||||
[32m+[m[32m if (is_first_pass)[m
|
||||
{[m
|
||||
[31m- knee_weight = 0;[m
|
||||
[32m+[m[32m f32 luminance = LuminanceFromColor(src);[m
|
||||
[32m+[m[32m f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance[m
|
||||
[32m+[m[32m f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);[m
|
||||
[32m+[m[32m if (bright > 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m f32 over_threshold = max(bright - threshold, 0.0);[m
|
||||
[32m+[m[32m f32 ramp = saturate(over_threshold / knee);[m
|
||||
[32m+[m[32m knee_weight = (over_threshold * ramp * ramp) / bright;[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m knee_weight = 0;[m
|
||||
[32m+[m[32m }[m
|
||||
}[m
|
||||
[31m- }[m
|
||||
[m
|
||||
[31m- result += src * desc.weight * knee_weight;[m
|
||||
[32m+[m[32m result += src * desc.weight * knee_weight;[m
|
||||
[32m+[m[32m }[m
|
||||
}[m
|
||||
[m
|
||||
if (IsInside(bloom_pos, down_dims))[m
|
||||
[36m@@ -1031,52 +1039,77 @@[m [mComputeShader2D(V_BloomDownCS, 8, 8)[m
|
||||
}[m
|
||||
}[m
|
||||
[m
|
||||
[32m+[m[32m//////////////////////////////[m
|
||||
[32m+[m[32m//- Upsample[m
|
||||
[32m+[m
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8)[m
|
||||
{[m
|
||||
[32m+[m[32m i32 mips_count = V_GpuConst_MipsCount;[m
|
||||
[32m+[m[32m i32 mip_idx = V_GpuConst_MipIdx;[m
|
||||
[32m+[m
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||||
[31m- Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);[m
|
||||
[31m- RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);[m
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||||
[32m+[m[32m Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m b32 is_last_pass = mip_idx == 0;[m
|
||||
[32m+[m[32m RWTexture2D<Vec4> bloom_up;[m
|
||||
[32m+[m[32m if (is_last_pass)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.screen_rw);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);[m
|
||||
[32m+[m[32m }[m
|
||||
[m
|
||||
[31m- Vec2 up_dims = countof(bloom_up);[m
|
||||
Vec2 down_dims = countof(bloom_down);[m
|
||||
[32m+[m[32m Vec2 up_dims = countof(bloom_up);[m
|
||||
[m
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;[m
|
||||
Vec2 bloom_uv = bloom_pos / up_dims;[m
|
||||
[31m- Vec2 off_uv = 1 / up_dims;[m
|
||||
[32m+[m[32m Vec2 off_uv0 = 1 / down_dims;[m
|
||||
[32m+[m[32m Vec2 off_uv1 = off_uv0 * 2;[m
|
||||
[m
|
||||
Vec4 result = 0;[m
|
||||
{[m
|
||||
// Center[m
|
||||
[31m- result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;[m
|
||||
[31m- // Edges[m
|
||||
[32m+[m[32m result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f;[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Outer Edges[m
|
||||
result += ([m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)[m
|
||||
[31m- ) * 2;[m
|
||||
[31m- // Corners[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, 0), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, 0), 0)[m
|
||||
[32m+[m[32m ) * 3.0f / 41.0f;[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Inner corners[m
|
||||
[32m+[m[32m result += ([m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, -off_uv0.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, -off_uv0.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, off_uv0.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, off_uv0.y), 0)[m
|
||||
[32m+[m[32m ) * 4.0f / 41.0f;[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Outer corners[m
|
||||
result += ([m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)[m
|
||||
[31m- );[m
|
||||
[31m- // Normalize[m
|
||||
[31m- result /= 16;[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, -off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, -off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, off_uv1.y), 0)[m
|
||||
[32m+[m[32m ) * 1.0f / 41.0f;[m
|
||||
}[m
|
||||
[m
|
||||
if (IsInside(bloom_pos, up_dims))[m
|
||||
{[m
|
||||
[31m- bloom_up[bloom_pos] += result;[m
|
||||
[32m+[m[32m bloom_up[bloom_pos] += result * 0.75;[m
|
||||
}[m
|
||||
}[m
|
||||
[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
[31m-//~ Post process[m
|
||||
[32m+[m[32m//~ Finalize[m
|
||||
[m
|
||||
[31m-ComputeShader2D(V_PostProcessCS, 8, 8)[m
|
||||
[32m+[m[32mComputeShader2D(V_FinalizeCS, 8, 8)[m
|
||||
{[m
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||||
[36m@@ -1084,42 +1117,21 @@[m [mComputeShader2D(V_PostProcessCS, 8, 8)[m
|
||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);[m
|
||||
[m
|
||||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;[m
|
||||
[31m- Vec2 screen_uv = screen_pos / frame.screen_dims;[m
|
||||
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);[m
|
||||
[31m-[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Original[m
|
||||
[31m-[m
|
||||
[31m- Vec4 original = 0;[m
|
||||
if (is_in_screen)[m
|
||||
{[m
|
||||
[31m- original = screen_tex[screen_pos];[m
|
||||
[31m- original.rgb *= original.a;[m
|
||||
[31m- }[m
|
||||
[32m+[m[32m Vec4 result = screen_tex[screen_pos];[m
|
||||
[m
|
||||
[32m+[m[32m //- Tone map[m
|
||||
[32m+[m[32m if (frame.should_tone_map)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m // ACES approximation by Krzysztof Narkowicz[m
|
||||
[32m+[m[32m // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/[m
|
||||
[32m+[m[32m result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));[m
|
||||
[32m+[m[32m }[m
|
||||
[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Bloom[m
|
||||
[31m-[m
|
||||
[31m- Vec4 bloom = 0;[m
|
||||
[31m- if (is_in_screen)[m
|
||||
[31m- {[m
|
||||
[31m- bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);[m
|
||||
[31m- // bloom.rgb *= bloom.a;[m
|
||||
[31m- }[m
|
||||
[31m-[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Compose[m
|
||||
[31m-[m
|
||||
[31m- Vec4 result = Vec4(0, 0, 0, 1);[m
|
||||
[31m- result = BlendPremul(original, result);[m
|
||||
[31m- result += bloom;[m
|
||||
[31m- // result.rgb = V_ToneMap(result);[m
|
||||
[32m+[m[32m result = Unpremul(result);[m
|
||||
[m
|
||||
[31m- result = Unpremul(result);[m
|
||||
[31m-[m
|
||||
[31m- if (is_in_screen)[m
|
||||
[31m- {[m
|
||||
screen_tex[screen_pos] = result;[m
|
||||
}[m
|
||||
}[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||||
[1mindex a47a2335..f176f2f8 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||||
[36m@@ -46,7 +46,6 @@[m [mStruct(V_DVertPSOutput)[m
|
||||
[m
|
||||
f32 V_RandFromPos(Vec3 pos);[m
|
||||
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);[m
|
||||
[31m-Vec3 V_ToneMap(Vec3 v);[m
|
||||
[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Shaders[m
|
||||
[36m@@ -73,8 +72,8 @@[m [mComputeShader2D(V_CompositeCS, 8, 8);[m
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8);[m
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8);[m
|
||||
[m
|
||||
[31m-//- Post process[m
|
||||
[31m-ComputeShader2D(V_PostProcessCS, 8, 8);[m
|
||||
[32m+[m[32m//- Finalize[m
|
||||
[32m+[m[32mComputeShader2D(V_FinalizeCS, 8, 8);[m
|
||||
[m
|
||||
//- Debug shapes[m
|
||||
VertexShader(V_DVertVS, V_DVertPSInput);[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_shared.cg b/src/pp/pp_vis/pp_vis_shared.cg[m
|
||||
[1mindex 2419a6f2..72f6ae8d 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_shared.cg[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_shared.cg[m
|
||||
[36m@@ -11,37 +11,42 @@[m [mV_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)[m
|
||||
V_ParticleDesc result;[m
|
||||
{[m
|
||||
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,[m
|
||||
[32m+[m[32m V_ParticlesXList(X)[m
|
||||
[32m+[m[32m #undef X[m
|
||||
[32m+[m[32m };[m
|
||||
[32m+[m[32m PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
[36m@@ -51,6 +56,7 @@[m [mV_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)[m
|
||||
result.stain_rate = stain_rates[kind];[m
|
||||
result.pen_rate = pen_rates[kind];[m
|
||||
result.lifetime = lifetimes[kind];[m
|
||||
[32m+[m[32m result.prune_speed_threshold = prune_speed_thresholds[kind];[m
|
||||
result.base_color = LinearFromSrgb(base_colors[kind]);[m
|
||||
result.dry_factor = LinearFromSrgb(dry_factor[kind]);[m
|
||||
}[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||||
[1mindex 16ca6419..71d88ea5 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||||
[36m@@ -9,14 +9,13 @@[m
|
||||
Enum(V_GpuFlag)[m
|
||||
{[m
|
||||
V_GpuFlag_None = 0,[m
|
||||
[31m- V_GpuFlag_InitBloom = (1 << 0),[m
|
||||
};[m
|
||||
[m
|
||||
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);[m
|
||||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);[m
|
||||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);[m
|
||||
[31m-G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);[m
|
||||
[31m-G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);[m
|
||||
[32m+[m[32mG_DeclConstant(i32, V_GpuConst_MipsCount, 3);[m
|
||||
[32m+[m[32mG_DeclConstant(i32, V_GpuConst_MipIdx, 4);[m
|
||||
[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Particle types[m
|
||||
[36m@@ -29,7 +28,6 @@[m [mG_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);[m
|
||||
Enum(V_ParticleFlag)[m
|
||||
{[m
|
||||
V_ParticleFlag_None = 0,[m
|
||||
[31m- V_ParticleFlag_NoPruneWhenStill = (1 << 0),[m
|
||||
V_ParticleFlag_StainWhenPruned = (1 << 1),[m
|
||||
V_ParticleFlag_NoReflect = (1 << 2),[m
|
||||
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),[m
|
||||
[36m@@ -53,6 +51,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Ground, \[m
|
||||
/* Stain rate, pen chance */ 30, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0, 0, 0, 0), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -64,8 +63,9 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Ground, \[m
|
||||
/* Stain rate, pen chance */ 100, 0.25, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[31m- /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \[m
|
||||
[31m- /* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.5, \[m
|
||||
[32m+[m[32m /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \[m
|
||||
[32m+[m[32m /* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \[m
|
||||
) \[m
|
||||
X( \[m
|
||||
/* Name */ BloodDebris, \[m
|
||||
[36m@@ -73,6 +73,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 30, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -82,6 +83,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -91,6 +93,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.1, \[m
|
||||
/* Base color */ CompVec4(2, 0.5, 0, 1), \[m
|
||||
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -102,6 +105,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ 0.075, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -111,6 +115,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Air, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -122,6 +127,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(1, 1, 0, 1), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -168,6 +174,7 @@[m [mStruct(V_ParticleDesc)[m
|
||||
f32 stain_rate;[m
|
||||
f32 pen_rate;[m
|
||||
f32 lifetime;[m
|
||||
[32m+[m[32m f32 prune_speed_threshold;[m
|
||||
Vec4 base_color;[m
|
||||
Vec4 dry_factor;[m
|
||||
};[m
|
||||
[36m@@ -264,6 +271,7 @@[m [mStruct(V_SharedFrame)[m
|
||||
[m
|
||||
b32 tiles_dirty;[m
|
||||
b32 should_clear_particles;[m
|
||||
[32m+[m[32m b32 should_tone_map;[m
|
||||
[m
|
||||
b32 is_looking;[m
|
||||
b32 is_moving;[m
|
||||
Loading…
Reference in New Issue
Block a user