diff --git a/src/ase/ase.c b/src/ase/ase.c index 58b1407f..2f4f33c5 100644 --- a/src/ase/ase.c +++ b/src/ase/ase.c @@ -414,12 +414,6 @@ void ASE_PushError(Arena *arena, ASE_ErrorList *list, String msg_src) //////////////////////////////////////////////////////////// //~ Decode helpers -u32 ASE_BlendMulU8(u32 a, u32 b) -{ - u32 t = (a * b) + 0x80; - return ((t >> 8) + t) >> 8; -} - u32 ASE_Blend(u32 src, u32 dst, u8 opacity) { u32 dst_r = (dst & 0xff); @@ -432,21 +426,20 @@ u32 ASE_Blend(u32 src, u32 dst, u8 opacity) u32 src_b = (src >> 16) & 0xff; u32 src_a = (src >> 24) & 0xff; - src_a = (u8)ASE_BlendMulU8(src_a, opacity); - u32 a = src_a + dst_a - ASE_BlendMulU8(src_a, dst_a); - u32 r, g, b; - if (a == 0) - { - r = g = b = 0; - } - else + src_a = (u8)MulNormalizedU8(src_a, opacity); + u32 a = src_a + dst_a - MulNormalizedU8(src_a, dst_a); + + u32 r = 0; + u32 g = 0; + u32 b = 0; + if (a != 0) { r = dst_r + (src_r - dst_r) * src_a / a; g = dst_g + (src_g - dst_g) * src_a / a; b = dst_b + (src_b - dst_b) * src_a / a; } - return r | (g << 8) | (b << 16) | (a << 24); + return (r << 0) | (g << 8) | (b << 16) | (a << 24); } void ASE_MakeDimensionsSquareish(ASE_Header *header, u32 *frames_x, u32 *frames_y, u64 *image_width, u64 *image_height) @@ -520,8 +513,8 @@ ASE_DecodedImage ASE_DecodeImage(Arena *arena, String encoded) u32 num_layers = 0; ASE_Layer *layer_head = 0; - Ace_Cel *cel_head = 0; - Ace_Cel *cel_tail = 0; + ASE_Cel *cel_head = 0; + ASE_Cel *cel_tail = 0; ////////////////////////////// //- Iterate frames @@ -612,7 +605,7 @@ ASE_DecodedImage ASE_DecodeImage(Arena *arena, String encoded) case ASE_ChunkKind_Cel: { - Ace_Cel *cel = PushStruct(scratch.arena, Ace_Cel); + ASE_Cel *cel = PushStruct(scratch.arena, ASE_Cel); if (cel_tail) { cel_tail->next = cel; @@ -685,13 +678,13 @@ ASE_DecodedImage ASE_DecodeImage(Arena *arena, String encoded) ////////////////////////////// //- Link cels - Ace_Cel **cels_ordered = PushStructsNoZero(scratch.arena, Ace_Cel *, 
num_frames * num_layers); - for (Ace_Cel *cel = cel_head; cel; cel = cel->next) + ASE_Cel **cels_ordered = PushStructsNoZero(scratch.arena, ASE_Cel *, num_frames * num_layers); + for (ASE_Cel *cel = cel_head; cel; cel = cel->next) { cels_ordered[(cel->frame_index * num_layers) + cel->layer_index] = cel; if (cel->type == ASE_CelKind_Linked) { - Ace_Cel *ref_cel = cels_ordered[(cel->frame_pos * num_layers) + cel->layer_index]; + ASE_Cel *ref_cel = cels_ordered[(cel->frame_pos * num_layers) + cel->layer_index]; cel->width = ref_cel->width; cel->height = ref_cel->height; cel->pixels = ref_cel->pixels; @@ -702,7 +695,7 @@ ASE_DecodedImage ASE_DecodeImage(Arena *arena, String encoded) //- Assemble image from cels { - for (Ace_Cel *cel = cel_head; cel; cel = cel->next) + for (ASE_Cel *cel = cel_head; cel; cel = cel->next) { ASE_Layer *layer = layers_ordered[cel->layer_index]; // Only draw visible layers @@ -725,8 +718,8 @@ ASE_DecodedImage ASE_DecodeImage(Arena *arena, String encoded) i32 image_top = frame_top + ((cel->frame_index / frames_x) * frame_height); // Adjust bounds to ensure pixels outside of frame boundaries - // aren't (aseprite keeps chunks outside of frame around in - // project file). + // aren't processed (aseprite keeps chunks outside of frame + // around in project file). 
{ i32 frame_right = cel_width + frame_left; i32 frame_bottom = frame_top + cel_height; @@ -769,7 +762,7 @@ ASE_DecodedImage ASE_DecodeImage(Arena *arena, String encoded) } } - // Assert all data was read + // Sanity check to ensure all data was read Assert(BB_NumBytesRemaining(&bbr) == 0); abort: diff --git a/src/ase/ase.h b/src/ase/ase.h index ec16408f..d8b4a3bd 100644 --- a/src/ase/ase.h +++ b/src/ase/ase.h @@ -183,7 +183,7 @@ Struct(ASE_Layer) ASE_Layer *next; }; -Struct(Ace_Cel) +Struct(ASE_Cel) { u16 layer_index; i16 x_pos; @@ -201,7 +201,7 @@ Struct(Ace_Cel) u32 *pixels; u16 frame_index; - Ace_Cel *next; + ASE_Cel *next; }; //////////////////////////////////////////////////////////// @@ -227,7 +227,6 @@ void ASE_PushError(Arena *arena, ASE_ErrorList *list, String msg_src); //////////////////////////////////////////////////////////// //~ Decode helpers -u32 ASE_BlendMulU8(u32 a, u32 b); u32 ASE_Blend(u32 src, u32 dst, u8 opacity); void ASE_MakeDimensionsSquareish(ASE_Header *header, u32 *frames_x, u32 *frames_y, u64 *image_width, u64 *image_height); diff --git a/src/base/base_math.c b/src/base/base_math.c index 6c42bca2..15e75628 100644 --- a/src/base/base_math.c +++ b/src/base/base_math.c @@ -267,6 +267,12 @@ f64 SmoothstepF64(f64 a, f64 b, f64 t) //////////////////////////////////////////////////////////// //~ Color +u8 MulNormalizedU8(u8 a, u8 b) +{ + u32 t = ((u32)a * (u32)b) + 0x80; + return ((t >> 8) + t) >> 8; +} + f32 SrgbFromLinearF32(f32 lin) { f32 result = 0; diff --git a/src/base/base_math.h b/src/base/base_math.h index d57ea56b..1e2641d6 100644 --- a/src/base/base_math.h +++ b/src/base/base_math.h @@ -316,6 +316,8 @@ f64 SmoothstepF64(f64 a, f64 b, f64 t); //////////////////////////////////////////////////////////// //~ Color +u8 MulNormalizedU8(u8 a, u8 b); + f32 SrgbFromLinearF32(f32 lin); f32 LinearFromSrgbF32(f32 srgb); diff --git a/src/base/base_shader.gh b/src/base/base_shader.gh index f9b73f18..302a6c45 100644 --- a/src/base/base_shader.gh 
+++ b/src/base/base_shader.gh @@ -171,6 +171,25 @@ Inline Vec4 Premul(Vec4 v) return result; } +Inline Vec4 Unpremul(Vec4 v) +{ + Vec4 result = 0; + if (v.a > 0.0) + { + result.rgb = v.rgb / v.a; + } + result.a = v.a; + return result; +} + +Inline Vec4 BlendPremul(Vec4 src, Vec4 dst) +{ + Vec4 result; + result.rgb = src.rgb + (dst.rgb * (1.0 - src.a)); + result.a = src.a + (dst.a * (1.0 - src.a)); + return result; +} + //////////////////////////////////////////////////////////// //~ Vertex ID helpers diff --git a/src/gpu/gpu_shader_core.cgh b/src/gpu/gpu_shader_core.cgh index d1d744d0..ec6d33dd 100644 --- a/src/gpu/gpu_shader_core.cgh +++ b/src/gpu/gpu_shader_core.cgh @@ -77,7 +77,7 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10 #if IsLanguageG // TODO: Non-uniform resource access currently is assumed as the default // behavior. We may want to add explicit "uniform" variants for - // optimization on AMD in the future. + // optimization on AMD hardware in the future. template StructuredBuffer G_Dereference(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } template RWStructuredBuffer G_Dereference(G_RWStructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } @@ -96,7 +96,7 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10 #endif //////////////////////////////////////////////////////////// -//~ Size helpers +//~ Resource countof #if IsLanguageG template u32 countof(StructuredBuffer buff) { u32 result; buff.GetDimensions(result); return result; } diff --git a/src/pp/pp_res/sprite/bla3.ase b/src/pp/pp_res/sprite/bla3.ase index cc05f9c5..2d093f14 100644 --- a/src/pp/pp_res/sprite/bla3.ase +++ b/src/pp/pp_res/sprite/bla3.ase @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:266e24b09714c54ca27cd2873b47ea41bdb12fb3646ac9f48f4338749be7f21a -size 2331 +oid sha256:1b981f00ffbe8976def03aacd7a79f99265a98899dfb2bbc20cedac0d7698d29 +size 2837 diff --git 
a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay index 9dccc202..ef8c932b 100644 --- a/src/pp/pp_vis/pp_vis.lay +++ b/src/pp/pp_vis/pp_vis.lay @@ -23,10 +23,10 @@ @ComputeShader V_EmitParticlesCS @ComputeShader V_SimParticlesCS @ComputeShader V_ShadeCS +@VertexShader V_CompositeVS +@PixelShader V_CompositePS @VertexShader V_DVertVS @PixelShader V_DVertPS -@VertexShader V_OverlayVS -@PixelShader V_OverlayPS ////////////////////////////// //- Api diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 4bb6aa75..096916cf 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -677,7 +677,14 @@ void V_TickForever(WaveLaneCtx *lane) frame->screen_dims = RoundVec2(DimsFromRng2(vis_box_reps.draw.screen_rect)); frame->screen_dims.x = MaxF32(frame->screen_dims.x, 64); frame->screen_dims.y = MaxF32(frame->screen_dims.y, 64); - frame->shade_dims = frame->screen_dims; + + // frame->shade_dims = frame->screen_dims; + + f32 max_shade_aspect_ratio = 16.0 / 10.0; + frame->shade_dims.x = V_CellsPerMeter * meters_per_screen_width; + frame->shade_dims.y = frame->shade_dims.x / (frame->screen_dims.x / frame->screen_dims.y); + frame->shade_dims.y = MinF32(frame->shade_dims.y, frame->shade_dims.x / max_shade_aspect_ratio); + ////////////////////////////// //- Pop sim -> vis data @@ -798,11 +805,8 @@ void V_TickForever(WaveLaneCtx *lane) } ////////////////////////////// - //- Compute frame xforms + //- Compute camera position & zoom - // World <-> screen - frame->xf.world_to_screen = XformIdentity; - frame->xf.screen_to_world = XformIdentity; { // Determine target camera pos Vec2 target_camera_pos = Zi; @@ -885,22 +889,22 @@ void V_TickForever(WaveLaneCtx *lane) frame->camera_lerp_rate = ClampF32(frame->camera_lerp_rate, 0, 1); frame->camera_pos = LerpVec2(prev_frame->camera_pos, target_camera_pos, frame->camera_lerp_rate); frame->camera_zoom = LerpF32(prev_frame->camera_zoom, target_camera_zoom, frame->camera_lerp_rate); - { - f32 
camera_scale = (f32)frame->screen_dims.x / (meters_per_screen_width * frame->camera_zoom); - frame->xf.world_to_screen = XformFromScale(VEC2(camera_scale, camera_scale)); - frame->xf.world_to_screen = TranslateXform(frame->xf.world_to_screen, NegVec2(frame->camera_pos)); - frame->xf.world_to_screen = WorldTranslateXform(frame->xf.world_to_screen, MulVec2(Vec2FromVec(frame->screen_dims), 0.5)); - frame->xf.world_to_screen.og = RoundVec2(frame->xf.world_to_screen.og); - frame->xf.screen_to_world = InvertXform(frame->xf.world_to_screen); - } } } - // Shade <-> screen - frame->xf.shade_to_screen = XformIdentity; - frame->xf.screen_to_shade = XformIdentity; + ////////////////////////////// + //- Compute frame xforms + + // World <-> screen + frame->xf.world_to_screen = XformIdentity; + frame->xf.screen_to_world = XformIdentity; { - frame->xf.screen_to_shade = InvertXform(frame->xf.shade_to_screen); + f32 camera_scale = (f32)frame->screen_dims.x / (meters_per_screen_width * frame->camera_zoom); + frame->xf.world_to_screen = XformFromScale(VEC2(camera_scale, camera_scale)); + frame->xf.world_to_screen = TranslateXform(frame->xf.world_to_screen, NegVec2(frame->camera_pos)); + frame->xf.world_to_screen = WorldTranslateXform(frame->xf.world_to_screen, MulVec2(Vec2FromVec(frame->screen_dims), 0.5)); + frame->xf.world_to_screen.og = RoundVec2(frame->xf.world_to_screen.og); + frame->xf.screen_to_world = InvertXform(frame->xf.world_to_screen); } // World <-> shade @@ -911,6 +915,13 @@ void V_TickForever(WaveLaneCtx *lane) frame->xf.shade_to_world = InvertXform(frame->xf.world_to_shade); } + // Shade <-> screen + frame->xf.shade_to_screen = XformIdentity; + frame->xf.screen_to_shade = XformIdentity; + { + frame->xf.screen_to_shade = InvertXform(frame->xf.shade_to_screen); + } + // World <-> cell // TODO: This never changes, should be #defined (so shaders don't need to read it every frame) frame->xf.world_to_cell = XformIdentity; @@ -4076,18 +4087,29 @@ void 
V_TickForever(WaveLaneCtx *lane) //- Begin gpu frame // Screen texture - G_ResourceHandle screen = G_PushTexture2D( + G_ResourceHandle screen_target = G_PushTexture2D( frame->gpu_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->screen_dims, G_Layout_DirectQueue_RenderTargetWrite, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget ); - G_Texture2DRef screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen); - G_RWTexture2DRef screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen); + G_Texture2DRef screen_target_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target); + G_RWTexture2DRef screen_target_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target); Rng3 viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng2 scissor = RNG2(VEC2(viewport.p0.x, viewport.p0.y), VEC2(viewport.p1.x, viewport.p1.y)); + // Shade texture + G_ResourceHandle shade_target = G_PushTexture2D( + frame->gpu_arena, frame->cl, + G_Format_R16G16B16A16_Float, + frame->shade_dims, + G_Layout_DirectQueue_ShaderReadWrite, + .flags = G_ResourceFlag_AllowShaderReadWrite + ); + G_Texture2DRef shade_target_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target); + G_RWTexture2DRef shade_target_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target); + // Debug shape buffers G_ResourceHandle dverts_buff = G_PushBufferFromCpuCopy(frame->gpu_arena, frame->cl, StringFromArena(frame->dverts_arena)); G_ResourceHandle dvert_idxs_buff = G_PushBufferFromCpuCopy(frame->gpu_arena, frame->cl, StringFromArena(frame->dvert_idxs_arena)); @@ -4115,14 +4137,20 @@ void V_TickForever(WaveLaneCtx *lane) V_GpuParams params = Zi; { params.dt = frame->dt; - params.screen_dims = frame->screen_dims; - params.screen_ro = screen_ro; - params.screen_rw = screen_rw; params.xf = frame->xf; + params.screen_dims = frame->screen_dims; + params.screen_ro = screen_target_ro; + params.screen_rw = screen_target_rw; + + params.shade_dims = frame->shade_dims; + 
params.shade_ro = shade_target_ro; + params.shade_rw = shade_target_rw; + params.tick = frame->tick; params.seed = RandU64FromState(&frame->rand); + params.pt_clamp_sampler = G_BasicPointClampSampler(); params.pt_wrap_sampler = G_BasicPointWrapSampler(); params.selection_mode = frame->selection_mode; @@ -4212,7 +4240,7 @@ void V_TickForever(WaveLaneCtx *lane) } // Discard screen RT - G_DiscardRenderTarget(frame->cl, screen); + G_DiscardRenderTarget(frame->cl, screen_target); // Sync G_DumbGlobalMemorySync(frame->cl); @@ -4237,7 +4265,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Backdrop pass - G_DumbMemoryLayoutSync(frame->cl, screen, G_Layout_DirectQueue_ShaderReadWrite); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite); { G_Compute(frame->cl, V_BackdropCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); @@ -4253,30 +4281,31 @@ void V_TickForever(WaveLaneCtx *lane) } ////////////////////////////// - //- Debug shapes pass + //- Composite pass - G_DumbMemoryLayoutSync(frame->cl, screen, G_Layout_DirectQueue_RenderTargetWrite); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite); + G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead); { G_Rasterize( frame->cl, - V_DVertVS, V_DVertPS, - 1, dvert_idxs_ib, - 1, &G_Rt(screen, G_BlendMode_CompositeStraightAlpha), + V_CompositeVS, V_CompositePS, + 1, G_QuadIndices(), + 1, &G_Rt(screen_target, G_BlendMode_CompositeStraightAlpha), viewport, scissor, G_RasterMode_TriangleList ); } ////////////////////////////// - //- Overlay pass + //- Debug shapes pass { G_Rasterize( frame->cl, - V_OverlayVS, V_OverlayPS, - 1, G_QuadIndices(), - 1, &G_Rt(screen, G_BlendMode_CompositeStraightAlpha), + V_DVertVS, V_DVertPS, + 1, dvert_idxs_ib, + 1, &G_Rt(screen_target, G_BlendMode_CompositeStraightAlpha), viewport, scissor, G_RasterMode_TriangleList ); @@ -4285,13 +4314,13 @@ void 
V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Finalize screen target - G_DumbMemoryLayoutSync(frame->cl, screen, G_Layout_DirectQueue_ShaderRead); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); { Rng2 uv = Zi; uv.p0 = Vec2FromVec(viewport.p0); uv.p1 = Vec2FromVec(viewport.p1); uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims)); - UI_SetRawTexture(vis_box, screen_ro, uv); + UI_SetRawTexture(vis_box, screen_target_ro, uv); } } diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 0393991b..f905fdcb 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -4,8 +4,8 @@ f32 V_RandFromPos(Vec3 pos) { Texture3D noise3d = G_Dereference(V_ShaderConst_NoiseTex); - Vec3I32 dims = countof(noise3d); - u32 noise = noise3d[floor(pos) % dims]; + // TODO: Compile-time noise dims + u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)]; f32 rand = (f32)noise / 0xFFFF; return rand; } @@ -70,6 +70,7 @@ ComputeShader2D(V_BackdropCS, 8, 8) V_GpuParams params = G_Dereference(V_ShaderConst_Params)[0]; RWTexture2D screen = G_Dereference(params.screen_rw); Texture2D tiles = G_Dereference(params.tiles); + SamplerState wrap_sampler = G_Dereference(params.pt_wrap_sampler); const Vec4 background_color_a = LinearFromSrgb(Vec4(0.30, 0.30, 0.30, 1)); const Vec4 background_color_b = LinearFromSrgb(Vec4(0.15, 0.15, 0.15, 1)); @@ -85,17 +86,18 @@ ComputeShader2D(V_BackdropCS, 8, 8) P_TileKind tile = tiles.Load(Vec3(tile_pos, 0)); f32 half_thickness = 1; - f32 half_bounds_size = P_WorldPitch * 0.5; - Vec2 bounds_screen_p0 = mul(params.xf.world_to_screen, Vec3(-half_bounds_size, -half_bounds_size, 1)); - Vec2 bounds_screen_p1 = mul(params.xf.world_to_screen, Vec3(half_bounds_size, half_bounds_size, 1)); - bool is_in_bounds = - screen_pos.x > (bounds_screen_p0.x - half_thickness) && - screen_pos.y > (bounds_screen_p0.y - half_thickness) && - screen_pos.x < (bounds_screen_p1.x + half_thickness) && - 
screen_pos.y < (bounds_screen_p1.y + half_thickness); - if (is_in_bounds) + f32 half_world_bounds_size = P_WorldPitch * 0.5; + Vec2 world_bounds_screen_p0 = mul(params.xf.world_to_screen, Vec3(-half_world_bounds_size, -half_world_bounds_size, 1)); + Vec2 world_bounds_screen_p1 = mul(params.xf.world_to_screen, Vec3(half_world_bounds_size, half_world_bounds_size, 1)); + b32 is_in_world_bounds = + screen_pos.x > (world_bounds_screen_p0.x - half_thickness) && + screen_pos.y > (world_bounds_screen_p0.y - half_thickness) && + screen_pos.x < (world_bounds_screen_p1.x + half_thickness) && + screen_pos.y < (world_bounds_screen_p1.y + half_thickness); + + if (is_in_world_bounds) { - // Grid checker + // Checkered grid { i32 color_idx = 0; Vec4 colors[2] = { @@ -155,7 +157,6 @@ ComputeShader2D(V_BackdropCS, 8, 8) } else if (tile != P_TileKind_Empty) { - SamplerState wrap_sampler = G_Dereference(params.pt_wrap_sampler); SPR_Slice slice = params.tile_slices[tile]; Texture2D tile_tex = G_Dereference(slice.tex); Vec4 tile_col = tile_tex.Sample(wrap_sampler, world_pos); @@ -399,7 +400,7 @@ ComputeShader(V_SimParticlesCS, 64) } Vec2 cell_pos = floor(mul(params.xf.world_to_cell, Vec3(particle.pos, 1))); - b32 is_in_bounds = cell_pos.x >= 0 && cell_pos.y >= 0 && cell_pos.x < countof(stains).x && cell_pos.y < countof(stains).y; + b32 is_in_world_bounds = cell_pos.x >= 0 && cell_pos.y >= 0 && cell_pos.x < countof(stains).x && cell_pos.y < countof(stains).y; // Simulate f32 old_exists = particle.exists; @@ -415,7 +416,7 @@ ComputeShader(V_SimParticlesCS, 64) { particle.exists = 0; } - if (!is_in_bounds) + if (!is_in_world_bounds) { particle.exists = 0; } @@ -424,7 +425,7 @@ ComputeShader(V_SimParticlesCS, 64) // Commit { // FIXME: Atomic write - if (is_in_bounds) + if (is_in_world_bounds) { b32 should_stain = 0; if ((particle.flags & V_ParticleFlag_StainTrail) || ((particle.flags & V_ParticleFlag_StainOnPrune) && particle.exists == 0)) @@ -466,6 +467,185 @@ 
ComputeShader(V_SimParticlesCS, 64) ComputeShader2D(V_ShadeCS, 8, 8) { + V_GpuParams params = G_Dereference(V_ShaderConst_Params)[0]; + RWTexture2D shade_tex = G_Dereference(params.shade_rw); + + Vec2 shade_pos = SV_DispatchThreadID + Vec2(0.5, 0.5); + if (all(shade_pos < countof(shade_tex))) + { + Vec4 result = 0; + result.r = shade_pos.x / countof(shade_tex).x; + + result.a = 1; + shade_tex[shade_pos] = result; + } +} + +//////////////////////////////////////////////////////////// +//~ Composite + +////////////////////////////// +//- Vertex shader + +VertexShader(V_CompositeVS, V_CompositePSInput) +{ + Vec2 uv = RectUvFromVertexId(SV_VertexID); + V_CompositePSInput result; + result.sv_position = Vec4(NdcFromUv(uv).xy, 0, 1); + return result; +} + +////////////////////////////// +//- Pixel shader + +PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) +{ + V_GpuParams params = G_Dereference(V_ShaderConst_Params)[0]; + Texture2D shade_tex = G_Dereference(params.shade_ro); + SamplerState clamp_sampler = G_Dereference(params.pt_clamp_sampler); + + Vec2 screen_pos = input.sv_position.xy; + + Vec2 world_pos = mul(params.xf.screen_to_world, Vec3(screen_pos, 1)); + Vec2 tile_pos = mul(params.xf.world_to_tile, Vec3(world_pos, 1)); + P_TileKind equipped_tile = params.equipped_tile; + + f32 half_thickness = 1; + + Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5; + Vec2 world_bounds_screen_p0 = mul(params.xf.world_to_screen, Vec3(-half_world_dims.xy, 1)); + Vec2 world_bounds_screen_p1 = mul(params.xf.world_to_screen, Vec3(half_world_dims.xy, 1)); + b32 is_in_world_bounds = ( + screen_pos.x > (world_bounds_screen_p0.x - half_thickness) && + screen_pos.y > (world_bounds_screen_p0.y - half_thickness) && + screen_pos.x < (world_bounds_screen_p1.x + half_thickness) && + screen_pos.y < (world_bounds_screen_p1.y + half_thickness) + ); + + Vec2 shade_pos = mul(params.xf.screen_to_shade, Vec3(screen_pos.xy, 1)); + + //- Shaded color + Vec4 
shade_color = 0; + if (all(shade_pos >= Vec2(0, 0)) && all(shade_pos < countof(shade_tex))) + { + Vec2 shade_uv = shade_pos / countof(shade_tex); + shade_color = shade_tex.Sample(clamp_sampler, shade_uv); + } + + //- Tile selection overlay + Vec4 selection_color = 0; + if (params.has_mouse_focus && params.selection_mode == V_SelectionMode_Tile) + { + + Vec4 border_color = LinearFromSrgb(Vec4(1, 1, 1, 1)); + // Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.4, 0.4, 0.25)); + Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.8, 0.4, 0.6)); + + Rng2 screen_selection = params.screen_selection; + Rng2 world_selection = params.world_selection; + + Rng2 tile_selection; + tile_selection.p0 = floor(mul(params.xf.world_to_tile, Vec3(world_selection.p0, 1))); + tile_selection.p1 = ceil(mul(params.xf.world_to_tile, Vec3(world_selection.p1, 1))); + + f32 dist = 100000000; + dist = min(dist, screen_pos.x - screen_selection.p0.x); + dist = min(dist, screen_pos.y - screen_selection.p0.y); + dist = min(dist, screen_selection.p1.x - screen_pos.x); + dist = min(dist, screen_selection.p1.y - screen_pos.y); + dist = -dist; + + // if (dist >= -half_thickness && dist <= half_thickness) + // { + // selection_color = border_color; + // } + // else + { + if ( + world_pos.x > -(P_WorldPitch / 2) && + world_pos.y > -(P_WorldPitch / 2) && + world_pos.x < (P_WorldPitch / 2) && + world_pos.y < (P_WorldPitch / 2) && + tile_pos.x >= tile_selection.p0.x && + tile_pos.x <= tile_selection.p1.x && + tile_pos.y >= tile_selection.p0.y && + tile_pos.y <= tile_selection.p1.y + ) + { + selection_color = inner_color; + } + } + + // Premultiply + selection_color.rgb *= selection_color.a; + } + + //- Grid overlay + Vec4 overlay_color = 0; + if (is_in_world_bounds) + { + // Grid outline + if (V_ShaderConst_GpuFlags & V_GpuFlag_DebugDraw) + { + const Vec4 grid_color = LinearFromSrgb(Vec4(1, 1, 1, 0.1)); + Vec2 grid_screen_p0 = mul(params.xf.world_to_screen, Vec3(floor(world_pos), 1)); + Vec2 grid_screen_p1 = 
mul(params.xf.world_to_screen, Vec3(ceil(world_pos), 1)); + f32 grid_dist = 100000; + grid_dist = min(grid_dist, abs(screen_pos.x - grid_screen_p0.x)); + grid_dist = min(grid_dist, abs(screen_pos.x - grid_screen_p1.x)); + grid_dist = min(grid_dist, abs(screen_pos.y - grid_screen_p0.y)); + grid_dist = min(grid_dist, abs(screen_pos.y - grid_screen_p1.y)); + if (grid_dist <= half_thickness * 0.5) + { + overlay_color = grid_color; + } + } + // Axis + if (V_ShaderConst_GpuFlags & V_GpuFlag_DebugDraw) + { + const Vec4 x_axis_color = LinearFromSrgb(Vec4(0.75, 0, 0, 1)); + const Vec4 y_axis_color = LinearFromSrgb(Vec4(0, 0.75, 0, 1)); + + Vec2 zero_screen = mul(params.xf.world_to_screen, Vec3(0, 0, 1)); + f32 x_dist = abs(screen_pos.x - zero_screen.x); + f32 y_dist = abs(screen_pos.y - zero_screen.y); + if (y_dist <= half_thickness) + { + overlay_color = x_axis_color; + } + else if (x_dist <= half_thickness) + { + overlay_color = y_axis_color; + } + } + // World bounds + { + const Vec4 bounds_color = LinearFromSrgb(Vec4(0.75, 0.75, 0, 1)); + f32 bounds_dist = 100000; + bounds_dist = min(bounds_dist, abs(screen_pos.x - world_bounds_screen_p0.x)); + bounds_dist = min(bounds_dist, abs(screen_pos.x - world_bounds_screen_p1.x)); + bounds_dist = min(bounds_dist, abs(screen_pos.y - world_bounds_screen_p0.y)); + bounds_dist = min(bounds_dist, abs(screen_pos.y - world_bounds_screen_p1.y)); + if (bounds_dist <= half_thickness) + { + overlay_color = bounds_color; + } + } + // Premultiply + overlay_color.rgb *= overlay_color.a; + } + + //- Composite + Vec4 result = 0; + result = BlendPremul(selection_color, result); + result = BlendPremul(overlay_color, result); + result = BlendPremul(shade_color, result); + + result = Unpremul(result); + + V_CompositePSOutput output; + output.sv_target0 = result; + return output; } //////////////////////////////////////////////////////////// @@ -499,141 +679,3 @@ PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input) output.sv_target0 = 
input.color_lin; return output; } - -//////////////////////////////////////////////////////////// -//~ Overlay - -////////////////////////////// -//- Vertex shader - -VertexShader(V_OverlayVS, V_OverlayPSInput) -{ - Vec2 uv = RectUvFromVertexId(SV_VertexID); - V_OverlayPSInput result; - result.sv_position = Vec4(NdcFromUv(uv).xy, 0, 1); - return result; -} - -////////////////////////////// -//- Pixel shader - -PixelShader(V_OverlayPS, V_OverlayPSOutput, V_OverlayPSInput input) -{ - V_GpuParams params = G_Dereference(V_ShaderConst_Params)[0]; - Vec2 screen_pos = input.sv_position.xy; - Vec4 result = 0; - - Vec2 world_pos = mul(params.xf.screen_to_world, Vec3(screen_pos, 1)); - Vec2 tile_pos = mul(params.xf.world_to_tile, Vec3(world_pos, 1)); - P_TileKind equipped_tile = params.equipped_tile; - - f32 half_thickness = 1; - f32 half_bounds_size = P_WorldPitch * 0.5; - Vec2 bounds_screen_p0 = mul(params.xf.world_to_screen, Vec3(-half_bounds_size, -half_bounds_size, 1)); - Vec2 bounds_screen_p1 = mul(params.xf.world_to_screen, Vec3(half_bounds_size, half_bounds_size, 1)); - bool is_in_bounds = screen_pos.x > (bounds_screen_p0.x - half_thickness) && - screen_pos.y > (bounds_screen_p0.y - half_thickness) && - screen_pos.x < (bounds_screen_p1.x + half_thickness) && - screen_pos.y < (bounds_screen_p1.y + half_thickness); - - Vec4 border_color = LinearFromSrgb(Vec4(1, 1, 1, 1)); - // Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.4, 0.4, 0.25)); - - Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.8, 0.4, 0.6)); - - Rng2 screen_selection = params.screen_selection; - Rng2 world_selection = params.world_selection; - - Rng2 tile_selection; - tile_selection.p0 = floor(mul(params.xf.world_to_tile, Vec3(world_selection.p0, 1))); - tile_selection.p1 = ceil(mul(params.xf.world_to_tile, Vec3(world_selection.p1, 1))); - - if (params.has_mouse_focus) - { - if (params.selection_mode == V_SelectionMode_Tile) - { - f32 dist = 100000000; - dist = min(dist, screen_pos.x - screen_selection.p0.x); 
- dist = min(dist, screen_pos.y - screen_selection.p0.y); - dist = min(dist, screen_selection.p1.x - screen_pos.x); - dist = min(dist, screen_selection.p1.y - screen_pos.y); - dist = -dist; - - // if (dist >= -half_thickness && dist <= half_thickness) - // { - // result = border_color; - // } - // else - { - if ( - world_pos.x > -(P_WorldPitch / 2) && - world_pos.y > -(P_WorldPitch / 2) && - world_pos.x < (P_WorldPitch / 2) && - world_pos.y < (P_WorldPitch / 2) && - tile_pos.x >= tile_selection.p0.x && - tile_pos.x <= tile_selection.p1.x && - tile_pos.y >= tile_selection.p0.y && - tile_pos.y <= tile_selection.p1.y - ) - { - result = inner_color; - } - } - } - } - - if (is_in_bounds) - { - // Grid outline - if (V_ShaderConst_GpuFlags & V_GpuFlag_DebugDraw) - { - const Vec4 grid_color = LinearFromSrgb(Vec4(1, 1, 1, 0.1)); - Vec2 grid_screen_p0 = mul(params.xf.world_to_screen, Vec3(floor(world_pos), 1)); - Vec2 grid_screen_p1 = mul(params.xf.world_to_screen, Vec3(ceil(world_pos), 1)); - f32 grid_dist = 100000; - grid_dist = min(grid_dist, abs(screen_pos.x - grid_screen_p0.x)); - grid_dist = min(grid_dist, abs(screen_pos.x - grid_screen_p1.x)); - grid_dist = min(grid_dist, abs(screen_pos.y - grid_screen_p0.y)); - grid_dist = min(grid_dist, abs(screen_pos.y - grid_screen_p1.y)); - if (grid_dist <= half_thickness * 0.5) - { - result = grid_color; - } - } - // Axis - if (V_ShaderConst_GpuFlags & V_GpuFlag_DebugDraw) - { - const Vec4 x_axis_color = LinearFromSrgb(Vec4(0.75, 0, 0, 1)); - const Vec4 y_axis_color = LinearFromSrgb(Vec4(0, 0.75, 0, 1)); - - Vec2 zero_screen = mul(params.xf.world_to_screen, Vec3(0, 0, 1)); - f32 x_dist = abs(screen_pos.x - zero_screen.x); - f32 y_dist = abs(screen_pos.y - zero_screen.y); - if (y_dist <= half_thickness) - { - result = x_axis_color; - } - else if (x_dist <= half_thickness) - { - result = y_axis_color; - } - } - // World bounds - { - const Vec4 bounds_color = LinearFromSrgb(Vec4(0.75, 0.75, 0, 1)); - f32 bounds_dist = 100000; - 
bounds_dist = min(bounds_dist, abs(screen_pos.x - bounds_screen_p0.x)); - bounds_dist = min(bounds_dist, abs(screen_pos.x - bounds_screen_p1.x)); - bounds_dist = min(bounds_dist, abs(screen_pos.y - bounds_screen_p0.y)); - bounds_dist = min(bounds_dist, abs(screen_pos.y - bounds_screen_p1.y)); - if (bounds_dist <= half_thickness) - { - result = bounds_color; - } - } - } - - V_OverlayPSOutput output; - output.sv_target0 = result; - return output; -} diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index 2b0d03f4..9efe8552 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -13,7 +13,7 @@ Struct(V_QuadPSOutput) }; //////////////////////////////////////////////////////////// -//~ Shape shader types +//~ Debug shape shader types Struct(V_DVertPSInput) { @@ -27,15 +27,15 @@ Struct(V_DVertPSOutput) }; //////////////////////////////////////////////////////////// -//~ Overlay shader types +//~ Composite shader types -Struct(V_OverlayPSInput) +Struct(V_CompositePSInput) { Semantic(Vec4, sv_position); }; -Struct(V_OverlayPSOutput) +Struct(V_CompositePSOutput) { Semantic(Vec4, sv_target0); }; @@ -67,10 +67,10 @@ ComputeShader(V_SimParticlesCS, 64); //- Shade ComputeShader2D(V_ShadeCS, 8, 8); +//- Composite +VertexShader(V_CompositeVS, V_CompositePSInput); +PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input); + //- Debug shapes VertexShader(V_DVertVS, V_DVertPSInput); PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input); - -//- Overlay -VertexShader(V_OverlayVS, V_OverlayPSInput); -PixelShader(V_OverlayPS, V_OverlayPSOutput, V_OverlayPSInput input); diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 5474a976..d540756d 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -32,14 +32,14 @@ Struct(V_Xforms) Xform world_to_screen; Xform screen_to_world; - // Shade <-> screen - Xform shade_to_screen; - Xform screen_to_shade; - // World <-> 
shade Xform world_to_shade; Xform shade_to_world; + // Shade <-> screen + Xform shade_to_screen; + Xform screen_to_shade; + // World <-> cell Xform world_to_cell; Xform cell_to_world; @@ -59,15 +59,20 @@ Struct(V_GpuParams) { // TODO: Use simulation dt f32 dt; + V_Xforms xf; Vec2 screen_dims; G_Texture2DRef screen_ro; G_RWTexture2DRef screen_rw; - V_Xforms xf; + + Vec2 shade_dims; + G_Texture2DRef shade_ro; + G_RWTexture2DRef shade_rw; u64 tick; u64 seed; + G_SamplerStateRef pt_clamp_sampler; G_SamplerStateRef pt_wrap_sampler; V_SelectionMode selection_mode; diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index 8768a501..e0008a09 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -23,6 +23,7 @@ SPR_SheetKey SPR_SheetKeyFromResource(ResourceKey resource) SPR_Slice SPR_SliceFromSheet(SPR_SheetKey sheet, String slice_name) { + // TODO: Ability to specify desired alpha modes (Straight, Premultiplied, Opaque) SPR_Slice result = Zi; u64 hash = sheet.r.v; diff --git a/src/ttf/ttf_dwrite/ttf_dwrite.c b/src/ttf/ttf_dwrite/ttf_dwrite.c index ad422882..b8cf9cd8 100644 --- a/src/ttf/ttf_dwrite/ttf_dwrite.c +++ b/src/ttf/ttf_dwrite/ttf_dwrite.c @@ -299,9 +299,8 @@ TTF_GlyphResult TTF_RasterizeGlyphFromCodepoint(Arena *arena, u32 codepoint, Res } ////////////////////////////// - //- Copy result + //- Write result - // Copy from target to result Vec2I32 dst_dims = Zi; u32 *dst_pixels = 0; if (SUCCEEDED(hr)) diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index f0a6c4b6..269c446c 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1539,7 +1539,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) // Box rect { - UI_DRect *rect = PushStruct(frame->rects_arena, UI_DRect); + UI_GpuRect *rect = PushStruct(frame->rects_arena, UI_GpuRect); rect->bounds = box->screen_rect; rect->background_lin = LinearFromSrgb(box->desc.background_color); rect->border_lin = LinearFromSrgb(box->desc.border_color); @@ -1665,7 +1665,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) Vec2 
glyph_dims = DimsFromRng2(rr.bounds); if (glyph_dims.x != 0 || glyph_dims.y != 0) { - UI_DRect *rect = PushStruct(frame->rects_arena, UI_DRect); + UI_GpuRect *rect = PushStruct(frame->rects_arena, UI_GpuRect); rect->debug_lin = debug_lin; rect->tint_lin = text_color_lin; rect->tex = rr.tex; @@ -1693,12 +1693,12 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) G_Texture2DRef draw_target_ro = G_PushTexture2DRef(frame->gpu_arena, draw_target); // Rects - u64 rects_count = ArenaCount(frame->rects_arena, UI_DRect); + u64 rects_count = ArenaCount(frame->rects_arena, UI_GpuRect); G_ResourceHandle rects_buff = G_PushBufferFromCpuCopy(frame->gpu_arena, frame->cl, StringFromArena(frame->rects_arena)); - G_StructuredBufferRef rects_ro = G_PushStructuredBufferRef(frame->gpu_arena, rects_buff, UI_DRect); + G_StructuredBufferRef rects_ro = G_PushStructuredBufferRef(frame->gpu_arena, rects_buff, UI_GpuRect); // Params - UI_DParams params = Zi; + UI_GpuParams params = Zi; { params.target_size = draw_size; params.target_ro = draw_target_ro; @@ -1708,7 +1708,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) params.aa = TweakFloat("UI anti-aliasing", 1, 0, 1); } G_ResourceHandle params_buff = G_PushBufferFromCpuCopy(frame->gpu_arena, frame->cl, StringFromStruct(&params)); - G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_DParams); + G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams); // Constants G_SetConstant(frame->cl, UI_ShaderConst_Params, params_ro); diff --git a/src/ui/ui_gpu.g b/src/ui/ui_gpu.g index c70df785..136993da 100644 --- a/src/ui/ui_gpu.g +++ b/src/ui/ui_gpu.g @@ -6,9 +6,9 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) { - UI_DParams params = G_Dereference(UI_ShaderConst_Params)[0]; - StructuredBuffer rects = G_Dereference(params.rects); - UI_DRect rect = rects[SV_InstanceID]; + UI_GpuParams params = G_Dereference(UI_ShaderConst_Params)[0]; + StructuredBuffer rects = 
G_Dereference(params.rects); + UI_GpuRect rect = rects[SV_InstanceID]; Vec2 rect_uv = RectUvFromVertexId(SV_VertexID); Vec2 tex_uv = lerp(rect.tex_slice_uv.p0, rect.tex_slice_uv.p1, rect_uv); @@ -35,11 +35,11 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) { - UI_DParams params = G_Dereference(UI_ShaderConst_Params)[0]; - StructuredBuffer rects = G_Dereference(params.rects); + UI_GpuParams params = G_Dereference(UI_ShaderConst_Params)[0]; + StructuredBuffer rects = G_Dereference(params.rects); SamplerState sampler = G_Dereference(params.sampler); - UI_DRect rect = rects[input.rect_idx]; + UI_GpuRect rect = rects[input.rect_idx]; Vec2 p = input.sv_position.xy; Vec2 rect_uv = input.rect_uv; @@ -135,7 +135,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput) PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) { - UI_DParams params = G_Dereference(UI_ShaderConst_Params)[0]; + UI_GpuParams params = G_Dereference(UI_ShaderConst_Params)[0]; Texture2D tex = G_Dereference(params.target_ro); SamplerState sampler = G_Dereference(params.sampler); diff --git a/src/ui/ui_shared.cgh b/src/ui/ui_shared.cgh index 8c995a8c..0c9855e6 100644 --- a/src/ui/ui_shared.cgh +++ b/src/ui/ui_shared.cgh @@ -4,7 +4,7 @@ G_DeclConstant(G_StructuredBufferRef, UI_ShaderConst_Params, 0); G_DeclConstant(b32, UI_ShaderConst_DebugDraw, 1); -Struct(UI_DParams) +Struct(UI_GpuParams) { f32 aa; @@ -20,7 +20,7 @@ Struct(UI_DParams) //////////////////////////////////////////////////////////// //~ Rect types -Struct(UI_DRect) +Struct(UI_GpuRect) { Rng2 bounds;