function [phi, psi, h] = hodge_poisson_faces(mu_r, mu_g, dx, dy)
%HODGE_POISSON_FACES Discrete Hodge/Helmholtz decomposition on a uniform MAC grid.
% mu_r : ny-by-(nx+1) x-component on vertical faces (x-faces)
% mu_g : (ny+1)-by-nx y-component on horizontal faces (y-faces)
% phi : ny-by-nx scalar potential at cell centers (gauge fixed)
% psi : ny-by-nx stream function averaged to cell centers (for visualization)
% h : struct with face residual fields:
% Decomposition on faces:
% mu = grad(phi) + perpgrad(psi_corner) + h
% Boundary conventions used here:
% phi satisfies the natural Neumann condition implied by matching grad(phi) to mu on boundary faces.
% psi_corner satisfies Dirichlet psi=0 on boundary corners, which makes the curl-projection unique.
assert(nxp1 == nx+1, 'mu_r must be ny-by-(nx+1).');
assert(nyp1 == ny+1, 'mu_g must be (ny+1)-by-nx.');
idx = @(i,j) (j-1)*ny + i; % cell-center indexing (column-major)
% ---------------------------------------------------------------------
% Step A: solve for phi on cell centers from Δphi = div(mu) with nonhomog Neumann
% ---------------------------------------------------------------------
div_mu = (mu_r(:,2:nx+1) - mu_r(:,1:nx))/dx + (mu_g(2:ny+1,:) - mu_g(1:ny,:))/dy; % ny-by-nx
% Add non-homogeneous Neumann flux contributions induced by boundary face values.
% These terms are essential when div_mu is (nearly) zero but boundary flux is not, e.g. mu=(x,-y).
b(idx(i,1)) = b(idx(i,1)) + mu_r(i,1)/dx; % left boundary
b(idx(i,nx)) = b(idx(i,nx)) - mu_r(i,nx+1)/dx; % right boundary
b(idx(1,j)) = b(idx(1,j)) + mu_g(1,j)/dy; % bottom boundary
b(idx(ny,j)) = b(idx(ny,j)) - mu_g(ny+1,j)/dy; % top boundary
% Assemble discrete Laplacian Δ on cell centers with Neumann handled via ghost elimination.
% Interior stencil: +ax neighbors, +ay neighbors, diagonal -(2ax+2ay).
% Boundary cells: missing-neighbor effect adds +ax or +ay to the diagonal (reducing magnitude).
I = zeros(5*N,1); J = zeros(5*N,1); V = zeros(5*N,1);
if j==1, diagc = diagc + ax; end
if j==nx, diagc = diagc + ax; end
if i==1, diagc = diagc + ay; end
if i==ny, diagc = diagc + ay; end
t=t+1; I(t)=p; J(t)=idx(i,j-1); V(t)=ax;
t=t+1; I(t)=p; J(t)=idx(i,j+1); V(t)=ax;
t=t+1; I(t)=p; J(t)=idx(i-1,j); V(t)=ay;
t=t+1; I(t)=p; J(t)=idx(i+1,j); V(t)=ay;
t=t+1; I(t)=p; J(t)=p; V(t)=diagc;
L = sparse(I(1:t), J(1:t), V(1:t), N, N);
% Gauge fixing to remove constant nullspace
phi = reshape(phi_vec, ny, nx);
% ---------------------------------------------------------------------
% Step B: compute grad(phi) on faces with boundary faces matched to mu (Neumann convention)
% ---------------------------------------------------------------------
grad_r = zeros(ny, nx+1);
grad_g = zeros(ny+1, nx);
grad_r(:,2:nx) = (phi(:,2:nx) - phi(:,1:nx-1))/dx;
grad_r(:,nx+1) = mu_r(:,nx+1);
grad_g(2:ny,:) = (phi(2:ny,:) - phi(1:ny-1,:))/dy;
grad_g(ny+1,:) = mu_g(ny+1,:);
% ---------------------------------------------------------------------
% Step C: solve for psi on corners via Δpsi = -curl(r), with psi=0 on boundary corners
% ---------------------------------------------------------------------
% psi_corner lives on (ny+1)-by-(nx+1) corner grid.
% Define discrete curl of a face field at interior corners (i=2..ny, j=2..nx):
% curl(i,j) = ( r_g(i,j) - r_g(i,j-1) )/dx - ( r_r(i,j) - r_r(i-1,j) )/dy
curl_corner = zeros(ny+1, nx+1);
dv_dx = (r_g(i,j) - r_g(i,j-1))/dx;
du_dy = (r_r(i,j) - r_r(i-1,j))/dy;
curl_corner(i,j) = dv_dx - du_dy;
% Solve Poisson on interior corners only, Dirichlet boundary psi=0.
nyc = ny-1; nxc = nx-1; % number of interior corners in y/x
idxc = @(ii,jj) (jj-1)*nyc + ii; % interior-corner indexing, ii=1..ny-1, jj=1..nx-1
I3 = zeros(5*Nc,1); J3 = zeros(5*Nc,1); V3 = zeros(5*Nc,1);
i = ii+1; j = jj+1; % map to corner indices in full grid
% Equation: Δpsi = -curl(r)
b3(p) = -curl_corner(i,j);
t3=t3+1; I3(t3)=p; J3(t3)=idxc(ii,jj-1); V3(t3)=ax;
t3=t3+1; I3(t3)=p; J3(t3)=idxc(ii,jj+1); V3(t3)=ax;
t3=t3+1; I3(t3)=p; J3(t3)=idxc(ii-1,jj); V3(t3)=ay;
t3=t3+1; I3(t3)=p; J3(t3)=idxc(ii+1,jj); V3(t3)=ay;
t3=t3+1; I3(t3)=p; J3(t3)=p; V3(t3)=diagc;
A3 = sparse(I3(1:t3), J3(1:t3), V3(1:t3), Nc, Nc);
psi_corner = zeros(ny+1, nx+1);
psi_corner(ii+1, jj+1) = psi_int(idxc(ii,jj));
% ---------------------------------------------------------------------
% Step D: compute perpgrad(psi) exactly on faces from corner psi
% ---------------------------------------------------------------------
perp_r = zeros(ny, nx+1);
perp_g = zeros(ny+1, nx);
% x-component on vertical faces: dpsi/dy
perp_r(i,j) = (psi_corner(i+1,j) - psi_corner(i,j))/dy;
% y-component on horizontal faces: -dpsi/dx
perp_g(i,j) = -(psi_corner(i,j+1) - psi_corner(i,j))/dx;
% ---------------------------------------------------------------------
% Step E: residual/harmonic term on faces and cell-centered psi for plotting
% ---------------------------------------------------------------------
h.r = mu_r - grad_r - perp_r;
h.g = mu_g - grad_g - perp_g;
% cell-centered psi for visualization (average of surrounding corners)
psi = 0.25*(psi_corner(1:ny,1:nx) + psi_corner(2:ny+1,1:nx) + ...
psi_corner(1:ny,2:nx+1) + psi_corner(2:ny+1,2:nx+1));
function [mu, Sigma, info] = alg1_mu_sigma_2d(P, dt, h, x0, Lstack)
%ALG1_MU_SIGMA_2D 2D extension of Algorithm-1 (simple method)
% P : nTraj x 2 x nFrames array. P(j,:,k) = [r,g] at frame k of traj j.
% dt : scalar. fixed time step between frames (here 0.1548).
% W : scalar. Gaussian kernel bandwidth in (r,g)-space.
% x0 : nX0 x 2 array of query points where mu(x0), Sigma(x0) are evaluated.
% mu : nX0 x 2 array. mu(q,:) = [mu_r, mu_g] at x0(q,:).
% Sigma : 2 x 2 x nX0 array. Sigma(:,:,q) covariance-rate matrix at x0(q,:).
% info : struct with diagnostics (effective sample size, sum of weights).
% Build sample pool: current positions X and increments dX across all traj & frames
X = P(:,:,1:end-1); % nTraj x 2 x (nFrames-1)
dX = diff(P, 1, 3); % nTraj x 2 x (nFrames-1)
% Reshape into (nSamples x 2)
[nTraj, ~, nFrames] = size(P);
nSamples = nTraj * nSteps;
X = reshape(permute(X, [1,3,2]), nSamples, 2); % [traj,step,dim] -> rows
dX = reshape(permute(dX, [1,3,2]), nSamples, 2);
% Constant factors: normalization of Gaussian kernel cancels in ratios,
% but we keep the unnormalized exponential for numerical stability.
inv2h2 = 1.0 / (2.0 * h * h);
% Main loop over query points
% 3.1) Gaussian kernel weights w_i = exp(-||z||^2/(2h^2)), z = L^{-1} (X - x0)
w = exp(-dist2 * inv2h2);
info.neff(q) = (sw*sw) / max(sum(w.^2), realmin);
% If virtually no effective samples, leave NaN
if sw <= 0 || info.neff(q) < 5
% 3.2) mu(x0) = sum w * dX / (dt * sum w)
mu_q = (w.' * dX) / (dt * sw); % 1x2
% Residual increments: dX - mu*dt
% Sigma(x0) = (1/dt) * [ sum w * (R R^T) / sum w ]
% Implement as weighted second moment: (R' * diag(w) * R)
% without forming diag(w)
R = dX - (dt * mu_q); % nSamples x 2
RW = R .* sqrt(w); % nSamples x 2
S = (RW.' * RW) / sw; % 2x2 (this equals sum w * R R^T / sum w)
Sigma(:,:,q) = S / dt; % covariance-rate matrix
% Enforce exact symmetry to remove roundoff asymmetry
Sigma(:,:,q) = 0.5 * (Sigma(:,:,q) + Sigma(:,:,q).');
% Some additional diagnostics