Extra L4 — Visualization, EDA, and How Plots Can Mislead¶

This notebook complements Lecture 4: Visualization, EDA & Dynamic Programming.

Goal¶

Learn to diagnose how plotting choices affect interpretation.
Then use visualization as a formal validation tool for numerical algorithms,
with the cake eating problem as the running example.

Core lesson¶

A graph is part of the empirical argument.
Bad visualization is not merely ugly—it can be analytically misleading.
Conversely, well-designed graphs are one of the most powerful tools for verifying that a numerical algorithm does what you think it does.

In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [ ]:
np.random.seed(42)

years = np.arange(2015, 2025)
growth = np.array([1.8, 1.9, 2.0, 2.2, 2.1, 1.7, 1.8, 1.9, 2.0, 2.1])
df = pd.DataFrame({"year": years, "growth_rate": growth})
df

1. A misleading y-axis¶

These two graphs use the same data.

In [ ]:
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(df["year"], df["growth_rate"], marker="o")
ax.set_ylim(0, 2.5)  # anchor the axis at zero; otherwise autoscaling truncates it anyway
ax.set_title("Growth rate over time — full y-axis")
ax.set_ylabel("Percent")
plt.show()
In [ ]:
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(df["year"], df["growth_rate"], marker="o")
ax.set_ylim(1.7, 2.2)
ax.set_title("Growth rate over time — truncated y-axis")
ax.set_ylabel("Percent")
plt.show()

Discussion¶

The second plot visually exaggerates small differences.
Sometimes that is justified; often it is manipulative or at least fragile.
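The distortion can be quantified: compare what fraction of the axis range the data's swing occupies in each version, a simplified cousin of Tufte's "lie factor". A minimal sketch, taking (0, 2.5) as the full axis; the helper `visual_exaggeration` is illustrative, not a standard function.

In [ ]:
```python
import numpy as np

growth = np.array([1.8, 1.9, 2.0, 2.2, 2.1, 1.7, 1.8, 1.9, 2.0, 2.1])

def visual_exaggeration(data, ylim_full, ylim_trunc):
    """How many times larger the data's visual swing looks on the
    truncated axis than on the full axis (hypothetical helper)."""
    span = data.max() - data.min()
    frac_full  = span / (ylim_full[1] - ylim_full[0])
    frac_trunc = span / (ylim_trunc[1] - ylim_trunc[0])
    return frac_trunc / frac_full

factor = visual_exaggeration(growth, ylim_full=(0, 2.5), ylim_trunc=(1.7, 2.2))
print(f"The truncated axis magnifies the visual swing {factor:.1f}x")
```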

2. Outliers and scale¶

A few extreme observations can flatten the rest of the distribution.

In [ ]:
wages = np.concatenate([np.random.normal(30, 5, 400), np.array([120, 150, 180])])
df_w = pd.DataFrame({"wage": wages})

fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(df_w["wage"], bins=30)
ax.set_title("Wage distribution with outliers")
plt.show()
In [ ]:
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(df_w.loc[df_w["wage"] < 80, "wage"], bins=30)
ax.set_title("Wage distribution excluding extreme outliers")
plt.show()
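This is one reason histograms should travel with summary statistics: robust measures (median, IQR) barely move when the outliers are dropped, while the mean shifts visibly. A sketch, regenerating the simulated wages from above:

In [ ]:
```python
import numpy as np
import pandas as pd

np.random.seed(42)
wages = np.concatenate([np.random.normal(30, 5, 400), np.array([120.0, 150.0, 180.0])])
df_w = pd.DataFrame({"wage": wages})

full    = df_w["wage"]
trimmed = df_w.loc[df_w["wage"] < 80, "wage"]

# The mean is pulled up by three extreme values; median and IQR barely move.
summary = pd.DataFrame(
    {
        "full": [full.mean(), full.median(),
                 full.quantile(0.75) - full.quantile(0.25)],
        "excluding outliers": [trimmed.mean(), trimmed.median(),
                               trimmed.quantile(0.75) - trimmed.quantile(0.25)],
    },
    index=["mean", "median", "IQR"],
)
print(summary.round(2))
```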

3. Binning choices change the apparent story¶

In [ ]:
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(df_w["wage"], bins=10)
ax.set_title("Histogram with 10 bins")
plt.show()
In [ ]:
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(df_w["wage"], bins=50)
ax.set_title("Histogram with 50 bins")
plt.show()
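Instead of choosing 10 or 50 bins by hand, a data-driven rule removes the arbitrary degree of freedom. NumPy's bin-edge estimators (e.g. Freedman–Diaconis, which can be passed straight to `ax.hist(..., bins="fd")`) tie the bin count to sample size and spread; a sketch of what they select here:

In [ ]:
```python
import numpy as np

np.random.seed(42)
wages = np.concatenate([np.random.normal(30, 5, 400), np.array([120.0, 150.0, 180.0])])

# Compare the bin counts chosen by standard estimators on the same data.
for rule in ["fd", "sturges", "auto"]:
    edges = np.histogram_bin_edges(wages, bins=rule)
    print(f"{rule:>8s}: {len(edges) - 1} bins")
```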

4. A more defensible plotting workflow¶

When plotting data for research, ask:

  1. What quantity is the graph trying to communicate?
  2. Which design choices change the interpretation?
  3. Is there a robustness graph I should also show?
In [ ]:
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(df["year"], df["growth_rate"], marker="o")
ax.set_title("Transparent version with annotation")
ax.set_ylabel("Percent")
ax.set_xlabel("Year")
for _, row in df.iterrows():
    ax.annotate(f"{row['growth_rate']:.1f}", (row["year"], row["growth_rate"]), fontsize=8)
plt.show()

Short exercise¶

Write a short note answering:

  • When is a truncated axis acceptable?
  • Why should histograms be accompanied by summary statistics?
  • Which of the graphs above would you include in a paper appendix rather than the main text?

Optional extension¶

Take one graph from your main course material and produce:

  1. a misleading version,
  2. a transparent version,
  3. a sentence defending the transparent version.

Advanced section — Numerical convergence and visualization as validation¶

The cake eating problem from Lecture 4 has a closed-form solution.
That makes it the ideal testbed for two complementary skills:

  • diagnosing whether a numerical algorithm converges, and
  • using graphs to show that it did—and to detect when it did not.

A numerical result without a convergence diagnostic is not yet a finished result.

A1. The cake eating problem — recap¶

An agent maximises:

$$V(W) = \max_{0 \leq c \leq W} \left[ \ln(c) + \beta V(W - c) \right]$$

The analytical solution is:

$$V^*(W) = \frac{\ln(W)}{1-\beta} + \frac{(1-\beta)\ln(1-\beta) + \beta \ln(\beta)}{(1-\beta)^2}$$

$$c^*(W) = (1 - \beta) W$$

We solve numerically via value function iteration (VFI) and compare against the closed form.

In [ ]:
# Parameters
beta   = 0.95
n_grid = 200
W_max  = 10.0
W_grid = np.linspace(1e-6, W_max, n_grid)  # avoid log(0)

# Analytical solution
def V_analytical(W, beta):
    return np.log(W) / (1 - beta) + (
        (1 - beta) * np.log(1 - beta) + beta * np.log(beta)
    ) / (1 - beta) ** 2

def c_analytical(W, beta):
    return (1 - beta) * W

V_true = V_analytical(W_grid, beta)
c_true = c_analytical(W_grid, beta)
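Before using the closed form as a benchmark, it is worth verifying that it actually satisfies the Bellman equation at the optimal policy; if it does not, every "error vs analytical" plot below measures the wrong thing. A self-contained check:

In [ ]:
```python
import numpy as np

beta = 0.95
W = np.linspace(0.5, 10.0, 50)

def V_star(W, beta):
    # Closed-form value function for log-utility cake eating
    return np.log(W) / (1 - beta) + (
        (1 - beta) * np.log(1 - beta) + beta * np.log(beta)
    ) / (1 - beta) ** 2

# At the optimal policy c* = (1-beta) W, the Bellman equation holds exactly:
# V*(W) = ln(c*) + beta * V*(W - c*)
c_star   = (1 - beta) * W
residual = V_star(W, beta) - (np.log(c_star) + beta * V_star(W - c_star, beta))
print(f"Max Bellman residual of the closed form: {np.abs(residual).max():.2e}")
```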

A2. Value function iteration with convergence tracking¶

The key diagnostic is the sup-norm error across iterations:

$$\|V^{n+1} - V^n\|_\infty = \max_W |V^{n+1}(W) - V^n(W)|$$

By the contraction mapping theorem, this should decrease geometrically at rate $\beta$. If it does not, something is wrong.
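The claimed rate follows in one line: because the Bellman operator $T$ is a $\beta$-contraction in the sup-norm,

$$\|V^{n+1} - V^n\|_\infty = \|T V^n - T V^{n-1}\|_\infty \leq \beta \|V^n - V^{n-1}\|_\infty,$$

so each iteration shrinks the gap between successive iterates by at least a factor $\beta$, giving geometric decay.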

In [ ]:
def vfi(W_grid, beta, tol=1e-7, max_iter=2000):
    n      = len(W_grid)
    V      = np.zeros(n)          # initial guess
    errors = []                   # sup-norm errors per iteration
    V_history = [V.copy()]        # store every iterate for diagnostics

    for it in range(max_iter):
        V_new = np.empty(n)
        for i, W in enumerate(W_grid):
            # Feasible consumption grid for this wealth level
            c_grid  = np.linspace(1e-8, W, 300)
            W_next  = W - c_grid
            # Interpolate V at W_next
            V_next  = np.interp(W_next, W_grid, V)
            obj     = np.log(c_grid) + beta * V_next
            V_new[i] = obj.max()

        err = np.max(np.abs(V_new - V))
        errors.append(err)
        V_history.append(V_new.copy())
        V = V_new

        if err < tol:
            print(f"Converged in {it + 1} iterations. Final sup-norm error: {err:.2e}")
            break
    else:
        print(f"Did not converge after {max_iter} iterations.")

    return V, errors, V_history

V_num, errors, V_history = vfi(W_grid, beta)

A3. Convergence diagnostic — the sup-norm error plot¶

This is the first graph you should always produce after running VFI.
It has a characteristic shape that a correct implementation must reproduce:

  • monotonically decreasing,
  • approximately linear on a log scale (geometric decay at rate $\beta$).
In [ ]:
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Linear scale
axes[0].plot(errors)
axes[0].set_xlabel("Iteration")
axes[0].set_ylabel("Sup-norm error")
axes[0].set_title("Convergence — linear scale")

# Log scale — should be approximately linear
axes[1].semilogy(errors)
axes[1].set_xlabel("Iteration")
axes[1].set_ylabel("Sup-norm error (log scale)")
axes[1].set_title("Convergence — log scale (expected: linear decay)")

# Theoretical decay rate line for reference
n_it = len(errors)
theory = errors[0] * beta ** np.arange(n_it)
axes[1].semilogy(theory, linestyle="--", color="gray", label=f"Theoretical rate β={beta}")
axes[1].legend()

plt.tight_layout()
plt.show()

print(f"Observed decay ratio (median): {np.median(np.array(errors[1:]) / np.array(errors[:-1])):.4f}")
print(f"Theoretical decay rate β     : {beta:.4f}")
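The decay rate can also be estimated formally by fitting a straight line to the log errors. A self-contained sketch using a compact vectorized VFI (the consumption-share parameterization `c = s·W` is an illustrative implementation choice, not the lecture's code):

In [ ]:
```python
import numpy as np

beta, n = 0.95, 100
W_grid = np.linspace(1e-6, 10.0, n)
s_grid = np.linspace(1e-8, 1.0, 200)   # consumption shares: c = s * W

V, errors = np.zeros(n), []
for _ in range(200):
    c_mat  = np.outer(W_grid, s_grid)            # feasible consumption
    W_next = np.outer(W_grid, 1.0 - s_grid)      # remaining cake
    V_next = np.interp(W_next.ravel(), W_grid, V).reshape(W_next.shape)
    V_new  = (np.log(c_mat) + beta * V_next).max(axis=1)   # Bellman step
    errors.append(np.abs(V_new - V).max())
    V = V_new

# Fit log(err_n) against n; exp(slope) estimates the contraction modulus
slope = np.polyfit(np.arange(50, 200), np.log(errors[50:]), 1)[0]
print(f"estimated decay rate: {np.exp(slope):.4f}   (theory: beta = {beta})")
```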

A4. Comparison against the analytical solution¶

If an analytical solution exists, always plot the numerical solution against it.
Convergence of the algorithm does not guarantee correctness—it guarantees only that successive iterates are close to each other.

In [ ]:
# Recover policy function numerically (argmax)
c_num = np.empty(len(W_grid))
for i, W in enumerate(W_grid):
    c_grid  = np.linspace(1e-8, W, 300)
    W_next  = W - c_grid
    V_next  = np.interp(W_next, W_grid, V_num)
    obj     = np.log(c_grid) + beta * V_next
    c_num[i] = c_grid[obj.argmax()]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Value function
axes[0].plot(W_grid, V_true, label="Analytical", linewidth=2)
axes[0].plot(W_grid, V_num, linestyle="--", label="VFI numerical", linewidth=1.5)
axes[0].set_title("Value function")
axes[0].set_xlabel("Wealth W")
axes[0].legend()

# Policy function
axes[1].plot(W_grid, c_true, label="Analytical: c*(W) = (1-β)W", linewidth=2)
axes[1].plot(W_grid, c_num, linestyle="--", label="VFI numerical", linewidth=1.5)
axes[1].set_title("Policy function (consumption)")
axes[1].set_xlabel("Wealth W")
axes[1].legend()

plt.tight_layout()
plt.show()

# Numerical error summary
V_err = np.abs(V_num - V_true)
c_err = np.abs(c_num - c_true)
print(f"Value function — max absolute error : {V_err.max():.6f}")
print(f"Value function — mean absolute error: {V_err.mean():.6f}")
print(f"Policy function — max absolute error : {c_err.max():.6f}")
print(f"Policy function — mean absolute error: {c_err.mean():.6f}")

A5. Sensitivity to grid density¶

Numerical solutions depend on discretization choices.
This is the computational analogue of binning in a histogram:
the result changes with the grid, and you should document how much.

In [ ]:
grid_sizes = [20, 50, 100, 200, 500]
results = []

for n in grid_sizes:
    W_g = np.linspace(1e-6, W_max, n)
    V_g, errs, _ = vfi(W_g, beta, tol=1e-7)
    V_true_g = V_analytical(W_g, beta)
    max_err = np.abs(V_g - V_true_g).max()
    n_iter  = len(errs)
    results.append({"grid_points": n, "iterations": n_iter, "max_error_vs_analytical": round(max_err, 6)})

pd.DataFrame(results)

Interpretation¶

Error should decrease as the grid becomes finer.
If it does not, the interpolation scheme or the inner optimization is the bottleneck, not the grid itself.
This table belongs in a computational appendix whenever a paper reports numerical solutions.
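One way to isolate the interpolation component is to check its convergence order directly: linear interpolation error on a smooth function is $O(h^2)$, so halving the spacing should cut the maximum error roughly fourfold. A self-contained sketch on $\ln(W)$:

In [ ]:
```python
import numpy as np

def interp_error(n):
    """Max absolute error of linear interpolation of ln on an n-point grid."""
    grid = np.linspace(1.0, 10.0, n)
    test = np.linspace(1.0, 10.0, 5001)
    return np.abs(np.interp(test, grid, np.log(grid)) - np.log(test)).max()

# Halving the grid spacing should cut the error by roughly 4x (O(h^2)).
e_coarse, e_fine = interp_error(51), interp_error(101)
print(f"max error, h:   {e_coarse:.2e}")
print(f"max error, h/2: {e_fine:.2e}")
print(f"ratio: {e_coarse / e_fine:.2f}  (theory: ~4)")
```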

A6. Visualizing iterates — does the algorithm approach the solution monotonically?¶

Plotting selected iterates against the analytical solution is a direct visual check of whether VFI is approaching the fixed point from a sensible direction.

In [ ]:
# Re-run VFI on fine grid, saving selected iterates
W_g    = np.linspace(1e-6, W_max, 200)
V_iter = np.zeros(len(W_g))
V_true_g = V_analytical(W_g, beta)

snapshots = {}
tol = 1e-7
for it in range(2000):
    V_new = np.empty(len(W_g))
    for i, W in enumerate(W_g):
        c_g     = np.linspace(1e-8, W, 300)
        V_next  = np.interp(W - c_g, W_g, V_iter)
        obj     = np.log(c_g) + beta * V_next
        V_new[i] = obj.max()
    if it + 1 in (1, 5, 20, 50, 100):   # fixed snapshot iterations, independent of earlier cells
        snapshots[f"iter {it+1}"] = V_new.copy()
    if np.max(np.abs(V_new - V_iter)) < tol:
        snapshots[f"converged ({it+1})"] = V_new.copy()
        break
    V_iter = V_new

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(W_g, V_true_g, color="black", linewidth=2.5, label="Analytical")
colors = plt.cm.Blues(np.linspace(0.3, 0.9, len(snapshots)))
for (label, V_snap), color in zip(snapshots.items(), colors):
    ax.plot(W_g, V_snap, color=color, linewidth=1, label=label)
ax.set_title("VFI iterates converging to analytical solution")
ax.set_xlabel("Wealth W")
ax.set_ylabel("V(W)")
ax.legend(fontsize=9)
plt.tight_layout()
plt.show()

A7. Detecting a broken algorithm visually¶

Introduce a deliberate bug—inverting the sign of the discount factor—and observe what the convergence plot and value function plot reveal.

This is the computational equivalent of showing a misleading graph: the algorithm runs without error, but the output is wrong.

In [ ]:
def vfi_broken(W_grid, beta, tol=1e-7, max_iter=300):
    """Buggy VFI: uses -beta instead of beta (sign error)."""
    n  = len(W_grid)
    V  = np.zeros(n)
    errors = []
    for it in range(max_iter):
        V_new = np.empty(n)
        for i, W in enumerate(W_grid):
            c_g    = np.linspace(1e-8, W, 300)
            V_next = np.interp(W - c_g, W_grid, V)
            obj    = np.log(c_g) - beta * V_next   # BUG: minus sign
            V_new[i] = obj.max()
        err = np.max(np.abs(V_new - V))
        errors.append(err)
        V = V_new
        if err < tol:
            break
    return V, errors

V_broken, errors_broken = vfi_broken(W_grid, beta)

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Convergence
axes[0].semilogy(errors, label="Correct VFI")
axes[0].semilogy(errors_broken, linestyle="--", label="Broken VFI")
axes[0].set_title("Convergence diagnostic — correct vs broken")
axes[0].set_xlabel("Iteration")
axes[0].set_ylabel("Sup-norm error (log)")
axes[0].legend()

# Value function
axes[1].plot(W_grid, V_true, label="Analytical", linewidth=2)
axes[1].plot(W_grid, V_num, linestyle="--", label="Correct VFI")
axes[1].plot(W_grid, V_broken, linestyle=":", label="Broken VFI")
axes[1].set_title("Value functions — correct vs broken")
axes[1].set_xlabel("Wealth W")
axes[1].legend()

plt.tight_layout()
plt.show()

print("Broken VFI max error vs analytical:", round(np.abs(V_broken - V_true).max(), 4))
print("Correct VFI max error vs analytical:", round(np.abs(V_num - V_true).max(), 6))

Takeaway¶

The broken algorithm may still converge in the sup-norm sense (successive iterates look similar).
Only the comparison against the analytical benchmark reveals the error.
When no analytical solution exists, a well-chosen robustness check—varying parameters, changing grid density, or comparing two independent implementations—plays the same role.
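One such check that needs no second implementation: simulate the candidate policy forward and accumulate discounted utility, which must reproduce the value function it came from. A minimal sketch using the closed-form policy of the cake eating problem:

In [ ]:
```python
import numpy as np

beta, W0, T = 0.95, 5.0, 500

# Closed-form value at W0 (log-utility cake eating)
V_closed = np.log(W0) / (1 - beta) + (
    (1 - beta) * np.log(1 - beta) + beta * np.log(beta)
) / (1 - beta) ** 2

# Simulate the policy c_t = (1 - beta) * W_t forward and accumulate
# discounted utility; the truncation error after T periods is negligible.
W, V_sim = W0, 0.0
for t in range(T):
    c = (1 - beta) * W
    V_sim += beta ** t * np.log(c)
    W -= c

print(f"closed form: {V_closed:.6f}")
print(f"simulated  : {V_sim:.6f}")
```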

Advanced write-up prompt¶

Answer in 8–10 lines:

  1. Why does geometric convergence at rate $\beta$ in the sup-norm follow from the contraction mapping theorem?
  2. What does the grid sensitivity table tell you, and what would you put in a computational appendix?
  3. The broken algorithm converged—explain why convergence is a necessary but not sufficient condition for correctness.
  4. If no analytical solution existed, what alternative validation strategies would you use?

Advanced optional extension¶

  • Replace the inner grid search with scipy.optimize.minimize_scalar and compare speed and accuracy.
  • Implement policy function iteration (PFI) and compare its convergence rate against VFI on the same grid.
  • Extend the broken-algorithm example to a case where the convergence diagnostic itself looks correct but the policy function is wrong — and identify which additional plot catches it.