Lecture 4 — Data Visualization, EDA & Dynamic Programming¶

Python for Economists · University of Bologna · 2025/2026¶


What we cover today¶

  1. Warm-up: the chart makeover — six steps from ugly default to publication-ready
  2. The anatomy of a matplotlib figure
  3. Line plots, scatter plots, distribution plots
  4. Seaborn: statistical graphics + ten-minute chart challenge
  5. Systematic EDA: missing values, outliers, distributions
  6. Publication-ready figures
  7. Dynamic Programming: the cake eating problem
    • 7a. Finite horizon — backward induction
    • 7b. Infinite horizon — value function iteration
  8. Exercise + Milestone: your research dataset

In [ ]:
# --- Core scientific stack ---------------------------------------------------
import pandas as pd                           # tabular data
import numpy as np                            # numerical arrays and maths
import matplotlib.pyplot as plt               # the imperative plotting API
import matplotlib.ticker as mticker           # fine-grained control over axis tick locators/formatters
import seaborn as sns                         # statistical graphics built on matplotlib
import warnings
warnings.filterwarnings("ignore")             # silence deprecation noise during the lecture

# --- Matplotlib defaults -----------------------------------------------------
# plt.rcParams is a mutable dict-like global that controls defaults for ALL
# subsequent plots. Setting it once here means we don't repeat the same
# styling arguments (figsize, spines, grid...) on every call to plt.subplots.
#
# Key choices below:
#   figure.dpi        → render resolution in notebooks (120 ≈ crisp on retina screens)
#   figure.figsize    → default (width, height) in inches
#   axes.spines.*     → hide the top and right frame lines (cleaner, journal style)
#   axes.grid         → light gridlines aid reading
#   font.size         → 11 pt is readable both on screen and when exported to PDF
plt.rcParams.update({
    "figure.dpi":        120,
    "figure.figsize":    (9, 4.5),
    "axes.spines.top":   False,
    "axes.spines.right": False,
    "axes.grid":         True,
    "grid.alpha":        0.3,
    "font.size":         11,
})
print("Libraries loaded.")
In [ ]:
# --- Load the eurozone panel -------------------------------------------------
# First preference: read the CSV written by L3 (§6.2). This keeps L4 self-contained
# for students who have already run L3 and avoids re-hitting the World Bank API.
#
# If the CSV isn't there (first-time run of L4 without L3 having been executed),
# we fall back to fetching the two indicators directly.
try:
    df = pd.read_csv("eurozone_panel.csv")
    df["year"] = df["year"].astype(int)        # coerce to int for sorting and merges
    print("Loaded:", df.shape)
except FileNotFoundError:
    import wbdata
    countries = ["IT","DE","FR","ES","NL","PT","GR","BE","AT","FI"]
    # A dict mapping World Bank indicator codes to the column names we want
    indicators = {
        "NY.GDP.MKTP.KD.ZG": "gdp_growth",
        "SL.UEM.TOTL.ZS":    "unemployment",
    }
    df = wbdata.get_dataframe(
        indicators,
        country=countries,
        date=("2000", "2023"),
    ).reset_index()
    name_to_code = {
        "Italy":"IT","Germany":"DE","France":"FR","Spain":"ES","Netherlands":"NL",
        "Portugal":"PT","Greece":"GR","Belgium":"BE","Austria":"AT","Finland":"FI",
    }
    df["country"] = df["country"].map(name_to_code)
    df = df.rename(columns={"date": "year"})
    df["year"] = df["year"].astype(int)
    df = df.dropna().reset_index(drop=True)
    df.to_csv("eurozone_panel.csv", index=False)
    print("Downloaded:", df.shape)

Warm-up — the chart makeover¶

The default matplotlib chart is ugly. Journal-quality charts are not the result of fancy libraries — they come from understanding a small number of design choices. We'll see this live.

Same data. Six successive steps. Final output: publication-ready.

In [ ]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fake but realistic: GDP index for 4 countries, 2000-2024.
# We use SYNTHETIC data here (not the real eurozone panel) so that every line
# has the same starting value — this makes the "chart makeover" lesson about
# design choices, not about data patterns.
np.random.seed(42)                             # fix the RNG for reproducibility
years = np.arange(2000, 2025)                  # 25 annual observations

# For each country we generate an index that starts at 100 and grows by a
# country-specific random-walk: normal(mean, std), different per country.
# np.cumsum turns an i.i.d. sequence into a random walk.
# Adding 100 shifts the starting level to a familiar "2000=100" index value.
data = pd.DataFrame({
    'Italy':   np.cumsum(np.random.normal(0.4, 2.0, len(years))) + 100,
    'Germany': np.cumsum(np.random.normal(1.2, 1.8, len(years))) + 100,
    'France':  np.cumsum(np.random.normal(1.0, 1.5, len(years))) + 100,
    'Spain':   np.cumsum(np.random.normal(1.3, 2.5, len(years))) + 100,
}, index=years)
data.index.name = 'year'                       # naming the index clarifies plot labels later

Step 0 — the ugly default¶

In [ ]:
# Step 0: the UGLY default.
# Called with no arguments, plt.subplots() gives the smallest acceptable figure,
# a generic title, no axis labels, a box-shaped frame, and default colours.
# Good enough for exploratory code; unacceptable for any research output.
fig, ax = plt.subplots()
data.plot(ax=ax)                               # DataFrame.plot(ax=...) delegates to matplotlib on given axes
#ax.set_title('plot')                           # deliberately generic — this is what "works but is bad" looks like
plt.show()

Step 1 — size, labels, title¶

In [ ]:
# Step 1: fix the basics.
#   figsize=(9, 5) → decent on-screen size; 16:9 reads well in slides and papers.
#   An informative title tells the reader what the figure is about.
#   Axis labels are mandatory for any research output.
fig, ax = plt.subplots(figsize=(9, 5))
data.plot(ax=ax)
ax.set_title('Real GDP index, 2000=100', fontsize=13)
ax.set_xlabel('Year')
ax.set_ylabel('Index')
plt.show()

Step 2 — remove the box¶

In [ ]:
# Step 2: remove chart junk.
# The top and right spines add ink without carrying information (Tufte's data-ink ratio).
# We also drop the x-axis label: the integer years on the ticks already convey "year",
# so "Year" underneath is redundant. We specialise the y-axis label to "(2000=100)"
# to save the reader a glance at the title.
fig, ax = plt.subplots(figsize=(9, 5))
data.plot(ax=ax)
ax.set_title('Real GDP index, 2000=100', fontsize=13)
ax.set_xlabel('')
ax.set_ylabel('Index (2000=100)')
# .spines is a dict keyed by side. Hide the top/right edges of the axes frame.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()

Step 3 — legend outside, no frame¶

In [ ]:
# Step 3: legend.
# - linewidth=2 thickens lines for readability in slides/print.
# - ax.legend(...) overrides the default legend:
#     loc='upper left'   → legend in the top-left corner
#     frameon=False      → no border box (reduces visual noise)
#     ncol=4             → lay legend entries out horizontally across 4 columns
#                          (with 4 countries, this puts them all on one row)
fig, ax = plt.subplots(figsize=(9, 5))
data.plot(ax=ax, linewidth=2)
ax.set_title('Real GDP index, 2000=100', fontsize=13)
ax.set_xlabel('')
ax.set_ylabel('Index (2000=100)')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend(loc='upper left', frameon=False, ncol=4)
plt.show()

Step 4 — highlight a key event¶

In [ ]:
# Step 4: context annotations — shade GFC and COVID periods.
# ax.axvspan(x0, x1, ...) draws a vertical band between x0 and x1.
#   alpha=0.12 → 12% opacity, low enough to not obscure the data lines underneath.
#   color='grey' → neutral; we reserve colour for the data itself.
# ax.text(x, y, s) places a label string at data coordinates (x, y).
#   ax.get_ylim() returns (ymin, ymax); multiplying by 0.97 puts the label just below the top.
#   ha='center' horizontal alignment puts the label string centred on the x position.
fig, ax = plt.subplots(figsize=(9, 5))
data.plot(ax=ax, linewidth=2)
ax.axvspan(2008, 2009,   color='grey', alpha=0.12)
ax.axvspan(2020, 2020.5, color='grey', alpha=0.12)
ax.text(2008.5, ax.get_ylim()[1] * 0.97, 'GFC',   ha='center', fontsize=9, color='grey')
ax.text(2020.2, ax.get_ylim()[1] * 0.97, 'COVID', ha='center', fontsize=9, color='grey')
ax.set_title('Real GDP index, 2000=100', fontsize=13)
ax.set_xlabel('')
ax.set_ylabel('Index (2000=100)')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend(loc='upper left', frameon=False, ncol=4)
plt.show()

Step 5 — final polish (line labels inline)¶

In [ ]:
# Step 5: final polish.
# - Explicit colour palette per country (dict keyed by series name) → reproducible across figures.
# - Manual loop with ax.plot(...) instead of DataFrame.plot: gives per-line control.
# - Inline end-of-line labels with ax.text(...) replace the legend entirely — the reader's
#   eye can match line to country without jumping back and forth (Tufte's "direct labelling").
# - ax.set_xlim: extend the x-axis slightly past the last year so the inline labels fit.
# - title loc='left' aligns the title to the left margin — a common journal-style choice.
fig, ax = plt.subplots(figsize=(9, 5))
colors = {'Italy': '#d62728', 'Germany': '#1f77b4', 'France': '#2ca02c', 'Spain': '#ff7f0e'}

for country in data.columns:
    # One call per country so each line carries its own colour + label.
    ax.plot(data.index, data[country], linewidth=2, color=colors[country], label=country)
    # ax.text(x, y, s, ...): place the country name just past the last data point.
    #   data.index[-1] + 0.3  → shift 0.3 years to the right of the last x-value
    #   data[country].iloc[-1] → y value at the last observation
    #   va='center'            → vertically centre the text on the y position
    ax.text(data.index[-1] + 0.3, data[country].iloc[-1], country,
            color=colors[country], fontsize=10, va='center')

# Event shading + labels, same as Step 4.
ax.axvspan(2008, 2009,   color='grey', alpha=0.12)
ax.axvspan(2020, 2020.5, color='grey', alpha=0.12)
ax.text(2008.5, ax.get_ylim()[1] * 0.97, 'GFC',   ha='center', fontsize=9, color='grey')
ax.text(2020.2, ax.get_ylim()[1] * 0.97, 'COVID', ha='center', fontsize=9, color='grey')

ax.set_title('Real GDP, cumulative growth since 2000', fontsize=13, loc='left')
ax.set_xlabel('')
ax.set_ylabel('Index, 2000=100')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Extend the x-axis slightly so the inline labels don't get clipped.
ax.set_xlim(data.index.min(), data.index.max() + 2)

plt.tight_layout()                             # auto-adjust margins
plt.show()

From unreadable to publishable in six steps. None of them required seaborn, plotly, or anything exotic — just discipline. We'll now see each of these techniques in depth.


1. The anatomy of a matplotlib figure¶

Use the object-oriented interface (fig, ax = plt.subplots()) for research figures. It is explicit, composable, and avoids bugs when building multi-panel layouts.

In [ ]:
# --- Multi-country line plot with event shading -----------------------------
# Restrict the panel to 5 "focus" countries (readability: 5 lines is about the
# ceiling for a legible comparison; more than that and you need small multiples).
focus  = ["IT","DE","FR","ES","GR"]
colors = ["#2166ac","#d6604d","#4dac26","#f4a582","#762a83"]   # fixed palette: same country → same colour everywhere
# Map ISO2 codes to full country names once; reused for legends and annotations below.
country_labels = {"IT":"Italy","DE":"Germany","FR":"France","ES":"Spain",
                  "NL":"Netherlands","PT":"Portugal","GR":"Greece",
                  "BE":"Belgium","AT":"Austria","FI":"Finland"}

fig, ax = plt.subplots(figsize=(11, 5))

# zip(focus, colors) pairs each country code with its assigned colour.
# We filter df per country inside the loop — slightly wasteful for a few countries,
# but clear and readable; for large panels, use groupby.
for code, color in zip(focus, colors):
    sub = df[df["country"]==code].sort_values("year")           # sort to avoid line-crossing artefacts
    ax.plot(sub["year"], sub["gdp_growth"],
            label=country_labels[code], color=color, linewidth=1.8)

# Shade the two major recession episodes.
ax.axvspan(2008, 2009,   alpha=0.08, color="red",  label="GFC")
ax.axvspan(2020, 2020.5, alpha=0.08, color="gray", label="COVID")

# Zero line: visually separates growth from contraction.
ax.axhline(0, color="black", linewidth=0.6)

ax.set_title("GDP growth in selected eurozone countries, 2000-2023", fontsize=12)
ax.set_xlabel("Year"); ax.set_ylabel("Annual % change")
# framealpha=0.6 makes the legend box semi-transparent so it doesn't fully hide the data behind it.
ax.legend(loc="lower right", fontsize=9, framealpha=0.6)
fig.tight_layout(); plt.show()
In [ ]:
# --- Scatter: Okun's law (country averages) with OLS line -------------------
# Collapse to one observation per country by averaging over the 24-year window.
# Same named-agg pattern as in L3: new_col = (source_col, aggregation).
avg = df.groupby("country").agg(
    gdp_growth  =("gdp_growth",   "mean"),
    unemployment=("unemployment", "mean"),
).reset_index()

fig, ax = plt.subplots(figsize=(8, 5))

# scatter parameters:
#   s=80     → marker area in points²; 80 is large enough to see individual points clearly
#   alpha=0.8 → slight transparency in case points overlap
#   zorder=3  → draw ABOVE the OLS line (which we'll add below); higher zorder = on top
ax.scatter(avg["unemployment"], avg["gdp_growth"],
           s=80, color="steelblue", alpha=0.8, zorder=3)

# Annotate each point with its country name.
# iterrows() yields (index, Series) — a simple way to loop over DataFrame rows.
# For large data this would be slow; for 10 rows it's perfectly fine.
# xytext=(5, 3) with textcoords="offset points" places the label 5pt right, 3pt up
# from the data point, in SCREEN coordinates (independent of data scale).
for _, row in avg.iterrows():
    ax.annotate(country_labels.get(row["country"], row["country"]),
                xy=(row["unemployment"], row["gdp_growth"]),
                xytext=(5, 3), textcoords="offset points", fontsize=8.5)

# np.polyfit(x, y, deg) fits a polynomial of the given degree to (x, y) by least squares.
# deg=1 fits a straight line and returns [slope, intercept]: simple univariate OLS.
# For real research we'd use statsmodels to get SEs; here we just need the line to display.
m, b = np.polyfit(avg["unemployment"], avg["gdp_growth"], 1)

# np.linspace(a, b, n) returns n equally spaced points between a and b — for a smooth line.
x_l  = np.linspace(avg["unemployment"].min(), avg["unemployment"].max(), 100)
# m*x_l + b evaluates the fitted line at each x; the slope appears in the legend label.
ax.plot(x_l, m*x_l+b, color="tomato", linewidth=1.5, linestyle="--",
        label=f"OLS slope = {m:.2f}")

ax.set_title("Average GDP growth vs unemployment, Eurozone 2000-2023", fontsize=12)
ax.set_xlabel("Avg unemployment (%)"); ax.set_ylabel("Avg GDP growth (%)")
ax.legend(fontsize=9); fig.tight_layout(); plt.show()
In [ ]:
# --- Boxplot + correlation heatmap in a single 1×2 figure -------------------
# plt.subplots(1, 2, ...) returns (fig, array_of_axes). We unpack into `axes`
# and index with axes[0], axes[1].
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# --- Left panel: boxplot of GDP growth by country ---------------------------
# Order countries by median growth (descending) so the boxplot tells a story
# at a glance: best-performing on the left, worst on the right.
#   .groupby("country")["gdp_growth"]  → grouped series
#   .median()                          → one value per country
#   .sort_values(ascending=False)      → descending
#   .index.tolist()                    → the ordered list of country codes
order = (df.groupby("country")["gdp_growth"].median()
           .sort_values(ascending=False).index.tolist())

# seaborn's boxplot:
#   order= lets us impose the custom ordering computed above
#   palette="Blues" uses a sequential palette (all blue tones) — fine since
#   countries here have no categorical "meaning" beyond being labels
#   width=0.5 shrinks the boxes so they don't crowd each other
sns.boxplot(data=df, x="country", y="gdp_growth", order=order,
            palette="Blues", width=0.5, ax=axes[0])

# Zero reference line so positive vs. negative growth reads instantly.
axes[0].axhline(0, color="black", linewidth=0.7, linestyle="--")

# Replace ISO2 codes on the x-axis with full country names for readability.
# get_xticklabels() returns Text objects; we extract their strings via .get_text().
# rotation=30, ha="right" keeps the text readable without overlap.
axes[0].set_xticklabels(
    [country_labels.get(c.get_text(), c.get_text()) for c in axes[0].get_xticklabels()],
    rotation=30, ha="right")
axes[0].set_title("Distribution of GDP growth by country"); axes[0].set_xlabel("")

# --- Right panel: correlation matrix as a heatmap ---------------------------
# Only two variables here, so the heatmap is small — the same syntax scales to
# dozens of variables.
corr = df[["gdp_growth","unemployment"]].corr()     # 2x2 Pearson correlation matrix

# sns.heatmap options worth remembering:
#   annot=True    → write the numeric value in each cell
#   fmt=".2f"     → two decimal places
#   cmap="RdBu_r" → diverging red-blue reversed (red = positive, blue = negative);
#                   appropriate because correlations are signed and centred on 0
#   center=0      → pin the colormap's midpoint at 0
#   vmin/vmax     → fix the scale to [-1, 1] so colours are comparable across plots
#   square=True   → cells are squares (aesthetic for correlation matrices)
sns.heatmap(corr, annot=True, fmt=".2f", cmap="RdBu_r",
            center=0, vmin=-1, vmax=1, square=True, ax=axes[1])
axes[1].set_title("Correlation matrix")

fig.tight_layout(); plt.show()

⏱ Ten-minute challenge — draw a chart¶

Same dataset as in the warm-up (data variable). Your task: produce the single chart that most informatively communicates a story about European growth in the 21st century.

Rules: 10 minutes. Your chart should have a clear message that a reader grasps in 5 seconds. Use any tool we've seen.

There is no single right answer — different stories are worth telling. One possible solution follows.

One possible solution¶

There is no single "correct" chart — different readers will pick different stories. What follows is one solution that illustrates the principles of the warm-up: clear message, direct labelling, event context, annotation that conveys the headline in five seconds.

Story chosen: "Twenty-five years later, Italy still hasn't caught up."

The chart below uses direct labels instead of a legend, shades GFC and COVID as common-shock context, and annotates the cumulative gap between Italy and Germany at the end of the sample — so the message is unmistakable on first sight.

In [ ]:
# --- Ten-minute challenge: one possible solution ----------------------------
# Story: over 25 years of cumulative growth, Italy falls persistently behind
# Germany; Spain and France lie between the two. We communicate this with:
#   - direct line labels (no legend)
#   - GFC and COVID shading (shared shocks, not explanatory of the gap)
#   - an explicit end-of-sample arrow quantifying the Italy-Germany gap
#   - muted colours for France/Spain, strong colours for the two protagonists

fig, ax = plt.subplots(figsize=(10, 5.5))

# Two-tier palette: strong colours for the contrast we want the reader to see,
# muted greys for the two "supporting" countries.
palette = {"Italy": "#c0392b", "Germany": "#2166ac",
           "France": "#999999", "Spain": "#bbbbbb"}

# Plot all four lines with inline end-of-series labels (no legend).
for country in data.columns:
    ax.plot(data.index, data[country],
            linewidth=2.2 if country in ("Italy", "Germany") else 1.4,
            color=palette[country])
    ax.text(data.index[-1] + 0.3, data[country].iloc[-1], country,
            color=palette[country], fontsize=10, va="center",
            fontweight="bold" if country in ("Italy", "Germany") else "normal")

# Event shading: light grey, does not compete with the story lines above.
ax.axvspan(2008, 2009,   color="grey", alpha=0.10)
ax.axvspan(2020, 2020.5, color="grey", alpha=0.10)
ax.text(2008.5, ax.get_ylim()[1] * 0.97, "GFC",   ha="center", fontsize=9, color="grey")
ax.text(2020.2, ax.get_ylim()[1] * 0.97, "COVID", ha="center", fontsize=9, color="grey")

# The headline annotation: vertical bracket between Italy and Germany at t=2024.
# This is what makes the story readable in five seconds.
x_last    = data.index[-1]
y_italy   = data["Italy"].iloc[-1]
y_germany = data["Germany"].iloc[-1]
gap       = y_germany - y_italy

# annotate with arrowprops=... draws an arrow; here we use a "<->" style for a bracket.
ax.annotate(
    "",
    xy=(x_last - 0.7, y_germany), xytext=(x_last - 0.7, y_italy),
    arrowprops=dict(arrowstyle="<->", color="black", lw=1.2),
)
# The gap label, positioned just to the left of the bracket.
ax.text(x_last - 1.0, (y_italy + y_germany) / 2,
        f"+{gap:.0f} pts",
        ha="right", va="center", fontsize=10, fontweight="bold")

# Strong two-line title: the first line is the factual subject, the second
# (smaller, grey) is the interpretive message. This "kicker + headline"
# pattern is standard in financial-press charts (FT, Economist).
ax.set_title("Real GDP index, 2000 = 100", fontsize=13, loc="left", pad=18)
ax.text(0.0, 1.02,
        "Germany gains ~25 index points on Italy over the cycle",
        transform=ax.transAxes, fontsize=10, color="#555", style="italic")

ax.set_xlabel("")
ax.set_ylabel("Index (2000 = 100)")
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_xlim(data.index.min(), data.index.max() + 2.5)

plt.tight_layout()
plt.show()

Notes:

  • Use this to show that information density and clarity matter — the prettiest chart often isn't the clearest.

2. Systematic EDA¶

EDA is a structured process for understanding your data before modelling.

In [ ]:
# --- Systematic EDA checklist ------------------------------------------------
# Before modelling, print a fixed sequence of summaries to catch surprises early.
# Keep this block uniform across projects so YOU don't forget anything — the
# "boring" rows (shape, range, describe) are the ones that catch silent bugs.

print(f"Shape: {df.shape}")                    # (rows, columns)
# .nunique() counts distinct non-null values; .min()/.max() give the time span.
print(f"Countries: {df['country'].nunique()}  Years: {df['year'].min()}-{df['year'].max()}")
print()
# .isna().sum() counts missing values per column: the first thing to check.
print("Missing values per column:")
print(df.isna().sum())
print()
# .describe() returns count/mean/std/min/25%/50%/75%/max for every numeric column.
# .round(2) shortens the output so it fits on one screen.
print(df.describe().round(2))
print()

# --- Outlier flag ------------------------------------------------------------
# Classical "more than k standard deviations from the mean" rule, with k=2.5.
# NOTE: this is a DETECTION heuristic, not a correction. Outliers here are
# usually 2009 and 2020 observations — real events, not data errors. Flagging
# them helps decide whether to control for them in the regression (e.g. with
# year FE, as we did in L3), not whether to drop them.
df["outlier"] = (
    (df["gdp_growth"] - df["gdp_growth"].mean()).abs()
    > 2.5 * df["gdp_growth"].std()
)
print(f"Outliers (>2.5 SD): {df['outlier'].sum()}")
# .to_string(index=False) prints the DataFrame without the 0..N-1 index column.
print(df[df["outlier"]][["country","year","gdp_growth"]].to_string(index=False))
In [ ]:
# --- Publication-ready figure -----------------------------------------------
# This cell demonstrates the defaults you'd want for ANY figure going into a paper:
#   - consistent colour per series
#   - event shading with in-figure annotations (no external captions needed)
#   - zero reference line
#   - constrained x-tick locator (every 4 years; default is often cluttered)
#   - source attribution in a footnote
#   - saved to PDF at 300 dpi with tight bounding box (vector output, no white margins)
fig, ax = plt.subplots(figsize=(9, 5))

for code, color in zip(["IT","DE","GR"], ["#2166ac","#d6604d","#762a83"]):
    sub = df[df["country"]==code].sort_values("year")
    ax.plot(sub["year"], sub["gdp_growth"],
            label=country_labels[code], color=color, linewidth=2)

ax.axhline(0, color="black", linewidth=0.7)
ax.axvspan(2008, 2009,   alpha=0.08, color="red")
ax.axvspan(2020, 2020.5, alpha=0.08, color="gray")

# ax.annotate places rich text at data coordinates. xy=(x, y) is where the
# annotation "points" — here we just use it as a label anchor with no arrow.
# Colours match the shading so the association is unambiguous.
ax.annotate("GFC",   xy=(2008.5, -6),  fontsize=8.5, color="#cc0000", ha="center")
ax.annotate("COVID", xy=(2020.25,-11), fontsize=8.5, color="#555555", ha="center")

ax.set_title("GDP growth: Italy, Germany, Greece, 2000-2023", fontsize=13, pad=10)
ax.set_xlabel("Year"); ax.set_ylabel("Annual % change")
ax.legend(fontsize=10, framealpha=0.6)

# Tick every 4 years instead of matplotlib's default choice. mticker.MultipleLocator(n)
# places a tick at every integer multiple of n — the cleanest way to control spacing.
ax.xaxis.set_major_locator(mticker.MultipleLocator(4))

# Figure-level text (not axes-level): we want a source note in the bottom-left
# MARGIN, outside the data area. fig.text uses FIGURE coordinates (0..1 in both
# dimensions), hence (0.01, -0.02) = a hair to the right of the left edge and
# just below the axes.
fig.text(0.01, -0.02,
         "Source: World Bank, World Development Indicators.",
         fontsize=8, color="#555")

fig.tight_layout()
# Save parameters worth remembering:
#   bbox_inches="tight"  → crop white margins to the actual content bbox
#   dpi=300               → print-quality for PNG; irrelevant for PDF (vector) but harmless
fig.savefig("gdp_growth_figure.pdf", bbox_inches="tight", dpi=300)
plt.show()
print("Saved: gdp_growth_figure.pdf")

3. Dynamic Programming: the Cake Eating Problem¶

Setup¶

An agent has a cake of size $W_0 > 0$. At each period she eats $c_t \geq 0$, leaving $W_{t+1} = W_t - c_t$ for the future. She maximises:

$$\sum_{t=0}^{T} \beta^t \ln(c_t) \qquad \text{subject to} \quad 0 \leq c_t \leq W_t$$

Why this problem?¶

The cake eating model is the canonical building block for:

  • Consumption-savings (Ramsey, permanent income)
  • Natural resource depletion (Hotelling)
  • Asset liquidation, portfolio drawdown

The analytical solution exists, so we can verify our code exactly.

Where the Bellman equation comes from¶

Before running any iteration, it is worth seeing that the Bellman equation is not a new principle — it is a consequence of standard constrained optimisation applied sequentially. We show this on a minimal two-period version of the cake-eating problem, and then extend the logic to $T$ periods.

Two-period problem. Given $W_1 > 0$, choose $c_1, c_2, W_2, W_3$ to solve

$$\max \; u(c_1) + \beta\, u(c_2) \quad \text{s.t.} \quad W_2 = W_1 - c_1, \;\; W_3 = W_2 - c_2, \;\; W_2, W_3 \geq 0.$$

Form the Lagrangian with multipliers $\lambda_1, \lambda_2$ on the flow constraints and $\gamma_1, \gamma_2$ on non-negativity:

$$\mathcal{L} = u(c_1) + \beta u(c_2) + \lambda_1\left[W_1 - c_1 - W_2\right] + \lambda_2\left[W_2 - c_2 - W_3\right] + \gamma_1 W_2 + \gamma_2 W_3.$$

The first-order conditions are

$$ \underbrace{u'(c_1) = \lambda_1}_{\partial\mathcal{L}/\partial c_1}, \qquad \underbrace{\beta u'(c_2) = \lambda_2}_{\partial\mathcal{L}/\partial c_2}, \qquad \underbrace{-\lambda_1 + \lambda_2 + \gamma_1 = 0}_{\partial\mathcal{L}/\partial W_2}, \qquad \underbrace{-\lambda_2 + \gamma_2 = 0}_{\partial\mathcal{L}/\partial W_3}, $$

plus the complementary-slackness conditions $\gamma_i W_{i+1} = 0$ with $\gamma_i \geq 0$.

Because $u'>0$ we have $\lambda_1, \lambda_2 > 0$; the last FOC then forces $\gamma_2 > 0$, hence $W_3 = 0$ — the cake is fully eaten, as one would expect. A quick contradiction argument rules out $\gamma_1 > 0$ (it would force $c_2 = 0$ and a divergent $\lambda_2$), so $\gamma_1 = 0$ and $\lambda_1 = \lambda_2$. Combining:

$$\boxed{\;u'(c_1) = \beta u'(c_2), \qquad c_1 + c_2 = W_1\;}$$

This is the Euler equation together with the budget constraint. The two equations pin down $c_1(W_1)$ and $c_2(W_1)$ uniquely, and the indirect utility at the optimum defines the value function:

$$V_2(W_1) = u\bigl(c_1(W_1)\bigr) + \beta\, u\bigl(c_2(W_1)\bigr).$$

The subscript "2" records that the horizon from $t=1$ is two periods long.

The principle of optimality. Now add an earlier period 0, with initial cake $W_0$. The three-period problem would in principle require re-solving everything — but we already know the optimal continuation from period 1 onwards: it is summarised by $V_2(W_1)$. So the three-period problem collapses to

$$V_3(W_0) = \max_{c_0} \Bigl\{\, u(c_0) + \beta\, V_2\bigl(\underbrace{W_0 - c_0}_{W_1}\bigr)\,\Bigr\}.$$

The FOC $u'(c_0) = \beta V_2'(W_1)$ together with an envelope-theorem argument (using $V_2'(W_1) = u'(c_1)$, which follows from the two-period FOCs) reproduces $u'(c_0) = \beta u'(c_1)$. The three-period problem therefore has the same Euler structure as the two-period one; the entire future was compressed into $V_2$.

Generalisation: $T$ periods by backward induction. The argument iterates. For any finite horizon,

$$V_{\tau+1}(W_t) = \max_{c_t} \bigl\{\, u(c_t) + \beta\, V_\tau(W_t - c_t) \,\bigr\}, \qquad \tau = 1, \ldots, T-1,$$

with terminal value $V_1(W_T) = u(W_T)$. This is the Bellman recursion in the finite-horizon case. It is nothing more than the repeated application of the principle of optimality: at each step, the optimal policy-from-now-on is encoded in the value function, so one only needs to pick the current control.

The code cell below implements this recursion literally: V[t, i] stores $V_{T-t+1}(W_i)$, and the inner for loop performs the single-period maximisation over a consumption grid.

Backward induction by hand, with log utility¶

Before we run the solver, let us solve the three-period problem analytically with $u(c) = \ln c$. This gives us a closed-form answer to compare against the numerical solution — and, as a by-product, reveals the structural form of the policy function.

Step 1 — terminal period ($t = T$, here $T=3$). The agent has cake $W_3$ and no future. She eats everything:

$$c_3^* = W_3, \qquad V_1(W_3) = \ln W_3.$$

Step 2 — one period from the end ($t = 2$). Substituting the known $V_1$,

$$V_2(W_2) = \max_{c_2} \bigl\{\, \ln c_2 + \beta\, \ln(W_2 - c_2) \,\bigr\}.$$

FOC: $\dfrac{1}{c_2} = \dfrac{\beta}{W_2 - c_2}$, which gives $W_2 - c_2 = \beta c_2$, hence

$$c_2^* = \frac{W_2}{1+\beta}.$$

Substitute back:

$$V_2(W_2) = \ln\frac{W_2}{1+\beta} + \beta\, \ln\frac{\beta W_2}{1+\beta} = \underbrace{\ln\frac{1}{1+\beta} + \beta\ln\frac{\beta}{1+\beta}}_{\text{constant}} + (1+\beta)\ln W_2.$$

Step 3 — two periods from the end ($t = 1$). Using $V_2$ from the previous step,

$$V_3(W_1) = \max_{c_1} \bigl\{\, \ln c_1 + \beta\, V_2(W_1 - c_1)\,\bigr\}.$$

The interesting part of $V_2(W_1 - c_1)$ for the FOC is $(1+\beta) \ln(W_1 - c_1)$; the constant drops out. FOC:

$$\frac{1}{c_1} = \frac{\beta(1+\beta)}{W_1 - c_1} \;\Longrightarrow\; c_1(1 + \beta + \beta^2) = W_1 \;\Longrightarrow\; \boxed{\;c_1^* = \dfrac{W_1}{1 + \beta + \beta^2}\;}$$

Generalising. The same algebra, iterated, yields for a $\tau$-period-from-the-end problem:

$$c^*_{\text{today}}(W) = \frac{W}{1 + \beta + \beta^2 + \cdots + \beta^{\tau-1}} = \frac{1 - \beta}{1 - \beta^{\tau}}\, W.$$

That is the fraction $\alpha$ returned by the helper function c_star_finite(W, t, T, beta) a few cells below:

alpha = (1 - beta) / (1 - beta**remaining)     # with remaining = T - t + 1

Infinite-horizon limit. Taking $\tau \to \infty$ (and requiring $0 < \beta < 1$ so the denominator converges):

$$\lim_{\tau \to \infty} c^* = (1 - \beta)\,W.$$

This is the famous "constant-fraction" rule for log utility: at every date, the agent eats a fixed share $(1-\beta)$ of the remaining cake. Patience $\beta$ therefore maps directly into a savings rate of $\beta$. The VFI code below will reproduce this result numerically, and we will overlay the analytical line on the policy plot to verify.


3a. Finite Horizon: Backward Induction¶

With finite $T$, solve backwards from the terminal period.

Terminal condition: $c_T^* = W_T$ — eat everything, no tomorrow.

Recursive step for $t < T$: $$V_t(W) = \max_{0 \leq c \leq W} \left[\ln(c) + \beta V_{t+1}(W - c)\right]$$

This is the Bellman equation for the finite horizon case. We discretise $W$ on a grid and use np.interp to evaluate $V_{t+1}$ at off-grid points.

In [ ]:
# --- Parameters for the finite-horizon cake-eating problem ------------------
beta  = 0.95     # discount factor (0 < beta < 1): higher → more patient agent
T     = 10       # finite horizon: the agent lives for periods 0, 1, ..., T (T+1 dates total)
N     = 500      # number of grid points for the state variable W (500 keeps the double loop fast)
W_max = 1.0      # maximum cake size (normalise so W is in [0, 1])

# Build the state grid. We start at 1e-6 (not 0) because:
#   a) log(0) = -inf → numerical disaster in u_log below
#   b) the agent's optimal policy never hits exactly 0 cake in finite time
# np.linspace generates N equally spaced points from 1e-6 to W_max.
W_grid = np.linspace(1e-6, W_max, N)

def u_log(c):
    """Log utility with numerical safeguard: log(max(c, tiny)) prevents -inf."""
    # np.maximum is element-wise: works on both scalars and arrays transparently.
    return np.log(np.maximum(c, 1e-10))

print(f"Grid: {N} points on [0, {W_max}]")
print(f"beta={beta}, T={T}")
In [ ]:
# --- Backward induction ------------------------------------------------------
# We solve the dynamic program BACKWARDS from the terminal period T to t=0.
# At t=T the problem is trivial (eat everything); at earlier t, we use the
# Bellman equation V_t(W) = max_c [ u(c) + beta * V_{t+1}(W - c) ].
#
# Storage arrays: rows indexed by time (0..T), columns indexed by grid point.
V   = np.zeros((T + 1, N))          # value function V_t(W_i) for each (t, i)
pol = np.zeros((T + 1, N))          # policy function c_t*(W_i): optimal consumption

# --- Terminal condition (t = T): eat everything. No tomorrow, so c* = W ----
V[T, :]   = u_log(W_grid)            # value = utility of eating all remaining cake
pol[T, :] = W_grid                   # policy = eat all

# --- Backward pass: t = T-1, T-2, ..., 0 -----------------------------------
# range(start, stop, step): start=T-1, stop=-1 (exclusive), step=-1 → counts down to 0.
for t in range(T - 1, -1, -1):
    for i, W in enumerate(W_grid):
        # Build a fine grid of candidate consumption choices c in [1e-6, W].
        # We MUST bound c above by W (can't eat more cake than is on the plate).
        # N points matches the state grid but this is not required — any fine grid works.
        c_grid   = np.linspace(1e-6, W, N)

        # Implied next-period cake for each candidate c.
        W_next   = W - c_grid

        # Evaluate V_{t+1} at the off-grid W_next values by LINEAR INTERPOLATION.
        # np.interp(x, xp, fp) returns f(x) where f is defined by the (xp, fp)
        # piecewise-linear curve through the grid points. This is the workhorse
        # technique of low-dimensional dynamic programming.
        V_next   = np.interp(W_next, W_grid, V[t+1])

        # Bellman objective evaluated at every candidate c: current utility + discounted future value.
        objective = u_log(c_grid) + beta * V_next

        # Pick the maximiser over the candidate grid.
        # NOTE: this is a "brute-force" grid search. More efficient methods (golden-section,
        # scipy.optimize) exist, but grid search is transparent and fast enough for teaching.
        idx       = np.argmax(objective)
        V[t, i]   = objective[idx]
        pol[t, i] = c_grid[idx]

print("Backward induction complete.")
print(f"Value at W=1, t=0: {V[0, -1]:.4f}")   # V[0, -1] = V_0 at the largest W on the grid
In [ ]:
def c_star_finite(W, t, T, beta):
    """
    Closed-form optimal consumption for the finite-horizon log-utility
    cake-eating problem.

    With T-t+1 periods remaining and log utility, the optimal fraction of
    the remaining cake to eat today is:
        alpha = (1 - beta) / (1 - beta^(T - t + 1))       if beta != 1
        alpha = 1 / (T - t + 1)                            if beta == 1
    The optimal consumption is then c* = alpha * W.

    This closed form exists only because utility is log; it's our VALIDATION
    benchmark for the numerical solver.
    """
    remaining = T - t + 1
    # abs(...) > 1e-10 is the numerically-safe way to check "beta != 1 up to float noise".
    alpha = (1 - beta) / (1 - beta**remaining) if abs(beta - 1) > 1e-10 else 1/remaining
    return alpha * W

# --- Plot: value and policy functions side by side --------------------------
fig, axes = plt.subplots(1, 2, figsize=(13, 5))

# Left panel: value function at four different times, to show backward shift.
# zip pairs each t with a colour.
for t_show, color in zip([0,2,5,9],["#2166ac","#4dac26","#f4a582","#d6604d"]):
    axes[0].plot(W_grid, V[t_show], color=color, linewidth=1.8, label=f"t={t_show}")
axes[0].set_title("Value function $V_t(W)$ -- finite horizon", fontsize=11)
axes[0].set_xlabel("Cake size W"); axes[0].set_ylabel("V_t(W)")
axes[0].legend(fontsize=9)

# Right panel: policy function at t=0, numerical vs. analytical.
# The whole point: if the two lines coincide, our solver is correct.
c_anal = [c_star_finite(W, 0, T, beta) for W in W_grid]
axes[1].plot(W_grid, pol[0], color="steelblue", linewidth=2, label="Numerical")
axes[1].plot(W_grid, c_anal, color="tomato", linewidth=1.5, linestyle="--", label="Analytical")
axes[1].set_title("Policy function $c_0^*(W)$ -- finite horizon", fontsize=11)
axes[1].set_xlabel("Cake size W"); axes[1].set_ylabel("c*(W)")
axes[1].legend(fontsize=9)

fig.tight_layout(); plt.show()

# Quantitative validation: max absolute error over the grid.
# A tight max error (here ~1e-3 or better) confirms the grid is fine enough.
max_err = max(abs(pol[0,i] - c_star_finite(W_grid[i], 0, T, beta)) for i in range(N))
print(f"Max numerical error vs analytical: {max_err:.6f}")
In [ ]:
# --- Simulate the optimal trajectory starting from W_0 = 1 ------------------
# Given the computed policy pol[t, :], we can trace out what the agent actually
# eats period by period: start with the full cake, consume c_t*(W_t), update
# the remaining cake, repeat.
W0 = 1.0
records = []        # will become a DataFrame for easy plotting
W = W0

for t in range(T + 1):
    # The current W may not be exactly on the grid, so interpolate the policy:
    # np.interp(W, W_grid, pol[t]) returns the linearly-interpolated c_t*(W).
    c_opt  = float(np.interp(W, W_grid, pol[t]))

    # The closed-form reference for the same (W, t) — used to verify.
    c_anal = c_star_finite(W, t, T, beta)

    records.append({"t":t, "W":W, "c_num":c_opt, "c_anal":c_anal})

    # Update the state. max(..., 0) guards against tiny negative values from
    # numerical round-off — the cake can't be negative.
    W = max(W - c_opt, 0)

path = pd.DataFrame(records)

# --- Plot the path: W_t on the left, c_t on the right -----------------------
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 4))

# marker="o" places a circle at each data point (discrete-time feel).
ax1.plot(path["t"], path["W"], marker="o", color="steelblue", linewidth=2)
ax1.set_title("Remaining cake $W_t$"); ax1.set_xlabel("Period t"); ax1.set_ylabel("W_t")

# Compare the simulated numerical c_t to the closed-form c_t on the same axes.
ax2.plot(path["t"], path["c_num"],  marker="o", color="steelblue", linewidth=2, label="Numerical")
ax2.plot(path["t"], path["c_anal"], color="tomato", linewidth=1.5, linestyle="--", label="Analytical")
ax2.set_title("Optimal consumption $c_t^*$")
ax2.set_xlabel("Period t"); ax2.set_ylabel("c_t*"); ax2.legend(fontsize=9)

fig.tight_layout(); plt.show()

3b. Infinite Horizon: Value Function Iteration¶

With $T = \infty$, the problem is stationary: the optimal policy depends only on $W$, not on $t$. The solution satisfies the fixed-point equation:

$$V(W) = \max_{0 \leq c \leq W} \left[\ln(c) + \beta V(W - c)\right]$$

We find $V^*$ by iterating the Bellman operator $\mathcal{T}$:

$$V_{n+1} = \mathcal{T} V_n$$

starting from $V_0 = 0$. By the Contraction Mapping Theorem, $\|V_{n+1} - V_n\|_\infty \to 0$ at geometric rate $\beta$ — this is observable in the convergence plot.

Analytical solution: $c^*(W) = (1-\beta)W$ — a constant fraction of the cake.

The Bellman operator as a fixed point¶

In the finite-horizon case, the problem had a terminal condition — we knew $V_T$ explicitly because the agent had no future. With $T = \infty$ this anchor disappears, yet the Bellman equation still holds:

$$V(W) = \max_{0 \leq c \leq W} \bigl\{\, u(c) + \beta\, V(W - c)\,\bigr\}.$$

The unknown here is the function $V$ itself: it appears on both sides. This is a functional equation, and we solve it as a fixed point.

The Bellman operator. Define the map $\mathcal{T}$, acting on candidate value functions $v$, by

$$(\mathcal{T} v)(W) = \max_{0 \leq c \leq W} \bigl\{\, u(c) + \beta\, v(W - c)\,\bigr\}.$$

A fixed point $V^* = \mathcal{T} V^*$ is, by construction, a solution to the Bellman equation. Two theorems tell us when it exists and how to find it.

Theorem 1 (Lucas-Stokey). If (i) $\beta \in (0,1)$, (ii) the return function $u$ is continuous, bounded, and strictly concave, and (iii) the transition function is concave, then the operator $\mathcal{T}$ has a unique fixed point $V^*$; $V^*$ is strictly concave; and the associated policy function $c = h(W)$ is continuous.

The proof (which we do not reproduce here) works by showing that $\mathcal{T}$ is a contraction in the sup-norm with modulus $\beta$: for any two candidate functions $v, w$,

$$\|\mathcal{T} v - \mathcal{T} w\|_\infty \;\leq\; \beta\, \|v - w\|_\infty.$$

Banach's contraction-mapping theorem then guarantees a unique fixed point, reachable from any starting function $v_0$.

Theorem 2 (Thompson). Log utility is not bounded, so Theorem 1 does not formally apply to our cake-eating problem. Thompson (2004) extends the result to unbounded returns provided the discounted sum $\sum_{t=0}^\infty \beta^t u(c_t)$ is finite for any feasible path. With $\beta < 1$ and $W_t \in [0, W_0]$, this condition holds: log utility grows slowly enough to be dominated by the geometric decay of $\beta^t$.

Why this matters in practice. The contraction property has two immediate numerical consequences:

  1. Any initial guess works. We start VFI from $V_0 \equiv 0$ not because zero is clever, but because it does not matter — the contraction brings us to $V^*$ regardless.
  2. Convergence is geometric at rate $\beta$. The gap $\|V_{n+1} - V_n\|_\infty$ shrinks by a factor of approximately $\beta$ per iteration. On a log-scale convergence plot this appears as a straight line with slope $\log \beta$. With $\beta = 0.95$ we expect to need roughly $\log(10^{-6}) / \log(0.95) \approx 270$ iterations to reach a tolerance of $10^{-6}$.

The code below verifies both predictions: it records the sup-norm distance at every iteration and plots the decay, and the diagnostics cell at the end prints the empirical contraction rate history[5]/history[4], which should equal $\beta$ up to numerical noise.

Value function iteration by hand — a 4×4 worked example¶

The VFI code below does in 300 grid points what we now work through on a 4-point grid by hand, using a simple square-root utility. This is the clearest way to see what the computer is doing inside the loop.

Setup. Take utility $u(c) = 2\sqrt{c}$ and discount factor $\beta = 0.9$. Discretise the cake on a grid of $n_s = 4$ points:

$$\vec W = \begin{pmatrix} 0 \\ 0.33 \\ 0.66 \\ 1 \end{pmatrix}.$$

The agent chooses tomorrow's cake $W'$ from the same grid, subject to the feasibility constraint $W' \leq W$ (you cannot leave more cake than you have).

The "transition matrix" $V_{n,n}$. At each iteration $j$, we build a $4 \times 4$ matrix whose row $k$ corresponds to today's cake $W_k$ and column $\ell$ to tomorrow's cake $W'_\ell$. The entry is the Bellman objective

$$u(W_k - W'_\ell) + \beta\, v_j(W'_\ell),$$

or $-\infty$ (denoted $-M$) if the pair is infeasible ($W'_\ell > W_k$). Maximising each row gives $v_{j+1}$ and the associated policy.

Iteration 1. Start from $v_0 = \vec 0$. Compute every entry of $V_{n,n}(1)$:

$$ V_{n,n}(1) = \begin{pmatrix} 2\sqrt{0} = 0 & -M & -M & -M \\ 2\sqrt{0.33} = 1.148 & 2\sqrt{0} = 0 & -M & -M \\ 2\sqrt{0.66} = 1.624 & 2\sqrt{0.33} = 1.148 & 2\sqrt{0} = 0 & -M \\ 2\sqrt{1} = 2 & 2\sqrt{0.67} = 1.637 & 2\sqrt{0.34} = 1.166 & 2\sqrt{0} = 0 \end{pmatrix}. $$

Since $\beta \cdot v_0 = 0$ in this first iteration, the row-maxima are just the leftmost entries (eat everything):

$$\vec v_1 = \begin{pmatrix} 0 \\ 1.148 \\ 1.624 \\ 2.000 \end{pmatrix}, \qquad \text{policy: } W' = 0 \text{ for every row.}$$

Iteration 2. Now plug $\vec v_1$ back in and recompute:

$$ V_{n,n}(2) = \begin{pmatrix} 0 & -M & -M & -M \\ 1.148 & \,0 + 0.9(1.148) = 1.033 & -M & -M \\ 1.624 & 1.148 + 0.9(1.148) = 2.181 & 0 + 0.9(1.624) = 1.462 & -M \\ 2.000 & 1.637 + 0.9(1.148) = 2.670 & 1.166 + 0.9(1.624) = 2.628 & 0 + 0.9(2) = 1.800 \end{pmatrix}. $$

Row-maxima give

$$\vec v_2 = \begin{pmatrix} 0 \\ 1.148 \\ 2.181 \\ 2.670 \end{pmatrix},$$

with the maximising column being $\ell = 0, 0, 1, 1$ — the agent now finds it worthwhile to save a little (move to $W' = 0.33$) when starting from $W = 0.66$ or $W = 1$. The future is no longer worthless because $v_1 \neq 0$.

Iteration 3. One more pass, using $\vec v_2$:

$$ V_{n,n}(3) = \begin{pmatrix} 0 & -M & -M & -M \\ 1.148 & \,0 + 0.9(1.148) = 1.033 & -M & -M \\ 1.624 & 1.148 + 0.9(1.148) = 2.181 & 0 + 0.9(2.181) = 1.963 & -M \\ 2.000 & 1.637 + 0.9(1.148) = 2.670 & 1.166 + 0.9(2.181) = 3.129 & 0 + 0.9(2.670) = 2.403 \end{pmatrix}. $$

$$\vec v_3 = \begin{pmatrix} 0 \\ 1.148 \\ 2.181 \\ 3.129 \end{pmatrix}, \qquad \text{policy indices } \ell = 0, 0, 1, 2.$$

Iteration 4. Repeating with $\vec v_3$ produces exactly the same vector: $\vec v_4 = \vec v_3$. We have reached the fixed point.

The converged policy function. Reading off the maximising columns at iteration 4:

$$W' = h(W) = \begin{pmatrix} 0 \\ 0 \\ 0.33 \\ 0.66 \end{pmatrix} \qquad \text{at} \qquad W = \begin{pmatrix} 0 \\ 0.33 \\ 0.66 \\ 1 \end{pmatrix}.$$

In words: from $W = 0.66$ the agent saves $W' = 0.33$ (eats $c = 0.33$); from $W = 1$ she saves $W' = 0.66$ (eats $c = 0.34$). On this coarse grid, the consumption rule is approximately $c \approx W/3$.

What the computer does differently. The code below replaces the 4-point grid with $N_\text{inf} = 300$ points, uses np.interp to evaluate $v_j(W - c)$ off-grid, and chooses c from a fine 300-point candidate set rather than picking among 4 discrete next-states. Everything else is identical: build the Bellman objective, take the maximum, update. With the finer grid and $\beta = 0.95$, the converged policy will match the analytical result $c^*(W) = (1-\beta)W = 0.05\,W$ to within numerical tolerance — the iteration you just watched in miniature scales up automatically.

In [ ]:
# --- Value Function Iteration (VFI) -- infinite horizon --------------------
# With T = ∞, the problem is STATIONARY: the optimal policy depends on W only,
# not on t. We find V* as the fixed point of the Bellman operator T:
#     T V  :  (T V)(W) = max_c { u(c) + beta * V(W - c) }
# Starting from V_0 = 0, iterate V_{n+1} = T V_n until convergence in sup-norm.
# By the Contraction Mapping Theorem, ||V_{n+1} - V_n||_inf decays at rate beta.
N_inf    = 300                      # finer grid here — VFI benefits from more points
W_inf    = np.linspace(1e-6, 1.0, N_inf)
tol      = 1e-6                     # convergence tolerance in sup-norm
max_iter = 2000                     # safety cap — VFI should converge well before this

V_new   = np.zeros(N_inf)           # initial guess: V = 0 everywhere
history = []                        # record the sup-norm distance at each iteration (for convergence plot)

print("Running VFI...")
for it in range(max_iter):
    # Copy-before-update is critical for a SYNCHRONOUS update: V_{n+1} must depend
    # only on V_n, not on partially-updated values of V_{n+1} itself.
    V_old   = V_new.copy()
    pol_inf = np.zeros(N_inf)

    # Loop over every state W_i and solve the inner max.
    for i, W in enumerate(W_inf):
        c_g  = np.linspace(1e-6, W, N_inf)          # candidate consumption grid
        V_nx = np.interp(W - c_g, W_inf, V_old)     # V_n evaluated at next-period cake
        obj  = u_log(c_g) + beta * V_nx             # Bellman objective
        idx  = np.argmax(obj)
        V_new[i]   = obj[idx]
        pol_inf[i] = c_g[idx]

    # Sup-norm distance between successive iterates.
    diff = np.max(np.abs(V_new - V_old))
    history.append(diff)

    if diff < tol:
        print(f"  Converged at iteration {it+1}  (sup-norm = {diff:.2e})")
        break
    if it % 100 == 0:
        print(f"  iter {it:4d}  sup-norm = {diff:.6f}")

# Save the converged value and policy.
V_star   = V_new.copy()
pol_star = pol_inf.copy()
In [ ]:
# --- Diagnostics: convergence rate + policy function vs. analytical ---------
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 4))

# LEFT: convergence plot on log scale (the decay should look like a straight line
# with slope ≈ log(beta) — that's the Contraction Mapping Theorem made visible).
# semilogy plots log10 of y vs linear x.
ax1.semilogy(history, color="steelblue", linewidth=1.5)
ax1.axhline(tol, color="tomato", linewidth=1, linestyle="--", label=f"tol={tol:.0e}")
ax1.set_title("VFI convergence: sup-norm distance", fontsize=11)
ax1.set_xlabel("Iteration"); ax1.set_ylabel("||V_n+1 - V_n|| (log)")
ax1.legend(fontsize=9)

# RIGHT: the numerical policy overlaid on the closed-form solution.
# Analytical result: c*(W) = (1 - beta) * W (log utility, stationary case).
c_anal_inf = (1 - beta) * W_inf
ax2.plot(W_inf, pol_star,   color="steelblue", linewidth=2, label="VFI")
ax2.plot(W_inf, c_anal_inf, color="tomato",    linewidth=1.5, linestyle="--",
         label=f"Analytical (1-{beta})*W")
ax2.set_title("Policy function -- infinite horizon", fontsize=11)
ax2.set_xlabel("Cake size W"); ax2.set_ylabel("c*(W)")
ax2.legend(fontsize=9)

fig.tight_layout(); plt.show()

# Two headline diagnostics:
#   1) policy error (should be tiny if the grid is fine enough)
#   2) ratio history[5]/history[4] estimates the empirical contraction factor;
#      CMT predicts it should be ≈ beta once we're past the transient early iterations.
print(f"Max policy error: {np.max(np.abs(pol_star - c_anal_inf)):.6f}")
print(f"Contraction rate (iter 5->6): {history[5]/history[4]:.4f}  (should be ~beta={beta})")
In [ ]:
# --- Comparative statics: policy function for different beta ---------------
# Same VFI as above but wrapped in a loop over beta values to trace how the
# patience parameter shifts the policy. Higher beta → more patient → save more,
# consume less today for any given W.
fig, ax = plt.subplots(figsize=(9, 5))

for beta_i, color in zip([0.70, 0.85, 0.95, 0.99],
                          ["#d6604d","#f4a582","#4dac26","#2166ac"]):
    # Fresh initial guess for each beta.
    V_i = np.zeros(N_inf)

    # Fixed-iteration-count VFI with early-exit on convergence.
    # 500 iterations is plenty for all four beta values at this grid size.
    for _ in range(500):
        V_old_i = V_i.copy(); pol_i = np.zeros(N_inf)
        for i, W in enumerate(W_inf):
            c_g  = np.linspace(1e-6, W, N_inf)
            V_nx = np.interp(W - c_g, W_inf, V_old_i)
            obj  = u_log(c_g) + beta_i * V_nx       # beta_i changes across the outer loop
            idx  = np.argmax(obj)
            V_i[i] = obj[idx]; pol_i[i] = c_g[idx]

        # Converged? Break early to save time.
        if np.max(np.abs(V_i - V_old_i)) < 1e-5:
            break

    ax.plot(W_inf, pol_i, color=color, linewidth=2, label=f"beta={beta_i}")

ax.set_title("Policy function for different discount factors", fontsize=12)
ax.set_xlabel("Cake size W"); ax.set_ylabel("Optimal consumption c*(W)")
ax.legend(fontsize=10); fig.tight_layout(); plt.show()
print("Higher beta = more patient => lower c* today (save more for future).")

4. Exercise¶

Time: ~20 minutes.

Part A — Visualization (10 min)¶

Figure 1: line plot for 5 countries with recession years shaded (mean growth < 0 across panel). Add source note. Save as PNG.

Figure 2: scatter unemployment vs growth (all country-years). Highlight 2020 with a different marker, add OLS line, annotate 3 extreme observations. Save as PNG.

Part B — Dynamic Programming (10 min)¶

Task 1: Modify the infinite-horizon VFI to use CRRA utility: $$u(c) = \frac{c^{1-\sigma}}{1-\sigma}, \quad \sigma > 0$$ (log utility is the limit $\sigma \to 1$.)

Run VFI for $\sigma \in \{0.5, 1.0, 2.0, 5.0\}$. Plot the policy functions. How does the optimal consumption share change with $\sigma$?

Task 2 (optional): Add a gross return $R=1.05$ on savings so $W_{t+1} = R(W_t - c_t)$. How does the policy function change?

In [ ]:
# Part A: Visualization

# Figure 1 — recession-shaded time series for 5 countries
# YOUR CODE HERE

# Figure 2 — scatter unemployment vs growth with OLS line and annotations
# YOUR CODE HERE

# Part B: Dynamic Programming

# Task 1: CRRA utility u(c) = c^(1-sigma)/(1-sigma)
# Run VFI for sigma in [0.5, 1.0, 2.0, 5.0] and plot policy functions
# What happens to optimal savings as sigma increases?
# YOUR CODE HERE

# Task 2 (optional): add gross return R on savings so W_next = R*(W-c)
# How does the policy function change for R=1.05?
# YOUR CODE HERE

Solution¶

In [ ]:
# SOLUTION ---------------------------------------------------------------
# Self-contained re-run: re-import and reload data so the cell works in a fresh kernel.
import numpy as np, pandas as pd, matplotlib.pyplot as plt, warnings
warnings.filterwarnings("ignore")

df = pd.read_csv("eurozone_panel.csv")
df["year"] = df["year"].astype(int)
country_labels = {"IT":"Italy","DE":"Germany","FR":"France","ES":"Spain",
                  "NL":"Netherlands","PT":"Portugal","GR":"Greece",
                  "BE":"Belgium","AT":"Austria","FI":"Finland"}

# ============================================================================
# PART A — VISUALIZATION
# ============================================================================

# --- Figure 1: 5-country time series with RECESSION years shaded ------------
# A "recession year" here is operationalised as a year in which the PANEL-WIDE
# mean GDP growth was negative. This flags 2009 and 2020 in our sample.
five   = ["IT","DE","FR","ES","GR"]
cols   = ["#2166ac","#d6604d","#4dac26","#f4a582","#762a83"]

# groupby("year")["gdp_growth"].mean() = cross-country mean per year (a Series indexed by year).
# Then filter to negative values and take the index as a list of recession years.
rec_yrs = df.groupby("year")["gdp_growth"].mean()
rec_yrs = rec_yrs[rec_yrs < 0].index.tolist()

fig, ax = plt.subplots(figsize=(11,5))
for code, color in zip(five, cols):
    sub = df[df["country"]==code].sort_values("year")
    ax.plot(sub["year"], sub["gdp_growth"], label=country_labels[code],
            color=color, linewidth=2)

# Shade each flagged recession year with a ±0.5 year band for visual weight.
for yr in rec_yrs:
    ax.axvspan(yr-0.5, yr+0.5, alpha=0.07, color="red")

ax.axhline(0, color="black", linewidth=0.6)
ax.set_title("GDP growth in selected eurozone countries, 2000-2023", fontsize=12)
ax.set_xlabel("Year"); ax.set_ylabel("Annual % change")
ax.legend(fontsize=9, loc="lower right")
# Figure-level source note (figure coordinates, outside the axes).
fig.text(0.01,-0.03,"Source: World Bank.", fontsize=8, color="#555")
fig.tight_layout()
fig.savefig("sol_fig1_temp.png", dpi=5, bbox_inches="tight"); plt.show()

print(rec_yrs)
In [ ]:
# --- Figure 2: unemployment vs growth scatter, 2020 highlighted -------------
# Split the sample: 2020 (COVID shock — structurally different) vs everything else.
# .dropna(subset=[...]) drops rows where EITHER of the listed columns is missing.
non_covid = df[df["year"]!=2020].dropna(subset=["unemployment","gdp_growth"])
covid     = df[df["year"]==2020].dropna(subset=["unemployment","gdp_growth"])

# Fit the OLS line on the non-COVID sample so the 2020 points don't distort it.
m, b = np.polyfit(non_covid["unemployment"], non_covid["gdp_growth"], 1)
x_l  = np.linspace(df["unemployment"].min(), df["unemployment"].max(), 100)

fig, ax = plt.subplots(figsize=(9,5))
# Base sample: small semi-transparent points.
ax.scatter(non_covid["unemployment"], non_covid["gdp_growth"],
           s=18, alpha=0.4, color="steelblue", label="2000-2023 (excl. 2020)")
# COVID highlight: bigger, different shape (diamond), different colour, on top (zorder=5).
ax.scatter(covid["unemployment"], covid["gdp_growth"],
           s=60, color="tomato", marker="D", zorder=5, label="2020 (COVID)")
# OLS line (grey dashed).
ax.plot(x_l, m*x_l+b, color="gray", linewidth=1.5, linestyle="--",
        label=f"OLS slope={m:.2f}")

# Annotate the three most extreme growth observations.
# pd.concat([nlargest(2,col), nsmallest(1,col)]): the 2 largest and 1 smallest rows
# by gdp_growth stacked into a single DataFrame. (Since pandas 2.0, .append() and its
# private successor ._append() are both removed — concat is the only supported path.)
extremes = pd.concat([df.nlargest(2, "gdp_growth"), df.nsmallest(1, "gdp_growth")])
for _, row in extremes.iterrows():
    ax.annotate(f"{country_labels.get(row['country'],row['country'])} {int(row['year'])}",
                xy=(row["unemployment"], row["gdp_growth"]),
                xytext=(6,3), textcoords="offset points", fontsize=8)
ax.axhline(0, color="black", linewidth=0.5)
ax.set_title("GDP growth vs unemployment"); ax.set_xlabel("Unemployment (%)"); ax.set_ylabel("GDP growth (%)")
ax.legend(fontsize=9); fig.tight_layout()
fig.savefig("sol_fig2.png", dpi=300, bbox_inches="tight"); plt.show()
In [ ]:
# SOLUTION ---------------------------------------------------------------
# Self-contained re-run: re-import and reload data so the cell works in a fresh kernel.
import numpy as np, pandas as pd, matplotlib.pyplot as plt, warnings
warnings.filterwarnings("ignore")

df = pd.read_csv("eurozone_panel.csv")
df["year"] = df["year"].astype(int)
country_labels = {"IT":"Italy","DE":"Germany","FR":"France","ES":"Spain",
                  "NL":"Netherlands","PT":"Portugal","GR":"Greece",
                  "BE":"Belgium","AT":"Austria","FI":"Finland"}

# ============================================================================
# PART A — VISUALIZATION
# ============================================================================

# --- Figure 1: 5-country time series with RECESSION years shaded ------------
# A "recession year" here is operationalised as a year in which the PANEL-WIDE
# mean GDP growth was negative. This flags 2009 and 2020 in our sample.
five   = ["IT","DE","FR","ES","GR"]
cols   = ["#2166ac","#d6604d","#4dac26","#f4a582","#762a83"]

# groupby("year")["gdp_growth"].mean() = cross-country mean per year (a Series indexed by year).
# Then filter to negative values and take the index as a list of recession years.
rec_yrs = df.groupby("year")["gdp_growth"].mean()
rec_yrs = rec_yrs[rec_yrs < 0].index.tolist()
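# Optional sanity check (a sketch): print the panel-wide mean growth by year
# before filtering, to see which years dip below zero.
# print(df.groupby("year")["gdp_growth"].mean().round(2))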

fig, ax = plt.subplots(figsize=(11,5))
for code, color in zip(five, cols):
    sub = df[df["country"]==code].sort_values("year")
    ax.plot(sub["year"], sub["gdp_growth"], label=country_labels[code],
            color=color, linewidth=2)

# Shade each flagged recession year with a ±0.5 year band for visual weight.
for yr in rec_yrs:
    ax.axvspan(yr-0.5, yr+0.5, alpha=0.07, color="red")

ax.axhline(0, color="black", linewidth=0.6)
ax.set_title("GDP growth in selected eurozone countries, 2000-2023", fontsize=12)
ax.set_xlabel("Year"); ax.set_ylabel("Annual % change")
ax.legend(fontsize=9, loc="lower right")
# Figure-level source note (figure coordinates, outside the axes).
fig.text(0.01,-0.03,"Source: World Bank.", fontsize=8, color="#555")
fig.tight_layout()
fig.savefig("sol_fig1.png", dpi=300, bbox_inches="tight"); plt.show()

# --- Figure 2: unemployment vs growth scatter, 2020 highlighted -------------
# Split the sample: 2020 (COVID shock — structurally different) vs everything else.
# .dropna(subset=[...]) drops rows where EITHER of the listed columns is missing.
non_covid = df[df["year"]!=2020].dropna(subset=["unemployment","gdp_growth"])
covid     = df[df["year"]==2020].dropna(subset=["unemployment","gdp_growth"])

# Fit the OLS line on the non-COVID sample so the 2020 points don't distort it.
m, b = np.polyfit(non_covid["unemployment"], non_covid["gdp_growth"], 1)
x_l  = np.linspace(df["unemployment"].min(), df["unemployment"].max(), 100)
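
# Cross-check (a sketch): the degree-1 polyfit slope should match the textbook
# OLS formula slope = cov(x, y) / var(x) (note the matching ddof):
# x = non_covid["unemployment"].to_numpy(); y = non_covid["gdp_growth"].to_numpy()
# print(np.cov(x, y)[0, 1] / np.var(x, ddof=1))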

fig, ax = plt.subplots(figsize=(9,5))
# Base sample: small semi-transparent points.
ax.scatter(non_covid["unemployment"], non_covid["gdp_growth"],
           s=18, alpha=0.4, color="steelblue", label="2000-2023 (excl. 2020)")
# COVID highlight: bigger, different shape (diamond), different colour, on top (zorder=5).
ax.scatter(covid["unemployment"], covid["gdp_growth"],
           s=60, color="tomato", marker="D", zorder=5, label="2020 (COVID)")
# OLS line (grey dashed).
ax.plot(x_l, m*x_l+b, color="gray", linewidth=1.5, linestyle="--",
        label=f"OLS slope={m:.2f}")

# Annotate the three most extreme growth observations.
# pd.concat([nlargest(2,col), nsmallest(1,col)]): the 2 largest and 1 smallest rows
# by gdp_growth stacked into a single DataFrame. (DataFrame.append() was removed
# in pandas 2.0; pd.concat is the supported way to stack DataFrames.)
extremes = pd.concat([df.nlargest(2, "gdp_growth"), df.nsmallest(1, "gdp_growth")])
for _, row in extremes.iterrows():
    ax.annotate(f"{country_labels.get(row['country'],row['country'])} {int(row['year'])}",
                xy=(row["unemployment"], row["gdp_growth"]),
                xytext=(6,3), textcoords="offset points", fontsize=8)
ax.axhline(0, color="black", linewidth=0.5)
ax.set_title("GDP growth vs unemployment"); ax.set_xlabel("Unemployment (%)"); ax.set_ylabel("GDP growth (%)")
ax.legend(fontsize=9); fig.tight_layout()
fig.savefig("sol_fig2.png", dpi=300, bbox_inches="tight"); plt.show()

# ============================================================================
# PART B — DYNAMIC PROGRAMMING
# ============================================================================

# --- Task 1: VFI with CRRA utility, varying sigma ---------------------------
# CRRA utility: u(c) = c^(1-sigma) / (1-sigma) for sigma != 1, and u(c) = log(c) for sigma = 1.
# sigma is the coefficient of relative risk aversion (also the inverse of the elasticity of
# intertemporal substitution in this setting).
beta_dp = 0.95; N_dp = 200
W_dp = np.linspace(1e-6, 1.0, N_dp)

def u_crra(c, sigma):
    """CRRA utility with log as the sigma=1 limit."""
    c = np.maximum(c, 1e-10)                   # numerical safeguard for c > 0
    # abs(sigma - 1) < 1e-10: robust way to check sigma == 1 in floating point.
    return np.log(c) if abs(sigma-1)<1e-10 else c**(1-sigma)/(1-sigma)
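
# Note on the sigma = 1 branch: the exact limit is (c**(1-sigma) - 1)/(1-sigma)
# -> log(c) as sigma -> 1. The form above omits the additive constant
# -1/(1-sigma); that shifts V by a constant and leaves the argmax (the policy)
# unchanged, so using plain log(c) at sigma = 1 is harmless here.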

fig, ax = plt.subplots(figsize=(9,5))
for sigma, color in zip([0.5,1.0,2.0,5.0],["#2166ac","#4dac26","#f4a582","#d6604d"]):
    V = np.zeros(N_dp)
    # Standard VFI loop, now with CRRA instead of log utility.
    for _ in range(600):
        V_old = V.copy(); pol_s = np.zeros(N_dp)
        for i, W in enumerate(W_dp):
            c_g  = np.linspace(1e-6, W, N_dp)
            V_nx = np.interp(W - c_g, W_dp, V_old)
            obj  = u_crra(c_g, sigma) + beta_dp * V_nx
            idx  = np.argmax(obj)
            V[i] = obj[idx]; pol_s[i] = c_g[idx]
        if np.max(np.abs(V-V_old)) < 1e-5:
            break
    ax.plot(W_dp, pol_s, color=color, linewidth=2, label=f"sigma={sigma}")
ax.set_title("Policy function for different sigma (CRRA)", fontsize=12)
ax.set_xlabel("Cake size W"); ax.set_ylabel("c*(W)")
ax.legend(fontsize=10); fig.tight_layout(); plt.show()
print("Higher sigma => more risk aversion / preference for smooth consumption => save more.")

# --- Task 2: VFI with gross return R on savings -----------------------------
# Law of motion changes: W_{t+1} = R * (W_t - c_t) instead of W_{t+1} = W_t - c_t.
# We clip the next-period cake at W_dp[-1] because the grid has a finite upper
# bound; np.interp does not extrapolate (it returns the endpoint values outside
# the grid), so the explicit clip makes that capping deliberate and visible.
fig, ax = plt.subplots(figsize=(9,5))
for R, color, lbl in [(1.0,"steelblue","R=1.00"),(1.05,"tomato","R=1.05")]:
    V = np.zeros(N_dp)
    for _ in range(800):
        V_old = V.copy(); pol_r = np.zeros(N_dp)
        for i, W in enumerate(W_dp):
            c_g  = np.linspace(1e-6, W, N_dp)
            # Key line: next-period cake is R * saved_amount, clipped to the grid max.
            W_nx = np.minimum(R*(W-c_g), W_dp[-1])
            V_nx = np.interp(W_nx, W_dp, V_old)
            obj  = np.log(np.maximum(c_g,1e-10)) + beta_dp*V_nx
            idx  = np.argmax(obj); V[i]=obj[idx]; pol_r[i]=c_g[idx]
        if np.max(np.abs(V-V_old))<1e-5:
            break
    ax.plot(W_dp, pol_r, color=color, linewidth=2, label=lbl)
ax.set_title("Policy function with gross return R", fontsize=12)
ax.set_xlabel("W"); ax.set_ylabel("c*(W)"); ax.legend(fontsize=9)
fig.tight_layout(); plt.show()
print("With R>1 the agent saves more: returns make patience more valuable.")

Summary¶

| Topic | Key tool | Key concept |
|-------|----------|-------------|
| Multi-panel figures | plt.subplots(nrows, ncols) | fig, ax object-oriented interface |
| Statistical plots | sns.boxplot, sns.heatmap | Grouping, confidence intervals |
| Save figure | fig.savefig("f.pdf", dpi=300) | bbox_inches="tight" |
| Finite-horizon DP | Backward induction, np.interp | Terminal condition + Bellman recursion |
| Infinite-horizon DP | Value function iteration | Contraction mapping, convergence rate ≈ β |
| Verify numerics | Analytical solution comparison | Always validate against known cases |

Next lecture¶

Lecture 5 — Web Scraping I: HTTP requests, BeautifulSoup, building your first text corpus.