# Creating lists
countries = ["Italy", "Germany", "France", "Spain", "Netherlands"]
# Parallel to `countries`: gdp_growth[i] is the value for countries[i].
gdp_growth = [0.7, -0.3, 0.9, 2.5, 1.1]     # 2023 estimates, %
years = list(range(2018, 2024))             # [2018, 2019, ..., 2023]

print(countries)
print(gdp_growth)
print(years)

# Indexing — Python uses zero-based indexing
print(countries[0])    # first element → "Italy"
print(countries[-1])   # last element  → "Netherlands"
print(countries[4])    # index 4 = fifth element, here also the last → "Netherlands"
print(countries[1:3])  # slice: index 1 up to (not including) 3 → ["Germany", "France"]

# Note on slice notation [start:stop]: Python slices are *half-open* intervals —
# the start index is included, the stop index is excluded.
# Reason: len(seq[i:j]) == j - i, which keeps arithmetic clean.
# This convention is consistent with range(start, stop): range(1, 3) → 1, 2 (not 3).
# You already saw this in Lecture 1 with range() — same logic applies everywhere in Python.

# Modifying lists
# Lists are mutable: item assignment and append() change the existing list object.
gdp_growth[0] = 0.73           # update a value
countries.append("Belgium")    # add to the end
gdp_growth.append(1.4)         # matching value, so both lists keep the same length

print(countries)
print(gdp_growth)

# Useful list methods
rates = [1.2, 0.3, -0.5, 1.8, 0.7, -1.1, 2.3]

print(len(rates))          # number of elements → 7
print(sum(rates))          # sum → about 4.7 (floating-point arithmetic may show extra digits)
print(min(rates))          # minimum → -1.1
print(max(rates))          # maximum → 2.3
print(sorted(rates))       # returns a *new* sorted list (does not modify the original)
print(rates)               # original unchanged

rates.sort()               # sorts *in place* — modifies the original, returns None
print(rates)               # now in ascending order: [-1.1, -0.5, 0.3, 0.7, 1.2, 1.8, 2.3]

# Key distinction:
#   sorted(x)  → built-in function, non-destructive, works on any iterable
#   x.sort()   → list method, in-place, returns None
# Common mistake: rates = rates.sort() — this sets rates to None!

# zip() pairs two (or more) iterables element by element,
# producing tuples: (countries[0], gdp_growth[0]), (countries[1], gdp_growth[1]), ...
# If the lists have different lengths, zip() stops at the *shortest* one (silent truncation).
# You already saw tuple unpacking in Lecture 1 (e.g. avg, low, high = describe_series(...));
# here the same unpacking happens directly in the for-clause.

countries = ["Italy", "Germany", "France", "Spain"]
gdp_growth = [0.73, -0.30, 0.90, 2.50]

# Format spec "+.2f": two decimals, and the "+" flag makes Python print the sign
# for every number ("+0.73", "-0.30"). Letting the format spec handle the sign
# avoids building it by hand, which would print "+-0.00" for a negative zero.
for country, rate in zip(countries, gdp_growth):
    print(f"{country:<12} {rate:+.2f}%")

# Check membership with `in`
# (`countries` is the four-element list defined just above.)
print("Italy" in countries)      # True
print("Poland" in countries)     # False

# Find the index of an element
# .index() returns the position of the *first* match and raises ValueError
# if the element is not in the list — check with `in` first when unsure.
print(countries.index("France")) # 2

# Count occurrences
ratings = ["A", "B", "A", "C", "A", "B"]
print(ratings.count("A"))        # 3

# Creating tuples — parentheses (optional but conventional)
italy = ("Italy", "EUR", 1909.0, True)   # (name, currency, gdp, is_eurozone)

print(italy[0])   # indexing works the same as lists → "Italy"
print(italy[-1])  # last element → True

# Unpacking — assign each element to a variable
# The number of names on the left must equal the number of elements in the tuple.
name, currency, gdp, eurozone = italy
print(f"{name}: GDP = {gdp:.0f} bn EUR, currency = {currency}")

# Tuples are immutable — this will raise a TypeError.
# We catch it with try/except to show the error message without stopping the notebook.
# (You already saw try/except in Lecture 1 in the input-validation function;
# we will cover exception handling more systematically in a later lecture.)
try:
    italy[2] = 2010.0          # item assignment is not supported by tuples
except TypeError as e:
    print(f"TypeError: {e}")

# A list of tuples — a natural way to store tabular data before pandas
# Each tuple is one row: (name, currency, gdp, growth).
country_data = [
    ("Italy",       "EUR", 1909.0,  0.73),
    ("Germany",     "EUR", 4072.0, -0.30),
    ("France",      "EUR", 2794.0,  0.90),
    ("Switzerland", "CHF",  769.0,  1.20),
]

# Format specs: "<14" left-aligns in 14 characters, ">10" right-aligns in 10,
# ".1f"/".2f" fix the number of decimals, "+" always prints the sign.
# The header uses the same widths as the rows so the columns line up;
# the "%" printed after each growth value accounts for the extra dash (46 vs 45).
print(f"{'Country':<14} {'Currency':<10} {'GDP (bn)':>10} {'Growth':>8}")
print("-" * 46)
# Each row tuple is unpacked directly in the for-clause.
# Note: this rebinds the module-level names `name` and `gdp` used in the tuple cell.
for name, curr, gdp, growth in country_data:
    print(f"{name:<14} {curr:<10} {gdp:>10.1f} {growth:>+8.2f}%")

# Creating a dictionary
# Note: `italy` was a tuple in the previous section; the name is rebound to a dict here.
italy = {
    "name":       "Italy",
    "currency":   "EUR",
    "gdp":        1909.0,
    "gdp_growth": 0.73,
    "eurozone":   True,
}

# Access a value by key
# Square-bracket access raises KeyError if the key does not exist.
print(italy["gdp"])     # 1909.0
print(italy["name"])    # "Italy"

# Note: since Python 3.7 dicts maintain *insertion order* —
# iterating over a dict yields keys in the order they were added.
# This was not guaranteed in earlier Python versions.

# Safer access with .get() — returns None (or a default) if key is missing
print(italy.get("inflation"))           # None — key does not exist
print(italy.get("inflation", "n/a"))    # "n/a" — explicit default

# Add or update entries
# The same syntax does both: a new key is inserted, an existing key is overwritten.
italy["inflation"] = 5.8  # new key, appended at the end of the insertion order
italy["gdp"] = 2010.0     # update existing key

print(italy)

# Iterating over a dictionary
# .items() yields (key, value) pairs, unpacked here into two loop variables.
for key, value in italy.items():
    print(f"  {key:<14}: {value}")

print()
# .keys() / .values() return views; list() turns them into plain lists for printing.
print("Keys:  ", list(italy.keys()))
print("Values:", list(italy.values()))

# A dict of dicts — a dataset indexed by country code
# Outer key: three-letter country code. Inner dict: one record with the same
# four keys ("name", "gdp", "growth", "inflation") for every country.
dataset = {
    "ITA": {"name": "Italy",       "gdp": 2010.0, "growth":  0.73, "inflation": 5.8},
    "DEU": {"name": "Germany",     "gdp": 4082.0, "growth": -0.30, "inflation": 6.1},
    "FRA": {"name": "France",      "gdp": 2794.0, "growth":  0.90, "inflation": 5.7},
    "ESP": {"name": "Spain",       "gdp": 1419.0, "growth":  2.50, "inflation": 3.5},
    "NLD": {"name": "Netherlands", "gdp":  1118.0, "growth":  0.10, "inflation": 3.8},
}

# Access nested values
# First key selects the country record, second key selects the field → 5.8
print(dataset["ITA"]["inflation"])

# Iterate and compute
# `code` is the outer key, `d` the inner record; fields are read by name inside the f-string.
print(f"\n{'Code':<6} {'Country':<14} {'GDP':>8} {'Growth':>8} {'Inflation':>10}")
print("-" * 52)
for code, d in dataset.items():
    print(f"{code:<6} {d['name']:<14} {d['gdp']:>8.1f} {d['growth']:>+8.2f}% {d['inflation']:>9.1f}%")

# A time series as a dict — year → value
italy_gdp = {
    2019: 1789.0,
    2020: 1654.0,   # COVID shock
    2021: 1779.0,
    2022: 1909.0,
    2023: 2010.0,
}

# Compute year-on-year growth rates from a dict.
# Sorting the keys guarantees chronological order whatever the insertion order was.
# zip(years, years[1:]) pairs every year with the following one:
# (2019, 2020), (2020, 2021), ... — no index arithmetic needed.
years = sorted(italy_gdp)      # iterating over a dict yields its keys
for y0, y1 in zip(years, years[1:]):
    g = (italy_gdp[y1] - italy_gdp[y0]) / italy_gdp[y0] * 100   # growth in %
    print(f"{y0}→{y1}: {g:+.2f}%")

# Creating sets
# Curly braces with bare elements (no key: value pairs) create a set.
eurozone = {"Germany", "France", "Italy", "Spain", "Netherlands",
            "Belgium", "Austria", "Portugal", "Finland", "Ireland"}

g7 = {"Germany", "France", "Italy", "United States",
      "United Kingdom", "Japan", "Canada"}

# Set operations
# Sets are unordered, so the elements may be printed in any order.
print("Eurozone ∩ G7 (both):",     eurozone & g7)       # intersection → Germany, France, Italy
print("Eurozone ∪ G7 (either):",   eurozone | g7)       # union
print("Eurozone only (not G7):",   eurozone - g7)       # difference
print("G7 only (not eurozone):",   g7 - eurozone)       # difference the other way round

# Deduplication — convert a list with duplicates to a set
raw_countries = ["Italy", "Germany", "Italy", "France", "Germany", "Italy"]
unique_countries = set(raw_countries)
print(f"Raw list: {len(raw_countries)} entries → unique: {len(unique_countries)}")   # 6 → 3
print(unique_countries)

# Membership test is O(1) for sets — much faster than lists for large data.
# O(1) ("constant time") means the lookup time does not grow with the size of the set:
# checking 'x in my_set' takes the same time whether the set has 10 or 10 million elements.
# This is possible because sets are implemented as hash tables.
# Lists, by contrast, are O(n): in the worst case Python scans every element one by one.
# For the sizes we encounter in this course the difference rarely matters,
# but it becomes important when working with large datasets.
print("Italy" in unique_countries)   # True

# General syntax of a list comprehension:
#   [expression for item in iterable if condition]

# Without comprehension
growth_rates = [1.2, 0.3, -0.5, 1.8, 0.7, -1.1, 2.3]

positive = []
for r in growth_rates:
    if r > 0:
        positive.append(r)
print("Loop:", positive)            # [1.2, 0.3, 1.8, 0.7, 2.3]

# With list comprehension — same result, one line
positive = [r for r in growth_rates if r > 0]
print("Comprehension:", positive)

# Transform every element (no `if` clause, so nothing is filtered out)
# Convert annual rates to growth factors (1 + r)
# r is in percent, hence the division by 100.
factors = [1 + r/100 for r in growth_rates]
print("Growth factors:", [round(f, 4) for f in factors])   # rounded only for display

# Classify each rate
# Here `if ... else` is a conditional *expression* placed before `for`: it chooses
# the value for every element. A filter `if` would come after the `for` clause.
labels = ["expansion" if r > 0 else "contraction" for r in growth_rates]
print("Labels:", labels)

# Dict comprehension — build a mapping from two lists
countries = ["Italy", "Germany", "France", "Spain"]
growth    = [0.73,    -0.30,      0.90,     2.50]

# {key: value for ...} — zip() supplies one (country, growth) pair per iteration.
growth_map = {c: g for c, g in zip(countries, growth)}
print(growth_map)

# Filter: only countries with positive growth
positive_map = {c: g for c, g in growth_map.items() if g > 0}
print(positive_map)                 # Germany (-0.30) is dropped

# Filtering against a value computed beforehand
# Which countries in our dataset have above-average growth?
# The mean is computed once, outside the comprehension, and reused for every element.
avg = sum(growth) / len(growth)
above_avg = [c for c, g in zip(countries, growth) if g > avg]
print(f"Average growth: {avg:.2f}%")
print(f"Above average: {above_avg}")

# A minimal class
class Country:
    """
    Represents a country with basic macroeconomic attributes.

    Parameters
    ----------
    name : str
        Country name, used for display.
    gdp_series : list of float
        Annual GDP values in chronological order.
    start_year : int
        Year of the first observation in gdp_series.

    Notes
    -----
    Growth statistics need at least two observations. With a shorter
    series ``growth_rates()`` returns an empty list, ``average_growth()``
    raises ``ZeroDivisionError``, and ``repr()`` shows ``n/a`` for the
    values that cannot be computed instead of failing.
    """

    def __init__(self, name, gdp_series, start_year):
        self.name       = name
        self.gdp_series = gdp_series
        self.start_year = start_year

    def latest_gdp(self):
        """Return the most recent GDP value (the last element of the series).

        Raises
        ------
        IndexError
            If the series is empty.
        """
        return self.gdp_series[-1]

    def average_gdp(self):
        """Return mean GDP over the available period.

        Raises
        ------
        ZeroDivisionError
            If the series is empty.
        """
        return sum(self.gdp_series) / len(self.gdp_series)

    def growth_rates(self):
        """Return year-on-year growth rates as a list.

        Each rate is expressed in percent and rounded to two decimals.
        The list has one element fewer than the series (empty when the
        series has fewer than two observations).

        Raises
        ------
        ZeroDivisionError
            If a GDP value used as the base of a rate is zero.
        """
        # zip(series, series[1:]) pairs each observation with the next one.
        return [
            round((current - previous) / previous * 100, 2)
            for previous, current in zip(self.gdp_series, self.gdp_series[1:])
        ]

    def average_growth(self):
        """Return average annual growth rate (mean of ``growth_rates()``), in percent.

        Raises
        ------
        ZeroDivisionError
            If the series has fewer than two observations (no rates to average).
        """
        rates = self.growth_rates()
        return sum(rates) / len(rates)

    def __repr__(self):
        """String representation — used by print() and when the object is displayed.

        A repr is a debugging aid and should never raise, so values that
        cannot be computed for a too-short series are shown as ``n/a``.
        """
        n_obs = len(self.gdp_series)
        latest = f"{self.latest_gdp():.1f}" if n_obs > 0 else "n/a"
        avg = f"{self.average_growth():.2f}%" if n_obs > 1 else "n/a"
        return (f"Country(name='{self.name}', "
                f"latest_gdp={latest}, "
                f"avg_growth={avg})")

# Instantiate objects
# Calling the class runs __init__ and returns a new, independent instance.
italy = Country(
    name="Italy",
    gdp_series=[1789.0, 1654.0, 1779.0, 1909.0, 2010.0],
    start_year=2019
)

germany = Country(
    name="Germany",
    gdp_series=[3449.0, 3367.0, 3601.0, 3870.0, 4082.0],
    start_year=2019
)

# Access attributes and call methods
print(italy)      # print() displays the string returned by Country.__repr__
print(germany)
print()
print(f"Italy growth rates: {italy.growth_rates()}")
print(f"Germany avg growth: {germany.average_growth():.2f}%")

# A list of Country objects — now we can apply the same operations to all
# Note: `countries` held country *names* (strings) earlier; it now holds objects.
countries = [italy, germany]

france = Country(
    name="France",
    gdp_series=[2715.0, 2630.0, 2788.0, 2782.0, 2794.0],
    start_year=2019
)
countries.append(france)

# The GDP column uses width 11 (not 12) because the "%" printed after the
# growth value already takes one character of the row.
print(f"{'Country':<12} {'Avg growth':>12} {'Latest GDP':>12}")
print("-" * 38)
for c in countries:
    print(f"{c.name:<12} {c.average_growth():>+12.2f}% {c.latest_gdp():>11.1f}")

# Sorting a list of objects by an attribute.

# sorted() accepts a `key` argument: a function that is applied to each element
# to produce the value to sort by.
# We use a *lambda* — a function that is created and used on the spot,
# without ever being assigned to a name.
#
# Syntax:  lambda argument : expression
#
# These two lines produce exactly the same result:
#
#   Using lambda (anonymous, used once and discarded):

# sorted() returns a new list and leaves `countries` in its original order;
# reverse=True puts the highest average growth first.
countries_sorted = sorted(countries, key=lambda c: c.average_growth(), reverse=True)

#   Using def (named, could be reused elsewhere):

def sort_key(c):
    """Sort key for sorted(): the average growth rate of country *c*."""
    average = c.average_growth()
    return average
# The function object itself is passed (no parentheses): sorted() calls it once per element.
countries_sorted_with_def = sorted(countries, key=sort_key, reverse=True)

# Use lambda for short, one-off functions; use def when the logic is
# more complex or you need to reuse it.

# Both rankings below list the countries in the same order,
# because both keys return c.average_growth().
print("Ranked by average growth (descending):")
for i, c in enumerate(countries_sorted, start=1):           # start=1: index from 1 instead of 0
    print(f"  {i}. {c.name}: {c.average_growth():.2f}%")

print("Ranked by average growth (descending) using def:")
for i, c in enumerate(countries_sorted_with_def, start=1):   # start=1: index from 1 instead of 0
    print(f"  {i}. {c.name}: {c.average_growth():.2f}%")

# Exercise dataset, indexed by three-letter country code.
# Each record holds the country name, a list of five annual GDP values
# in chronological order, and the 2023 inflation rate in percent.
# (Defined once: the export contained the same literal twice in a row.)
raw_data = {
    "ITA": {"name": "Italy",       "gdp": [1789, 1654, 1779, 1909, 2010], "inflation_2023": 5.8},
    "DEU": {"name": "Germany",     "gdp": [3449, 3367, 3601, 3870, 4082], "inflation_2023": 6.1},
    "FRA": {"name": "France",      "gdp": [2715, 2630, 2788, 2782, 2794], "inflation_2023": 5.7},
    "ESP": {"name": "Spain",       "gdp": [1245, 1122, 1207, 1328, 1419], "inflation_2023": 3.5},
    "NLD": {"name": "Netherlands", "gdp": [ 894,  857,  924, 1010, 1118], "inflation_2023": 3.8},
}

# Exercise scaffold: replace each "YOUR CODE HERE" marker with your solution.

# Task 1 — dict comprehension: avg_growth
# YOUR CODE HERE

# Task 2 — list comprehension: positive growth countries
# YOUR CODE HERE

# Task 3 — extend the Country class
# NOTE: this placeholder rebinds the name `Country`. Until it is filled in,
# it replaces the full class defined earlier with an empty one.
class Country:
    # YOUR CODE HERE
    pass

# Task 4 — instantiate, sort, print
# YOUR CODE HERE

# Task 5 (optional) — set operations
# The seven G7 member names, provided as a ready-made set for the task.
g7_names = {"Germany", "France", "Italy", "United States", "United Kingdom", "Japan", "Canada"}
# YOUR CODE HERE

# ── SOLUTION ──────────────────────────────────────────────────────────────────

# Same dataset as in the exercise statement, repeated so the solution runs on its own.
# code → {"name": str, "gdp": five annual values in chronological order,
#         "inflation_2023": inflation rate in percent}
raw_data = {
    "ITA": {"name": "Italy",       "gdp": [1789, 1654, 1779, 1909, 2010], "inflation_2023": 5.8},
    "DEU": {"name": "Germany",     "gdp": [3449, 3367, 3601, 3870, 4082], "inflation_2023": 6.1},
    "FRA": {"name": "France",      "gdp": [2715, 2630, 2788, 2782, 2794], "inflation_2023": 5.7},
    "ESP": {"name": "Spain",       "gdp": [1245, 1122, 1207, 1328, 1419], "inflation_2023": 3.5},
    "NLD": {"name": "Netherlands", "gdp": [ 894,  857,  924, 1010, 1118], "inflation_2023": 3.8},
}

# ── Task 1: dict comprehension ─────────────────────────────────────────────
def _avg_growth(gdp_list):
    rates = [(gdp_list[i] - gdp_list[i-1]) / gdp_list[i-1] * 100
             for i in range(1, len(gdp_list))]
    return round(sum(rates) / len(rates), 2)

# One entry per country code; the value is the rounded mean growth of its GDP list.
avg_growth = {code: _avg_growth(d["gdp"]) for code, d in raw_data.items()}
print("Task 1 — average growth:", avg_growth)


# ── Task 2: list comprehension ─────────────────────────────────────────────
# Iterates over the Task 1 result, keeping the codes whose average growth is positive.
positive_codes = [code for code, g in avg_growth.items() if g > 0]
print("Task 2 — positive growth:", positive_codes)


# ── Task 3: extended Country class ────────────────────────────────────────
class Country:
    """
    A country with a GDP series and an inflation rate (extended exercise version).

    Parameters
    ----------
    name : str
        Country name, used for display.
    gdp_series : list of float
        Annual GDP values in chronological order.
    start_year : int
        Year of the first observation in gdp_series.
    inflation : float
        Inflation rate in percent, for the year of the last observation.

    Notes
    -----
    Growth statistics need at least two observations. With a shorter
    series ``growth_rates()`` returns an empty list, ``average_growth()``
    raises ``ZeroDivisionError``, ``real_growth()`` raises ``IndexError``,
    and ``repr()`` shows ``n/a`` for the values that cannot be computed.
    """

    def __init__(self, name, gdp_series, start_year, inflation):
        self.name       = name
        self.gdp_series = gdp_series
        self.start_year = start_year
        self.inflation  = inflation

    def latest_gdp(self):
        """Return the most recent GDP value. Raises IndexError if the series is empty."""
        return self.gdp_series[-1]

    def average_gdp(self):
        """Return mean GDP over the available period.

        Kept from the original class so that the extended version offers
        the same methods. Raises ZeroDivisionError if the series is empty.
        """
        return sum(self.gdp_series) / len(self.gdp_series)

    def growth_rates(self):
        """Return year-on-year growth rates in percent, each rounded to two decimals."""
        # zip(series, series[1:]) pairs each observation with the next one.
        return [
            round((current - previous) / previous * 100, 2)
            for previous, current in zip(self.gdp_series, self.gdp_series[1:])
        ]

    def average_growth(self):
        """Return the mean of ``growth_rates()``, in percent.

        Raises ZeroDivisionError if the series has fewer than two observations.
        """
        rates = self.growth_rates()
        return sum(rates) / len(rates)

    def real_growth(self):
        """Approximate real growth: latest nominal growth minus inflation.

        Both inputs and the result are in percentage points.

        This uses the linear approximation g_real ≈ g_nom − π, which holds
        when both rates are small (a few percentage points).
        The exact identity, with rates written as decimals, is
        (1 + g_nom) = (1 + g_real)(1 + π),
        giving g_real = (g_nom − π) / (1 + π). At low inflation the
        denominator is close to 1 and the approximation is accurate;
        at high inflation the denominator is well above 1, so the simple
        difference materially overstates the magnitude of the real rate.

        Raises IndexError if the series has fewer than two observations.
        """
        return self.growth_rates()[-1] - self.inflation

    def __repr__(self):
        """String representation; shows ``n/a`` rather than raising on a too-short series."""
        n_obs = len(self.gdp_series)
        latest = f"{self.latest_gdp():.0f}" if n_obs > 0 else "n/a"
        avg = f"{self.average_growth():.2f}%" if n_obs > 1 else "n/a"
        return (f"Country('{self.name}', "
                f"latest_gdp={latest}, "
                f"avg_growth={avg}, "
                f"inflation={self.inflation:.1f}%)")

# ── Task 4: instantiate, sort, print ──────────────────────────────────────
# One Country object per record; only the values are needed, so the codes are skipped.
countries = [
    Country(
        name=d["name"],
        gdp_series=d["gdp"],
        start_year=2019,   # hard-coded: assumed to be the first year of every series
        inflation=d["inflation_2023"]
    )
    for d in raw_data.values()
]

# Highest real growth first; `countries` itself keeps the dataset order.
countries_sorted = sorted(countries, key=lambda c: c.real_growth(), reverse=True)

print()
print(f"{'Country':<14} {'Nom. growth':>12} {'Inflation':>10} {'Real growth':>12}")
print("-" * 52)
for c in countries_sorted:
    nom = c.growth_rates()[-1]   # latest nominal growth rate, the one real_growth() uses
    # Numeric widths are one less than the header widths where a "%" follows the value.
    print(f"{c.name:<14} {nom:>+12.2f}% {c.inflation:>9.1f}% {c.real_growth():>+11.2f}%")

# ── Task 5: set operations ─────────────────────────────────────────────────
g7_names = {"Germany", "France", "Italy", "United States",
            "United Kingdom", "Japan", "Canada"}
# Set comprehension: collects the country names found in the dataset.
dataset_names = {d["name"] for d in raw_data.values()}
# Intersection: names present in both sets.
both = dataset_names & g7_names
print()
# The leading "\n" adds a second blank line before the result.
print(f"\nTask 5 — in both dataset and G7: {both}")

Concept	Meaning	Analogy
Class	Blueprint / template	The concept of "country"
Instance	A specific object created from a class	Italy, Germany
Attribute	Data stored in an object	`.name`, `.gdp_series`
Method	Function defined inside a class	`.growth_rates()`
`__init__`	Special method called when creating an instance	Constructor
`__repr__`	Special method controlling how the object is displayed	What `print(italy)` shows

If you need...	Use
An ordered, mutable sequence	`list`
An ordered, fixed sequence (return values, coordinates)	`tuple`
A key-value mapping (flexible, named fields)	`dict`
Fast membership testing, deduplication	`set`
Data + behaviour bundled together	`class`
Tabular data with rows and columns	`pandas.DataFrame` (next week)

Structure	Mutable	Ordered	Duplicates	Typical use
`list`	Yes	Yes	Yes	Sequences, time series
`tuple`	No	Yes	Yes	Fixed records, return values
`dict`	Yes	Yes*	Keys: no	Named fields, JSON-like data
`set`	Yes	No	No	Deduplication, membership
`class`	Yes	—	—	Data + behaviour together

\* Since Python 3.7, a `dict` preserves insertion order.

Lecture 2 — Data Structures & Object-Oriented Programming¶

Python for Economists · University of Bologna · 2025/2026¶

What we cover today¶

1. Lists¶

1.1 Membership and search¶

2. Tuples¶

3. Dictionaries¶

4. Sets¶

5. List and dict comprehensions¶

6. Introduction to Object-Oriented Programming¶

Why does this matter for economists?¶

Core concepts¶

6.1 A note on `self`¶

7. Choosing the right data structure¶

8. Exercise¶

Tasks¶

Solution¶

Summary¶

Next lecture¶

Lecture 2 — Data Structures & Object-Oriented Programming¶

Python for Economists · University of Bologna · 2025/2026¶

What we cover today¶

1. Lists¶

1.1 Membership and search¶

2. Tuples¶

3. Dictionaries¶

4. Sets¶

5. List and dict comprehensions¶

6. Introduction to Object-Oriented Programming¶

Why does this matter for economists?¶

Core concepts¶

6.1 A note on self¶

7. Choosing the right data structure¶

8. Exercise¶

Tasks¶

Solution¶

Summary¶

Next lecture¶

6.1 A note on `self`¶