In [None]:
import plotly.express as px
import polars as pl
from enum import IntEnum

In [None]:
# Inject plotly.js into the notebook so figures render inline; with
# connected=False the library is embedded rather than fetched from a CDN.
import plotly.offline

plotly.offline.init_notebook_mode(connected=False)

In [None]:
# Source: https://www.data.gouv.fr/fr/datasets/r/8ef4c2a3-91a0-4d98-ae3a-989bde87b62a
class Gravity(IntEnum):
    """Integer codes describing how severely a person was hurt in an accident.

    The members are compared against the ``grav`` column of the users table
    when selecting rows by injury severity.

    NOTE(review): the code-to-meaning mapping presumably follows the dataset
    description linked above this class -- confirm against that document.
    """

    UNINJURED = 1
    KILLED = 2
    HOSPITALIZED = 3
    LIGHTLY_INJURED = 4

In [None]:
# Users table (2023, per the file name). The aggregation cell further down
# reads its "Num_Acc" and "grav" columns.
usagers_df = pl.read_parquet(source="data/usagers_2023.parquet")
usagers_df

In [None]:
# Accident characteristics table (2023, per the file name). It supplies the
# "Num_Acc" -> "dep" mapping used to attach a department to each accident.
caracs_df = pl.read_parquet(source="data/caracs_2023.parquet")
caracs_df

In [None]:
# Semicolon-delimited census extract. Its "DEP" and "PTOT" columns are used
# downstream as the department key and the population figure.
# NOTE(review): "DEP" is later joined against the "dep" column of caracs_df --
# confirm that CSV type inference yields the same dtype on both sides.
recensement_df = pl.read_csv(source="data/recensement_2022.csv", separator=";")
recensement_df

In [None]:
# Fatal accidents per department, in absolute numbers and relative to population.
#
# Resulting columns:
#   department      -- department code shared by caracs_df ("dep") and
#                      recensement_df ("DEP")
#   accident_count  -- number of distinct accidents with at least one user
#                      whose gravity is Gravity.KILLED
#   population      -- "PTOT" from the census table
#   accident_rate   -- accident_count / population
#
# Both joins are inner joins, so accidents without a caracs row and departments
# without a census row are dropped from the result.
df = (
    usagers_df
    .select(acc_id="Num_Acc", gravity="grav")
    .filter(pl.col("gravity") == Gravity.KILLED.value)
    # usagers_df presumably holds one row per person involved, so an accident
    # with several deaths would show up several times here. Keep each accident
    # id once so that accident_count really counts accidents, not victims.
    .select("acc_id")
    .unique()
    .join(
        caracs_df.select(acc_id="Num_Acc", department="dep"),
        on="acc_id",
    )
    .group_by("department")
    .agg(accident_count=pl.len())
    .join(
        recensement_df.select(department="DEP", population="PTOT"),
        on="department",
    )
    .with_columns(
        accident_rate=pl.col("accident_count") / pl.col("population")
    )
)

df

In [None]:
# Departments ranked from the highest to the lowest accident_count.
px.bar(
    data_frame=df.sort(by="accident_count", descending=True),
    x="department",
    y="accident_count",
    title="Number of fatal accidents per department",
)

In [None]:
# Departments ranked by accident_rate (accident_count divided by population),
# highest first.
px.bar(
    df.sort("accident_rate", descending=True),
    x="department",
    y="accident_rate",
    # Title fixed: the original read "per per capita".
    title="Fatal accidents per capita per department",
)