In [1]:
import plotly.express as px
import polars as pl
from enum import IntEnum
In [2]:
import plotly.offline
plotly.offline.init_notebook_mode()
In [3]:
# https://www.data.gouv.fr/fr/datasets/r/8ef4c2a3-91a0-4d98-ae3a-989bde87b62a
class Gravity(IntEnum):
    UNINJURED=1
    KILLED = 2
    HOSPITALIZED = 3
    LIGHTLY_INJURED = 4
In [4]:
usagers_df = pl.read_parquet(
    "data/usagers_2023.parquet"
)
usagers_df
Out[4]:
shape: (125_789, 16)
Num_Accid_usagerid_vehiculenum_vehplacecatugravsexean_naistrajetsecu1secu2secu3locpactpetatp
i64strstrstri64i64i64f64stri64i64i64i64i64stri64
202300000001"203 851 184""155 680 557""A01"1141.0"1978"520-1-1" -1"-1
202300000002"203 851 182""155 680 556""A01"1112.0"1997"910-1-1" -1"-1
202300000002"203 851 183""155 680 556""A01"10331.0"1997"90-1-12"3"1
202300000003"203 851 180""155 680 554""B01"1131.0"1987"02600"0"-1
202300000003"203 851 181""155 680 555""A01"1112.0"1984"01000"0"-1
…………………………………………
202300054821"203 720 720""155 583 346""A01"1141.0"1971"120-10"0"-1
202300054821"203 720 721""155 583 347""B01"1112.0"1968"110-10"0"-1
202300054822"203 720 717""155 583 344""A01"2212.0"2003"21-1-1-1" -1"-1
202300054822"203 720 718""155 583 344""A01"1112.0"2002"11-1-1-1" -1"-1
202300054822"203 720 719""155 583 345""B01"1132.0"1995"12-1-1-1" -1"-1
In [5]:
caracs_df = pl.read_parquet("data/caracs_2023.parquet")
caracs_df
Out[5]:
shape: (54_822, 15)
Num_Accjourmoisanhrmnlumdepcomaggintatmcoladrlatlong
i64i64strstrstri64strstri64i64i64i64strf64f64
2023000000017"05""2023""06:00"1"75""75101"2427"RUE DE RIVOLI"48.8663862.323471
2023000000027"05""2023""05:30"5"94""94080"2136"Avenue de Paris"48.8454782.428681
2023000000037"05""2023""20:50"1"94""94022"2321"Avenue du Général Leclerc"48.76242.40655
2023000000046"05""2023""23:57"5"94""94078"2135"Rue de Paris"48.7324842.446876
2023000000057"05""2023""00:50"5"94""94068"2233"56bis Avenue Raspail"48.785812.49217
………………………………………
20230005481826"10""2023""20:45"5"974""97416"2116"LA FONTAINE (RUE JEAN DE)"21.3382855.47771
20230005481926"10""2023""19:10"3"974""97416"1113"RN3 (ANCIENNE ROUTE)"21.2886555.50994
20230005482026"10""2023""09:40"1"974""97411"2117"BAMBOU (CHEMIN DE LA RUELLE)"20.9012955.40598
20230005482126"10""2023""17:20"1"973""97302"1613"ROUTE NATIONALE 1"4.8971352.32854
20230005482220"10""2023""16:30"1"69""69387"2163"Boulevard Yves Farge"45.733064.8254
In [6]:
recensement_df = pl.read_csv("data/recensement_2022.csv", separator=";")
recensement_df
Out[6]:
shape: (100, 9)
REGRégionDEPDépartementNBARRNBCANNBCOMPMUNPTOT
i64strstrstri64stri64i64i64
84"Auvergne-Rhône-Alpes""01""Ain"4"23"392671289686804
32"Hauts-de-France""02""Aisne"5"21"798525558536985
84"Auvergne-Rhône-Alpes""03""Allier"3"19"317334715343338
93"Provence-Alpes-Côte d'Azur""04""Alpes-de-Haute-Provence"4"15"198167179171621
93"Provence-Alpes-Côte d'Azur""05""Hautes-Alpes"2"15"162141677145993
………………………
11"Île-de-France""95""Val-d'Oise"3"21"18312708451280338
1"Guadeloupe""971""Guadeloupe"2"21"32383569388197
2"Martinique""972""Martinique"4"NA"34361019364991
3"Guyane""973""Guyane"3"NA"22288382290476
4"La Réunion""974""La Réunion"4"25"24881348891190
In [7]:
df = usagers_df.select(
    acc_id="Num_Acc",
    gravity="grav",
).filter(
    pl.col("gravity") == Gravity.KILLED.value
).join(
    caracs_df.select(
        acc_id="Num_Acc", 
        department="dep"
    ),
    on="acc_id"
).select(
    "department"
).group_by(
    "department"
).agg(
    accident_count=pl.len()
).join(
    recensement_df.select(
        department="DEP",
        population="PTOT"
    ),
    on="department"
).with_columns(
    accident_rate=pl.col("accident_count") / pl.col("population")
)

df
Out[7]:
shape: (100, 4)
departmentaccident_countpopulationaccident_rate
stru32i64f64
"34"7112354570.000057
"974"308911900.000034
"29"449523510.000046
"54"337440520.000044
"12"182897810.000062
…………
"17"476837100.000069
"22"516271820.000081
"57"3710686190.000035
"73"364574630.000079
"972"243649910.000066
In [8]:
px.bar(
    df.sort("accident_count", descending=True),
    x="department",
    y="accident_count",
    title="Number of fatal accidents per department"
)
In [9]:
px.bar(
    df.sort("accident_rate", descending=True),
    x="department",
    y="accident_rate",
    title="Fatal accidents per per capita per department"
)