In [1]:
import plotly.express as px
import polars as pl
from enum import IntEnum
In [2]:
import plotly.offline
plotly.offline.init_notebook_mode()
In [3]:
# https://www.data.gouv.fr/fr/datasets/r/8ef4c2a3-91a0-4d98-ae3a-989bde87b62a
class Gravity(IntEnum):
UNINJURED=1
KILLED = 2
HOSPITALIZED = 3
LIGHTLY_INJURED = 4
In [4]:
usagers_df = pl.read_parquet(
"data/usagers_2023.parquet"
)
usagers_df
Out[4]:
shape: (125_789, 16)
| Num_Acc | id_usager | id_vehicule | num_veh | place | catu | grav | sexe | an_nais | trajet | secu1 | secu2 | secu3 | locp | actp | etatp |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i64 | str | str | str | i64 | i64 | i64 | f64 | str | i64 | i64 | i64 | i64 | i64 | str | i64 |
| 202300000001 | "203 851 184" | "155 680 557" | "A01" | 1 | 1 | 4 | 1.0 | "1978" | 5 | 2 | 0 | -1 | -1 | " -1" | -1 |
| 202300000002 | "203 851 182" | "155 680 556" | "A01" | 1 | 1 | 1 | 2.0 | "1997" | 9 | 1 | 0 | -1 | -1 | " -1" | -1 |
| 202300000002 | "203 851 183" | "155 680 556" | "A01" | 10 | 3 | 3 | 1.0 | "1997" | 9 | 0 | -1 | -1 | 2 | "3" | 1 |
| 202300000003 | "203 851 180" | "155 680 554" | "B01" | 1 | 1 | 3 | 1.0 | "1987" | 0 | 2 | 6 | 0 | 0 | "0" | -1 |
| 202300000003 | "203 851 181" | "155 680 555" | "A01" | 1 | 1 | 1 | 2.0 | "1984" | 0 | 1 | 0 | 0 | 0 | "0" | -1 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| 202300054821 | "203 720 720" | "155 583 346" | "A01" | 1 | 1 | 4 | 1.0 | "1971" | 1 | 2 | 0 | -1 | 0 | "0" | -1 |
| 202300054821 | "203 720 721" | "155 583 347" | "B01" | 1 | 1 | 1 | 2.0 | "1968" | 1 | 1 | 0 | -1 | 0 | "0" | -1 |
| 202300054822 | "203 720 717" | "155 583 344" | "A01" | 2 | 2 | 1 | 2.0 | "2003" | 2 | 1 | -1 | -1 | -1 | " -1" | -1 |
| 202300054822 | "203 720 718" | "155 583 344" | "A01" | 1 | 1 | 1 | 2.0 | "2002" | 1 | 1 | -1 | -1 | -1 | " -1" | -1 |
| 202300054822 | "203 720 719" | "155 583 345" | "B01" | 1 | 1 | 3 | 2.0 | "1995" | 1 | 2 | -1 | -1 | -1 | " -1" | -1 |
In [5]:
caracs_df = pl.read_parquet("data/caracs_2023.parquet")
caracs_df
Out[5]:
shape: (54_822, 15)
| Num_Acc | jour | mois | an | hrmn | lum | dep | com | agg | int | atm | col | adr | lat | long |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i64 | i64 | str | str | str | i64 | str | str | i64 | i64 | i64 | i64 | str | f64 | f64 |
| 202300000001 | 7 | "05" | "2023" | "06:00" | 1 | "75" | "75101" | 2 | 4 | 2 | 7 | "RUE DE RIVOLI" | 48.866386 | 2.323471 |
| 202300000002 | 7 | "05" | "2023" | "05:30" | 5 | "94" | "94080" | 2 | 1 | 3 | 6 | "Avenue de Paris" | 48.845478 | 2.428681 |
| 202300000003 | 7 | "05" | "2023" | "20:50" | 1 | "94" | "94022" | 2 | 3 | 2 | 1 | "Avenue du Général Leclerc" | 48.7624 | 2.40655 |
| 202300000004 | 6 | "05" | "2023" | "23:57" | 5 | "94" | "94078" | 2 | 1 | 3 | 5 | "Rue de Paris" | 48.732484 | 2.446876 |
| 202300000005 | 7 | "05" | "2023" | "00:50" | 5 | "94" | "94068" | 2 | 2 | 3 | 3 | "56bis Avenue Raspail" | 48.78581 | 2.49217 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| 202300054818 | 26 | "10" | "2023" | "20:45" | 5 | "974" | "97416" | 2 | 1 | 1 | 6 | "LA FONTAINE (RUE JEAN DE)" | 21.33828 | 55.47771 |
| 202300054819 | 26 | "10" | "2023" | "19:10" | 3 | "974" | "97416" | 1 | 1 | 1 | 3 | "RN3 (ANCIENNE ROUTE)" | 21.28865 | 55.50994 |
| 202300054820 | 26 | "10" | "2023" | "09:40" | 1 | "974" | "97411" | 2 | 1 | 1 | 7 | "BAMBOU (CHEMIN DE LA RUELLE)" | 20.90129 | 55.40598 |
| 202300054821 | 26 | "10" | "2023" | "17:20" | 1 | "973" | "97302" | 1 | 6 | 1 | 3 | "ROUTE NATIONALE 1" | 4.89713 | 52.32854 |
| 202300054822 | 20 | "10" | "2023" | "16:30" | 1 | "69" | "69387" | 2 | 1 | 6 | 3 | "Boulevard Yves Farge" | 45.73306 | 4.8254 |
In [6]:
recensement_df = pl.read_csv("data/recensement_2022.csv", separator=";")
recensement_df
Out[6]:
shape: (100, 9)
| REG | Région | DEP | Département | NBARR | NBCAN | NBCOM | PMUN | PTOT |
|---|---|---|---|---|---|---|---|---|
| i64 | str | str | str | i64 | str | i64 | i64 | i64 |
| 84 | "Auvergne-Rhône-Alpes" | "01" | "Ain" | 4 | "23" | 392 | 671289 | 686804 |
| 32 | "Hauts-de-France" | "02" | "Aisne" | 5 | "21" | 798 | 525558 | 536985 |
| 84 | "Auvergne-Rhône-Alpes" | "03" | "Allier" | 3 | "19" | 317 | 334715 | 343338 |
| 93 | "Provence-Alpes-Côte d'Azur" | "04" | "Alpes-de-Haute-Provence" | 4 | "15" | 198 | 167179 | 171621 |
| 93 | "Provence-Alpes-Côte d'Azur" | "05" | "Hautes-Alpes" | 2 | "15" | 162 | 141677 | 145993 |
| … | … | … | … | … | … | … | … | … |
| 11 | "Île-de-France" | "95" | "Val-d'Oise" | 3 | "21" | 183 | 1270845 | 1280338 |
| 1 | "Guadeloupe" | "971" | "Guadeloupe" | 2 | "21" | 32 | 383569 | 388197 |
| 2 | "Martinique" | "972" | "Martinique" | 4 | "NA" | 34 | 361019 | 364991 |
| 3 | "Guyane" | "973" | "Guyane" | 3 | "NA" | 22 | 288382 | 290476 |
| 4 | "La Réunion" | "974" | "La Réunion" | 4 | "25" | 24 | 881348 | 891190 |
In [7]:
df = usagers_df.select(
acc_id="Num_Acc",
gravity="grav",
).filter(
pl.col("gravity") == Gravity.KILLED.value
).join(
caracs_df.select(
acc_id="Num_Acc",
department="dep"
),
on="acc_id"
).select(
"department"
).group_by(
"department"
).agg(
accident_count=pl.len()
).join(
recensement_df.select(
department="DEP",
population="PTOT"
),
on="department"
).with_columns(
accident_rate=pl.col("accident_count") / pl.col("population")
)
df
Out[7]:
shape: (100, 4)
| department | accident_count | population | accident_rate |
|---|---|---|---|
| str | u32 | i64 | f64 |
| "34" | 71 | 1235457 | 0.000057 |
| "974" | 30 | 891190 | 0.000034 |
| "29" | 44 | 952351 | 0.000046 |
| "54" | 33 | 744052 | 0.000044 |
| "12" | 18 | 289781 | 0.000062 |
| … | … | … | … |
| "17" | 47 | 683710 | 0.000069 |
| "22" | 51 | 627182 | 0.000081 |
| "57" | 37 | 1068619 | 0.000035 |
| "73" | 36 | 457463 | 0.000079 |
| "972" | 24 | 364991 | 0.000066 |
In [8]:
px.bar(
df.sort("accident_count", descending=True),
x="department",
y="accident_count",
title="Number of fatal accidents per department"
)
In [9]:
px.bar(
df.sort("accident_rate", descending=True),
x="department",
y="accident_rate",
title="Fatal accidents per per capita per department"
)