This repository has been archived on 2025-01-04. You can view files and clone it, but cannot push or open issues or pull requests.
2024-01-08 16:00:26 +01:00

287 lines
9.8 KiB
Python

import logging
from db_connector import RemoteDB
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import kaleido
import numpy as np
logging.getLogger("matplotlib").setLevel(logging.WARNING)
# Summary charts ======================================================================================================
def plt_acc_by_year(db):
    """Plot the total number of accidents per year as a horizontal bar chart.

    Runs a grouped count over the ``accidents`` table and writes the chart to
    ``fig/acc_by_year.png`` and ``html/acc_by_year.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            passed straight to ``pd.DataFrame`` and must yield ``year`` and
            ``count`` columns (presumably a list of dict-like rows; confirm
            against ``db_connector``).
    """
    # The aggregate is aliased explicitly: the chart reads a column named
    # "count", and relying on the database's default name for COUNT(*) is
    # fragile. The other queries in this module alias it the same way.
    acc_year_sql = """
SELECT COUNT(*) AS count, accidentyear AS year FROM accidents
GROUP BY year
ORDER BY year;
"""
    result = db.execute_query(acc_year_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(
        result_df,
        y='year',
        x='count',
        orientation='h',
        title='Total Accidents per Year',
    )
    fig.write_image("fig/acc_by_year.png")
    fig.write_html("html/acc_by_year.html")
def plt_acc_by_weekday(db):
    """Plot the number of accidents per weekday as a horizontal bar chart.

    The query orders rows by their count, and the chart is written to
    ``fig/acc_by_weekday.png`` and ``html/acc_by_weekday.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            passed straight to ``pd.DataFrame`` and must yield ``weekday`` and
            ``count`` columns (presumably a list of dict-like rows; confirm
            against ``db_connector``).
    """
    # Plain string literal: the query has no placeholders, so the former
    # f-prefix did nothing.
    acc_weekday_sql = """
SELECT COUNT(*) AS count, accidentweekday_en AS weekday
FROM accidents
GROUP BY weekday
ORDER BY COUNT(*);
"""
    result = db.execute_query(acc_weekday_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(
        result_df,
        y='weekday',
        x='count',
        orientation='h',
        title='No. of Accidents per Weekday',
    )
    fig.write_image("fig/acc_by_weekday.png")
    fig.write_html("html/acc_by_weekday.html")
# def plt_acc_by_day_year_old(db):
# acc_year_day_sql = """
# SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
# FROM accidents
# GROUP BY weekday, year
# ORDER BY weekday, year, COUNT(*);
# """
#
# result = db.execute_query(acc_year_day_sql)
# result_df = pd.DataFrame(result)
def plt_acc_by_day_year(db):
    """Plot accidents per weekday as a vertical bar chart animated by year.

    Queries per-year, per-weekday accident counts from ``accidents``, builds a
    Plotly Express bar chart with ``year`` as the animation frame, sets custom
    play/pause buttons and a year slider on the layout, and writes the result
    to ``fig/plt_acc_by_day_year.png`` and ``html/plt_acc_by_day_year.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            passed straight to ``pd.DataFrame`` and must yield ``year``,
            ``weekday`` and ``count`` columns (presumably a list of dict-like
            rows; confirm against ``db_connector``).
    """
    acc_year_day_sql = """
SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
FROM accidents
GROUP BY weekday, year
ORDER BY weekday, year, COUNT(*);
"""
    result = db.execute_query(acc_year_day_sql)
    df = pd.DataFrame(result)
    # Diagnostic preview only: emit it through logging rather than printing
    # to stdout on every run.
    logging.debug("plt_acc_by_day_year: first rows of query result:\n%s", df.head())

    fig = px.bar(
        df,
        x='weekday',
        y='count',
        title='Accidents by Weekday over the Years',
        animation_frame='year',
        labels={'weekday': 'Weekday', 'count': 'Number of Accidents'},
        category_orders={'weekday': ['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']},
        orientation='v'
    )
    # Pin the y-axis to a fixed range (presumably so the scale does not change
    # from frame to frame; confirm that 1000 covers the largest count).
    fig.update_yaxes(range=[0, 1000])

    # "Play" animates through the frames from the current one, one per second.
    play_button = {
        'args': [None, {'frame': {'duration': 1000, 'redraw': True}, 'fromcurrent': True}],
        'label': 'Play',
        'method': 'animate',
    }
    # "Pause" animates to the [None] frame list immediately with zero duration.
    pause_button = {
        'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate',
                          'transition': {'duration': 0}}],
        'label': 'Pause',
        'method': 'animate',
    }
    # One slider step per distinct year, in ascending order.
    year_steps = [
        {
            'label': str(year),
            'method': 'animate',
            'args': [[year], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate'}],
        }
        for year in sorted(df['year'].unique())
    ]

    # Customize the layout to include the buttons and a slider
    fig.update_layout(
        updatemenus=[
            {
                'buttons': [play_button, pause_button],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top',
            }
        ],
        sliders=[{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 20},
                'prefix': 'Year:',
                'visible': True,
                'xanchor': 'right',
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'pad': {'b': 10, 't': 50},
            'len': 0.9,
            'x': 0.1,
            'y': 0,
            'steps': year_steps,
        }],
    )
    fig.write_image("fig/plt_acc_by_day_year.png")
    fig.write_html("html/plt_acc_by_day_year.html")
def plt_acc_by_daytime(db):
    """Plot the number of accidents per hour of day as a horizontal bar chart.

    The chart is written to ``fig/acc_by_daytime.png`` and
    ``html/acc_by_daytime.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            passed straight to ``pd.DataFrame`` and must yield ``hour`` and
            ``count`` columns (presumably a list of dict-like rows; confirm
            against ``db_connector``).
    """
    # Plain string literal (the query has no placeholders), held in a local
    # named after what it selects rather than the copy-pasted weekday name.
    acc_daytime_sql = """
SELECT COUNT(*) AS count, accidenthour AS hour
FROM accidents
GROUP BY accidenthour
ORDER BY COUNT(*);
"""
    result = db.execute_query(acc_daytime_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(
        result_df,
        y='hour',
        x='count',
        orientation='h',
        title='Accidents by hour',
    )
    fig.write_image("fig/acc_by_daytime.png")
    fig.write_html("html/acc_by_daytime.html")
# Time Series charts ==================================================================================================
def acc_by_type(db):
    """Plot yearly accident counts as horizontal bars stacked by accident type.

    Writes ``fig/acc_by_type.png`` and ``html/acc_by_type.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            wrapped in a ``pd.DataFrame`` and must provide ``year``, ``type``
            and ``count`` columns.
    """
    query = """
SELECT accidentyear AS year, accidenttype_en as type, count(*) as count
FROM accidents
GROUP BY year, type;
"""
    frame = pd.DataFrame(db.execute_query(query))

    chart = px.bar(
        frame,
        x='count',
        y='year',
        color='type',
        barmode='stack',
        orientation='h',
        title='Accidents by type',
    )
    chart.update_layout(
        xaxis_title="No. of Accidents",
        yaxis_title="Year",
        legend_title="Accident Type",
    )

    chart.write_image("fig/acc_by_type.png")
    chart.write_html("html/acc_by_type.html")
#fig.show()
def severity_by_year(db):
    """Plot yearly accident counts as vertical bars grouped by severity.

    Reads from the ``accident_copy`` table and writes
    ``fig/severity_by_year.png`` and ``html/severity_by_year.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            wrapped in a ``pd.DataFrame`` and must provide ``year``,
            ``severity`` and ``count`` columns (``code`` is selected but not
            plotted).
    """
    query = """
SELECT accidentyear as year, accidentseveritycategory as code, severity, count(*) as count
FROM accident_copy
GROUP BY year, code, severity;
"""
    frame = pd.DataFrame(db.execute_query(query))

    chart = px.bar(
        frame,
        x='year',
        y='count',
        color='severity',
        barmode='group',
        orientation='v',
        title="Severity over the years",
    )
    chart.update_layout(
        xaxis_title="Year",
        yaxis_title="No. of Accidents",
        legend_title="Accident Severity",
    )

    chart.write_image("fig/severity_by_year.png")
    chart.write_html("html/severity_by_year.html")
#fig.show()
def ped_by_month(db):
    """Plot the monthly number of accidents involving pedestrians as a line.

    Writes ``fig/ped_by_month.png`` and ``html/ped_by_month.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            wrapped in a ``pd.DataFrame`` and must provide ``year``, ``month``
            and ``count`` columns.
    """
    query = """
SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
FROM accidents
WHERE accidentinvolvingpedestrian IS TRUE
GROUP BY year, month
ORDER BY year, month;
"""
    monthly = pd.DataFrame(db.execute_query(query))

    # One timestamp per row, parsed from the "<year>-<month>" text.
    year_text = monthly['year'].astype(str)
    month_text = monthly['month'].astype(str)
    monthly['year-month'] = pd.to_datetime(year_text + "-" + month_text)

    chart = px.line(monthly, x='year-month', y='count', markers=True)
    chart.update_layout(
        title='Accidents involving Pedestrians',
        xaxis_title='Year',
        yaxis_title='No. of accidents',
    )

    # Use the year column for both tick positions and tick labels.
    years = monthly['year']
    chart.update_xaxes(tickmode='array', tickvals=years, ticktext=years)

    chart.write_image("fig/ped_by_month.png")
    chart.write_html("html/ped_by_month.html")
#fig.show()
#fig.write_html('ped_by_month.html')
def bike_by_month(db):
    """Plot the monthly number of accidents involving bicycles as a line.

    Writes ``fig/bike_by_month.png`` and ``html/bike_by_month.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            wrapped in a ``pd.DataFrame`` and must provide ``year``, ``month``
            and ``count`` columns.
    """
    query = """
SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
FROM accidents
WHERE accidentinvolvingbicycle IS TRUE
GROUP BY year, month
ORDER BY year, month;
"""
    monthly = pd.DataFrame(db.execute_query(query))

    # One timestamp per row, parsed from the "<year>-<month>" text.
    year_text = monthly['year'].astype(str)
    month_text = monthly['month'].astype(str)
    monthly['year-month'] = pd.to_datetime(year_text + "-" + month_text)

    chart = px.line(monthly, x='year-month', y='count', markers=True)
    chart.update_layout(
        title='Accidents involving Bicycles',
        xaxis_title='Year',
        yaxis_title='No. of accidents',
    )

    # Use the year column for both tick positions and tick labels.
    years = monthly['year']
    chart.update_xaxes(tickmode='array', tickvals=years, ticktext=years)

    chart.write_image("fig/bike_by_month.png")
    chart.write_html("html/bike_by_month.html")
#fig.show()
# TOO TEDIOUS :/
# def acc_by_involved(db):
# acc_by_involved_sql = """
# SELECT accidentyear AS year, accidentmonth AS month, accidentinvolvingpedestrian AS ped,
# accidentinvolvingbicycle as bike,
# accidentinvolvingmotorcycle as moto,count(*) as count
# FROM accidents
# GROUP BY year, month, ped, bike, moto
# ORDER BY year, month;
# """
#
# result = db.execute_query(acc_by_involved_sql)
# result_df = pd.DataFrame(result)
# result_df['year-month'] = pd.to_datetime(result_df['year'].astype(str) + "-" + result_df['month'].astype(str))
#
# fig = px.line(result_df, x='year-month', y='count', color='')
def severity_by_month(db):
    """Plot monthly accident counts as one line per severity level.

    Reads from the ``accident_copy`` table and writes
    ``fig/severity_by_month.png`` and ``html/severity_by_month.html``.

    Args:
        db: Database handle exposing ``execute_query(sql)``. Its result is
            wrapped in a ``pd.DataFrame`` and must provide ``year``,
            ``month``, ``severity`` and ``count`` columns.
    """
    query = """
SELECT accidentyear as year, accidentmonth as month, severity, count(*) as count
FROM accident_copy
GROUP BY year, month, severity
ORDER BY year, month;
"""
    monthly = pd.DataFrame(db.execute_query(query))

    # One timestamp per row, parsed from the "<year>-<month>" text.
    year_text = monthly['year'].astype(str)
    month_text = monthly['month'].astype(str)
    monthly['year-month'] = pd.to_datetime(year_text + "-" + month_text)

    chart = px.line(
        monthly,
        x='year-month',
        y='count',
        color='severity',
        orientation='v',
        title='Accident severity',
    )
    chart.update_layout(
        xaxis_title="Time",
        yaxis_title="No. of Accidents",
        legend_title="Accident Severity",
    )

    chart.write_image("fig/severity_by_month.png")
    chart.write_html("html/severity_by_month.html")
#fig.show()
# Utilities ===========================================================================================================
def save_as_barplot(df, xname, yname, orientation, file_name):
    """Placeholder for a shared bar-plot export helper; not implemented yet.

    All arguments are currently ignored and the function returns ``None``.
    """
    return None
def save_as_html():
    """Placeholder for a shared HTML export helper; not implemented yet.

    Takes no arguments and returns ``None``.
    """
    return None
if __name__ == "__main__":
    # Script entry point: render every chart against one shared DB connection.
    # Local import: only the entry point needs pathlib.
    from pathlib import Path

    # The plot functions write into these relative directories; make sure
    # they exist so the first export does not fail on a fresh checkout.
    for out_dir in ("fig", "html"):
        Path(out_dir).mkdir(parents=True, exist_ok=True)

    # Same order as before; the first failure still aborts the remaining plots.
    plot_functions = (
        plt_acc_by_year,
        plt_acc_by_weekday,
        plt_acc_by_daytime,
        plt_acc_by_day_year,
        ped_by_month,
        acc_by_type,
        severity_by_year,
        severity_by_month,
        bike_by_month,
    )

    remote_db = RemoteDB()
    try:
        for plot in plot_functions:
            plot(remote_db)
    except Exception as e:
        # logging.exception keeps the traceback, which the former print()
        # of the message alone discarded.
        logging.exception("Exception %s in plots.py", e)
    finally:
        # Always release the connection, whether or not plotting succeeded.
        remote_db.close()