291 lines
9.8 KiB
Python
291 lines
9.8 KiB
Python
import logging
|
|
|
|
from db_connector import RemoteDB
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
import plotly.express as px
|
|
import kaleido
|
|
import numpy as np
|
|
|
|
# Raise the "matplotlib" logger's threshold to WARNING so that its DEBUG and
# INFO records are dropped.
logging.getLogger("matplotlib").setLevel(logging.WARNING)
|
|
|
|
|
|
|
|
|
|
# Summary charts ======================================================================================================
|
|
def plt_acc_by_year(db):
    """Plot the number of accidents per year as a horizontal bar chart.

    Runs one aggregate query through ``db.execute_query``, loads the rows
    into a DataFrame and writes the figure to ``fig/acc_by_year.png`` and
    ``html/acc_by_year.html`` (paths relative to the working directory).

    Args:
        db: Object exposing ``execute_query(sql)``. The returned rows must be
            convertible by ``pd.DataFrame`` into ``year`` and ``count``
            columns (presumably a list of dict-like rows -- confirm against
            ``RemoteDB``).
    """
    # Alias the aggregate explicitly: the chart below reads a column named
    # "count", and the name given to an un-aliased COUNT(*) is up to the
    # database backend. This also matches the other queries in this module.
    acc_year_sql = """
        SELECT COUNT(*) AS count, accidentyear AS year FROM accidents
        GROUP BY year
        ORDER BY year;
    """
    result = db.execute_query(acc_year_sql)
    result_df = pd.DataFrame(result)

    fig = px.bar(result_df, y='year', x='count', orientation='h', title='No. of Accidents per Year')
    fig.write_image("fig/acc_by_year.png")
    fig.write_html("html/acc_by_year.html")
|
|
|
|
|
|
def plt_acc_by_weekday(db):
    """Render the accident total of each weekday as a horizontal bar chart.

    The rows come from ``db.execute_query`` ordered by ascending count; the
    chart is exported to ``fig/acc_by_weekday.png`` and
    ``html/acc_by_weekday.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``count`` and ``weekday`` columns.
    """
    weekday_query = """
        SELECT COUNT(*) AS count, accidentweekday_en AS weekday
        FROM accidents
        GROUP BY weekday
        ORDER BY COUNT(*);
    """

    weekday_counts = pd.DataFrame(db.execute_query(weekday_query))

    chart = px.bar(
        weekday_counts,
        x='count',
        y='weekday',
        orientation='h',
        title='No. of Accidents per Weekday',
    )
    chart.write_image("fig/acc_by_weekday.png")
    chart.write_html("html/acc_by_weekday.html")
|
|
|
|
|
|
# def plt_acc_by_day_year_old(db):
|
|
# acc_year_day_sql = """
|
|
# SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
|
|
# FROM accidents
|
|
# GROUP BY weekday, year
|
|
# ORDER BY weekday, year, COUNT(*);
|
|
# """
|
|
#
|
|
# result = db.execute_query(acc_year_day_sql)
|
|
# resut_df = pd.DataFrame(result)
|
|
|
|
|
|
def plt_acc_by_day_year(db):
    """Plot accidents per weekday as a bar chart animated over the years.

    Each animation frame is one year. The default animation controls are
    replaced by custom Play/Pause buttons and a year slider. The figure is
    written to ``fig/plt_acc_by_day_year.png`` and
    ``html/plt_acc_by_day_year.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``year``, ``weekday`` and
            ``count`` columns.
    """
    acc_year_day_sql = """
        SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
        FROM accidents
        GROUP BY weekday, year
        ORDER BY weekday, year, COUNT(*);
    """
    result = db.execute_query(acc_year_day_sql)
    df = pd.DataFrame(result)
    # Preview of the data for debugging; kept out of stdout.
    logging.getLogger(__name__).debug("Accidents per weekday and year (head):\n%s", df.head())

    fig = px.bar(
        df,
        x='weekday',
        y='count',
        title='Accidents by Weekday',
        animation_frame='year',
        labels={'weekday': 'Weekday', 'count': 'Number of Accidents'},
        category_orders={'weekday': ['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']},
        # Weekdays are on x and counts on y, so the bars must be vertical;
        # 'h' would measure bar length along the categorical weekday axis.
        orientation='v',
    )
    # Fixed count axis so the bars are comparable from frame to frame.
    fig.update_yaxes(range=[0, 1000])

    play_button = {
        'args': [None, {'frame': {'duration': 1000, 'redraw': True}, 'fromcurrent': True}],
        'label': 'Play',
        'method': 'animate',
    }
    # Animating to [None] in immediate mode interrupts the running animation.
    pause_button = {
        'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate',
                          'transition': {'duration': 0}}],
        'label': 'Pause',
        'method': 'animate',
    }
    # One slider step per year, jumping straight to that year's frame.
    slider_steps = [
        {
            'label': str(year),
            'method': 'animate',
            'args': [[year], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate'}],
        }
        for year in sorted(df['year'].unique())
    ]

    # Customize the layout to include the buttons and a slider.
    fig.update_layout(
        updatemenus=[
            {
                'buttons': [play_button, pause_button],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top',
            }
        ],
        sliders=[{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 20},
                'prefix': 'Year:',
                'visible': True,
                'xanchor': 'right',
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'pad': {'b': 10, 't': 50},
            'len': 0.9,
            'x': 0.1,
            'y': 0,
            'steps': slider_steps,
        }],
    )
    fig.write_image("fig/plt_acc_by_day_year.png")
    fig.write_html("html/plt_acc_by_day_year.html")
|
|
|
|
|
|
def plt_acc_by_daytime(db):
    """Render the accident total of each hour as a horizontal bar chart.

    Rows are grouped by ``accidenthour`` and ordered by ascending count; the
    chart is exported to ``fig/acc_by_daytime.png`` and
    ``html/acc_by_daytime.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``count`` and ``hour`` columns.
    """
    hourly_query = """
        SELECT COUNT(*) AS count, accidenthour AS hour
        FROM accidents
        GROUP BY accidenthour
        ORDER BY COUNT(*);
    """

    hourly_counts = pd.DataFrame(db.execute_query(hourly_query))

    chart = px.bar(
        hourly_counts,
        x='count',
        y='hour',
        orientation='h',
        title='Accidents by day',
    )
    chart.write_image("fig/acc_by_daytime.png")
    chart.write_html("html/acc_by_daytime.html")
|
|
|
|
# Time Series charts ==================================================================================================
|
|
def acc_by_type(db):
    """Render yearly accident totals as horizontal bars stacked by type.

    The query groups ``accidents`` by year and ``accidenttype_en``; the
    chart is exported to ``fig/acc_by_type.png`` and
    ``html/acc_by_type.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``year``, ``type`` and ``count``
            columns.
    """
    type_query = """
        SELECT accidentyear AS year, accidenttype_en as type, count(*) as count
        FROM accidents
        GROUP BY year, type;
    """

    type_counts = pd.DataFrame(db.execute_query(type_query))

    chart = px.bar(
        type_counts,
        x='count',
        y='year',
        color='type',
        barmode='stack',
        orientation='h',
        title='Accidents by type',
    )

    axis_text = {
        'xaxis_title': "No. of Accidents",
        'yaxis_title': "Year",
        'legend_title': "Accident Type",
    }
    chart.update_layout(**axis_text)
    chart.write_image("fig/acc_by_type.png")
    chart.write_html("html/acc_by_type.html")
    # chart.show()  # enable for an interactive preview
|
|
|
|
|
|
def severity_by_year(db):
    """Render yearly accident totals as vertical bars grouped by severity.

    Reads from the ``accident_copy`` table, grouping by year, severity code
    and severity; the chart is exported to ``fig/severity_by_year.png`` and
    ``html/severity_by_year.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``year``, ``code``, ``severity``
            and ``count`` columns.
    """
    yearly_severity_query = """
        SELECT accidentyear as year, accidentseveritycategory as code, severity, count(*) as count
        FROM accident_copy
        GROUP BY year, code, severity;
    """

    severity_counts = pd.DataFrame(db.execute_query(yearly_severity_query))

    chart = px.bar(
        severity_counts,
        x='year',
        y='count',
        color='severity',
        barmode='group',
        orientation='v',
        title="Severity over the years",
    )
    axis_text = {
        'xaxis_title': "Year",
        'yaxis_title': "No. of Accidents",
        'legend_title': "Accident Severity",
    }
    chart.update_layout(**axis_text)
    chart.write_image("fig/severity_by_year.png")
    chart.write_html("html/severity_by_year.html")
    # chart.show()  # enable for an interactive preview
|
|
|
|
|
|
def ped_by_month(db):
    """Render the monthly count of accidents involving pedestrians as a line.

    Rows where ``accidentinvolvingpedestrian IS TRUE`` are counted per year
    and month. A ``year-month`` datetime column built from both values is
    used as the x axis. The chart is exported to ``fig/ped_by_month.png``
    and ``html/ped_by_month.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``year``, ``month`` and ``count``
            columns.
    """
    pedestrian_query = """
        SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
        FROM accidents
        WHERE accidentinvolvingpedestrian IS TRUE
        GROUP BY year, month
        ORDER BY year, month;
    """

    monthly = pd.DataFrame(db.execute_query(pedestrian_query))

    # "<year>-<month>" strings parsed into timestamps for a continuous axis.
    year_text = monthly['year'].astype(str)
    month_text = monthly['month'].astype(str)
    monthly['year-month'] = pd.to_datetime(year_text + "-" + month_text)

    chart = px.line(monthly, x='year-month', y='count', markers=True)
    chart.update_layout(
        title='Accidents involving Pedestrians',
        xaxis_title='Year',
        yaxis_title='No. of accidents',
    )
    # Tick positions and tick labels are both taken from the year column.
    year_ticks = {'tickvals': monthly['year'], 'ticktext': monthly['year']}
    chart.update_xaxes(tickmode='array', **year_ticks)
    chart.write_image("fig/ped_by_month.png")
    chart.write_html("html/ped_by_month.html")
    # chart.show()  # enable for an interactive preview
|
|
|
|
def bike_by_month(db):
    """Render the monthly count of accidents involving bicycles as a line.

    Rows where ``accidentinvolvingbicycle IS TRUE`` are counted per year and
    month. A ``year-month`` datetime column built from both values is used
    as the x axis. The chart is exported to ``fig/bike_by_month.png`` and
    ``html/bike_by_month.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``year``, ``month`` and ``count``
            columns.
    """
    bicycle_query = """
        SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
        FROM accidents
        WHERE accidentinvolvingbicycle IS TRUE
        GROUP BY year, month
        ORDER BY year, month;
    """

    monthly = pd.DataFrame(db.execute_query(bicycle_query))

    # "<year>-<month>" strings parsed into timestamps for a continuous axis.
    year_text = monthly['year'].astype(str)
    month_text = monthly['month'].astype(str)
    monthly['year-month'] = pd.to_datetime(year_text + "-" + month_text)

    chart = px.line(monthly, x='year-month', y='count', markers=True)
    chart.update_layout(
        title='Accidents involving Bicycles',
        xaxis_title='Year',
        yaxis_title='No. of accidents',
    )
    # Tick positions and tick labels are both taken from the year column.
    year_ticks = {'tickvals': monthly['year'], 'ticktext': monthly['year']}
    chart.update_xaxes(tickmode='array', **year_ticks)
    chart.write_image("fig/bike_by_month.png")
    chart.write_html("html/bike_by_month.html")
    # chart.show()  # enable for an interactive preview
|
|
|
|
# TOO TEDIOUS :/
|
|
# def acc_by_involved(db):
|
|
# acc_by_involved_sql = """
|
|
# SELECT accidentyear AS year, accidentmonth AS month, accidentinvolvingpedestrian AS ped,
|
|
# accidentinvolvingbicycle as bike,
|
|
# accidentinvolvingmotorcycle as moto,count(*) as count
|
|
# FROM accidents
|
|
# GROUP BY year, month, ped, bike, moto
|
|
# ORDER BY year, month;
|
|
# """
|
|
#
|
|
# result = db.execute_query(acc_by_involved_sql)
|
|
# result_df = pd.DataFrame(result)
|
|
# result_df['year-month'] = pd.to_datetime(result_df['year'].astype(str) + "-" + result_df['month'].astype(str))
|
|
#
|
|
# fig = px.line(result_df, x='year-month', y='count', color='')
|
|
|
|
|
|
def severity_by_month(db):
    """Render monthly accident counts as one line per severity.

    Reads from the ``accident_copy`` table, grouping by year, month and
    severity. A ``year-month`` datetime column built from year and month is
    used as the x axis. The chart is exported to
    ``fig/severity_by_month.png`` and ``html/severity_by_month.html``.

    Args:
        db: Object exposing ``execute_query(sql)`` whose result
            ``pd.DataFrame`` can turn into ``year``, ``month``, ``severity``
            and ``count`` columns.
    """
    monthly_severity_query = """
        SELECT accidentyear as year, accidentmonth as month, severity, count(*) as count
        FROM accident_copy
        GROUP BY year, month, severity
        ORDER BY year, month;
    """

    monthly = pd.DataFrame(db.execute_query(monthly_severity_query))

    # "<year>-<month>" strings parsed into timestamps for a continuous axis.
    year_text = monthly['year'].astype(str)
    month_text = monthly['month'].astype(str)
    monthly['year-month'] = pd.to_datetime(year_text + "-" + month_text)

    chart = px.line(
        monthly,
        x='year-month',
        y='count',
        color='severity',
        orientation='v',
        title='Accident severity',
    )
    axis_text = {
        'xaxis_title': "Time",
        'yaxis_title': "No. of Accidents",
        'legend_title': "Accident Severity",
    }
    chart.update_layout(**axis_text)
    chart.write_image("fig/severity_by_month.png")
    chart.write_html("html/severity_by_month.html")
    # chart.show()  # enable for an interactive preview
|
|
|
|
|
|
|
|
|
|
# Utilities ===========================================================================================================
|
|
def save_as_barplot(df, xname, yname, orientation, file_name):
    """Unimplemented placeholder: accepts its arguments and does nothing.

    Returns:
        None.
    """
    return None
|
|
|
|
|
|
def save_as_html():
    """Unimplemented placeholder: takes no arguments and does nothing.

    Returns:
        None.
    """
    return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Generate every chart over a single shared connection.
    remote_db = RemoteDB()
    try:
        plt_acc_by_year(remote_db)
        plt_acc_by_weekday(remote_db)
        plt_acc_by_daytime(remote_db)
        plt_acc_by_day_year(remote_db)
        ped_by_month(remote_db)
        acc_by_type(remote_db)
        severity_by_year(remote_db)
        severity_by_month(remote_db)
        bike_by_month(remote_db)
    except Exception as e:
        # Top-level boundary: the first failing chart aborts the remaining
        # ones. Log with the traceback so the failing query or export can be
        # located, instead of printing only the exception text.
        logging.getLogger(__name__).exception("Exception %s in plots.py", e)
    finally:
        # Release the connection whether or not the charts were produced.
        remote_db.close()
|