-- ============================================================================
-- Provenance: git patch f5c75f57627117b243a279feb874cb028165a833
--   From:    Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
--   Date:    Sat, 6 Jan 2024 16:33:06 +0100
--   Subject: [PATCH] Finalize making plots.
--   Files:   analysis/database/console_dump.sql | 77 (new file, index 19c0ca5)
--            analysis/database/fbcount_copy.sql | 19 (new file, index 11065e7)
--            analysis/plots.py                  | 221 (index a612973..bc15213)
--            3 files changed, 298 insertions(+), 19 deletions(-)
-- Dialect: PostgreSQL is assumed (DROP COLUMN IF EXISTS, unsized varchar).
-- ============================================================================


-- ============================================================================
-- analysis/database/console_dump.sql
-- Scratch script: builds the derived tables and keeps the ad-hoc queries that
-- were tried out while designing the plots.
-- ============================================================================

-- ---- fbcount_copy: foot/bike counts with in+out collapsed into totals -------
DROP TABLE IF EXISTS fbcount_copy;

CREATE TABLE fbcount_copy AS
    SELECT * FROM footbikecount;

-- NOTE(review): `a + b` is NULL as soon as either side is NULL. Confirm that a
-- NULL total is the intended result for rows with only one direction counted.
ALTER TABLE fbcount_copy ADD fuss_total INTEGER;
UPDATE fbcount_copy SET fuss_total = fuss_in + fuss_out;

ALTER TABLE fbcount_copy
    DROP COLUMN IF EXISTS fuss_in,
    DROP COLUMN IF EXISTS fuss_out,
    ADD PRIMARY KEY (id);

ALTER TABLE fbcount_copy ADD velo_total INTEGER;
UPDATE fbcount_copy SET velo_total = velo_in + velo_out;

-- IF EXISTS added so this matches the fuss_* drop above and fbcount_copy.sql.
ALTER TABLE fbcount_copy
    DROP COLUMN IF EXISTS velo_in,
    DROP COLUMN IF EXISTS velo_out;

-- ---- exploratory queries on accidents ---------------------------------------
SELECT COUNT(*), accidentyear
FROM accidents
GROUP BY accidentyear
ORDER BY accidentyear;

SELECT COUNT(*), accidentweekday_en
FROM accidents
GROUP BY accidentweekday_en
ORDER BY COUNT(*);

SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
FROM accidents
GROUP BY weekday, year
ORDER BY year, COUNT(*);

SELECT DISTINCT msid FROM mivcount;

SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
FROM accidents
GROUP BY year, weekday
ORDER BY year, weekday;

-- Fixed: a bare count() is rejected by PostgreSQL; the row count is count(*).
SELECT accidentyear AS year, accidentmonth AS month, COUNT(*) AS count
FROM accidents
GROUP BY year, month;

SELECT accidentyear AS year, accidentmonth AS month, COUNT(*) AS count
FROM accidents
WHERE accidentinvolvingpedestrian = TRUE
GROUP BY month, year
ORDER BY year, month;

-- ---- accident_copy: accidents plus a readable severity label ----------------
DROP TABLE IF EXISTS accident_copy;

CREATE TABLE accident_copy AS
    SELECT * FROM accidents;

ALTER TABLE accident_copy ADD severity varchar;

-- One pass instead of four UPDATEs. Codes other than as1..as4 keep
-- severity = NULL, exactly as with the separate statements.
-- NOTE(review): 'Accidents with fatalities' is plural while the other three
-- labels are singular; left untouched because the label text ends up in the
-- chart legends.
UPDATE accident_copy
SET severity = CASE accidentseveritycategory
                   WHEN 'as4' THEN 'Accident with property damage'
                   WHEN 'as3' THEN 'Accident with light injuries'
                   WHEN 'as2' THEN 'Accident with severe injuries'
                   WHEN 'as1' THEN 'Accidents with fatalities'
               END
WHERE accidentseveritycategory IN ('as1', 'as2', 'as3', 'as4');

SELECT accidentyear AS year,
       accidentmonth AS month,
       accidentinvolvingpedestrian AS ped,
       accidentinvolvingbicycle AS bike,
       accidentinvolvingmotorcycle AS moto,
       COUNT(*) AS count
FROM accidents
GROUP BY year, month, ped, bike, moto
ORDER BY year, month;


-- ============================================================================
-- analysis/database/fbcount_copy.sql
-- Stand-alone, re-runnable version of the fbcount_copy build above.
-- ============================================================================
DROP TABLE IF EXISTS fbcount_copy;

CREATE TABLE fbcount_copy AS
    SELECT * FROM footbikecount;

ALTER TABLE fbcount_copy ADD fuss_total INTEGER;
UPDATE fbcount_copy SET fuss_total = fuss_in + fuss_out;

ALTER TABLE fbcount_copy
    DROP COLUMN IF EXISTS fuss_in,
    DROP COLUMN IF EXISTS fuss_out,
    ADD PRIMARY KEY (id);

ALTER TABLE fbcount_copy ADD velo_total INTEGER;
UPDATE fbcount_copy SET velo_total = velo_in + velo_out;

ALTER TABLE fbcount_copy
    DROP COLUMN IF EXISTS velo_in,
    DROP COLUMN IF EXISTS velo_out;


-- ============================================================================
-- The patch continues with analysis/plots.py (hunk @@ -10,44 +10,120 @@,
-- located after `import numpy as np`). Its unchanged leading context is:
--     logging.getLogger("matplotlib").setLevel(logging.WARNING)
--     # Summary charts
-- ============================================================================
# ======================================================================================================
# Shared helpers ======================================================================================
def _query_frame(db, sql):
    """Run *sql* through ``db.execute_query`` and return the rows as a DataFrame.

    NOTE(review): the charts below address columns by name, so
    ``execute_query`` presumably returns mappings / named rows -- confirm
    against the RemoteDB implementation, which is not visible here.
    """
    return pd.DataFrame(db.execute_query(sql))


def _save_figure(fig, stem):
    """Export *fig* as ``fig/<stem>.png`` and ``html/<stem>.html``.

    Both target directories are created on demand so that the export does not
    fail when it runs in a working directory that lacks them.
    """
    # Function-scope import: the module's import block is outside this block.
    from pathlib import Path

    exports = (
        ("fig", ".png", fig.write_image),
        ("html", ".html", fig.write_html),
    )
    for folder, suffix, write in exports:
        Path(folder).mkdir(parents=True, exist_ok=True)
        write(f"{folder}/{stem}{suffix}")


def _add_year_month(df):
    """Add a ``year-month`` datetime column built from ``year`` and ``month``.

    The frame is modified in place and also returned for convenience.
    """
    df['year-month'] = pd.to_datetime(df['year'].astype(str) + "-" + df['month'].astype(str))
    return df


def _label_years_on_time_axis(fig, df):
    """Put exactly one x-axis tick, labelled with the year, on each 1 January.

    Tick positions are passed as ISO date strings so they are unambiguous on
    the date axis, and each year is listed once instead of once per data row.
    """
    years = sorted(df['year'].unique())
    fig.update_xaxes(
        tickmode='array',
        tickvals=[f"{int(year)}-01-01" for year in years],
        ticktext=[str(year) for year in years],
    )


def _plot_monthly_involvement(db, flag_column, title, stem):
    """Draw the monthly count of accidents for which *flag_column* is true.

    Args:
        db: object exposing ``execute_query(sql)``.
        flag_column: name of a boolean column of ``accidents``. It is
            interpolated into the SQL text, so it must be a trusted constant
            and never user input.
        title: chart title.
        stem: base name of the exported files (see ``_save_figure``).
    """
    sql = f"""
    SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
    FROM accidents
    WHERE {flag_column} IS TRUE
    GROUP BY year, month
    ORDER BY year, month;
    """
    df = _add_year_month(_query_frame(db, sql))
    fig = px.line(df, x='year-month', y='count', markers=True)
    fig.update_layout(
        xaxis_title='Year',
        yaxis_title='No. of accidents',
        title=title)
    _label_years_on_time_axis(fig, df)
    _save_figure(fig, stem)


# Summary charts ======================================================================================================
def plt_acc_by_year(db):
    """Horizontal bar chart of the number of accidents per year.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/acc_by_year.png`` and ``html/acc_by_year.html``.
    """
    # COUNT(*) is aliased explicitly because the chart reads a 'count' column.
    acc_year_sql = """
    SELECT COUNT(*) AS count, accidentyear AS year FROM accidents
    GROUP BY year
    ORDER BY year;
    """
    result_df = _query_frame(db, acc_year_sql)
    fig = px.bar(result_df, y='year', x='count', orientation='h', title='No. of Accidents per Year')
    _save_figure(fig, "acc_by_year")


def plt_acc_by_weekday(db):
    """Horizontal bar chart of the number of accidents per weekday.

    Bars follow the query order, i.e. ascending accident count.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/acc_by_weekday.png`` and ``html/acc_by_weekday.html``.
    """
    acc_weekday_sql = """
    SELECT COUNT(*) AS count, accidentweekday_en AS weekday
    FROM accidents
    GROUP BY weekday
    ORDER BY COUNT(*);
    """
    result_df = _query_frame(db, acc_weekday_sql)
    fig = px.bar(result_df, y='weekday', x='count', orientation='h', title='No. of Accidents per Weekday')
    _save_figure(fig, "acc_by_weekday")


def plt_acc_by_day_year(db, y_max=1000):
    """Animated bar chart: accidents per weekday, one animation frame per year.

    Args:
        db: object exposing ``execute_query(sql)``.
        y_max: upper bound of the y-axis. The axis is fixed so that frames of
            different years stay visually comparable; 1000 is the value that
            used to be hard-coded.

    Writes ``fig/plt_acc_by_day_year.png`` and ``html/plt_acc_by_day_year.html``.
    """
    # (weekday, year) is the full group key, so no further tie-breaker is needed.
    acc_year_day_sql = """
    SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
    FROM accidents
    GROUP BY weekday, year
    ORDER BY weekday, year;
    """
    df = _query_frame(db, acc_year_day_sql)

    fig = px.bar(
        df,
        x='weekday',
        y='count',
        title='Accidents by Weekday',
        animation_frame='year',
        labels={'weekday': 'Weekday', 'count': 'Number of Accidents'},
        category_orders={'weekday': ['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']},
    )
    fig.update_yaxes(range=[0, y_max])

    play_button = {
        'args': [None, {'frame': {'duration': 1000, 'redraw': True}, 'fromcurrent': True}],
        'label': 'Play',
        'method': 'animate',
    }
    pause_button = {
        'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate',
                          'transition': {'duration': 0}}],
        'label': 'Pause',
        'method': 'animate',
    }
    # One slider step per year. Frames are addressed by name; the string form
    # is used so the target matches the step label (presumably also the frame
    # name plotly express assigns -- confirm against the plotly docs).
    year_steps = [
        {
            'label': str(year),
            'method': 'animate',
            'args': [[str(year)], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate'}],
        }
        for year in sorted(df['year'].unique())
    ]

    # Replace the default animation controls with custom buttons and slider.
    fig.update_layout(
        updatemenus=[{
            'buttons': [play_button, pause_button],
            'direction': 'left',
            'pad': {'r': 10, 't': 87},
            'showactive': False,
            'type': 'buttons',
            'x': 0.1,
            'xanchor': 'right',
            'y': 0,
            'yanchor': 'top',
        }],
        sliders=[{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 20},
                'prefix': 'Year:',
                'visible': True,
                'xanchor': 'right',
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'pad': {'b': 10, 't': 50},
            'len': 0.9,
            'x': 0.1,
            'y': 0,
            'steps': year_steps,
        }],
    )
    _save_figure(fig, "plt_acc_by_day_year")


# NOTE(review): plt_acc_by_daytime(db) is defined at this position in plots.py.
# The patch shows only its signature and its last statement
# (fig.write_html("html/acc_by_day.html")), so it is not reproduced here and
# must be kept from the existing file.


# Time Series charts ==================================================================================================
def acc_by_type(db):
    """Stacked horizontal bar chart of accidents per year, split by accident type.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/acc_by_type.png`` and ``html/acc_by_type.html``.
    """
    acc_by_type_sql = """
    SELECT accidentyear AS year, accidenttype_en as type, count(*) as count
    FROM accidents
    GROUP BY year, type;
    """
    result_df = _query_frame(db, acc_by_type_sql)
    fig = px.bar(result_df, x='count', y='year', color='type', barmode='stack', orientation='h',
                 title='Accidents by type')
    fig.update_layout(xaxis_title="No. of Accidents", yaxis_title="Year", legend_title="Accident Type")
    _save_figure(fig, "acc_by_type")


def severity_by_year(db):
    """Grouped bar chart of accidents per year, one bar per severity label.

    Reads the ``accident_copy`` table, which must provide a ``severity`` column.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/severity_by_year.png`` and ``html/severity_by_year.html``.
    """
    severity_by_year_sql = """
    SELECT accidentyear as year, accidentseveritycategory as code, severity, count(*) as count
    FROM accident_copy
    GROUP BY year, code, severity;
    """
    result_df = _query_frame(db, severity_by_year_sql)
    fig = px.bar(result_df, x='year', y='count', color='severity', barmode='group', orientation='v',
                 title="Severity over the years")
    fig.update_layout(xaxis_title="Year", yaxis_title="No. of Accidents", legend_title="Accident Severity")
    _save_figure(fig, "severity_by_year")


def ped_by_month(db):
    """Line chart of the monthly number of accidents involving pedestrians.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/ped_by_month.png`` and ``html/ped_by_month.html``.
    """
    _plot_monthly_involvement(
        db,
        flag_column="accidentinvolvingpedestrian",
        title='Accidents involving Pedestrians',
        stem="ped_by_month",
    )


def bike_by_month(db):
    """Line chart of the monthly number of accidents involving bicycles.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/bike_by_month.png`` and ``html/bike_by_month.html``.
    """
    _plot_monthly_involvement(
        db,
        flag_column="accidentinvolvingbicycle",
        title='Accidents involving Bicycles',
        stem="bike_by_month",
    )


# TODO: acc_by_involved(db) was abandoned as too tedious. The intended query was:
#     SELECT accidentyear AS year, accidentmonth AS month, accidentinvolvingpedestrian AS ped,
#            accidentinvolvingbicycle as bike,
#            accidentinvolvingmotorcycle as moto, count(*) as count
#     FROM accidents
#     GROUP BY year, month, ped, bike, moto
#     ORDER BY year, month;
# What is still missing is a single column to colour the px.line() traces by.


def severity_by_month(db):
    """Line chart of the monthly number of accidents, one line per severity label.

    Reads the ``accident_copy`` table, which must provide a ``severity`` column.

    Args:
        db: object exposing ``execute_query(sql)``.

    Writes ``fig/severity_by_month.png`` and ``html/severity_by_month.html``.
    """
    severity_by_month_sql = """
    SELECT accidentyear as year, accidentmonth as month, severity, count(*) as count
    FROM accident_copy
    GROUP BY year, month, severity
    ORDER BY year, month;
    """
    result_df = _add_year_month(_query_frame(db, severity_by_month_sql))
    fig = px.line(result_df, x='year-month', y='count', color='severity', orientation='v',
                  title='Accident severity')
    fig.update_layout(xaxis_title="Time", yaxis_title="No. of Accidents", legend_title="Accident Severity")
    _save_figure(fig, "severity_by_month")


# NOTE(review): further definitions of plots.py (the patch names save_as_html())
# sit between here and the entry point; they are outside the visible hunks and
# must be kept from the existing file.


if __name__ == "__main__":
    remote_db = RemoteDB()
    try:
        plt_acc_by_year(remote_db)
        plt_acc_by_weekday(remote_db)
        plt_acc_by_daytime(remote_db)
        plt_acc_by_day_year(remote_db)
        ped_by_month(remote_db)
        acc_by_type(remote_db)
        severity_by_year(remote_db)
        severity_by_month(remote_db)
        bike_by_month(remote_db)
    except Exception as e:
        # Top-level boundary: report the failure and still run the cleanup below.
        print(f"Exception {e} in plots.py")
    finally:
        # NOTE(review): the statements of this clause lie beyond the end of the
        # visible hunk; keep the existing cleanup code here when merging.
        pass