From 5b41db5e7e71ce3751a1677d619565cb856b25a1 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Wed, 3 Jan 2024 13:49:01 +0100 Subject: [PATCH] Add logger statements and first plays with accident data in notebook. --- src/datasets/integrated/data_viz.ipynb | 266 +++++++++++++++++++++++++ src/integrate.py | 6 +- 2 files changed, 269 insertions(+), 3 deletions(-) create mode 100644 src/datasets/integrated/data_viz.ipynb diff --git a/src/datasets/integrated/data_viz.ipynb b/src/datasets/integrated/data_viz.ipynb new file mode 100644 index 0000000..dfb38c5 --- /dev/null +++ b/src/datasets/integrated/data_viz.ipynb @@ -0,0 +1,266 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:30.748332084Z", + "start_time": "2024-01-03T12:40:30.269262153Z" + } + }, + "outputs": [], + "source": [ + "# Handle imports\n", + "\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import os \n", + "import csv\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "fb_df = pd.read_csv('FootBikeCount.csv', low_memory=False)\n", + "miv_df = pd.read_csv('MivCount.csv', low_memory=False)\n", + "acc_gdf = gpd.read_file('Accidents.geojson')\n", + "sig_speeds_gdf = gpd.read_file('signaled_speeds.geojson.geojson')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:51.041189781Z", + "start_time": "2024-01-03T12:40:30.749782023Z" + } + }, + "id": "ef8bfea977c881d3" + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "map_gdf = gpd.read_file('map.geojson')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:51.050765535Z", + "start_time": "2024-01-03T12:40:51.048395044Z" + } + }, + 
"id": "eeed8aa197302f36" + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "data": { + "text/plain": " AccidentUID AccidentYear AccidentMonth \\\n0 A2D2677533867004E0430A865E337004 2011 1 \n1 9FD6441F802C20A6E0430A865E3320A6 2011 1 \n2 9FDA0DC4856A6094E0430A865E336094 2011 1 \n3 A3B66E42396E6000E0430A865E336000 2011 1 \n4 9FDA0DBE8CCE9096E0430A865E339096 2011 1 \n\n AccidentWeekDay_en AccidentHour NKoord EKoord \\\n0 Saturday 0 1245194 2684605 \n1 Saturday 1 1246980 2682382 \n2 Saturday 2 1247749 2682791 \n3 Saturday 2 1247102 2681199 \n4 Saturday 3 1250690 2682479 \n\n AccidentType_en AccidentType \\\n0 Accident with skidding or self-accident at0 \n1 Accident with skidding or self-accident at0 \n2 Accident with skidding or self-accident at0 \n3 Accident when crossing the lane(s) at5 \n4 Accident with skidding or self-accident at0 \n\n AccidentSeverityCategory AccidentInvolvingPedestrian \\\n0 as4 True \n1 as3 True \n2 as4 True \n3 as3 True \n4 as4 True \n\n AccidentInvolvingBicycle AccidentInvolvingMotorcycle RoadType RoadType_en \\\n0 True True rt433 Minor road \n1 True True rt433 Minor road \n2 True True rt439 Other \n3 True True rt433 Minor road \n4 True True rt433 Minor road \n\n geometry \n0 POINT Z (8.55841 47.35217 0.00000) \n1 POINT Z (8.52932 47.36851 0.00000) \n2 POINT Z (8.53488 47.37538 0.00000) \n3 POINT Z (8.51368 47.36976 0.00000) \n4 POINT Z (8.53129 47.40187 0.00000) ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AccidentUIDAccidentYearAccidentMonthAccidentWeekDay_enAccidentHourNKoordEKoordAccidentType_enAccidentTypeAccidentSeverityCategoryAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_engeometry
0A2D2677533867004E0430A865E33700420111Saturday012451942684605Accident with skidding or self-accidentat0as4TrueTrueTruert433Minor roadPOINT Z (8.55841 47.35217 0.00000)
19FD6441F802C20A6E0430A865E3320A620111Saturday112469802682382Accident with skidding or self-accidentat0as3TrueTrueTruert433Minor roadPOINT Z (8.52932 47.36851 0.00000)
29FDA0DC4856A6094E0430A865E33609420111Saturday212477492682791Accident with skidding or self-accidentat0as4TrueTrueTruert439OtherPOINT Z (8.53488 47.37538 0.00000)
3A3B66E42396E6000E0430A865E33600020111Saturday212471022681199Accident when crossing the lane(s)at5as3TrueTrueTruert433Minor roadPOINT Z (8.51368 47.36976 0.00000)
49FDA0DBE8CCE9096E0430A865E33909620111Saturday312506902682479Accident with skidding or self-accidentat0as4TrueTrueTruert433Minor roadPOINT Z (8.53129 47.40187 0.00000)
\n
" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acc_gdf.head()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:51.086294164Z", + "start_time": "2024-01-03T12:40:51.049756885Z" + } + }, + "id": "eb4bf665a06e9923" + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "import folium" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:51.222351595Z", + "start_time": "2024-01-03T12:40:51.077888125Z" + } + }, + "id": "602d84c746e89cce" + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas\n", + "import requests\n", + "state_geo = requests.get(\n", + " \"https://www.ogd.stadt-zuerich.ch/wfs/geoportal/Statistische_Quartiere?service=WFS&version=1.1.0&request=GetFeature&outputFormat=GeoJSON&typename=adm_statistische_quartiere_map\"\n", + ").json()\n", + "state_data = pandas.read_csv(\n", + " \"https://raw.githubusercontent.com/python-visualization/folium-example-data/main/us_unemployment_oct_2012.csv\"\n", + ")\n", + "\n", + "m = folium.Map(location=[47.38, 8.52], zoom_start=13)\n", + "\n", + "\n", + "folium.GeoJson(state_geo).add_to(m)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:52.577237632Z", + "start_time": "2024-01-03T12:40:51.224665299Z" + } + }, + "id": "df0b59f35b494a62" + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import folium\n", + "from folium.plugins import HeatMap\n", + "from pyproj import Transformer\n", + "\n", + "acc_gdf['latitude'] = acc_gdf.geometry.y\n", + "acc_gdf['longitude'] = acc_gdf.geometry.x\n", + "zurich_coordinates = [47.368650, \t8.539183]\n", + "\n", + "fixed_map_zurich_original_coords = folium.Map(\n", + " location=zurich_coordinates, \n", + " zoom_start=13, \n", + " zoom_control=False, \n", + " dragging=False, \n", + " scrollWheelZoom=False, \n", + " doubleClickZoom=False\n", + ")\n", + "\n", + "\n", + "HeatMap(data=acc_gdf[['latitude', 'longitude']], radius=10).add_to(fixed_map_zurich_original_coords)\n", + "fixed_map_zurich_original_coords" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:40:59.611801469Z", + "start_time": "2024-01-03T12:40:59.323708433Z" + } + }, + "id": "cded2fff6806c2dc" + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "# Create a base map\n", + "gradient = {\n", + " 0.1: 'blue',\n", + " 0.3: 'cyan',\n", + " 0.5: 'lime',\n", + " 0.7: 'yellow',\n", + " 0.9: 'red'\n", + "}\n", + "interactive_map = folium.Map(\n", + " location=zurich_coordinates, \n", + " zoom_start=13, \n", + " zoom_control=True, \n", + " dragging=False, \n", + " scrollWheelZoom=True, \n", + " doubleClickZoom=False\n", + ")\n", + "\n", + "\n", + "example_years = list(range(2011, 2023))  # 2011-2022 inclusive: one toggleable heatmap layer per accident year\n", + "\n", + "for year in example_years:\n", + " \n", + " year_data = acc_gdf[acc_gdf['AccidentYear'] == year]\n", + " \n", + " \n", + " heatmap_layer = HeatMap(\n", + " data=year_data[['latitude', 'longitude']],\n", + " radius=8, \n", + " gradient=gradient,\n", + " min_opacity=0.5,\n", + " max_opacity=0.8,\n", + " blur=10,\n", + " show=False,\n", + " name=f'Accidents in {year}'\n", + " )\n", + " \n", + " \n", + " 
heatmap_layer.add_to(interactive_map)\n", + "\n", + "\n", + "folium.LayerControl(collapsed=False).add_to(interactive_map)\n", + "# TODO: add the WFS tile layer once `wfs` is defined; the bare folium.TileLayer(wfs) call raised NameError on a fresh kernel and was never added to the map\n", + "interactive_map.save(\"accident_map_interactive.html\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T12:43:24.525437654Z", + "start_time": "2024-01-03T12:43:24.337895855Z" + } + }, + "id": "738ca6a4a67ca1bd" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/integrate.py b/src/integrate.py index 330f682..55a167d 100755 --- a/src/integrate.py +++ b/src/integrate.py @@ -125,7 +125,7 @@ def process_foot_bike_data(files_present=True): def process_miv_data(files_present=True): miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=files_present) - + logger.debug("Unified MIV dataframe created.") miv_df_unified[['Datum', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True) @@ -180,7 +180,7 @@ def process_all_data_sources(fb_present=True, miv_present=True, accident_present """ # ensure_dirs_exist(data_dir, integrated_dir) logger.info("Started processing all data sources.") - fb_to_integrated(fb_present) + #fb_to_integrated(fb_present) miv_to_integrated_csv(miv_present) @@ -239,7 +239,7 @@ def load_tempo_geojson_from_api_to_local(): if __name__ == '__main__': # ensure_dirs_exist(data_dir, integrated_dir, logs_dir) - process_all_data_sources(True, False, False) + process_all_data_sources(True, True, False) # miv_to_integrated_csv() # 
acc_to_cleaned_geojson() load_tempo_geojson_from_api_to_local()