From 5b41db5e7e71ce3751a1677d619565cb856b25a1 Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Wed, 3 Jan 2024 13:49:01 +0100
Subject: [PATCH] Add logger statements and first experiments with accident data in
notebook.
---
src/datasets/integrated/data_viz.ipynb | 266 +++++++++++++++++++++++++
src/integrate.py | 6 +-
2 files changed, 269 insertions(+), 3 deletions(-)
create mode 100644 src/datasets/integrated/data_viz.ipynb
diff --git a/src/datasets/integrated/data_viz.ipynb b/src/datasets/integrated/data_viz.ipynb
new file mode 100644
index 0000000..dfb38c5
--- /dev/null
+++ b/src/datasets/integrated/data_viz.ipynb
@@ -0,0 +1,266 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "initial_id",
+ "metadata": {
+ "collapsed": true,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:30.748332084Z",
+ "start_time": "2024-01-03T12:40:30.269262153Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Handle imports\n",
+ "\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import numpy as np\n",
+ "import os \n",
+ "import csv\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "outputs": [],
+ "source": [
+ "fb_df = pd.read_csv('FootBikeCount.csv', low_memory=False)\n",
+ "miv_df = pd.read_csv('MivCount.csv', low_memory=False)\n",
+ "acc_gdf = gpd.read_file('Accidents.geojson')\n",
+ "sig_speeds_gdf = gpd.read_file('signaled_speeds.geojson.geojson')"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:51.041189781Z",
+ "start_time": "2024-01-03T12:40:30.749782023Z"
+ }
+ },
+ "id": "ef8bfea977c881d3"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "outputs": [],
+ "source": [
+ "map_gdf = gpd.read_file('map.geojson')"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:51.050765535Z",
+ "start_time": "2024-01-03T12:40:51.048395044Z"
+ }
+ },
+ "id": "eeed8aa197302f36"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " AccidentUID AccidentYear AccidentMonth \\\n0 A2D2677533867004E0430A865E337004 2011 1 \n1 9FD6441F802C20A6E0430A865E3320A6 2011 1 \n2 9FDA0DC4856A6094E0430A865E336094 2011 1 \n3 A3B66E42396E6000E0430A865E336000 2011 1 \n4 9FDA0DBE8CCE9096E0430A865E339096 2011 1 \n\n AccidentWeekDay_en AccidentHour NKoord EKoord \\\n0 Saturday 0 1245194 2684605 \n1 Saturday 1 1246980 2682382 \n2 Saturday 2 1247749 2682791 \n3 Saturday 2 1247102 2681199 \n4 Saturday 3 1250690 2682479 \n\n AccidentType_en AccidentType \\\n0 Accident with skidding or self-accident at0 \n1 Accident with skidding or self-accident at0 \n2 Accident with skidding or self-accident at0 \n3 Accident when crossing the lane(s) at5 \n4 Accident with skidding or self-accident at0 \n\n AccidentSeverityCategory AccidentInvolvingPedestrian \\\n0 as4 True \n1 as3 True \n2 as4 True \n3 as3 True \n4 as4 True \n\n AccidentInvolvingBicycle AccidentInvolvingMotorcycle RoadType RoadType_en \\\n0 True True rt433 Minor road \n1 True True rt433 Minor road \n2 True True rt439 Other \n3 True True rt433 Minor road \n4 True True rt433 Minor road \n\n geometry \n0 POINT Z (8.55841 47.35217 0.00000) \n1 POINT Z (8.52932 47.36851 0.00000) \n2 POINT Z (8.53488 47.37538 0.00000) \n3 POINT Z (8.51368 47.36976 0.00000) \n4 POINT Z (8.53129 47.40187 0.00000) ",
+ "text/html": "
\n\n
\n \n \n | \n AccidentUID | \n AccidentYear | \n AccidentMonth | \n AccidentWeekDay_en | \n AccidentHour | \n NKoord | \n EKoord | \n AccidentType_en | \n AccidentType | \n AccidentSeverityCategory | \n AccidentInvolvingPedestrian | \n AccidentInvolvingBicycle | \n AccidentInvolvingMotorcycle | \n RoadType | \n RoadType_en | \n geometry | \n
\n \n \n \n | 0 | \n A2D2677533867004E0430A865E337004 | \n 2011 | \n 1 | \n Saturday | \n 0 | \n 1245194 | \n 2684605 | \n Accident with skidding or self-accident | \n at0 | \n as4 | \n True | \n True | \n True | \n rt433 | \n Minor road | \n POINT Z (8.55841 47.35217 0.00000) | \n
\n \n | 1 | \n 9FD6441F802C20A6E0430A865E3320A6 | \n 2011 | \n 1 | \n Saturday | \n 1 | \n 1246980 | \n 2682382 | \n Accident with skidding or self-accident | \n at0 | \n as3 | \n True | \n True | \n True | \n rt433 | \n Minor road | \n POINT Z (8.52932 47.36851 0.00000) | \n
\n \n | 2 | \n 9FDA0DC4856A6094E0430A865E336094 | \n 2011 | \n 1 | \n Saturday | \n 2 | \n 1247749 | \n 2682791 | \n Accident with skidding or self-accident | \n at0 | \n as4 | \n True | \n True | \n True | \n rt439 | \n Other | \n POINT Z (8.53488 47.37538 0.00000) | \n
\n \n | 3 | \n A3B66E42396E6000E0430A865E336000 | \n 2011 | \n 1 | \n Saturday | \n 2 | \n 1247102 | \n 2681199 | \n Accident when crossing the lane(s) | \n at5 | \n as3 | \n True | \n True | \n True | \n rt433 | \n Minor road | \n POINT Z (8.51368 47.36976 0.00000) | \n
\n \n | 4 | \n 9FDA0DBE8CCE9096E0430A865E339096 | \n 2011 | \n 1 | \n Saturday | \n 3 | \n 1250690 | \n 2682479 | \n Accident with skidding or self-accident | \n at0 | \n as4 | \n True | \n True | \n True | \n rt433 | \n Minor road | \n POINT Z (8.53129 47.40187 0.00000) | \n
\n \n
\n
"
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "acc_gdf.head()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:51.086294164Z",
+ "start_time": "2024-01-03T12:40:51.049756885Z"
+ }
+ },
+ "id": "eb4bf665a06e9923"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "outputs": [],
+ "source": [
+ "import folium"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:51.222351595Z",
+ "start_time": "2024-01-03T12:40:51.077888125Z"
+ }
+ },
+ "id": "602d84c746e89cce"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": ""
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "\n",
+ "# Boundaries of Zurich's statistical quarters, served as GeoJSON by the city's WFS endpoint.\n",
+ "response = requests.get(\n",
+ " \"https://www.ogd.stadt-zuerich.ch/wfs/geoportal/Statistische_Quartiere?service=WFS&version=1.1.0&request=GetFeature&outputFormat=GeoJSON&typename=adm_statistische_quartiere_map\",\n",
+ " timeout=30,\n",
+ ")\n",
+ "response.raise_for_status()  # fail here rather than while parsing an error page as JSON\n",
+ "state_geo = response.json()\n",
+ "\n",
+ "# Draw the quarter outlines on a map centred on Zurich.\n",
+ "m = folium.Map(location=[47.38, 8.52], zoom_start=13)\n",
+ "folium.GeoJson(state_geo).add_to(m)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:52.577237632Z",
+ "start_time": "2024-01-03T12:40:51.224665299Z"
+ }
+ },
+ "id": "df0b59f35b494a62"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "",
+ "text/html": "Make this Notebook Trusted to load map: File -> Trust Notebook
"
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from folium.plugins import HeatMap\n",
+ "\n",
+ "# HeatMap takes (lat, lon) pairs, so expose the point coordinates as plain columns.\n",
+ "# The geometries are lon/lat points: x is the longitude, y is the latitude.\n",
+ "acc_gdf['latitude'] = acc_gdf.geometry.y\n",
+ "acc_gdf['longitude'] = acc_gdf.geometry.x\n",
+ "zurich_coordinates = [47.368650, 8.539183]\n",
+ "\n",
+ "# Static overview map: zoom controls, dragging and scroll/double-click zoom are all disabled.\n",
+ "fixed_map_zurich_original_coords = folium.Map(\n",
+ " location=zurich_coordinates, \n",
+ " zoom_start=13, \n",
+ " zoom_control=False, \n",
+ " dragging=False, \n",
+ " scrollWheelZoom=False, \n",
+ " doubleClickZoom=False\n",
+ ")\n",
+ "\n",
+ "# Single heat map over every accident in the data set.\n",
+ "HeatMap(data=acc_gdf[['latitude', 'longitude']], radius=10).add_to(fixed_map_zurich_original_coords)\n",
+ "fixed_map_zurich_original_coords"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:40:59.611801469Z",
+ "start_time": "2024-01-03T12:40:59.323708433Z"
+ }
+ },
+ "id": "cded2fff6806c2dc"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "outputs": [],
+ "source": [
+ "# Colour ramp shared by all yearly heat maps (intensity fraction -> colour).\n",
+ "gradient = {\n",
+ " 0.1: 'blue',\n",
+ " 0.3: 'cyan',\n",
+ " 0.5: 'lime',\n",
+ " 0.7: 'yellow',\n",
+ " 0.9: 'red'\n",
+ "}\n",
+ "interactive_map = folium.Map(\n",
+ " location=zurich_coordinates, \n",
+ " zoom_start=13, \n",
+ " zoom_control=True, \n",
+ " dragging=False, \n",
+ " scrollWheelZoom=True, \n",
+ " doubleClickZoom=False\n",
+ ")\n",
+ "\n",
+ "# One toggleable heat-map layer per accident year, 2011-2022 inclusive.\n",
+ "example_years = list(range(2011, 2023))\n",
+ "\n",
+ "for year in example_years:\n",
+ " # Only the accidents recorded in this year.\n",
+ " year_data = acc_gdf[acc_gdf['AccidentYear'] == year]\n",
+ " \n",
+ " # show=False: the layer starts hidden and is switched on via the layer control.\n",
+ " heatmap_layer = HeatMap(\n",
+ " data=year_data[['latitude', 'longitude']],\n",
+ " radius=8, \n",
+ " gradient=gradient,\n",
+ " min_opacity=0.5,\n",
+ " max_opacity=0.8,\n",
+ " blur=10,\n",
+ " show=False,\n",
+ " name=f'Accidents in {year}'\n",
+ " )\n",
+ " \n",
+ " \n",
+ " heatmap_layer.add_to(interactive_map)\n",
+ "\n",
+ "# Add the layer switcher once all yearly layers are on the map.\n",
+ "folium.LayerControl(collapsed=False).add_to(interactive_map)\n",
+ "# Export the map as a standalone HTML file (path is relative to the working directory).\n",
+ "interactive_map.save(\"accident_map_interactive.html\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-01-03T12:43:24.525437654Z",
+ "start_time": "2024-01-03T12:43:24.337895855Z"
+ }
+ },
+ "id": "738ca6a4a67ca1bd"
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/integrate.py b/src/integrate.py
index 330f682..55a167d 100755
--- a/src/integrate.py
+++ b/src/integrate.py
@@ -125,7 +125,7 @@ def process_foot_bike_data(files_present=True):
def process_miv_data(files_present=True):
miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=files_present)
-
+ logger.debug("Unified MIV dataframe created.")
miv_df_unified[['Datum', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True)
miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True)
@@ -180,7 +180,7 @@ def process_all_data_sources(fb_present=True, miv_present=True, accident_present
"""
# ensure_dirs_exist(data_dir, integrated_dir)
logger.info("Started processing all data sources.")
- fb_to_integrated(fb_present)
+ #fb_to_integrated(fb_present)
miv_to_integrated_csv(miv_present)
@@ -239,7 +239,7 @@ def load_tempo_geojson_from_api_to_local():
if __name__ == '__main__':
# ensure_dirs_exist(data_dir, integrated_dir, logs_dir)
- process_all_data_sources(True, False, False)
+ process_all_data_sources(True, True, False)
# miv_to_integrated_csv()
# acc_to_cleaned_geojson()
load_tempo_geojson_from_api_to_local()