Merge branch 'sebl' into 'main'

Add code to create Artifacts for Data Analysis and Presentation.

See merge request dbis/lecture-groups/database-systems/2023hs/group-1!5
This commit is contained in:
Sebastian Lenzlinger 2024-01-06 15:57:11 +00:00
commit aeeb2e368d
37 changed files with 4497 additions and 113 deletions

3
.gitignore vendored Normal file → Executable file
View File

@@ -15,7 +15,8 @@ db23-project-venv/
*.tax2010
*.vcf
*.xml
#
analysis/config.py
### Database ###
*.accdb
*.db

8
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

0
DataExploration.ipynb Normal file → Executable file
View File

0
README.md Normal file → Executable file
View File

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,11 @@
# config.py, adjust as needed
# TODO: RENAME THIS FILE TO "config.py"
SSH_HOST = 'slenzlinger.dev'
SSH_USERNAME = 'sebl'  # TODO: enter your own username
SSH_PASSWORD = 'your_ssh_password'  # TODO: do not push this to git
DB_NAME = 'your_database_name' # TODO
DB_USER = 'your_database_username' # TODO
DB_PASSWORD = 'your_database_password' # TODO
DB_HOST = 'your_database_host' # TODO
DB_PORT = 5433
SSH_PORT = 22
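Because SSH_PASSWORD must never end up in git (analysis/config.py is gitignored above for exactly this reason), a variant of the template that pulls the secrets from environment variables may be safer. A minimal sketch; the DB23_* variable names are made up and can be anything you export in your shell:

```python
# config.py sketch reading secrets from the environment instead of hard-coding them.
# The DB23_* names are hypothetical; export whichever names you prefer.
import os

SSH_HOST = 'slenzlinger.dev'
SSH_PORT = 22
SSH_USERNAME = os.environ.get('DB23_SSH_USERNAME', 'sebl')
SSH_PASSWORD = os.environ['DB23_SSH_PASSWORD']  # KeyError if unset: fail fast
DB_NAME = os.environ.get('DB23_DB_NAME', 'proj_db')
DB_USER = os.environ.get('DB23_DB_USER', 'sebl')
DB_PASSWORD = os.environ['DB23_DB_PASSWORD']
DB_HOST = os.environ.get('DB23_DB_HOST', 'localhost')
DB_PORT = 5433
```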

View File

@@ -0,0 +1,16 @@
drop table if exists accident_copy;
create table accident_copy as
select * from accidents;
alter table accident_copy add severity varchar;
update accident_copy set severity = 'Accident with property damage'
where accidentseveritycategory='as4';
update accident_copy set severity = 'Accident with light injuries'
where accidentseveritycategory='as3';
update accident_copy set severity = 'Accident with severe injuries'
where accidentseveritycategory='as2';
update accident_copy set severity = 'Accident with fatalities'
where accidentseveritycategory='as1';
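The four UPDATEs apply a fixed code-to-label mapping one row subset at a time; the same mapping fits into a single UPDATE with a CASE expression. A sketch of the equivalent statement, runnable through the RemoteDB helper from analysis/db_connector.py below, assuming accident_copy has been created as above:

```python
# Equivalent single-statement version of the severity mapping above.
from db_connector import RemoteDB

SEVERITY_CASE_SQL = """
UPDATE accident_copy SET severity = CASE accidentseveritycategory
    WHEN 'as1' THEN 'Accident with fatalities'
    WHEN 'as2' THEN 'Accident with severe injuries'
    WHEN 'as3' THEN 'Accident with light injuries'
    WHEN 'as4' THEN 'Accident with property damage'
END;
"""

db = RemoteDB()
try:
    db.execute_command(SEVERITY_CASE_SQL)
finally:
    db.close()
```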

View File

@@ -0,0 +1,77 @@
DROP TABLE IF EXISTS fbcount_copy;
CREATE TABLE fbcount_copy AS
SELECT * FROM footbikecount;
ALTER TABLE fbcount_copy ADD fuss_total INTEGER;
UPDATE fbcount_copy SET fuss_total = fuss_in + fuss_out;
ALTER TABLE fbcount_copy
DROP COLUMN IF EXISTS fuss_in,
DROP COLUMN IF EXISTS fuss_out,
ADD PRIMARY KEY (id);
ALTER TABLE fbcount_copy ADD velo_total INTEGER;
UPDATE fbcount_copy SET velo_total = velo_in + velo_out;
ALTER TABLE fbcount_copy
DROP COLUMN velo_in,
DROP COLUMN velo_out;
select count(*), accidentyear from accidents
group by accidentyear
order by accidentyear;
SELECT COUNT(*), accidentweekday_en
FROM accidents
GROUP BY accidentweekday_en
ORDER BY COUNT(*);
SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
FROM accidents
GROUP BY weekday, year
ORDER BY year, COUNT(*);
select distinct msid from mivcount;
SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
FROM accidents
GROUP BY year, weekday
ORDER BY year, weekday;
SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
FROM accidents
GROUP BY year, month;
SELECT accidentyear as year, accidentmonth as month, count(*) as count
from accidents
where accidentinvolvingpedestrian=True
group by month, year
order by year, month;
drop table if exists accident_copy;
create table accident_copy as
select * from accidents;
alter table accident_copy add severity varchar;
update accident_copy set severity = 'Accident with property damage'
where accidentseveritycategory='as4';
update accident_copy set severity = 'Accident with light injuries'
where accidentseveritycategory='as3';
update accident_copy set severity = 'Accident with severe injuries'
where accidentseveritycategory='as2';
update accident_copy set severity = 'Accident with fatalities'
where accidentseveritycategory='as1';
SELECT accidentyear AS year, accidentmonth AS month, accidentinvolvingpedestrian AS ped,
accidentinvolvingbicycle as bike,
accidentinvolvingmotorcycle as moto,count(*) as count
FROM accidents
GROUP BY year, month, ped, bike, moto
ORDER BY year, month;
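These exploration queries are easiest to eyeball from Python. A minimal sketch that runs the year/weekday aggregation through the RemoteDB helper below and loads the rows into pandas (the column list mirrors the SQL aliases):

```python
# Run one of the exploration queries above and inspect it as a DataFrame.
import pandas as pd
from db_connector import RemoteDB

db = RemoteDB()
try:
    rows = db.execute_query("""
        SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
        FROM accidents
        GROUP BY year, weekday
        ORDER BY year, weekday;
    """)
    df = pd.DataFrame(rows, columns=['year', 'weekday', 'count'])
    print(df.head())
finally:
    db.close()
```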

View File

@@ -0,0 +1,19 @@
DROP TABLE IF EXISTS fbcount_copy;
CREATE TABLE fbcount_copy AS
SELECT * FROM footbikecount;
ALTER TABLE fbcount_copy ADD fuss_total INTEGER;
UPDATE fbcount_copy SET fuss_total = fuss_in + fuss_out;
ALTER TABLE fbcount_copy
DROP COLUMN IF EXISTS fuss_in,
DROP COLUMN IF EXISTS fuss_out,
ADD PRIMARY KEY (id);
ALTER TABLE fbcount_copy ADD velo_total INTEGER;
UPDATE fbcount_copy SET velo_total = velo_in + velo_out;
ALTER TABLE fbcount_copy
DROP COLUMN IF EXISTS velo_in,
DROP COLUMN IF EXISTS velo_out;

View File

View File

@@ -0,0 +1,30 @@
select count(*), accidentyear from accidents
group by accidentyear
order by accidentyear;
SELECT COUNT(*), accidentweekday_en
FROM accidents
GROUP BY accidentweekday_en
ORDER BY COUNT(*);
SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
FROM accidents
GROUP BY weekday, year
ORDER BY year, COUNT(*);
drop table if exists accident_copy;
create table accident_copy as
select * from accidents;
alter table accident_copy add severity varchar;
update accident_copy set severity = 'Accident with property damage'
where accidentseveritycategory='as4';
update accident_copy set severity = 'Accident with light injuries'
where accidentseveritycategory='as3';
update accident_copy set severity = 'Accident with severe injuries'
where accidentseveritycategory='as2';
update accident_copy set severity = 'Accidents with fatalities'
where accidentseveritycategory='as1';

81
analysis/db_connector.py Normal file
View File

@@ -0,0 +1,81 @@
import logging
import paramiko.util
import sqlalchemy
from sshtunnel import SSHTunnelForwarder
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from config import SSH_HOST, SSH_USERNAME, SSH_PASSWORD, DB_NAME, DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, SSH_PORT
logging.getLogger("paramiko").setLevel(logging.WARNING)
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('db_connector.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
class RemoteDB:
    def __init__(self):
        self.ssh_host = SSH_HOST
        self.ssh_username = SSH_USERNAME
        self.ssh_password = SSH_PASSWORD
        self.db_name = DB_NAME
        self.db_user = DB_USER
        self.db_password = DB_PASSWORD
        self.db_host = DB_HOST
        self.db_port = DB_PORT
        self.ssh_port = SSH_PORT
        self.tunnel = None
        self.engine = None
        self.Session = None
        self._connect()

    def _connect(self):
        try:
            # Open an SSH tunnel and bind the remote Postgres socket to a free local port
            self.tunnel = SSHTunnelForwarder(
                (self.ssh_host, self.ssh_port),
                ssh_username=self.ssh_username,
                ssh_password=self.ssh_password,
                remote_bind_address=(self.db_host, self.db_port)
            )
            self.tunnel.start()
            local_port = self.tunnel.local_bind_port
            db_url = f"postgresql://{self.db_user}:{self.db_password}@localhost:{local_port}/{self.db_name}"
            self.engine = create_engine(db_url)
            self.Session = sessionmaker(bind=self.engine)
        except Exception as e:
            logger.exception(f"Connection failed: {e}")

    def execute_query(self, query: str):
        session = self.Session()
        try:
            result = session.execute(sqlalchemy.text(query))
            session.commit()
            return result.fetchall()
        except Exception as e:
            session.rollback()
            raise
        finally:
            session.close()

    def execute_command(self, cmd: str):
        session = self.Session()
        try:
            result = session.execute(sqlalchemy.text(cmd))
            session.commit()
            logger.debug(f"Command {cmd} committed.")
        except Exception as e:
            session.rollback()
            raise
        finally:
            session.close()

    def close(self):
        # Dispose the engine first, then tear down the tunnel
        if self.engine:
            self.engine.dispose()
        if self.tunnel:
            self.tunnel.stop()
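A minimal usage sketch for RemoteDB: the SSH tunnel is opened implicitly in the constructor, so the only obligation on the caller is to close() when done, so the engine is disposed and the tunnel stops:

```python
from db_connector import RemoteDB

db = RemoteDB()
try:
    rows = db.execute_query("SELECT COUNT(*) FROM accidents;")
    print(rows[0][0], "accidents")
finally:
    db.close()  # disposes the engine and stops the SSH tunnel
```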

133
analysis/db_utils.py Normal file
View File

@@ -0,0 +1,133 @@
from db_connector import RemoteDB
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("db_utils.py")
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
# Generic DB Methods ==================================================================================================
def drop_view(view_name):
    drop_view_sql = f"""
    DROP VIEW IF EXISTS {view_name};
    """
    remote_db = RemoteDB()
    try:
        remote_db.execute_command(drop_view_sql)
        logger.info(f"{view_name} dropped.")
    except Exception as e:
        logger.exception(f"Exception while dropping {view_name}. Msg: {e}")
    finally:
        remote_db.close()
        logger.debug("RemoteDB object closed.")


def get_view(view_name, rows="*"):
    get_view_sql = f"""
    SELECT {rows}
    FROM {view_name};
    """
    remote_db = RemoteDB()
    try:
        result = remote_db.execute_query(get_view_sql)
        logger.info(f"Successfully retrieved {view_name}")
        return result
    except Exception as e:
        logger.exception(f"Failed getting view for {view_name} with exception {e}.")
    finally:
        remote_db.close()


def query_table(table_name):
    pass


# Specialized DB methods ==============================================================================================
def create_heat_view():
    create_heat_view_sql = """
    CREATE VIEW heat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("Heat View Created")


def create_bike_heat_view():
    create_heat_view_sql = """
    CREATE VIEW bikeheat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL AND
        accidentinvolvingbicycle IS TRUE;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("BIKE Heat View Created")


def create_pedestrian_heat_view():
    create_heat_view_sql = """
    CREATE VIEW pedestrianheat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL AND
        accidentinvolvingpedestrian IS TRUE;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("PEDESTRIAN Heat View Created")


def create_motorcycle_heat_view():
    create_heat_view_sql = """
    CREATE VIEW motoheat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL AND
        accidentinvolvingmotorcycle IS TRUE;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("MOTO Heat View Created")

303
analysis/map.py Normal file
View File

@@ -0,0 +1,303 @@
import pandas as pd
import geopandas as gpd
import colorsys
import folium
from folium import plugins
import logging
from folium.plugins import HeatMap
from matplotlib import pyplot as plt
from db_connector import RemoteDB
import shapely
from shapely import wkb
import json
## MUST IMPORT: this wildcard import provides the view helper functions (get_view, create_*_heat_view, drop_view) used for db interaction
from db_utils import *
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('map.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
accidents_filepath = "../src/datasets/integrated/Accidents.geojson"
signaled_speeds_filepath = "../src/datasets/integrated/signaled_speeds.geojson.geojson"
# Map centered around zurich
zurich_coordinates = [47.368650, 8.539183]
fixed_map_zurich_original_coords = folium.Map(
location=zurich_coordinates,
zoom_start=13,
zoom_control=False,
dragging=False,
scrollWheelZoom=False,
doubleClickZoom=False
)
gradient = {
0.1: 'blue',
0.3: 'cyan',
0.5: 'lime',
0.7: 'yellow',
0.9: 'red'
}
speedLimits = ["T0","T20","T30","T50","T60","T80","T100"]
color_dict = {
"T0": "red",
"T20": "orange",
"T30": "green",
"T50": "yellow",
"T60": "purple",
"T80": "pink",
"T100": "gray"
}
# Create Maps =========================================================================================================
def create_heat_map_with_time(folium_map):
    # Process heat map data
    heat_view_data = get_view("heat")
    heat_df = gpd.GeoDataFrame(heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_df.empty, f"Heat Dataframe is empty: {heat_df.head(5)}"
    add_heat_map_time(heat_df, folium_map)
    logger.info("Heat map time added to time map.")
    #interactive_map.save("test.html")
    add_signaled_speeds(folium_map)
    # Add bikes
    add_bike_heat_map_time(folium_map)
    # Pedestrian part
    add_pedestrian_heat_map_time(folium_map)
    folium.LayerControl(collapsed=True).add_to(folium_map)


def create_heat_map_toggle(folium_map):
    heat_view_data = get_view("heat")
    heat_gdf = gpd.GeoDataFrame(heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_gdf.empty, f"Heat Dataframe is empty: {heat_gdf.head(5)}"
    add_heat_year_toggle(heat_gdf, folium_map)
    add_bike_heat_toggle(folium_map)
    add_ped_heat_toggle(folium_map)
    # Add signaled speeds data
    add_signaled_speeds(folium_map)
    folium.LayerControl(collapsed=True).add_to(folium_map)


# Layer Adding Methods ================================================================================================
def add_bike_heat_map_time(folium_map):
    # Process heat map data
    bike_heat_view_data = get_view('bikeheat', 'latitude, longitude, year')
    bike_heat_df = gpd.GeoDataFrame(bike_heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not bike_heat_df.empty, f"Heat Dataframe is empty: {bike_heat_df.head(5)}"
    heat_data = [[[row['latitude'], row['longitude'], 0.1] for index, row in bike_heat_df[bike_heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    logger.debug(f"First element of heat data: {heat_data[0]}")
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    AccidentType = "Bicycles: "
    index = [str(element) for element in index]
    index = [AccidentType + element for element in index]
    # plot heat map
    gradient = generate_hue_gradient(0.6, 5)
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=1,
                                 gradient=gradient,
                                 min_opacity=0.5,
                                 radius=9,
                                 use_local_extrema=False,
                                 blur=1,
                                 index=index,
                                 name="Accident Heatmap Bikes")
    hm.add_to(folium_map)


def add_pedestrian_heat_map_time(folium_map):
    # Process heat map data
    pedestrian_heat_view_data = get_view("pedestrianheat")
    heat_df = gpd.GeoDataFrame(pedestrian_heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_df.empty, f"Heat Dataframe is empty: {heat_df.head(5)}"
    heat_data = [[[row['latitude'], row['longitude'], 0.5] for index, row in heat_df[heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    logger.debug(f"First element of PED heat data: {heat_data[0]}")
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    AccidentType = "Pedestrians: "
    index = [str(element) for element in index]
    index = [AccidentType + element for element in index]
    # plot heat map
    gradient = generate_hue_gradient(0.2, 5)
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=1,
                                 gradient=gradient,
                                 min_opacity=0.5,
                                 radius=9,
                                 use_local_extrema=False,
                                 blur=1,
                                 index=index,
                                 name="Accident Heatmap Pedestrian")
    hm.add_to(folium_map)


def add_heat_map_time(heat_df, folium_map):
    heat_data = [[[row['latitude'], row['longitude'], 0.5] for index, row in heat_df[heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    # create heat map
    logger.debug(f"First element of heat data: {heat_data[0]}")
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=0.8,
                                 gradient=gradient,
                                 min_opacity=0.3,
                                 radius=9,
                                 use_local_extrema=False,
                                 blur=1,
                                 index=index,
                                 name="Accident Heatmap ALL")
    hm.add_to(folium_map)


def add_signaled_speeds(folium_map):
    # Add signaled speeds data
    rows = """
    temporegime_technical as tempo,
    wkb_geometry
    """
    sig_speeds_data = get_view("signaled_speeds", rows)
    sig_speed_df = pd.DataFrame(sig_speeds_data, columns=['tempo', 'wkb_geometry'])
    sig_speed_df['geometry'] = sig_speed_df['wkb_geometry'].apply(lambda x: wkb.loads(x, hex=True))
    logger.debug(f"{sig_speed_df.head()}")
    sig_speed_gdf = gpd.GeoDataFrame(sig_speed_df, geometry="geometry")
    for speedlimit in speedLimits:
        signal_speed = sig_speed_gdf[sig_speed_gdf["tempo"].str.contains(speedlimit, case=False)]
        geometries = json.loads(json.dumps(shapely.geometry.mapping(signal_speed['geometry'].unary_union)))
        folium.GeoJson(
            data=geometries,
            name=f'Signaled Speed {speedlimit}',
            color=color_dict[speedlimit],
            show=False,
            line_cap="butt",
        ).add_to(folium_map)


def add_heat_year_toggle(heat_gdf, folium_map, name="All"):
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    # plot one toggleable heat map layer per year
    for year in index:
        year_data = heat_gdf[heat_gdf['year'] == year]
        heatmap_layer = HeatMap(
            data=year_data[['latitude', 'longitude']],
            radius=8,
            gradient=gradient,
            min_opacity=0.5,
            max_opacity=0.8,
            blur=10,
            show=False,
            name=f'{name} in {year}'
        )
        heatmap_layer.add_to(folium_map)


def add_bike_heat_toggle(folium_map):
    bike_heat_view_data = get_view('bikeheat', 'latitude, longitude, year')
    heat_gdf = gpd.GeoDataFrame(bike_heat_view_data, columns=['latitude', 'longitude', 'year'])
    add_heat_year_toggle(heat_gdf, folium_map, name="bicycles")


def add_ped_heat_toggle(folium_map):
    pedestrian_heat_view_data = get_view("pedestrianheat")
    heat_gdf = gpd.GeoDataFrame(pedestrian_heat_view_data, columns=['latitude', 'longitude', 'year'])
    add_heat_year_toggle(heat_gdf, folium_map, name="pedestrians")


# Utilities ===========================================================================================================
def save_map_as_html(folium_map, name):
    folium_map.save(f"{name}.html")
    logger.info(f"Successfully saved map {name}.")


def setup_views():
    drop_view("heat")
    create_heat_view()
    drop_view("bikeheat")
    create_bike_heat_view()
    drop_view("pedestrianheat")
    create_pedestrian_heat_view()


def generate_hue_gradient(hue, num_colors):
    if num_colors < 2:
        num_colors = 2
    gradient = {}
    for i in range(num_colors):
        lightness = 0.1 + 0.8 * (i / (num_colors - 1))
        saturation = 0.1 + 0.8 * (i / (num_colors - 1))
        rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
        gradient[i / (num_colors - 1)] = '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
    return gradient


def generate_contrasting_gradient(num_colors):
    cmap = plt.get_cmap('viridis')  # viridis is a map with contrasting colors
    gradient = {}
    for i in range(num_colors):
        rgba = cmap(i / (num_colors - 1))
        gradient[i / (num_colors - 1)] = '#{:02x}{:02x}{:02x}'.format(int(rgba[0]*255), int(rgba[1]*255), int(rgba[2]*255))
    return gradient


if __name__ == "__main__":
    time_map = folium.Map(
        location=zurich_coordinates,
        zoom_start=13,
        zoom_control=True,
        dragging=True,
        scrollWheelZoom=True,
        doubleClickZoom=False,
        tiles="cartodb positron"
    )
    toggle_map = folium.Map(
        location=zurich_coordinates,
        zoom_start=13,
        zoom_control=True,
        dragging=True,
        scrollWheelZoom=True,
        doubleClickZoom=False,
        tiles="cartodb positron"
    )
    #setup_views()
    create_heat_map_with_time(time_map)
    create_heat_map_toggle(toggle_map)
    ## Save Maps ============================================================================================
    save_map_as_html(toggle_map, "heat_map_toggle")
    save_map_as_html(time_map, "heat_map_time")
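generate_hue_gradient fixes one hue and ramps lightness and saturation together, so each accident layer gets an internally consistent colour ramp that is still distinguishable from the others. A quick way to inspect a ramp (assuming map.py is run from the analysis directory):

```python
from map import generate_hue_gradient

# Five-step ramp around hue 0.6 (blueish), as used for the bike heat layer
ramp = generate_hue_gradient(0.6, 5)
for position, hex_colour in ramp.items():
    print(f"{position:.2f} -> {hex_colour}")
```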

413
analysis/map_michel.py Normal file
View File

@@ -0,0 +1,413 @@
import pandas as pd
import geopandas as gpd
import os
import folium
from folium import plugins
import logging
from db_connector import RemoteDB
import shapely
from shapely import wkb
import json
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('map.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
accidents_filepath = "../src/datasets/integrated/Accidents.geojson"
signaled_speeds_filepath = "../src/datasets/integrated/signaled_speeds.geojson.geojson"
# Map centered around zurich
zurich_coordinates = [47.368650, 8.539183]
fixed_map_zurich_original_coords = folium.Map(
location=zurich_coordinates,
zoom_start=13,
zoom_control=False,
dragging=False,
scrollWheelZoom=False,
doubleClickZoom=False
)
gradient = {
0.1: 'blue',
0.3: 'cyan',
0.5: 'lime',
0.7: 'yellow',
0.9: 'red'
}
interactive_map = folium.Map(
location=zurich_coordinates,
zoom_start=13,
zoom_control=True,
dragging=True,
scrollWheelZoom=True,
doubleClickZoom=False,
tiles="cartodb positron"
)
speedLimits = ["T0","T20","T30","T50","T60","T80","T100"]
color_dict = {
"T0": "red",
"T20": "orange",
"T30": "green",
"T50": "yellow",
"T60": "purple",
"T80": "pink",
"T100": "gray"
}
def drop_heat_view():
    drop_heat_view_sql = """
    DROP VIEW IF EXISTS heat;
    """
    remote_db = RemoteDB()
    try:
        result = remote_db.execute_query(drop_heat_view_sql)
        logger.info("Heat View dropped.")
    except Exception as e:
        logger.exception(f"Exception while dropping heat view. Msg: {e}")
    finally:
        remote_db.close()
        logger.debug("RemoteDB object closed.")


def create_heat_view():
    create_heat_view_sql = """
    CREATE VIEW heat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("Heat View Created")


def get_heat_view():
    get_heat_view_sql = """
    SELECT latitude, longitude, year
    FROM heat;
    """
    remote_db = RemoteDB()
    # Get heat map data from database
    try:
        result = remote_db.execute_query(get_heat_view_sql)
        logger.info("Successfully retrieved result")
        return result
    except Exception as e:
        logger.exception(f"Failed getting result with exception {e}")
    finally:
        remote_db.close()


def create_heat_map_with_time():
    # Process heat map data
    heat_view_data = get_heat_view()
    heat_df = gpd.GeoDataFrame(heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_df.empty, f"Heat Dataframe is empty: {heat_df.head(5)}"
    heat_data = [[[row['latitude'], row['longitude']] for index, row in heat_df[heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    AccidentType = "All: "
    index = [str(element) for element in index]
    index = [AccidentType + element for element in index]
    # plot heat map
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=0.8,
                                 index=index,
                                 name="Accident Heatmap")
    hm.add_to(interactive_map)
    interactive_map.save("test.html")
    # Add signaled speeds data
    sig_speeds_data = get_signaled_speed_sql()
    sig_speed_df = pd.DataFrame(sig_speeds_data, columns=['tempo', 'wkb_geometry'])
    sig_speed_df['geometry'] = sig_speed_df['wkb_geometry'].apply(lambda x: wkb.loads(x, hex=True))
    logger.debug(f"{sig_speed_df.head()}")
    sig_speed_gdf = gpd.GeoDataFrame(sig_speed_df, geometry="geometry")
    for speedlimit in speedLimits:
        signal_speed = sig_speed_gdf[sig_speed_gdf["tempo"].str.contains(speedlimit, case=False)]
        geometries = json.loads(json.dumps(shapely.geometry.mapping(signal_speed['geometry'].unary_union)))
        folium.GeoJson(
            data=geometries,
            name=f'Signaled Speed {speedlimit}',
            color=color_dict[speedlimit],
            show=False,
            line_cap="butt",
        ).add_to(interactive_map)
    folium.LayerControl(collapsed=True).add_to(interactive_map)
    interactive_map.save("heat_map_time.html")


def create_heat_map_toggle():
    # Process heat map data
    heat_view_data = get_heat_view()
    heat_df = gpd.GeoDataFrame(heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_df.empty, f"Heat Dataframe is empty: {heat_df.head(5)}"
    heat_data = [[[row['latitude'], row['longitude']] for index, row in heat_df[heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    # plot heat map
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=0.8,
                                 index=index,
                                 blur=0.8,
                                 name="Accident Heatmap")
    hm.add_to(interactive_map)
    interactive_map.save("test.html")
    # Add signaled speeds data
    sig_speeds_data = get_signaled_speed_sql()
    sig_speed_df = pd.DataFrame(sig_speeds_data, columns=['tempo', 'wkb_geometry'])
    sig_speed_df['geometry'] = sig_speed_df['wkb_geometry'].apply(lambda x: wkb.loads(x, hex=True))
    logger.debug(f"{sig_speed_df.head()}")
    sig_speed_gdf = gpd.GeoDataFrame(sig_speed_df, geometry="geometry")
    for speedlimit in speedLimits:
        signal_speed = sig_speed_gdf[sig_speed_gdf["tempo"].str.contains(speedlimit, case=False)]
        geometries = json.loads(json.dumps(shapely.geometry.mapping(signal_speed['geometry'].unary_union)))
        folium.GeoJson(
            data=geometries,
            name=f'Signaled Speed {speedlimit}',
            color=color_dict[speedlimit],
            show=False,
            line_cap="butt",
        ).add_to(interactive_map)
    folium.LayerControl(collapsed=True).add_to(interactive_map)


def get_signaled_speed_sql():
    sigspeed_sql = """
    SELECT
        temporegime_technical as tempo,
        wkb_geometry
    FROM signaled_speeds;
    """
    remote_db = RemoteDB()
    try:
        result = remote_db.execute_query(sigspeed_sql)
        logger.info("Successfully retrieved result")
        return result
    except Exception as e:
        logger.exception(f"Failed getting result with exception {e}")
    finally:
        remote_db.close()


def save_map_as_html(map, name):
    map.save(f"{name}.html")
    logger.info(f"Successfully saved map {name}.")


# Bike-accident-Filter ====================================================
def drop_bike_heat_view():
    drop_heat_view_sql = """
    DROP VIEW IF EXISTS bikeheat;
    """
    remote_db = RemoteDB()
    try:
        result = remote_db.execute_query(drop_heat_view_sql)
        logger.info("Heat View dropped.")
    except Exception as e:
        logger.exception(f"Exception while dropping heat view. Msg: {e}")
    finally:
        remote_db.close()
        logger.debug("RemoteDB object closed.")


def create_bike_heat_view():
    create_heat_view_sql = """
    CREATE VIEW bikeheat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL AND
        accidentinvolvingbicycle IS TRUE;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("Heat View Created")


def get_bike_heat_view():
    get_heat_view_sql = """
    SELECT latitude, longitude, year
    FROM bikeheat;
    """
    remote_db = RemoteDB()
    # Get heat map data from database
    try:
        result = remote_db.execute_query(get_heat_view_sql)
        logger.info("Successfully retrieved result")
        return result
    except Exception as e:
        logger.exception(f"Failed getting result with exception {e}")
    finally:
        remote_db.close()


def create_bike_heat_map_toggle():
    # Process heat map data
    heat_view_data = get_bike_heat_view()
    heat_df = gpd.GeoDataFrame(heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_df.empty, f"Heat Dataframe is empty: {heat_df.head(5)}"
    heat_data = [[[row['latitude'], row['longitude']] for index, row in heat_df[heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    AccidentType = "Bicycles: "
    index = [str(element) for element in index]
    index = [AccidentType + element for element in index]
    # plot heat map
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=0.8,
                                 index=index,
                                 blur=0.8,
                                 name="Accident Heatmap including bikes")
    hm.add_to(interactive_map)
    interactive_map.save("test2.html")


# Pedestrian Part ======================================================================
def drop_pedestrian_heat_view():
    drop_heat_view_sql = """
    DROP VIEW IF EXISTS pedestrianheat;
    """
    remote_db = RemoteDB()
    try:
        result = remote_db.execute_query(drop_heat_view_sql)
        logger.info("Heat View dropped.")
    except Exception as e:
        logger.exception(f"Exception while dropping heat view. Msg: {e}")
    finally:
        remote_db.close()
        logger.debug("RemoteDB object closed.")


def create_pedestrian_heat_view():
    create_heat_view_sql = """
    CREATE VIEW pedestrianheat AS
    SELECT
        ST_Y(geometry) AS latitude,
        ST_X(geometry) AS longitude,
        AccidentYear AS year
    FROM
        accidents
    WHERE
        ST_Y(geometry) IS NOT NULL AND
        ST_X(geometry) IS NOT NULL AND
        AccidentYear IS NOT NULL AND
        accidentinvolvingpedestrian IS TRUE;
    """
    remote_db = RemoteDB()
    remote_db.execute_command(create_heat_view_sql)
    remote_db.close()
    logger.info("Heat View Created")


def get_pedestrian_heat_view():
    get_heat_view_sql = """
    SELECT latitude, longitude, year
    FROM pedestrianheat;
    """
    remote_db = RemoteDB()
    # Get heat map data from database
    try:
        result = remote_db.execute_query(get_heat_view_sql)
        logger.info("Successfully retrieved result")
        return result
    except Exception as e:
        logger.exception(f"Failed getting result with exception {e}")
    finally:
        remote_db.close()


def create_pedestrian_heat_map_toggle():
    # Process heat map data
    heat_view_data = get_pedestrian_heat_view()
    heat_df = gpd.GeoDataFrame(heat_view_data, columns=['latitude', 'longitude', 'year'])
    assert not heat_df.empty, f"Heat Dataframe is empty: {heat_df.head(5)}"
    heat_data = [[[row['latitude'], row['longitude']] for index, row in heat_df[heat_df['year'] == i].iterrows()]
                 for i in range(2011, 2023)]
    index = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    AccidentType = "Pedestrians: "
    index = [str(element) for element in index]
    index = [AccidentType + element for element in index]
    # plot heat map
    hm = plugins.HeatMapWithTime(heat_data,
                                 auto_play=False,
                                 max_opacity=0.8,
                                 index=index,
                                 blur=0.8,
                                 name="Accident Heatmap including pedestrians")
    hm.add_to(interactive_map)
    interactive_map.save("test3.html")


if __name__ == "__main__":
    # Pedestrian_Part
    drop_pedestrian_heat_view()
    create_pedestrian_heat_view()
    create_pedestrian_heat_map_toggle()
    # Bike_Part
    drop_bike_heat_view()
    create_bike_heat_view()
    create_bike_heat_map_toggle()
    # Working_Part
    drop_heat_view()
    create_heat_view()
    create_heat_map_with_time()
    save_map_as_html(interactive_map, "heat_map_with_time")

289
analysis/plots.py Normal file
View File

@@ -0,0 +1,289 @@
import logging
from db_connector import RemoteDB
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import kaleido
import numpy as np
logging.getLogger("matplotlib").setLevel(logging.WARNING)
# Summary charts ======================================================================================================
def plt_acc_by_year(db):
    acc_year_sql = """
    SELECT COUNT(*), accidentyear AS year FROM accidents
    GROUP BY year
    ORDER BY year;
    """
    result = db.execute_query(acc_year_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(result_df, y='year', x='count', orientation='h', title='No. of Accidents per Year')
    fig.write_image("fig/acc_by_year.png")
    fig.write_html("html/acc_by_year.html")


def plt_acc_by_weekday(db):
    acc_weekday_sql = f"""
    SELECT COUNT(*) AS count, accidentweekday_en AS weekday
    FROM accidents
    GROUP BY weekday
    ORDER BY COUNT(*);
    """
    result = db.execute_query(acc_weekday_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(result_df, y='weekday', x='count', orientation='h', title='No. of Accidents per Weekday')
    fig.write_image("fig/acc_by_weekday.png")
    fig.write_html("html/acc_by_weekday.html")


# def plt_acc_by_day_year_old(db):
#     acc_year_day_sql = """
#     SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
#     FROM accidents
#     GROUP BY weekday, year
#     ORDER BY weekday, year, COUNT(*);
#     """
#
#     result = db.execute_query(acc_year_day_sql)
#     resut_df = pd.DataFrame(result)


def plt_acc_by_day_year(db):
    acc_year_day_sql = """
    SELECT accidentyear AS year, accidentweekday_en AS weekday, COUNT(*) AS count
    FROM accidents
    GROUP BY weekday, year
    ORDER BY weekday, year, COUNT(*);
    """
    result = db.execute_query(acc_year_day_sql)
    df = pd.DataFrame(result)
    print(df.head())
    fig = px.bar(
        df,
        x='weekday',
        y='count',
        title='Accidents by Weekday',
        animation_frame='year',
        labels={'weekday': 'Weekday', 'count': 'Number of Accidents'},
        category_orders={'weekday': ['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']},
    )
    fig.update_yaxes(range=[0, 1000])
    # Customize the layout to include a slider
    fig.update_layout(
        updatemenus=[
            {
                'buttons': [
                    {
                        'args': [None, {'frame': {'duration': 1000, 'redraw': True}, 'fromcurrent': True}],
                        'label': 'Play',
                        'method': 'animate',
                    },
                    {
                        'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate',
                                          'transition': {'duration': 0}}],
                        'label': 'Pause',
                        'method': 'animate',
                    },
                ],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top',
            }
        ],
        sliders=[{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 20},
                'prefix': 'Year:',
                'visible': True,
                'xanchor': 'right',
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'pad': {'b': 10, 't': 50},
            'len': 0.9,
            'x': 0.1,
            'y': 0,
            'steps': [{'label': str(year), 'method': 'animate',
                       'args': [[year], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate'}]}
                      for year in sorted(df['year'].unique())],
        }],
    )
    fig.write_image("fig/plt_acc_by_day_year.png")
    fig.write_html("html/plt_acc_by_day_year.html")


def plt_acc_by_daytime(db):
    acc_weekday_sql = f"""
    SELECT COUNT(*) AS count, accidenthour AS hour
    FROM accidents
    GROUP BY accidenthour
    ORDER BY COUNT(*);
    """
    result = db.execute_query(acc_weekday_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(result_df, y='hour', x='count', orientation='h')
    fig.write_image("fig/acc_by_day.png")
    fig.write_html("html/acc_by_day.html")


# Time Series charts ==================================================================================================
def acc_by_type(db):
    acc_by_type_sql = """
    SELECT accidentyear AS year, accidenttype_en as type, count(*) as count
    FROM accidents
    GROUP BY year, type;
    """
    result = db.execute_query(acc_by_type_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(result_df, x='count', y='year', color='type', barmode='stack', orientation='h', title='Accidents by type')
    fig.update_layout(xaxis_title="No. of Accidents", yaxis_title="Year", legend_title="Accident Type")
    fig.write_image("fig/acc_by_type.png")
    fig.write_html("html/acc_by_type.html")
    #fig.show()


def severity_by_year(db):
    severity_by_year_sql = """
    SELECT accidentyear as year, accidentseveritycategory as code, severity, count(*) as count
    FROM accident_copy
    GROUP BY year, code, severity;
    """
    result = db.execute_query(severity_by_year_sql)
    result_df = pd.DataFrame(result)
    fig = px.bar(result_df, x='year', y='count', color='severity', barmode='group', orientation='v', title="Severity over the years")
    fig.update_layout(xaxis_title="Year", yaxis_title="No. of Accidents", legend_title="Accident Severity")
    fig.write_image("fig/severity_by_year.png")
    fig.write_html("html/severity_by_year.html")
    #fig.show()


def ped_by_month(db):
    ped_by_month_sql = """
    SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
    FROM accidents
    WHERE accidentinvolvingpedestrian IS TRUE
    GROUP BY year, month
    ORDER BY year, month;
    """
    result = db.execute_query(ped_by_month_sql)
    result_df = pd.DataFrame(result)
    result_df['year-month'] = pd.to_datetime(result_df['year'].astype(str) + "-" + result_df['month'].astype(str))
    fig = px.line(result_df, x='year-month', y='count', markers=True)
    fig.update_layout(
        xaxis_title='Year',
        yaxis_title='No. of accidents',
        title='Accidents involving Pedestrians')
    fig.update_xaxes(tickmode='array', tickvals=result_df['year'], ticktext=result_df['year'])
    fig.write_image("fig/ped_by_month.png")
    fig.write_html("html/ped_by_month.html")
    #fig.show()
    #fig.write_html('ped_by_month.html')


def bike_by_month(db):
    bike_by_month_sql = """
    SELECT accidentyear AS year, accidentmonth AS month, count(*) as count
    FROM accidents
    WHERE accidentinvolvingbicycle IS TRUE
    GROUP BY year, month
    ORDER BY year, month;
    """
    result = db.execute_query(bike_by_month_sql)
    result_df = pd.DataFrame(result)
    result_df['year-month'] = pd.to_datetime(result_df['year'].astype(str) + "-" + result_df['month'].astype(str))
    fig = px.line(result_df, x='year-month', y='count', markers=True)
    fig.update_layout(
        xaxis_title='Year',
        yaxis_title='No. of accidents',
        title='Accidents involving Bicycles')
    fig.update_xaxes(tickmode='array', tickvals=result_df['year'], ticktext=result_df['year'])
    fig.write_image("fig/bike_by_month.png")
    fig.write_html("html/bike_by_month.html")
    #fig.show()


# TOO TEDIOUS :/
# def acc_by_involved(db):
#     acc_by_involved_sql = """
#     SELECT accidentyear AS year, accidentmonth AS month, accidentinvolvingpedestrian AS ped,
#     accidentinvolvingbicycle as bike,
#     accidentinvolvingmotorcycle as moto, count(*) as count
#     FROM accidents
#     GROUP BY year, month, ped, bike, moto
#     ORDER BY year, month;
#     """
#
#     result = db.execute_query(acc_by_involved_sql)
#     result_df = pd.DataFrame(result)
#     result_df['year-month'] = pd.to_datetime(result_df['year'].astype(str) + "-" + result_df['month'].astype(str))
#
#     fig = px.line(result_df, x='year-month', y='count', color='')


def severity_by_month(db):
    severity_by_year_sql = """
    SELECT accidentyear as year, accidentmonth as month, severity, count(*) as count
    FROM accident_copy
    GROUP BY year, month, severity
    ORDER BY year, month;
    """
    result = db.execute_query(severity_by_year_sql)
    result_df = pd.DataFrame(result)
    result_df['year-month'] = pd.to_datetime(result_df['year'].astype(str) + "-" + result_df['month'].astype(str))
    fig = px.line(result_df, x='year-month', y='count', color='severity', orientation='v', title='Accident severity')
    #fig = px.bar(result_df, x='year', y='count', color='severity', barmode='group', orientation='v', title="Severity over the years")
    fig.update_layout(xaxis_title="Time", yaxis_title="No. of Accidents", legend_title="Accident Severity")
    fig.write_image("fig/severity_by_month.png")
    fig.write_html("html/severity_by_month.html")
    #fig.show()


# Utilities ===========================================================================================================
def save_as_barplot(df, xname, yname, orientation, file_name):
    pass


def save_as_html():
    pass


if __name__ == "__main__":
    remote_db = RemoteDB()
    try:
        plt_acc_by_year(remote_db)
        plt_acc_by_weekday(remote_db)
        plt_acc_by_daytime(remote_db)
        plt_acc_by_day_year(remote_db)
        ped_by_month(remote_db)
        acc_by_type(remote_db)
        severity_by_year(remote_db)
        severity_by_month(remote_db)
        bike_by_month(remote_db)
    except Exception as e:
        print(f"Exception {e} in plots.py")
    finally:
        remote_db.close()
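All of these plot functions rely on pandas inferring column names from the rows returned by execute_query: SQLAlchemy 2.0 Row objects are named-tuple-like, so pd.DataFrame(result) picks up the SQL aliases as columns. If that inference ever fails, naming the columns explicitly is a safe fallback, e.g.:

```python
import pandas as pd

# Fallback sketch: pass the column names explicitly instead of relying on Row._fields.
# Assumes remote_db is an open RemoteDB instance, as in the __main__ block above.
rows = remote_db.execute_query(
    "SELECT COUNT(*) AS count, accidentyear AS year FROM accidents GROUP BY year;"
)
result_df = pd.DataFrame(rows, columns=['count', 'year'])
```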

661
analysis/plotting.ipynb Normal file

File diff suppressed because one or more lines are too long

454
analysis/tester.ipynb Normal file

File diff suppressed because one or more lines are too long

0
docs/accident_loc_urls.txt Normal file → Executable file
View File

0
docs/all_csv_urls.txt Normal file → Executable file
View File

0
docs/diary.md Normal file → Executable file
View File

0
docs/foot_bike_zaehlung_urls.txt Normal file → Executable file
View File

0
docs/urls.txt Normal file → Executable file
View File

0
docs/verkehrszaehlung_moto_urls.txt Normal file → Executable file
View File

13
docs/wiki.md Normal file → Executable file
View File

@@ -22,4 +22,15 @@ deactivate
See ``requirements.txt``
# Setting up postgres
# Setting up pgadmin as container served by nginx
# Load csv into db HOT FIX
Go to the directory containing the CSVs.
```bash
cd group-1/src/datasets/integrated/ && psql -h localhost -d proj_db -U sebl -p 5433
```
Then manually run the copy commands:
```postgresql
\copy FootBikeCount FROM 'FootBikeCount.csv' WITH CSV HEADER
\copy mivcount FROM 'MivCount.csv' WITH CSV HEADER
```
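The same hot fix can be scripted so it is repeatable. A sketch using psycopg2 (an assumption; any Postgres driver with COPY support works), run from src/datasets/integrated/:

```python
# Repeatable version of the \copy hot fix above, run from src/datasets/integrated/.
import psycopg2

conn = psycopg2.connect(host='localhost', port=5433, dbname='proj_db', user='sebl')
with conn, conn.cursor() as cur:
    for table, csv_file in [('FootBikeCount', 'FootBikeCount.csv'),
                            ('mivcount', 'MivCount.csv')]:
        with open(csv_file) as f:
            cur.copy_expert(f"COPY {table} FROM STDIN WITH CSV HEADER", f)
```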

27
requirements.txt Normal file → Executable file
View File

@@ -1,3 +1,4 @@
ansi2html==1.9.1
anyio==4.0.0
appnope==0.1.3
argon2-cffi==23.1.0
@@ -10,6 +11,8 @@ Babel==2.13.1
beautifulsoup4==4.12.2
black==23.11.0
bleach==6.1.0
blinker==1.7.0
branca==0.7.0
certifi==2023.7.22
cffi==1.16.0
charset-normalizer==3.3.2
@@ -17,20 +20,32 @@ click==8.1.7
click-plugins==1.1.1
cligj==0.7.2
comm==0.2.0
contourpy==1.2.0
cycler==0.12.1
dash==2.14.2
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-table==5.0.0
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastjsonschema==2.19.0
fiona==1.9.5
Flask==3.0.0
folium==0.15.1
fonttools==4.47.0
fqdn==1.5.1
GeoAlchemy2==0.14.2
geopandas==0.14.1
greenlet==3.0.3
idna==3.4
importlib-metadata==7.0.1
ipykernel==6.26.0
ipython==8.17.2
ipywidgets==8.1.1
isoduration==20.11.0
itsdangerous==2.1.2
jedi==0.19.1
Jinja2==3.1.2
json5==0.9.14
@@ -49,7 +64,9 @@ jupyterlab==4.0.8
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.9
jupyterlab_server==2.25.1
kiwisolver==1.4.5
MarkupSafe==2.1.3
matplotlib==3.8.2
matplotlib-inline==0.1.6
mistune==3.0.2
mypy-extensions==1.0.0
@@ -67,7 +84,9 @@ pandocfilters==1.5.0
parso==0.8.3
pathspec==0.11.2
pexpect==4.8.0
pillow==10.2.0
platformdirs==4.0.0
plotly==5.18.0
prometheus-client==0.18.0
prompt-toolkit==3.0.41
psutil==5.9.6
@@ -76,6 +95,7 @@ ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
Pygments==2.16.1
pyparsing==3.1.1
pyproj==3.6.1
python-dateutil==2.8.2
python-json-logger==2.0.7
@@ -86,6 +106,7 @@ qtconsole==5.5.1
QtPy==2.4.1
referencing==0.31.0
requests==2.31.0
retrying==1.3.4
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.13.0
@@ -96,6 +117,7 @@ sniffio==1.3.0
soupsieve==2.5
SQLAlchemy==2.0.23
stack-data==0.6.3
tenacity==8.2.3
terminado==0.18.0
tinycss2==1.2.1
tornado==6.3.3
@@ -109,4 +131,9 @@ wcwidth==0.2.10
webcolors==1.13
webencodings==0.5.1
websocket-client==1.6.4
Werkzeug==3.0.1
widgetsnbextension==4.0.9
xyzservices==2023.10.1
zipp==3.17.0
sshtunnel~=0.4.0

20
src/data_utils.py Normal file → Executable file
View File

@@ -130,6 +130,26 @@ def load_file_from_api(api_link, target_name, integrated_dir):
def save_dataframe_to_csv(df, integrated_dir, filename):
    pass


# Needed since bool(str) would otherwise turn every non-empty string into True
def convert_to_boolean(value):
    true_values = ['true', '1', 'yes']
    false_values = ['false', '0', 'no']
    if isinstance(value, str):
        value = value.lower()
        if value in true_values:
            return True
        elif value in false_values:
            return False
        else:
            raise ValueError(f"Invalid boolean string: {value}")
    if isinstance(value, (int, float)):
        return bool(value)
    raise ValueError(f"Invalid boolean value type: {type(value)}")


if __name__ == "__main__":
    csv_urls_file = '../docs/all_csv_urls.txt'
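The reason this helper exists is a classic pitfall: bool('false') is True in Python, because any non-empty string is truthy, so string-typed GeoJSON booleans need an explicit mapping. A few example calls:

```python
from data_utils import convert_to_boolean

print(bool('false'))                # True - the pitfall the helper avoids
print(convert_to_boolean('false'))  # False
print(convert_to_boolean('TRUE'))   # True (case-insensitive)
print(convert_to_boolean(0))        # False
```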

File diff suppressed because one or more lines are too long

0
src/ensure_dirs_exist.py Normal file → Executable file
View File

14
src/fill_db.py Normal file → Executable file
View File

@@ -22,10 +22,10 @@ Make sure db_info contain the correct credentials
"""
db_info = {
    'host': 'localhost',
-    'database': 'test-db23',
+    'database': 'proj_db',
-    'port': '5432',
+    'port': '5433',
-    'user': 'seb',
+    'user': 'sebl',
-    'password': '',
+    'password': 'hatred-pollute-digraph-disciple',
}
setup_tables_script = 'setup_tables.sql'
load_csvs_into_db_script = 'load_csvs_into_db.sql'
@@ -98,11 +98,11 @@ if __name__ == '__main__':
        db_info['port'],
        accident_table_name)
    logger.info('Finished loading accident geojson into db using bash script.')
-    geojson_loader(signaled_speeds_file,
+    run_geojson_loader_script(accident_loader_script,
+        signaled_speeds_file,
        db_info['database'],
        db_info['user'],
        db_info['password'],
        db_info['host'],
        db_info['port'],
-        signaled_speeds_table_name,
-        modus='overwrite')
+        signaled_speeds_table_name)

25
src/integrate.py Normal file → Executable file
View File

@@ -113,7 +113,7 @@ def process_foot_bike_data(files_present=True):
    }).reset_index()
    dt_obj = pd.to_datetime(fb_df_grouped['DATE'])
    days = dt_obj.dt.weekday
-    fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x])
+    fb_df_grouped.loc[:, 'Weekday_en'] = days.map(lambda x: weekday_names[x])
    cleaned_fb_df = fb_df_grouped
    cleaned_fb_df['ID'] = cleaned_fb_df.index + 1
    cleaned_fb_df = cleaned_fb_df[['ID', 'NORD', 'OST', 'DATE', 'HRS', 'VELO_IN', 'VELO_OUT', 'FUSS_IN',
@@ -125,20 +125,20 @@ def process_foot_bike_data(files_present=True):
def process_miv_data(files_present=True):
    miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=files_present)
+    logger.debug("Unified MIV dataframe created.")
    miv_df_unified[['Datum', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True)
    miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True)
    miv_cols_to_keep = ['MSID', 'ZSID', 'Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus',
                        'Datum', 'Hrs',]
-    miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep]
+    miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep].copy()
    dt_obj = pd.to_datetime(miv_df_cols_dropped['Datum'])
    days = dt_obj.dt.weekday
    miv_df_cols_dropped.loc[:, 'Weekday_en'] = days.map(lambda x: weekday_names[x])
    miv_df_cols_dropped.loc[:, 'AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)
-    miv_df_cols_dropped[:, 'ZSID'] = miv_df_cols_dropped['ZSID'].fillna('Missing').astype(str)
+    miv_df_cols_dropped.loc[:, 'ZSID'] = miv_df_cols_dropped['ZSID'].fillna('Missing').astype(str)
    miv_df_cols_dropped['ID'] = (miv_df_cols_dropped.index + 1).copy()
    cleaned_miv_df = miv_df_cols_dropped[['ID', 'MSID', 'ZSID', 'Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge',
@@ -153,12 +153,18 @@ def process_accident_data(file_present: bool = True):
    if not file_present:
        du.process_urls(data_dir, accident_file_url)
    acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
+    logger.debug(acc_df_unified[['AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
+                                 'AccidentInvolvingMotorcycle']].head())
    acc_cols_to_keep = ['AccidentUID', 'AccidentYear', 'AccidentMonth', 'AccidentWeekDay_en', 'AccidentHour',
                        'AccidentLocation_CHLV95_N', 'AccidentLocation_CHLV95_E', 'AccidentType_en', 'AccidentType',
                        'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
                        'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en',
                        'geometry']
-    cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
+    # Need to convert boolean strings "by hand" already here, otherwise all values become 'True'
+    for col in ['AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
+                'AccidentInvolvingMotorcycle']:
+        acc_df_unified[col] = acc_df_unified[col].apply(du.convert_to_boolean)
+    cleaned_acc_df = acc_df_unified[acc_cols_to_keep].copy()
    cleaned_acc_df.rename(columns={
        'AccidentLocation_CHLV95_E': 'EKoord',
        'AccidentLocation_CHLV95_N': 'NKoord',
@@ -180,7 +186,7 @@ def process_all_data_sources(fb_present=True, miv_present=True, accident_present
    """
    # ensure_dirs_exist(data_dir, integrated_dir)
    logger.info("Started processing all data sources.")
-    fb_to_integrated(fb_present)
+    #fb_to_integrated(fb_present)
    miv_to_integrated_csv(miv_present)
@@ -239,7 +245,8 @@ def load_tempo_geojson_from_api_to_local():
if __name__ == '__main__':
    # ensure_dirs_exist(data_dir, integrated_dir, logs_dir)
-    # process_all_data_sources(True, True, True)
+    #process_accident_data()
+    #process_all_data_sources(True, True, False)
    # miv_to_integrated_csv()
-    # acc_to_cleaned_geojson()
+    acc_to_cleaned_geojson()
-    load_tempo_geojson_from_api_to_local()
+    #load_tempo_geojson_from_api_to_local()

0
src/load_accidents_into_db.sh Normal file → Executable file
View File

4
src/load_csvs_into_db.sql Normal file → Executable file
View File

@@ -1,7 +1,7 @@
-COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv'
+COPY FootBikeCount FROM '/home/sebl/group-1/src/datasets/integrated/FootBikeCount.csv'
DELIMITER ','
CSV HEADER;
-COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv'
+COPY MivCount FROM '/home/sebl/group-1/src/datasets/integrated/MivCount.csv'
DELIMITER ','
CSV HEADER;

View File

@@ -0,0 +1,7 @@
COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv'
DELIMITER ','
CSV HEADER;
COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv'
DELIMITER ','
CSV HEADER;

0
src/queries.sql Normal file → Executable file
View File

18
src/setup_tables.sql Normal file → Executable file
View File

@@ -6,6 +6,8 @@ DROP TABLE IF EXISTS Accidents;
DROP TABLE IF EXISTS MivCount;
drop table if exists signaled_speeds;
CREATE TABLE FootBikeCount (
    ID INTEGER ,
@@ -69,4 +71,20 @@ CREATE TABLE Accidents (
    PRIMARY KEY (AccidentUID) ,
    CHECK ( AccidentHour BETWEEN 0 AND 23) ,
    CHECK (AccidentWeekDay_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'))
);
-- Ad hoc via generate sql functionality in pycharm
create table signaled_speeds
(
    id serial
        primary key,
    ausnahmen_fahrverbot varchar,
    fahrverbot_ssv varchar,
    lokalisationsname varchar,
    objectid double precision,
    publiziert_vsi_datum timestamp with time zone,
    rechtskraeftig_datum timestamp with time zone,
    temporegime varchar,
    temporegime_technical varchar,
    umgesetzt_datum timestamp with time zone,
    wkb_geometry geometry(Point, 4326) --changed from MultiLineString
);
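Since wkb_geometry was changed from MultiLineString to Point, it is worth confirming what actually got loaded. A quick check via the RemoteDB helper from analysis/ (GeometryType and ST_SRID are standard PostGIS functions):

```python
# Sanity-check the geometry type and SRID of the loaded signaled_speeds rows.
from db_connector import RemoteDB

db = RemoteDB()
try:
    rows = db.execute_query(
        "SELECT DISTINCT GeometryType(wkb_geometry), ST_SRID(wkb_geometry) FROM signaled_speeds;"
    )
    print(rows)  # expect something like [('POINT', 4326)]
finally:
    db.close()
```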

View File

@@ -14,8 +14,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
-"end_time": "2023-12-03T15:47:10.110909Z",
+"end_time": "2024-01-03T14:15:33.076096175Z",
-"start_time": "2023-12-03T15:47:09.656556Z"
+"start_time": "2024-01-03T14:15:32.881183740Z"
}
},
"id": "be55b25929d95559"
@@ -23,40 +23,146 @@
{
"cell_type": "code",
"execution_count": 2,
-"outputs": [
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"/Users/seb/Projects/repos/group-1/src/integrate.py:132: SettingWithCopyWarning: \n",
-"A value is trying to be set on a copy of a slice from a DataFrame.\n",
-"Try using .loc[row_indexer,col_indexer] = value instead\n",
-"\n",
-"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-" miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n",
-"/Users/seb/Projects/repos/group-1/src/integrate.py:133: SettingWithCopyWarning: \n",
-"A value is trying to be set on a copy of a slice from a DataFrame.\n",
-"Try using .loc[row_indexer,col_indexer] = value instead\n",
-"\n",
-"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-" miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)\n"
-]
-}
-],
+"outputs": [],
"source": [
"\n",
-"miv_df = intr.process_miv_data()\n",
+"#miv_df = intr.process_miv_data()\n",
"#fb_data = intr.process_foot_bike_data()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
-"end_time": "2023-12-03T15:49:07.561603Z",
+"end_time": "2024-01-03T14:15:33.085151332Z",
-"start_time": "2023-12-03T15:47:14.759104Z"
+"start_time": "2024-01-03T14:15:33.076608687Z"
}
},
"id": "dd3831953afdeb72"
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"#uplicate_rows = miv_df[miv_df.duplicated()]\n",
"#print(duplicate_rows.shape[0])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:15:33.109662807Z",
"start_time": "2024-01-03T14:15:33.078813925Z"
}
},
"id": "14471cd78389ce4d"
},
{
"cell_type": "code",
"execution_count": 17,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"u_string RoadTrafficAccidentLocations.json\n",
"Filepath: datasets/RoadTrafficAccidentLocations.json\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-01-03 15:17:14,309 - integrate.py - DEBUG - AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n",
"0 false false \n",
"1 false true \n",
"2 false false \n",
"3 false false \n",
"4 false false \n",
"\n",
" AccidentInvolvingMotorcycle \n",
"0 false \n",
"1 false \n",
"2 false \n",
"3 false \n",
"4 false \n"
]
},
{
"data": {
"text/plain": "Empty DataFrame\nColumns: [AccidentUID, AccidentYear, AccidentMonth, AccidentWeekDay_en, AccidentHour, NKoord, EKoord, AccidentType_en, AccidentType, AccidentSeverityCategory, AccidentInvolvingPedestrian, AccidentInvolvingBicycle, AccidentInvolvingMotorcycle, RoadType, RoadType_en, geometry]\nIndex: []",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>AccidentUID</th>\n <th>AccidentYear</th>\n <th>AccidentMonth</th>\n <th>AccidentWeekDay_en</th>\n <th>AccidentHour</th>\n <th>NKoord</th>\n <th>EKoord</th>\n <th>AccidentType_en</th>\n <th>AccidentType</th>\n <th>AccidentSeverityCategory</th>\n <th>AccidentInvolvingPedestrian</th>\n <th>AccidentInvolvingBicycle</th>\n <th>AccidentInvolvingMotorcycle</th>\n <th>RoadType</th>\n <th>RoadType_en</th>\n <th>geometry</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>"
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"acc_df = intr.process_accident_data(True)\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:17:14.568151856Z",
"start_time": "2024-01-03T14:17:00.375936463Z"
}
},
"id": "f86bc612060b17a4"
},
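The DEBUG output above shows the involvement flags arriving as the strings 'false' and 'true', while the dtypes printed in a later cell are plain bool, so process_accident_data presumably converts them along the way. A sketch of one such conversion on toy data; the function's actual body is not part of this excerpt:

import pandas as pd

# Toy stand-in for the raw accident records parsed from the JSON source.
df = pd.DataFrame({'AccidentInvolvingPedestrian': ['false', 'true', 'false']})

# Map the lowercase string flags onto real booleans, column by column.
bool_cols = ['AccidentInvolvingPedestrian']
for col in bool_cols:
    df[col] = df[col].map({'true': True, 'false': False}).astype(bool)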
{
"cell_type": "code",
"execution_count": 26,
"outputs": [
{
"data": {
"text/plain": " AccidentUID AccidentYear AccidentMonth \\\n0 A2D2677533867004E0430A865E337004 2011 1 \n1 9FD6441F802C20A6E0430A865E3320A6 2011 1 \n2 9FDA0DC4856A6094E0430A865E336094 2011 1 \n3 A3B66E42396E6000E0430A865E336000 2011 1 \n4 9FDA0DBE8CCE9096E0430A865E339096 2011 1 \n\n AccidentWeekDay_en AccidentHour NKoord EKoord \\\n0 Saturday 0 1245194 2684605 \n1 Saturday 1 1246980 2682382 \n2 Saturday 2 1247749 2682791 \n3 Saturday 2 1247102 2681199 \n4 Saturday 3 1250690 2682479 \n\n AccidentType_en AccidentType \\\n0 Accident with skidding or self-accident at0 \n1 Accident with skidding or self-accident at0 \n2 Accident with skidding or self-accident at0 \n3 Accident when crossing the lane(s) at5 \n4 Accident with skidding or self-accident at0 \n\n AccidentSeverityCategory AccidentInvolvingPedestrian \\\n0 as4 True \n1 as3 True \n2 as4 True \n3 as3 True \n4 as4 True \n\n AccidentInvolvingBicycle AccidentInvolvingMotorcycle RoadType RoadType_en \\\n0 True True rt433 Minor road \n1 True True rt433 Minor road \n2 True True rt439 Other \n3 True True rt433 Minor road \n4 True True rt433 Minor road \n\n geometry \n0 POINT Z (8.55841 47.352168 0) \n1 POINT Z (8.52932 47.368512 0) \n2 POINT Z (8.534877 47.375376 0) \n3 POINT Z (8.513682 47.369756 0) \n4 POINT Z (8.531288 47.401865 0) ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>AccidentUID</th>\n <th>AccidentYear</th>\n <th>AccidentMonth</th>\n <th>AccidentWeekDay_en</th>\n <th>AccidentHour</th>\n <th>NKoord</th>\n <th>EKoord</th>\n <th>AccidentType_en</th>\n <th>AccidentType</th>\n <th>AccidentSeverityCategory</th>\n <th>AccidentInvolvingPedestrian</th>\n <th>AccidentInvolvingBicycle</th>\n <th>AccidentInvolvingMotorcycle</th>\n <th>RoadType</th>\n <th>RoadType_en</th>\n <th>geometry</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>A2D2677533867004E0430A865E337004</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>0</td>\n <td>1245194</td>\n <td>2684605</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as4</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.55841 47.352168 0)</td>\n </tr>\n <tr>\n <th>1</th>\n <td>9FD6441F802C20A6E0430A865E3320A6</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>1</td>\n <td>1246980</td>\n <td>2682382</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as3</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.52932 47.368512 0)</td>\n </tr>\n <tr>\n <th>2</th>\n <td>9FDA0DC4856A6094E0430A865E336094</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>2</td>\n <td>1247749</td>\n <td>2682791</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as4</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt439</td>\n <td>Other</td>\n <td>POINT Z (8.534877 47.375376 0)</td>\n </tr>\n <tr>\n <th>3</th>\n <td>A3B66E42396E6000E0430A865E336000</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>2</td>\n <td>1247102</td>\n <td>2681199</td>\n <td>Accident when crossing the lane(s)</td>\n <td>at5</td>\n <td>as3</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.513682 47.369756 0)</td>\n </tr>\n <tr>\n <th>4</th>\n <td>9FDA0DBE8CCE9096E0430A865E339096</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>3</td>\n <td>1250690</td>\n <td>2682479</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as4</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.531288 47.401865 0)</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#acc_df[acc_df['AccidentInvolvingBicycle']==True]\n",
"acc_df.head()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:19:26.508020674Z",
"start_time": "2024-01-03T14:19:26.500683554Z"
}
},
"id": "25378c6cd445a163"
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "array(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',\n 'Friday'], dtype=object)"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"acc_df.head()\n",
"acc_df['AccidentWeekDay_en'].unique()\n",
"#acc_df.dtypes\n",
"\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:15:48.660494125Z",
"start_time": "2024-01-03T14:15:48.656222488Z"
}
},
"id": "6affbeea6c7cf3ef"
},
{
"cell_type": "code",
"execution_count": 6,
@@ -65,112 +171,139 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Accident Columns:\n",
"AccidentUID object\n",
"AccidentYear int64\n",
"AccidentMonth int64\n",
"AccidentWeekDay_en object\n",
"AccidentHour int64\n",
"NKoord int64\n",
"EKoord int64\n",
"AccidentType_en object\n",
"AccidentType object\n",
"AccidentSeverityCategory object\n",
"AccidentInvolvingPedestrian bool\n",
"AccidentInvolvingBicycle bool\n",
"AccidentInvolvingMotorcycle bool\n",
"RoadType object\n",
"RoadType_en object\n",
"geometry object\n",
"dtype: object\n"
]
}
],
"source": [
"print(\"Accident Columns:\")\n",
"print(acc_df.dtypes)\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:15:48.708958773Z",
"start_time": "2024-01-03T14:15:48.661313317Z"
}
},
"id": "14471cd78389ce4d"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"acc_df = intr.process_accident_data(True)"
],
"metadata": {
"collapsed": false
},
"id": "f86bc612060b17a4"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"acc_df.head()\n",
"acc_df['AccidentWeekDay_en'].unique()\n",
"#acc_df.dtypes\n",
"\n"
],
"metadata": {
"collapsed": false
},
"id": "6affbeea6c7cf3ef"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"print(\"Accident Columns:\")\n",
"print(acc_df.dtypes)\n",
"print()\n",
"print(\"MIV Columns:\")\n",
"print(miv_df.dtypes)\n",
"print()\n",
"print(\"FB Cols:\")\n",
"print(fb_data.dtypes)"
],
"metadata": {
"collapsed": false
},
"id": "242041cd369d8454" "id": "242041cd369d8454"
}, },
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"data": {
"text/plain": "0 1\n1 2\n2 3\n3 4\n4 5\n ... \n55821 55822\n55822 55823\n55823 55824\n55824 55825\n55825 55826\nName: ID, Length: 55826, dtype: int64"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"acc_df['ID'] = acc_df.index +1\n", "acc_df['ID'] = acc_df.index +1\n",
"acc_df[('ID')]" "acc_df[('ID')]"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:15:48.720379971Z",
"start_time": "2024-01-03T14:15:48.692026239Z"
}
},
"id": "1841925ee109a417"
},
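One caveat on the surrogate key built above: acc_df.index + 1 only yields a gap-free 1..n ID while the frame still carries its default RangeIndex. A sketch of a safer variant on toy data, in case rows were filtered out earlier:

import pandas as pd

# Toy stand-in for acc_df with a non-contiguous index left over from a filter;
# index + 1 on this frame would produce the IDs 1, 3 and 6.
acc_df = pd.DataFrame({'AccidentUID': ['a', 'b', 'c']}, index=[0, 2, 5])

# Resetting the index first guarantees consecutive IDs.
acc_df = acc_df.reset_index(drop=True)
acc_df['ID'] = acc_df.index + 1
print(acc_df['ID'].tolist())  # -> [1, 2, 3]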
{
"cell_type": "code",
"execution_count": 16,
"outputs": [
{
"print(\"MIV unqiue:\", miv_df['EKoord'])\n", "data": {
"print(\"Acc unique:\", acc_df['RoadType'].unique)\n", "text/plain": "Empty DataFrame\nColumns: [AccidentUID, AccidentYear, AccidentMonth, AccidentWeekDay_en, AccidentHour, NKoord, EKoord, AccidentType_en, AccidentType, AccidentSeverityCategory, AccidentInvolvingPedestrian, AccidentInvolvingBicycle, AccidentInvolvingMotorcycle, RoadType, RoadType_en, geometry, ID]\nIndex: []",
"print(\"FB unique: \", fb_data['DATE'])\n" "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>AccidentUID</th>\n <th>AccidentYear</th>\n <th>AccidentMonth</th>\n <th>AccidentWeekDay_en</th>\n <th>AccidentHour</th>\n <th>NKoord</th>\n <th>EKoord</th>\n <th>AccidentType_en</th>\n <th>AccidentType</th>\n <th>AccidentSeverityCategory</th>\n <th>AccidentInvolvingPedestrian</th>\n <th>AccidentInvolvingBicycle</th>\n <th>AccidentInvolvingMotorcycle</th>\n <th>RoadType</th>\n <th>RoadType_en</th>\n <th>geometry</th>\n <th>ID</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [],
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:16:39.887367022Z",
"start_time": "2024-01-03T14:16:39.881262890Z"
}
},
"id": "f6d752ea17eda341"
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": " AccidentUID AccidentYear AccidentMonth \\\n0 A2D2677533867004E0430A865E337004 2011 1 \n1 9FD6441F802C20A6E0430A865E3320A6 2011 1 \n2 9FDA0DC4856A6094E0430A865E336094 2011 1 \n3 A3B66E42396E6000E0430A865E336000 2011 1 \n4 9FDA0DBE8CCE9096E0430A865E339096 2011 1 \n\n AccidentWeekDay_en AccidentHour NKoord EKoord \\\n0 Saturday 0 1245194 2684605 \n1 Saturday 1 1246980 2682382 \n2 Saturday 2 1247749 2682791 \n3 Saturday 2 1247102 2681199 \n4 Saturday 3 1250690 2682479 \n\n AccidentType_en AccidentType \\\n0 Accident with skidding or self-accident at0 \n1 Accident with skidding or self-accident at0 \n2 Accident with skidding or self-accident at0 \n3 Accident when crossing the lane(s) at5 \n4 Accident with skidding or self-accident at0 \n\n AccidentSeverityCategory AccidentInvolvingPedestrian \\\n0 as4 True \n1 as3 True \n2 as4 True \n3 as3 True \n4 as4 True \n\n AccidentInvolvingBicycle AccidentInvolvingMotorcycle RoadType RoadType_en \\\n0 True True rt433 Minor road \n1 True True rt433 Minor road \n2 True True rt439 Other \n3 True True rt433 Minor road \n4 True True rt433 Minor road \n\n geometry ID \n0 POINT Z (8.55841 47.352168 0) 1 \n1 POINT Z (8.52932 47.368512 0) 2 \n2 POINT Z (8.534877 47.375376 0) 3 \n3 POINT Z (8.513682 47.369756 0) 4 \n4 POINT Z (8.531288 47.401865 0) 5 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>AccidentUID</th>\n <th>AccidentYear</th>\n <th>AccidentMonth</th>\n <th>AccidentWeekDay_en</th>\n <th>AccidentHour</th>\n <th>NKoord</th>\n <th>EKoord</th>\n <th>AccidentType_en</th>\n <th>AccidentType</th>\n <th>AccidentSeverityCategory</th>\n <th>AccidentInvolvingPedestrian</th>\n <th>AccidentInvolvingBicycle</th>\n <th>AccidentInvolvingMotorcycle</th>\n <th>RoadType</th>\n <th>RoadType_en</th>\n <th>geometry</th>\n <th>ID</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>A2D2677533867004E0430A865E337004</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>0</td>\n <td>1245194</td>\n <td>2684605</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as4</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.55841 47.352168 0)</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>9FD6441F802C20A6E0430A865E3320A6</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>1</td>\n <td>1246980</td>\n <td>2682382</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as3</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.52932 47.368512 0)</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>9FDA0DC4856A6094E0430A865E336094</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>2</td>\n <td>1247749</td>\n <td>2682791</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as4</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt439</td>\n <td>Other</td>\n <td>POINT Z (8.534877 47.375376 0)</td>\n <td>3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>A3B66E42396E6000E0430A865E336000</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>2</td>\n <td>1247102</td>\n <td>2681199</td>\n <td>Accident when crossing the lane(s)</td>\n <td>at5</td>\n <td>as3</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.513682 47.369756 0)</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>9FDA0DBE8CCE9096E0430A865E339096</td>\n <td>2011</td>\n <td>1</td>\n <td>Saturday</td>\n <td>3</td>\n <td>1250690</td>\n <td>2682479</td>\n <td>Accident with skidding or self-accident</td>\n <td>at0</td>\n <td>as4</td>\n <td>True</td>\n <td>True</td>\n <td>True</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>POINT Z (8.531288 47.401865 0)</td>\n <td>5</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"acc_df.head()" "acc_df.head()"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:15:48.790965333Z",
"start_time": "2024-01-03T14:15:48.759351058Z"
}
},
"id": "a159cafa9c227b88"
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"ename": "AttributeError",
"evalue": "'str' object has no attribute 'wkt'",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[10], line 15\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[38;5;66;03m#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;66;03m#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 12\u001B[0m geometry_column \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[0;32m---> 15\u001B[0m acc_df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[43macc_df\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mgeometry\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mapply\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43;01mlambda\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mgeom\u001B[49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[43mWKTElement\u001B[49m\u001B[43m(\u001B[49m\u001B[43mgeom\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mwkt\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msrid\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m4326\u001B[39;49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 17\u001B[0m acc_df\u001B[38;5;241m.\u001B[39mto_sql(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124maccidents\u001B[39m\u001B[38;5;124m'\u001B[39m, engine, if_exists\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mreplace\u001B[39m\u001B[38;5;124m'\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m, dtype\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m: Geometry(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPOINT\u001B[39m\u001B[38;5;124m'\u001B[39m, srid\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4326\u001B[39m)})\n",
"File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/series.py:4760\u001B[0m, in \u001B[0;36mSeries.apply\u001B[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001B[0m\n\u001B[1;32m 4625\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mapply\u001B[39m(\n\u001B[1;32m 4626\u001B[0m \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 4627\u001B[0m func: AggFuncType,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 4632\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs,\n\u001B[1;32m 4633\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m DataFrame \u001B[38;5;241m|\u001B[39m Series:\n\u001B[1;32m 4634\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 4635\u001B[0m \u001B[38;5;124;03m Invoke function on values of Series.\u001B[39;00m\n\u001B[1;32m 4636\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 4751\u001B[0m \u001B[38;5;124;03m dtype: float64\u001B[39;00m\n\u001B[1;32m 4752\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m 4753\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mSeriesApply\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 4754\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4755\u001B[0m \u001B[43m \u001B[49m\u001B[43mfunc\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4756\u001B[0m \u001B[43m \u001B[49m\u001B[43mconvert_dtype\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mconvert_dtype\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4757\u001B[0m \u001B[43m \u001B[49m\u001B[43mby_row\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mby_row\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4758\u001B[0m \u001B[43m \u001B[49m\u001B[43margs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4759\u001B[0m \u001B[43m \u001B[49m\u001B[43mkwargs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m-> 4760\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mapply\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
"File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/apply.py:1207\u001B[0m, in \u001B[0;36mSeriesApply.apply\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 1204\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mapply_compat()\n\u001B[1;32m 1206\u001B[0m \u001B[38;5;66;03m# self.func is Callable\u001B[39;00m\n\u001B[0;32m-> 1207\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mapply_standard\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
"File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/apply.py:1287\u001B[0m, in \u001B[0;36mSeriesApply.apply_standard\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 1281\u001B[0m \u001B[38;5;66;03m# row-wise access\u001B[39;00m\n\u001B[1;32m 1282\u001B[0m \u001B[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001B[39;00m\n\u001B[1;32m 1283\u001B[0m \u001B[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001B[39;00m\n\u001B[1;32m 1284\u001B[0m \u001B[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001B[39;00m\n\u001B[1;32m 1285\u001B[0m \u001B[38;5;66;03m# Categorical (GH51645).\u001B[39;00m\n\u001B[1;32m 1286\u001B[0m action \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mignore\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(obj\u001B[38;5;241m.\u001B[39mdtype, CategoricalDtype) \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m-> 1287\u001B[0m mapped \u001B[38;5;241m=\u001B[39m \u001B[43mobj\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_map_values\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1288\u001B[0m \u001B[43m \u001B[49m\u001B[43mmapper\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcurried\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mna_action\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maction\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconvert\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconvert_dtype\u001B[49m\n\u001B[1;32m 1289\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1291\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(mapped) \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(mapped[\u001B[38;5;241m0\u001B[39m], ABCSeries):\n\u001B[1;32m 1292\u001B[0m \u001B[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001B[39;00m\n\u001B[1;32m 1293\u001B[0m \u001B[38;5;66;03m# See also GH#25959 regarding EA support\u001B[39;00m\n\u001B[1;32m 1294\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m obj\u001B[38;5;241m.\u001B[39m_constructor_expanddim(\u001B[38;5;28mlist\u001B[39m(mapped), index\u001B[38;5;241m=\u001B[39mobj\u001B[38;5;241m.\u001B[39mindex)\n",
"File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/base.py:921\u001B[0m, in \u001B[0;36mIndexOpsMixin._map_values\u001B[0;34m(self, mapper, na_action, convert)\u001B[0m\n\u001B[1;32m 918\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(arr, ExtensionArray):\n\u001B[1;32m 919\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m arr\u001B[38;5;241m.\u001B[39mmap(mapper, na_action\u001B[38;5;241m=\u001B[39mna_action)\n\u001B[0;32m--> 921\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43malgorithms\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmap_array\u001B[49m\u001B[43m(\u001B[49m\u001B[43marr\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmapper\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mna_action\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mna_action\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconvert\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mconvert\u001B[49m\u001B[43m)\u001B[49m\n",
"File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/algorithms.py:1814\u001B[0m, in \u001B[0;36mmap_array\u001B[0;34m(arr, mapper, na_action, convert)\u001B[0m\n\u001B[1;32m 1812\u001B[0m values \u001B[38;5;241m=\u001B[39m arr\u001B[38;5;241m.\u001B[39mastype(\u001B[38;5;28mobject\u001B[39m, copy\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n\u001B[1;32m 1813\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m na_action \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m-> 1814\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mlib\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmap_infer\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalues\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmapper\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconvert\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mconvert\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1815\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 1816\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m lib\u001B[38;5;241m.\u001B[39mmap_infer_mask(\n\u001B[1;32m 1817\u001B[0m values, mapper, mask\u001B[38;5;241m=\u001B[39misna(values)\u001B[38;5;241m.\u001B[39mview(np\u001B[38;5;241m.\u001B[39muint8), convert\u001B[38;5;241m=\u001B[39mconvert\n\u001B[1;32m 1818\u001B[0m )\n",
"File \u001B[0;32mlib.pyx:2920\u001B[0m, in \u001B[0;36mpandas._libs.lib.map_infer\u001B[0;34m()\u001B[0m\n",
"Cell \u001B[0;32mIn[10], line 15\u001B[0m, in \u001B[0;36m<lambda>\u001B[0;34m(geom)\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[38;5;66;03m#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;66;03m#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 12\u001B[0m geometry_column \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[0;32m---> 15\u001B[0m acc_df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m acc_df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m]\u001B[38;5;241m.\u001B[39mapply(\u001B[38;5;28;01mlambda\u001B[39;00m geom: WKTElement(\u001B[43mgeom\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mwkt\u001B[49m, srid\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4326\u001B[39m))\n\u001B[1;32m 17\u001B[0m acc_df\u001B[38;5;241m.\u001B[39mto_sql(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124maccidents\u001B[39m\u001B[38;5;124m'\u001B[39m, engine, if_exists\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mreplace\u001B[39m\u001B[38;5;124m'\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m, dtype\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m: Geometry(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPOINT\u001B[39m\u001B[38;5;124m'\u001B[39m, srid\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4326\u001B[39m)})\n",
"\u001B[0;31mAttributeError\u001B[0m: 'str' object has no attribute 'wkt'"
]
}
],
"source": [ "source": [
"from sqlalchemy import create_engine\n", "from sqlalchemy import create_engine\n",
"from geoalchemy2 import Geometry, WKTElement\n", "from geoalchemy2 import Geometry, WKTElement\n",
@ -192,7 +325,11 @@
"\n" "\n"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-03T14:15:49.286594299Z",
"start_time": "2024-01-03T14:15:48.770718191Z"
}
},
"id": "fa76af8343443d7a"
},
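The AttributeError above is raised because the geometry column already holds WKT strings such as 'POINT Z (8.55841 47.352168 0)', so calling .wkt on them fails; WKTElement expects the WKT text directly. A sketch of a guard that accepts both plain strings and shapely geometries; this is one possible fix, not necessarily the notebook's final code:

import pandas as pd
from geoalchemy2 import WKTElement

def to_wkt_element(geom, srid=4326):
    # Pass WKT strings through unchanged; only shapely geometries need .wkt.
    wkt_text = geom if isinstance(geom, str) else geom.wkt
    return WKTElement(wkt_text, srid=srid)

# Toy stand-in for acc_df, whose geometry column holds WKT strings.
acc_df = pd.DataFrame({'geometry': ['POINT Z (8.55841 47.352168 0)']})
acc_df['geometry'] = acc_df['geometry'].apply(to_wkt_element)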
@@ -204,7 +341,10 @@
"engine.dispose()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-01-03T14:15:49.287300519Z"
}
},
"id": "bc0a23a5126e76c2"
}