Add text files containing relevant datasource URLs.
First diary entries written. Wiki entries added on how to set up a Python virtual env for the project.
parent 8cf5940a4d
commit 77bf140efc
1  docs/accident_loc_urls.txt  Normal file
@@ -0,0 +1 @@
https://data.stadt-zuerich.ch/dataset/sid_dav_strassenverkehrsunfallorte/download/RoadTrafficAccidentLocations.json
22  docs/all_csv_urls.txt  Normal file
@@ -0,0 +1,22 @@
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2012.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2013.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2014.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2015.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2016.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2021.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2022.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2012_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2013_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2014_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2015_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2017_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2018_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2020_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2021_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv
@@ -1,3 +1,7 @@
# TODOs
* Write a script that creates the tables and inserts the data.
* Find out whether data cleaning can be done in Python with pandas or whether it all has to be done in SQL scripts (a minimal pandas-to-SQL sketch follows after this hunk).

# Project Diary

| Version<br/> 0.00 | Author: <br />michel.romancuk@stud.unibas.ch<br />sebastian.lenzlinger@unibas.ch<br /> | HS 2023<br />Databases<br /> |
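Regarding the open question in the TODOs: pandas can do the cleaning in memory and can also create and fill the tables itself via DataFrame.to_sql, so separate SQL scripts are only strictly needed for work that has to run inside the database. A minimal sketch, not part of this commit, assuming a local SQLite file as the target; the table name, database path, and fillna cleaning step are illustrative, not the project's final schema:

import sqlite3
import pandas as pd

def load_counts_into_db(csv_path, db_path='traffic.db', table='foot_bike_counts'):
    # Cleaning happens in pandas ...
    df = pd.read_csv(csv_path, low_memory=False)
    df = df.fillna(0)
    # ... then table creation and row insertion happen in one to_sql call.
    with sqlite3.connect(db_path) as con:
        df.to_sql(table, con, if_exists='append', index=False)

# Example (hypothetical path):
# load_counts_into_db('datasets/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv')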
11  docs/foot_bike_zaehlung_urls.txt  Normal file
@@ -0,0 +1,11 @@
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2012_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2013_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2014_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2015_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2017_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2018_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2020_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2021_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv
11  docs/verkehrszaehlung_moto_urls.txt  Normal file
@@ -0,0 +1,11 @@
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2012.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2013.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2014.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2015.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2016.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2021.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2022.csv
118  src/data_utils.py  Normal file
@@ -0,0 +1,118 @@
# data_utils.py

import os
import pandas as pd
import requests
from urllib.parse import urlparse
import geopandas as gpd
from concurrent.futures import ThreadPoolExecutor as tpe


def download_csv(url, local_filename):
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


def process_urls(data_dir, urls_file):
    # Ensure the data directory exists
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    # Read URLs from the file
    with open(urls_file, 'r') as file:
        urls = file.readlines()

    # Process each URL
    for url in urls:
        url = url.strip()
        filename = os.path.basename(urlparse(url).path)
        local_filename = os.path.join(data_dir, filename)

        # Check if the file already exists
        if not os.path.isfile(local_filename):
            print(f"Downloading {url}...")
            download_csv(url, local_filename)
            print(f"Saved to {local_filename}")
        else:
            print(f"File {filename} already exists in {data_dir}, skipping download.")


def load_dataframe_from_csv(filepath):
    try:
        df = pd.read_csv(filepath, low_memory=False)
        return df
    except Exception as e:
        print(f"Error loading {filepath}: {e}")
        return None


def load_dataframes_from_csv_files(data_dir, u_string):
    dataframes = []

    with tpe(max_workers=5) as executor:
        for filename in os.listdir(data_dir):
            if (u_string in filename) and filename.endswith('.csv'):
                filepath = os.path.join(data_dir, filename)
                future = executor.submit(load_dataframe_from_csv, filepath)
                dataframes.append(future)

    dataframes = [future.result() for future in dataframes if future.result() is not None]

    return dataframes

    # for filename in os.listdir(data_dir):
    #     if (u_string in filename) and filename.endswith('.csv'):
    #         filepath = os.path.join(data_dir, filename)
    #         df = pd.read_csv(filepath, low_memory=False)
    #         dataframes.append(df)
    # return dataframes


def load_dataframes_from_geojson_files(data_dir, u_string):
    print('u_string', u_string)
    gdf = gpd.GeoDataFrame()
    for filename in os.listdir(data_dir):
        print("Filename:", filename)
        if (u_string in filename) and filename.endswith('.json'):
            filepath = os.path.join(data_dir, filename)
            print("Filepath:", filepath)
            gdf = gpd.read_file(filepath)  # Read GeoJSON directly as GeoDataFrame

    return gdf


def combine_dataframes(dataframes):
    if dataframes:
        combined_dataframe = pd.concat(dataframes, ignore_index=True)
        return combined_dataframe
    else:
        print("No dataframes to combine")
        return pd.DataFrame()  # Return an empty DataFrame


def create_unified_df(urls_file, u_string, data_dir, files_present=False):
    df_list = []
    df_unified = None
    if not files_present:
        process_urls(data_dir, urls_file)

    df_list = load_dataframes_from_csv_files(data_dir, u_string)
    df_unified = combine_dataframes(df_list)

    return df_unified


def save_dataframe_to_csv(df, integrated_dir, filename):
    pass


if __name__ == "__main__":
    # Test the functions here if necessary
    csv_urls_file = '../docs/all_csv_urls.txt'
    datasets_dir = 'datasets/'
    output_file = 'column_names.txt'
    process_urls(datasets_dir, csv_urls_file)
    # extract_column_names(datasets_dir, output_file)
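save_dataframe_to_csv above is still a stub. A minimal sketch of what it could look like, not part of this commit, assuming the cleaned frames should simply be written as CSV files into the integrated directory:

import os
import pandas as pd  # both already imported at the top of data_utils.py

def save_dataframe_to_csv(df, integrated_dir, filename):
    # Hypothetical completion of the stub: create the target directory if
    # needed, then write the frame as CSV without the pandas index column.
    os.makedirs(integrated_dir, exist_ok=True)
    df.to_csv(os.path.join(integrated_dir, filename), index=False)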
77  src/integrate.py  Normal file
@@ -0,0 +1,77 @@
import data_utils as du
from datetime import datetime as dt
import os
import requests
import pandas as pd

foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
accident_file_url = '../docs/accident_loc_urls.txt'

# Using u_string to discriminate between files that belong to each other
motor_file_u_string = 'sid_dav_verkehrszaehlung_miv_OD2031'
foot_bike_file_u_string = 'velo.csv'
accident_file_u_string = 'RoadTrafficAccidentLocations.json'

data_dir = 'datasets/'
integrated_dir = 'datasets/integrated/'

weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


def process_foot_bike_data():
    fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir, files_present=True)
    fb_df_unified[['DATE', 'TIME']] = fb_df_unified['DATUM'].str.split('T', expand=True)
    fb_df_unified[['HRS', 'MINS']] = fb_df_unified['TIME'].str.split(':', expand=True)
    # We may still end up needing FK_ZAEHLER after all
    fb_cols_to_drop = ['DATUM']
    fb_df_unified_correct_cols = fb_df_unified.drop(columns=fb_cols_to_drop, axis=1)
    fb_df_unified_correct_cols.fillna(0, inplace=True)
    fb_df_grouped = fb_df_unified_correct_cols.groupby(['OST', 'NORD', 'DATE', 'HRS']).agg({
        'VELO_IN': 'sum',
        'VELO_OUT': 'sum',
        'FUSS_IN': 'sum',
        'FUSS_OUT': 'sum'
    }).reset_index()
    dt_obj = pd.to_datetime(fb_df_grouped['DATE'])
    days = dt_obj.dt.weekday
    fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x])
    cleaned_fb_df = fb_df_grouped
    return cleaned_fb_df


def process_miv_data():
    miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True)

    miv_df_unified[['Date', 'Time']] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True)
    miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True)

    miv_cols_to_keep = ['MSID', 'ZSID', 'Achse', 'EKoord', 'NKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus',
                        'Date', 'Hrs']
    # .copy() so the added Weekday_en column does not trigger a SettingWithCopyWarning
    miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep].copy()

    dt_obj = pd.to_datetime(miv_df_cols_dropped['Date'])
    days = dt_obj.dt.weekday
    miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])

    cleaned_miv_df = miv_df_cols_dropped
    return cleaned_miv_df


def process_accident_data():
    acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
    acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
                        'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
                        'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
                        'AccidentLocation_CHLV95_N', 'geometry']
    cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
    return cleaned_acc_df


if __name__ == '__main__':
    miv_df = process_miv_data()
    print(miv_df['Date'])  # 'MessungDatZeit' is split into 'Date'/'Hrs' and dropped above
    print(miv_df.dtypes)
    print(miv_df.head(100))
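For orientation, a minimal driver sketch (not part of this commit) that exercises all three cleaning functions from integrate.py; it assumes the CSVs and the GeoJSON are already in datasets/ (the functions are called with files_present=True), and persisting the results to datasets/integrated/ is still open since save_dataframe_to_csv is only a stub:

from integrate import process_foot_bike_data, process_miv_data, process_accident_data

fb_df = process_foot_bike_data()   # hourly foot/bike counts per counting location
miv_df = process_miv_data()        # hourly motorised-traffic counts per station
acc_df = process_accident_data()   # accident records from the GeoJSON file
print(fb_df.shape, miv_df.shape, acc_df.shape)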
3  src/preparations.py  Normal file
@@ -0,0 +1,3 @@
import data_utils