This repository has been archived on 2025-01-04. You can view files and clone it, but cannot push or open issues or pull requests.
group-1-db23/src/integrate.py
2023-12-03 01:09:32 +01:00

78 lines
3.1 KiB
Python

import data_utils as du
from datetime import datetime as dt
import os
import requests
import pandas as pd
# Text files listing the download URLs for each raw dataset.
foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
accident_file_url = '../docs/accident_loc_urls.txt'
# Using u_string to discriminate between files that belong to each other
# (substring matched against file names inside data_dir).
motor_file_u_string = 'sid_dav_verkehrszaehlung_miv_OD2031'
foot_bike_file_u_string = 'velo.csv'
accident_file_u_string = 'RoadTrafficAccidentLocations.json'
# Directories for raw downloads and for the integrated output.
data_dir = 'datasets/'
integrated_dir = 'datasets/integrated/'
# English weekday names indexed by pandas weekday number (0 = Monday).
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
def process_foot_bike_data():
    """Load, clean and aggregate the pedestrian/bicycle counting data.

    Returns:
        pandas.DataFrame with bicycle (VELO_IN/OUT) and pedestrian
        (FUSS_IN/OUT) counts summed per location (OST, NORD), date and
        hour, plus an English weekday name column 'Weekday_en'.
    """
    fb_df = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string,
                                 data_dir, files_present=True)
    # 'DATUM' is an ISO-like timestamp 'YYYY-MM-DDTHH:MM' — split into
    # date and time, then time into hours/minutes.
    fb_df[['DATE', "TIME"]] = fb_df['DATUM'].str.split('T', expand=True)
    fb_df[['HRS', 'MINS']] = fb_df['TIME'].str.split(':', expand=True)
    # NOTE: we might still need FK_ZAEHLER after all
    fb_df = fb_df.drop(columns=['DATUM'])
    # Missing counts are treated as zero before aggregation.
    fb_df.fillna(0, inplace=True)
    # Sum the counts for each location / date / hour bucket.
    fb_df_grouped = fb_df.groupby(['OST', 'NORD', 'DATE', 'HRS']).agg({
        'VELO_IN': 'sum',
        'VELO_OUT': 'sum',
        'FUSS_IN': 'sum',
        'FUSS_OUT': 'sum'
    }).reset_index()
    # dt.day_name() yields the same English names as the manual
    # weekday_names lookup, without the intermediate lambda mapping.
    fb_df_grouped['Weekday_en'] = pd.to_datetime(fb_df_grouped['DATE']).dt.day_name()
    return fb_df_grouped
def process_miv_data():
    """Load and clean the motorized individual traffic (MIV) count data.

    Returns:
        pandas.DataFrame with one row per measurement, restricted to the
        relevant columns plus derived 'Date', 'Hrs' and 'Weekday_en'.
    """
    miv_df = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir,
                                  files_present=True)
    # 'MessungDatZeit' is 'YYYY-MM-DDTHH:MM:SS' — split into date and time,
    # then time into its components.
    miv_df[['Date', "Time"]] = miv_df['MessungDatZeit'].str.split('T', expand=True)
    miv_df[['Hrs', 'Mins', 'Sec']] = miv_df['Time'].str.split(':', expand=True)
    miv_cols_to_keep = ['MSID', 'ZSID', 'Achse', 'EKoord', 'NKoord', 'Richtung',
                        'AnzFahrzeuge', 'AnzFahrzeugeStatus', 'Date', 'Hrs']
    # .copy() detaches the subset from miv_df: the original code assigned a
    # new column on a slice view, which raises SettingWithCopyWarning and is
    # not guaranteed to write through.
    cleaned_miv_df = miv_df[miv_cols_to_keep].copy()
    # English weekday name derived directly from the date.
    cleaned_miv_df['Weekday_en'] = pd.to_datetime(cleaned_miv_df['Date']).dt.day_name()
    return cleaned_miv_df
def process_accident_data():
    """Load the road-traffic-accident GeoJSON data and keep only the
    columns relevant for integration.

    Returns:
        pandas.DataFrame restricted to the accident columns listed below.
    """
    raw_acc_df = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
    # Columns carried forward into the integrated dataset.
    keep_cols = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
                 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
                 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
                 'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth']
    return raw_acc_df[keep_cols]
if __name__ == '__main__':
    # Quick manual inspection of the cleaned MIV data.
    miv_df = process_miv_data()
    # Bug fix: the original printed miv_df['MessungDatZeit'], but that
    # column is dropped inside process_miv_data (it is not in
    # miv_cols_to_keep), so the lookup raised KeyError. The variable was
    # also misleadingly named fb_df while holding MIV data.
    print(miv_df.dtypes)
    print(miv_df.head(100))