Add logger.

This commit is contained in:
Sebastian Lenzlinger 2023-12-03 12:28:32 +01:00
parent e73962d8e1
commit e4d0484a23
3 changed files with 25 additions and 19 deletions

View File

@ -1,11 +1,13 @@
# data_utils.py
import os
import pandas as pd
import requests
from urllib.parse import urlparse
import geopandas as gpd
from concurrent.futures import ThreadPoolExecutor as tpe
import logging
logging.basicConfig(level=logging.DEBUG, filename='app.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('data_utils.py')
def download_csv(url, local_filename):
@ -75,11 +77,11 @@ def load_dataframes_from_geojson_files(data_dir, u_string):
print('u_string', u_string)
gdf = gpd.GeoDataFrame()
for filename in os.listdir(data_dir):
print("Filename:", filename)
#print("Filename:", filename)
if (u_string in filename) and filename.endswith('.json'):
filepath = os.path.join(data_dir, filename)
print("Filepath:", filepath)
gdf = gpd.read_file(filepath) # Read GeoJSON directly as GeoDataFrame
gdf = gpd.read_file(filepath)
return gdf
@ -90,7 +92,7 @@ def combine_dataframes(dataframes):
return combined_dataframe
else:
print("No dataframes to combine")
return pd.DataFrame() # Return an empty DataFrame
return pd.DataFrame()
def create_unified_df(urls_file, u_string, data_dir, files_present=False):
@ -110,7 +112,6 @@ def save_dataframe_to_csv(df, integrated_dir, filename):
if __name__ == "__main__":
# Test the functions here if necessary
csv_urls_file = '../docs/all_csv_urls.txt'
datasets_dir = 'datasets/'
output_file = 'column_names.txt'

View File

@ -4,6 +4,12 @@ import os
import requests
import pandas as pd
import logging
logging.basicConfig(level=logging.DEBUG, filename='app.log',
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('integrate.py')
foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
accident_file_url = '../docs/accident_loc_urls.txt'
@ -41,7 +47,7 @@ def process_foot_bike_data():
def process_miv_data():
miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir,files_present=True)
miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True)
miv_df_unified[['Date', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True)
miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True)
@ -65,13 +71,12 @@ def process_accident_data():
acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth']
'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry']
cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
return cleaned_acc_df
if __name__ == '__main__':
fb_df = process_miv_data()
print(fb_df['MessungDatZeit'])
print(fb_df.dtypes)
print(fb_df.head(100))
acc_df = process_accident_data()
print(acc_df.dtypes)
print(acc_df.head(100))

View File

@ -13,8 +13,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-02T23:43:55.980827Z",
"start_time": "2023-12-02T23:43:55.546732Z"
"end_time": "2023-12-03T10:58:50.698090Z",
"start_time": "2023-12-03T10:58:50.384352Z"
}
},
"id": "be55b25929d95559"
@ -44,8 +44,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-02T23:48:08.233784Z",
"start_time": "2023-12-02T23:43:55.980667Z"
"end_time": "2023-12-03T11:01:14.422749Z",
"start_time": "2023-12-03T10:58:52.300667Z"
}
},
"id": "dd3831953afdeb72"
@ -123,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"outputs": [
{
"name": "stdout",
@ -163,8 +163,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-02T23:49:50.185415Z",
"start_time": "2023-12-02T23:49:34.846049Z"
"end_time": "2023-12-03T11:15:51.051154Z",
"start_time": "2023-12-03T11:15:36.154717Z"
}
},
"id": "f86bc612060b17a4"