diff --git a/src/data_utils.py b/src/data_utils.py index 584619c..9ce34c5 100644 --- a/src/data_utils.py +++ b/src/data_utils.py @@ -1,11 +1,13 @@ -# data_utils.py - import os import pandas as pd import requests from urllib.parse import urlparse import geopandas as gpd from concurrent.futures import ThreadPoolExecutor as tpe +import logging + +logging.basicConfig(level=logging.DEBUG, filename='app.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('data_utils.py') def download_csv(url, local_filename): @@ -75,11 +77,11 @@ def load_dataframes_from_geojson_files(data_dir, u_string): print('u_string', u_string) gdf = gpd.GeoDataFrame() for filename in os.listdir(data_dir): - print("Filename:", filename) + #print("Filename:", filename) if (u_string in filename) and filename.endswith('.json'): filepath = os.path.join(data_dir, filename) print("Filepath:", filepath) - gdf = gpd.read_file(filepath) # Read GeoJSON directly as GeoDataFrame + gdf = gpd.read_file(filepath) return gdf @@ -90,7 +92,7 @@ def combine_dataframes(dataframes): return combined_dataframe else: print("No dataframes to combine") - return pd.DataFrame() # Return an empty DataFrame + return pd.DataFrame() def create_unified_df(urls_file, u_string, data_dir, files_present=False): @@ -110,7 +112,6 @@ def save_dataframe_to_csv(df, integrated_dir, filename): if __name__ == "__main__": - # Test the functions here if necessary csv_urls_file = '../docs/all_csv_urls.txt' datasets_dir = 'datasets/' output_file = 'column_names.txt' diff --git a/src/integrate.py b/src/integrate.py index 607719d..6427e29 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -4,6 +4,12 @@ import os import requests import pandas as pd +import logging + +logging.basicConfig(level=logging.DEBUG, filename='app.log', + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('integrate.py') + foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt' miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt' accident_file_url = '../docs/accident_loc_urls.txt' @@ -41,7 +47,7 @@ def process_foot_bike_data(): def process_miv_data(): - miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir,files_present=True) + miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True) miv_df_unified[['Date', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True) @@ -65,13 +71,12 @@ def process_accident_data(): acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E', - 'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth'] + 'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry'] cleaned_acc_df = acc_df_unified[acc_cols_to_keep] return cleaned_acc_df if __name__ == '__main__': - fb_df = process_miv_data() - print(fb_df['MessungDatZeit']) - print(fb_df.dtypes) - print(fb_df.head(100)) + acc_df = process_accident_data() + print(acc_df.dtypes) + print(acc_df.head(100)) diff --git a/src/testArea.ipynb b/src/testArea.ipynb index e158c10..c4739bb 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -13,8 +13,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T23:43:55.980827Z", - "start_time": "2023-12-02T23:43:55.546732Z" + "end_time": "2023-12-03T10:58:50.698090Z", + "start_time": "2023-12-03T10:58:50.384352Z" } }, "id": "be55b25929d95559" @@ -44,8 +44,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T23:48:08.233784Z", - "start_time": "2023-12-02T23:43:55.980667Z" + "end_time": "2023-12-03T11:01:14.422749Z", + "start_time": "2023-12-03T10:58:52.300667Z" } }, "id": "dd3831953afdeb72" @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "outputs": [ { "name": "stdout", @@ -163,8 +163,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T23:49:50.185415Z", - "start_time": "2023-12-02T23:49:34.846049Z" + "end_time": "2023-12-03T11:15:51.051154Z", + "start_time": "2023-12-03T11:15:36.154717Z" } }, "id": "f86bc612060b17a4"