Add logger.

Sebastian Lenzlinger 2023-12-03 12:28:32 +01:00
parent e73962d8e1
commit e4d0484a23
3 changed files with 25 additions and 19 deletions

data_utils.py

@@ -1,11 +1,13 @@
 import os
 import pandas as pd
 import requests
 from urllib.parse import urlparse
 import geopandas as gpd
 from concurrent.futures import ThreadPoolExecutor as tpe
+import logging
+logging.basicConfig(level=logging.DEBUG, filename='app.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('data_utils.py')
 def download_csv(url, local_filename):
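
A minimal, hypothetical sketch (not part of the commit) of what the new logger buys over the existing print calls: each record carries a timestamp, the module name, and a level, and lands in app.log exactly as the basicConfig call above dictates. The sample filename is invented.

import logging

logging.basicConfig(level=logging.DEBUG, filename='app.log',
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('data_utils.py')

# A DEBUG record instead of print("Filename:", filename):
logger.debug("Filename: %s", "example.json")

# app.log then receives a line shaped like:
# 2023-12-03 12:28:32,123 - data_utils.py - DEBUG - Filename: example.json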
@@ -75,11 +77,11 @@ def load_dataframes_from_geojson_files(data_dir, u_string):
     print('u_string', u_string)
     gdf = gpd.GeoDataFrame()
     for filename in os.listdir(data_dir):
-        print("Filename:", filename)
+        #print("Filename:", filename)
         if (u_string in filename) and filename.endswith('.json'):
             filepath = os.path.join(data_dir, filename)
             print("Filepath:", filepath)
-            gdf = gpd.read_file(filepath)  # Read GeoJSON directly as GeoDataFrame
+            gdf = gpd.read_file(filepath)
     return gdf
@@ -90,7 +92,7 @@ def combine_dataframes(dataframes):
         return combined_dataframe
     else:
         print("No dataframes to combine")
-        return pd.DataFrame()  # Return an empty DataFrame
+        return pd.DataFrame()

 def create_unified_df(urls_file, u_string, data_dir, files_present=False):
@@ -110,7 +112,6 @@ def save_dataframe_to_csv(df, integrated_dir, filename):
 if __name__ == "__main__":
-    # Test the functions here if necessary
     csv_urls_file = '../docs/all_csv_urls.txt'
     datasets_dir = 'datasets/'
     output_file = 'column_names.txt'

integrate.py

@@ -4,6 +4,12 @@ import os
 import requests
 import pandas as pd
+import logging
+
+logging.basicConfig(level=logging.DEBUG, filename='app.log',
+                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('integrate.py')
+
 foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
 miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
 accident_file_url = '../docs/accident_loc_urls.txt'
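
Worth noting (standard-library behavior, not something this commit changes): logging.basicConfig configures the root logger only if it has no handlers yet, so once data_utils.py and integrate.py are imported into the same process, the second call is a silent no-op. Both modules pass identical arguments here, so the outcome is the same either way. A small sketch:

import logging

# First call attaches a FileHandler for app.log to the root logger.
logging.basicConfig(level=logging.DEBUG, filename='app.log',
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Second call is ignored: the root logger already has a handler.
# (Pass force=True, Python 3.8+, to replace the existing configuration.)
logging.basicConfig(level=logging.INFO, filename='other.log')

logging.getLogger('integrate.py').debug("still lands in app.log at DEBUG level")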
@@ -65,13 +71,12 @@ def process_accident_data():
     acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
                         'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
                         'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
-                        'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth']
+                        'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry']
     cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
     return cleaned_acc_df

 if __name__ == '__main__':
-    fb_df = process_miv_data()
-    print(fb_df['MessungDatZeit'])
-    print(fb_df.dtypes)
-    print(fb_df.head(100))
+    acc_df = process_accident_data()
+    print(acc_df.dtypes)
+    print(acc_df.head(100))
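
On the reordered acc_cols_to_keep: moving 'geometry' to the end is cosmetic, but keeping it in the selection matters, since a column subset of a GeoDataFrame stays a GeoDataFrame only while the active geometry column is included; without it, recent geopandas returns a plain DataFrame. A hypothetical, self-contained illustration (sample values invented; EPSG:2056 is the Swiss LV95 system the CHLV95 columns refer to):

import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame(
    {'AccidentUID': ['a1'], 'AccidentMonth': [12]},
    geometry=[Point(2683000, 1247000)],
    crs='EPSG:2056',  # Swiss LV95, as in AccidentLocation_CHLV95_E/N
)

with_geom = gdf[['AccidentUID', 'AccidentMonth', 'geometry']]
without_geom = gdf[['AccidentUID', 'AccidentMonth']]
print(type(with_geom).__name__)     # GeoDataFrame
print(type(without_geom).__name__)  # DataFrame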

(Jupyter notebook, filename not shown)

@@ -13,8 +13,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:43:55.980827Z",
-    "start_time": "2023-12-02T23:43:55.546732Z"
+    "end_time": "2023-12-03T10:58:50.698090Z",
+    "start_time": "2023-12-03T10:58:50.384352Z"
    }
   },
   "id": "be55b25929d95559"
@@ -44,8 +44,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:48:08.233784Z",
-    "start_time": "2023-12-02T23:43:55.980667Z"
+    "end_time": "2023-12-03T11:01:14.422749Z",
+    "start_time": "2023-12-03T10:58:52.300667Z"
    }
   },
   "id": "dd3831953afdeb72"
@@ -123,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "outputs": [
     {
      "name": "stdout",
@@ -163,8 +163,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:49:50.185415Z",
-    "start_time": "2023-12-02T23:49:34.846049Z"
+    "end_time": "2023-12-03T11:15:51.051154Z",
+    "start_time": "2023-12-03T11:15:36.154717Z"
    }
   },
   "id": "f86bc612060b17a4"