Add logger.

Sebastian Lenzlinger 2023-12-03 12:28:32 +01:00
parent e73962d8e1
commit e4d0484a23
3 changed files with 25 additions and 19 deletions

data_utils.py

@@ -1,11 +1,13 @@
 import os
 import pandas as pd
 import requests
 from urllib.parse import urlparse
 import geopandas as gpd
 from concurrent.futures import ThreadPoolExecutor as tpe
+import logging
+logging.basicConfig(level=logging.DEBUG, filename='app.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('data_utils.py')
 def download_csv(url, local_filename):
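
A minimal, hypothetical sketch (not part of the commit) of what the new logger buys over the existing print calls: each record carries a timestamp, the module name, and a level, and lands in app.log exactly as the basicConfig call above dictates. The sample filename is invented.

import logging

logging.basicConfig(level=logging.DEBUG, filename='app.log',
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('data_utils.py')

# A DEBUG record instead of print("Filename:", filename):
logger.debug("Filename: %s", "example.json")

# app.log then receives a line shaped like:
# 2023-12-03 12:28:32,123 - data_utils.py - DEBUG - Filename: example.json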
@@ -75,11 +77,11 @@ def load_dataframes_from_geojson_files(data_dir, u_string):
     print('u_string', u_string)
     gdf = gpd.GeoDataFrame()
     for filename in os.listdir(data_dir):
-        print("Filename:", filename)
+        #print("Filename:", filename)
         if (u_string in filename) and filename.endswith('.json'):
             filepath = os.path.join(data_dir, filename)
             print("Filepath:", filepath)
-            gdf = gpd.read_file(filepath)  # Read GeoJSON directly as GeoDataFrame
+            gdf = gpd.read_file(filepath)
     return gdf
@@ -90,7 +92,7 @@ def combine_dataframes(dataframes):
         return combined_dataframe
     else:
         print("No dataframes to combine")
-        return pd.DataFrame()  # Return an empty DataFrame
+        return pd.DataFrame()

 def create_unified_df(urls_file, u_string, data_dir, files_present=False):
@@ -110,7 +112,6 @@ def save_dataframe_to_csv(df, integrated_dir, filename):
 if __name__ == "__main__":
-    # Test the functions here if necessary
     csv_urls_file = '../docs/all_csv_urls.txt'
     datasets_dir = 'datasets/'
     output_file = 'column_names.txt'

integrate.py

@@ -4,6 +4,12 @@ import os
 import requests
 import pandas as pd
+import logging
+
+logging.basicConfig(level=logging.DEBUG, filename='app.log',
+                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('integrate.py')
+
 foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
 miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
 accident_file_url = '../docs/accident_loc_urls.txt'
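
Worth noting (standard-library behavior, not something this commit changes): logging.basicConfig configures the root logger only if it has no handlers yet, so once data_utils.py and integrate.py are imported into the same process, the second call is a silent no-op. Both modules pass identical arguments here, so the outcome is the same either way. A small sketch:

import logging

# First call attaches a FileHandler for app.log to the root logger.
logging.basicConfig(level=logging.DEBUG, filename='app.log',
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Second call is ignored: the root logger already has a handler.
# (Pass force=True, Python 3.8+, to replace the existing configuration.)
logging.basicConfig(level=logging.INFO, filename='other.log')

logging.getLogger('integrate.py').debug("still lands in app.log at DEBUG level")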
@@ -65,13 +71,12 @@ def process_accident_data():
     acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
                         'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
                         'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
-                        'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth']
+                        'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry']
     cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
     return cleaned_acc_df

 if __name__ == '__main__':
-    fb_df = process_miv_data()
-    print(fb_df['MessungDatZeit'])
-    print(fb_df.dtypes)
-    print(fb_df.head(100))
+    acc_df = process_accident_data()
+    print(acc_df.dtypes)
+    print(acc_df.head(100))
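
On the reordered acc_cols_to_keep: moving 'geometry' to the end is cosmetic, but keeping it in the selection matters, since a column subset of a GeoDataFrame stays a GeoDataFrame only while the active geometry column is included; without it, recent geopandas returns a plain DataFrame. A hypothetical, self-contained illustration (sample values invented; EPSG:2056 is the Swiss LV95 system the CHLV95 columns refer to):

import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame(
    {'AccidentUID': ['a1'], 'AccidentMonth': [12]},
    geometry=[Point(2683000, 1247000)],
    crs='EPSG:2056',  # Swiss LV95, as in AccidentLocation_CHLV95_E/N
)

with_geom = gdf[['AccidentUID', 'AccidentMonth', 'geometry']]
without_geom = gdf[['AccidentUID', 'AccidentMonth']]
print(type(with_geom).__name__)     # GeoDataFrame
print(type(without_geom).__name__)  # DataFrame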

(Jupyter notebook, filename not shown)

@@ -13,8 +13,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:43:55.980827Z",
-    "start_time": "2023-12-02T23:43:55.546732Z"
+    "end_time": "2023-12-03T10:58:50.698090Z",
+    "start_time": "2023-12-03T10:58:50.384352Z"
    }
   },
   "id": "be55b25929d95559"
@@ -44,8 +44,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:48:08.233784Z",
-    "start_time": "2023-12-02T23:43:55.980667Z"
+    "end_time": "2023-12-03T11:01:14.422749Z",
+    "start_time": "2023-12-03T10:58:52.300667Z"
    }
   },
   "id": "dd3831953afdeb72"
@@ -123,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "outputs": [
     {
      "name": "stdout",
@@ -163,8 +163,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:49:50.185415Z",
-    "start_time": "2023-12-02T23:49:34.846049Z"
+    "end_time": "2023-12-03T11:15:51.051154Z",
+    "start_time": "2023-12-03T11:15:36.154717Z"
    }
   },
   "id": "f86bc612060b17a4"