diff --git a/src/data_utils.py b/src/data_utils.py index 1a4edba..7f0607d 100755 --- a/src/data_utils.py +++ b/src/data_utils.py @@ -7,7 +7,7 @@ import geopandas as gpd from concurrent.futures import ThreadPoolExecutor as tpe import logging -logging.basicConfig(level=logging.INFO, filename='logs/data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.DEBUG, filename='logs/data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger('data_utils.py') stream_handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') @@ -130,6 +130,26 @@ def load_file_from_api(api_link, target_name, integrated_dir): def save_dataframe_to_csv(df, integrated_dir, filename): pass +# Needed Since we converted strings all to True +def convert_to_boolean(value): + true_values = ['true', '1', 'yes'] + false_values = ['false', '0', 'no'] + + if isinstance(value, str): + value = value.lower() + if value in true_values: + return True + elif value in false_values: + return False + else: + raise ValueError(f"Invalid boolean string: {value}") + + if isinstance(value, (int, float)): + return bool(value) + + raise ValueError(f"Invalid boolean value type: {type(value)}") + + if __name__ == "__main__": csv_urls_file = '../docs/all_csv_urls.txt' diff --git a/src/datasets/integrated/data_viz.ipynb b/src/datasets/integrated/data_viz.ipynb index dfb38c5..634cbef 100644 --- a/src/datasets/integrated/data_viz.ipynb +++ b/src/datasets/integrated/data_viz.ipynb @@ -139,14 +139,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 45, "outputs": [ { "data": { - "text/plain": "", - "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + "text/plain": "" }, - "execution_count": 8, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -159,7 +158,7 @@ "\n", "acc_gdf['latitude'] = acc_gdf.geometry.y\n", "acc_gdf['longitude'] = acc_gdf.geometry.x\n", - "zurich_coordinates = [47.368650, \t8.539183]\n", + "zurich_coordinates = [47.385, \t8.539183]\n", "\n", "fixed_map_zurich_original_coords = folium.Map(\n", " location=zurich_coordinates, \n", @@ -170,23 +169,32 @@ " doubleClickZoom=False\n", ")\n", "\n", - "\n", "HeatMap(data=acc_gdf[['latitude', 'longitude']], radius=10).add_to(fixed_map_zurich_original_coords)\n", - "fixed_map_zurich_original_coords" + "#fixed_map_zurich_original_coords" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-01-03T12:40:59.611801469Z", - "start_time": "2024-01-03T12:40:59.323708433Z" + "end_time": "2024-01-03T13:41:38.594639794Z", + "start_time": "2024-01-03T13:41:38.499415915Z" } }, "id": "cded2fff6806c2dc" }, { "cell_type": "code", - "execution_count": 12, - "outputs": [], + "execution_count": 48, + "outputs": [ + { + "data": { + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create a base map\n", "gradient = {\n", @@ -199,14 +207,14 @@ "interactive_map = folium.Map(\n", " location=zurich_coordinates, \n", " zoom_start=13, \n", - " zoom_control=True, \n", + " zoom_control=False, \n", " dragging=False, \n", - " scrollWheelZoom=True, \n", + " scrollWheelZoom=False, \n", " doubleClickZoom=False\n", ")\n", - "\n", - "\n", - "example_years = [2011, 2012, 2013, 2014, 2014, 2016, 2017, 2018, 2019, 2020, 2021, 2022]\n", + "# Add the WMTS layer to the map\n", + "#folium.GeoJson(state_geo, name=\"Statistische Quartiere\").add_to(interactive_map)\n", + "example_years = [2012, 2013, 2014, 2014, 2016, 2017, 2018, 2019, 2020, 2021, 2022]\n", "\n", "for year in example_years:\n", " \n", @@ -215,10 +223,10 @@ " \n", " heatmap_layer = HeatMap(\n", " data=year_data[['latitude', 'longitude']],\n", - " radius=8, \n", + " radius=10, \n", " gradient=gradient,\n", - " min_opacity=0.5,\n", - " max_opacity=0.8,\n", + " min_opacity=0.33,\n", + " max_opacity=1,\n", " blur=10,\n", " show=False,\n", " name=f'Accidents in {year}'\n", @@ -229,14 +237,15 @@ "\n", "\n", "folium.LayerControl(collapsed=False).add_to(interactive_map)\n", - "folium.TileLayer(wfs)\n", - "interactive_map.save(\"accident_map_interactive.html\")" + "\n", + "#interactive_map.save(\"accident_map_interactive.html\")\n", + "interactive_map" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-01-03T12:43:24.525437654Z", - "start_time": "2024-01-03T12:43:24.337895855Z" + "end_time": "2024-01-03T13:43:02.028257049Z", + "start_time": "2024-01-03T13:43:01.822642063Z" } }, "id": "738ca6a4a67ca1bd" diff --git a/src/integrate.py b/src/integrate.py index 55a167d..dab70aa 100755 --- a/src/integrate.py +++ b/src/integrate.py @@ -153,12 +153,18 @@ def process_accident_data(file_present: bool = True): if not file_present: du.process_urls(data_dir, accident_file_url) acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string) + logger.debug(acc_df_unified[['AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', + 'AccidentInvolvingMotorcycle']].head()) acc_cols_to_keep = ['AccidentUID', 'AccidentYear', 'AccidentMonth', 'AccidentWeekDay_en','AccidentHour', 'AccidentLocation_CHLV95_N', 'AccidentLocation_CHLV95_E', 'AccidentType_en', 'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'geometry'] - cleaned_acc_df = acc_df_unified[acc_cols_to_keep] + # Need to already convert boolean strings "by hand", otherwise all will become 'True' + for col in ['AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', + 'AccidentInvolvingMotorcycle']: + acc_df_unified[col] = acc_df_unified[col].apply(du.convert_to_boolean) + cleaned_acc_df = acc_df_unified[acc_cols_to_keep].copy() cleaned_acc_df.rename(columns={ 'AccidentLocation_CHLV95_E': 'EKoord', 'AccidentLocation_CHLV95_N': 'NKoord', @@ -239,7 +245,8 @@ def load_tempo_geojson_from_api_to_local(): if __name__ == '__main__': # ensure_dirs_exist(data_dir, integrated_dir, logs_dir) - process_all_data_sources(True, True, False) + #process_accident_data() + #process_all_data_sources(True, True, False) # miv_to_integrated_csv() - # acc_to_cleaned_geojson() - load_tempo_geojson_from_api_to_local() + acc_to_cleaned_geojson() + #load_tempo_geojson_from_api_to_local() diff --git a/src/testArea.ipynb b/src/testArea.ipynb old mode 100755 new mode 100644 index c0bcbf8..9d0fc5e --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -14,8 +14,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T15:47:10.110909Z", - "start_time": "2023-12-03T15:47:09.656556Z" + "end_time": "2024-01-03T14:15:33.076096175Z", + "start_time": "2024-01-03T14:15:32.881183740Z" } }, "id": "be55b25929d95559" @@ -23,40 +23,146 @@ { "cell_type": "code", "execution_count": 2, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/seb/Projects/repos/group-1/src/integrate.py:132: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n", - "/Users/seb/Projects/repos/group-1/src/integrate.py:133: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)\n" - ] - } - ], + "outputs": [], "source": [ "\n", - "miv_df = intr.process_miv_data()\n", + "#miv_df = intr.process_miv_data()\n", "#fb_data = intr.process_foot_bike_data()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T15:49:07.561603Z", - "start_time": "2023-12-03T15:47:14.759104Z" + "end_time": "2024-01-03T14:15:33.085151332Z", + "start_time": "2024-01-03T14:15:33.076608687Z" } }, "id": "dd3831953afdeb72" }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "#uplicate_rows = miv_df[miv_df.duplicated()]\n", + "#print(duplicate_rows.shape[0])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:15:33.109662807Z", + "start_time": "2024-01-03T14:15:33.078813925Z" + } + }, + "id": "14471cd78389ce4d" + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "u_string RoadTrafficAccidentLocations.json\n", + "Filepath: datasets/RoadTrafficAccidentLocations.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-01-03 15:17:14,309 - integrate.py - DEBUG - AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n", + "0 false false \n", + "1 false true \n", + "2 false false \n", + "3 false false \n", + "4 false false \n", + "\n", + " AccidentInvolvingMotorcycle \n", + "0 false \n", + "1 false \n", + "2 false \n", + "3 false \n", + "4 false \n" + ] + }, + { + "data": { + "text/plain": "Empty DataFrame\nColumns: [AccidentUID, AccidentYear, AccidentMonth, AccidentWeekDay_en, AccidentHour, NKoord, EKoord, AccidentType_en, AccidentType, AccidentSeverityCategory, AccidentInvolvingPedestrian, AccidentInvolvingBicycle, AccidentInvolvingMotorcycle, RoadType, RoadType_en, geometry]\nIndex: []", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AccidentUIDAccidentYearAccidentMonthAccidentWeekDay_enAccidentHourNKoordEKoordAccidentType_enAccidentTypeAccidentSeverityCategoryAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_engeometry
\n
" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acc_df = intr.process_accident_data(True)\n" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:17:14.568151856Z", + "start_time": "2024-01-03T14:17:00.375936463Z" + } + }, + "id": "f86bc612060b17a4" + }, + { + "cell_type": "code", + "execution_count": 26, + "outputs": [ + { + "data": { + "text/plain": " AccidentUID AccidentYear AccidentMonth \\\n0 A2D2677533867004E0430A865E337004 2011 1 \n1 9FD6441F802C20A6E0430A865E3320A6 2011 1 \n2 9FDA0DC4856A6094E0430A865E336094 2011 1 \n3 A3B66E42396E6000E0430A865E336000 2011 1 \n4 9FDA0DBE8CCE9096E0430A865E339096 2011 1 \n\n AccidentWeekDay_en AccidentHour NKoord EKoord \\\n0 Saturday 0 1245194 2684605 \n1 Saturday 1 1246980 2682382 \n2 Saturday 2 1247749 2682791 \n3 Saturday 2 1247102 2681199 \n4 Saturday 3 1250690 2682479 \n\n AccidentType_en AccidentType \\\n0 Accident with skidding or self-accident at0 \n1 Accident with skidding or self-accident at0 \n2 Accident with skidding or self-accident at0 \n3 Accident when crossing the lane(s) at5 \n4 Accident with skidding or self-accident at0 \n\n AccidentSeverityCategory AccidentInvolvingPedestrian \\\n0 as4 True \n1 as3 True \n2 as4 True \n3 as3 True \n4 as4 True \n\n AccidentInvolvingBicycle AccidentInvolvingMotorcycle RoadType RoadType_en \\\n0 True True rt433 Minor road \n1 True True rt433 Minor road \n2 True True rt439 Other \n3 True True rt433 Minor road \n4 True True rt433 Minor road \n\n geometry \n0 POINT Z (8.55841 47.352168 0) \n1 POINT Z (8.52932 47.368512 0) \n2 POINT Z (8.534877 47.375376 0) \n3 POINT Z (8.513682 47.369756 0) \n4 POINT Z (8.531288 47.401865 0) ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AccidentUIDAccidentYearAccidentMonthAccidentWeekDay_enAccidentHourNKoordEKoordAccidentType_enAccidentTypeAccidentSeverityCategoryAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_engeometry
0A2D2677533867004E0430A865E33700420111Saturday012451942684605Accident with skidding or self-accidentat0as4TrueTrueTruert433Minor roadPOINT Z (8.55841 47.352168 0)
19FD6441F802C20A6E0430A865E3320A620111Saturday112469802682382Accident with skidding or self-accidentat0as3TrueTrueTruert433Minor roadPOINT Z (8.52932 47.368512 0)
29FDA0DC4856A6094E0430A865E33609420111Saturday212477492682791Accident with skidding or self-accidentat0as4TrueTrueTruert439OtherPOINT Z (8.534877 47.375376 0)
3A3B66E42396E6000E0430A865E33600020111Saturday212471022681199Accident when crossing the lane(s)at5as3TrueTrueTruert433Minor roadPOINT Z (8.513682 47.369756 0)
49FDA0DBE8CCE9096E0430A865E33909620111Saturday312506902682479Accident with skidding or self-accidentat0as4TrueTrueTruert433Minor roadPOINT Z (8.531288 47.401865 0)
\n
" + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#acc_df[acc_df['AccidentInvolvingBicycle']==True]\n", + "acc_df.head()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:19:26.508020674Z", + "start_time": "2024-01-03T14:19:26.500683554Z" + } + }, + "id": "25378c6cd445a163" + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": "array(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',\n 'Friday'], dtype=object)" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acc_df.head()\n", + "acc_df['AccidentWeekDay_en'].unique()\n", + "#acc_df.dtypes\n", + "\n" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:15:48.660494125Z", + "start_time": "2024-01-03T14:15:48.656222488Z" + } + }, + "id": "6affbeea6c7cf3ef" + }, { "cell_type": "code", "execution_count": 6, @@ -65,112 +171,139 @@ "name": "stdout", "output_type": "stream", "text": [ - "121\n" + "Accident Columns:\n", + "AccidentUID object\n", + "AccidentYear int64\n", + "AccidentMonth int64\n", + "AccidentWeekDay_en object\n", + "AccidentHour int64\n", + "NKoord int64\n", + "EKoord int64\n", + "AccidentType_en object\n", + "AccidentType object\n", + "AccidentSeverityCategory object\n", + "AccidentInvolvingPedestrian bool\n", + "AccidentInvolvingBicycle bool\n", + "AccidentInvolvingMotorcycle bool\n", + "RoadType object\n", + "RoadType_en object\n", + "geometry object\n", + "dtype: object\n" ] } ], "source": [ - "duplicate_rows = miv_df[miv_df.duplicated()]\n", - "print(duplicate_rows.shape[0])" + "print(\"Accident Columns:\")\n", + "print(acc_df.dtypes)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T15:51:21.158909Z", - "start_time": "2023-12-03T15:51:15.711222Z" + "end_time": "2024-01-03T14:15:48.708958773Z", + "start_time": "2024-01-03T14:15:48.661313317Z" } }, - "id": "14471cd78389ce4d" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "acc_df = intr.process_accident_data(True)" - ], - "metadata": { - "collapsed": false - }, - "id": "f86bc612060b17a4" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "acc_df.head()\n", - "acc_df['AccidentWeekDay_en'].unique()\n", - "#acc_df.dtypes\n", - "\n" - ], - "metadata": { - "collapsed": false - }, - "id": "6affbeea6c7cf3ef" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "print(\"Accident Columns:\")\n", - "print(acc_df.dtypes)\n", - "print()\n", - "print(\"MIV Columns:\")\n", - "print(miv_df.dtypes)\n", - "print()\n", - "print(\"FB Cols:\")\n", - "print(fb_data.dtypes)" - ], - "metadata": { - "collapsed": false - }, "id": "242041cd369d8454" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": "0 1\n1 2\n2 3\n3 4\n4 5\n ... \n55821 55822\n55822 55823\n55823 55824\n55824 55825\n55825 55826\nName: ID, Length: 55826, dtype: int64" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "acc_df['ID'] = acc_df.index +1\n", "acc_df[('ID')]" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:15:48.720379971Z", + "start_time": "2024-01-03T14:15:48.692026239Z" + } }, "id": "1841925ee109a417" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "print(\"MIV unqiue:\", miv_df['EKoord'])\n", - "print(\"Acc unique:\", acc_df['RoadType'].unique)\n", - "print(\"FB unique: \", fb_data['DATE'])\n" + "execution_count": 16, + "outputs": [ + { + "data": { + "text/plain": "Empty DataFrame\nColumns: [AccidentUID, AccidentYear, AccidentMonth, AccidentWeekDay_en, AccidentHour, NKoord, EKoord, AccidentType_en, AccidentType, AccidentSeverityCategory, AccidentInvolvingPedestrian, AccidentInvolvingBicycle, AccidentInvolvingMotorcycle, RoadType, RoadType_en, geometry, ID]\nIndex: []", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AccidentUIDAccidentYearAccidentMonthAccidentWeekDay_enAccidentHourNKoordEKoordAccidentType_enAccidentTypeAccidentSeverityCategoryAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_engeometryID
\n
" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } ], + "source": [], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:16:39.887367022Z", + "start_time": "2024-01-03T14:16:39.881262890Z" + } }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": " AccidentUID AccidentYear AccidentMonth \\\n0 A2D2677533867004E0430A865E337004 2011 1 \n1 9FD6441F802C20A6E0430A865E3320A6 2011 1 \n2 9FDA0DC4856A6094E0430A865E336094 2011 1 \n3 A3B66E42396E6000E0430A865E336000 2011 1 \n4 9FDA0DBE8CCE9096E0430A865E339096 2011 1 \n\n AccidentWeekDay_en AccidentHour NKoord EKoord \\\n0 Saturday 0 1245194 2684605 \n1 Saturday 1 1246980 2682382 \n2 Saturday 2 1247749 2682791 \n3 Saturday 2 1247102 2681199 \n4 Saturday 3 1250690 2682479 \n\n AccidentType_en AccidentType \\\n0 Accident with skidding or self-accident at0 \n1 Accident with skidding or self-accident at0 \n2 Accident with skidding or self-accident at0 \n3 Accident when crossing the lane(s) at5 \n4 Accident with skidding or self-accident at0 \n\n AccidentSeverityCategory AccidentInvolvingPedestrian \\\n0 as4 True \n1 as3 True \n2 as4 True \n3 as3 True \n4 as4 True \n\n AccidentInvolvingBicycle AccidentInvolvingMotorcycle RoadType RoadType_en \\\n0 True True rt433 Minor road \n1 True True rt433 Minor road \n2 True True rt439 Other \n3 True True rt433 Minor road \n4 True True rt433 Minor road \n\n geometry ID \n0 POINT Z (8.55841 47.352168 0) 1 \n1 POINT Z (8.52932 47.368512 0) 2 \n2 POINT Z (8.534877 47.375376 0) 3 \n3 POINT Z (8.513682 47.369756 0) 4 \n4 POINT Z (8.531288 47.401865 0) 5 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AccidentUIDAccidentYearAccidentMonthAccidentWeekDay_enAccidentHourNKoordEKoordAccidentType_enAccidentTypeAccidentSeverityCategoryAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_engeometryID
0A2D2677533867004E0430A865E33700420111Saturday012451942684605Accident with skidding or self-accidentat0as4TrueTrueTruert433Minor roadPOINT Z (8.55841 47.352168 0)1
19FD6441F802C20A6E0430A865E3320A620111Saturday112469802682382Accident with skidding or self-accidentat0as3TrueTrueTruert433Minor roadPOINT Z (8.52932 47.368512 0)2
29FDA0DC4856A6094E0430A865E33609420111Saturday212477492682791Accident with skidding or self-accidentat0as4TrueTrueTruert439OtherPOINT Z (8.534877 47.375376 0)3
3A3B66E42396E6000E0430A865E33600020111Saturday212471022681199Accident when crossing the lane(s)at5as3TrueTrueTruert433Minor roadPOINT Z (8.513682 47.369756 0)4
49FDA0DBE8CCE9096E0430A865E33909620111Saturday312506902682479Accident with skidding or self-accidentat0as4TrueTrueTruert433Minor roadPOINT Z (8.531288 47.401865 0)5
\n
" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "acc_df.head()" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:15:48.790965333Z", + "start_time": "2024-01-03T14:15:48.759351058Z" + } }, "id": "a159cafa9c227b88" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 10, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'str' object has no attribute 'wkt'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[10], line 15\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[38;5;66;03m#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;66;03m#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 12\u001B[0m geometry_column \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[0;32m---> 15\u001B[0m acc_df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[43macc_df\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mgeometry\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mapply\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43;01mlambda\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mgeom\u001B[49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[43mWKTElement\u001B[49m\u001B[43m(\u001B[49m\u001B[43mgeom\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mwkt\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msrid\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m4326\u001B[39;49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 17\u001B[0m acc_df\u001B[38;5;241m.\u001B[39mto_sql(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124maccidents\u001B[39m\u001B[38;5;124m'\u001B[39m, engine, if_exists\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mreplace\u001B[39m\u001B[38;5;124m'\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m, dtype\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m: Geometry(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPOINT\u001B[39m\u001B[38;5;124m'\u001B[39m, srid\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4326\u001B[39m)})\n", + "File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/series.py:4760\u001B[0m, in \u001B[0;36mSeries.apply\u001B[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001B[0m\n\u001B[1;32m 4625\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mapply\u001B[39m(\n\u001B[1;32m 4626\u001B[0m \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 4627\u001B[0m func: AggFuncType,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 4632\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs,\n\u001B[1;32m 4633\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m DataFrame \u001B[38;5;241m|\u001B[39m Series:\n\u001B[1;32m 4634\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 4635\u001B[0m \u001B[38;5;124;03m Invoke function on values of Series.\u001B[39;00m\n\u001B[1;32m 4636\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 4751\u001B[0m \u001B[38;5;124;03m dtype: float64\u001B[39;00m\n\u001B[1;32m 4752\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m 4753\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mSeriesApply\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 4754\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4755\u001B[0m \u001B[43m \u001B[49m\u001B[43mfunc\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4756\u001B[0m \u001B[43m \u001B[49m\u001B[43mconvert_dtype\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mconvert_dtype\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4757\u001B[0m \u001B[43m \u001B[49m\u001B[43mby_row\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mby_row\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4758\u001B[0m \u001B[43m \u001B[49m\u001B[43margs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4759\u001B[0m \u001B[43m \u001B[49m\u001B[43mkwargs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m-> 4760\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mapply\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/apply.py:1207\u001B[0m, in \u001B[0;36mSeriesApply.apply\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 1204\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mapply_compat()\n\u001B[1;32m 1206\u001B[0m \u001B[38;5;66;03m# self.func is Callable\u001B[39;00m\n\u001B[0;32m-> 1207\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mapply_standard\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/apply.py:1287\u001B[0m, in \u001B[0;36mSeriesApply.apply_standard\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 1281\u001B[0m \u001B[38;5;66;03m# row-wise access\u001B[39;00m\n\u001B[1;32m 1282\u001B[0m \u001B[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001B[39;00m\n\u001B[1;32m 1283\u001B[0m \u001B[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001B[39;00m\n\u001B[1;32m 1284\u001B[0m \u001B[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001B[39;00m\n\u001B[1;32m 1285\u001B[0m \u001B[38;5;66;03m# Categorical (GH51645).\u001B[39;00m\n\u001B[1;32m 1286\u001B[0m action \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mignore\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(obj\u001B[38;5;241m.\u001B[39mdtype, CategoricalDtype) \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m-> 1287\u001B[0m mapped \u001B[38;5;241m=\u001B[39m \u001B[43mobj\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_map_values\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1288\u001B[0m \u001B[43m \u001B[49m\u001B[43mmapper\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcurried\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mna_action\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maction\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconvert\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconvert_dtype\u001B[49m\n\u001B[1;32m 1289\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1291\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(mapped) \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(mapped[\u001B[38;5;241m0\u001B[39m], ABCSeries):\n\u001B[1;32m 1292\u001B[0m \u001B[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001B[39;00m\n\u001B[1;32m 1293\u001B[0m \u001B[38;5;66;03m# See also GH#25959 regarding EA support\u001B[39;00m\n\u001B[1;32m 1294\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m obj\u001B[38;5;241m.\u001B[39m_constructor_expanddim(\u001B[38;5;28mlist\u001B[39m(mapped), index\u001B[38;5;241m=\u001B[39mobj\u001B[38;5;241m.\u001B[39mindex)\n", + "File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/base.py:921\u001B[0m, in \u001B[0;36mIndexOpsMixin._map_values\u001B[0;34m(self, mapper, na_action, convert)\u001B[0m\n\u001B[1;32m 918\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(arr, ExtensionArray):\n\u001B[1;32m 919\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m arr\u001B[38;5;241m.\u001B[39mmap(mapper, na_action\u001B[38;5;241m=\u001B[39mna_action)\n\u001B[0;32m--> 921\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43malgorithms\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmap_array\u001B[49m\u001B[43m(\u001B[49m\u001B[43marr\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmapper\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mna_action\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mna_action\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconvert\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mconvert\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Documents/Education/UniBas/HS23/databases/project/repo/group-1/venv/lib64/python3.11/site-packages/pandas/core/algorithms.py:1814\u001B[0m, in \u001B[0;36mmap_array\u001B[0;34m(arr, mapper, na_action, convert)\u001B[0m\n\u001B[1;32m 1812\u001B[0m values \u001B[38;5;241m=\u001B[39m arr\u001B[38;5;241m.\u001B[39mastype(\u001B[38;5;28mobject\u001B[39m, copy\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n\u001B[1;32m 1813\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m na_action \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m-> 1814\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mlib\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmap_infer\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalues\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmapper\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconvert\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mconvert\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1815\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 1816\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m lib\u001B[38;5;241m.\u001B[39mmap_infer_mask(\n\u001B[1;32m 1817\u001B[0m values, mapper, mask\u001B[38;5;241m=\u001B[39misna(values)\u001B[38;5;241m.\u001B[39mview(np\u001B[38;5;241m.\u001B[39muint8), convert\u001B[38;5;241m=\u001B[39mconvert\n\u001B[1;32m 1818\u001B[0m )\n", + "File \u001B[0;32mlib.pyx:2920\u001B[0m, in \u001B[0;36mpandas._libs.lib.map_infer\u001B[0;34m()\u001B[0m\n", + "Cell \u001B[0;32mIn[10], line 15\u001B[0m, in \u001B[0;36m\u001B[0;34m(geom)\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[38;5;66;03m#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;66;03m#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 12\u001B[0m geometry_column \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[0;32m---> 15\u001B[0m acc_df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m acc_df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m]\u001B[38;5;241m.\u001B[39mapply(\u001B[38;5;28;01mlambda\u001B[39;00m geom: WKTElement(\u001B[43mgeom\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mwkt\u001B[49m, srid\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4326\u001B[39m))\n\u001B[1;32m 17\u001B[0m acc_df\u001B[38;5;241m.\u001B[39mto_sql(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124maccidents\u001B[39m\u001B[38;5;124m'\u001B[39m, engine, if_exists\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mreplace\u001B[39m\u001B[38;5;124m'\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m, dtype\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgeometry\u001B[39m\u001B[38;5;124m'\u001B[39m: Geometry(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPOINT\u001B[39m\u001B[38;5;124m'\u001B[39m, srid\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4326\u001B[39m)})\n", + "\u001B[0;31mAttributeError\u001B[0m: 'str' object has no attribute 'wkt'" + ] + } + ], "source": [ "from sqlalchemy import create_engine\n", "from geoalchemy2 import Geometry, WKTElement\n", @@ -192,7 +325,11 @@ "\n" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-03T14:15:49.286594299Z", + "start_time": "2024-01-03T14:15:48.770718191Z" + } }, "id": "fa76af8343443d7a" }, @@ -204,7 +341,10 @@ "engine.dispose()" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "start_time": "2024-01-03T14:15:49.287300519Z" + } }, "id": "bc0a23a5126e76c2" }