diff --git a/src/integrate.py b/src/integrate.py index 41c5b64..2df95c1 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -61,6 +61,9 @@ def process_miv_data(): days = dt_obj.dt.weekday miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x]) + # Convert row type to int so they match other + miv_df_cols_dropped['EKoord'] = miv_df_cols_dropped['EKoord'].astype(int) + miv_df_cols_dropped['NKoord'] = miv_df_cols_dropped['NKoord'].astype(int) cleaned_miv_df = miv_df_cols_dropped return cleaned_miv_df diff --git a/src/testArea.ipynb b/src/testArea.ipynb index 3104921..270b2ac 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -21,8 +21,21 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 2, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/seb/Projects/repos/group-1/src/integrate.py:62: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n" + ] + } + ], "source": [ "\n", "miv_df = intr.process_miv_data()\n", @@ -30,8 +43,8 @@ ], "metadata": { "collapsed": false, - "is_executing": true, "ExecuteTime": { + "end_time": "2023-12-03T12:20:31.968179Z", "start_time": "2023-12-03T12:17:04.199209Z" } }, @@ -76,14 +89,37 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 3, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "u_string RoadTrafficAccidentLocations.json\n", + "Filepath: datasets/RoadTrafficAccidentLocations.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/seb/Projects/repos/group-1/src/integrate.py:78: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " cleaned_acc_df.rename(columns={\n" + ] + } + ], "source": [ "acc_df = intr.process_accident_data(True)" ], "metadata": { "collapsed": false, - "is_executing": true + "ExecuteTime": { + "end_time": "2023-12-03T12:20:47.066579Z", + "start_time": "2023-12-03T12:20:31.964275Z" + } }, "id": "f86bc612060b17a4" }, @@ -104,8 +140,59 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 4, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accident Columns:\n", + "AccidentUID object\n", + "AccidentHour object\n", + "AccidentYear object\n", + "AccidentWeekDay_en object\n", + "AccidentType object\n", + "AccidentSeverityCategory object\n", + "AccidentInvolvingPedestrian object\n", + "AccidentInvolvingBicycle object\n", + "AccidentInvolvingMotorcycle object\n", + "RoadType object\n", + "RoadType_en object\n", + "EKoord object\n", + "NKoord object\n", + "AccidentMonth object\n", + "geometry geometry\n", + "dtype: object\n", + "\n", + "MIV Columns:\n", + "MSID object\n", + "ZSID object\n", + "Achse object\n", + "EKoord float64\n", + "NKoord float64\n", + "Richtung object\n", + "AnzFahrzeuge float64\n", + "AnzFahrzeugeStatus object\n", + "Date object\n", + "Hrs object\n", + "Weekday_en object\n", + "dtype: object\n", + "\n", + "FB Cols:\n", + "OST int64\n", + "NORD int64\n", + "DATE object\n", + "HRS object\n", + "VELO_IN float64\n", + "VELO_OUT float64\n", + "FUSS_IN float64\n", + "FUSS_OUT float64\n", + "Weekday_en object\n", + "ID int64\n", + "dtype: object\n" + ] + } + ], "source": [ "print(\"Accident Columns:\")\n", "print(acc_df.dtypes)\n", @@ -118,7 +205,10 @@ ], "metadata": { "collapsed": false, - "is_executing": true + "ExecuteTime": { + "end_time": "2023-12-03T12:20:47.067419Z", + "start_time": "2023-12-03T12:20:47.063397Z" + } }, "id": "242041cd369d8454" }, @@ -137,27 +227,88 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MIV unqiue: 0 2683009.89\n", + "1 2683009.89\n", + "2 2683009.89\n", + "3 2683009.89\n", + "4 2683009.89\n", + " ... \n", + "16699185 2682704.50\n", + "16699186 2682704.50\n", + "16699187 2682704.50\n", + "16699188 2682704.50\n", + "16699189 2682704.50\n", + "Name: EKoord, Length: 16699190, dtype: float64\n", + "Acc unique: 0 2684605\n", + "1 2682382\n", + "2 2682791\n", + "3 2681199\n", + "4 2682479\n", + " ... \n", + "55821 2682244\n", + "55822 2680029\n", + "55823 2684990\n", + "55824 2678025\n", + "55825 2684500\n", + "Name: EKoord, Length: 55826, dtype: object\n", + "FB unique: 0 2678956\n", + "1 2678956\n", + "2 2678956\n", + "3 2678956\n", + "4 2678956\n", + " ... \n", + "3011488 2684578\n", + "3011489 2684578\n", + "3011490 2684578\n", + "3011491 2684578\n", + "3011492 2684578\n", + "Name: OST, Length: 3011493, dtype: int64\n" + ] + } + ], "source": [ - "print(\"MIV unqiue:\", miv_df['EKoord'].unique().shape)\n", - "print(\"Acc unique:\", acc_df['AccidentLocation_CHLV95_E'].unique().shape)\n", - "print(\"FB unique: \", fb_data['OST'].unique())\n" + "print(\"MIV unqiue:\", miv_df['EKoord'])\n", + "print(\"Acc unique:\", acc_df['EKoord'])\n", + "print(\"FB unique: \", fb_data['OST'])\n" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-03T12:33:32.280058Z", + "start_time": "2023-12-03T12:33:32.275419Z" + } }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en EKoord NKoord \\\n0 false rt433 Minor road 2684605 1245194 \n1 false rt433 Minor road 2682382 1246980 \n2 false rt439 Other 2682791 1247749 \n3 false rt433 Minor road 2681199 1247102 \n4 false rt433 Minor road 2682479 1250690 \n\n AccidentMonth geometry \n0 1 POINT Z (8.55841 47.35217 0.00000) \n1 1 POINT Z (8.52932 47.36851 0.00000) \n2 1 POINT Z (8.53488 47.37538 0.00000) \n3 1 POINT Z (8.51368 47.36976 0.00000) \n4 1 POINT Z (8.53129 47.40186 0.00000) ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AccidentUIDAccidentHourAccidentYearAccidentWeekDay_enAccidentTypeAccidentSeverityCategoryAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_enEKoordNKoordAccidentMonthgeometry
0A2D2677533867004E0430A865E337004002011Saturdayat0as4falsefalsefalsert433Minor road268460512451941POINT Z (8.55841 47.35217 0.00000)
19FD6441F802C20A6E0430A865E3320A6012011Saturdayat0as3falsetruefalsert433Minor road268238212469801POINT Z (8.52932 47.36851 0.00000)
29FDA0DC4856A6094E0430A865E336094022011Saturdayat0as4falsefalsefalsert439Other268279112477491POINT Z (8.53488 47.37538 0.00000)
3A3B66E42396E6000E0430A865E336000022011Saturdayat5as3falsefalsefalsert433Minor road268119912471021POINT Z (8.51368 47.36976 0.00000)
49FDA0DBE8CCE9096E0430A865E339096032011Saturdayat0as4falsefalsefalsert433Minor road268247912506901POINT Z (8.53129 47.40186 0.00000)
\n
" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "acc_df.head()" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-03T12:32:55.249260Z", + "start_time": "2023-12-03T12:32:55.235008Z" + } }, "id": "a159cafa9c227b88" },