Convert MIV coordinate to Int

This commit is contained in:
Sebastian Lenzlinger 2023-12-03 13:37:11 +01:00
parent 94ee3cc3b0
commit 1ef7bbe39b
2 changed files with 172 additions and 18 deletions

View File

@ -61,6 +61,9 @@ def process_miv_data():
days = dt_obj.dt.weekday days = dt_obj.dt.weekday
miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x]) miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])
# Convert the coordinate columns to int so they match the other datasets
miv_df_cols_dropped['EKoord'] = miv_df_cols_dropped['EKoord'].astype(int)
miv_df_cols_dropped['NKoord'] = miv_df_cols_dropped['NKoord'].astype(int)
cleaned_miv_df = miv_df_cols_dropped cleaned_miv_df = miv_df_cols_dropped
return cleaned_miv_df return cleaned_miv_df

View File

@ -21,8 +21,21 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/seb/Projects/repos/group-1/src/integrate.py:62: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n"
]
}
],
"source": [ "source": [
"\n", "\n",
"miv_df = intr.process_miv_data()\n", "miv_df = intr.process_miv_data()\n",
@ -30,8 +43,8 @@
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"is_executing": true,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-12-03T12:20:31.968179Z",
"start_time": "2023-12-03T12:17:04.199209Z" "start_time": "2023-12-03T12:17:04.199209Z"
} }
}, },
@ -76,14 +89,37 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"u_string RoadTrafficAccidentLocations.json\n",
"Filepath: datasets/RoadTrafficAccidentLocations.json\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/seb/Projects/repos/group-1/src/integrate.py:78: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" cleaned_acc_df.rename(columns={\n"
]
}
],
"source": [ "source": [
"acc_df = intr.process_accident_data(True)" "acc_df = intr.process_accident_data(True)"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"is_executing": true "ExecuteTime": {
"end_time": "2023-12-03T12:20:47.066579Z",
"start_time": "2023-12-03T12:20:31.964275Z"
}
}, },
"id": "f86bc612060b17a4" "id": "f86bc612060b17a4"
}, },
@ -104,8 +140,59 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 4,
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accident Columns:\n",
"AccidentUID object\n",
"AccidentHour object\n",
"AccidentYear object\n",
"AccidentWeekDay_en object\n",
"AccidentType object\n",
"AccidentSeverityCategory object\n",
"AccidentInvolvingPedestrian object\n",
"AccidentInvolvingBicycle object\n",
"AccidentInvolvingMotorcycle object\n",
"RoadType object\n",
"RoadType_en object\n",
"EKoord object\n",
"NKoord object\n",
"AccidentMonth object\n",
"geometry geometry\n",
"dtype: object\n",
"\n",
"MIV Columns:\n",
"MSID object\n",
"ZSID object\n",
"Achse object\n",
"EKoord float64\n",
"NKoord float64\n",
"Richtung object\n",
"AnzFahrzeuge float64\n",
"AnzFahrzeugeStatus object\n",
"Date object\n",
"Hrs object\n",
"Weekday_en object\n",
"dtype: object\n",
"\n",
"FB Cols:\n",
"OST int64\n",
"NORD int64\n",
"DATE object\n",
"HRS object\n",
"VELO_IN float64\n",
"VELO_OUT float64\n",
"FUSS_IN float64\n",
"FUSS_OUT float64\n",
"Weekday_en object\n",
"ID int64\n",
"dtype: object\n"
]
}
],
"source": [ "source": [
"print(\"Accident Columns:\")\n", "print(\"Accident Columns:\")\n",
"print(acc_df.dtypes)\n", "print(acc_df.dtypes)\n",
@ -118,7 +205,10 @@
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"is_executing": true "ExecuteTime": {
"end_time": "2023-12-03T12:20:47.067419Z",
"start_time": "2023-12-03T12:20:47.063397Z"
}
}, },
"id": "242041cd369d8454" "id": "242041cd369d8454"
}, },
@ -137,27 +227,88 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MIV unqiue: 0 2683009.89\n",
"1 2683009.89\n",
"2 2683009.89\n",
"3 2683009.89\n",
"4 2683009.89\n",
" ... \n",
"16699185 2682704.50\n",
"16699186 2682704.50\n",
"16699187 2682704.50\n",
"16699188 2682704.50\n",
"16699189 2682704.50\n",
"Name: EKoord, Length: 16699190, dtype: float64\n",
"Acc unique: 0 2684605\n",
"1 2682382\n",
"2 2682791\n",
"3 2681199\n",
"4 2682479\n",
" ... \n",
"55821 2682244\n",
"55822 2680029\n",
"55823 2684990\n",
"55824 2678025\n",
"55825 2684500\n",
"Name: EKoord, Length: 55826, dtype: object\n",
"FB unique: 0 2678956\n",
"1 2678956\n",
"2 2678956\n",
"3 2678956\n",
"4 2678956\n",
" ... \n",
"3011488 2684578\n",
"3011489 2684578\n",
"3011490 2684578\n",
"3011491 2684578\n",
"3011492 2684578\n",
"Name: OST, Length: 3011493, dtype: int64\n"
]
}
],
"source": [ "source": [
"print(\"MIV unqiue:\", miv_df['EKoord'].unique().shape)\n", "print(\"MIV unqiue:\", miv_df['EKoord'])\n",
"print(\"Acc unique:\", acc_df['AccidentLocation_CHLV95_E'].unique().shape)\n", "print(\"Acc unique:\", acc_df['EKoord'])\n",
"print(\"FB unique: \", fb_data['OST'].unique())\n" "print(\"FB unique: \", fb_data['OST'])\n"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-03T12:33:32.280058Z",
"start_time": "2023-12-03T12:33:32.275419Z"
}
}, },
"id": "f6d752ea17eda341" "id": "f6d752ea17eda341"
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 5,
"outputs": [], "outputs": [
{
"data": {
"text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en EKoord NKoord \\\n0 false rt433 Minor road 2684605 1245194 \n1 false rt433 Minor road 2682382 1246980 \n2 false rt439 Other 2682791 1247749 \n3 false rt433 Minor road 2681199 1247102 \n4 false rt433 Minor road 2682479 1250690 \n\n AccidentMonth geometry \n0 1 POINT Z (8.55841 47.35217 0.00000) \n1 1 POINT Z (8.52932 47.36851 0.00000) \n2 1 POINT Z (8.53488 47.37538 0.00000) \n3 1 POINT Z (8.51368 47.36976 0.00000) \n4 1 POINT Z (8.53129 47.40186 0.00000) ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>AccidentUID</th>\n <th>AccidentHour</th>\n <th>AccidentYear</th>\n <th>AccidentWeekDay_en</th>\n <th>AccidentType</th>\n <th>AccidentSeverityCategory</th>\n <th>AccidentInvolvingPedestrian</th>\n <th>AccidentInvolvingBicycle</th>\n <th>AccidentInvolvingMotorcycle</th>\n <th>RoadType</th>\n <th>RoadType_en</th>\n <th>EKoord</th>\n <th>NKoord</th>\n <th>AccidentMonth</th>\n <th>geometry</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>A2D2677533867004E0430A865E337004</td>\n <td>00</td>\n <td>2011</td>\n <td>Saturday</td>\n <td>at0</td>\n <td>as4</td>\n <td>false</td>\n <td>false</td>\n <td>false</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>2684605</td>\n <td>1245194</td>\n <td>1</td>\n <td>POINT Z (8.55841 47.35217 0.00000)</td>\n </tr>\n <tr>\n <th>1</th>\n <td>9FD6441F802C20A6E0430A865E3320A6</td>\n <td>01</td>\n <td>2011</td>\n <td>Saturday</td>\n <td>at0</td>\n <td>as3</td>\n <td>false</td>\n <td>true</td>\n <td>false</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>2682382</td>\n <td>1246980</td>\n <td>1</td>\n <td>POINT Z (8.52932 47.36851 0.00000)</td>\n </tr>\n <tr>\n <th>2</th>\n <td>9FDA0DC4856A6094E0430A865E336094</td>\n <td>02</td>\n <td>2011</td>\n <td>Saturday</td>\n <td>at0</td>\n <td>as4</td>\n <td>false</td>\n <td>false</td>\n <td>false</td>\n <td>rt439</td>\n <td>Other</td>\n <td>2682791</td>\n <td>1247749</td>\n <td>1</td>\n <td>POINT Z (8.53488 47.37538 0.00000)</td>\n </tr>\n <tr>\n <th>3</th>\n <td>A3B66E42396E6000E0430A865E336000</td>\n <td>02</td>\n <td>2011</td>\n <td>Saturday</td>\n <td>at5</td>\n <td>as3</td>\n <td>false</td>\n <td>false</td>\n <td>false</td>\n <td>rt433</td>\n <td>Minor road</td>\n 
<td>2681199</td>\n <td>1247102</td>\n <td>1</td>\n <td>POINT Z (8.51368 47.36976 0.00000)</td>\n </tr>\n <tr>\n <th>4</th>\n <td>9FDA0DBE8CCE9096E0430A865E339096</td>\n <td>03</td>\n <td>2011</td>\n <td>Saturday</td>\n <td>at0</td>\n <td>as4</td>\n <td>false</td>\n <td>false</td>\n <td>false</td>\n <td>rt433</td>\n <td>Minor road</td>\n <td>2682479</td>\n <td>1250690</td>\n <td>1</td>\n <td>POINT Z (8.53129 47.40186 0.00000)</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"acc_df.head()" "acc_df.head()"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-03T12:32:55.249260Z",
"start_time": "2023-12-03T12:32:55.235008Z"
}
}, },
"id": "a159cafa9c227b88" "id": "a159cafa9c227b88"
}, },