From 552b49f118dd2b8e2942f09957cb7336a74b084a Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Thu, 16 Nov 2023 17:27:21 +0100 Subject: [PATCH 01/22] Add Text File containing relevant datasource urls. First diary entries written. Wiki entries on how to setup a python virtual env for the project --- .gitignore | 1 + DataExploration.ipynb | 80 +++++++++++++++++++-------------- docs/diary.md | 16 +++++++ docs/wiki.md | 25 +++++++++++ requirements.txt | 102 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 191 insertions(+), 33 deletions(-) create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 89af448..c96c287 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Created by https://www.toptal.com/developers/gitignore/api/python,pycharm,linux,macos,database,data # Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm,linux,macos,database,data datasets/ +db23-project-venv/ ### Data ### *.csv *.dat diff --git a/DataExploration.ipynb b/DataExploration.ipynb index a52ecca..0d62426 100644 --- a/DataExploration.ipynb +++ b/DataExploration.ipynb @@ -2,14 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 1, "id": "17ca2acb", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, - "tags": [] + "tags": [], + "ExecuteTime": { + "end_time": "2023-11-16T16:20:49.426349Z", + "start_time": "2023-11-16T16:20:16.117316Z" + } }, "outputs": [ { @@ -156,7 +160,11 @@ "2 Unbekannt 2016-01-01T02:00:00 2021-02-03 213.0 Gemessen \n", "3 Unbekannt 2016-01-01T03:00:00 2021-02-03 112.0 Gemessen \n", "4 Unbekannt 2016-01-01T04:00:00 2021-02-03 80.0 Gemessen \n", - "Data for year 2017:\n", + "File not found for year 2017: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv\n", + "File not found for year 2018: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv\n", + "File not found for year 2019: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv\n", + "File not found for year 2020: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv\n", + "Data for year 2021:\n", " MSID MSName ZSID ZSName Achse \\\n", "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", @@ -179,34 +187,39 @@ "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", "\n", " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", - "0 Unbekannt 2017-01-01T00:00:00 2021-02-03 295.0 Gemessen \n", - "1 Unbekannt 2017-01-01T01:00:00 2021-02-03 264.0 Gemessen \n", - "2 Unbekannt 2017-01-01T02:00:00 2021-02-03 180.0 Gemessen \n", - "3 Unbekannt 2017-01-01T03:00:00 2021-02-03 107.0 Gemessen \n", - "4 Unbekannt 2017-01-01T04:00:00 2021-02-03 97.0 Gemessen \n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipykernel_311061/2135127822.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mtable_key\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"miv_{year}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 946\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 947\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 948\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 949\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 615\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 617\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1746\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1747\u001b[0m \u001b[0mcol_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1748\u001b[0;31m \u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0;31m# type: ignore[attr-defined]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1749\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1750\u001b[0m )\n", - "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 239\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 240\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._convert_column_data\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers._maybe_upcast\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/numpy/core/multiarray.py\u001b[0m in \u001b[0;36mputmask\u001b[0;34m(a, mask, values)\u001b[0m\n\u001b[1;32m 1129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1130\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1131\u001b[0;31m \u001b[0;34m@\u001b[0m\u001b[0marray_function_from_c_func_and_dispatcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_multiarray_umath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mputmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1132\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mputmask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m/\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1133\u001b[0m \"\"\"\n", - "\u001b[0;32m_pydevd_bundle/pydevd_cython.pyx\u001b[0m in \u001b[0;36m_pydevd_bundle.pydevd_cython.SafeCallWrapper.__call__\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m_pydevd_bundle/pydevd_cython.pyx\u001b[0m in \u001b[0;36m_pydevd_bundle.pydevd_cython.ThreadTracer.__call__\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/debugpy/_vendored/pydevd/_pydev_bundle/pydev_is_thread_alive.py\u001b[0m in \u001b[0;36mis_thread_alive\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_temp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_is_stopped'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0;31m# Python 3.x has this\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mis_thread_alive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_stopped\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "0 Unbekannt 2021-01-01T00:00:00 2021-12-30 122.0 Gemessen \n", + "1 Unbekannt 2021-01-01T01:00:00 2021-12-30 177.0 Gemessen \n", + "2 Unbekannt 2021-01-01T02:00:00 2021-12-30 125.0 Gemessen \n", + "3 Unbekannt 2021-01-01T03:00:00 2021-12-30 84.0 Gemessen \n", + "4 Unbekannt 2021-01-01T04:00:00 2021-12-30 49.0 Gemessen \n", + "Data for year 2022:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2022-01-01T00:00:00 2022-12-30 166.0 Gemessen \n", + "1 Unbekannt 2022-01-01T01:00:00 2022-12-30 255.0 Gemessen \n", + "2 Unbekannt 2022-01-01T02:00:00 2022-12-30 168.0 Gemessen \n", + "3 Unbekannt 2022-01-01T03:00:00 2022-12-30 96.0 Gemessen \n", + "4 Unbekannt 2022-01-01T04:00:00 2022-12-30 63.0 Gemessen \n" ] } ], @@ -219,7 +232,8 @@ "\n", "tables_dict = {}\n", "pd.set_option(\"display.max_columns\", None)\n", - "data_dir = 'data/'\n", + "data_dir = 'datasets/'\n", + "\n", "for year in range(2012, 2023):\n", " file_name = f'sid_dav_verkehrszaehlung_miv_OD2031_{year}.csv'\n", " file_path = os.path.join(data_dir, file_name)\n", @@ -701,7 +715,7 @@ "else:\n", " print(f\"Failed to download data. Status code: {response.status_code}\")\n", "\n", - "accidents_file_path = os.path.join(data_dir, ')\n" + "accidents_file_path = os.path.join(data_dir)\n" ] } ], diff --git a/docs/diary.md b/docs/diary.md index e69de29..5e94779 100644 --- a/docs/diary.md +++ b/docs/diary.md @@ -0,0 +1,16 @@ +# Project Diary + +| Version
0.01 | Author: <br/> michel.romancuk@stud.unibas.ch <br/> sebastian.lenzlinger@unibas.ch <br/> | HS 2023 <br/> Databases <br/> |
+|-----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Date | | Problems |
+| Oktober/ pre 16.11.23 | Decision to use postgres server. <br/> Server setup at slenzlinger.dev <br/> Setup pgadmin at [pgadmin.slenzlinger.dev](pgadmin.slenzlinger.dev) | |
+| 20.11.2022 | Integrated Events table with the time and location. Not done yet | Had problems with the identity of the rows. Solution is to add a primary key at the beginning of the CSV, which helps keep the identity of the rows and the integrity of the database. |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| |
\ No newline at end of file
diff --git a/docs/wiki.md b/docs/wiki.md
index e69de29..cb937d3 100644
--- a/docs/wiki.md
+++ b/docs/wiki.md
@@ -0,0 +1,25 @@
+# Setup of a virtual Python dev env
+First, open the terminal and make sure you are in the root directory.
+All steps assume you are in the root folder.
+## Creating the virtual environment
+```
+python3 -m venv db23-project-venv
+```
+## Activating the virtual environment
+```
+source db23-project-venv/bin/activate
+```
+#### When in the environment ``db23-project-venv``, just install all needed packages.
+```
+pip3 install pkg_name
+```
+## Getting back out
+```
+deactivate
+```
+
+# List of used packages
+See ``requirements.txt``
+
+# Setting up postgres
+# Setting up pgadmin as container served by nginx
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8d6bbeb
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,102 @@
+anyio==4.0.0
+appnope==0.1.3
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+attrs==23.1.0
+Babel==2.13.1
+beautifulsoup4==4.12.2
+black==23.11.0
+bleach==6.1.0
+certifi==2023.7.22
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+comm==0.2.0
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+executing==2.0.1
+fastjsonschema==2.19.0
+fqdn==1.5.1
+idna==3.4
+ipykernel==6.26.0
+ipython==8.17.2
+ipywidgets==8.1.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+json5==0.9.14
+jsonpointer==2.4
+jsonschema==4.19.2
+jsonschema-specifications==2023.11.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter-lsp==2.2.0
+jupyter_client==8.6.0
+jupyter_core==5.5.0
+jupyter_server==2.10.1
+jupyter_server_terminals==0.4.4
+jupyterlab==4.0.8
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==3.0.9
+jupyterlab_server==2.25.1
+MarkupSafe==2.1.3
+matplotlib-inline==0.1.6
+mistune==3.0.2
+mypy-extensions==1.0.0
+nbclient==0.9.0
+nbconvert==7.11.0
+nbformat==5.9.2
+nest-asyncio==1.5.8
+notebook==7.0.6
+notebook_shim==0.2.3
+numpy==1.26.2
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.3
+pandocfilters==1.5.0
+parso==0.8.3
+pathspec==0.11.2
+pexpect==4.8.0
+platformdirs==4.0.0
+prometheus-client==0.18.0
+prompt-toolkit==3.0.41
+psutil==5.9.6
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+Pygments==2.16.1
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==25.1.1
+qtconsole==5.5.1
+QtPy==2.4.1
+referencing==0.31.0
+requests==2.31.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rpds-py==0.13.0
+Send2Trash==1.8.2
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+stack-data==0.6.3
+terminado==0.18.0
+tinycss2==1.2.1
+tornado==6.3.3
+traitlets==5.13.0
+types-python-dateutil==2.8.19.14
+tzdata==2023.3
+uri-template==1.3.0
+urllib3==2.1.0
+wcwidth==0.2.10
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.6.4
+widgetsnbextension==4.0.9

From 8cf5940a4d8284e20bf46e10e2fcabcbb15c6693 Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Thu, 16 Nov 2023 18:04:10 +0100
Subject: [PATCH 02/22] Add Text File containing 
relevant datasource urls. First diary entries written. Wiki entries on how to setup a python virtual env for the project --- docs/diary.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/diary.md b/docs/diary.md index 5e94779..5d4030c 100644 --- a/docs/diary.md +++ b/docs/diary.md @@ -1,16 +1,16 @@ # Project Diary -| Version
0.01 | Author: <br/> michel.romancuk@stud.unibas.ch <br/> sebastian.lenzlinger@unibas.ch <br/> | HS 2023 <br/> Databases <br/> |
-|-----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| Date | | Problems |
-| Oktober/ pre 16.11.23 | Decision to use postgres server. <br/> Server setup at slenzlinger.dev <br/> Setup pgadmin at [pgadmin.slenzlinger.dev](pgadmin.slenzlinger.dev) | |
-| 20.11.2022 | Integrated Events table with the time and location. Not done yet | Had problems with the identity of the rows. Solution is to add a primary key at the beginning of the CSV, which helps keep the identity of the rows and the integrity of the database. |
-| | |
-| | |
-| | |
-| | |
-| | |
-| | |
-| | |
-| | |
+| Version <br/> 0.00 | Author: <br/> michel.romancuk@stud.unibas.ch <br/> sebastian.lenzlinger@unibas.ch <br/> | HS 2023 <br/> Databases <br/> |
+|-----------------------|-------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------|
+| Date | | Problems |
+| Oktober/ pre 16.11.23 | Decision to use postgres server. <br/> Server setup. <br/> Setup pgadmin at [pgadmin.slenzlinger.dev](pgadmin.slenzlinger.dev) | |
+| 16.11.23 | Set up repo and wrote some instructions into ``wiki.md`` on how to set up the environment. | Realized the steps for how postgres, pgadmin, nginx etc. were set up were not written down at the time. |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
 | |
\ No newline at end of file

From 653c3341ce9b5a5ac3774c223dc51c8819ee07fb Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Thu, 16 Nov 2023 18:40:42 +0100
Subject: [PATCH 03/22] Add Text File containing relevant datasource urls. First diary entries written. Wiki entries on how to setup a python virtual env for the project

---
 docs/diary.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/diary.md b/docs/diary.md
index 5d4030c..fc63c34 100644
--- a/docs/diary.md
+++ b/docs/diary.md
@@ -1,3 +1,7 @@
+# TODOs
+* Write a script that makes tables and inserts the data.
+* Find out if data cleaning can be done in python with pandas or if it all must be SQL scripts.
+
 # Project Diary
 
 | Version <br/>
0.00 | Author:
michel.romancuk@stud.unibas.ch
sebastian.lenzlinger@unibas.ch
| HS 2023
Databases
| From 77bf140efcbc0a33f16e13b172f62eddb93ad49a Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Thu, 16 Nov 2023 18:40:42 +0100 Subject: [PATCH 04/22] Add Text File containing relevant datasource urls. First diary entries written. Wiki entries on how to setup a python virtual env for the project --- docs/accident_loc_urls.txt | 1 + docs/all_csv_urls.txt | 22 ++++++ docs/diary.md | 4 + docs/foot_bike_zaehlung_urls.txt | 11 +++ docs/verkehrszaehlung_moto_urls.txt | 11 +++ src/data_utils.py | 118 ++++++++++++++++++++++++++++ src/integrate.py | 77 ++++++++++++++++++ src/preparations.py | 3 + 8 files changed, 247 insertions(+) create mode 100644 docs/accident_loc_urls.txt create mode 100644 docs/all_csv_urls.txt create mode 100644 docs/foot_bike_zaehlung_urls.txt create mode 100644 docs/verkehrszaehlung_moto_urls.txt create mode 100644 src/data_utils.py create mode 100644 src/integrate.py create mode 100644 src/preparations.py diff --git a/docs/accident_loc_urls.txt b/docs/accident_loc_urls.txt new file mode 100644 index 0000000..1378079 --- /dev/null +++ b/docs/accident_loc_urls.txt @@ -0,0 +1 @@ +https://data.stadt-zuerich.ch/dataset/sid_dav_strassenverkehrsunfallorte/download/RoadTrafficAccidentLocations.json diff --git a/docs/all_csv_urls.txt b/docs/all_csv_urls.txt new file mode 100644 index 0000000..b9731a0 --- /dev/null +++ b/docs/all_csv_urls.txt @@ -0,0 +1,22 @@ +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2012.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2013.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2014.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2015.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2016.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2021.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2022.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2012_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2013_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2014_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2015_verkehrszaehlungen_werte_fussgaenger_velo.csv 
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2017_verkehrszaehlungen_werte_fussgaenger_velo.csv
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2018_verkehrszaehlungen_werte_fussgaenger_velo.csv
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2020_verkehrszaehlungen_werte_fussgaenger_velo.csv
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2021_verkehrszaehlungen_werte_fussgaenger_velo.csv
+https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv
\ No newline at end of file
diff --git a/docs/diary.md b/docs/diary.md
index 5d4030c..fc63c34 100644
--- a/docs/diary.md
+++ b/docs/diary.md
@@ -1,3 +1,7 @@
+# TODOs
+* Write a script that makes tables and inserts the data.
+* Find out if data cleaning can be done in python with pandas or if it all must be SQL scripts.
+
 # Project Diary
 
 | Version <br/>
0.00 | Author:
michel.romancuk@stud.unibas.ch
sebastian.lenzlinger@unibas.ch
| HS 2023
Databases
| diff --git a/docs/foot_bike_zaehlung_urls.txt b/docs/foot_bike_zaehlung_urls.txt new file mode 100644 index 0000000..00f6353 --- /dev/null +++ b/docs/foot_bike_zaehlung_urls.txt @@ -0,0 +1,11 @@ +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2012_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2013_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2014_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2015_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2017_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2018_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2020_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2021_verkehrszaehlungen_werte_fussgaenger_velo.csv +https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv \ No newline at end of file diff --git a/docs/verkehrszaehlung_moto_urls.txt b/docs/verkehrszaehlung_moto_urls.txt new file mode 100644 index 0000000..427888d --- /dev/null +++ b/docs/verkehrszaehlung_moto_urls.txt @@ -0,0 +1,11 @@ +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2012.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2013.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2014.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2015.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2016.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2021.csv +https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2022.csv \ No newline at end of file diff --git a/src/data_utils.py 
b/src/data_utils.py
new file mode 100644
index 0000000..584619c
--- /dev/null
+++ b/src/data_utils.py
@@ -0,0 +1,118 @@
+# data_utils.py
+
+import os
+import pandas as pd
+import requests
+from urllib.parse import urlparse
+import geopandas as gpd
+from concurrent.futures import ThreadPoolExecutor as tpe
+
+
+def download_csv(url, local_filename):
+    with requests.get(url, stream=True) as r:
+        r.raise_for_status()
+        with open(local_filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+
+
+def process_urls(data_dir, urls_file):
+    # Ensure the data directory exists
+    if not os.path.exists(data_dir):
+        os.makedirs(data_dir)
+
+    # Read URLs from the file
+    with open(urls_file, 'r') as file:
+        urls = file.readlines()
+
+    # Process each URL
+    for url in urls:
+        url = url.strip()
+        filename = os.path.basename(urlparse(url).path)
+        local_filename = os.path.join(data_dir, filename)
+
+        # Check if the file already exists
+        if not os.path.isfile(local_filename):
+            print(f"Downloading {url}...")
+            download_csv(url, local_filename)
+            print(f"Saved to {local_filename}")
+        else:
+            print(f"File {filename} already exists in {data_dir}, skipping download.")
+
+
+def load_dataframe_from_csv(filepath):
+    try:
+        df = pd.read_csv(filepath, low_memory=False)
+        return df
+    except Exception as e:
+        print(f"Error loading {filepath}: {e}")
+        return None
+
+
+def load_dataframes_from_csv_files(data_dir, u_string):
+    dataframes = []
+
+    with tpe(max_workers=5) as executor:
+        for filename in os.listdir(data_dir):
+            if (u_string in filename) and filename.endswith('.csv'):
+                filepath = os.path.join(data_dir, filename)
+                future = executor.submit(load_dataframe_from_csv, filepath)
+                dataframes.append(future)
+
+    dataframes = [future.result() for future in dataframes if future.result() is not None]
+
+    return dataframes
+
+    # for filename in os.listdir(data_dir):
+    #     if (u_string in filename) and filename.endswith('.csv'):
+    #         filepath = os.path.join(data_dir, filename)
+    #         df = pd.read_csv(filepath, low_memory=False)
+    #         dataframes.append(df)
+    # return dataframes
+
+
+def load_dataframes_from_geojson_files(data_dir, u_string):
+    print('u_string', u_string)
+    gdf = gpd.GeoDataFrame()
+    for filename in os.listdir(data_dir):
+        print("Filename:", filename)
+        if (u_string in filename) and filename.endswith('.json'):
+            filepath = os.path.join(data_dir, filename)
+            print("Filepath:", filepath)
+            gdf = gpd.read_file(filepath)  # Read GeoJSON directly as GeoDataFrame
+
+    return gdf
+
+
+def combine_dataframes(dataframes):
+    if dataframes:
+        combined_dataframe = pd.concat(dataframes, ignore_index=True)
+        return combined_dataframe
+    else:
+        print("No dataframes to combine")
+        return pd.DataFrame()  # Return an empty DataFrame
+
+
+def create_unified_df(urls_file, u_string, data_dir, files_present=False):
+    df_list = []
+    df_unified = None
+    if not files_present:
+        process_urls(data_dir, urls_file)
+
+    df_list = load_dataframes_from_csv_files(data_dir, u_string)
+    df_unified = combine_dataframes(df_list)
+
+    return df_unified
+
+
+def save_dataframe_to_csv(df, integrated_dir, filename):
+    pass
+
+
+if __name__ == "__main__":
+    # Test the functions here if necessary
+    csv_urls_file = '../docs/all_csv_urls.txt'
+    datasets_dir = 'datasets/'
+    output_file = 'column_names.txt'
+    process_urls(datasets_dir, csv_urls_file)
+    # extract_column_names(datasets_dir, output_file)
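The `save_dataframe_to_csv` helper added above is still an empty stub (`pass`). As a hedged sketch only, not part of the committed patch: assuming the cleaned frames are meant to land as plain CSV files under `integrated_dir`, one possible implementation could look like this:

```python
import os
import pandas as pd

# Hypothetical sketch, not the committed code: write a cleaned frame to
# <integrated_dir>/<filename>, creating the directory on first use.
def save_dataframe_to_csv(df: pd.DataFrame, integrated_dir: str, filename: str) -> None:
    os.makedirs(integrated_dir, exist_ok=True)  # no-op if the directory already exists
    df.to_csv(os.path.join(integrated_dir, filename), index=False)
```

Writing with `index=False` would keep the synthetic pandas index out of the CSV, which matters if a primary key column is later prepended as described in the diary.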
diff --git a/src/integrate.py b/src/integrate.py
new file mode 100644
index 0000000..050fcb4
--- /dev/null
+++ b/src/integrate.py
@@ -0,0 +1,77 @@
+import data_utils as du
+from datetime import datetime as dt
+import os
+import requests
+import pandas as pd
+
+foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
+miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
+accident_file_url = '../docs/accident_loc_urls.txt'
+
+# Using u_string to discriminate between files that belong to each other
+motor_file_u_string = 'sid_dav_verkehrszaehlung_miv_OD2031'
+foot_bike_file_u_string = 'velo.csv'
+accident_file_u_string = 'RoadTrafficAccidentLocations.json'
+
+data_dir = 'datasets/'
+integrated_dir = 'datasets/integrated/'
+
+weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
+
+
+def process_foot_bike_data():
+    fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir, files_present=True)
+    fb_df_unified[['DATE', "TIME"]] = fb_df_unified['DATUM'].str.split('T', expand=True)
+    fb_df_unified[['HRS', 'MINS']] = fb_df_unified['TIME'].str.split(':', expand=True)
+    ## We might need FK_ZAEHLER after all
+    fb_cols_to_drop = ['DATUM']
+    fb_df_unified_correct_cols = fb_df_unified.drop(columns=fb_cols_to_drop, axis=1)
+    fb_df_unified_correct_cols.fillna(0, inplace=True)
+    fb_df_grouped = fb_df_unified_correct_cols.groupby(['OST', 'NORD', 'DATE', 'HRS']).agg({
+        'VELO_IN': 'sum',
+        'VELO_OUT': 'sum',
+        'FUSS_IN': 'sum',
+        'FUSS_OUT': 'sum'
+    }).reset_index()
+    dt_obj = pd.to_datetime(fb_df_grouped['DATE'])
+    days = dt_obj.dt.weekday
+    fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x])
+    cleaned_fb_df = fb_df_grouped
+    return cleaned_fb_df
+
+
+def process_miv_data():
+    miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True)
+
+    miv_df_unified[['Date', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True)
+    miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True)
+
+    miv_cols_to_keep = ['MSID', 'ZSID', 'Achse', 'EKoord', 'NKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus',
+                        'Date', 'Hrs']
+    miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep].copy()  # .copy() avoids SettingWithCopyWarning below
+
+    dt_obj = pd.to_datetime(miv_df_cols_dropped['Date'])
+    days = dt_obj.dt.weekday
+    miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])
+
+
+    cleaned_miv_df = miv_df_cols_dropped
+    return cleaned_miv_df
+
+
+def process_accident_data():
+
+    acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
+    acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
+                        'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
+                        'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
+                        'AccidentLocation_CHLV95_N', 'geometry']
+    cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
+    return cleaned_acc_df
+
+
+if __name__ == '__main__':
+    miv_df = process_miv_data()
+    print(miv_df['Date'])
+    print(miv_df.dtypes)
+    print(miv_df.head(100))
diff --git a/src/preparations.py b/src/preparations.py
new file mode 100644
index 0000000..499d1d6
--- /dev/null
+++ b/src/preparations.py
@@ -0,0 +1,3 @@
+import data_utils
+
+

From 101243fae858bf412187d9a5af6a5edc3550eb8b Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 00:28:25 +0100
Subject: [PATCH 05/22] Add test notebook for debugging on a different machine

---
 src/testArea.ipynb | 383 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 
383 insertions(+) create mode 100644 src/testArea.ipynb diff --git a/src/testArea.ipynb b/src/testArea.ipynb new file mode 100644 index 0000000..385300d --- /dev/null +++ b/src/testArea.ipynb @@ -0,0 +1,383 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from datetime import datetime as dt\n", + "\n", + "import integrate as intr\n" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T20:45:25.057214Z", + "start_time": "2023-12-02T20:45:24.634062Z" + } + }, + "id": "be55b25929d95559" + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/seb/Projects/repos/group-1/src/integrate.py:55: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n" + ] + } + ], + "source": [ + "\n", + "miv_df = intr.process_miv_data()\n", + "fb_data = intr.process_foot_bike_data()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T20:49:39.247032Z", + "start_time": "2023-12-02T20:45:26.952158Z" + } + }, + "id": "dd3831953afdeb72" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_df = miv_df\n" + ], + "metadata": { + "collapsed": false + }, + "id": "14471cd78389ce4d" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_df.dtypes\n", + "date_object = pd.to_datetime(test_df['Date'])\n" + ], + "metadata": { + "collapsed": false + }, + "id": "c70d21adef38fd68" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_df['Weekday_Name']" + ], + "metadata": { + "collapsed": false + }, + "id": "d0df3c0ef49e8061" + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "u_string RoadTrafficAccidentLocations.json\n", + "Filename: 2017_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: RoadTrafficAccidentLocations.json\n", + "Filepath: datasets/RoadTrafficAccidentLocations.json\n", + "Filename: 2016_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: 2022_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: 2015_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: 2019_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2013.csv\n", + "Filename: 2021_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2012.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2016.csv\n", + "Filename: 2014_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: 2018_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2017.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2015.csv\n", + "Filename: 2020_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2014.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2019.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2018.csv\n", + "Filename: 
2013_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2022.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2020.csv\n", + "Filename: 2012_verkehrszaehlungen_werte_fussgaenger_velo.csv\n", + "Filename: sid_dav_verkehrszaehlung_miv_OD2031_2021.csv\n" + ] + } + ], + "source": [ + "acc_df = intr.process_accident_data()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T20:50:19.543547Z", + "start_time": "2023-12-02T20:50:05.258441Z" + } + }, + "id": "f86bc612060b17a4" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "acc_df.head()\n", + "acc_df['AccidentWeekDay'].unique()\n", + "#acc_df.dtypes\n", + "date_obj = dt.strptime(acc_df[''])\n" + ], + "metadata": { + "collapsed": false + }, + "id": "6affbeea6c7cf3ef" + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accident Columns:\n", + "AccidentUID object\n", + "AccidentHour object\n", + "AccidentYear object\n", + "AccidentWeekDay_en object\n", + "AccidentType object\n", + "AccidentSeverityCategory object\n", + "AccidentInvolvingPedestrian object\n", + "AccidentInvolvingBicycle object\n", + "AccidentInvolvingMotorcycle object\n", + "RoadType object\n", + "RoadType_en object\n", + "AccidentLocation_CHLV95_E object\n", + "AccidentLocation_CHLV95_N object\n", + "geometry geometry\n", + "dtype: object\n", + "\n", + "MIV Columns:\n", + "MSID object\n", + "ZSID object\n", + "Achse object\n", + "EKoord float64\n", + "NKoord float64\n", + "Richtung object\n", + "AnzFahrzeuge float64\n", + "AnzFahrzeugeStatus object\n", + "Date object\n", + "Hrs object\n", + "Weekday_en object\n", + "dtype: object\n", + "\n", + "FB Cols:\n", + "OST int64\n", + "NORD int64\n", + "DATE object\n", + "HRS object\n", + "VELO_IN float64\n", + "VELO_OUT float64\n", + "FUSS_IN float64\n", + "FUSS_OUT float64\n", + "Weekday_en object\n", + "dtype: object\n" + ] + } + ], + "source": [ + "print(\"Accident Columns:\")\n", + "print(acc_df.dtypes)\n", + "print()\n", + "print(\"MIV Columns:\")\n", + "print(miv_df.dtypes)\n", + "print()\n", + "print(\"FB Cols:\")\n", + "print(fb_data.dtypes)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T20:50:32.272482Z", + "start_time": "2023-12-02T20:50:32.270846Z" + } + }, + "id": "242041cd369d8454" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "1841925ee109a417" + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MIV unqiue: (187,)\n", + "Acc unique: (8661,)\n", + "FB unique: (62,)\n" + ] + } + ], + "source": [ + "print(\"MIV unqiue:\", miv_df['EKoord'].unique().shape)\n", + "print(\"Acc unique:\", acc_df['AccidentLocation_CHLV95_E'].unique().shape)\n", + "print(\"FB unique: \", fb_data['OST'].unique())\n" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T21:59:53.075227Z", + "start_time": "2023-12-02T21:59:52.868698Z" + } + }, + "id": "f6d752ea17eda341" + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 
2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en AccidentLocation_CHLV95_E \\\n0 false rt433 Minor road 2684605 \n1 false rt433 Minor road 2682382 \n2 false rt439 Other 2682791 \n3 false rt433 Minor road 2681199 \n4 false rt433 Minor road 2682479 \n\n AccidentLocation_CHLV95_N geometry \n0 1245194 POINT Z (8.55841 47.35217 0.00000) \n1 1246980 POINT Z (8.52932 47.36851 0.00000) \n2 1247749 POINT Z (8.53488 47.37538 0.00000) \n3 1247102 POINT Z (8.51368 47.36976 0.00000) \n4 1250690 POINT Z (8.53129 47.40186 0.00000) ", + "text/html": "
" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acc_df.head()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T20:52:32.645509Z", + "start_time": "2023-12-02T20:52:32.643877Z" + } + }, + "id": "a159cafa9c227b88" + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "ename": "ProgrammingError", + "evalue": "(psycopg2.ProgrammingError) can't adapt type 'Point'\n[SQL: INSERT INTO table_name (\"AccidentUID\", \"AccidentHour\", \"AccidentYear\", \"AccidentWeekDay_en\", \"AccidentType\", \"AccidentSeverityCategory\", \"AccidentInvolvingPedestrian\", \"AccidentInvolvingBicycle\", \"AccidentInvolvingMotorcycle\", \"RoadType\", \"RoadType_e ... 398437 characters truncated ... n__999)s, %(AccidentLocation_CHLV95_E__999)s, %(AccidentLocation_CHLV95_N__999)s, %(geometry__999)s)]\n[parameters: {'AccidentYear__0': '2011', 'AccidentLocation_CHLV95_N__0': '1245194', 'AccidentType__0': 'at0', 'AccidentSeverityCategory__0': 'as4', 'RoadType_en__0': 'Minor road', 'AccidentLocation_CHLV95_E__0': '2684605', 'AccidentWeekDay_en__0': 'Saturday', 'AccidentInvolvingMotorcycle__0': 'false', 'RoadType__0': 'rt433', 'AccidentUID__0': 'A2D2677533867004E0430A865E337004', 'AccidentInvolvingPedestrian__0': 'false', 'AccidentHour__0': '00', 'geometry__0': , 'AccidentInvolvingBicycle__0': 'false', 'AccidentYear__1': '2011', 'AccidentLocation_CHLV95_N__1': '1246980', 'AccidentType__1': 'at0', 'AccidentSeverityCategory__1': 'as3', 'RoadType_en__1': 'Minor road', 'AccidentLocation_CHLV95_E__1': '2682382', 'AccidentWeekDay_en__1': 'Saturday', 'AccidentInvolvingMotorcycle__1': 'false', 'RoadType__1': 'rt433', 'AccidentUID__1': '9FD6441F802C20A6E0430A865E3320A6', 'AccidentInvolvingPedestrian__1': 'false', 'AccidentHour__1': '01', 'geometry__1': , 'AccidentInvolvingBicycle__1': 'true', 'AccidentYear__2': '2011', 'AccidentLocation_CHLV95_N__2': '1247749', 'AccidentType__2': 'at0', 'AccidentSeverityCategory__2': 'as4', 'RoadType_en__2': 'Other', 'AccidentLocation_CHLV95_E__2': '2682791', 'AccidentWeekDay_en__2': 'Saturday', 'AccidentInvolvingMotorcycle__2': 'false', 'RoadType__2': 'rt439', 'AccidentUID__2': '9FDA0DC4856A6094E0430A865E336094', 'AccidentInvolvingPedestrian__2': 'false', 'AccidentHour__2': '02', 'geometry__2': , 'AccidentInvolvingBicycle__2': 'false', 'AccidentYear__3': '2011', 'AccidentLocation_CHLV95_N__3': '1247102', 'AccidentType__3': 'at5', 'AccidentSeverityCategory__3': 'as3', 'RoadType_en__3': 'Minor road', 'AccidentLocation_CHLV95_E__3': '2681199', 'AccidentWeekDay_en__3': 'Saturday', 'AccidentInvolvingMotorcycle__3': 'false' ... 13900 parameters truncated ... 
'AccidentWeekDay_en__996': 'Tuesday', 'AccidentInvolvingMotorcycle__996': 'false', 'RoadType__996': 'rt433', 'AccidentUID__996': 'A5D2C4A55E38707EE0430A865E33707E', 'AccidentInvolvingPedestrian__996': 'false', 'AccidentHour__996': '08', 'geometry__996': , 'AccidentInvolvingBicycle__996': 'false', 'AccidentYear__997': '2011', 'AccidentLocation_CHLV95_N__997': '1251718', 'AccidentType__997': 'at2', 'AccidentSeverityCategory__997': 'as3', 'RoadType_en__997': 'Principal road', 'AccidentLocation_CHLV95_E__997': '2685190', 'AccidentWeekDay_en__997': 'Tuesday', 'AccidentInvolvingMotorcycle__997': 'false', 'RoadType__997': 'rt432', 'AccidentUID__997': 'A5F1841A36B070AEE0430A865E3370AE', 'AccidentInvolvingPedestrian__997': 'false', 'AccidentHour__997': '11', 'geometry__997': , 'AccidentInvolvingBicycle__997': 'false', 'AccidentYear__998': '2011', 'AccidentLocation_CHLV95_N__998': '1246106', 'AccidentType__998': 'at2', 'AccidentSeverityCategory__998': 'as4', 'RoadType_en__998': 'Principal road', 'AccidentLocation_CHLV95_E__998': '2685329', 'AccidentWeekDay_en__998': 'Tuesday', 'AccidentInvolvingMotorcycle__998': 'false', 'RoadType__998': 'rt432', 'AccidentUID__998': 'A5E25678EDD7505EE0430A865E33505E', 'AccidentInvolvingPedestrian__998': 'false', 'AccidentHour__998': '14', 'geometry__998': , 'AccidentInvolvingBicycle__998': 'false', 'AccidentYear__999': '2011', 'AccidentLocation_CHLV95_N__999': '1251852', 'AccidentType__999': 'at00', 'AccidentSeverityCategory__999': 'as3', 'RoadType_en__999': 'Principal road', 'AccidentLocation_CHLV95_E__999': '2683606', 'AccidentWeekDay_en__999': 'Tuesday', 'AccidentInvolvingMotorcycle__999': 'false', 'RoadType__999': 'rt432', 'AccidentUID__999': 'A6431CCEC810E09CE0430A865E33E09C', 'AccidentInvolvingPedestrian__999': 'false', 'AccidentHour__999': '16', 'geometry__999': , 'AccidentInvolvingBicycle__999': 'false'}]\n(Background on this error at: https://sqlalche.me/e/20/f405)", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mProgrammingError\u001B[0m Traceback (most recent call last)", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2112\u001B[0m, in \u001B[0;36mConnection._exec_insertmany_context\u001B[0;34m(self, dialect, context)\u001B[0m\n\u001B[1;32m 2111\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 2112\u001B[0m \u001B[43mdialect\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdo_execute\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 2113\u001B[0m \u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2114\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_stmt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2115\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_params\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2116\u001B[0m \u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2117\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2119\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/default.py:922\u001B[0m, in \u001B[0;36mDefaultDialect.do_execute\u001B[0;34m(self, cursor, statement, parameters, context)\u001B[0m\n\u001B[1;32m 921\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m 
\u001B[38;5;21mdo_execute\u001B[39m(\u001B[38;5;28mself\u001B[39m, cursor, statement, parameters, context\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[0;32m--> 922\u001B[0m \u001B[43mcursor\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstatement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\n", + "\u001B[0;31mProgrammingError\u001B[0m: can't adapt type 'Point'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001B[0;31mProgrammingError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[17], line 8\u001B[0m\n\u001B[1;32m 4\u001B[0m engine \u001B[38;5;241m=\u001B[39m create_engine(db_url)\n\u001B[1;32m 6\u001B[0m \u001B[38;5;66;03m#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[1;32m 7\u001B[0m \u001B[38;5;66;03m#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\u001B[39;00m\n\u001B[0;32m----> 8\u001B[0m \u001B[43macc_df\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_sql\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mtable_name\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mengine\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mif_exists\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mreplace\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mindex\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/util/_decorators.py:333\u001B[0m, in \u001B[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 327\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(args) \u001B[38;5;241m>\u001B[39m num_allow_args:\n\u001B[1;32m 328\u001B[0m warnings\u001B[38;5;241m.\u001B[39mwarn(\n\u001B[1;32m 329\u001B[0m msg\u001B[38;5;241m.\u001B[39mformat(arguments\u001B[38;5;241m=\u001B[39m_format_argument_list(allow_args)),\n\u001B[1;32m 330\u001B[0m \u001B[38;5;167;01mFutureWarning\u001B[39;00m,\n\u001B[1;32m 331\u001B[0m stacklevel\u001B[38;5;241m=\u001B[39mfind_stack_level(),\n\u001B[1;32m 332\u001B[0m )\n\u001B[0;32m--> 333\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/core/generic.py:3008\u001B[0m, in \u001B[0;36mNDFrame.to_sql\u001B[0;34m(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)\u001B[0m\n\u001B[1;32m 2813\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 2814\u001B[0m \u001B[38;5;124;03mWrite records stored in a DataFrame to a SQL database.\u001B[39;00m\n\u001B[1;32m 2815\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 3004\u001B[0m \u001B[38;5;124;03m[(1,), (None,), (2,)]\u001B[39;00m\n\u001B[1;32m 3005\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m \u001B[38;5;66;03m# noqa: 
E501\u001B[39;00m\n\u001B[1;32m 3006\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mpandas\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m sql\n\u001B[0;32m-> 3008\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43msql\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_sql\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 3009\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3010\u001B[0m \u001B[43m \u001B[49m\u001B[43mname\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3011\u001B[0m \u001B[43m \u001B[49m\u001B[43mcon\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3012\u001B[0m \u001B[43m \u001B[49m\u001B[43mschema\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mschema\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3013\u001B[0m \u001B[43m \u001B[49m\u001B[43mif_exists\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mif_exists\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3014\u001B[0m \u001B[43m \u001B[49m\u001B[43mindex\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mindex\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3015\u001B[0m \u001B[43m \u001B[49m\u001B[43mindex_label\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mindex_label\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3016\u001B[0m \u001B[43m \u001B[49m\u001B[43mchunksize\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mchunksize\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3017\u001B[0m \u001B[43m \u001B[49m\u001B[43mdtype\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdtype\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3018\u001B[0m \u001B[43m \u001B[49m\u001B[43mmethod\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmethod\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3019\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/io/sql.py:788\u001B[0m, in \u001B[0;36mto_sql\u001B[0;34m(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method, engine, **engine_kwargs)\u001B[0m\n\u001B[1;32m 783\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mNotImplementedError\u001B[39;00m(\n\u001B[1;32m 784\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mframe\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m argument should be either a Series or a DataFrame\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 785\u001B[0m )\n\u001B[1;32m 787\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m pandasSQL_builder(con, schema\u001B[38;5;241m=\u001B[39mschema, need_transaction\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m) \u001B[38;5;28;01mas\u001B[39;00m pandas_sql:\n\u001B[0;32m--> 788\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mpandas_sql\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_sql\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 789\u001B[0m \u001B[43m \u001B[49m\u001B[43mframe\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 790\u001B[0m \u001B[43m \u001B[49m\u001B[43mname\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 791\u001B[0m \u001B[43m \u001B[49m\u001B[43mif_exists\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mif_exists\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 792\u001B[0m \u001B[43m \u001B[49m\u001B[43mindex\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mindex\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 793\u001B[0m \u001B[43m 
\u001B[49m\u001B[43mindex_label\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mindex_label\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 794\u001B[0m \u001B[43m \u001B[49m\u001B[43mschema\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mschema\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 795\u001B[0m \u001B[43m \u001B[49m\u001B[43mchunksize\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mchunksize\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 796\u001B[0m \u001B[43m \u001B[49m\u001B[43mdtype\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdtype\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 797\u001B[0m \u001B[43m \u001B[49m\u001B[43mmethod\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmethod\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 798\u001B[0m \u001B[43m \u001B[49m\u001B[43mengine\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mengine\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 799\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mengine_kwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 800\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/io/sql.py:1958\u001B[0m, in \u001B[0;36mSQLDatabase.to_sql\u001B[0;34m(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method, engine, **engine_kwargs)\u001B[0m\n\u001B[1;32m 1946\u001B[0m sql_engine \u001B[38;5;241m=\u001B[39m get_engine(engine)\n\u001B[1;32m 1948\u001B[0m table \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mprep_table(\n\u001B[1;32m 1949\u001B[0m frame\u001B[38;5;241m=\u001B[39mframe,\n\u001B[1;32m 1950\u001B[0m name\u001B[38;5;241m=\u001B[39mname,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 1955\u001B[0m dtype\u001B[38;5;241m=\u001B[39mdtype,\n\u001B[1;32m 1956\u001B[0m )\n\u001B[0;32m-> 1958\u001B[0m total_inserted \u001B[38;5;241m=\u001B[39m \u001B[43msql_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minsert_records\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1959\u001B[0m \u001B[43m \u001B[49m\u001B[43mtable\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtable\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1960\u001B[0m \u001B[43m \u001B[49m\u001B[43mcon\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcon\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1961\u001B[0m \u001B[43m \u001B[49m\u001B[43mframe\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mframe\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1962\u001B[0m \u001B[43m \u001B[49m\u001B[43mname\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mname\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1963\u001B[0m \u001B[43m \u001B[49m\u001B[43mindex\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mindex\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1964\u001B[0m \u001B[43m \u001B[49m\u001B[43mschema\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mschema\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1965\u001B[0m \u001B[43m \u001B[49m\u001B[43mchunksize\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mchunksize\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1966\u001B[0m \u001B[43m \u001B[49m\u001B[43mmethod\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmethod\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1967\u001B[0m \u001B[43m 
\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mengine_kwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1968\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1970\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcheck_case_sensitive(name\u001B[38;5;241m=\u001B[39mname, schema\u001B[38;5;241m=\u001B[39mschema)\n\u001B[1;32m 1971\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m total_inserted\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/io/sql.py:1507\u001B[0m, in \u001B[0;36mSQLAlchemyEngine.insert_records\u001B[0;34m(self, table, con, frame, name, index, schema, chunksize, method, **engine_kwargs)\u001B[0m\n\u001B[1;32m 1505\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m re\u001B[38;5;241m.\u001B[39msearch(msg, err_text):\n\u001B[1;32m 1506\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124minf cannot be used with MySQL\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[0;32m-> 1507\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m err\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/io/sql.py:1498\u001B[0m, in \u001B[0;36mSQLAlchemyEngine.insert_records\u001B[0;34m(self, table, con, frame, name, index, schema, chunksize, method, **engine_kwargs)\u001B[0m\n\u001B[1;32m 1495\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01msqlalchemy\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m exc\n\u001B[1;32m 1497\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 1498\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mtable\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minsert\u001B[49m\u001B[43m(\u001B[49m\u001B[43mchunksize\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mchunksize\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmethod\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmethod\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1499\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m exc\u001B[38;5;241m.\u001B[39mStatementError \u001B[38;5;28;01mas\u001B[39;00m err:\n\u001B[1;32m 1500\u001B[0m \u001B[38;5;66;03m# GH34431\u001B[39;00m\n\u001B[1;32m 1501\u001B[0m \u001B[38;5;66;03m# https://stackoverflow.com/a/67358288/6067848\u001B[39;00m\n\u001B[1;32m 1502\u001B[0m msg \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mr\u001B[39m\u001B[38;5;124m\"\"\"\u001B[39m\u001B[38;5;124m(\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m(1054, \u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnknown column \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124minf(e0)?\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m in \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mfield list\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m))(?#\u001B[39m\n\u001B[1;32m 1503\u001B[0m \u001B[38;5;124m )|inf can not be used with MySQL\u001B[39m\u001B[38;5;124m\"\"\"\u001B[39m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/io/sql.py:1059\u001B[0m, in \u001B[0;36mSQLTable.insert\u001B[0;34m(self, chunksize, method)\u001B[0m\n\u001B[1;32m 1056\u001B[0m \u001B[38;5;28;01mbreak\u001B[39;00m\n\u001B[1;32m 1058\u001B[0m chunk_iter \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mzip\u001B[39m(\u001B[38;5;241m*\u001B[39m(arr[start_i:end_i] 
\u001B[38;5;28;01mfor\u001B[39;00m arr \u001B[38;5;129;01min\u001B[39;00m data_list))\n\u001B[0;32m-> 1059\u001B[0m num_inserted \u001B[38;5;241m=\u001B[39m \u001B[43mexec_insert\u001B[49m\u001B[43m(\u001B[49m\u001B[43mconn\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mkeys\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mchunk_iter\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1060\u001B[0m \u001B[38;5;66;03m# GH 46891\u001B[39;00m\n\u001B[1;32m 1061\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m num_inserted \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/io/sql.py:951\u001B[0m, in \u001B[0;36mSQLTable._execute_insert\u001B[0;34m(self, conn, keys, data_iter)\u001B[0m\n\u001B[1;32m 939\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 940\u001B[0m \u001B[38;5;124;03mExecute SQL statement inserting data\u001B[39;00m\n\u001B[1;32m 941\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 948\u001B[0m \u001B[38;5;124;03m Each item contains a list of values to be inserted\u001B[39;00m\n\u001B[1;32m 949\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 950\u001B[0m data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mdict\u001B[39m(\u001B[38;5;28mzip\u001B[39m(keys, row)) \u001B[38;5;28;01mfor\u001B[39;00m row \u001B[38;5;129;01min\u001B[39;00m data_iter]\n\u001B[0;32m--> 951\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[43mconn\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtable\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minsert\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 952\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m result\u001B[38;5;241m.\u001B[39mrowcount\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1416\u001B[0m, in \u001B[0;36mConnection.execute\u001B[0;34m(self, statement, parameters, execution_options)\u001B[0m\n\u001B[1;32m 1414\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exc\u001B[38;5;241m.\u001B[39mObjectNotExecutableError(statement) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m 1415\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1416\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmeth\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1417\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1418\u001B[0m \u001B[43m \u001B[49m\u001B[43mdistilled_parameters\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1419\u001B[0m \u001B[43m \u001B[49m\u001B[43mexecution_options\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mNO_OPTIONS\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1420\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/sql/elements.py:516\u001B[0m, in \u001B[0;36mClauseElement._execute_on_connection\u001B[0;34m(self, connection, distilled_params, execution_options)\u001B[0m\n\u001B[1;32m 514\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m TYPE_CHECKING:\n\u001B[1;32m 
515\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(\u001B[38;5;28mself\u001B[39m, Executable)\n\u001B[0;32m--> 516\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mconnection\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_execute_clauseelement\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 517\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdistilled_params\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mexecution_options\u001B[49m\n\u001B[1;32m 518\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 519\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 520\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exc\u001B[38;5;241m.\u001B[39mObjectNotExecutableError(\u001B[38;5;28mself\u001B[39m)\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1639\u001B[0m, in \u001B[0;36mConnection._execute_clauseelement\u001B[0;34m(self, elem, distilled_parameters, execution_options)\u001B[0m\n\u001B[1;32m 1627\u001B[0m compiled_cache: Optional[CompiledCacheType] \u001B[38;5;241m=\u001B[39m execution_options\u001B[38;5;241m.\u001B[39mget(\n\u001B[1;32m 1628\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcompiled_cache\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mengine\u001B[38;5;241m.\u001B[39m_compiled_cache\n\u001B[1;32m 1629\u001B[0m )\n\u001B[1;32m 1631\u001B[0m compiled_sql, extracted_params, cache_hit \u001B[38;5;241m=\u001B[39m elem\u001B[38;5;241m.\u001B[39m_compile_w_cache(\n\u001B[1;32m 1632\u001B[0m dialect\u001B[38;5;241m=\u001B[39mdialect,\n\u001B[1;32m 1633\u001B[0m compiled_cache\u001B[38;5;241m=\u001B[39mcompiled_cache,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 1637\u001B[0m linting\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdialect\u001B[38;5;241m.\u001B[39mcompiler_linting \u001B[38;5;241m|\u001B[39m compiler\u001B[38;5;241m.\u001B[39mWARN_LINTING,\n\u001B[1;32m 1638\u001B[0m )\n\u001B[0;32m-> 1639\u001B[0m ret \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_execute_context\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1640\u001B[0m \u001B[43m \u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1641\u001B[0m \u001B[43m \u001B[49m\u001B[43mdialect\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecution_ctx_cls\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_init_compiled\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1642\u001B[0m \u001B[43m \u001B[49m\u001B[43mcompiled_sql\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1643\u001B[0m \u001B[43m \u001B[49m\u001B[43mdistilled_parameters\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1644\u001B[0m \u001B[43m \u001B[49m\u001B[43mexecution_options\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1645\u001B[0m \u001B[43m \u001B[49m\u001B[43mcompiled_sql\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1646\u001B[0m \u001B[43m \u001B[49m\u001B[43mdistilled_parameters\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1647\u001B[0m \u001B[43m \u001B[49m\u001B[43melem\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1648\u001B[0m \u001B[43m \u001B[49m\u001B[43mextracted_params\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1649\u001B[0m \u001B[43m 
\u001B[49m\u001B[43mcache_hit\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcache_hit\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1650\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1651\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m has_events:\n\u001B[1;32m 1652\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdispatch\u001B[38;5;241m.\u001B[39mafter_execute(\n\u001B[1;32m 1653\u001B[0m \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 1654\u001B[0m elem,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 1658\u001B[0m ret,\n\u001B[1;32m 1659\u001B[0m )\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1843\u001B[0m, in \u001B[0;36mConnection._execute_context\u001B[0;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001B[0m\n\u001B[1;32m 1840\u001B[0m context\u001B[38;5;241m.\u001B[39mpre_exec()\n\u001B[1;32m 1842\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m context\u001B[38;5;241m.\u001B[39mexecute_style \u001B[38;5;129;01mis\u001B[39;00m ExecuteStyle\u001B[38;5;241m.\u001B[39mINSERTMANYVALUES:\n\u001B[0;32m-> 1843\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_exec_insertmany_context\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1844\u001B[0m \u001B[43m \u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1845\u001B[0m \u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1846\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1847\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 1848\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_exec_single_context(\n\u001B[1;32m 1849\u001B[0m dialect, context, statement, parameters\n\u001B[1;32m 1850\u001B[0m )\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2120\u001B[0m, in \u001B[0;36mConnection._exec_insertmany_context\u001B[0;34m(self, dialect, context)\u001B[0m\n\u001B[1;32m 2112\u001B[0m dialect\u001B[38;5;241m.\u001B[39mdo_execute(\n\u001B[1;32m 2113\u001B[0m cursor,\n\u001B[1;32m 2114\u001B[0m sub_stmt,\n\u001B[1;32m 2115\u001B[0m sub_params,\n\u001B[1;32m 2116\u001B[0m context,\n\u001B[1;32m 2117\u001B[0m )\n\u001B[1;32m 2119\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[0;32m-> 2120\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_handle_dbapi_exception\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 2121\u001B[0m \u001B[43m \u001B[49m\u001B[43me\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2122\u001B[0m \u001B[43m \u001B[49m\u001B[43msql_util\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_long_statement\u001B[49m\u001B[43m(\u001B[49m\u001B[43msub_stmt\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2123\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_params\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2124\u001B[0m \u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2125\u001B[0m \u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2126\u001B[0m \u001B[43m \u001B[49m\u001B[43mis_sub_exec\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 2127\u001B[0m 
\u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2129\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m engine_events:\n\u001B[1;32m 2130\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdispatch\u001B[38;5;241m.\u001B[39mafter_cursor_execute(\n\u001B[1;32m 2131\u001B[0m \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 2132\u001B[0m cursor,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 2136\u001B[0m context\u001B[38;5;241m.\u001B[39mexecutemany,\n\u001B[1;32m 2137\u001B[0m )\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2343\u001B[0m, in \u001B[0;36mConnection._handle_dbapi_exception\u001B[0;34m(self, e, statement, parameters, cursor, context, is_sub_exec)\u001B[0m\n\u001B[1;32m 2341\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m should_wrap:\n\u001B[1;32m 2342\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m sqlalchemy_exception \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m-> 2343\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m sqlalchemy_exception\u001B[38;5;241m.\u001B[39mwith_traceback(exc_info[\u001B[38;5;241m2\u001B[39m]) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01me\u001B[39;00m\n\u001B[1;32m 2344\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 2345\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m exc_info[\u001B[38;5;241m1\u001B[39m] \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2112\u001B[0m, in \u001B[0;36mConnection._exec_insertmany_context\u001B[0;34m(self, dialect, context)\u001B[0m\n\u001B[1;32m 2110\u001B[0m \u001B[38;5;28;01mbreak\u001B[39;00m\n\u001B[1;32m 2111\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 2112\u001B[0m \u001B[43mdialect\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdo_execute\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 2113\u001B[0m \u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2114\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_stmt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2115\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_params\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2116\u001B[0m \u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2117\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2119\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[1;32m 2120\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handle_dbapi_exception(\n\u001B[1;32m 2121\u001B[0m e,\n\u001B[1;32m 2122\u001B[0m sql_util\u001B[38;5;241m.\u001B[39m_long_statement(sub_stmt),\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 2126\u001B[0m is_sub_exec\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 2127\u001B[0m )\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/default.py:922\u001B[0m, in \u001B[0;36mDefaultDialect.do_execute\u001B[0;34m(self, cursor, statement, parameters, context)\u001B[0m\n\u001B[1;32m 921\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mdo_execute\u001B[39m(\u001B[38;5;28mself\u001B[39m, cursor, statement, parameters, context\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[0;32m--> 
922\u001B[0m \u001B[43mcursor\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstatement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\n", + "\u001B[0;31mProgrammingError\u001B[0m: (psycopg2.ProgrammingError) can't adapt type 'Point'\n[SQL: INSERT INTO table_name (\"AccidentUID\", \"AccidentHour\", \"AccidentYear\", \"AccidentWeekDay_en\", \"AccidentType\", \"AccidentSeverityCategory\", \"AccidentInvolvingPedestrian\", \"AccidentInvolvingBicycle\", \"AccidentInvolvingMotorcycle\", \"RoadType\", \"RoadType_e ... 398437 characters truncated ... n__999)s, %(AccidentLocation_CHLV95_E__999)s, %(AccidentLocation_CHLV95_N__999)s, %(geometry__999)s)]\n[parameters: {'AccidentYear__0': '2011', 'AccidentLocation_CHLV95_N__0': '1245194', 'AccidentType__0': 'at0', 'AccidentSeverityCategory__0': 'as4', 'RoadType_en__0': 'Minor road', 'AccidentLocation_CHLV95_E__0': '2684605', 'AccidentWeekDay_en__0': 'Saturday', 'AccidentInvolvingMotorcycle__0': 'false', 'RoadType__0': 'rt433', 'AccidentUID__0': 'A2D2677533867004E0430A865E337004', 'AccidentInvolvingPedestrian__0': 'false', 'AccidentHour__0': '00', 'geometry__0': , 'AccidentInvolvingBicycle__0': 'false', 'AccidentYear__1': '2011', 'AccidentLocation_CHLV95_N__1': '1246980', 'AccidentType__1': 'at0', 'AccidentSeverityCategory__1': 'as3', 'RoadType_en__1': 'Minor road', 'AccidentLocation_CHLV95_E__1': '2682382', 'AccidentWeekDay_en__1': 'Saturday', 'AccidentInvolvingMotorcycle__1': 'false', 'RoadType__1': 'rt433', 'AccidentUID__1': '9FD6441F802C20A6E0430A865E3320A6', 'AccidentInvolvingPedestrian__1': 'false', 'AccidentHour__1': '01', 'geometry__1': , 'AccidentInvolvingBicycle__1': 'true', 'AccidentYear__2': '2011', 'AccidentLocation_CHLV95_N__2': '1247749', 'AccidentType__2': 'at0', 'AccidentSeverityCategory__2': 'as4', 'RoadType_en__2': 'Other', 'AccidentLocation_CHLV95_E__2': '2682791', 'AccidentWeekDay_en__2': 'Saturday', 'AccidentInvolvingMotorcycle__2': 'false', 'RoadType__2': 'rt439', 'AccidentUID__2': '9FDA0DC4856A6094E0430A865E336094', 'AccidentInvolvingPedestrian__2': 'false', 'AccidentHour__2': '02', 'geometry__2': , 'AccidentInvolvingBicycle__2': 'false', 'AccidentYear__3': '2011', 'AccidentLocation_CHLV95_N__3': '1247102', 'AccidentType__3': 'at5', 'AccidentSeverityCategory__3': 'as3', 'RoadType_en__3': 'Minor road', 'AccidentLocation_CHLV95_E__3': '2681199', 'AccidentWeekDay_en__3': 'Saturday', 'AccidentInvolvingMotorcycle__3': 'false' ... 13900 parameters truncated ... 
'AccidentWeekDay_en__996': 'Tuesday', 'AccidentInvolvingMotorcycle__996': 'false', 'RoadType__996': 'rt433', 'AccidentUID__996': 'A5D2C4A55E38707EE0430A865E33707E', 'AccidentInvolvingPedestrian__996': 'false', 'AccidentHour__996': '08', 'geometry__996': , 'AccidentInvolvingBicycle__996': 'false', 'AccidentYear__997': '2011', 'AccidentLocation_CHLV95_N__997': '1251718', 'AccidentType__997': 'at2', 'AccidentSeverityCategory__997': 'as3', 'RoadType_en__997': 'Principal road', 'AccidentLocation_CHLV95_E__997': '2685190', 'AccidentWeekDay_en__997': 'Tuesday', 'AccidentInvolvingMotorcycle__997': 'false', 'RoadType__997': 'rt432', 'AccidentUID__997': 'A5F1841A36B070AEE0430A865E3370AE', 'AccidentInvolvingPedestrian__997': 'false', 'AccidentHour__997': '11', 'geometry__997': , 'AccidentInvolvingBicycle__997': 'false', 'AccidentYear__998': '2011', 'AccidentLocation_CHLV95_N__998': '1246106', 'AccidentType__998': 'at2', 'AccidentSeverityCategory__998': 'as4', 'RoadType_en__998': 'Principal road', 'AccidentLocation_CHLV95_E__998': '2685329', 'AccidentWeekDay_en__998': 'Tuesday', 'AccidentInvolvingMotorcycle__998': 'false', 'RoadType__998': 'rt432', 'AccidentUID__998': 'A5E25678EDD7505EE0430A865E33505E', 'AccidentInvolvingPedestrian__998': 'false', 'AccidentHour__998': '14', 'geometry__998': , 'AccidentInvolvingBicycle__998': 'false', 'AccidentYear__999': '2011', 'AccidentLocation_CHLV95_N__999': '1251852', 'AccidentType__999': 'at00', 'AccidentSeverityCategory__999': 'as3', 'RoadType_en__999': 'Principal road', 'AccidentLocation_CHLV95_E__999': '2683606', 'AccidentWeekDay_en__999': 'Tuesday', 'AccidentInvolvingMotorcycle__999': 'false', 'RoadType__999': 'rt432', 'AccidentUID__999': 'A6431CCEC810E09CE0430A865E33E09C', 'AccidentInvolvingPedestrian__999': 'false', 'AccidentHour__999': '16', 'geometry__999': , 'AccidentInvolvingBicycle__999': 'false'}]\n(Background on this error at: https://sqlalche.me/e/20/f405)"
   ]
  }
 ],
 "source": [
  "from sqlalchemy import create_engine\n",
  "\n",
  "db_url = f'postgresql://seb:@localhost:5432/test-db23'\n",
  "engine = create_engine(db_url)\n",
  "\n",
  "#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\n",
  "#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\n",
  "acc_df.to_sql('table_name', engine, if_exists='replace', index=False)\n"
 ],
 "metadata": {
  "collapsed": false,
  "ExecuteTime": {
   "end_time": "2023-12-02T22:27:41.753420Z",
   "start_time": "2023-12-02T22:27:40.962764Z"
  }
 },
 "id": "fa76af8343443d7a"
},
{
 "cell_type": "code",
 "execution_count": 16,
 "outputs": [],
 "source": [
  "engine.dispose()"
 ],
 "metadata": {
  "collapsed": false,
  "ExecuteTime": {
   "end_time": "2023-12-02T22:26:54.275225Z",
   "start_time": "2023-12-02T22:26:54.273410Z"
  }
 },
 "id": "bc0a23a5126e76c2"
}
],
"metadata": {
 "kernelspec": {
  "display_name": "Python 3",
  "language": "python",
  "name": "python3"
 },
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
   "version": 2
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython2",
  "version": "2.7.6"
 }
},
"nbformat": 4,
"nbformat_minor": 5
}

From b6715a0ff1b1b90c898f15e967b04a4a7502b8d1 Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 01:09:32 +0100
Subject: [PATCH 06/22] UNFINISHED: Commit for Syncing Repos

---
 requirements.txt | 217 
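The ProgrammingError recorded above is the failure this commit starts to address: pandas' plain to_sql() hands the shapely Point objects in the geometry column straight to psycopg2, which has no adapter for that type. The GeoAlchemy2, shapely and geopandas pins that enter requirements.txt below point at the usual fix. A minimal sketch, not the committed code, assuming acc_df is the accident GeoDataFrame built earlier and the same local database; the target table name 'accidents' is illustrative:

from sqlalchemy import create_engine

engine = create_engine('postgresql://seb:@localhost:5432/test-db23')

# The POINT values printed above are lon/lat pairs, i.e. WGS84;
# to_postgis expects a CRS on the frame so it can derive the SRID.
if acc_df.crs is None:
    acc_df = acc_df.set_crs(epsg=4326)

# GeoDataFrame.to_postgis (backed by GeoAlchemy2) writes the geometry as a
# real PostGIS geometry column instead of passing Point objects to the
# DB-API driver, which is what raised "can't adapt type 'Point'".
acc_df.to_postgis('accidents', engine, if_exists='replace', index=False)

# Alternative without PostGIS: serialise the geometry to WKT text, after
# which every column is a plain type that to_sql() can handle.
# acc_df_wkt = acc_df.copy()
# acc_df_wkt['geometry'] = acc_df_wkt['geometry'].apply(lambda g: g.wkt)
# acc_df_wkt.to_sql('accidents', engine, if_exists='replace', index=False)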
+++++++++++++++++++++++-------------------- src/integrate.py | 2 +- src/testArea.ipynb | 227 ++++++++++++++++++++------------------------- 3 files changed, 218 insertions(+), 228 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8d6bbeb..08150f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,102 +1,119 @@ -anyio==4.0.0 -appnope==0.1.3 -argon2-cffi==23.1.0 -argon2-cffi-bindings==21.2.0 -arrow==1.3.0 -asttokens==2.4.1 -async-lru==2.0.4 -attrs==23.1.0 -Babel==2.13.1 -beautifulsoup4==4.12.2 -black==23.11.0 -bleach==6.1.0 -certifi==2023.7.22 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -comm==0.2.0 -debugpy==1.8.0 -decorator==5.1.1 -defusedxml==0.7.1 -executing==2.0.1 -fastjsonschema==2.19.0 -fqdn==1.5.1 -idna==3.4 -ipykernel==6.26.0 -ipython==8.17.2 -ipywidgets==8.1.1 -isoduration==20.11.0 -jedi==0.19.1 -Jinja2==3.1.2 -json5==0.9.14 -jsonpointer==2.4 -jsonschema==4.19.2 -jsonschema-specifications==2023.11.1 -jupyter==1.0.0 -jupyter-console==6.6.3 -jupyter-events==0.9.0 -jupyter-lsp==2.2.0 -jupyter_client==8.6.0 -jupyter_core==5.5.0 -jupyter_server==2.10.1 -jupyter_server_terminals==0.4.4 -jupyterlab==4.0.8 -jupyterlab-pygments==0.2.2 -jupyterlab-widgets==3.0.9 -jupyterlab_server==2.25.1 -MarkupSafe==2.1.3 -matplotlib-inline==0.1.6 -mistune==3.0.2 -mypy-extensions==1.0.0 -nbclient==0.9.0 -nbconvert==7.11.0 -nbformat==5.9.2 -nest-asyncio==1.5.8 -notebook==7.0.6 -notebook_shim==0.2.3 -numpy==1.26.2 -overrides==7.4.0 -packaging==23.2 pandas==2.1.3 -pandocfilters==1.5.0 -parso==0.8.3 -pathspec==0.11.2 -pexpect==4.8.0 -platformdirs==4.0.0 -prometheus-client==0.18.0 -prompt-toolkit==3.0.41 -psutil==5.9.6 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pycparser==2.21 -Pygments==2.16.1 -python-dateutil==2.8.2 -python-json-logger==2.0.7 -pytz==2023.3.post1 -PyYAML==6.0.1 -pyzmq==25.1.1 -qtconsole==5.5.1 -QtPy==2.4.1 -referencing==0.31.0 requests==2.31.0 -rfc3339-validator==0.1.4 -rfc3986-validator==0.1.1 -rpds-py==0.13.0 -Send2Trash==1.8.2 -six==1.16.0 -sniffio==1.3.0 -soupsieve==2.5 -stack-data==0.6.3 -terminado==0.18.0 -tinycss2==1.2.1 -tornado==6.3.3 -traitlets==5.13.0 -types-python-dateutil==2.8.19.14 -tzdata==2023.3 -uri-template==1.3.0 -urllib3==2.1.0 -wcwidth==0.2.10 -webcolors==1.13 -webencodings==0.5.1 -websocket-client==1.6.4 -widgetsnbextension==4.0.9 + +geopandas~=0.14.1Package Version +------------------------- ------------ +anyio 4.0.0 +appnope 0.1.3 +argon2-cffi 23.1.0 +argon2-cffi-bindings 21.2.0 +arrow 1.3.0 +asttokens 2.4.1 +async-lru 2.0.4 +attrs 23.1.0 +Babel 2.13.1 +beautifulsoup4 4.12.2 +black 23.11.0 +bleach 6.1.0 +certifi 2023.7.22 +cffi 1.16.0 +charset-normalizer 3.3.2 +click 8.1.7 +click-plugins 1.1.1 +cligj 0.7.2 +comm 0.2.0 +debugpy 1.8.0 +decorator 5.1.1 +defusedxml 0.7.1 +executing 2.0.1 +fastjsonschema 2.19.0 +fiona 1.9.5 +fqdn 1.5.1 +GeoAlchemy2 0.14.2 +geopandas 0.14.1 +idna 3.4 +ipykernel 6.26.0 +ipython 8.17.2 +ipywidgets 8.1.1 +isoduration 20.11.0 +jedi 0.19.1 +Jinja2 3.1.2 +json5 0.9.14 +jsonpointer 2.4 +jsonschema 4.19.2 +jsonschema-specifications 2023.11.1 +jupyter 1.0.0 +jupyter_client 8.6.0 +jupyter-console 6.6.3 +jupyter_core 5.5.0 +jupyter-events 0.9.0 +jupyter-lsp 2.2.0 +jupyter_server 2.10.1 +jupyter_server_terminals 0.4.4 +jupyterlab 4.0.8 +jupyterlab-pygments 0.2.2 +jupyterlab_server 2.25.1 +jupyterlab-widgets 3.0.9 +MarkupSafe 2.1.3 +matplotlib-inline 0.1.6 +mistune 3.0.2 +mypy-extensions 1.0.0 +nbclient 0.9.0 +nbconvert 7.11.0 +nbformat 5.9.2 +nest-asyncio 1.5.8 +notebook 7.0.6 +notebook_shim 0.2.3 +numpy 1.26.2 +overrides 
7.4.0 +packaging 23.2 +pandas 2.1.3 +pandocfilters 1.5.0 +parso 0.8.3 +pathspec 0.11.2 +pexpect 4.8.0 +pip 23.3.1 +platformdirs 4.0.0 +prometheus-client 0.18.0 +prompt-toolkit 3.0.41 +psutil 5.9.6 +psycopg2 2.9.9 +ptyprocess 0.7.0 +pure-eval 0.2.2 +pycparser 2.21 +Pygments 2.16.1 +pyproj 3.6.1 +python-dateutil 2.8.2 +python-json-logger 2.0.7 +pytz 2023.3.post1 +PyYAML 6.0.1 +pyzmq 25.1.1 +qtconsole 5.5.1 +QtPy 2.4.1 +referencing 0.31.0 +requests 2.31.0 +rfc3339-validator 0.1.4 +rfc3986-validator 0.1.1 +rpds-py 0.13.0 +Send2Trash 1.8.2 +setuptools 68.2.2 +shapely 2.0.2 +six 1.16.0 +sniffio 1.3.0 +soupsieve 2.5 +SQLAlchemy 2.0.23 +stack-data 0.6.3 +terminado 0.18.0 +tinycss2 1.2.1 +tornado 6.3.3 +traitlets 5.13.0 +types-python-dateutil 2.8.19.14 +typing_extensions 4.8.0 +tzdata 2023.3 +uri-template 1.3.0 +urllib3 2.1.0 +wcwidth 0.2.10 +webcolors 1.13 +webencodings 0.5.1 +websocket-client 1.6.4 +widgetsnbextension 4.0.9 diff --git a/src/integrate.py b/src/integrate.py index 050fcb4..607719d 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -65,7 +65,7 @@ def process_accident_data(): acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E', - 'AccidentLocation_CHLV95_N', 'geometry'] + 'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth'] cleaned_acc_df = acc_df_unified[acc_cols_to_keep] return cleaned_acc_df diff --git a/src/testArea.ipynb b/src/testArea.ipynb index 385300d..e158c10 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "outputs": [], "source": [ "import pandas as pd\n", @@ -13,15 +13,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T20:45:25.057214Z", - "start_time": "2023-12-02T20:45:24.634062Z" + "end_time": "2023-12-02T23:43:55.980827Z", + "start_time": "2023-12-02T23:43:55.546732Z" } }, "id": "be55b25929d95559" }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "outputs": [ { "name": "stderr", @@ -44,52 +44,86 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T20:49:39.247032Z", - "start_time": "2023-12-02T20:45:26.952158Z" + "end_time": "2023-12-02T23:48:08.233784Z", + "start_time": "2023-12-02T23:43:55.980667Z" } }, "id": "dd3831953afdeb72" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "outputs": [], "source": [ "test_df = miv_df\n" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T23:48:08.239957Z", + "start_time": "2023-12-02T23:48:08.230590Z" + } }, "id": "14471cd78389ce4d" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "outputs": [], "source": [ "test_df.dtypes\n", "date_object = pd.to_datetime(test_df['Date'])\n" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T23:48:09.754205Z", + "start_time": "2023-12-02T23:48:08.232651Z" + } }, "id": "c70d21adef38fd68" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 5, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Weekday_Name'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + 
"\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/core/indexes/base.py:3790\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3789\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 3790\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3791\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", + "File \u001B[0;32mindex.pyx:152\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mindex.pyx:181\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", + "\u001B[0;31mKeyError\u001B[0m: 'Weekday_Name'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[5], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mtest_df\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mWeekday_Name\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/core/frame.py:3893\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3891\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 3892\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[0;32m-> 3893\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3894\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[1;32m 3895\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", + "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/pandas/core/indexes/base.py:3797\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3792\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[1;32m 3793\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[1;32m 3794\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m 
casted_key)\n\u001B[1;32m 3795\u001B[0m ):\n\u001B[1;32m 3796\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[0;32m-> 3797\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m 3798\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[1;32m 3799\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[1;32m 3800\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[1;32m 3801\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[1;32m 3802\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", + "\u001B[0;31mKeyError\u001B[0m: 'Weekday_Name'" + ] + } + ], "source": [ "test_df['Weekday_Name']" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-02T23:48:10.103198Z", + "start_time": "2023-12-02T23:48:09.756006Z" + } }, "id": "d0df3c0ef49e8061" }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "outputs": [ { "name": "stdout", @@ -129,8 +163,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T20:50:19.543547Z", - "start_time": "2023-12-02T20:50:05.258441Z" + "end_time": "2023-12-02T23:49:50.185415Z", + "start_time": "2023-12-02T23:49:34.846049Z" } }, "id": "f86bc612060b17a4" @@ -146,63 +180,17 @@ "date_obj = dt.strptime(acc_df[''])\n" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "start_time": "2023-12-02T23:48:10.101387Z" + } }, "id": "6affbeea6c7cf3ef" }, { "cell_type": "code", - "execution_count": 5, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accident Columns:\n", - "AccidentUID object\n", - "AccidentHour object\n", - "AccidentYear object\n", - "AccidentWeekDay_en object\n", - "AccidentType object\n", - "AccidentSeverityCategory object\n", - "AccidentInvolvingPedestrian object\n", - "AccidentInvolvingBicycle object\n", - "AccidentInvolvingMotorcycle object\n", - "RoadType object\n", - "RoadType_en object\n", - "AccidentLocation_CHLV95_E object\n", - "AccidentLocation_CHLV95_N object\n", - "geometry geometry\n", - "dtype: object\n", - "\n", - "MIV Columns:\n", - "MSID object\n", - "ZSID object\n", - "Achse object\n", - "EKoord float64\n", - "NKoord float64\n", - "Richtung object\n", - "AnzFahrzeuge float64\n", - "AnzFahrzeugeStatus object\n", - "Date object\n", - "Hrs object\n", - "Weekday_en object\n", - "dtype: object\n", - "\n", - "FB Cols:\n", - "OST int64\n", - "NORD int64\n", - "DATE object\n", - "HRS object\n", - "VELO_IN float64\n", - "VELO_OUT float64\n", - "FUSS_IN float64\n", - "FUSS_OUT float64\n", - "Weekday_en object\n", - "dtype: object\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "print(\"Accident Columns:\")\n", "print(acc_df.dtypes)\n", @@ -216,8 +204,7 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T20:50:32.272482Z", - "start_time": "2023-12-02T20:50:32.270846Z" + "start_time": "2023-12-02T23:48:10.102789Z" } }, "id": "242041cd369d8454" @@ -228,24 +215,17 @@ "outputs": [], "source": [], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "start_time": "2023-12-02T23:48:10.103954Z" + } }, "id": "1841925ee109a417" }, { "cell_type": "code", - "execution_count": 13, - 
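The KeyError captured above is a naming slip rather than missing data: the MIV frame carries its weekday label in the 'Weekday_en' column (see the 'MIV Columns' dtypes listing in the same notebook), and the preceding cell already parses 'Date'. A short sketch of the presumably intended lookup, assuming test_df and pandas as in the notebook:

import pandas as pd

# Either select the column that actually exists ...
test_df['Weekday_en']

# ... or derive the missing 'Weekday_Name' from the parsed dates first.
test_df['Weekday_Name'] = pd.to_datetime(test_df['Date']).dt.day_name()
test_df['Weekday_Name']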
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MIV unqiue: (187,)\n", - "Acc unique: (8661,)\n", - "FB unique: (62,)\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "print(\"MIV unqiue:\", miv_df['EKoord'].unique().shape)\n", "print(\"Acc unique:\", acc_df['AccidentLocation_CHLV95_E'].unique().shape)\n", @@ -254,22 +234,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T21:59:53.075227Z", - "start_time": "2023-12-02T21:59:52.868698Z" + "start_time": "2023-12-02T23:48:10.104894Z" } }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "outputs": [ { "data": { - "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en AccidentLocation_CHLV95_E \\\n0 false rt433 Minor road 2684605 \n1 false rt433 Minor road 2682382 \n2 false rt439 Other 2682791 \n3 false rt433 Minor road 2681199 \n4 false rt433 Minor road 2682479 \n\n AccidentLocation_CHLV95_N geometry \n0 1245194 POINT Z (8.55841 47.35217 0.00000) \n1 1246980 POINT Z (8.52932 47.36851 0.00000) \n2 1247749 POINT Z (8.53488 47.37538 0.00000) \n3 1247102 POINT Z (8.51368 47.36976 0.00000) \n4 1250690 POINT Z (8.53129 47.40186 0.00000) ", - "text/html": "
[HTML rendering of the same five-row acc_df head as the text/plain repr above; tags stripped in this text, geometry shown as POINT Z values]
" + "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en AccidentLocation_CHLV95_E \\\n0 false rt433 Minor road 2684605 \n1 false rt433 Minor road 2682382 \n2 false rt439 Other 2682791 \n3 false rt433 Minor road 2681199 \n4 false rt433 Minor road 2682479 \n\n AccidentLocation_CHLV95_N geometry \n0 1245194 POINT(8.55841025 47.3521677) \n1 1246980 POINT(8.52932024 47.36851152) \n2 1247749 POINT(8.5348767 47.37537618) \n3 1247102 POINT(8.51368203 47.36975554) \n4 1250690 POINT(8.53128819 47.40186473) ", + "text/html": "
[HTML rendering of the same five-row acc_df head as the updated text/plain repr above; tags stripped in this text, geometry shown as 2-D POINT values]
" }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -280,71 +259,65 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T20:52:32.645509Z", - "start_time": "2023-12-02T20:52:32.643877Z" + "end_time": "2023-12-02T23:53:22.460557Z", + "start_time": "2023-12-02T23:53:22.453434Z" } }, "id": "a159cafa9c227b88" }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "outputs": [ { - "ename": "ProgrammingError", - "evalue": "(psycopg2.ProgrammingError) can't adapt type 'Point'\n[SQL: INSERT INTO table_name (\"AccidentUID\", \"AccidentHour\", \"AccidentYear\", \"AccidentWeekDay_en\", \"AccidentType\", \"AccidentSeverityCategory\", \"AccidentInvolvingPedestrian\", \"AccidentInvolvingBicycle\", \"AccidentInvolvingMotorcycle\", \"RoadType\", \"RoadType_e ... 398437 characters truncated ... n__999)s, %(AccidentLocation_CHLV95_E__999)s, %(AccidentLocation_CHLV95_N__999)s, %(geometry__999)s)]\n[parameters: {'AccidentYear__0': '2011', 'AccidentLocation_CHLV95_N__0': '1245194', 'AccidentType__0': 'at0', 'AccidentSeverityCategory__0': 'as4', 'RoadType_en__0': 'Minor road', 'AccidentLocation_CHLV95_E__0': '2684605', 'AccidentWeekDay_en__0': 'Saturday', 'AccidentInvolvingMotorcycle__0': 'false', 'RoadType__0': 'rt433', 'AccidentUID__0': 'A2D2677533867004E0430A865E337004', 'AccidentInvolvingPedestrian__0': 'false', 'AccidentHour__0': '00', 'geometry__0': , 'AccidentInvolvingBicycle__0': 'false', 'AccidentYear__1': '2011', 'AccidentLocation_CHLV95_N__1': '1246980', 'AccidentType__1': 'at0', 'AccidentSeverityCategory__1': 'as3', 'RoadType_en__1': 'Minor road', 'AccidentLocation_CHLV95_E__1': '2682382', 'AccidentWeekDay_en__1': 'Saturday', 'AccidentInvolvingMotorcycle__1': 'false', 'RoadType__1': 'rt433', 'AccidentUID__1': '9FD6441F802C20A6E0430A865E3320A6', 'AccidentInvolvingPedestrian__1': 'false', 'AccidentHour__1': '01', 'geometry__1': , 'AccidentInvolvingBicycle__1': 'true', 'AccidentYear__2': '2011', 'AccidentLocation_CHLV95_N__2': '1247749', 'AccidentType__2': 'at0', 'AccidentSeverityCategory__2': 'as4', 'RoadType_en__2': 'Other', 'AccidentLocation_CHLV95_E__2': '2682791', 'AccidentWeekDay_en__2': 'Saturday', 'AccidentInvolvingMotorcycle__2': 'false', 'RoadType__2': 'rt439', 'AccidentUID__2': '9FDA0DC4856A6094E0430A865E336094', 'AccidentInvolvingPedestrian__2': 'false', 'AccidentHour__2': '02', 'geometry__2': , 'AccidentInvolvingBicycle__2': 'false', 'AccidentYear__3': '2011', 'AccidentLocation_CHLV95_N__3': '1247102', 'AccidentType__3': 'at5', 'AccidentSeverityCategory__3': 'as3', 'RoadType_en__3': 'Minor road', 'AccidentLocation_CHLV95_E__3': '2681199', 'AccidentWeekDay_en__3': 'Saturday', 'AccidentInvolvingMotorcycle__3': 'false' ... 13900 parameters truncated ... 
'AccidentWeekDay_en__996': 'Tuesday', 'AccidentInvolvingMotorcycle__996': 'false', 'RoadType__996': 'rt433', 'AccidentUID__996': 'A5D2C4A55E38707EE0430A865E33707E', 'AccidentInvolvingPedestrian__996': 'false', 'AccidentHour__996': '08', 'geometry__996': , 'AccidentInvolvingBicycle__996': 'false', 'AccidentYear__997': '2011', 'AccidentLocation_CHLV95_N__997': '1251718', 'AccidentType__997': 'at2', 'AccidentSeverityCategory__997': 'as3', 'RoadType_en__997': 'Principal road', 'AccidentLocation_CHLV95_E__997': '2685190', 'AccidentWeekDay_en__997': 'Tuesday', 'AccidentInvolvingMotorcycle__997': 'false', 'RoadType__997': 'rt432', 'AccidentUID__997': 'A5F1841A36B070AEE0430A865E3370AE', 'AccidentInvolvingPedestrian__997': 'false', 'AccidentHour__997': '11', 'geometry__997': , 'AccidentInvolvingBicycle__997': 'false', 'AccidentYear__998': '2011', 'AccidentLocation_CHLV95_N__998': '1246106', 'AccidentType__998': 'at2', 'AccidentSeverityCategory__998': 'as4', 'RoadType_en__998': 'Principal road', 'AccidentLocation_CHLV95_E__998': '2685329', 'AccidentWeekDay_en__998': 'Tuesday', 'AccidentInvolvingMotorcycle__998': 'false', 'RoadType__998': 'rt432', 'AccidentUID__998': 'A5E25678EDD7505EE0430A865E33505E', 'AccidentInvolvingPedestrian__998': 'false', 'AccidentHour__998': '14', 'geometry__998': , 'AccidentInvolvingBicycle__998': 'false', 'AccidentYear__999': '2011', 'AccidentLocation_CHLV95_N__999': '1251852', 'AccidentType__999': 'at00', 'AccidentSeverityCategory__999': 'as3', 'RoadType_en__999': 'Principal road', 'AccidentLocation_CHLV95_E__999': '2683606', 'AccidentWeekDay_en__999': 'Tuesday', 'AccidentInvolvingMotorcycle__999': 'false', 'RoadType__999': 'rt432', 'AccidentUID__999': 'A6431CCEC810E09CE0430A865E33E09C', 'AccidentInvolvingPedestrian__999': 'false', 'AccidentHour__999': '16', 'geometry__999': , 'AccidentInvolvingBicycle__999': 'false'}]\n(Background on this error at: https://sqlalche.me/e/20/f405)", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mProgrammingError\u001B[0m Traceback (most recent call last)", - "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2112\u001B[0m, in \u001B[0;36mConnection._exec_insertmany_context\u001B[0;34m(self, dialect, context)\u001B[0m\n\u001B[1;32m 2111\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 2112\u001B[0m \u001B[43mdialect\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdo_execute\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 2113\u001B[0m \u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2114\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_stmt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2115\u001B[0m \u001B[43m \u001B[49m\u001B[43msub_params\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2116\u001B[0m \u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 2117\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2119\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n", - "File \u001B[0;32m~/Projects/repos/group-1/db23-project-venv/lib/python3.11/site-packages/sqlalchemy/engine/default.py:922\u001B[0m, in \u001B[0;36mDefaultDialect.do_execute\u001B[0;34m(self, cursor, statement, parameters, context)\u001B[0m\n\u001B[1;32m 921\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m 
\u001B[38;5;21mdo_execute\u001B[39m(\u001B[38;5;28mself\u001B[39m, cursor, statement, parameters, context\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[0;32m--> 922\u001B[0m \u001B[43mcursor\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstatement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\n",
-    "\u001B[0;31mProgrammingError\u001B[0m: can't adapt type 'Point'",
-    "\nThe above exception was the direct cause of the following exception:\n",
-    "\u001B[0;31mProgrammingError\u001B[0m                         Traceback (most recent call last)",
-    "[... further traceback frames through pandas.io.sql and sqlalchemy.engine elided ...]",
-    "\u001B[0;31mProgrammingError\u001B[0m: (psycopg2.ProgrammingError) can't adapt type 'Point'\n[SQL: INSERT INTO table_name (\"AccidentUID\", \"AccidentHour\", \"AccidentYear\", \"AccidentWeekDay_en\", \"AccidentType\", \"AccidentSeverityCategory\", \"AccidentInvolvingPedestrian\", \"AccidentInvolvingBicycle\", \"AccidentInvolvingMotorcycle\", \"RoadType\", \"RoadType_e ... 398437 characters truncated ...)]\n[parameters: {'AccidentYear__0': '2011', 'AccidentLocation_CHLV95_N__0': '1245194', ... 13900 parameters truncated ...}]\n(Background on this error at: https://sqlalche.me/e/20/f405)"
-    ]
-   }
-  ],
+  {
+   "name": "stderr",
+   "output_type": "stream",
+   "text": [
+    "/var/folders/s3/8bc7ys2d24lgqhdlpttvp70r0000gn/T/ipykernel_59953/958527375.py:15: UserWarning: Geometry column does not contain geometry.\n",
+    "  acc_df['geometry'] = acc_df['geometry'].apply(lambda geom: WKTElement(geom.wkt, srid=4326))\n"
+   ]
+  },
+  {
+   "data": {
+    "text/plain": "826"
+   },
+   "execution_count": 21,
+   "metadata": {},
+   "output_type": "execute_result"
+  }
 ],
 "source": [
    "from sqlalchemy import create_engine\n",
+   "from geoalchemy2 import Geometry, WKTElement\n",
+   "import geopandas as gpd\n",
+   "from shapely import wkt\n",
    "\n",
    "db_url = f'postgresql://seb:@localhost:5432/test-db23'\n",
    "engine = create_engine(db_url)\n",
    "\n",
    "#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\n",
    "#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\n",
-   "acc_df.to_sql('table_name', engine, if_exists='replace', index=False)\n"
+   "\n",
+   "geometry_column = 'geometry'\n",
+   "\n",
+   "\n",
+   "acc_df['geometry'] = acc_df['geometry'].apply(lambda geom: WKTElement(geom.wkt, srid=4326))\n",
+   "\n",
+   "acc_df.to_sql('accidents', engine, if_exists='replace', index=False, dtype={'geometry': Geometry('POINT', srid=4326)})\n",
+   "\n"
 ],
 "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T22:27:41.753420Z",
-    "start_time": "2023-12-02T22:27:40.962764Z"
+    "end_time": "2023-12-03T00:00:35.257439Z",
+    "start_time": "2023-12-03T00:00:32.802219Z"
    }
 },
 "id": "fa76af8343443d7a"
},
{
 "cell_type": "code",
-    "execution_count": 16,
+    "execution_count": 22,
 "outputs": [],
 "source": [
"engine.dispose()" @@ -352,8 +325,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T22:26:54.275225Z", - "start_time": "2023-12-02T22:26:54.273410Z" + "end_time": "2023-12-03T00:00:40.409019Z", + "start_time": "2023-12-03T00:00:40.406193Z" } }, "id": "bc0a23a5126e76c2" From 250e13acba65077190dc0f771c9221fd2fb48353 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 11:44:42 +0100 Subject: [PATCH 07/22] Fix requirements.txt --- requirements.txt | 119 ----------------------------------------------- 1 file changed, 119 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 08150f4..0000000 --- a/requirements.txt +++ /dev/null @@ -1,119 +0,0 @@ -pandas==2.1.3 -requests==2.31.0 - -geopandas~=0.14.1Package Version -------------------------- ------------ -anyio 4.0.0 -appnope 0.1.3 -argon2-cffi 23.1.0 -argon2-cffi-bindings 21.2.0 -arrow 1.3.0 -asttokens 2.4.1 -async-lru 2.0.4 -attrs 23.1.0 -Babel 2.13.1 -beautifulsoup4 4.12.2 -black 23.11.0 -bleach 6.1.0 -certifi 2023.7.22 -cffi 1.16.0 -charset-normalizer 3.3.2 -click 8.1.7 -click-plugins 1.1.1 -cligj 0.7.2 -comm 0.2.0 -debugpy 1.8.0 -decorator 5.1.1 -defusedxml 0.7.1 -executing 2.0.1 -fastjsonschema 2.19.0 -fiona 1.9.5 -fqdn 1.5.1 -GeoAlchemy2 0.14.2 -geopandas 0.14.1 -idna 3.4 -ipykernel 6.26.0 -ipython 8.17.2 -ipywidgets 8.1.1 -isoduration 20.11.0 -jedi 0.19.1 -Jinja2 3.1.2 -json5 0.9.14 -jsonpointer 2.4 -jsonschema 4.19.2 -jsonschema-specifications 2023.11.1 -jupyter 1.0.0 -jupyter_client 8.6.0 -jupyter-console 6.6.3 -jupyter_core 5.5.0 -jupyter-events 0.9.0 -jupyter-lsp 2.2.0 -jupyter_server 2.10.1 -jupyter_server_terminals 0.4.4 -jupyterlab 4.0.8 -jupyterlab-pygments 0.2.2 -jupyterlab_server 2.25.1 -jupyterlab-widgets 3.0.9 -MarkupSafe 2.1.3 -matplotlib-inline 0.1.6 -mistune 3.0.2 -mypy-extensions 1.0.0 -nbclient 0.9.0 -nbconvert 7.11.0 -nbformat 5.9.2 -nest-asyncio 1.5.8 -notebook 7.0.6 -notebook_shim 0.2.3 -numpy 1.26.2 -overrides 7.4.0 -packaging 23.2 -pandas 2.1.3 -pandocfilters 1.5.0 -parso 0.8.3 -pathspec 0.11.2 -pexpect 4.8.0 -pip 23.3.1 -platformdirs 4.0.0 -prometheus-client 0.18.0 -prompt-toolkit 3.0.41 -psutil 5.9.6 -psycopg2 2.9.9 -ptyprocess 0.7.0 -pure-eval 0.2.2 -pycparser 2.21 -Pygments 2.16.1 -pyproj 3.6.1 -python-dateutil 2.8.2 -python-json-logger 2.0.7 -pytz 2023.3.post1 -PyYAML 6.0.1 -pyzmq 25.1.1 -qtconsole 5.5.1 -QtPy 2.4.1 -referencing 0.31.0 -requests 2.31.0 -rfc3339-validator 0.1.4 -rfc3986-validator 0.1.1 -rpds-py 0.13.0 -Send2Trash 1.8.2 -setuptools 68.2.2 -shapely 2.0.2 -six 1.16.0 -sniffio 1.3.0 -soupsieve 2.5 -SQLAlchemy 2.0.23 -stack-data 0.6.3 -terminado 0.18.0 -tinycss2 1.2.1 -tornado 6.3.3 -traitlets 5.13.0 -types-python-dateutil 2.8.19.14 -typing_extensions 4.8.0 -tzdata 2023.3 -uri-template 1.3.0 -urllib3 2.1.0 -wcwidth 0.2.10 -webcolors 1.13 -webencodings 0.5.1 -websocket-client 1.6.4 -widgetsnbextension 4.0.9 From 983f85b116fd7213d6ab757e3081b3d43b8d161f Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 11:45:00 +0100 Subject: [PATCH 08/22] Fix requirements.txt --- requirements.txt | 116 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6bf25ad --- /dev/null +++ b/requirements.txt @@ -0,0 +1,116 @@ +Package Version 
---
 requirements.txt | 116 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6bf25ad
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,116 @@
+Package Version
+------------------------- ------------
+anyio 4.0.0
+appnope 0.1.3
[... 110 further lines of the `pip list` table elided; the same packages appear in pinned `==` form in PATCH 10 below ...]
+websocket-client 1.6.4
+widgetsnbextension 4.0.9

From e73962d8e1e0c5be48fda762d959293f150c05c1 Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 11:48:32 +0100
Subject: [PATCH 10/22] Convert requirements.txt to pinned name==version format
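
`pip list` prints a human-readable table, not an installable requirements
file. This commit rewrites each entry as a `name==version` pin, the form
`pip install -r requirements.txt` expects, and drops the `pip` and
`setuptools` rows, which describe tooling rather than project
dependencies.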
---
 requirements.txt | 228 ++++++++++++++++++++++----------------------
 1 file changed, 112 insertions(+), 116 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6bf25ad..49eb76e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,116 +1,112 @@
-Package Version
-------------------------- ------------
-anyio 4.0.0
[... 112 further deletion lines elided; they remove the `pip list` table added in PATCH 08 above ...]
-widgetsnbextension 4.0.9
+anyio==4.0.0
+appnope==0.1.3
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+attrs==23.1.0
+Babel==2.13.1
+beautifulsoup4==4.12.2
+black==23.11.0
+bleach==6.1.0
+certifi==2023.7.22
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+click-plugins==1.1.1
+cligj==0.7.2
+comm==0.2.0
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+executing==2.0.1
+fastjsonschema==2.19.0
+fiona==1.9.5
+fqdn==1.5.1
+GeoAlchemy2==0.14.2
+geopandas==0.14.1
+idna==3.4
+ipykernel==6.26.0
+ipython==8.17.2
+ipywidgets==8.1.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+json5==0.9.14
+jsonpointer==2.4
+jsonschema==4.19.2
+jsonschema-specifications==2023.11.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter-lsp==2.2.0
+jupyter_client==8.6.0
+jupyter_core==5.5.0
+jupyter_server==2.10.1
+jupyter_server_terminals==0.4.4
+jupyterlab==4.0.8
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==3.0.9
+jupyterlab_server==2.25.1
+MarkupSafe==2.1.3
+matplotlib-inline==0.1.6
+mistune==3.0.2
+mypy-extensions==1.0.0
+nbclient==0.9.0
+nbconvert==7.11.0
+nbformat==5.9.2
+nest-asyncio==1.5.8
+notebook==7.0.6
+notebook_shim==0.2.3
+numpy==1.26.2
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.3
+pandocfilters==1.5.0
+parso==0.8.3
+pathspec==0.11.2
+pexpect==4.8.0
+platformdirs==4.0.0
+prometheus-client==0.18.0
+prompt-toolkit==3.0.41
+psutil==5.9.6
+psycopg2==2.9.9
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+Pygments==2.16.1
+pyproj==3.6.1
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==25.1.1
+qtconsole==5.5.1
+QtPy==2.4.1
+referencing==0.31.0
+requests==2.31.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rpds-py==0.13.0
+Send2Trash==1.8.2
+shapely==2.0.2
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.23
+stack-data==0.6.3
+terminado==0.18.0
+tinycss2==1.2.1
+tornado==6.3.3
+traitlets==5.13.0
+types-python-dateutil==2.8.19.14
+typing_extensions==4.8.0
+tzdata==2023.3
+uri-template==1.3.0
+urllib3==2.1.0
+wcwidth==0.2.10
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.6.4
+widgetsnbextension==4.0.9

From e4d0484a231c78cc9cf558f42aa35376bb3de92d Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 12:28:32 +0100
Subject: [PATCH 11/22] Add logger.
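
src/data_utils.py and src/integrate.py now configure logging at import
time and write to a shared app.log with a timestamp/name/level format.
One caveat, noted here rather than changed: logging.basicConfig() only
configures the root logger on its first call, so whichever module is
imported first wins and later basicConfig() calls are silently ignored.
A minimal sketch of the pattern these modules rely on (the logger name
is each module's own choice):

    import logging

    # Configures the root logger once; later basicConfig() calls from
    # other modules in the same process are no-ops.
    logging.basicConfig(level=logging.DEBUG, filename='app.log',
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Per-module logger; %(name)s in the format shows which module
    # emitted each record.
    logger = logging.getLogger('data_utils.py')
    logger.debug('logging configured')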
--- src/data_utils.py | 13 +++++++------ src/integrate.py | 17 +++++++++++------ src/testArea.ipynb | 14 +++++++------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/data_utils.py b/src/data_utils.py index 584619c..9ce34c5 100644 --- a/src/data_utils.py +++ b/src/data_utils.py @@ -1,11 +1,13 @@ -# data_utils.py - import os import pandas as pd import requests from urllib.parse import urlparse import geopandas as gpd from concurrent.futures import ThreadPoolExecutor as tpe +import logging + +logging.basicConfig(level=logging.DEBUG, filename='app.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('data_utils.py') def download_csv(url, local_filename): @@ -75,11 +77,11 @@ def load_dataframes_from_geojson_files(data_dir, u_string): print('u_string', u_string) gdf = gpd.GeoDataFrame() for filename in os.listdir(data_dir): - print("Filename:", filename) + #print("Filename:", filename) if (u_string in filename) and filename.endswith('.json'): filepath = os.path.join(data_dir, filename) print("Filepath:", filepath) - gdf = gpd.read_file(filepath) # Read GeoJSON directly as GeoDataFrame + gdf = gpd.read_file(filepath) return gdf @@ -90,7 +92,7 @@ def combine_dataframes(dataframes): return combined_dataframe else: print("No dataframes to combine") - return pd.DataFrame() # Return an empty DataFrame + return pd.DataFrame() def create_unified_df(urls_file, u_string, data_dir, files_present=False): @@ -110,7 +112,6 @@ def save_dataframe_to_csv(df, integrated_dir, filename): if __name__ == "__main__": - # Test the functions here if necessary csv_urls_file = '../docs/all_csv_urls.txt' datasets_dir = 'datasets/' output_file = 'column_names.txt' diff --git a/src/integrate.py b/src/integrate.py index 607719d..6427e29 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -4,6 +4,12 @@ import os import requests import pandas as pd +import logging + +logging.basicConfig(level=logging.DEBUG, filename='app.log', + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('integrate.py') + foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt' miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt' accident_file_url = '../docs/accident_loc_urls.txt' @@ -41,7 +47,7 @@ def process_foot_bike_data(): def process_miv_data(): - miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir,files_present=True) + miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True) miv_df_unified[['Date', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True) @@ -65,13 +71,12 @@ def process_accident_data(): acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E', - 'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth'] + 'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry'] cleaned_acc_df = acc_df_unified[acc_cols_to_keep] return cleaned_acc_df if __name__ == '__main__': - fb_df = process_miv_data() - print(fb_df['MessungDatZeit']) - print(fb_df.dtypes) - print(fb_df.head(100)) + acc_df = process_accident_data() + print(acc_df.dtypes) + print(acc_df.head(100)) diff --git a/src/testArea.ipynb b/src/testArea.ipynb index 
e158c10..c4739bb 100644
--- a/src/testArea.ipynb
+++ b/src/testArea.ipynb
@@ -13,8 +13,8 @@
 "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:43:55.980827Z",
-    "start_time": "2023-12-02T23:43:55.546732Z"
+    "end_time": "2023-12-03T10:58:50.698090Z",
+    "start_time": "2023-12-03T10:58:50.384352Z"
    }
 },
 "id": "be55b25929d95559"
@@ -44,8 +44,8 @@
 "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:48:08.233784Z",
-    "start_time": "2023-12-02T23:43:55.980667Z"
+    "end_time": "2023-12-03T11:01:14.422749Z",
+    "start_time": "2023-12-03T10:58:52.300667Z"
    }
 },
 "id": "dd3831953afdeb72"
@@ -123,7 +123,7 @@
 },
 {
 "cell_type": "code",
-    "execution_count": 6,
+    "execution_count": 3,
 "outputs": [
    {
     "name": "stdout",
@@ -163,8 +163,8 @@
 "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-02T23:49:50.185415Z",
-    "start_time": "2023-12-02T23:49:34.846049Z"
+    "end_time": "2023-12-03T11:15:51.051154Z",
+    "start_time": "2023-12-03T11:15:36.154717Z"
    }
 },
 "id": "f86bc612060b17a4"

From ca3450a4de685599f0a749ef3ed6f5f2a63cae6d Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 12:28:32 +0100
Subject: [PATCH 12/22] Add logger: use per-module log files and replace
 prints with logger calls

---
 src/data_utils.py | 19 ++++++++++---------
 src/integrate.py | 17 +++++++++++------
 src/testArea.ipynb | 14 +++++++-------
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/src/data_utils.py b/src/data_utils.py
index 584619c..72b88c5 100644
--- a/src/data_utils.py
+++ b/src/data_utils.py
@@ -1,11 +1,13 @@
-# data_utils.py
-
 import os
 import pandas as pd
 import requests
 from urllib.parse import urlparse
 import geopandas as gpd
 from concurrent.futures import ThreadPoolExecutor as tpe
+import logging
+
+logging.basicConfig(level=logging.DEBUG, filename='data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('data_utils.py')
 
 
 def download_csv(url, local_filename):
@@ -33,9 +35,9 @@ def process_urls(data_dir, urls_file):
 
         # Check if the file already exists
         if not os.path.isfile(local_filename):
-            print(f"Downloading {url}...")
+            logger.debug(f"Downloading {url}...")
             download_csv(url, local_filename)
-            print(f"Saved to {local_filename}")
+            logger.debug(f"Saved to {local_filename}")
         else:
             print(f"File {filename} already exists in {data_dir}, skipping download.")
 
@@ -45,7 +47,7 @@ def load_dataframe_from_csv(filepath):
         df = pd.read_csv(filepath, low_memory=False)
         return df
     except Exception as e:
-        print(f"Error loading {filepath}: {e}")
+        logger.error(f"Error loading {filepath}: {e}")
         return None
 
 
@@ -75,11 +77,11 @@ def load_dataframes_from_geojson_files(data_dir, u_string):
     print('u_string', u_string)
     gdf = gpd.GeoDataFrame()
     for filename in os.listdir(data_dir):
-        print("Filename:", filename)
+        #print("Filename:", filename)
         if (u_string in filename) and filename.endswith('.json'):
             filepath = os.path.join(data_dir, filename)
             print("Filepath:", filepath)
-            gdf = gpd.read_file(filepath)  # Read GeoJSON directly as GeoDataFrame
+            gdf = gpd.read_file(filepath)
     return gdf
 
 
@@ -90,7 +92,7 @@ def combine_dataframes(dataframes):
         return combined_dataframe
     else:
         print("No dataframes to combine")
-        return pd.DataFrame()  # Return an empty DataFrame
+        return pd.DataFrame()
 
 
 def create_unified_df(urls_file, u_string, data_dir, files_present=False):
@@ -110,7 +112,6 @@ def save_dataframe_to_csv(df, integrated_dir, filename):
 
 
 if __name__ == "__main__":
-    # Test the functions here if necessary
     csv_urls_file = 
'../docs/all_csv_urls.txt' datasets_dir = 'datasets/' output_file = 'column_names.txt' diff --git a/src/integrate.py b/src/integrate.py index 607719d..c2ccfe1 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -4,6 +4,12 @@ import os import requests import pandas as pd +import logging + +logging.basicConfig(level=logging.DEBUG, filename='integrate.log', + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('integrate.py') + foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt' miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt' accident_file_url = '../docs/accident_loc_urls.txt' @@ -41,7 +47,7 @@ def process_foot_bike_data(): def process_miv_data(): - miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir,files_present=True) + miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True) miv_df_unified[['Date', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True) @@ -65,13 +71,12 @@ def process_accident_data(): acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E', - 'AccidentLocation_CHLV95_N', 'geometry', 'AccidentMonth'] + 'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry'] cleaned_acc_df = acc_df_unified[acc_cols_to_keep] return cleaned_acc_df if __name__ == '__main__': - fb_df = process_miv_data() - print(fb_df['MessungDatZeit']) - print(fb_df.dtypes) - print(fb_df.head(100)) + acc_df = process_accident_data() + print(acc_df.dtypes) + print(acc_df.head(100)) diff --git a/src/testArea.ipynb b/src/testArea.ipynb index e158c10..c4739bb 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -13,8 +13,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T23:43:55.980827Z", - "start_time": "2023-12-02T23:43:55.546732Z" + "end_time": "2023-12-03T10:58:50.698090Z", + "start_time": "2023-12-03T10:58:50.384352Z" } }, "id": "be55b25929d95559" @@ -44,8 +44,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T23:48:08.233784Z", - "start_time": "2023-12-02T23:43:55.980667Z" + "end_time": "2023-12-03T11:01:14.422749Z", + "start_time": "2023-12-03T10:58:52.300667Z" } }, "id": "dd3831953afdeb72" @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "outputs": [ { "name": "stdout", @@ -163,8 +163,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-02T23:49:50.185415Z", - "start_time": "2023-12-02T23:49:34.846049Z" + "end_time": "2023-12-03T11:15:51.051154Z", + "start_time": "2023-12-03T11:15:36.154717Z" } }, "id": "f86bc612060b17a4" From 65bd9480e70fa7b751975c83fad8b40a40f74c65 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 12:35:38 +0100 Subject: [PATCH 13/22] Load accident file from internet if not present. 
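
process_accident_data() now takes a file_present flag: when it is False,
du.process_urls() downloads the accident GeoJSON into datasets/ before
the frame is loaded. A short usage sketch, assuming the repository's
src/ directory is on the import path and the URL list files exist:

    from integrate import process_accident_data

    # Fresh checkout: fetch the GeoJSON first, then load it.
    acc_df = process_accident_data(False)

    # Data already downloaded: skip the network round trip.
    acc_df = process_accident_data(True)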
---
 src/integrate.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/integrate.py b/src/integrate.py
index c2ccfe1..bdc39d5 100644
--- a/src/integrate.py
+++ b/src/integrate.py
@@ -65,8 +65,9 @@ def process_miv_data():
     return cleaned_miv_df
 
 
-def process_accident_data():
-
+def process_accident_data(file_present: bool):
+    if not file_present:
+        du.process_urls(data_dir, accident_file_url)
     acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
     acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType',
                         'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
@@ -77,6 +78,6 @@ def process_accident_data():
 
 
 if __name__ == '__main__':
-    acc_df = process_accident_data()
+    acc_df = process_accident_data(False)
     print(acc_df.dtypes)
     print(acc_df.head(100))

From 94ee3cc3b0805b9ef55ac8b8e655016bfb5767d7 Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 13:18:01 +0100
Subject: [PATCH 14/22] Add ID column to foot/bike data; rename accident
 coordinate columns to EKoord/NKoord
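
Gives the cleaned foot/bike frame a simple surrogate key (row index plus
one) and renames the accident coordinate columns to EKoord/NKoord to
match the other tables. Side note: cleaned_fb_df and cleaned_acc_df are
derived from column selections on larger frames, so in-place assignment
and rename can trigger pandas' SettingWithCopyWarning (one is visible in
the notebook output removed below). A defensive sketch of the same steps
on a stand-in frame; the column names match this patch, the values are
illustrative:

    import pandas as pd

    acc = pd.DataFrame({
        'AccidentLocation_CHLV95_E': [2684605, 2682382],
        'AccidentLocation_CHLV95_N': [1245194, 1246980],
    })

    # An explicit copy keeps the new column and the rename from
    # operating on a view of the original frame, which is what the
    # warning complains about.
    cleaned = acc[['AccidentLocation_CHLV95_E', 'AccidentLocation_CHLV95_N']].copy()
    cleaned['ID'] = cleaned.index + 1
    cleaned = cleaned.rename(columns={
        'AccidentLocation_CHLV95_E': 'EKoord',
        'AccidentLocation_CHLV95_N': 'NKoord',
    })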
---
 src/integrate.py | 9 +-
 src/testArea.ipynb | 199 ++++++++------------------------------
 2 files changed, 42 insertions(+), 166 deletions(-)

diff --git a/src/integrate.py b/src/integrate.py
index bdc39d5..41c5b64 100644
--- a/src/integrate.py
+++ b/src/integrate.py
@@ -43,6 +43,7 @@ def process_foot_bike_data():
     days = dt_obj.dt.weekday
     fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x])
     cleaned_fb_df = fb_df_grouped
+    cleaned_fb_df['ID'] = cleaned_fb_df.index + 1
     return cleaned_fb_df
 
 
@@ -65,7 +66,7 @@ def process_miv_data():
     return cleaned_miv_df
 
 
-def process_accident_data(file_present: bool):
+def process_accident_data(file_present: bool = True):
     if not file_present:
         du.process_urls(data_dir, accident_file_url)
     acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
@@ -74,10 +75,14 @@ def process_accident_data(file_present: bool):
                         'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 'AccidentLocation_CHLV95_E',
                         'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry']
     cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
+    cleaned_acc_df.rename(columns={
+        'AccidentLocation_CHLV95_E': 'EKoord',
+        'AccidentLocation_CHLV95_N': 'NKoord',
+    }, inplace=True)
     return cleaned_acc_df
 
 
 if __name__ == '__main__':
-    acc_df = process_accident_data(False)
+    acc_df = process_accident_data(True)
     print(acc_df.dtypes)
     print(acc_df.head(100))

diff --git a/src/testArea.ipynb b/src/testArea.ipynb
index c4739bb..3104921 100644
--- a/src/testArea.ipynb
+++ b/src/testArea.ipynb
@@ -13,29 +13,16 @@
 "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2023-12-03T10:58:50.698090Z",
-    "start_time": "2023-12-03T10:58:50.384352Z"
+    "end_time": "2023-12-03T12:17:02.386525Z",
+    "start_time": "2023-12-03T12:17:01.722469Z"
    }
 },
 "id": "be55b25929d95559"
},
{
 "cell_type": "code",
-    "execution_count": 2,
-    "outputs": [
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "/Users/seb/Projects/repos/group-1/src/integrate.py:55: SettingWithCopyWarning: \n",
-       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-       "Try using .loc[row_indexer,col_indexer] = value instead\n",
-       "\n",
-       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-       "  miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n"
-      ]
-     }
-    ],
+    "execution_count": null,
+    "outputs": [],
 "source": [
    "\n",
    "miv_df = intr.process_miv_data()\n",
@@ -43,129 +30,60 @@
 ],
 "metadata": {
    "collapsed": false,
+    "is_executing": true,
    "ExecuteTime": {
-    "end_time": "2023-12-03T11:01:14.422749Z",
-    "start_time": "2023-12-03T10:58:52.300667Z"
+    "start_time": "2023-12-03T12:17:04.199209Z"
    }
 },
 "id": "dd3831953afdeb72"
},
{
 "cell_type": "code",
-    "execution_count": 3,
+    "execution_count": null,
 "outputs": [],
 "source": [
    "test_df = miv_df\n"
 ],
 "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2023-12-02T23:48:08.239957Z",
-     "start_time": "2023-12-02T23:48:08.230590Z"
-    }
+    "collapsed": false
 },
 "id": "14471cd78389ce4d"
},
{
 "cell_type": "code",
-    "execution_count": 4,
+    "execution_count": null,
 "outputs": [],
 "source": [
    "test_df.dtypes\n",
    "date_object = pd.to_datetime(test_df['Date'])\n"
 ],
 "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2023-12-02T23:48:09.754205Z",
-     "start_time": "2023-12-02T23:48:08.232651Z"
-    }
+    "collapsed": false
 },
 "id": "c70d21adef38fd68"
},
{
 "cell_type": "code",
-    "execution_count": 5,
-    "outputs": [
-     {
-      "ename": "KeyError",
-      "evalue": "'Weekday_Name'",
-      "output_type": "error",
-      "traceback": [
-       "[... KeyError traceback through pandas.core.indexes internals elided ...]",
-       "\u001B[0;31mKeyError\u001B[0m: 'Weekday_Name'"
-      ]
-     }
-    ],
+    "execution_count": null,
+    "outputs": [],
 "source": [
    "test_df['Weekday_Name']"
 ],
 "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2023-12-02T23:48:10.103198Z",
-     "start_time": "2023-12-02T23:48:09.756006Z"
-    }
+    "collapsed": false
 },
 "id": "d0df3c0ef49e8061"
},
{
 "cell_type": "code",
-    "execution_count": 3,
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "u_string RoadTrafficAccidentLocations.json\n",
-       "[... per-file 'Filename:' listing of the datasets/ directory elided ...]",
-       "Filepath: datasets/RoadTrafficAccidentLocations.json\n"
-      ]
-     }
-    ],
+    "execution_count": null,
+    "outputs": [],
 "source": [
-    "acc_df = intr.process_accident_data()"
+    "acc_df = intr.process_accident_data(True)"
 ],
 "metadata": {
    "collapsed": false,
-    "ExecuteTime": {
-     "end_time": "2023-12-03T11:15:51.051154Z",
-     "start_time": "2023-12-03T11:15:36.154717Z"
-    }
+    "is_executing": true
 },
 "id": "f86bc612060b17a4"
},
@@ -175,15 +93,12 @@
 "outputs": [],
 "source": [
    "acc_df.head()\n",
-    "acc_df['AccidentWeekDay'].unique()\n",
+    "acc_df['AccidentWeekDay_en'].unique()\n",
    "#acc_df.dtypes\n",
-    "date_obj = dt.strptime(acc_df[''])\n"
+    "\n"
 ],
 "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "start_time": "2023-12-02T23:48:10.101387Z"
-    }
+    "collapsed": false
 },
 "id": "6affbeea6c7cf3ef"
},
@@ -203,9 +118,7 @@
 ],
 "metadata": {
    "collapsed": false,
-    "ExecuteTime": {
-     "start_time": "2023-12-02T23:48:10.102789Z"
-    }
+    "is_executing": true
 },
 "id": "242041cd369d8454"
},
{
 "cell_type": "code",
 "execution_count": null,
 "outputs": [],
-    "source": [],
+    "source": [
+     "acc_df['ID'] = acc_df.index + 1\n",
+     "acc_df['ID']"
+    ],
 "metadata": {
-    "collapsed": false,
-    "ExecuteTime": {
-     "start_time": "2023-12-02T23:48:10.103954Z"
-    }
+    "collapsed": false
 },
 "id": 
"1841925ee109a417" }, @@ -232,60 +145,26 @@ "print(\"FB unique: \", fb_data['OST'].unique())\n" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "start_time": "2023-12-02T23:48:10.104894Z" - } + "collapsed": false }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": 9, - "outputs": [ - { - "data": { - "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en AccidentLocation_CHLV95_E \\\n0 false rt433 Minor road 2684605 \n1 false rt433 Minor road 2682382 \n2 false rt439 Other 2682791 \n3 false rt433 Minor road 2681199 \n4 false rt433 Minor road 2682479 \n\n AccidentLocation_CHLV95_N geometry \n0 1245194 POINT(8.55841025 47.3521677) \n1 1246980 POINT(8.52932024 47.36851152) \n2 1247749 POINT(8.5348767 47.37537618) \n3 1247102 POINT(8.51368203 47.36975554) \n4 1250690 POINT(8.53128819 47.40186473) ", - "text/html": "
[HTML rendering of the acc_df.head() result omitted: it duplicates the five-row text/plain output directly above.]
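The cell that follows writes acc_df to Postgres through SQLAlchemy and GeoAlchemy2, converting the shapely geometries to WKTElement first. A minimal self-contained sketch of that pattern, with a placeholder connection string and table name; the SRID 4326 matches the WGS84 points shown above:

    import pandas as pd
    from shapely.geometry import Point
    from sqlalchemy import create_engine
    from geoalchemy2 import Geometry, WKTElement

    # Placeholder DSN; substitute the project's real credentials.
    engine = create_engine('postgresql://user:password@localhost:5432/zh-traffic')

    df = pd.DataFrame({
        'AccidentUID': ['A2D2677533867004E0430A865E337004'],
        'geometry': [Point(8.55841025, 47.3521677)],
    })
    # GeoAlchemy2 binds WKTElement values, not raw shapely objects.
    df['geometry'] = df['geometry'].apply(lambda g: WKTElement(g.wkt, srid=4326))
    df.to_sql('accidents', engine, if_exists='append', index=False,
              dtype={'geometry': Geometry('POINT', srid=4326)})
    engine.dispose()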
" - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "acc_df.head()" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-02T23:53:22.460557Z", - "start_time": "2023-12-02T23:53:22.453434Z" - } + "collapsed": false }, "id": "a159cafa9c227b88" }, { "cell_type": "code", - "execution_count": 21, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/s3/8bc7ys2d24lgqhdlpttvp70r0000gn/T/ipykernel_59953/958527375.py:15: UserWarning: Geometry column does not contain geometry.\n", - " acc_df['geometry'] = acc_df['geometry'].apply(lambda geom: WKTElement(geom.wkt, srid=4326))\n" - ] - }, - { - "data": { - "text/plain": "826" - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "from sqlalchemy import create_engine\n", "from geoalchemy2 import Geometry, WKTElement\n", @@ -307,27 +186,19 @@ "\n" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T00:00:35.257439Z", - "start_time": "2023-12-03T00:00:32.802219Z" - } + "collapsed": false }, "id": "fa76af8343443d7a" }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "outputs": [], "source": [ "engine.dispose()" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T00:00:40.409019Z", - "start_time": "2023-12-03T00:00:40.406193Z" - } + "collapsed": false }, "id": "bc0a23a5126e76c2" } From 1ef7bbe39b430c96ce0c9f2cf6189f6e546aa576 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 13:37:11 +0100 Subject: [PATCH 15/22] Convert MIV coordinate to Int --- src/integrate.py | 3 + src/testArea.ipynb | 187 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 172 insertions(+), 18 deletions(-) diff --git a/src/integrate.py b/src/integrate.py index 41c5b64..2df95c1 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -61,6 +61,9 @@ def process_miv_data(): days = dt_obj.dt.weekday miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x]) + # Convert row type to int so they match other + miv_df_cols_dropped['EKoord'] = miv_df_cols_dropped['EKoord'].astype(int) + miv_df_cols_dropped['NKoord'] = miv_df_cols_dropped['NKoord'].astype(int) cleaned_miv_df = miv_df_cols_dropped return cleaned_miv_df diff --git a/src/testArea.ipynb b/src/testArea.ipynb index 3104921..270b2ac 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -21,8 +21,21 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 2, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/seb/Projects/repos/group-1/src/integrate.py:62: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n" + ] + } + ], "source": [ "\n", "miv_df = intr.process_miv_data()\n", @@ -30,8 +43,8 @@ ], "metadata": { "collapsed": false, - "is_executing": true, "ExecuteTime": { + "end_time": "2023-12-03T12:20:31.968179Z", "start_time": "2023-12-03T12:17:04.199209Z" } }, @@ -76,14 +89,37 @@ }, { 
"cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 3, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "u_string RoadTrafficAccidentLocations.json\n", + "Filepath: datasets/RoadTrafficAccidentLocations.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/seb/Projects/repos/group-1/src/integrate.py:78: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " cleaned_acc_df.rename(columns={\n" + ] + } + ], "source": [ "acc_df = intr.process_accident_data(True)" ], "metadata": { "collapsed": false, - "is_executing": true + "ExecuteTime": { + "end_time": "2023-12-03T12:20:47.066579Z", + "start_time": "2023-12-03T12:20:31.964275Z" + } }, "id": "f86bc612060b17a4" }, @@ -104,8 +140,59 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 4, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accident Columns:\n", + "AccidentUID object\n", + "AccidentHour object\n", + "AccidentYear object\n", + "AccidentWeekDay_en object\n", + "AccidentType object\n", + "AccidentSeverityCategory object\n", + "AccidentInvolvingPedestrian object\n", + "AccidentInvolvingBicycle object\n", + "AccidentInvolvingMotorcycle object\n", + "RoadType object\n", + "RoadType_en object\n", + "EKoord object\n", + "NKoord object\n", + "AccidentMonth object\n", + "geometry geometry\n", + "dtype: object\n", + "\n", + "MIV Columns:\n", + "MSID object\n", + "ZSID object\n", + "Achse object\n", + "EKoord float64\n", + "NKoord float64\n", + "Richtung object\n", + "AnzFahrzeuge float64\n", + "AnzFahrzeugeStatus object\n", + "Date object\n", + "Hrs object\n", + "Weekday_en object\n", + "dtype: object\n", + "\n", + "FB Cols:\n", + "OST int64\n", + "NORD int64\n", + "DATE object\n", + "HRS object\n", + "VELO_IN float64\n", + "VELO_OUT float64\n", + "FUSS_IN float64\n", + "FUSS_OUT float64\n", + "Weekday_en object\n", + "ID int64\n", + "dtype: object\n" + ] + } + ], "source": [ "print(\"Accident Columns:\")\n", "print(acc_df.dtypes)\n", @@ -118,7 +205,10 @@ ], "metadata": { "collapsed": false, - "is_executing": true + "ExecuteTime": { + "end_time": "2023-12-03T12:20:47.067419Z", + "start_time": "2023-12-03T12:20:47.063397Z" + } }, "id": "242041cd369d8454" }, @@ -137,27 +227,88 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MIV unqiue: 0 2683009.89\n", + "1 2683009.89\n", + "2 2683009.89\n", + "3 2683009.89\n", + "4 2683009.89\n", + " ... \n", + "16699185 2682704.50\n", + "16699186 2682704.50\n", + "16699187 2682704.50\n", + "16699188 2682704.50\n", + "16699189 2682704.50\n", + "Name: EKoord, Length: 16699190, dtype: float64\n", + "Acc unique: 0 2684605\n", + "1 2682382\n", + "2 2682791\n", + "3 2681199\n", + "4 2682479\n", + " ... \n", + "55821 2682244\n", + "55822 2680029\n", + "55823 2684990\n", + "55824 2678025\n", + "55825 2684500\n", + "Name: EKoord, Length: 55826, dtype: object\n", + "FB unique: 0 2678956\n", + "1 2678956\n", + "2 2678956\n", + "3 2678956\n", + "4 2678956\n", + " ... 
\n", + "3011488 2684578\n", + "3011489 2684578\n", + "3011490 2684578\n", + "3011491 2684578\n", + "3011492 2684578\n", + "Name: OST, Length: 3011493, dtype: int64\n" + ] + } + ], "source": [ - "print(\"MIV unqiue:\", miv_df['EKoord'].unique().shape)\n", - "print(\"Acc unique:\", acc_df['AccidentLocation_CHLV95_E'].unique().shape)\n", - "print(\"FB unique: \", fb_data['OST'].unique())\n" + "print(\"MIV unqiue:\", miv_df['EKoord'])\n", + "print(\"Acc unique:\", acc_df['EKoord'])\n", + "print(\"FB unique: \", fb_data['OST'])\n" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-03T12:33:32.280058Z", + "start_time": "2023-12-03T12:33:32.275419Z" + } }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en EKoord NKoord \\\n0 false rt433 Minor road 2684605 1245194 \n1 false rt433 Minor road 2682382 1246980 \n2 false rt439 Other 2682791 1247749 \n3 false rt433 Minor road 2681199 1247102 \n4 false rt433 Minor road 2682479 1250690 \n\n AccidentMonth geometry \n0 1 POINT Z (8.55841 47.35217 0.00000) \n1 1 POINT Z (8.52932 47.36851 0.00000) \n2 1 POINT Z (8.53488 47.37538 0.00000) \n3 1 POINT Z (8.51368 47.36976 0.00000) \n4 1 POINT Z (8.53129 47.40186 0.00000) ", + "text/html": "
[HTML rendering omitted: identical to the text/plain acc_df.head() output above, now showing the renamed EKoord and NKoord columns.]
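The dtype listings above are the motivation for this patch's cast: the MIV frame carries EKoord/NKoord as float64 while the accident and foot/bike frames hold integer coordinates, so aligning the types is what makes coordinate joins possible. One caveat: astype(int) raises on NaN, which is why a later patch guards AnzFahrzeuge with fillna(0) before casting. A small illustration with invented values:

    import pandas as pd
    import numpy as np

    s = pd.Series([2683009.89, np.nan])
    # s.astype(int) would raise: cannot convert non-finite values (NA or inf) to integer
    print(s.fillna(0).astype(int).tolist())  # [2683009, 0]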
" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "acc_df.head()" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-03T12:32:55.249260Z", + "start_time": "2023-12-03T12:32:55.235008Z" + } }, "id": "a159cafa9c227b88" }, From c33ca87aafa7a6f97d5d7dec7fa988b6cde8a0b8 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 16:34:40 +0100 Subject: [PATCH 16/22] UNFINNISHED: Script to create csv of integrated and clean datasets. --- src/integrate.py | 171 ++++++++++++++++++--- src/{preparations.py => prepare_for_db.py} | 0 src/setup_tables.sql | 71 +++++++++ src/testArea.ipynb | 58 +++---- 4 files changed, 252 insertions(+), 48 deletions(-) rename src/{preparations.py => prepare_for_db.py} (100%) create mode 100644 src/setup_tables.sql diff --git a/src/integrate.py b/src/integrate.py index 2df95c1..9a7c03b 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -1,14 +1,19 @@ import data_utils as du -from datetime import datetime as dt import os -import requests import pandas as pd +from datetime import datetime +import time +from shapely.geometry import Point import logging logging.basicConfig(level=logging.DEBUG, filename='integrate.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger('integrate.py') +stream_handler = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +stream_handler.setFormatter(formatter) +logger.addHandler(stream_handler) foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt' miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt' @@ -24,9 +29,70 @@ integrated_dir = 'datasets/integrated/' weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] +fb_data_types = { + 'ID': 'int', + 'NORD': 'int', + 'OST': 'int', + 'DATE': 'str', + 'HRS': 'int', + 'VELO_IN': 'int', + 'VELO_OUT': 'int', + 'FUSS_IN': 'int', + 'FUSS_OUT': 'int', + 'Weekday_en': 'str' +} -def process_foot_bike_data(): - fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir, files_present=True) +miv_data_types = { + 'MSID': 'str', + 'ZSID': 'str', + 'Achse': 'str', + 'NKoord': 'int', + 'EKoord': 'int', + 'Richtung': 'str', + 'AnzFahrzeuge': 'int', + 'AnzFahrzeugeStatus': 'str', + 'Datum': 'str', + 'Hrs': 'int', + 'Weekday_en': 'str' +} + +acc_data_types = { + 'AccidentUID': 'str', + 'AccidentYear': 'int', + 'AccidentMonth': 'int', + 'AccidentWeekDay_en': 'str', + 'AccidentHour': 'int', + 'NKoord': 'int', + 'EKoord': 'int', + 'AccidentType_en': 'str', + 'AccidentType': 'str', + 'AccidentSeverityCategory': 'str', + 'AccidentInvolvingPedestrian': 'bool', + 'AccidentInvolvingBicycle': 'bool', + 'AccidentInvolvingMotorcycle': 'bool', + 'RoadType': 'str', + 'RoadType_en': 'str', + 'Geometry': 'str' # TODO: Figure out what dtype this needs to be for postgres +} + + +def ensure_dirs_exist(data_dir, integrated_dir): + """ + This should be called before anything else to make sure that the relevant directories exists. 
+ :param data_dir: directory where the datasets are stored + :param integrated_dir: directory where the integrated data will be stored + :return: + """ + logger.debug(f'data_dir: {data_dir}\n integrated_dir: {integrated_dir}') + logger.info("Ensuring needed directories exist.") + os.makedirs(data_dir, exist_ok=True) + logger.debug("data_dir created.") + os.makedirs(integrated_dir, exist_ok=True) + logger.debug("integrated_dir created") + + +def process_foot_bike_data(files_present=True): + fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir, files_present=files_present) fb_df_unified[['DATE', "TIME"]] = fb_df_unified['DATUM'].str.split('T', expand=True) fb_df_unified[['HRS', 'MINS']] = fb_df_unified['TIME'].str.split(':', expand=True) ## Evt brauchen wir doch FK_ZAEHLER @@ -44,28 +110,32 @@ def process_foot_bike_data(): fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x]) cleaned_fb_df = fb_df_grouped cleaned_fb_df['ID'] = cleaned_fb_df.index + 1 + cleaned_fb_df = cleaned_fb_df[['ID', 'NORD', 'OST', 'DATE', 'HRS', 'VELO_IN', 'VELO_OUT', 'FUSS_IN', + 'FUSS_OUT', 'Weekday_en']] + # Ensure datatype of df and sql table match + cleaned_fb_df = cleaned_fb_df.astype(fb_data_types) return cleaned_fb_df -def process_miv_data(): - miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=True) +def process_miv_data(files_present=True): + miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=files_present) - miv_df_unified[['Date', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) + miv_df_unified[['Datum', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True) miv_cols_to_keep = ['MSID','ZSID','Achse', 'EKoord', 'NKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus', - 'Date', 'Hrs'] + 'Datum', 'Hrs'] miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep] - dt_obj = pd.to_datetime(miv_df_cols_dropped['Date']) + dt_obj = pd.to_datetime(miv_df_cols_dropped['Datum']) days = dt_obj.dt.weekday miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x]) + miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int) - # Convert row type to int so they match other - miv_df_cols_dropped['EKoord'] = miv_df_cols_dropped['EKoord'].astype(int) - miv_df_cols_dropped['NKoord'] = miv_df_cols_dropped['NKoord'].astype(int) + cleaned_miv_df = miv_df_cols_dropped[['MSID', 'ZSID', 'Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', + 'AnzFahrzeugeStatus', 'Datum', 'Hrs', 'Weekday_en']] - cleaned_miv_df = miv_df_cols_dropped + cleaned_miv_df = cleaned_miv_df.astype(miv_data_types) return cleaned_miv_df @@ -73,19 +143,82 @@ def process_accident_data(file_present: bool = True): if not file_present: du.process_urls(data_dir, accident_file_url) acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string) - acc_cols_to_keep = ['AccidentUID', 'AccidentHour', 'AccidentYear', 'AccidentWeekDay_en', 'AccidentType', + acc_cols_to_keep = ['AccidentUID', 'AccidentYear', 'AccidentMonth', 'AccidentWeekDay_en','AccidentHour', + 'AccidentLocation_CHLV95_N', 'AccidentLocation_CHLV95_E', 'AccidentType_en', 'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle', - 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', 
'AccidentLocation_CHLV95_E', - 'AccidentLocation_CHLV95_N', 'AccidentMonth', 'geometry'] + 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en', + 'Geometry'] cleaned_acc_df = acc_df_unified[acc_cols_to_keep] cleaned_acc_df.rename(columns={ 'AccidentLocation_CHLV95_E': 'EKoord', 'AccidentLocation_CHLV95_N': 'NKoord', }, inplace=True) + + cleaned_acc_df = cleaned_acc_df.astype(acc_data_types) + return cleaned_acc_df +def process_all_data_sources(fb_present=True, miv_present=True, accident_present=True): + """ + Process all data sources and turn them in to csv files. After this function is called there + should be csv files of the cleaned and integrated data sources + + :param fb_present: bool, are the files present in local file system + :param miv_present: bool, are the files present in local file system + :param accident_present: bool, are the files present in local file system + :return: + """ + ensure_dirs_exist(data_dir, integrated_dir) + logger.info("Started processing all data sources.") + start_time = time.time() + logger.info("Start processing pedestrian and bicycle data (FootBikeCount)") + fb_count_df = process_foot_bike_data(fb_present) + logger.debug(f'FB Head:{fb_count_df.head()}\n FB dtypes: {fb_count_df.dtypes}') + fb_file_path = os.path.join(integrated_dir, 'FootBikeCount.csv') + logger.debug(f'FB Cleaned File Path: {fb_file_path}') + fb_count_df.to_csv(fb_file_path, index=False) + logger.info("FB integrated csv created.") + logger.info(f'Time taken for FootBikeCount: {start_time-time.time()}') + + start_time2 = time.time() + logger.info("Start processing motorized vehicle data (MivCount)") + miv_count_df = process_miv_data(miv_present) + logger.debug(f'MIV Head:{miv_count_df.head()}\n MIV dtypes: {miv_count_df.dtypes}') + miv_file_path = os.path.join(integrated_dir, 'MivCount.csv') + logger.debug(f'MIV Cleaned File Path: {miv_file_path}') + miv_count_df.to_csv(miv_file_path, index=False) + logger.info("MIV integrated csv created.") + logger.info(f'Time taken for MivCount: {start_time2-time.time()}') + +def fb_to_integrated(files_present=True): + + start_time = time.time() + logger.info("Start processing pedestrian and bicycle data (FootBikeCount)") + fb_count_df = process_foot_bike_data(files_present) + logger.debug(f'FB Head:{fb_count_df.head()}\n FB dtypes: {fb_count_df.dtypes}') + fb_file_path = os.path.join(integrated_dir, 'FootBikeCount.csv') + logger.debug(f'FB Cleaned File Path: {fb_file_path}') + fb_count_df.to_csv(fb_file_path, index=False) + logger.info("FB integrated csv created.") + end_time = time.time() + logger.info(f'Time taken for FootBikeCount: {end_time-start_time}') + + +def miv_to_integrated_csv(miv_present=True): + + start_time2 = time.time() + logger.info("Start processing motorized vehicle data (MivCount)") + miv_count_df = process_miv_data(miv_present) + logger.debug(f'MIV Head:{miv_count_df.head()}\n MIV dtypes: {miv_count_df.dtypes}') + miv_file_path = os.path.join(integrated_dir, 'MivCount.csv') + logger.debug(f'MIV Cleaned File Path: {miv_file_path}') + miv_count_df.to_csv(miv_file_path, index=False) + logger.info("MIV integrated csv created.") + end_time = time.time() + logger.info(f'Time taken for MivCount: {end_time-start_time2}') + + if __name__ == '__main__': - acc_df = process_accident_data(True) - print(acc_df.dtypes) - print(acc_df.head(100)) + #process_all_data_sources(True, True, True) + miv_to_integrated_csv() diff --git a/src/preparations.py b/src/prepare_for_db.py similarity index 100% rename from src/preparations.py rename to 
src/prepare_for_db.py diff --git a/src/setup_tables.sql b/src/setup_tables.sql new file mode 100644 index 0000000..d510b00 --- /dev/null +++ b/src/setup_tables.sql @@ -0,0 +1,71 @@ +CREATE EXTENSION IF NOT EXISTS postgis; + +DROP TABLE IF EXISTS FootBikeCount; + +CREATE TABLE FootBikeCount ( + ID INTEGER , + NORD INTEGER , + OST INT , + DATE VARCHAR(10) , + HRS INTEGER , + VELO_IN INTEGER , + VELO_OUT INTEGER , + FUSS_IN INTEGER , + FUSS_OUT INTEGER , + Weekday_en VARCHAR(10) , + + PRIMARY KEY(ID) , + CHECK (Weekday_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')), + CHECK (Hrs BETWEEN 0 AND 23) + + +); + +DROP TABLE IF EXISTS MivCount; + +CREATE TABLE MivCount ( + MSID VARCHAR(256) , + ZSID VARCHAR(256) , + Achse VARCHAR(256) , + NKoord INTEGER , + EKoord INTEGER , + Richtung VARCHAR(10) , + AnzFahrzeuge INTEGER , + AnzFahrzeugeStatus VARCHAR(20) , + Datum VARCHAR(10) , + Hrs Integer , + Weekday_en VARCHAR(10), + PRIMARY KEY (MSID), + CHECK (Weekday_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')), + CHECK (Hrs BETWEEN 0 AND 23) +); + + +DROP TABLE IF EXISTS Accidents; + +CREATE TABLE Accidents ( + AccidentUID VARCHAR(32) , + AccidentYear INTEGER , + AccidentMonth INTEGER, + AccidentWeekDay_en VARCHAR(10) , + AccidentHour INTEGER , + NKoord INTEGER , + EKoord INTEGER , + AccidentType_en VARCHAR(256) , + AccidentType VARCHAR(4) , + AccidentSeverityCategory VARCHAR(4) , + AccidentInvolvingPedestrian BOOLEAN , + AccidentInvolvingBicycle BOOLEAN , + AccidentInvolvingMotorcycle BOOLEAN , + RoadType VARCHAR(5) , + RoadType_en VARCHAR(256) , + Geometry geometry(Point) , + + PRIMARY KEY (AccidentUID) , + CHECK ( AccidentHour BETWEEN 0 AND 23) , + CHECK (AccidentWeekDay_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')) +); + +COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' + DELIMITER ',' + CSV HEADER; \ No newline at end of file diff --git a/src/testArea.ipynb b/src/testArea.ipynb index 270b2ac..eb3a6b9 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -227,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "outputs": [ { "name": "stdout", @@ -245,43 +245,43 @@ "16699188 2682704.50\n", "16699189 2682704.50\n", "Name: EKoord, Length: 16699190, dtype: float64\n", - "Acc unique: 0 2684605\n", - "1 2682382\n", - "2 2682791\n", - "3 2681199\n", - "4 2682479\n", - " ... \n", - "55821 2682244\n", - "55822 2680029\n", - "55823 2684990\n", - "55824 2678025\n", - "55825 2684500\n", - "Name: EKoord, Length: 55826, dtype: object\n", - "FB unique: 0 2678956\n", - "1 2678956\n", - "2 2678956\n", - "3 2678956\n", - "4 2678956\n", - " ... \n", - "3011488 2684578\n", - "3011489 2684578\n", - "3011490 2684578\n", - "3011491 2684578\n", - "3011492 2684578\n", - "Name: OST, Length: 3011493, dtype: int64\n" + "Acc unique: \n", + "FB unique: 0 2012-01-01\n", + "1 2012-01-01\n", + "2 2012-01-01\n", + "3 2012-01-01\n", + "4 2012-01-01\n", + " ... 
\n", + "3011488 2019-07-13\n", + "3011489 2019-07-13\n", + "3011490 2019-07-13\n", + "3011491 2019-07-13\n", + "3011492 2019-07-13\n", + "Name: DATE, Length: 3011493, dtype: object\n" ] } ], "source": [ "print(\"MIV unqiue:\", miv_df['EKoord'])\n", - "print(\"Acc unique:\", acc_df['EKoord'])\n", - "print(\"FB unique: \", fb_data['OST'])\n" + "print(\"Acc unique:\", acc_df['RoadType'].unique)\n", + "print(\"FB unique: \", fb_data['DATE'])\n" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T12:33:32.280058Z", - "start_time": "2023-12-03T12:33:32.275419Z" + "end_time": "2023-12-03T15:03:13.580284Z", + "start_time": "2023-12-03T15:03:13.574959Z" } }, "id": "f6d752ea17eda341" From 920f725d74a498d60507e02b952a243d25fcfd1d Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 17:05:09 +0100 Subject: [PATCH 17/22] Drop duplicates in unified miv csv. --- src/integrate.py | 7 +- src/setup_tables.sql | 8 +- src/testArea.ipynb | 229 ++++++++----------------------------------- 3 files changed, 54 insertions(+), 190 deletions(-) diff --git a/src/integrate.py b/src/integrate.py index 9a7c03b..3fe0e7b 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -125,7 +125,7 @@ def process_miv_data(files_present=True): miv_cols_to_keep = ['MSID','ZSID','Achse', 'EKoord', 'NKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus', 'Datum', 'Hrs'] - miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep] + miv_df_cols_dropped = miv_df_unified#[miv_cols_to_keep] dt_obj = pd.to_datetime(miv_df_cols_dropped['Datum']) days = dt_obj.dt.weekday @@ -136,6 +136,7 @@ def process_miv_data(files_present=True): 'AnzFahrzeugeStatus', 'Datum', 'Hrs', 'Weekday_en']] cleaned_miv_df = cleaned_miv_df.astype(miv_data_types) + cleaned_miv_df = cleaned_miv_df.drop_duplicates() return cleaned_miv_df @@ -222,3 +223,7 @@ def miv_to_integrated_csv(miv_present=True): if __name__ == '__main__': #process_all_data_sources(True, True, True) miv_to_integrated_csv() + # path = os.path.join(integrated_dir, 'MivCount.csv') + # df = pd.read_csv(path) + # duplicate_rows = df[df.duplicated()] + # print(duplicate_rows.shape[0]) diff --git a/src/setup_tables.sql b/src/setup_tables.sql index d510b00..e5f0325 100644 --- a/src/setup_tables.sql +++ b/src/setup_tables.sql @@ -35,7 +35,7 @@ CREATE TABLE MivCount ( Datum VARCHAR(10) , Hrs Integer , Weekday_en VARCHAR(10), - PRIMARY KEY (MSID), + PRIMARY KEY (MSID, Achse,Richtung, Datum, Hrs), CHECK (Weekday_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')), CHECK (Hrs BETWEEN 0 AND 23) ); @@ -44,7 +44,7 @@ CREATE TABLE MivCount ( DROP TABLE IF EXISTS Accidents; CREATE TABLE Accidents ( - AccidentUID VARCHAR(32) , + AccidentUID VARCHAR(256) , AccidentYear INTEGER , AccidentMonth INTEGER, AccidentWeekDay_en VARCHAR(10) , @@ -67,5 +67,9 @@ CREATE TABLE Accidents ( ); COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' + DELIMITER ',' + CSV HEADER; + +COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' DELIMITER ',' CSV HEADER; \ No newline at end of file diff --git a/src/testArea.ipynb b/src/testArea.ipynb index eb3a6b9..c0bcbf8 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -8,13 +8,14 @@ "import pandas as pd\n", "from datetime import datetime as dt\n", "\n", - "import integrate as intr\n" + "import integrate as intr\n", + "\n" ], "metadata": { "collapsed": false, "ExecuteTime": 
{ - "end_time": "2023-12-03T12:17:02.386525Z", - "start_time": "2023-12-03T12:17:01.722469Z" + "end_time": "2023-12-03T15:47:10.110909Z", + "start_time": "2023-12-03T15:47:09.656556Z" } }, "id": "be55b25929d95559" @@ -27,38 +28,57 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/seb/Projects/repos/group-1/src/integrate.py:62: SettingWithCopyWarning: \n", + "/Users/seb/Projects/repos/group-1/src/integrate.py:132: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n" + " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n", + "/Users/seb/Projects/repos/group-1/src/integrate.py:133: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)\n" ] } ], "source": [ "\n", "miv_df = intr.process_miv_data()\n", - "fb_data = intr.process_foot_bike_data()" + "#fb_data = intr.process_foot_bike_data()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T12:20:31.968179Z", - "start_time": "2023-12-03T12:17:04.199209Z" + "end_time": "2023-12-03T15:49:07.561603Z", + "start_time": "2023-12-03T15:47:14.759104Z" } }, "id": "dd3831953afdeb72" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 6, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "121\n" + ] + } + ], "source": [ - "test_df = miv_df\n" + "duplicate_rows = miv_df[miv_df.duplicated()]\n", + "print(duplicate_rows.shape[0])" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-03T15:51:21.158909Z", + "start_time": "2023-12-03T15:51:15.711222Z" + } }, "id": "14471cd78389ce4d" }, @@ -66,60 +86,11 @@ "cell_type": "code", "execution_count": null, "outputs": [], - "source": [ - "test_df.dtypes\n", - "date_object = pd.to_datetime(test_df['Date'])\n" - ], - "metadata": { - "collapsed": false - }, - "id": "c70d21adef38fd68" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_df['Weekday_Name']" - ], - "metadata": { - "collapsed": false - }, - "id": "d0df3c0ef49e8061" - }, - { - "cell_type": "code", - "execution_count": 3, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "u_string RoadTrafficAccidentLocations.json\n", - "Filepath: datasets/RoadTrafficAccidentLocations.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/seb/Projects/repos/group-1/src/integrate.py:78: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " cleaned_acc_df.rename(columns={\n" - ] - } - ], "source": [ "acc_df = intr.process_accident_data(True)" ], "metadata": { - "collapsed": false, - 
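The SettingWithCopyWarning reported above (integrate.py lines 132 and 133) stems from assigning new columns to a frame obtained by column selection, which pandas may hand back as a view of the original. A sketch of the conventional fix, an explicit copy before mutating; the patches themselves leave the warning in place:

    import pandas as pd

    df = pd.DataFrame({'MSID': ['Z001M001'], 'AnzFahrzeuge': [295.0], 'Extra': [0]})
    sub = df[['MSID', 'AnzFahrzeuge']].copy()    # copy() makes ownership explicit
    sub['AnzFahrzeuge'] = sub['AnzFahrzeuge'].fillna(0).astype(int)  # no warning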
"ExecuteTime": { - "end_time": "2023-12-03T12:20:47.066579Z", - "start_time": "2023-12-03T12:20:31.964275Z" - } + "collapsed": false }, "id": "f86bc612060b17a4" }, @@ -140,59 +111,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accident Columns:\n", - "AccidentUID object\n", - "AccidentHour object\n", - "AccidentYear object\n", - "AccidentWeekDay_en object\n", - "AccidentType object\n", - "AccidentSeverityCategory object\n", - "AccidentInvolvingPedestrian object\n", - "AccidentInvolvingBicycle object\n", - "AccidentInvolvingMotorcycle object\n", - "RoadType object\n", - "RoadType_en object\n", - "EKoord object\n", - "NKoord object\n", - "AccidentMonth object\n", - "geometry geometry\n", - "dtype: object\n", - "\n", - "MIV Columns:\n", - "MSID object\n", - "ZSID object\n", - "Achse object\n", - "EKoord float64\n", - "NKoord float64\n", - "Richtung object\n", - "AnzFahrzeuge float64\n", - "AnzFahrzeugeStatus object\n", - "Date object\n", - "Hrs object\n", - "Weekday_en object\n", - "dtype: object\n", - "\n", - "FB Cols:\n", - "OST int64\n", - "NORD int64\n", - "DATE object\n", - "HRS object\n", - "VELO_IN float64\n", - "VELO_OUT float64\n", - "FUSS_IN float64\n", - "FUSS_OUT float64\n", - "Weekday_en object\n", - "ID int64\n", - "dtype: object\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "print(\"Accident Columns:\")\n", "print(acc_df.dtypes)\n", @@ -204,11 +124,7 @@ "print(fb_data.dtypes)" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T12:20:47.067419Z", - "start_time": "2023-12-03T12:20:47.063397Z" - } + "collapsed": false }, "id": "242041cd369d8454" }, @@ -227,88 +143,27 @@ }, { "cell_type": "code", - "execution_count": 10, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MIV unqiue: 0 2683009.89\n", - "1 2683009.89\n", - "2 2683009.89\n", - "3 2683009.89\n", - "4 2683009.89\n", - " ... \n", - "16699185 2682704.50\n", - "16699186 2682704.50\n", - "16699187 2682704.50\n", - "16699188 2682704.50\n", - "16699189 2682704.50\n", - "Name: EKoord, Length: 16699190, dtype: float64\n", - "Acc unique: \n", - "FB unique: 0 2012-01-01\n", - "1 2012-01-01\n", - "2 2012-01-01\n", - "3 2012-01-01\n", - "4 2012-01-01\n", - " ... 
\n", - "3011488 2019-07-13\n", - "3011489 2019-07-13\n", - "3011490 2019-07-13\n", - "3011491 2019-07-13\n", - "3011492 2019-07-13\n", - "Name: DATE, Length: 3011493, dtype: object\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "print(\"MIV unqiue:\", miv_df['EKoord'])\n", "print(\"Acc unique:\", acc_df['RoadType'].unique)\n", "print(\"FB unique: \", fb_data['DATE'])\n" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T15:03:13.580284Z", - "start_time": "2023-12-03T15:03:13.574959Z" - } + "collapsed": false }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": 5, - "outputs": [ - { - "data": { - "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en EKoord NKoord \\\n0 false rt433 Minor road 2684605 1245194 \n1 false rt433 Minor road 2682382 1246980 \n2 false rt439 Other 2682791 1247749 \n3 false rt433 Minor road 2681199 1247102 \n4 false rt433 Minor road 2682479 1250690 \n\n AccidentMonth geometry \n0 1 POINT Z (8.55841 47.35217 0.00000) \n1 1 POINT Z (8.52932 47.36851 0.00000) \n2 1 POINT Z (8.53488 47.37538 0.00000) \n3 1 POINT Z (8.51368 47.36976 0.00000) \n4 1 POINT Z (8.53129 47.40186 0.00000) ", - "text/html": "
[HTML rendering omitted: same five rows as the text/plain acc_df.head() output above.]
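The duplicate check above found 121 fully identical rows in the unified MIV frame, and the next patch widens the MivCount primary key with MessungDatZeit for the same reason. For reference, the two idioms in play, with column names mirroring the MivCount schema:

    import pandas as pd

    df = pd.DataFrame({
        'MSID': ['Z001M001', 'Z001M001'],
        'MessungDatZeit': ['2021-01-01T00:00:00', '2021-01-01T00:00:00'],
        'AnzFahrzeuge': [295, 295],
    })
    print(df[df.duplicated()].shape[0])   # rows identical in every column -> 1
    deduped = df.drop_duplicates()        # keeps the first occurrence
    # Restrict the check to the key columns when only the key must be unique:
    deduped_key = df.drop_duplicates(subset=['MSID', 'MessungDatZeit'])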
" - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "acc_df.head()" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T12:32:55.249260Z", - "start_time": "2023-12-03T12:32:55.235008Z" - } + "collapsed": false }, "id": "a159cafa9c227b88" }, From 90a38b1bb52f804ba023b5f07c6926dd8286e3e8 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 17:05:09 +0100 Subject: [PATCH 18/22] Drop duplicates in unified miv csv. --- src/integrate.py | 16 ++- src/setup_tables.sql | 13 ++- src/testArea.ipynb | 229 ++++++++----------------------------------- 3 files changed, 63 insertions(+), 195 deletions(-) diff --git a/src/integrate.py b/src/integrate.py index 9a7c03b..ce03042 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -53,7 +53,8 @@ miv_data_types = { 'AnzFahrzeugeStatus': 'str', 'Datum': 'str', 'Hrs': 'int', - 'Weekday_en': 'str' + 'Weekday_en': 'str', + 'MessungDatZeit': 'str' } acc_data_types = { @@ -123,19 +124,21 @@ def process_miv_data(files_present=True): miv_df_unified[['Datum', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True) miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True) - miv_cols_to_keep = ['MSID','ZSID','Achse', 'EKoord', 'NKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus', - 'Datum', 'Hrs'] + miv_cols_to_keep = ['MSID','ZSID','Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus', + 'Datum', 'Hrs', 'MessungDatZeit'] miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep] dt_obj = pd.to_datetime(miv_df_cols_dropped['Datum']) days = dt_obj.dt.weekday miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x]) miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int) + miv_df_cols_dropped['ZSID'] = miv_df_cols_dropped['ZSID'].fillna('Missing').astype(str) cleaned_miv_df = miv_df_cols_dropped[['MSID', 'ZSID', 'Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', - 'AnzFahrzeugeStatus', 'Datum', 'Hrs', 'Weekday_en']] + 'AnzFahrzeugeStatus', 'Datum', 'Hrs', 'Weekday_en', 'MessungDatZeit']] cleaned_miv_df = cleaned_miv_df.astype(miv_data_types) + cleaned_miv_df = cleaned_miv_df.drop_duplicates() return cleaned_miv_df @@ -222,3 +225,8 @@ def miv_to_integrated_csv(miv_present=True): if __name__ == '__main__': #process_all_data_sources(True, True, True) miv_to_integrated_csv() + path = os.path.join(integrated_dir, 'MivCount.csv') + df = pd.read_csv(path) + df = df[['MSID', 'MessungDatZeit']] + duplicate_rows = df[df.duplicated()] + print(duplicate_rows.shape[0]) diff --git a/src/setup_tables.sql b/src/setup_tables.sql index d510b00..9879b94 100644 --- a/src/setup_tables.sql +++ b/src/setup_tables.sql @@ -25,17 +25,18 @@ DROP TABLE IF EXISTS MivCount; CREATE TABLE MivCount ( MSID VARCHAR(256) , - ZSID VARCHAR(256) , + ZSID VARCHAR(256) NULL, Achse VARCHAR(256) , NKoord INTEGER , EKoord INTEGER , - Richtung VARCHAR(10) , + Richtung VARCHAR(100) , AnzFahrzeuge INTEGER , AnzFahrzeugeStatus VARCHAR(20) , Datum VARCHAR(10) , Hrs Integer , Weekday_en VARCHAR(10), - PRIMARY KEY (MSID), + MessungDatZeit VARCHAR(100), + PRIMARY KEY (MSID, Achse,Richtung, AnzFahrzeuge, Datum, Hrs), CHECK (Weekday_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')), CHECK (Hrs BETWEEN 0 AND 23) ); @@ -44,7 +45,7 @@ CREATE TABLE MivCount ( DROP 
TABLE IF EXISTS Accidents; CREATE TABLE Accidents ( - AccidentUID VARCHAR(32) , + AccidentUID VARCHAR(256) , AccidentYear INTEGER , AccidentMonth INTEGER, AccidentWeekDay_en VARCHAR(10) , @@ -67,5 +68,9 @@ CREATE TABLE Accidents ( ); COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' + DELIMITER ',' + CSV HEADER; + +COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' DELIMITER ',' CSV HEADER; \ No newline at end of file diff --git a/src/testArea.ipynb b/src/testArea.ipynb index eb3a6b9..c0bcbf8 100644 --- a/src/testArea.ipynb +++ b/src/testArea.ipynb @@ -8,13 +8,14 @@ "import pandas as pd\n", "from datetime import datetime as dt\n", "\n", - "import integrate as intr\n" + "import integrate as intr\n", + "\n" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T12:17:02.386525Z", - "start_time": "2023-12-03T12:17:01.722469Z" + "end_time": "2023-12-03T15:47:10.110909Z", + "start_time": "2023-12-03T15:47:09.656556Z" } }, "id": "be55b25929d95559" @@ -27,38 +28,57 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/seb/Projects/repos/group-1/src/integrate.py:62: SettingWithCopyWarning: \n", + "/Users/seb/Projects/repos/group-1/src/integrate.py:132: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n" + " miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n", + "/Users/seb/Projects/repos/group-1/src/integrate.py:133: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)\n" ] } ], "source": [ "\n", "miv_df = intr.process_miv_data()\n", - "fb_data = intr.process_foot_bike_data()" + "#fb_data = intr.process_foot_bike_data()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-12-03T12:20:31.968179Z", - "start_time": "2023-12-03T12:17:04.199209Z" + "end_time": "2023-12-03T15:49:07.561603Z", + "start_time": "2023-12-03T15:47:14.759104Z" } }, "id": "dd3831953afdeb72" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 6, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "121\n" + ] + } + ], "source": [ - "test_df = miv_df\n" + "duplicate_rows = miv_df[miv_df.duplicated()]\n", + "print(duplicate_rows.shape[0])" ], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-03T15:51:21.158909Z", + "start_time": "2023-12-03T15:51:15.711222Z" + } }, "id": "14471cd78389ce4d" }, @@ -66,60 +86,11 @@ "cell_type": "code", "execution_count": null, "outputs": [], - "source": [ - "test_df.dtypes\n", - "date_object = pd.to_datetime(test_df['Date'])\n" - ], - "metadata": { - "collapsed": false - }, - "id": "c70d21adef38fd68" - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_df['Weekday_Name']" - ], 
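The COPY ... FROM statements above run server-side, so the absolute /Users/seb/... paths must be readable by the Postgres server process itself, which only holds on the machine running the database. A client-side equivalent streams the file over the connection instead; a sketch assuming psycopg2 (the driver the project's connection settings suggest) and placeholder credentials:

    import psycopg2

    conn = psycopg2.connect(host='localhost', port=5432, dbname='zh-traffic',
                            user='db23-db', password='...')
    with conn, conn.cursor() as cur, \
            open('datasets/integrated/MivCount.csv') as f:
        # STDIN variant of COPY: the file travels through the client connection.
        cur.copy_expert('COPY MivCount FROM STDIN WITH (FORMAT csv, HEADER true)', f)
    conn.close()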
- "metadata": { - "collapsed": false - }, - "id": "d0df3c0ef49e8061" - }, - { - "cell_type": "code", - "execution_count": 3, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "u_string RoadTrafficAccidentLocations.json\n", - "Filepath: datasets/RoadTrafficAccidentLocations.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/seb/Projects/repos/group-1/src/integrate.py:78: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " cleaned_acc_df.rename(columns={\n" - ] - } - ], "source": [ "acc_df = intr.process_accident_data(True)" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T12:20:47.066579Z", - "start_time": "2023-12-03T12:20:31.964275Z" - } + "collapsed": false }, "id": "f86bc612060b17a4" }, @@ -140,59 +111,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accident Columns:\n", - "AccidentUID object\n", - "AccidentHour object\n", - "AccidentYear object\n", - "AccidentWeekDay_en object\n", - "AccidentType object\n", - "AccidentSeverityCategory object\n", - "AccidentInvolvingPedestrian object\n", - "AccidentInvolvingBicycle object\n", - "AccidentInvolvingMotorcycle object\n", - "RoadType object\n", - "RoadType_en object\n", - "EKoord object\n", - "NKoord object\n", - "AccidentMonth object\n", - "geometry geometry\n", - "dtype: object\n", - "\n", - "MIV Columns:\n", - "MSID object\n", - "ZSID object\n", - "Achse object\n", - "EKoord float64\n", - "NKoord float64\n", - "Richtung object\n", - "AnzFahrzeuge float64\n", - "AnzFahrzeugeStatus object\n", - "Date object\n", - "Hrs object\n", - "Weekday_en object\n", - "dtype: object\n", - "\n", - "FB Cols:\n", - "OST int64\n", - "NORD int64\n", - "DATE object\n", - "HRS object\n", - "VELO_IN float64\n", - "VELO_OUT float64\n", - "FUSS_IN float64\n", - "FUSS_OUT float64\n", - "Weekday_en object\n", - "ID int64\n", - "dtype: object\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "print(\"Accident Columns:\")\n", "print(acc_df.dtypes)\n", @@ -204,11 +124,7 @@ "print(fb_data.dtypes)" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T12:20:47.067419Z", - "start_time": "2023-12-03T12:20:47.063397Z" - } + "collapsed": false }, "id": "242041cd369d8454" }, @@ -227,88 +143,27 @@ }, { "cell_type": "code", - "execution_count": 10, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MIV unqiue: 0 2683009.89\n", - "1 2683009.89\n", - "2 2683009.89\n", - "3 2683009.89\n", - "4 2683009.89\n", - " ... \n", - "16699185 2682704.50\n", - "16699186 2682704.50\n", - "16699187 2682704.50\n", - "16699188 2682704.50\n", - "16699189 2682704.50\n", - "Name: EKoord, Length: 16699190, dtype: float64\n", - "Acc unique: \n", - "FB unique: 0 2012-01-01\n", - "1 2012-01-01\n", - "2 2012-01-01\n", - "3 2012-01-01\n", - "4 2012-01-01\n", - " ... 
\n", - "3011488 2019-07-13\n", - "3011489 2019-07-13\n", - "3011490 2019-07-13\n", - "3011491 2019-07-13\n", - "3011492 2019-07-13\n", - "Name: DATE, Length: 3011493, dtype: object\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "print(\"MIV unqiue:\", miv_df['EKoord'])\n", "print(\"Acc unique:\", acc_df['RoadType'].unique)\n", "print(\"FB unique: \", fb_data['DATE'])\n" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T15:03:13.580284Z", - "start_time": "2023-12-03T15:03:13.574959Z" - } + "collapsed": false }, "id": "f6d752ea17eda341" }, { "cell_type": "code", - "execution_count": 5, - "outputs": [ - { - "data": { - "text/plain": " AccidentUID AccidentHour AccidentYear \\\n0 A2D2677533867004E0430A865E337004 00 2011 \n1 9FD6441F802C20A6E0430A865E3320A6 01 2011 \n2 9FDA0DC4856A6094E0430A865E336094 02 2011 \n3 A3B66E42396E6000E0430A865E336000 02 2011 \n4 9FDA0DBE8CCE9096E0430A865E339096 03 2011 \n\n AccidentWeekDay_en AccidentType AccidentSeverityCategory \\\n0 Saturday at0 as4 \n1 Saturday at0 as3 \n2 Saturday at0 as4 \n3 Saturday at5 as3 \n4 Saturday at0 as4 \n\n AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n0 false false \n1 false true \n2 false false \n3 false false \n4 false false \n\n AccidentInvolvingMotorcycle RoadType RoadType_en EKoord NKoord \\\n0 false rt433 Minor road 2684605 1245194 \n1 false rt433 Minor road 2682382 1246980 \n2 false rt439 Other 2682791 1247749 \n3 false rt433 Minor road 2681199 1247102 \n4 false rt433 Minor road 2682479 1250690 \n\n AccidentMonth geometry \n0 1 POINT Z (8.55841 47.35217 0.00000) \n1 1 POINT Z (8.52932 47.36851 0.00000) \n2 1 POINT Z (8.53488 47.37538 0.00000) \n3 1 POINT Z (8.51368 47.36976 0.00000) \n4 1 POINT Z (8.53129 47.40186 0.00000) ", - "text/html": "
[HTML rendering omitted: duplicates the text/plain acc_df.head() output above.]
" - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "acc_df.head()" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-03T12:32:55.249260Z", - "start_time": "2023-12-03T12:32:55.235008Z" - } + "collapsed": false }, "id": "a159cafa9c227b88" }, From fcfb3f028ba586265d54e560b0c79177bba63885 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 20:46:54 +0100 Subject: [PATCH 19/22] Intermediate Commit --- src/fill_db.py | 10 ++++---- src/fill_db_alchemy.py | 35 +++++++++++++++++++++++++ src/integrate.py | 58 +++++++++++++++++++++++------------------- src/setup_tables.sql | 6 +++-- 4 files changed, 76 insertions(+), 33 deletions(-) create mode 100644 src/fill_db_alchemy.py diff --git a/src/fill_db.py b/src/fill_db.py index fc626e6..ec2b333 100644 --- a/src/fill_db.py +++ b/src/fill_db.py @@ -8,11 +8,11 @@ integrated_dir = 'datasets/integrated/' # Set up info needed to connect to db db_info = { - 'host': '127.0.0.1', - 'database': 'zh-traffic', - 'port': '54322', - 'user': 'db23-db', - 'password': 'db23-project-role-PW@0', + 'host': 'localhost', + 'database': 'test-db23', + 'port': '5432', + 'user': 'seb', + 'password': '', 'sslmode': 'disable' } diff --git a/src/fill_db_alchemy.py b/src/fill_db_alchemy.py new file mode 100644 index 0000000..b9f053b --- /dev/null +++ b/src/fill_db_alchemy.py @@ -0,0 +1,35 @@ +import os +import pandas as pd +from sqlalchemy import create_engine + +integrated_dir = 'datasets/integrated/' + +# Set up info needed to connect to db +db_info = { + 'host': 'localhost', + 'database': 'test-db23', + 'port': '5432', + 'user': 'seb', + 'password': '', +} + +csv_table_maps = [ + {'file': os.path.join(integrated_dir, 'FootBikeCount.csv'), 'table': 'FootBikeCount'}, + {'file': os.path.join(integrated_dir, 'MivCount.csv'), 'table': 'MivCount'} +] + +# Create a SQLAlchemy engine +engine = create_engine( + f"postgresql://{db_info['user']}:{db_info['password']}@{db_info['host']}:{db_info['port']}/{db_info['database']}", + echo=True # Set echo to True to display SQL queries (optional) +) + +def csv_to_existing_table(csv_file_path, table_name): + df = pd.read_csv(csv_file_path) + df.to_sql(table_name, engine, if_exists='append', index=False) + +for i in csv_table_maps: + csv_to_existing_table(i['file'], i['table']) + +# Close the SQLAlchemy engine +engine.dispose() diff --git a/src/integrate.py b/src/integrate.py index 45cc14d..c025832 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -1,9 +1,10 @@ import data_utils as du import os import pandas as pd -from datetime import datetime +import geopandas as gpd import time from shapely.geometry import Point +import re import logging @@ -73,7 +74,7 @@ acc_data_types = { 'AccidentInvolvingMotorcycle': 'bool', 'RoadType': 'str', 'RoadType_en': 'str', - 'Geometry': 'str' # TODO: Figure out what dtype this needs to be for postgres + 'geometry': 'str' # TODO: Figure out what dtype this needs to be for postgres } @@ -93,7 +94,8 @@ def ensure_dirs_exist(data_dir, integrated_dir): def process_foot_bike_data(files_present=True): - fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir, files_present=files_present) + fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir, + files_present=files_present) fb_df_unified[['DATE', "TIME"]] = fb_df_unified['DATUM'].str.split('T', 
expand=True)
     fb_df_unified[['HRS', 'MINS']] = fb_df_unified['TIME'].str.split(':', expand=True)
     ## We might still need FK_ZAEHLER after all
@@ -152,7 +154,7 @@ def process_accident_data(file_present: bool = True):
                         'AccidentLocation_CHLV95_N', 'AccidentLocation_CHLV95_E', 'AccidentType_en',
                         'AccidentType', 'AccidentSeverityCategory', 'AccidentInvolvingPedestrian',
                         'AccidentInvolvingBicycle', 'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en',
-                        'Geometry']
+                        'geometry']
     cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
     cleaned_acc_df.rename(columns={
         'AccidentLocation_CHLV95_E': 'EKoord',
@@ -168,32 +170,17 @@ def process_all_data_sources(fb_present=True, miv_present=True, accident_present
     Process all data sources and turn them into csv files. After this function is called there should be csv files
     of the cleaned and integrated data sources

-    :param fb_present: bool, are the files present in local file system
-    :param miv_present: bool, are the files present in local file system
-    :param accident_present: bool, are the files present in local file system
+    :param fb_present: bool, whether the files are present in the local file system
+    :param miv_present: bool, whether the files are present in the local file system
+    :param accident_present: bool, whether the files are present in the local file system
     :return:
     """
     ensure_dirs_exist(data_dir, integrated_dir)
     logger.info("Started processing all data sources.")
-    start_time = time.time()
-    logger.info("Start processing pedestrian and bicycle data (FootBikeCount)")
-    fb_count_df = process_foot_bike_data(fb_present)
-    logger.debug(f'FB Head:{fb_count_df.head()}\n FB dtypes: {fb_count_df.dtypes}')
-    fb_file_path = os.path.join(integrated_dir, 'FootBikeCount.csv')
-    logger.debug(f'FB Cleaned File Path: {fb_file_path}')
-    fb_count_df.to_csv(fb_file_path, index=False)
-    logger.info("FB integrated csv created.")
-    logger.info(f'Time taken for FootBikeCount: {start_time-time.time()}')
+    fb_to_integrated(fb_present)
+
+    miv_to_integrated_csv(miv_present)

-    start_time2 = time.time()
-    logger.info("Start processing motorized vehicle data (MivCount)")
-    miv_count_df = process_miv_data(miv_present)
-    logger.debug(f'MIV Head:{miv_count_df.head()}\n MIV dtypes: {miv_count_df.dtypes}')
-    miv_file_path = os.path.join(integrated_dir, 'MivCount.csv')
-    logger.debug(f'MIV Cleaned File Path: {miv_file_path}')
-    miv_count_df.to_csv(miv_file_path, index=False)
-    logger.info("MIV integrated csv created.")
-    logger.info(f'Time taken for MivCount: {start_time2-time.time()}')


 def fb_to_integrated(files_present=True):
@@ -223,6 +210,25 @@ def miv_to_integrated_csv(miv_present=True):
     logger.info(f'Time taken for MivCount: {end_time-start_time2}')


+def acc_to_cleaned_geojson(acc_present=True):
+    start_time3 = time.time()
+    logger.info("Start processing accident data (Accidents)")
+    acc_df = process_accident_data(acc_present)
+    logger.debug(f'ACC Head: {acc_df.head()}\n Acc dtypes: {acc_df.dtypes}')
+    acc_file_path = os.path.join(integrated_dir, 'Accidents.geojson')
+    logger.debug(f'Acc Cleaned file path: {acc_file_path}')
+    acc_df['geometry'] = acc_df['geometry'].apply(lambda row: re.findall(r"[-+]?\d*\.\d+|\d+", row))
+    # Create a Point object using the extracted coordinates
+    acc_df['geometry'] = acc_df['geometry'].apply(
+        lambda coords: Point(float(coords[0]), float(coords[1]), float(coords[2])))
+    acc_gdf = gpd.GeoDataFrame(acc_df, geometry='geometry')
+    acc_gdf.to_file(acc_file_path, driver='GeoJSON')
+    logger.info("ACC integrated GeoJSON created.")
+    end_time = time.time()
+    logger.info(f'Time taken for 
Accidents: {end_time - start_time3}') + + if __name__ == '__main__': - process_all_data_sources(True, True, True) + # process_all_data_sources(True, True, True) # miv_to_integrated_csv() + acc_to_cleaned_geojson() diff --git a/src/setup_tables.sql b/src/setup_tables.sql index 8b4360c..69f012a 100644 --- a/src/setup_tables.sql +++ b/src/setup_tables.sql @@ -61,7 +61,7 @@ CREATE TABLE Accidents ( AccidentInvolvingMotorcycle BOOLEAN , RoadType VARCHAR(5) , RoadType_en VARCHAR(256) , - Geometry geometry(Point) , + Geometry geometry(Point, 4326) , PRIMARY KEY (AccidentUID) , CHECK ( AccidentHour BETWEEN 0 AND 23) , @@ -74,4 +74,6 @@ COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrat COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' DELIMITER ',' - CSV HEADER; \ No newline at end of file + CSV HEADER; + +COPY Accidents FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/Accidents.geojson' WITH (FORMAT 'geojson'); From b79ee792b69e9eba73d74c6196dbf75accfb2423 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 21:40:29 +0100 Subject: [PATCH 20/22] Preliminary: Completed Integration Scripts. Add scripts that load data into a database of choice. Some config is still manual. --- src/data_utils.py | 2 +- src/fill_db.py | 80 +++++++++++++++++++++++++---------- src/integrate.py | 11 +++-- src/load_accidents_into_db.sh | 15 +++++++ src/load_csvs_into_db.sql | 7 +++ src/prepare_for_db.py | 3 -- src/setup_tables.sql | 21 +++------ 7 files changed, 96 insertions(+), 43 deletions(-) create mode 100644 src/load_accidents_into_db.sh create mode 100644 src/load_csvs_into_db.sql delete mode 100644 src/prepare_for_db.py diff --git a/src/data_utils.py b/src/data_utils.py index 1dc7109..fa51384 100644 --- a/src/data_utils.py +++ b/src/data_utils.py @@ -6,7 +6,7 @@ import geopandas as gpd from concurrent.futures import ThreadPoolExecutor as tpe import logging -logging.basicConfig(level=logging.DEBUG, filename='data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.DEBUG, filename='logs/data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger('data_utils.py') stream_handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') diff --git a/src/fill_db.py b/src/fill_db.py index ec2b333..50f6d4c 100644 --- a/src/fill_db.py +++ b/src/fill_db.py @@ -1,38 +1,74 @@ -import os - -import pandas as pd +import logging import psycopg2 -from psycopg2 import sql +import subprocess + +logging.basicConfig(level=logging.DEBUG, filename='logs/fill_db.log', + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('fill_db.py') +stream_handler = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +stream_handler.setFormatter(formatter) +logger.addHandler(stream_handler) integrated_dir = 'datasets/integrated/' +accident_geojson_file = 'datasets/integrated/Accidents.geojson' +accident_loader_script = 'load_accidents_into_db.sh' +accident_table_name = 'accidents' -# Set up info needed to connect to db db_info = { 'host': 'localhost', 'database': 'test-db23', 'port': '5432', 'user': 'seb', 'password': '', - 'sslmode': 'disable' } - -csv_table_maps = [ - {'file': os.path.join(integrated_dir, 'FootBikeCount.csv'), 'table': 
'FootBikeCount'},
-    {'file': os.path.join(integrated_dir, 'MivCount.csv'), 'table': 'MivCount'}
-]
-
-db_connection = psycopg2.connect(**db_info)
+setup_tables_script = 'setup_tables.sql'
+load_csvs_into_db_script = 'load_csvs_into_db.sql'
 
 
-def csv_to_existing_table(csv_file_path, table_name):
-    df = pd.read_csv(csv_file_path)
-    curs = db_connection.cursor()
-    df.to_sql(table_name, db_connection, if_exists='append', index_label=False)
-    db_connection.commit()
-    curs.close()
+def run_sql(script, db_info):
+    db_connection = psycopg2.connect(**db_info)
+    db_cursor = db_connection.cursor()
+
+    with open(script, 'r') as sql_file:
+        sql_script = sql_file.read()
+
+    try:
+        db_cursor.execute(sql_script)
+        db_connection.commit()
+        logger.info(f'{script} executed successfully')
+    except Exception as e:
+        db_connection.rollback()
+        logger.exception(f'Error executing {script}: {e}')
+    finally:
+        db_cursor.close()
+        db_connection.close()
 
-for i in csv_table_maps:
-    csv_to_existing_table(i['file'], i['table'])
 
+def run_geojson_loader_script(script, *args):
+
+    try:
+        cmd = ['bash', script] + list(args)
+        res = subprocess.run(cmd, check=True, text=True, capture_output=True)
+        logger.info(f'{script} executed successfully. Output: {res.stdout}')
+    except subprocess.CalledProcessError as e:
+        logger.exception(f'Error executing {script}: {e}')
+        logger.info(f"Remember to set the correct permissions for the script: chmod +x {script}")
+
+
+if __name__ == '__main__':
+    run_sql(setup_tables_script, db_info)
+    logger.info("Finished setting up tables.")
+    run_sql(load_csvs_into_db_script, db_info)
+    logger.info("Finished loading csvs into db.")
+    run_geojson_loader_script(accident_loader_script,
+                              accident_geojson_file,
+                              db_info['database'],
+                              db_info['user'],
+                              db_info['password'],
+                              db_info['host'],
+                              db_info['port'],
+                              accident_table_name)
+    logger.info('Finished loading geojson into db using bash script.')
-
-db_connection.close()
diff --git a/src/integrate.py b/src/integrate.py
index c025832..8c85bd3 100644
--- a/src/integrate.py
+++ b/src/integrate.py
@@ -8,7 +8,7 @@
 import re
 
 import logging
-logging.basicConfig(level=logging.DEBUG, filename='integrate.log',
+logging.basicConfig(level=logging.DEBUG, filename='logs/integrate.log',
                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger('integrate.py')
 stream_handler = logging.StreamHandler()
@@ -27,6 +27,7 @@ accident_file_u_string = 'RoadTrafficAccidentLocations.json'
 
 data_dir = 'datasets/'
 integrated_dir = 'datasets/integrated/'
+logs_dir = 'logs/'
 
 weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
 
@@ -91,6 +92,8 @@ def ensure_dirs_exist(data_dir, integrated_dir):
     logger.debug("data_dir created.")
     os.makedirs(integrated_dir, exist_ok=True)
     logger.debug("integrated_dir created")
+    os.makedirs(logs_dir, exist_ok=True)
+    logger.debug("logs_dir created")
 
 
 def process_foot_bike_data(files_present=True):
@@ -181,6 +184,8 @@ def process_all_data_sources(fb_present=True, miv_present=True, accident_present
 
     miv_to_integrated_csv(miv_present)
 
+    acc_to_cleaned_geojson(accident_present)
+
 
 def fb_to_integrated(files_present=True):
@@ -229,6 +234,6 @@
 
 
 if __name__ == '__main__':
-    # process_all_data_sources(True, True, True)
+    process_all_data_sources(True, True, True)
     # miv_to_integrated_csv()
-    acc_to_cleaned_geojson()
+    # acc_to_cleaned_geojson()
diff --git a/src/load_accidents_into_db.sh b/src/load_accidents_into_db.sh
new file mode 100644
index 0000000..9593d63 --- /dev/null +++ b/src/load_accidents_into_db.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Define parameters +GEOJSON_FILE=$1 +DB_NAME=$2 +DB_USER=$3 +DB_PASSWORD=$4 +DB_HOST=$5 +DB_PORT=$6 +TARGET_TABLE=$7 + +# Run ogr2ogr command +ogr2ogr -f "PostgreSQL" PG:"dbname='$DB_NAME' host='$DB_HOST' port='$DB_PORT' user='$DB_USER' password='$DB_PASSWORD'" "$GEOJSON_FILE" -nln $TARGET_TABLE -append + +echo "GeoJSON data has been imported into $TARGET_TABLE" diff --git a/src/load_csvs_into_db.sql b/src/load_csvs_into_db.sql new file mode 100644 index 0000000..77ceb25 --- /dev/null +++ b/src/load_csvs_into_db.sql @@ -0,0 +1,7 @@ +COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' + DELIMITER ',' + CSV HEADER; + +COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' + DELIMITER ',' + CSV HEADER; \ No newline at end of file diff --git a/src/prepare_for_db.py b/src/prepare_for_db.py deleted file mode 100644 index 499d1d6..0000000 --- a/src/prepare_for_db.py +++ /dev/null @@ -1,3 +0,0 @@ -import data_utils - - diff --git a/src/setup_tables.sql b/src/setup_tables.sql index 69f012a..3a9881e 100644 --- a/src/setup_tables.sql +++ b/src/setup_tables.sql @@ -2,6 +2,11 @@ CREATE EXTENSION IF NOT EXISTS postgis; DROP TABLE IF EXISTS FootBikeCount; +DROP TABLE IF EXISTS Accidents; + +DROP TABLE IF EXISTS MivCount; + + CREATE TABLE FootBikeCount ( ID INTEGER , NORD INTEGER , @@ -21,7 +26,7 @@ CREATE TABLE FootBikeCount ( ); -DROP TABLE IF EXISTS MivCount; + CREATE TABLE MivCount ( ID INTEGER , @@ -43,8 +48,6 @@ CREATE TABLE MivCount ( ); -DROP TABLE IF EXISTS Accidents; - CREATE TABLE Accidents ( AccidentUID VARCHAR(256) , AccidentYear INTEGER , @@ -66,14 +69,4 @@ CREATE TABLE Accidents ( PRIMARY KEY (AccidentUID) , CHECK ( AccidentHour BETWEEN 0 AND 23) , CHECK (AccidentWeekDay_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')) -); - -COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' - DELIMITER ',' - CSV HEADER; - -COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' - DELIMITER ',' - CSV HEADER; - -COPY Accidents FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/Accidents.geojson' WITH (FORMAT 'geojson'); +); \ No newline at end of file From e9b1d82517b696147af2fa56707113ed56ac1b92 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Sun, 3 Dec 2023 22:52:20 +0100 Subject: [PATCH 21/22] FINAL TOUCH: Add functions to get data from api link. Refactor bash script into python function to ensure portability. Add sql queries to create "Contemporaneous" db table. 
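The ogr2ogr invocation is now built and run directly from Python via
subprocess, so the loader no longer depends on bash being available. A
minimal usage sketch of the new function (an editorial illustration, not
committed code; it assumes the db_info credentials in fill_db.py are valid
and that integrate.py has already written Accidents.geojson):

```python
# Hypothetical driver for the geojson_loader() added below in fill_db.py.
from fill_db import geojson_loader, db_info

geojson_loader(
    'datasets/integrated/Accidents.geojson',  # file written by acc_to_cleaned_geojson()
    db_info['database'],
    db_info['user'],
    db_info['password'],
    db_info['host'],
    db_info['port'],
    'accidents',  # target table; ogr2ogr appends, creating the table if it is missing
)
```
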
---
 src/data_utils.py      | 12 ++++++++++++
 src/fill_db.py         | 18 ++++++++++++++++++
 src/fill_db_alchemy.py | 35 -----------------------------------
 src/integrate.py       |  6 ++++++
 src/queries.sql        | 42 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 78 insertions(+), 35 deletions(-)
 delete mode 100644 src/fill_db_alchemy.py
 create mode 100644 src/queries.sql

diff --git a/src/data_utils.py b/src/data_utils.py
index fa51384..8dcdcd5 100644
--- a/src/data_utils.py
+++ b/src/data_utils.py
@@ -1,3 +1,4 @@
+import json
 import os
 import pandas as pd
 import requests
@@ -115,6 +116,17 @@ def create_unified_df(urls_file, u_string, data_dir, files_present=False):
 
     return df_unified
 
+def load_file_from_api(api_link, target_name, integrated_dir):
+    response = requests.get(api_link)
+    final_location = os.path.join(integrated_dir, target_name)
+    if response.status_code == 200:
+        logger.info(f"Successful GET from {api_link}")
+        data = response.json()
+        with open(f'{final_location}.geojson', 'w') as file:
+            json.dump(data, file)
+        logger.info(f"{api_link} successfully downloaded and saved to {final_location}")
+    else:
+        logger.critical(f"Failed to get data. Status Code: {response.status_code}")
 
 def save_dataframe_to_csv(df, integrated_dir, filename):
     pass
diff --git a/src/fill_db.py b/src/fill_db.py
index 50f6d4c..041dad8 100644
--- a/src/fill_db.py
+++ b/src/fill_db.py
@@ -57,6 +57,24 @@ def run_geojson_loader_script(script, *args):
         logger.info(f"Remember to set the correct permissions for the script: chmod +x {script}")
 
 
+def geojson_loader(*args):
+    geojson_file, db_name, db_user, db_password, db_host, db_port, target_table = args
+    cmd = [
+        "ogr2ogr",
+        "-f", "PostgreSQL",
+        f"PG:dbname='{db_name}' host='{db_host}' port='{db_port}' user='{db_user}' password='{db_password}'",
+        geojson_file,
+        "-nln", target_table,
+        "-append"
+    ]
+    try:
+        # Run the command
+        res = subprocess.run(cmd, check=True, text=True, capture_output=True)
+        logger.info(f"ogr2ogr command executed successfully. 
Output: {res.stdout}") + except subprocess.CalledProcessError as e: + logger.exception(f"Error executing ogr2ogr command: {e}") + + if __name__ == '__main__': run_sql(setup_tables_script, db_info) logger.info("Finnished setting up tables.") diff --git a/src/fill_db_alchemy.py b/src/fill_db_alchemy.py deleted file mode 100644 index b9f053b..0000000 --- a/src/fill_db_alchemy.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import pandas as pd -from sqlalchemy import create_engine - -integrated_dir = 'datasets/integrated/' - -# Set up info needed to connect to db -db_info = { - 'host': 'localhost', - 'database': 'test-db23', - 'port': '5432', - 'user': 'seb', - 'password': '', -} - -csv_table_maps = [ - {'file': os.path.join(integrated_dir, 'FootBikeCount.csv'), 'table': 'FootBikeCount'}, - {'file': os.path.join(integrated_dir, 'MivCount.csv'), 'table': 'MivCount'} -] - -# Create a SQLAlchemy engine -engine = create_engine( - f"postgresql://{db_info['user']}:{db_info['password']}@{db_info['host']}:{db_info['port']}/{db_info['database']}", - echo=True # Set echo to True to display SQL queries (optional) -) - -def csv_to_existing_table(csv_file_path, table_name): - df = pd.read_csv(csv_file_path) - df.to_sql(table_name, engine, if_exists='append', index=False) - -for i in csv_table_maps: - csv_to_existing_table(i['file'], i['table']) - -# Close the SQLAlchemy engine -engine.dispose() diff --git a/src/integrate.py b/src/integrate.py index 8c85bd3..31c2d02 100644 --- a/src/integrate.py +++ b/src/integrate.py @@ -29,6 +29,8 @@ data_dir = 'datasets/' integrated_dir = 'datasets/integrated/' logs_dir = 'logs/' +signaled_speeds_json_api = 'https://www.ogd.stadt-zuerich.ch/wfs/geoportal/Signalisierte_Geschwindigkeiten?service=WFS&version=1.1.0&request=GetFeature&outputFormat=GeoJSON&typename=view_geoserver_tempo_ist' + weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] fb_data_types = { @@ -233,6 +235,10 @@ def acc_to_cleaned_geojson(acc_present=True): logger.info(f'Time taken for Accidents: {end_time - start_time3}') +def load_tempo_geojson_from_api_to_local(): + du.load_file_from_api(signaled_speeds_json_api, 'signaled_speeds.geojson', integrated_dir) + + if __name__ == '__main__': process_all_data_sources(True, True, True) # miv_to_integrated_csv() diff --git a/src/queries.sql b/src/queries.sql new file mode 100644 index 0000000..f127869 --- /dev/null +++ b/src/queries.sql @@ -0,0 +1,42 @@ +select p.id, a.accidentuid, m.id +from footbikecount p, accidents a, mivcount m +where p.weekday_en = a.accidentweekday_en AND a.accidentweekday_en = m.weekday_en +AND p.weekday_en = m.weekday_en AND p.hrs = a.accidenthour AND a.accidenthour = m.hrs +AND p.hrs = m.hrs AND (p.ost - m.ekoord between -100 AND 100) AND (p.nord - m.nkoord between -100 AND 100); + +DROP TABLE IF EXISTS Contemporaneous2; + +CREATE TABLE Contemporaneous2 ( + p_id INTEGER, + accidentuid VARCHAR(256), + m_id INTEGER, + weekday_en VARCHAR(10), + hrs INTEGER, + distance DOUBLE PRECISION +); + + +CREATE TABLE Intermediate2 AS +SELECT + p.id AS p_id, + a.accidentuid, + m.id AS m_id, + p.weekday_en, + p.hrs, + SQRT(POWER(p.ost - m.ekoord, 2) + POWER(p.nord - m.nkoord, 2)) AS distance +FROM + footbikecount p, + accidents a, + mivcount m +WHERE + p.weekday_en = a.accidentweekday_en + AND a.accidentweekday_en = m.weekday_en + AND p.weekday_en = m.weekday_en + AND p.hrs = a.accidenthour + AND a.accidenthour = m.hrs + AND p.hrs = m.hrs + AND (p.ost - m.ekoord BETWEEN -100 AND 100) + AND (p.nord - m.nkoord 
BETWEEN -100 AND 100);
+
+INSERT INTO Contemporaneous2 (p_id, accidentuid, m_id, weekday_en, hrs, distance)
+SELECT p_id, accidentuid, m_id, weekday_en, hrs, distance FROM Intermediate2;

From 49bb3f4e206129b8aad1351edc5d60ee27a41ee9 Mon Sep 17 00:00:00 2001
From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com>
Date: Sun, 3 Dec 2023 23:53:36 +0100
Subject: [PATCH 22/22] Data Integration Milestone Completed. Refactored
 ensuring all relevant dirs exist into separate script.

---
 README.md                | 25 ++++++++++++-------------
 src/ensure_dirs_exist.py | 26 ++++++++++++++++++++++++++
 src/fill_db.py           | 28 ++++++++++++++++++++++------
 src/integrate.py         | 38 +++++++++++++++++++-------------------
 4 files changed, 79 insertions(+), 38 deletions(-)
 create mode 100644 src/ensure_dirs_exist.py

diff --git a/README.md b/README.md
index 9c9703d..bae52c9 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,14 @@
-# Databases Project
+# Database Project Group 1
 
-Use this repository for your integration code and any source code created while working on your project (ad-hoc code,
-SQL queries, project files needed by external tools, etc.).
+## Preliminaries
+* Ensure you have access to a running PostgreSQL instance.
+* Ensure you have ```python3``` and ```pip``` installed.
+* From within the root of the project, run ```pip install -r requirements.txt```. This ensures all Python dependencies are met.
+* In ```src/fill_db.py``` look for the ```db_info``` variable and adapt it to your credentials.
 
-- Merge your code into the main branch on the due date.
-- Do not commit datasets!
-- Any other document (except for the dump in the final hand-in) should be handed-in via ADAM.
-
-If you have any questions regarding the project, please do not hesitate to ask during the exercise lessons or via mail
-to [raphael.waltenspuel@unibas.ch](mailto:raphael.waltenspuel@unibas.ch)!
-
-It is recommended that you first create a ```.gitignore``` file. (And exclude the "datasets" folder, for example). A useful tool for creating ```.gitignore``` files is www.gitignore.io.
-
-Feel free to update or replace this readme with a brief description of your project and goals.
\ No newline at end of file
+## Action
+In the following, the order matters.
+1. Run ```ensure_dirs_exist.py```. This makes sure all the directories needed for data integration and logging exist.
+2. Run ```integrate.py```. Adjust the main method to fit your needs. In particular, adjust the ```process_all_data_sources()``` call such that the parameter corresponding to a dataset is ```False``` if the script should download it from the internet, and ```True``` otherwise. To get the GeoJSON data on signaled speeds in the city of Zurich, uncomment the line in the ```main``` method where you find ```load_tempo_geojson_from_api_to_local()```.
+3. Run ```fill_db.py```. This will load the data into the database based on the credentials given in the ```db_info``` variable.
+4. Perform the analysis.
\ No newline at end of file
diff --git a/src/ensure_dirs_exist.py b/src/ensure_dirs_exist.py
new file mode 100644
index 0000000..2ac3d57
--- /dev/null
+++ b/src/ensure_dirs_exist.py
@@ -0,0 +1,26 @@
+import logging
+import os
+"""
+The functionality of this script has been adapted from integrate.ensure_dirs_exist().
+This needs to be run before any other script.
+"""
+data_dir = 'datasets/'
+integrated_dir = 'datasets/integrated/'
+logs_dir = 'logs/'
+
+logging.basicConfig(level=logging.DEBUG,
+                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('ensure_dirs_exist.py')
+stream_handler = logging.StreamHandler()
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+stream_handler.setFormatter(formatter)
+logger.addHandler(stream_handler)
+
+logger.debug(f'data_dir: {data_dir}\n integrated_dir: {integrated_dir}')
+logger.info("Ensuring needed directories exist.")
+os.makedirs(data_dir, exist_ok=True)
+logger.debug("data_dir created.")
+os.makedirs(integrated_dir, exist_ok=True)
+logger.debug("integrated_dir created")
+os.makedirs(logs_dir, exist_ok=True)
+logger.debug("logs_dir created")
diff --git a/src/fill_db.py b/src/fill_db.py
index 041dad8..003b451 100644
--- a/src/fill_db.py
+++ b/src/fill_db.py
@@ -12,9 +12,14 @@ logger.addHandler(stream_handler)
 
 integrated_dir = 'datasets/integrated/'
 accident_geojson_file = 'datasets/integrated/Accidents.geojson'
+signaled_speeds_file = 'datasets/integrated/signaled_speeds.geojson.geojson'
 accident_loader_script = 'load_accidents_into_db.sh'
 accident_table_name = 'accidents'
+signaled_speeds_table_name = 'signaled_speeds'
 
+"""
+Make sure db_info contains the correct credentials
+"""
 db_info = {
     'host': 'localhost',
     'database': 'test-db23',
@@ -26,7 +31,6 @@ setup_tables_script = 'setup_tables.sql'
 load_csvs_into_db_script = 'load_csvs_into_db.sql'
 
 
-
 def run_sql(script, db_info):
     db_connection = psycopg2.connect(**db_info)
     db_cursor = db_connection.cursor()
@@ -47,7 +51,6 @@ def run_sql(script, db_info):
 
 
 def run_geojson_loader_script(script, *args):
-
     try:
         cmd = ['bash', script] + list(args)
         res = subprocess.run(cmd, check=True, text=True, capture_output=True)
@@ -57,7 +60,13 @@ def run_geojson_loader_script(script, *args):
         logger.info(f"Remember to set the correct permissions for the script: chmod +x {script}")
 
 
-def geojson_loader(*args):
+def geojson_loader(*args, modus='append'):
+    """
+    Use this instead of run_geojson_loader_script() in the main method to avoid the bash dependency.
+    :param args: All the arguments needed for ogr2ogr to run properly
+    :param modus: append or overwrite db table
+    :return:
+    """
     geojson_file, db_name, db_user, db_password, db_host, db_port, target_table = args
     cmd = [
         "ogr2ogr",
@@ -65,7 +74,7 @@ def geojson_loader(*args):
         f"PG:dbname='{db_name}' host='{db_host}' port='{db_port}' user='{db_user}' password='{db_password}'",
         geojson_file,
         "-nln", target_table,
-        "-append"
+        f"-{modus}"
     ]
     try:
         # Run the command
@@ -88,5 +97,12 @@ if __name__ == '__main__':
                               db_info['host'],
                               db_info['port'],
                               accident_table_name)
-    logger.info('Finished loading geojson into db using bash script.')
-
+    logger.info('Finished loading accident geojson into db using bash script.')
+    geojson_loader(signaled_speeds_file,
+                   db_info['database'],
+                   db_info['user'],
+                   db_info['password'],
+                   db_info['host'],
+                   db_info['port'],
+                   signaled_speeds_table_name,
+                   modus='overwrite')
diff --git a/src/integrate.py b/src/integrate.py
index 31c2d02..d37d174 100644
--- a/src/integrate.py
+++ b/src/integrate.py
@@ -79,23 +79,21 @@ acc_data_types = {
     'RoadType_en': 'str',
     'geometry': 'str'  # TODO: Figure out what dtype this needs to be for postgres
 }
-
-
-def ensure_dirs_exist(data_dir, integrated_dir):
-    """
-    This should be called before anything else to make sure that the relevant directories exists.
- :param data_dir: directory where the datasets are stored - :param integrated_dir: directory where the integrated data will be stored - :return: - """ - logger.debug(f'data_dir: {data_dir}\n integrated_dir: {integrated_dir}') - logger.info("Ensuring needed directories exist.") - os.makedirs(data_dir, exist_ok=True) - logger.debug("data_dir created.") - os.makedirs(integrated_dir, exist_ok=True) - logger.debug("integrated_dir created") - os.makedirs(logs_dir, exist_ok=True) - logger.debug("logs_dir created") +# def ensure_dirs_exist(data_dir, integrated_dir, logs_dir): +# """ +# This should be called before anything else to make sure that the relevant directories exists. +# :param data_dir: directory where the datasets are stored +# :param integrated_dir: directory where the integrated data will be stored +# :return: +# """ +# logger.debug(f'data_dir: {data_dir}\n integrated_dir: {integrated_dir}') +# logger.info("Ensuring needed directories exist.") +# os.makedirs(data_dir, exist_ok=True) +# logger.debug("data_dir created.") +# os.makedirs(integrated_dir, exist_ok=True) +# logger.debug("integrated_dir created") +# os.makedirs(logs_dir, exist_ok=True) +# logger.debug("logs_dir created") def process_foot_bike_data(files_present=True): @@ -180,7 +178,7 @@ def process_all_data_sources(fb_present=True, miv_present=True, accident_present :param accident_present: bool, if the files present in local file system :return: """ - ensure_dirs_exist(data_dir, integrated_dir) + # ensure_dirs_exist(data_dir, integrated_dir) logger.info("Started processing all data sources.") fb_to_integrated(fb_present) @@ -240,6 +238,8 @@ def load_tempo_geojson_from_api_to_local(): if __name__ == '__main__': - process_all_data_sources(True, True, True) + # ensure_dirs_exist(data_dir, integrated_dir, logs_dir) + # process_all_data_sources(True, True, True) # miv_to_integrated_csv() # acc_to_cleaned_geojson() + load_tempo_geojson_from_api_to_local()
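With patch 22 applied, the series leaves a three-step pipeline: ensure_dirs_exist.py,
then integrate.py, then fill_db.py, in the order the README prescribes. A driver along
the following lines (a sketch, not part of any patch; it assumes the scripts are
launched from src/ and that db_info in fill_db.py has been adapted) reproduces that
order:

```python
# Run the milestone pipeline in README order, aborting on the first failure.
import subprocess
import sys

for step in ('ensure_dirs_exist.py', 'integrate.py', 'fill_db.py'):
    print(f'Running {step} ...')
    subprocess.run([sys.executable, step], check=True)
```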