Merge branch 'dev-explore' into 'main'

Milestone Merge

See merge request dbis/lecture-groups/database-systems/2023hs/group-1!3
Sebastian Lenzlinger 2023-12-03 22:58:31 +00:00
commit 8be3279ace
19 changed files with 1149 additions and 60 deletions

1
.gitignore vendored
View File

@ -1,6 +1,7 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm,linux,macos,database,data
# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm,linux,macos,database,data
datasets/
db23-project-venv/
### Data ###
*.csv
*.dat

View File

@ -2,14 +2,18 @@
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 1,
"id": "17ca2acb",
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"tags": []
"tags": [],
"ExecuteTime": {
"end_time": "2023-11-16T16:20:49.426349Z",
"start_time": "2023-11-16T16:20:16.117316Z"
}
},
"outputs": [
{
@ -156,7 +160,11 @@
"2 Unbekannt 2016-01-01T02:00:00 2021-02-03 213.0 Gemessen \n",
"3 Unbekannt 2016-01-01T03:00:00 2021-02-03 112.0 Gemessen \n",
"4 Unbekannt 2016-01-01T04:00:00 2021-02-03 80.0 Gemessen \n",
"Data for year 2017:\n",
"File not found for year 2017: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv\n",
"File not found for year 2018: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv\n",
"File not found for year 2019: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv\n",
"File not found for year 2020: datasets/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv\n",
"Data for year 2021:\n",
" MSID MSName ZSID ZSName Achse \\\n",
"0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
"1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
@ -179,34 +187,39 @@
"4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n",
"\n",
" D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n",
"0 Unbekannt 2017-01-01T00:00:00 2021-02-03 295.0 Gemessen \n",
"1 Unbekannt 2017-01-01T01:00:00 2021-02-03 264.0 Gemessen \n",
"2 Unbekannt 2017-01-01T02:00:00 2021-02-03 180.0 Gemessen \n",
"3 Unbekannt 2017-01-01T03:00:00 2021-02-03 107.0 Gemessen \n",
"4 Unbekannt 2017-01-01T04:00:00 2021-02-03 97.0 Gemessen \n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_311061/2135127822.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mtable_key\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"miv_{year}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 946\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 947\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 948\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 949\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 615\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 617\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1746\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1747\u001b[0m \u001b[0mcol_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1748\u001b[0;31m \u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0;31m# type: ignore[attr-defined]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1749\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1750\u001b[0m )\n",
"\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 239\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 240\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._convert_column_data\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers._maybe_upcast\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/numpy/core/multiarray.py\u001b[0m in \u001b[0;36mputmask\u001b[0;34m(a, mask, values)\u001b[0m\n\u001b[1;32m 1129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1130\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1131\u001b[0;31m \u001b[0;34m@\u001b[0m\u001b[0marray_function_from_c_func_and_dispatcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_multiarray_umath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mputmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1132\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mputmask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m/\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1133\u001b[0m \"\"\"\n",
"\u001b[0;32m_pydevd_bundle/pydevd_cython.pyx\u001b[0m in \u001b[0;36m_pydevd_bundle.pydevd_cython.SafeCallWrapper.__call__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m_pydevd_bundle/pydevd_cython.pyx\u001b[0m in \u001b[0;36m_pydevd_bundle.pydevd_cython.ThreadTracer.__call__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/debugpy/_vendored/pydevd/_pydev_bundle/pydev_is_thread_alive.py\u001b[0m in \u001b[0;36mis_thread_alive\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_temp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_is_stopped'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 3.x has this\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mis_thread_alive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_stopped\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
"0 Unbekannt 2021-01-01T00:00:00 2021-12-30 122.0 Gemessen \n",
"1 Unbekannt 2021-01-01T01:00:00 2021-12-30 177.0 Gemessen \n",
"2 Unbekannt 2021-01-01T02:00:00 2021-12-30 125.0 Gemessen \n",
"3 Unbekannt 2021-01-01T03:00:00 2021-12-30 84.0 Gemessen \n",
"4 Unbekannt 2021-01-01T04:00:00 2021-12-30 49.0 Gemessen \n",
"Data for year 2022:\n",
" MSID MSName ZSID ZSName Achse \\\n",
"0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
"1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
"2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
"3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
"4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n",
"\n",
" HNr Hoehe EKoord NKoord Richtung Knummer \\\n",
"0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n",
"1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n",
"2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n",
"3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n",
"4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n",
"\n",
" Kname AnzDetektoren D1ID D2ID D3ID \\\n",
"0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n",
"1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n",
"2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n",
"3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n",
"4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n",
"\n",
" D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n",
"0 Unbekannt 2022-01-01T00:00:00 2022-12-30 166.0 Gemessen \n",
"1 Unbekannt 2022-01-01T01:00:00 2022-12-30 255.0 Gemessen \n",
"2 Unbekannt 2022-01-01T02:00:00 2022-12-30 168.0 Gemessen \n",
"3 Unbekannt 2022-01-01T03:00:00 2022-12-30 96.0 Gemessen \n",
"4 Unbekannt 2022-01-01T04:00:00 2022-12-30 63.0 Gemessen \n"
]
}
],
@ -219,7 +232,8 @@
"\n",
"tables_dict = {}\n",
"pd.set_option(\"display.max_columns\", None)\n",
"data_dir = 'data/'\n",
"data_dir = 'datasets/'\n",
"\n",
"for year in range(2012, 2023):\n",
" file_name = f'sid_dav_verkehrszaehlung_miv_OD2031_{year}.csv'\n",
" file_path = os.path.join(data_dir, file_name)\n",
@ -701,7 +715,7 @@
"else:\n",
" print(f\"Failed to download data. Status code: {response.status_code}\")\n",
"\n",
"accidents_file_path = os.path.join(data_dir, ')\n"
"accidents_file_path = os.path.join(data_dir)\n"
]
}
],

View File

@ -1,29 +1,14 @@
# Databases Project
# Database Project Group 1
Use this repository for your integration code and any source code created while working on your project (ad-hoc code,
SQL queries, project files needed by external tools, etc.).
## Preliminaries
* Ensure you have access to a running Postgres instance.
* Ensure you have ```python3``` and ```pip``` installed.
* From within the root of the project, run ```pip install -r requirements.txt```. This ensures all Python dependencies are met.
* In ```src/fill_db.py```, look for the ```db_info``` variable and adapt it to your credentials.
- Merge your code into the main branch on the due date.
- Do not commit datasets!
- Any other document (except for the dump in the final hand-in) should be handed-in via ADAM.
If you have any questions regarding the project, please do not hesitate to ask during the exercise lessons or via mail
to [raphael.waltenspuel@unibas.ch](mailto:raphael.waltenspuel@unibas.ch)!
It is recommended that you first create a ```.gitignore``` file. (And exclude the "datasets" folder, for example). A useful tool for creating ```.gitignore``` files is www.gitignore.io.
Feel free to update or replace this readme with a brief description of your project and goals.
### Database setup guide
1. Make sure all the requirements in ```requirements.txt``` are met. If they are not met, run ```pip install -r requirements.txt``` in the root of the project.
2. Run the Python script ```integrate.py``` in the ```src``` folder. In the ```main``` method of the script, set a boolean to ```False``` if the corresponding dataset still needs to be downloaded; if the datasets have already been downloaded, set all the booleans to ```True```. The datasets need to be in a folder named ```datasets``` inside ```src``` (this should be set up automatically by the script).
3. Ensure you have a running Postgres instance with a database.
4. Ensure you have the correct credentials in the ```db_info``` variable of the Python script ```fill_db.py```.
5. Run ```fill_db.py```.
## Action
In the following, the order matters.
1. Run ```ensure_dirs_exist.py```. This makes sure all the directories needed to perform the data integration and logging exist.
2. Run ```integrate.py```. Adjust the main method to fit your needs. In particular, adjust the call to ```process_all_data_sources()``` so that the parameter corresponding to a dataset is ```False``` if the script should download it from the internet, and ```True``` otherwise. To get the GeoJSON data for signaled speeds in the city of Zurich, uncomment the line in the ```main``` method that calls ```load_tempo_geojson_from_api_to_local()```.
3. Run ```fill_db.py```. This will load the data into the database based on the credentials given in the ```db_info``` variable (see the sketch below).
4. Perform the analysis.
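
For reference, ```fill_db.py``` reads its connection settings from the ```db_info``` dictionary near the top of ```src/fill_db.py```. The sketch below shows the shape of that dictionary with placeholder values; adapt them to your own Postgres instance:

```python
# src/fill_db.py -- adapt these values to your own Postgres instance
db_info = {
    'host': 'localhost',       # host of the running Postgres instance
    'database': 'test-db23',   # name of the target database
    'port': '5432',
    'user': 'seb',             # your database user
    'password': '',            # your password (empty if none is set)
}
```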

View File

@ -0,0 +1 @@
https://data.stadt-zuerich.ch/dataset/sid_dav_strassenverkehrsunfallorte/download/RoadTrafficAccidentLocations.json

22
docs/all_csv_urls.txt Normal file
View File

@ -0,0 +1,22 @@
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2012.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2013.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2014.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2015.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2016.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2021.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2022.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2012_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2013_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2014_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2015_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2017_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2018_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2020_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2021_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv

View File

@ -0,0 +1,20 @@
# TODOs
* Write a script that makes tables and inserts the data.
* Find out if data cleaning can be done in Python with pandas or if it all must be SQL scripts.
# Project Diary
| Version<br/> 0.00 | Author: <br />michel.romancuk@stud.unibas.ch<br />sebastian.lenzlinger@unibas.ch<br /> | HS 2023<br />Databases<br /> |
|-----------------------|-------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------|
| Date | | Problems |
| October / pre 16.11.23 | Decision to use a Postgres server. <br/> Server setup.<br/> Set up pgadmin at [pgadmin.slenzlinger.dev](pgadmin.slenzlinger.dev) | |
| 16.11.23 | Set up the repo and wrote some instructions into ``wiki.md`` on how to set up the environment. | Realized that the steps for how postgres, pgadmin, nginx etc. were set up at the time had not been written down. |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |

View File

@ -0,0 +1,11 @@
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2012_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2013_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2014_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2015_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2017_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2018_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2020_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2021_verkehrszaehlungen_werte_fussgaenger_velo.csv
https://data.stadt-zuerich.ch/dataset/ted_taz_verkehrszaehlungen_werte_fussgaenger_velo/download/2022_verkehrszaehlungen_werte_fussgaenger_velo.csv

View File

@ -0,0 +1,11 @@
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2012.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2013.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2014.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2015.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2016.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2017.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2018.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2019.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2021.csv
https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2022.csv

View File

@ -0,0 +1,25 @@
# Setup Of Virtual Python dev env
First, open a terminal and make sure you are in the root directory.
All steps assume you are in the project root folder.
## Create Virtual environment
```
python3 -m venv db23-project-venv
```
## Activating the virtual environment
```
source db23-project-venv/bin/activate
```
#### When in the environment ``db23-project-venv``, just install all needed packages.
```
pip3 install pkg_name
```
## Getting back out
```
deactivate
```
# List of used packages
See ``requirements.txt``
# Setting up postgres
# Setting up pgadmin as a container served by nginx

112
requirements.txt Normal file
View File

@ -0,0 +1,112 @@
anyio==4.0.0
appnope==0.1.3
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==23.1.0
Babel==2.13.1
beautifulsoup4==4.12.2
black==23.11.0
bleach==6.1.0
certifi==2023.7.22
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
click-plugins==1.1.1
cligj==0.7.2
comm==0.2.0
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastjsonschema==2.19.0
fiona==1.9.5
fqdn==1.5.1
GeoAlchemy2==0.14.2
geopandas==0.14.1
idna==3.4
ipykernel==6.26.0
ipython==8.17.2
ipywidgets==8.1.1
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.2
json5==0.9.14
jsonpointer==2.4
jsonschema==4.19.2
jsonschema-specifications==2023.11.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.9.0
jupyter-lsp==2.2.0
jupyter_client==8.6.0
jupyter_core==5.5.0
jupyter_server==2.10.1
jupyter_server_terminals==0.4.4
jupyterlab==4.0.8
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.9
jupyterlab_server==2.25.1
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mistune==3.0.2
mypy-extensions==1.0.0
nbclient==0.9.0
nbconvert==7.11.0
nbformat==5.9.2
nest-asyncio==1.5.8
notebook==7.0.6
notebook_shim==0.2.3
numpy==1.26.2
overrides==7.4.0
packaging==23.2
pandas==2.1.3
pandocfilters==1.5.0
parso==0.8.3
pathspec==0.11.2
pexpect==4.8.0
platformdirs==4.0.0
prometheus-client==0.18.0
prompt-toolkit==3.0.41
psutil==5.9.6
psycopg2==2.9.9
ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
Pygments==2.16.1
pyproj==3.6.1
python-dateutil==2.8.2
python-json-logger==2.0.7
pytz==2023.3.post1
PyYAML==6.0.1
pyzmq==25.1.1
qtconsole==5.5.1
QtPy==2.4.1
referencing==0.31.0
requests==2.31.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.13.0
Send2Trash==1.8.2
shapely==2.0.2
six==1.16.0
sniffio==1.3.0
soupsieve==2.5
SQLAlchemy==2.0.23
stack-data==0.6.3
terminado==0.18.0
tinycss2==1.2.1
tornado==6.3.3
traitlets==5.13.0
types-python-dateutil==2.8.19.14
typing_extensions==4.8.0
tzdata==2023.3
uri-template==1.3.0
urllib3==2.1.0
wcwidth==0.2.10
webcolors==1.13
webencodings==0.5.1
websocket-client==1.6.4
widgetsnbextension==4.0.9

139
src/data_utils.py Normal file
View File

@ -0,0 +1,139 @@
import json
import os
import pandas as pd
import requests
from urllib.parse import urlparse
import geopandas as gpd
from concurrent.futures import ThreadPoolExecutor as tpe
import logging
logging.basicConfig(level=logging.DEBUG, filename='logs/data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('data_utils.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
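# Stream the file at `url` to `local_filename` in 8 KiB chunks so large CSVs are never held in memory at once.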
def download_csv(url, local_filename):
with requests.get(url, stream=True) as r:
r.raise_for_status()
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
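# Download every URL listed in `urls_file` into `data_dir`, creating the directory if needed and skipping files that already exist locally.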
def process_urls(data_dir, urls_file):
# Ensure the data directory exists
if not os.path.exists(data_dir):
os.makedirs(data_dir)
# Read URLs from the file
with open(urls_file, 'r') as file:
urls = file.readlines()
# Process each URL
for url in urls:
url = url.strip()
filename = os.path.basename(urlparse(url).path)
local_filename = os.path.join(data_dir, filename)
# Check if the file already exists
if not os.path.isfile(local_filename):
logger.debug(f"Downloading {url}...")
download_csv(url, local_filename)
logger.debug(f"Saved to {local_filename}")
else:
print(f"File {filename} already exists in {data_dir}, skipping download.")
def load_dataframe_from_csv(filepath):
try:
df = pd.read_csv(filepath, low_memory=False)
return df
except Exception as e:
logger.error(f"Error loading {filepath}: {e}")
return None
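# Load every CSV in `data_dir` whose name contains `u_string`, drop duplicate rows per file, and return the resulting DataFrames as a list.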
def load_dataframes_from_csv_files(data_dir, u_string):
dataframes = []
# with tpe(max_workers=5) as executor:
# for filename in os.listdir(data_dir):
# if (u_string in filename) and filename.endswith('.csv'):
# filepath = os.path.join(data_dir, filename)
# future = executor.submit(load_dataframe_from_csv, filepath)
# dataframes.append(future)
#
# dataframes = [future.result() for future in dataframes if future.result() is not None]
#
# return dataframes
for filename in os.listdir(data_dir):
if (u_string in filename) and filename.endswith('.csv'):
filepath = os.path.join(data_dir, filename)
df = pd.read_csv(filepath, low_memory=False)
logger.debug(f'Duplicate Rows for {filename}: {df[df.duplicated()].shape[0]}')
df = df.drop_duplicates()
logger.debug(f'Duplicate Rows after DROPPING for {filename}: {df[df.duplicated()].shape[0]}')
dataframes.append(df.drop_duplicates())
return dataframes
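# Load the GeoJSON (.json) file in `data_dir` whose name contains `u_string` into a GeoDataFrame; if several files match, the last one read wins.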
def load_dataframes_from_geojson_files(data_dir, u_string):
print('u_string', u_string)
gdf = gpd.GeoDataFrame()
for filename in os.listdir(data_dir):
#print("Filename:", filename)
if (u_string in filename) and filename.endswith('.json'):
filepath = os.path.join(data_dir, filename)
print("Filepath:", filepath)
gdf = gpd.read_file(filepath)
return gdf
def combine_dataframes(dataframes):
if dataframes:
combined_dataframe = pd.concat(dataframes, ignore_index=True)
logger.debug(f'Duplicate Rows after combining: {combined_dataframe[combined_dataframe.duplicated()]}')
return combined_dataframe
else:
print("No dataframes to combine")
return pd.DataFrame()
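# Download the CSVs listed in `urls_file` unless `files_present` is True, then load all files matching `u_string` and concatenate them into a single DataFrame.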
def create_unified_df(urls_file, u_string, data_dir, files_present=False):
df_list = []
df_unified = None
if not files_present:
process_urls(data_dir, urls_file)
df_list = load_dataframes_from_csv_files(data_dir, u_string)
df_unified = combine_dataframes(df_list)
return df_unified
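# GET the JSON payload from `api_link` and save it under `target_name` in `integrated_dir`; a `.geojson` suffix is appended to the target name.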
def load_file_from_api(api_link, target_name, integrated_dir):
response = requests.get(api_link)
final_location = os.path.join(integrated_dir, target_name)
if response.status_code == 200:
logger.info(f"Succesfull get from {api_link}")
data = response.json()
with open(f'{final_location}.geojson', 'w') as file:
json.dump(data, file)
logger.info(f"{api_link} successfully downloaded and saved to {final_location}")
else:
logger.critical(f"Failed to get data. Status Code: {response.status_code}")
def save_dataframe_to_csv(df, integrated_dir, filename):
pass
if __name__ == "__main__":
csv_urls_file = '../docs/all_csv_urls.txt'
datasets_dir = 'datasets/'
output_file = 'column_names.txt'
process_urls(datasets_dir, csv_urls_file)
# extract_column_names(datasets_dir, output_file)

26
src/ensure_dirs_exist.py Normal file
View File

@ -0,0 +1,26 @@
import logging
import os
"""
The functionality of this script has been adapted from data_utils.ensure_dirs_exist().
This needs to be run before any other script.
"""
data_dir = 'datasets/'
integrated_dir = 'datasets/integrated/'
logs_dir = 'logs/'
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('ensure_dirs_exist.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
logger.debug(f'data_dir: {data_dir}\n integrated_dir: {integrated_dir}')
logger.info("Ensuring needed directories exist.")
os.makedirs(data_dir, exist_ok=True)
logger.debug("data_dir created.")
os.makedirs(integrated_dir, exist_ok=True)
logger.debug("integrated_dir created")
os.makedirs(logs_dir, exist_ok=True)
logger.debug("logs_dir created")

108
src/fill_db.py Normal file
View File

@ -0,0 +1,108 @@
import logging
import psycopg2
import subprocess
logging.basicConfig(level=logging.DEBUG, filename='logs/fill_db.log',
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fill_db.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
integrated_dir = 'datasets/integrated/'
accident_geojson_file = 'datasets/integrated/Accidents.geojson'
signaled_speeds_file = 'datasets/integrated/signaled_speeds.geojson.geojson'
accident_loader_script = 'load_accidents_into_db.sh'
accident_table_name = 'accidents'
signaled_speeds_table_name = 'signaled_speeds'
"""
Make sure db_info contains the correct credentials.
"""
db_info = {
'host': 'localhost',
'database': 'test-db23',
'port': '5432',
'user': 'seb',
'password': '',
}
setup_tables_script = 'setup_tables.sql'
load_csvs_into_db_script = 'load_csvs_into_db.sql'
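# Execute the SQL file `script` against the database described by `db_info`, committing on success and rolling back on failure.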
def run_sql(script, db_info):
db_connection = psycopg2.connect(**db_info)
db_cursor = db_connection.cursor()
with open(script, 'r') as sql_file:
sql_script = sql_file.read()
try:
db_cursor.execute(sql_script)
db_connection.commit()
logger.info(f'{script} executed successfully')
except Exception as e:
db_connection.rollback()
logger.exception(f'Error executing {script}: {e}')
finally:
db_cursor.close()
db_connection.close()
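# Run the bash helper script (an ogr2ogr wrapper) with the given positional arguments to load a GeoJSON file into the database.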
def run_geojson_loader_script(script, *args):
try:
cmd = ['bash', script] + list(args)
res = subprocess.run(cmd, check=True, text=True, capture_output=True)
logger.info(f'{script} executed successfully. Output: {res.stdout}')
except subprocess.CalledProcessError as e:
logger.exception(f'Error executing {script}: {e}')
logger.info(f"Remember to set the correct permissions for the script: chmod +x {script}")
def geojson_loader(*args, modus='append'):
"""
Use this instead of run_geojson_loader_script() in the main method to avoid the bash dependency.
:param args: All the arguments needed for ogr2ogr to run properly
:param modus: append or overwrite db table
:return:
"""
geojson_file, db_name, db_user, db_password, db_host, db_port, target_table = args
cmd = [
"ogr2ogr",
"-f", "PostgreSQL",
f"PG:dbname='{db_name}' host='{db_host}' port='{db_port}' user='{db_user}' password='{db_password}'",
geojson_file,
"-nln", target_table,
f"-{modus}"
]
try:
# Run the command
res = subprocess.run(cmd, check=True, text=True, capture_output=True)
logger.info(f"ogr2ogr command executed successfully. Output: {res.stdout}")
except subprocess.CalledProcessError as e:
logger.exception(f"Error executing ogr2ogr command: {e}")
if __name__ == '__main__':
run_sql(setup_tables_script, db_info)
logger.info("Finnished setting up tables.")
run_sql(load_csvs_into_db_script, db_info)
logger.info("Finnished loading csv into db.")
run_geojson_loader_script(accident_loader_script,
accident_geojson_file,
db_info['database'],
db_info['user'],
db_info['password'],
db_info['host'],
db_info['port'],
accident_table_name)
logger.info('Finished loading accident geojson into db using bash script.')
geojson_loader(signaled_speeds_file,
db_info['database'],
db_info['user'],
db_info['password'],
db_info['host'],
db_info['port'],
signaled_speeds_table_name,
modus='overwrite')

245
src/integrate.py Normal file
View File

@ -0,0 +1,245 @@
import data_utils as du
import os
import pandas as pd
import geopandas as gpd
import time
from shapely.geometry import Point
import re
import logging
logging.basicConfig(level=logging.DEBUG, filename='logs/integrate.log',
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('integrate.py')
stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
foot_bike_urls_file = '../docs/foot_bike_zaehlung_urls.txt'
miv_file_urls = '../docs/verkehrszaehlung_moto_urls.txt'
accident_file_url = '../docs/accident_loc_urls.txt'
# Using u_string to discriminate between files that belong together
motor_file_u_string = 'sid_dav_verkehrszaehlung_miv_OD2031'
foot_bike_file_u_string = 'velo.csv'
accident_file_u_string = 'RoadTrafficAccidentLocations.json'
data_dir = 'datasets/'
integrated_dir = 'datasets/integrated/'
logs_dir = 'logs/'
signaled_speeds_json_api = 'https://www.ogd.stadt-zuerich.ch/wfs/geoportal/Signalisierte_Geschwindigkeiten?service=WFS&version=1.1.0&request=GetFeature&outputFormat=GeoJSON&typename=view_geoserver_tempo_ist'
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
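# Column dtypes for the cleaned DataFrames; they are applied via .astype() before export so the DataFrames match the table definitions in setup_tables.sql.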
fb_data_types = {
'ID': 'int',
'NORD': 'int',
'OST': 'int',
'DATE': 'str',
'HRS': 'int',
'VELO_IN': 'int',
'VELO_OUT': 'int',
'FUSS_IN': 'int',
'FUSS_OUT': 'int',
'Weekday_en': 'str'
}
miv_data_types = {
'ID': 'int',
'MSID': 'str',
'ZSID': 'str',
'Achse': 'str',
'NKoord': 'int',
'EKoord': 'int',
'Richtung': 'str',
'AnzFahrzeuge': 'int',
'AnzFahrzeugeStatus': 'str',
'Datum': 'str',
'Hrs': 'int',
'Weekday_en': 'str',
}
acc_data_types = {
'AccidentUID': 'str',
'AccidentYear': 'int',
'AccidentMonth': 'int',
'AccidentWeekDay_en': 'str',
'AccidentHour': 'int',
'NKoord': 'int',
'EKoord': 'int',
'AccidentType_en': 'str',
'AccidentType': 'str',
'AccidentSeverityCategory': 'str',
'AccidentInvolvingPedestrian': 'bool',
'AccidentInvolvingBicycle': 'bool',
'AccidentInvolvingMotorcycle': 'bool',
'RoadType': 'str',
'RoadType_en': 'str',
'geometry': 'str' # TODO: Figure out what dtype this needs to be for postgres
}
# def ensure_dirs_exist(data_dir, integrated_dir, logs_dir):
# """
# This should be called before anything else to make sure that the relevant directories exists.
# :param data_dir: directory where the datasets are stored
# :param integrated_dir: directory where the integrated data will be stored
# :return:
# """
# logger.debug(f'data_dir: {data_dir}\n integrated_dir: {integrated_dir}')
# logger.info("Ensuring needed directories exist.")
# os.makedirs(data_dir, exist_ok=True)
# logger.debug("data_dir created.")
# os.makedirs(integrated_dir, exist_ok=True)
# logger.debug("integrated_dir created")
# os.makedirs(logs_dir, exist_ok=True)
# logger.debug("logs_dir created")
def process_foot_bike_data(files_present=True):
fb_df_unified = du.create_unified_df(foot_bike_urls_file, foot_bike_file_u_string, data_dir,
files_present=files_present)
fb_df_unified[['DATE', "TIME"]] = fb_df_unified['DATUM'].str.split('T', expand=True)
fb_df_unified[['HRS', 'MINS']] = fb_df_unified['TIME'].str.split(':', expand=True)
# We might still need FK_ZAEHLER after all
fb_cols_to_drop = ['DATUM']
fb_df_unified_correct_cols = fb_df_unified.drop(columns=fb_cols_to_drop, axis=1)
fb_df_unified_correct_cols.fillna(0, inplace=True)
fb_df_grouped = fb_df_unified_correct_cols.groupby(['OST', 'NORD', 'DATE', 'HRS']).agg({
'VELO_IN': 'sum',
'VELO_OUT': 'sum',
'FUSS_IN': 'sum',
'FUSS_OUT': 'sum'
}).reset_index()
dt_obj = pd.to_datetime(fb_df_grouped['DATE'])
days = dt_obj.dt.weekday
fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x])
cleaned_fb_df = fb_df_grouped
cleaned_fb_df['ID'] = cleaned_fb_df.index + 1
cleaned_fb_df = cleaned_fb_df[['ID', 'NORD', 'OST', 'DATE', 'HRS', 'VELO_IN', 'VELO_OUT', 'FUSS_IN',
'FUSS_OUT', 'Weekday_en']]
# Ensure datatype of df and sql table match
cleaned_fb_df = cleaned_fb_df.astype(fb_data_types)
return cleaned_fb_df
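# Clean the motorized-traffic counts: split the timestamp, keep the relevant columns, fill missing values, add the English weekday name and a running ID, and drop duplicate rows.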
def process_miv_data(files_present=True):
miv_df_unified = du.create_unified_df(miv_file_urls, motor_file_u_string, data_dir, files_present=files_present)
miv_df_unified[['Datum', "Time"]] = miv_df_unified['MessungDatZeit'].str.split('T', expand=True)
miv_df_unified[['Hrs', 'Mins', 'Sec']] = miv_df_unified['Time'].str.split(':', expand=True)
miv_cols_to_keep = ['MSID','ZSID','Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus',
'Datum', 'Hrs',]
miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep]
dt_obj = pd.to_datetime(miv_df_cols_dropped['Datum'])
days = dt_obj.dt.weekday
miv_df_cols_dropped.loc[:, 'Weekday_en'] = days.map(lambda x: weekday_names[x])
miv_df_cols_dropped.loc[:, 'AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)
miv_df_cols_dropped.loc[:, 'ZSID'] = miv_df_cols_dropped['ZSID'].fillna('Missing').astype(str)
miv_df_cols_dropped['ID'] = (miv_df_cols_dropped.index + 1).copy()
cleaned_miv_df = miv_df_cols_dropped[['ID', 'MSID', 'ZSID', 'Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge',
'AnzFahrzeugeStatus', 'Datum', 'Hrs', 'Weekday_en']]
cleaned_miv_df = cleaned_miv_df.astype(miv_data_types)
cleaned_miv_df = cleaned_miv_df.drop_duplicates()
return cleaned_miv_df
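# Clean the accident locations loaded from GeoJSON: keep the relevant attributes and rename the CHLV95 coordinate columns to NKoord/EKoord.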
def process_accident_data(file_present: bool = True):
if not file_present:
du.process_urls(data_dir, accident_file_url)
acc_df_unified = du.load_dataframes_from_geojson_files(data_dir, accident_file_u_string)
acc_cols_to_keep = ['AccidentUID', 'AccidentYear', 'AccidentMonth', 'AccidentWeekDay_en','AccidentHour',
'AccidentLocation_CHLV95_N', 'AccidentLocation_CHLV95_E', 'AccidentType_en', 'AccidentType',
'AccidentSeverityCategory', 'AccidentInvolvingPedestrian', 'AccidentInvolvingBicycle',
'AccidentInvolvingMotorcycle', 'RoadType', 'RoadType_en',
'geometry']
cleaned_acc_df = acc_df_unified[acc_cols_to_keep]
cleaned_acc_df.rename(columns={
'AccidentLocation_CHLV95_E': 'EKoord',
'AccidentLocation_CHLV95_N': 'NKoord',
}, inplace=True)
cleaned_acc_df = cleaned_acc_df.astype(acc_data_types)
return cleaned_acc_df
def process_all_data_sources(fb_present=True, miv_present=True, accident_present=True):
"""
Process all data sources and turn them into CSV/GeoJSON files. After this function is called, there
should be files of the cleaned and integrated data sources in the integrated directory.
:param fb_present: bool, whether the foot/bike files are present in the local file system
:param miv_present: bool, whether the motorized-traffic files are present in the local file system
:param accident_present: bool, whether the accident file is present in the local file system
:return:
"""
# ensure_dirs_exist(data_dir, integrated_dir)
logger.info("Started processing all data sources.")
fb_to_integrated(fb_present)
miv_to_integrated_csv(miv_present)
acc_to_cleaned_geojson(accident_present)
def fb_to_integrated(files_present=True):
start_time = time.time()
logger.info("Start processing pedestrian and bicycle data (FootBikeCount)")
fb_count_df = process_foot_bike_data(files_present)
logger.debug(f'FB Head:{fb_count_df.head()}\n FB dtypes: {fb_count_df.dtypes}')
fb_file_path = os.path.join(integrated_dir, 'FootBikeCount.csv')
logger.debug(f'FB Cleaned File Path: {fb_file_path}')
fb_count_df.to_csv(fb_file_path, index=False)
logger.info("FB integrated csv created.")
end_time = time.time()
logger.info(f'Time taken for FootBikeCount: {end_time-start_time}')
def miv_to_integrated_csv(miv_present=True):
start_time2 = time.time()
logger.info("Start processing motorized vehicle data (MivCount)")
miv_count_df = process_miv_data(miv_present)
logger.debug(f'MIV Head:{miv_count_df.head()}\n MIV dtypes: {miv_count_df.dtypes}')
miv_file_path = os.path.join(integrated_dir, 'MivCount.csv')
logger.debug(f'MIV Cleaned File Path: {miv_file_path}')
miv_count_df.to_csv(miv_file_path, index=False)
logger.info("MIV integrated csv created.")
end_time = time.time()
logger.info(f'Time taken for MivCount: {end_time-start_time2}')
def acc_to_cleaned_geojson(acc_present=True):
start_time3 = time.time()
logger.info("Start processing accident data (Accidents)")
acc_df = process_accident_data(acc_present)
logger.debug(f'ACC Head: { acc_df.head()}\n Acc dtypes: {acc_df.dtypes}')
acc_file_path = os.path.join(integrated_dir, 'Accidents.geojson')
logger.debug(f'Acc Cleaned file path: {acc_file_path}')
acc_df['geometry'] = acc_df['geometry'].apply(lambda row: re.findall(r"[-+]?\d*\.\d+|\d+", row))
# Create a Point object using the extracted coordinates
acc_df['geometry'] = acc_df['geometry'].apply(
lambda coords: Point(float(coords[0]), float(coords[1]), float(coords[2])))
acc_gdf = gpd.GeoDataFrame(acc_df, geometry='geometry')
acc_gdf.to_file(acc_file_path, driver='GeoJSON')
logger.info("ACC integrated csv created.")
end_time = time.time()
logger.info(f'Time taken for Accidents: {end_time - start_time3}')
def load_tempo_geojson_from_api_to_local():
du.load_file_from_api(signaled_speeds_json_api, 'signaled_speeds.geojson', integrated_dir)
if __name__ == '__main__':
# ensure_dirs_exist(data_dir, integrated_dir, logs_dir)
# process_all_data_sources(True, True, True)
# miv_to_integrated_csv()
# acc_to_cleaned_geojson()
load_tempo_geojson_from_api_to_local()

View File

@ -0,0 +1,15 @@
#!/bin/bash
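# Usage: ./load_accidents_into_db.sh GEOJSON_FILE DB_NAME DB_USER DB_PASSWORD DB_HOST DB_PORT TARGET_TABLE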
# Define parameters
GEOJSON_FILE=$1
DB_NAME=$2
DB_USER=$3
DB_PASSWORD=$4
DB_HOST=$5
DB_PORT=$6
TARGET_TABLE=$7
# Run ogr2ogr command
ogr2ogr -f "PostgreSQL" PG:"dbname='$DB_NAME' host='$DB_HOST' port='$DB_PORT' user='$DB_USER' password='$DB_PASSWORD'" "$GEOJSON_FILE" -nln $TARGET_TABLE -append
echo "GeoJSON data has been imported into $TARGET_TABLE"

View File

@ -0,0 +1,7 @@
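-- COPY reads from the file system of the database server, so the absolute paths below must point to the integrated CSV files on that machine; adjust them to your setup.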
COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv'
DELIMITER ','
CSV HEADER;
COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv'
DELIMITER ','
CSV HEADER;

42
src/queries.sql Normal file
View File

@ -0,0 +1,42 @@
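-- Ad-hoc check: match foot/bike counts, accidents and motorized-traffic counts that share the same weekday and hour, where the foot/bike and motor counting stations lie within 100 m of each other on both coordinate axes (CHLV95).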
select p.id, a.accidentuid, m.id
from footbikecount p, accidents a, mivcount m
where p.weekday_en = a.accidentweekday_en AND a.accidentweekday_en = m.weekday_en
AND p.weekday_en = m.weekday_en AND p.hrs = a.accidenthour AND a.accidenthour = m.hrs
AND p.hrs = m.hrs AND (p.ost - m.ekoord between -100 AND 100) AND (p.nord - m.nkoord between -100 AND 100);
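-- Materialize the same join as a table, additionally storing the Euclidean distance between the foot/bike and motor counting stations.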
DROP TABLE IF EXISTS Contemporaneous2;
CREATE TABLE Contemporaneous2 (
p_id INTEGER,
accidentuid VARCHAR(256),
m_id INTEGER,
weekday_en VARCHAR(10),
hrs INTEGER,
distance DOUBLE PRECISION
);
CREATE TABLE Intermediate2 AS
SELECT
p.id AS p_id,
a.accidentuid,
m.id AS m_id,
p.weekday_en,
p.hrs,
SQRT(POWER(p.ost - m.ekoord, 2) + POWER(p.nord - m.nkoord, 2)) AS distance
FROM
footbikecount p,
accidents a,
mivcount m
WHERE
p.weekday_en = a.accidentweekday_en
AND a.accidentweekday_en = m.weekday_en
AND p.weekday_en = m.weekday_en
AND p.hrs = a.accidenthour
AND a.accidenthour = m.hrs
AND p.hrs = m.hrs
AND (p.ost - m.ekoord BETWEEN -100 AND 100)
AND (p.nord - m.nkoord BETWEEN -100 AND 100);
INSERT INTO Contemporaneous2 (p_id, accidentuid, m_id, weekday_en, hrs, distance)
SELECT p_id, accidentuid, m_id, weekday_en, hrs, distance FROM Intermediate2;

72
src/setup_tables.sql Normal file
View File

@ -0,0 +1,72 @@
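-- PostGIS is required for the geometry(Point, 4326) column of the Accidents table.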
CREATE EXTENSION IF NOT EXISTS postgis;
DROP TABLE IF EXISTS FootBikeCount;
DROP TABLE IF EXISTS Accidents;
DROP TABLE IF EXISTS MivCount;
CREATE TABLE FootBikeCount (
ID INTEGER ,
NORD INTEGER ,
OST INT ,
DATE VARCHAR(10) ,
HRS INTEGER ,
VELO_IN INTEGER ,
VELO_OUT INTEGER ,
FUSS_IN INTEGER ,
FUSS_OUT INTEGER ,
Weekday_en VARCHAR(10) ,
PRIMARY KEY(ID) ,
CHECK (Weekday_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')),
CHECK (Hrs BETWEEN 0 AND 23)
);
CREATE TABLE MivCount (
ID INTEGER ,
MSID VARCHAR(10) ,
ZSID VARCHAR(10) ,
Achse VARCHAR(256) ,
NKoord INTEGER ,
EKoord INTEGER ,
Richtung VARCHAR(100) ,
AnzFahrzeuge INTEGER ,
AnzFahrzeugeStatus VARCHAR(20) ,
Datum VARCHAR(10) ,
Hrs Integer ,
Weekday_en VARCHAR(10),
PRIMARY KEY (ID),
CHECK (Weekday_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')),
CHECK (Hrs BETWEEN 0 AND 23)
);
CREATE TABLE Accidents (
AccidentUID VARCHAR(256) ,
AccidentYear INTEGER ,
AccidentMonth INTEGER,
AccidentWeekDay_en VARCHAR(10) ,
AccidentHour INTEGER ,
NKoord INTEGER ,
EKoord INTEGER ,
AccidentType_en VARCHAR(256) ,
AccidentType VARCHAR(4) ,
AccidentSeverityCategory VARCHAR(4) ,
AccidentInvolvingPedestrian BOOLEAN ,
AccidentInvolvingBicycle BOOLEAN ,
AccidentInvolvingMotorcycle BOOLEAN ,
RoadType VARCHAR(5) ,
RoadType_en VARCHAR(256) ,
Geometry geometry(Point, 4326) ,
PRIMARY KEY (AccidentUID) ,
CHECK ( AccidentHour BETWEEN 0 AND 23) ,
CHECK (AccidentWeekDay_en IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'))
);

233
src/testArea.ipynb Normal file
View File

@ -0,0 +1,233 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"import pandas as pd\n",
"from datetime import datetime as dt\n",
"\n",
"import integrate as intr\n",
"\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-03T15:47:10.110909Z",
"start_time": "2023-12-03T15:47:09.656556Z"
}
},
"id": "be55b25929d95559"
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/seb/Projects/repos/group-1/src/integrate.py:132: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" miv_df_cols_dropped['Weekday_en'] = days.map(lambda x: weekday_names[x])\n",
"/Users/seb/Projects/repos/group-1/src/integrate.py:133: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" miv_df_cols_dropped['AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int)\n"
]
}
],
"source": [
"\n",
"miv_df = intr.process_miv_data()\n",
"#fb_data = intr.process_foot_bike_data()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-03T15:49:07.561603Z",
"start_time": "2023-12-03T15:47:14.759104Z"
}
},
"id": "dd3831953afdeb72"
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"121\n"
]
}
],
"source": [
"duplicate_rows = miv_df[miv_df.duplicated()]\n",
"print(duplicate_rows.shape[0])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-03T15:51:21.158909Z",
"start_time": "2023-12-03T15:51:15.711222Z"
}
},
"id": "14471cd78389ce4d"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"acc_df = intr.process_accident_data(True)"
],
"metadata": {
"collapsed": false
},
"id": "f86bc612060b17a4"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"acc_df.head()\n",
"acc_df['AccidentWeekDay_en'].unique()\n",
"#acc_df.dtypes\n",
"\n"
],
"metadata": {
"collapsed": false
},
"id": "6affbeea6c7cf3ef"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"print(\"Accident Columns:\")\n",
"print(acc_df.dtypes)\n",
"print()\n",
"print(\"MIV Columns:\")\n",
"print(miv_df.dtypes)\n",
"print()\n",
"print(\"FB Cols:\")\n",
"print(fb_data.dtypes)"
],
"metadata": {
"collapsed": false
},
"id": "242041cd369d8454"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"acc_df['ID'] = acc_df.index +1\n",
"acc_df[('ID')]"
],
"metadata": {
"collapsed": false
},
"id": "1841925ee109a417"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"print(\"MIV unqiue:\", miv_df['EKoord'])\n",
"print(\"Acc unique:\", acc_df['RoadType'].unique)\n",
"print(\"FB unique: \", fb_data['DATE'])\n"
],
"metadata": {
"collapsed": false
},
"id": "f6d752ea17eda341"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"acc_df.head()"
],
"metadata": {
"collapsed": false
},
"id": "a159cafa9c227b88"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from sqlalchemy import create_engine\n",
"from geoalchemy2 import Geometry, WKTElement\n",
"import geopandas as gpd\n",
"from shapely import wkt\n",
"\n",
"db_url = f'postgresql://seb:@localhost:5432/test-db23'\n",
"engine = create_engine(db_url)\n",
"\n",
"#miv_df.to_sql('table_name', engine, if_exists='replace', index=False)\n",
"#fb_data.to_sql('footbike', engine, if_exists='replace', index=False)\n",
"\n",
"geometry_column = 'geometry'\n",
"\n",
"\n",
"acc_df['geometry'] = acc_df['geometry'].apply(lambda geom: WKTElement(geom.wkt, srid=4326))\n",
"\n",
"acc_df.to_sql('accidents', engine, if_exists='replace', index=False, dtype={'geometry': Geometry('POINT', srid=4326)})\n",
"\n"
],
"metadata": {
"collapsed": false
},
"id": "fa76af8343443d7a"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"engine.dispose()"
],
"metadata": {
"collapsed": false
},
"id": "bc0a23a5126e76c2"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}