From b29ce4278c4dbd10030a884f326834e7a78360b3 Mon Sep 17 00:00:00 2001 From: Sebastian Lenzlinger <74497638+sebaschi@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:04:39 +0100 Subject: [PATCH] Add Data Exploration Notebook. --- .gitignore | 2 + DataExploration.ipynb | 729 ++++++++++++++++++++++++++++++++++++++++++ docs/diary.md | 0 docs/wiki.md | 0 4 files changed, 731 insertions(+) create mode 100644 .gitignore create mode 100644 DataExploration.ipynb create mode 100644 docs/diary.md create mode 100644 docs/wiki.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..73ce095 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +data/* +*.csv diff --git a/DataExploration.ipynb b/DataExploration.ipynb new file mode 100644 index 0000000..a52ecca --- /dev/null +++ b/DataExploration.ipynb @@ -0,0 +1,729 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 20, + "id": "17ca2acb", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data for year 2012:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 
1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2012-01-01T00:00:00 2021-02-03 NaN Fehlend \n", + "1 Unbekannt 2012-01-01T01:00:00 2021-02-03 256.0 Gemessen \n", + "2 Unbekannt 2012-01-01T02:00:00 2021-02-03 186.0 Gemessen \n", + "3 Unbekannt 2012-01-01T03:00:00 2021-02-03 142.0 Gemessen \n", + "4 Unbekannt 2012-01-01T04:00:00 2021-02-03 116.0 Gemessen \n", + "Data for year 2013:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2013-01-01T00:00:00 2021-02-03 224.0 Gemessen \n", + "1 Unbekannt 2013-01-01T01:00:00 
2021-02-03 547.0 Gemessen \n", + "2 Unbekannt 2013-01-01T02:00:00 2021-02-03 249.0 Gemessen \n", + "3 Unbekannt 2013-01-01T03:00:00 2021-02-03 123.0 Gemessen \n", + "4 Unbekannt 2013-01-01T04:00:00 2021-02-03 95.0 Gemessen \n", + "Data for year 2014:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2014-01-01T00:00:00 2021-02-03 229.0 Gemessen \n", + "1 Unbekannt 2014-01-01T01:00:00 2021-02-03 441.0 Gemessen \n", + "2 Unbekannt 2014-01-01T02:00:00 2021-02-03 195.0 Gemessen \n", + "3 Unbekannt 2014-01-01T03:00:00 2021-02-03 142.0 Gemessen \n", + "4 Unbekannt 2014-01-01T04:00:00 2021-02-03 98.0 Gemessen \n", + "Data for year 2015:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad 
Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2015-01-01T00:00:00 2021-02-03 228.0 Gemessen \n", + "1 Unbekannt 2015-01-01T01:00:00 2021-02-03 338.0 Gemessen \n", + "2 Unbekannt 2015-01-01T02:00:00 2021-02-03 170.0 Gemessen \n", + "3 Unbekannt 2015-01-01T03:00:00 2021-02-03 120.0 Gemessen \n", + "4 Unbekannt 2015-01-01T04:00:00 2021-02-03 84.0 Gemessen \n", + "Data for year 2016:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 
auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2016-01-01T00:00:00 2021-02-03 236.0 Gemessen \n", + "1 Unbekannt 2016-01-01T01:00:00 2021-02-03 335.0 Gemessen \n", + "2 Unbekannt 2016-01-01T02:00:00 2021-02-03 213.0 Gemessen \n", + "3 Unbekannt 2016-01-01T03:00:00 2021-02-03 112.0 Gemessen \n", + "4 Unbekannt 2016-01-01T04:00:00 2021-02-03 80.0 Gemessen \n", + "Data for year 2017:\n", + " MSID MSName ZSID ZSName Achse \\\n", + "0 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "1 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "2 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "3 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "4 Z001M001 Unbekannt Z001 Seestrasse (Strandbad Wollishofen) Seestrasse \n", + "\n", + " HNr Hoehe EKoord NKoord Richtung Knummer \\\n", + "0 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "1 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "2 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "3 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "4 451 Unbekannt 2683009.89 1243936.2 auswärts 789 \n", + "\n", + " Kname AnzDetektoren D1ID D2ID D3ID \\\n", + "0 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "1 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "2 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "3 Badanstalt 
Wollishofen 1 2 Unbekannt Unbekannt \n", + "4 Badanstalt Wollishofen 1 2 Unbekannt Unbekannt \n", + "\n", + " D4ID MessungDatZeit LieferDat AnzFahrzeuge AnzFahrzeugeStatus \n", + "0 Unbekannt 2017-01-01T00:00:00 2021-02-03 295.0 Gemessen \n", + "1 Unbekannt 2017-01-01T01:00:00 2021-02-03 264.0 Gemessen \n", + "2 Unbekannt 2017-01-01T02:00:00 2021-02-03 180.0 Gemessen \n", + "3 Unbekannt 2017-01-01T03:00:00 2021-02-03 107.0 Gemessen \n", + "4 Unbekannt 2017-01-01T04:00:00 2021-02-03 97.0 Gemessen \n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_311061/2135127822.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mtable_key\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"miv_{year}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, 
sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 946\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 947\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 948\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 949\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 615\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 617\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1746\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1747\u001b[0m \u001b[0mcol_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1748\u001b[0;31m \u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0;31m# type: ignore[attr-defined]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1749\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1750\u001b[0m )\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 239\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 240\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mparsers.pyx\u001b[0m in 
\u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._convert_column_data\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mparsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers._maybe_upcast\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/numpy/core/multiarray.py\u001b[0m in \u001b[0;36mputmask\u001b[0;34m(a, mask, values)\u001b[0m\n\u001b[1;32m 1129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1130\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1131\u001b[0;31m \u001b[0;34m@\u001b[0m\u001b[0marray_function_from_c_func_and_dispatcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_multiarray_umath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mputmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1132\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mputmask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m/\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1133\u001b[0m \"\"\"\n", + "\u001b[0;32m_pydevd_bundle/pydevd_cython.pyx\u001b[0m in \u001b[0;36m_pydevd_bundle.pydevd_cython.SafeCallWrapper.__call__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m_pydevd_bundle/pydevd_cython.pyx\u001b[0m in \u001b[0;36m_pydevd_bundle.pydevd_cython.ThreadTracer.__call__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/debugpy/_vendored/pydevd/_pydev_bundle/pydev_is_thread_alive.py\u001b[0m in \u001b[0;36mis_thread_alive\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_temp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_is_stopped'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 3.x has this\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mis_thread_alive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_stopped\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "import pandas as pd\n", + "import requests\n", + "from io import StringIO\n", + "import csv\n", + "import os\n", + "\n", + "tables_dict = {}\n", + "pd.set_option(\"display.max_columns\", None)\n", + "data_dir = 'data/'\n", + "for year in range(2012, 2023):\n", + " file_name = f'sid_dav_verkehrszaehlung_miv_OD2031_{year}.csv'\n", + " file_path = os.path.join(data_dir, file_name)\n", + "\n", + " if os.path.exists(file_path):\n", + " df = pd.read_csv(file_path, low_memory=False)\n", + "\n", + " table_key = f\"miv_{year}\"\n", + " tables_dict[table_key] = df\n", + "\n", + " print(f\"Data for year {year}:\")\n", + " print(df.head())\n", + " else:\n", + " print(f\"File not found for year {year}: {file_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "8c86b235", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AccidentUIDAccidentTypeAccidentType_deAccidentType_frAccidentType_itAccidentType_enAccidentSeverityCategoryAccidentSeverityCategory_deAccidentSeverityCategory_frAccidentSeverityCategory_itAccidentSeverityCategory_enAccidentInvolvingPedestrianAccidentInvolvingBicycleAccidentInvolvingMotorcycleRoadTypeRoadType_deRoadType_frRoadType_itRoadType_enAccidentLocation_CHLV95_EAccidentLocation_CHLV95_NCantonCodeMunicipalityCodeAccidentYearAccidentMonthAccidentMonth_deAccidentMonth_frAccidentMonth_itAccidentMonth_enAccidentWeekDayAccidentWeekDay_deAccidentWeekDay_frAccidentWeekDay_itAccidentWeekDay_enAccidentHourAccidentHour_text
0A2D2677533867004E0430A865E337004at0Schleuder- oder Selbstunfalldérapage ou perte de maîtriseIncidente di sbandamento o per colpa propriaAccident with skidding or self-accidentas4Unfall mit Sachschadenaccident avec dommages matérielsIncidente con danni materialiAccident with property damageFalseFalseFalsert433Nebenstrasseroute secondaireStrada secondariaMinor road26846051245194ZH26120111JanuarjanvierGennaioJanuaryaw406SamstagsamediSabatoSaturday000h-01h
19FD6441F802C20A6E0430A865E3320A6at0Schleuder- oder Selbstunfalldérapage ou perte de maîtriseIncidente di sbandamento o per colpa propriaAccident with skidding or self-accidentas3Unfall mit Leichtverletztenaccident avec blessés légersIncidente con feriti leggeriAccident with light injuriesFalseTrueFalsert433Nebenstrasseroute secondaireStrada secondariaMinor road26823821246980ZH26120111JanuarjanvierGennaioJanuaryaw406SamstagsamediSabatoSaturday101h-02h
29FDA0DC4856A6094E0430A865E336094at0Schleuder- oder Selbstunfalldérapage ou perte de maîtriseIncidente di sbandamento o per colpa propriaAccident with skidding or self-accidentas4Unfall mit Sachschadenaccident avec dommages matérielsIncidente con danni materialiAccident with property damageFalseFalseFalsert439andereautreAltroOther26827911247749ZH26120111JanuarjanvierGennaioJanuaryaw406SamstagsamediSabatoSaturday202h-03h
3A3B66E42396E6000E0430A865E336000at5Überqueren der Fahrbahnaccident en traversant une routeIncidente nell'attraversare la carreggiataAccident when crossing the lane(s)as3Unfall mit Leichtverletztenaccident avec blessés légersIncidente con feriti leggeriAccident with light injuriesFalseFalseFalsert433Nebenstrasseroute secondaireStrada secondariaMinor road26811991247102ZH26120111JanuarjanvierGennaioJanuaryaw406SamstagsamediSabatoSaturday202h-03h
49FDA0DBE8CCE9096E0430A865E339096at0Schleuder- oder Selbstunfalldérapage ou perte de maîtriseIncidente di sbandamento o per colpa propriaAccident with skidding or self-accidentas4Unfall mit Sachschadenaccident avec dommages matérielsIncidente con danni materialiAccident with property damageFalseFalseFalsert433Nebenstrasseroute secondaireStrada secondariaMinor road26824791250690ZH26120111JanuarjanvierGennaioJanuaryaw406SamstagsamediSabatoSaturday303h-04h
\n", + "
" + ], + "text/plain": [ + " AccidentUID AccidentType \\\n", + "0 A2D2677533867004E0430A865E337004 at0 \n", + "1 9FD6441F802C20A6E0430A865E3320A6 at0 \n", + "2 9FDA0DC4856A6094E0430A865E336094 at0 \n", + "3 A3B66E42396E6000E0430A865E336000 at5 \n", + "4 9FDA0DBE8CCE9096E0430A865E339096 at0 \n", + "\n", + " AccidentType_de AccidentType_fr \\\n", + "0 Schleuder- oder Selbstunfall dérapage ou perte de maîtrise \n", + "1 Schleuder- oder Selbstunfall dérapage ou perte de maîtrise \n", + "2 Schleuder- oder Selbstunfall dérapage ou perte de maîtrise \n", + "3 Überqueren der Fahrbahn accident en traversant une route \n", + "4 Schleuder- oder Selbstunfall dérapage ou perte de maîtrise \n", + "\n", + " AccidentType_it \\\n", + "0 Incidente di sbandamento o per colpa propria \n", + "1 Incidente di sbandamento o per colpa propria \n", + "2 Incidente di sbandamento o per colpa propria \n", + "3 Incidente nell'attraversare la carreggiata \n", + "4 Incidente di sbandamento o per colpa propria \n", + "\n", + " AccidentType_en AccidentSeverityCategory \\\n", + "0 Accident with skidding or self-accident as4 \n", + "1 Accident with skidding or self-accident as3 \n", + "2 Accident with skidding or self-accident as4 \n", + "3 Accident when crossing the lane(s) as3 \n", + "4 Accident with skidding or self-accident as4 \n", + "\n", + " AccidentSeverityCategory_de AccidentSeverityCategory_fr \\\n", + "0 Unfall mit Sachschaden accident avec dommages matériels \n", + "1 Unfall mit Leichtverletzten accident avec blessés légers \n", + "2 Unfall mit Sachschaden accident avec dommages matériels \n", + "3 Unfall mit Leichtverletzten accident avec blessés légers \n", + "4 Unfall mit Sachschaden accident avec dommages matériels \n", + "\n", + " AccidentSeverityCategory_it AccidentSeverityCategory_en \\\n", + "0 Incidente con danni materiali Accident with property damage \n", + "1 Incidente con feriti leggeri Accident with light injuries \n", + "2 Incidente con danni materiali Accident with 
property damage \n", + "3 Incidente con feriti leggeri Accident with light injuries \n", + "4 Incidente con danni materiali Accident with property damage \n", + "\n", + " AccidentInvolvingPedestrian AccidentInvolvingBicycle \\\n", + "0 False False \n", + "1 False True \n", + "2 False False \n", + "3 False False \n", + "4 False False \n", + "\n", + " AccidentInvolvingMotorcycle RoadType RoadType_de RoadType_fr \\\n", + "0 False rt433 Nebenstrasse route secondaire \n", + "1 False rt433 Nebenstrasse route secondaire \n", + "2 False rt439 andere autre \n", + "3 False rt433 Nebenstrasse route secondaire \n", + "4 False rt433 Nebenstrasse route secondaire \n", + "\n", + " RoadType_it RoadType_en AccidentLocation_CHLV95_E \\\n", + "0 Strada secondaria Minor road 2684605 \n", + "1 Strada secondaria Minor road 2682382 \n", + "2 Altro Other 2682791 \n", + "3 Strada secondaria Minor road 2681199 \n", + "4 Strada secondaria Minor road 2682479 \n", + "\n", + " AccidentLocation_CHLV95_N CantonCode MunicipalityCode AccidentYear \\\n", + "0 1245194 ZH 261 2011 \n", + "1 1246980 ZH 261 2011 \n", + "2 1247749 ZH 261 2011 \n", + "3 1247102 ZH 261 2011 \n", + "4 1250690 ZH 261 2011 \n", + "\n", + " AccidentMonth AccidentMonth_de AccidentMonth_fr AccidentMonth_it \\\n", + "0 1 Januar janvier Gennaio \n", + "1 1 Januar janvier Gennaio \n", + "2 1 Januar janvier Gennaio \n", + "3 1 Januar janvier Gennaio \n", + "4 1 Januar janvier Gennaio \n", + "\n", + " AccidentMonth_en AccidentWeekDay AccidentWeekDay_de AccidentWeekDay_fr \\\n", + "0 January aw406 Samstag samedi \n", + "1 January aw406 Samstag samedi \n", + "2 January aw406 Samstag samedi \n", + "3 January aw406 Samstag samedi \n", + "4 January aw406 Samstag samedi \n", + "\n", + " AccidentWeekDay_it AccidentWeekDay_en AccidentHour AccidentHour_text \n", + "0 Sabato Saturday 0 00h-01h \n", + "1 Sabato Saturday 1 01h-02h \n", + "2 Sabato Saturday 2 02h-03h \n", + "3 Sabato Saturday 2 02h-03h \n", + "4 Sabato Saturday 3 03h-04h " + ] + 
}, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 17;\n", + " var nbb_unformatted_code = \"\\n\\naloc_file_path = \\\"/home/slnopriv/projects/db23-data-exploration/data/RoadTrafficAccidentLocations.csv\\\"\\nalocs = pd.read_csv(file_path, low_memory=False)\\n\\nalocs.head()\";\n", + " var nbb_formatted_code = \"aloc_file_path = \\\"/home/slnopriv/projects/db23-data-exploration/data/RoadTrafficAccidentLocations.csv\\\"\\nalocs = pd.read_csv(file_path, low_memory=False)\\n\\nalocs.head()\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "aloc_file_path = \"/home/slnopriv/projects/db23-data-exploration/data/RoadTrafficAccidentLocations.csv\"\n", + "alocs = pd.read_csv(aloc_file_path, low_memory=False)\n", + "\n", + "alocs.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f6941bd7", + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 11;\n", + " var nbb_unformatted_code = \"%load_ext nb_black\";\n", + " var nbb_formatted_code = \"%load_ext nb_black\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ],
+ "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%load_ext nb_black" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "808aa85f-fc18-4fca-8268-40f2ecc777e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CSV file downloaded successfully.\n" + ] + } + ], + "source": [ + "# Specify the URL of the CSV file\n", + "csv_file_url = 'https://data.stadt-zuerich.ch/dataset/sid_dav_verkehrszaehlung_miv_od2031/download/sid_dav_verkehrszaehlung_miv_OD2031_2020.csv'\n", + "csv_rtal = 'https://data.stadt-zuerich.ch/dataset/sid_dav_strassenverkehrsunfallorte/download/RoadTrafficAccidentLocations.csv'\n", + "# Send an HTTP GET request\n", + "response = requests.get(csv_rtal)\n", + "\n", + "# Check the response status\n", + "if response.status_code == 200:\n", + " # Save the CSV content to a local file\n", + " with open('local_file2.csv', 'wb') as local_file:\n", + " local_file.write(response.content)\n", + " print(\"CSV file downloaded successfully.\")\n", + "else:\n", + " print(f\"Failed to download data. Status code: {response.status_code}\")\n", + "\n", + "accidents_file_path = os.path.join(data_dir, 'RoadTrafficAccidentLocations.csv')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/diary.md b/docs/diary.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/wiki.md b/docs/wiki.md new file mode 100644 index 0000000..e69de29