diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/DataExploration.ipynb b/DataExploration.ipynb old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/docs/accident_loc_urls.txt b/docs/accident_loc_urls.txt old mode 100644 new mode 100755 diff --git a/docs/all_csv_urls.txt b/docs/all_csv_urls.txt old mode 100644 new mode 100755 diff --git a/docs/diary.md b/docs/diary.md old mode 100644 new mode 100755 diff --git a/docs/foot_bike_zaehlung_urls.txt b/docs/foot_bike_zaehlung_urls.txt old mode 100644 new mode 100755 diff --git a/docs/urls.txt b/docs/urls.txt old mode 100644 new mode 100755 diff --git a/docs/verkehrszaehlung_moto_urls.txt b/docs/verkehrszaehlung_moto_urls.txt old mode 100644 new mode 100755 diff --git a/docs/wiki.md b/docs/wiki.md old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/src/data_utils.py b/src/data_utils.py old mode 100644 new mode 100755 index 8dcdcd5..1a4edba --- a/src/data_utils.py +++ b/src/data_utils.py @@ -7,7 +7,7 @@ import geopandas as gpd from concurrent.futures import ThreadPoolExecutor as tpe import logging -logging.basicConfig(level=logging.DEBUG, filename='logs/data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, filename='logs/data_utils.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger('data_utils.py') stream_handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') diff --git a/src/ensure_dirs_exist.py b/src/ensure_dirs_exist.py old mode 100644 new mode 100755 diff --git a/src/fill_db.py b/src/fill_db.py old mode 100644 new mode 100755 index 003b451..f08dd83 --- a/src/fill_db.py +++ b/src/fill_db.py @@ -22,10 +22,10 @@ Make sure db_info contain the correct credentials """ db_info = { 'host': 'localhost', - 'database': 'test-db23', - 'port': '5432', - 'user': 'seb', - 'password': '', + 'database': 'proj_db', + 'port': '5433', + 'user': 'sebl', + 'password': 'hatred-pollute-digraph-disciple', } setup_tables_script = 'setup_tables.sql' load_csvs_into_db_script = 'load_csvs_into_db.sql' diff --git a/src/integrate.py b/src/integrate.py old mode 100644 new mode 100755 index d37d174..330f682 --- a/src/integrate.py +++ b/src/integrate.py @@ -113,7 +113,7 @@ def process_foot_bike_data(files_present=True): }).reset_index() dt_obj = pd.to_datetime(fb_df_grouped['DATE']) days = dt_obj.dt.weekday - fb_df_grouped['Weekday_en'] = days.map(lambda x: weekday_names[x]) + fb_df_grouped.loc[:,'Weekday_en'] = days.map(lambda x: weekday_names[x]) cleaned_fb_df = fb_df_grouped cleaned_fb_df['ID'] = cleaned_fb_df.index + 1 cleaned_fb_df = cleaned_fb_df[['ID', 'NORD', 'OST', 'DATE', 'HRS', 'VELO_IN', 'VELO_OUT', 'FUSS_IN', @@ -131,14 +131,14 @@ def process_miv_data(files_present=True): miv_cols_to_keep = ['MSID','ZSID','Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', 'AnzFahrzeugeStatus', 'Datum', 'Hrs',] - miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep] + miv_df_cols_dropped = miv_df_unified[miv_cols_to_keep].copy() dt_obj = pd.to_datetime(miv_df_cols_dropped['Datum']) days = dt_obj.dt.weekday miv_df_cols_dropped.loc[:, 'Weekday_en'] = days.map(lambda x: weekday_names[x]) miv_df_cols_dropped.loc[:, 'AnzFahrzeuge'] = miv_df_cols_dropped['AnzFahrzeuge'].fillna(0).astype(int) - miv_df_cols_dropped[:, 'ZSID'] = miv_df_cols_dropped['ZSID'].fillna('Missing').astype(str) + miv_df_cols_dropped.loc[:, 'ZSID'] = miv_df_cols_dropped['ZSID'].fillna('Missing').astype(str) miv_df_cols_dropped['ID'] = (miv_df_cols_dropped.index + 1).copy() cleaned_miv_df = miv_df_cols_dropped[['ID', 'MSID', 'ZSID', 'Achse', 'NKoord', 'EKoord', 'Richtung', 'AnzFahrzeuge', @@ -239,7 +239,7 @@ def load_tempo_geojson_from_api_to_local(): if __name__ == '__main__': # ensure_dirs_exist(data_dir, integrated_dir, logs_dir) - # process_all_data_sources(True, True, True) + process_all_data_sources(True, False, False) # miv_to_integrated_csv() # acc_to_cleaned_geojson() load_tempo_geojson_from_api_to_local() diff --git a/src/load_accidents_into_db.sh b/src/load_accidents_into_db.sh old mode 100644 new mode 100755 diff --git a/src/load_csvs_into_db.sql b/src/load_csvs_into_db.sql old mode 100644 new mode 100755 index 77ceb25..b5c3a9d --- a/src/load_csvs_into_db.sql +++ b/src/load_csvs_into_db.sql @@ -1,7 +1,7 @@ -COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' +COPY FootBikeCount FROM '/home/sebl/group-1/src/datasets/integrated/FootBikeCount.csv' DELIMITER ',' CSV HEADER; -COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' +COPY MivCount FROM '/home/sebl/group-1/src/datasets/integrated/MivCount.csv' DELIMITER ',' CSV HEADER; \ No newline at end of file diff --git a/src/load_csvs_into_db.sql.backup b/src/load_csvs_into_db.sql.backup new file mode 100755 index 0000000..77ceb25 --- /dev/null +++ b/src/load_csvs_into_db.sql.backup @@ -0,0 +1,7 @@ +COPY FootBikeCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/FootBikeCount.csv' + DELIMITER ',' + CSV HEADER; + +COPY MivCount FROM '/Users/seb/Projects/repos/group-1/src/datasets/integrated/MivCount.csv' + DELIMITER ',' + CSV HEADER; \ No newline at end of file diff --git a/src/queries.sql b/src/queries.sql old mode 100644 new mode 100755 diff --git a/src/setup_tables.sql b/src/setup_tables.sql old mode 100644 new mode 100755 diff --git a/src/testArea.ipynb b/src/testArea.ipynb old mode 100644 new mode 100755