diff --git a/src/data_utils.py b/src/data_utils.py
index fa51384..8dcdcd5 100644
--- a/src/data_utils.py
+++ b/src/data_utils.py
@@ -1,3 +1,4 @@
+import json
 import os
 import pandas as pd
 import requests
@@ -115,6 +116,18 @@ def create_unified_df(urls_file, u_string, data_dir, files_present=False):
 
     return df_unified
 
+def load_file_from_api(api_link, target_name, integrated_dir):
+    response = requests.get(api_link)
+    final_location = os.path.join(integrated_dir, target_name)
+    if response.status_code == 200:
+        logger.info(f"Successful GET from {api_link}")
+        data = response.json()
+        # target_name already carries its extension, so write to final_location as-is
+        with open(final_location, 'w') as file:
+            json.dump(data, file)
+        logger.info(f"{api_link} successfully downloaded and saved to {final_location}")
+    else:
+        logger.critical(f"Failed to get data. Status Code: {response.status_code}")
 
 def save_dataframe_to_csv(df, integrated_dir, filename):
     pass
diff --git a/src/fill_db.py b/src/fill_db.py
index 50f6d4c..041dad8 100644
--- a/src/fill_db.py
+++ b/src/fill_db.py
@@ -57,6 +57,24 @@ def run_geojson_loader_script(script, *args):
 
     logger.info(f"Remember to set the correct permissions for the script: chmod +x {script}")
 
+def geojson_loader(*args):
+    geojson_file, db_name, db_user, db_password, db_host, db_port, target_table = args
+    cmd = [
+        "ogr2ogr",
+        "-f", "PostgreSQL",
+        f"PG:dbname='{db_name}' host='{db_host}' port='{db_port}' user='{db_user}' password='{db_password}'",
+        geojson_file,
+        "-nln", target_table,
+        "-append"
+    ]
+    try:
+        # check=True raises CalledProcessError on a non-zero ogr2ogr exit code
+        res = subprocess.run(cmd, check=True, text=True, capture_output=True)
+        logger.info(f"ogr2ogr command executed successfully. Output: {res.stdout}")
+    except subprocess.CalledProcessError as e:
+        logger.exception(f"Error executing ogr2ogr command: {e}")
+
+
 if __name__ == '__main__':
     run_sql(setup_tables_script, db_info)
     logger.info("Finnished setting up tables.")
diff --git a/src/fill_db_alchemy.py b/src/fill_db_alchemy.py
deleted file mode 100644
index b9f053b..0000000
--- a/src/fill_db_alchemy.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import os
-import pandas as pd
-from sqlalchemy import create_engine
-
-integrated_dir = 'datasets/integrated/'
-
-# Set up info needed to connect to db
-db_info = {
-    'host': 'localhost',
-    'database': 'test-db23',
-    'port': '5432',
-    'user': 'seb',
-    'password': '',
-}
-
-csv_table_maps = [
-    {'file': os.path.join(integrated_dir, 'FootBikeCount.csv'), 'table': 'FootBikeCount'},
-    {'file': os.path.join(integrated_dir, 'MivCount.csv'), 'table': 'MivCount'}
-]
-
-# Create a SQLAlchemy engine
-engine = create_engine(
-    f"postgresql://{db_info['user']}:{db_info['password']}@{db_info['host']}:{db_info['port']}/{db_info['database']}",
-    echo=True  # Set echo to True to display SQL queries (optional)
-)
-
-def csv_to_existing_table(csv_file_path, table_name):
-    df = pd.read_csv(csv_file_path)
-    df.to_sql(table_name, engine, if_exists='append', index=False)
-
-for i in csv_table_maps:
-    csv_to_existing_table(i['file'], i['table'])
-
-# Close the SQLAlchemy engine
-engine.dispose()
diff --git a/src/integrate.py b/src/integrate.py
index 8c85bd3..31c2d02 100644
--- a/src/integrate.py
+++ b/src/integrate.py
@@ -29,6 +29,8 @@
 data_dir = 'datasets/'
 integrated_dir = 'datasets/integrated/'
 logs_dir = 'logs/'
+signaled_speeds_json_api = 'https://www.ogd.stadt-zuerich.ch/wfs/geoportal/Signalisierte_Geschwindigkeiten?service=WFS&version=1.1.0&request=GetFeature&outputFormat=GeoJSON&typename=view_geoserver_tempo_ist'
+
 weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                  'Saturday', 'Sunday']
 fb_data_types = {
@@ -233,6 +235,10 @@ def acc_to_cleaned_geojson(acc_present=True):
 
     logger.info(f'Time taken for Accidents: {end_time - start_time3}')
 
+def load_tempo_geojson_from_api_to_local():
+    du.load_file_from_api(signaled_speeds_json_api, 'signaled_speeds.geojson', integrated_dir)
+
+
 if __name__ == '__main__':
     process_all_data_sources(True, True, True)
     # miv_to_integrated_csv()
diff --git a/src/queries.sql b/src/queries.sql
new file mode 100644
index 0000000..f127869
--- /dev/null
+++ b/src/queries.sql
@@ -0,0 +1,43 @@
+SELECT p.id, a.accidentuid, m.id
+FROM footbikecount p, accidents a, mivcount m
+WHERE p.weekday_en = a.accidentweekday_en AND a.accidentweekday_en = m.weekday_en
+AND p.weekday_en = m.weekday_en AND p.hrs = a.accidenthour AND a.accidenthour = m.hrs
+AND p.hrs = m.hrs AND (p.ost - m.ekoord BETWEEN -100 AND 100) AND (p.nord - m.nkoord BETWEEN -100 AND 100);
+
+DROP TABLE IF EXISTS Contemporaneous2;
+
+CREATE TABLE Contemporaneous2 (
+    p_id INTEGER,
+    accidentuid VARCHAR(256),
+    m_id INTEGER,
+    weekday_en VARCHAR(10),
+    hrs INTEGER,
+    distance DOUBLE PRECISION
+);
+
+DROP TABLE IF EXISTS Intermediate2;
+
+CREATE TABLE Intermediate2 AS
+SELECT
+    p.id AS p_id,
+    a.accidentuid,
+    m.id AS m_id,
+    p.weekday_en,
+    p.hrs,
+    SQRT(POWER(p.ost - m.ekoord, 2) + POWER(p.nord - m.nkoord, 2)) AS distance
+FROM
+    footbikecount p,
+    accidents a,
+    mivcount m
+WHERE
+    p.weekday_en = a.accidentweekday_en
+    AND a.accidentweekday_en = m.weekday_en
+    AND p.weekday_en = m.weekday_en
+    AND p.hrs = a.accidenthour
+    AND a.accidenthour = m.hrs
+    AND p.hrs = m.hrs
+    AND (p.ost - m.ekoord BETWEEN -100 AND 100)
+    AND (p.nord - m.nkoord BETWEEN -100 AND 100);
+
+INSERT INTO Contemporaneous2 (p_id, accidentuid, m_id, weekday_en, hrs, distance)
+SELECT p_id, accidentuid, m_id, weekday_en, hrs, distance FROM Intermediate2;
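
Reviewer note: a minimal sketch of how the new pieces in this diff chain together, assuming `db_info` in `fill_db.py` keeps the same keys as the dict removed from `fill_db_alchemy.py`, and that both modules import cleanly; the `signaledspeeds` table name is illustrative, not part of the diff.

```python
import os

from fill_db import db_info, geojson_loader
from integrate import integrated_dir, load_tempo_geojson_from_api_to_local

# Step 1: fetch the WFS layer and persist it as
# datasets/integrated/signaled_speeds.geojson
load_tempo_geojson_from_api_to_local()

# Step 2: push the file into Postgres via ogr2ogr; positional args must match
# geojson_loader's unpacking order (file, dbname, user, password, host, port, table)
geojson_loader(
    os.path.join(integrated_dir, 'signaled_speeds.geojson'),
    db_info['database'],
    db_info['user'],
    db_info['password'],
    db_info['host'],
    db_info['port'],
    'signaledspeeds',  # hypothetical target table name
)
```

Because `geojson_loader` unpacks `*args` positionally, a mis-ordered call only fails at connection time, so keeping the call site next to the `db_info` definition makes the mapping easy to audit.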
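
A further suggestion, not applied in this diff: `load_file_from_api` sends the request with no timeout and only logs non-200 responses. A possible hardening, sketched under the assumption that callers are happy to receive an exception on failure:

```python
import json
import os

import requests

def load_file_from_api(api_link, target_name, integrated_dir):
    # A timeout prevents the WFS request from hanging indefinitely
    response = requests.get(api_link, timeout=60)
    # raise_for_status() turns 4xx/5xx responses into requests.HTTPError
    response.raise_for_status()
    final_location = os.path.join(integrated_dir, target_name)
    with open(final_location, 'w') as file:
        json.dump(response.json(), file)
    return final_location
```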