FINAL TOUCH: Add functions to get data from api link.

Refactor bash script into python function to ensure portability.
Add sql queries to create "Contemporaneous" db table.
This commit is contained in:
Sebastian Lenzlinger 2023-12-03 22:52:20 +01:00
parent b79ee792b6
commit e9b1d82517
5 changed files with 78 additions and 35 deletions

View File

@ -1,3 +1,4 @@
import json
import os
import pandas as pd
import requests
@ -115,6 +116,17 @@ def create_unified_df(urls_file, u_string, data_dir, files_present=False):
return df_unified
def load_file_from_api(api_link, target_name, integrated_dir):
response = requests.get(api_link)
final_location = os.path.join(integrated_dir, target_name)
if response.status_code == 200:
logger.info(f"Succesfull get from {api_link}")
data = response.json()
with open(f'{final_location}.geojson', 'w') as file:
json.dump(data, file)
logger.info(f"{api_link} successfully downloaded and saved to {final_location}")
else:
logger.critical(f"Failed to get data. Status Code: {response.status_code}")
def save_dataframe_to_csv(df, integrated_dir, filename):
pass

View File

@ -57,6 +57,24 @@ def run_geojson_loader_script(script, *args):
logger.info(f"Remember to set the correct permissions for the script: chmod +x {script}")
def geojson_loader(*args):
geojson_file, db_name, db_user, db_password, db_host, db_port, target_table = args
cmd = [
"ogr2ogr",
"-f", "PostgreSQL",
f"PG:dbname='{db_name}' host='{db_host}' port='{db_port}' user='{db_user}' password='{db_password}'",
geojson_file,
"-nln", target_table,
"-append"
]
try:
# Run the command
res = subprocess.run(cmd, check=True, text=True, capture_output=True)
logger.info(f"ogr2ogr command executed successfully. Output: {res.stdout}")
except subprocess.CalledProcessError as e:
logger.exception(f"Error executing ogr2ogr command: {e}")
if __name__ == '__main__':
run_sql(setup_tables_script, db_info)
logger.info("Finnished setting up tables.")

View File

@ -1,35 +0,0 @@
import os
import pandas as pd
from sqlalchemy import create_engine
integrated_dir = 'datasets/integrated/'
# Set up info needed to connect to db
db_info = {
'host': 'localhost',
'database': 'test-db23',
'port': '5432',
'user': 'seb',
'password': '',
}
csv_table_maps = [
{'file': os.path.join(integrated_dir, 'FootBikeCount.csv'), 'table': 'FootBikeCount'},
{'file': os.path.join(integrated_dir, 'MivCount.csv'), 'table': 'MivCount'}
]
# Create a SQLAlchemy engine
engine = create_engine(
f"postgresql://{db_info['user']}:{db_info['password']}@{db_info['host']}:{db_info['port']}/{db_info['database']}",
echo=True # Set echo to True to display SQL queries (optional)
)
def csv_to_existing_table(csv_file_path, table_name):
df = pd.read_csv(csv_file_path)
df.to_sql(table_name, engine, if_exists='append', index=False)
for i in csv_table_maps:
csv_to_existing_table(i['file'], i['table'])
# Close the SQLAlchemy engine
engine.dispose()

View File

@ -29,6 +29,8 @@ data_dir = 'datasets/'
integrated_dir = 'datasets/integrated/'
logs_dir = 'logs/'
signaled_speeds_json_api = 'https://www.ogd.stadt-zuerich.ch/wfs/geoportal/Signalisierte_Geschwindigkeiten?service=WFS&version=1.1.0&request=GetFeature&outputFormat=GeoJSON&typename=view_geoserver_tempo_ist'
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
fb_data_types = {
@ -233,6 +235,10 @@ def acc_to_cleaned_geojson(acc_present=True):
logger.info(f'Time taken for Accidents: {end_time - start_time3}')
def load_tempo_geojson_from_api_to_local():
du.load_file_from_api(signaled_speeds_json_api, 'signaled_speeds.geojson', integrated_dir)
if __name__ == '__main__':
process_all_data_sources(True, True, True)
# miv_to_integrated_csv()

42
src/queries.sql Normal file
View File

@ -0,0 +1,42 @@
select p.id, a.accidentuid, m.id
from footbikecount p, accidents a, mivcount m
where p.weekday_en = a.accidentweekday_en AND a.accidentweekday_en = m.weekday_en
AND p.weekday_en = m.weekday_en AND p.hrs = a.accidenthour AND a.accidenthour = m.hrs
AND p.hrs = m.hrs AND (p.ost - m.ekoord between -100 AND 100) AND (p.nord - m.nkoord between -100 AND 100);
DROP TABLE IF EXISTS Contemporaneous2;
CREATE TABLE Contemporaneous2 (
p_id INTEGER,
accidentuid VARCHAR(256),
m_id INTEGER,
weekday_en VARCHAR(10),
hrs INTEGER,
distance DOUBLE PRECISION
);
CREATE TABLE Intermediate2 AS
SELECT
p.id AS p_id,
a.accidentuid,
m.id AS m_id,
p.weekday_en,
p.hrs,
SQRT(POWER(p.ost - m.ekoord, 2) + POWER(p.nord - m.nkoord, 2)) AS distance
FROM
footbikecount p,
accidents a,
mivcount m
WHERE
p.weekday_en = a.accidentweekday_en
AND a.accidentweekday_en = m.weekday_en
AND p.weekday_en = m.weekday_en
AND p.hrs = a.accidenthour
AND a.accidenthour = m.hrs
AND p.hrs = m.hrs
AND (p.ost - m.ekoord BETWEEN -100 AND 100)
AND (p.nord - m.nkoord BETWEEN -100 AND 100);
INSERT INTO Contemporaneous2 (p_id, accidentuid, m_id, weekday_en, hrs, distance)
SELECT p_id, accidentuid, m_id, weekday_en, hrs, distance FROM Intermediate2;