From 91d67896d13fe39a9f9f6cf2e35492af59d31097 Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Thu, 29 Aug 2024 11:02:15 +0100 Subject: [PATCH] Refactor join conditions in dash_app; fix is_weekday issue that was making Fridays a weekend; update ERD --- config/exit_codes.py | 3 ++- dash_app.py | 6 +++--- docs/ERD.md | 21 +++++++++++++++------ main.py | 11 +++++++++-- pipeline/dimensions.py | 2 +- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/config/exit_codes.py b/config/exit_codes.py index 5c508c0..d1e9521 100644 --- a/config/exit_codes.py +++ b/config/exit_codes.py @@ -10,4 +10,5 @@ NOT_FOUND = 8 CONFLICT = 9 MOVE_FILE_ERROR = 10 DUPLICATE_RESOLUTION_ERROR = 11 -UNIQUE_ID_NOT_FOUND = 12 \ No newline at end of file +UNIQUE_ID_NOT_FOUND = 12 +NO_DATA_PRODUCED = 13 \ No newline at end of file diff --git a/dash_app.py b/dash_app.py index 9533285..d83f408 100644 --- a/dash_app.py +++ b/dash_app.py @@ -14,9 +14,9 @@ scheduled_transactions = pl.read_parquet('data/warehouse/scheduled_transactions. 
transactions = pl.read_parquet('data/warehouse/transactions.parquet') # Join transactions with accounts, categories, and payees to create a master DataFrame -master_df = transactions.join(categories, left_on='category_id', right_on='id', suffix='_category')\ - .join(accounts, left_on='account_id', right_on='id', suffix='_account')\ - .join(payees, left_on='payee_id', right_on='id', suffix='_payee')\ +master_df = transactions.join(categories, left_on='category_id', right_on='category_id', suffix='_category')\ + .join(accounts, left_on='account_id', right_on='account_id', suffix='_account')\ + .join(payees, left_on='payee_id', right_on='payee_id', suffix='_payee')\ .join(dates, left_on='transaction_date', right_on='date_id', suffix='_date')\ # Create aggregations diff --git a/docs/ERD.md b/docs/ERD.md index 23cc120..b0e2ea1 100644 --- a/docs/ERD.md +++ b/docs/ERD.md @@ -34,23 +34,29 @@ erDiagram } DATES { - int date_id - string date + string date_id + date date int year int month int day + boolean is_weekday + int weekday } TRANSACTIONS { - int transaction_id + str transaction_id int account_id int category_id int payee_id - int date_id + int transaction_date decimal amount boolean cleared boolean approved boolean deleted + string memo + string flag_color + str transfer_account_id + } SCHEDULED_TRANSACTIONS { @@ -58,10 +64,14 @@ erDiagram int account_id int category_id int payee_id - int date_id + str date_first + str date_next decimal amount string frequency boolean deleted + text memo + string flag_color + str transfer_account_id } TRANSACTIONS ||--o{ ACCOUNTS : "belongs to" @@ -73,4 +83,3 @@ erDiagram SCHEDULED_TRANSACTIONS ||--o{ PAYEES : "belongs to" SCHEDULED_TRANSACTIONS ||--o{ DATES : "scheduled on" ``` - diff --git a/main.py b/main.py index e06bcec..41f079a 100644 --- a/main.py +++ b/main.py @@ -8,7 +8,6 @@ import logging.config import logging.handlers import config.exit_codes as ec -#from dash_app import app from pipeline.pipeline_main import pipeline_main 
def set_up_logging(): @@ -58,7 +57,15 @@ config['BUDGET_ID'] = BUDGET_ID if __name__ == '__main__': try: pipeline_main(config) - # app.run() #debug=True) + + # Check if the data was successfully created + data_exists = os.path.exists('data/processed') and os.listdir('data/processed') + if data_exists: + from dash_app import app + app.run() # debug=True + else: + logging.error('Data pipeline did not produce any data. Dash app will not run.') + sys.exit(ec.NO_DATA_PRODUCED) except SystemExit as e: exit_code = e.code if exit_code == ec.SUCCESS: diff --git a/pipeline/dimensions.py b/pipeline/dimensions.py index efa1390..146e303 100644 --- a/pipeline/dimensions.py +++ b/pipeline/dimensions.py @@ -200,7 +200,7 @@ class DimDate(Dimensions): try: # Create a new column to indicate if the date is a weekday or weekend dates_df = dates_df.with_columns([ - (pl.col('weekday') < 5).alias('is_weekday') # True for weekdays (Monday to Friday), False for weekends (Saturday and Sunday) + (pl.col('weekday') < 6).alias('is_weekday') # True for weekdays (Monday to Friday), False for weekends (Saturday and Sunday) ]) except Exception as e: logging.error(f"Failed to create a new column to indicate if the date is a weekday or weekend: {e}")