Refactor join conditions in dash_app

Fix is_weekday issue that was treating Fridays as a weekend
Update ERD
This commit is contained in:
Jake Pullen
2024-08-29 11:02:15 +01:00
parent bd0ebd38e9
commit 91d67896d1
5 changed files with 30 additions and 13 deletions
+2 -1
View File
@@ -10,4 +10,5 @@ NOT_FOUND = 8
CONFLICT = 9
MOVE_FILE_ERROR = 10
DUPLICATE_RESOLUTION_ERROR = 11
UNIQUE_ID_NOT_FOUND = 12
UNIQUE_ID_NOT_FOUND = 12
NO_DATA_PRODUCED = 13
+3 -3
View File
@@ -14,9 +14,9 @@ scheduled_transactions = pl.read_parquet('data/warehouse/scheduled_transactions.
transactions = pl.read_parquet('data/warehouse/transactions.parquet')
# Join transactions with accounts, categories, and payees to create a master DataFrame
master_df = transactions.join(categories, left_on='category_id', right_on='id', suffix='_category')\
.join(accounts, left_on='account_id', right_on='id', suffix='_account')\
.join(payees, left_on='payee_id', right_on='id', suffix='_payee')\
master_df = transactions.join(categories, left_on='category_id', right_on='category_id', suffix='_category')\
.join(accounts, left_on='account_id', right_on='account_id', suffix='_account')\
.join(payees, left_on='payee_id', right_on='payee_id', suffix='_payee')\
.join(dates, left_on='transaction_date', right_on='date_id', suffix='_date')\
# Create aggregations
+15 -6
View File
@@ -34,23 +34,29 @@ erDiagram
}
DATES {
int date_id
string date
string date_id
date date
int year
int month
int day
boolean is_weekday
int weekday
}
TRANSACTIONS {
int transaction_id
str transaction_id
int account_id
int category_id
int payee_id
int date_id
int transaction_date
decimal amount
boolean cleared
boolean approved
boolean deleted
string memo
string flag_color
str transfer_account_id
}
SCHEDULED_TRANSACTIONS {
@@ -58,10 +64,14 @@ erDiagram
int account_id
int category_id
int payee_id
int date_id
str date_first
str date_next
decimal amount
string frequency
boolean deleted
text memo
string flag_color
str transfer_account_id
}
TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
@@ -73,4 +83,3 @@ erDiagram
SCHEDULED_TRANSACTIONS ||--o{ PAYEES : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ DATES : "scheduled on"
```
+9 -2
View File
@@ -8,7 +8,6 @@ import logging.config
import logging.handlers
import config.exit_codes as ec
#from dash_app import app
from pipeline.pipeline_main import pipeline_main
def set_up_logging():
@@ -58,7 +57,15 @@ config['BUDGET_ID'] = BUDGET_ID
if __name__ == '__main__':
try:
pipeline_main(config)
# app.run() #debug=True)
# Check if the data was successfully created
data_exists = os.path.exists('data/processed') and os.listdir('data/processed')
if data_exists:
from dash_app import app
app.run() # debug=True
else:
logging.error('Data pipeline did not produce any data. Dash app will not run.')
sys.exit(ec.NO_DATA_PRODUCED)
except SystemExit as e:
exit_code = e.code
if exit_code == ec.SUCCESS:
+1 -1
View File
@@ -200,7 +200,7 @@ class DimDate(Dimensions):
try:
# Create a new column to indicate if the date is a weekday or weekend
dates_df = dates_df.with_columns([
(pl.col('weekday') < 5).alias('is_weekday') # True for weekdays (Monday to Friday), False for weekends (Saturday and Sunday)
(pl.col('weekday') < 6).alias('is_weekday') # True for weekdays (Monday to Friday), False for weekends (Saturday and Sunday)
])
except Exception as e:
logging.error(f"Failed to create a new column to indicate if the date is a weekday or weekend: {e}")