Handle missing data warehouse files and bad join in dash_app

This commit is contained in:
Jake Pullen
2024-08-29 11:31:35 +01:00
parent 91d67896d1
commit 975f0df22b
2 changed files with 28 additions and 15 deletions
+2
View File
@@ -12,3 +12,5 @@ MOVE_FILE_ERROR = 10
DUPLICATE_RESOLUTION_ERROR = 11 DUPLICATE_RESOLUTION_ERROR = 11
UNIQUE_ID_NOT_FOUND = 12 UNIQUE_ID_NOT_FOUND = 12
NO_DATA_PRODUCED = 13 NO_DATA_PRODUCED = 13
MISSING_DATA_FILES = 14
BAD_JOIN = 15
+16 -5
View File
@@ -5,22 +5,33 @@ import plotly.express as px
from dash import Dash, html, dcc from dash import Dash, html, dcc
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
import pandas as pd import pandas as pd
import logging
import sys
import config.exit_codes as ec
try:
accounts = pl.read_parquet('data/warehouse/accounts.parquet') accounts = pl.read_parquet('data/warehouse/accounts.parquet')
categories = pl.read_parquet('data/warehouse/categories.parquet') categories = pl.read_parquet('data/warehouse/categories.parquet')
dates = pl.read_parquet('data/warehouse/dates.parquet') dates = pl.read_parquet('data/warehouse/dates.parquet')
payees = pl.read_parquet('data/warehouse/payees.parquet') payees = pl.read_parquet('data/warehouse/payees.parquet')
scheduled_transactions = pl.read_parquet('data/warehouse/scheduled_transactions.parquet') scheduled_transactions = pl.read_parquet('data/warehouse/scheduled_transactions.parquet')
transactions = pl.read_parquet('data/warehouse/transactions.parquet') transactions = pl.read_parquet('data/warehouse/transactions.parquet')
except FileNotFoundError:
logging.error('Data warehouse files not found. Run the data pipeline to create them.')
sys.exit(ec.MISSING_DATA_FILES)
try:
# Join transactions with accounts, categories, and payees to create a master DataFrame # Join transactions with accounts, categories, and payees to create a master DataFrame
master_df = transactions.join(categories, left_on='category_id', right_on='category_id', suffix='_category')\ master_transactions = transactions.join(categories, left_on='category_id', right_on='category_id', suffix='_category')\
.join(accounts, left_on='account_id', right_on='account_id', suffix='_account')\ .join(accounts, left_on='account_id', right_on='account_id', suffix='_account')\
.join(payees, left_on='payee_id', right_on='payee_id', suffix='_payee')\ .join(payees, left_on='payee_id', right_on='payee_id', suffix='_payee')\
.join(dates, left_on='transaction_date', right_on='date_id', suffix='_date')\ .join(dates, left_on='transaction_date', right_on='date_id', suffix='_date')
except Exception as e:
logging.error(f'Error joining DataFrames: {e}')
sys.exit(ec.BAD_JOIN)
# Create aggregations # Create aggregations
spend_per_day = master_df.sql(''' spend_per_day = master_transactions.sql('''
SELECT SELECT
date, date,
year, year,
@@ -34,7 +45,7 @@ spend_per_day = master_df.sql('''
''' '''
) )
spend_per_category = master_df.sql(''' spend_per_category = master_transactions.sql('''
SELECT SELECT
category_name, category_name,
ABS(SUM(transaction_amount)) as total ABS(SUM(transaction_amount)) as total
@@ -45,7 +56,7 @@ spend_per_category = master_df.sql('''
''' '''
) )
spend_per_payee = master_df.sql(''' spend_per_payee = master_transactions.sql('''
SELECT SELECT
payee_name, payee_name,
ABS(SUM(transaction_amount)) as total ABS(SUM(transaction_amount)) as total