Files
data_pipeline_for_YNAB/data_check.py
T

69 lines
2.1 KiB
Python

import polars as pl
from datetime import date, timedelta
# Load every warehouse table up front. The files are read in the order
# listed, and each result is bound to the module-level name in the same
# position on the left-hand side.
accounts, categories, dates, payees, scheduled_transactions, transactions = map(
    pl.read_parquet,
    (
        'data/warehouse/accounts.parquet',
        'data/warehouse/categories.parquet',
        'data/warehouse/dates.parquet',
        'data/warehouse/payees.parquet',
        'data/warehouse/scheduled_transactions.parquet',
        'data/warehouse/transactions.parquet',
    ),
)
# Widen each transaction with its category, account, payee and calendar
# attributes. Clashing column names coming from the right-hand table get
# the suffix given on that join. No `how` is passed, so the library's
# default join type applies (inner in current polars, which drops rows
# that lack a match in any lookup table -- confirm this is intended).
master_transactions = (
    transactions
    .join(categories, on='category_id', suffix='_category')
    .join(accounts, on='account_id', suffix='_account')
    .join(payees, on='payee_id', suffix='_payee')
    .join(dates, left_on='transaction_date', right_on='date_id', suffix='_date')
)
# Create aggregations
#
# spend_per_day: one row per (date, year, month, day) group, newest date
# first. Rows whose category_name is 'Inflow: Ready to Assign' are left out.
# `total` is the absolute value of the *summed* amounts, so positive and
# negative amounts within a day offset each other before the sign is dropped.
# `self` in the query names the frame the query is run on
# (master_transactions).
spend_per_day = master_transactions.sql('''
SELECT
date,
year,
month,
day,
ABS(SUM(transaction_amount)) as total
FROM self
WHERE category_name != 'Inflow: Ready to Assign'
GROUP BY date, year, month, day
ORDER BY date DESC
'''
)
# spend_per_category: one row per category_name, largest total first.
# The 'Inflow: Ready to Assign' category is excluded. `total` is the
# absolute value of the net sum of transaction_amount for the category,
# not the sum of absolute amounts.
spend_per_category = master_transactions.sql('''
SELECT
category_name,
ABS(SUM(transaction_amount)) as total
FROM self
WHERE category_name != 'Inflow: Ready to Assign'
GROUP BY category_name
ORDER BY total DESC
'''
)
# spend_per_payee: one row per payee_name, largest total first.
# Unlike the two aggregations above, this one keeps only rows with a
# negative transaction_amount, and it excludes the 'Starting Balance' payee
# rather than a category. `total` is the absolute value of the summed
# (negative) amounts.
spend_per_payee = master_transactions.sql('''
SELECT
payee_name,
ABS(SUM(transaction_amount)) as total
FROM self
WHERE payee_name != 'Starting Balance'
AND transaction_amount < 0
GROUP BY payee_name
ORDER BY total DESC
'''
)
def update_dates(start_date, end_date):
    """Return the rows of ``master_transactions`` whose ``date`` is in range.

    Before filtering, the two bounds and the complete, unfiltered
    ``master_transactions`` frame are printed to stdout.

    Args:
        start_date: Lower bound handed to ``is_between``.
        end_date: Upper bound handed to ``is_between``.

    Returns:
        The filtered frame. ``is_between`` is called without ``closed``,
        so the library default decides whether the bounds are included
        (both ends inclusive in current polars -- confirm for the
        pinned version).
    """
    print("start date", start_date)
    print("end date", end_date)
    print(master_transactions)

    in_window = pl.col("date").is_between(start_date, end_date)
    return master_transactions.filter(in_window)
# Smoke check: filter the joined transactions to a recent window and print
# the result.
#
# The start of the window used to be called `one_year_ago` even though it
# was computed as five days before today. The name now matches the value
# and the window length is a single constant; set LOOKBACK_DAYS to 365 if
# a one-year window was what was actually intended.
LOOKBACK_DAYS = 5

today = date.today()
window_start = today - timedelta(days=LOOKBACK_DAYS)
data = update_dates(start_date=window_start, end_date=today)
print(data)