changes to make dash app work

This commit is contained in:
Jake Pullen
2024-08-27 15:12:44 +01:00
parent 173c0594a8
commit 7b80b52998
4 changed files with 126 additions and 34 deletions
+98 -29
View File
@@ -1,47 +1,106 @@
'''Module to create a Dash app that displays visualizations of YNAB data.'''
import polars as pl import polars as pl
import plotly.express as px import plotly.express as px
from dash import Dash, html, dcc from dash import Dash, html, dcc
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from dash.dash_table import DataTable import pandas as pd
# Incorporate data # Load data
df = pl.read_parquet('data/warehouse/transactions.parquet') accounts = pl.read_parquet('data/warehouse/accounts.parquet')
print("Data loaded from Parquet file:") categories = pl.read_parquet('data/warehouse/categories.parquet')
print(df) dates = pl.read_parquet('data/warehouse/dates.parquet')
payees = pl.read_parquet('data/warehouse/payees.parquet')
scheduled_transactions = pl.read_parquet('data/warehouse/scheduled_transactions.parquet')
transactions = pl.read_parquet('data/warehouse/transactions.parquet')
relevant_data = df.sql(''' # Join transactions with categories, accounts, payees, and dates to create a master DataFrame
master_df = transactions.join(categories, left_on='category_id', right_on='id', suffix='_category')\
.join(accounts, left_on='account_id', right_on='id', suffix='_account')\
.join(payees, left_on='payee_id', right_on='id', suffix='_payee')\
.join(dates, left_on='transaction_date', right_on='date_id', suffix='_date')\
# Create aggregations
spend_per_day = master_df.sql('''
SELECT SELECT
date, date,
sum(transaction_amount) as total year,
month,
day,
ABS(SUM(transaction_amount)) as total
FROM self FROM self
GROUP BY date WHERE category_name != 'Inflow: Ready to Assign'
GROUP BY date, year, month, day
ORDER BY date DESC ORDER BY date DESC
''' '''
) )
print("Data after SQL query:")
print(relevant_data) spend_per_category = master_df.sql('''
SELECT
category_name,
ABS(SUM(transaction_amount)) as total
FROM self
WHERE category_name != 'Inflow: Ready to Assign'
GROUP BY category_name
ORDER BY total DESC
'''
)
spend_per_payee = master_df.sql('''
SELECT
payee_name,
ABS(SUM(transaction_amount)) as total
FROM self
WHERE payee_name != 'Starting Balance'
AND transaction_amount < 0
GROUP BY payee_name
ORDER BY total DESC
'''
)
# Convert DataFrame to list of dictionaries # Convert DataFrame to list of dictionaries
data = relevant_data.to_dicts() spend_per_day_data = spend_per_day.to_dicts()
print("Data converted to list of dictionaries:") spend_per_category_data = spend_per_category.to_dicts()
print(data) spend_per_payee_data = spend_per_payee.to_dicts()
# Initialize the app with a dark theme # Convert list of dictionaries to Pandas DataFrame
app = Dash(external_stylesheets=[dbc.themes.DARKLY]) spend_per_day_df = pd.DataFrame(spend_per_day_data)
spend_per_category_df = pd.DataFrame(spend_per_category_data)
spend_per_payee_df = pd.DataFrame(spend_per_payee_data)
# Create the line graph with dark mode styling spend_per_day_line = px.line(spend_per_day_df, x="date", y="total")
fig = px.line(relevant_data.to_pandas(), x="date", y="total", title='Spend Per Day') spend_per_day_line.update_layout(
fig.update_layout(
plot_bgcolor='black', plot_bgcolor='black',
paper_bgcolor='black', paper_bgcolor='black',
font_color='white' font_color='white'
) )
spend_per_category_bar = px.bar(spend_per_category_df, x="category_name", y="total")
spend_per_category_bar.update_layout(
plot_bgcolor='black',
paper_bgcolor='black',
font_color='white'
)
spend_per_payee_bar = px.bar(spend_per_payee_df, x="payee_name", y="total")
spend_per_payee_bar.update_layout(
plot_bgcolor='black',
paper_bgcolor='black',
font_color='white'
)
# Initialize the app with a dark theme
app = Dash(external_stylesheets=[dbc.themes.DARKLY])
# App layout # App layout
app.layout = dbc.Container( app.layout = dbc.Container(
[ [
dbc.Row( dbc.Row(
dbc.Col(html.Div("My First App with My Data", className="text-center text-light"), width=12) dbc.Col(
html.Div("Data Pipeline For YNAB, Preview Visualisations",
className="text-center text-light"),
width=12
)
), ),
dbc.Row( dbc.Row(
[ [
@@ -49,14 +108,24 @@ app.layout = dbc.Container(
dbc.Card( dbc.Card(
dbc.CardBody( dbc.CardBody(
[ [
html.H4("Data Table", className="card-title"), html.H4("Spend Per Day", className="card-title"),
DataTable( dcc.Graph(figure=spend_per_day_line)
data=data, ]
columns=[{"name": i, "id": i} for i in relevant_data.columns], ),
page_size=5, className="mb-4"
style_header={'backgroundColor': 'black', 'color': 'white'}, ),
style_cell={'backgroundColor': 'black', 'color': 'white'} width=12
) )
]
),
dbc.Row(
[
dbc.Col(
dbc.Card(
dbc.CardBody(
[
html.H4("Spend Per Category", className="card-title"),
dcc.Graph(figure=spend_per_category_bar)
] ]
), ),
className="mb-4" className="mb-4"
@@ -67,8 +136,8 @@ app.layout = dbc.Container(
dbc.Card( dbc.Card(
dbc.CardBody( dbc.CardBody(
[ [
html.H4("Spend Per Day", className="card-title"), html.H4("Spend Per Payee", className="card-title"),
dcc.Graph(figure=fig) dcc.Graph(figure=spend_per_payee_bar)
] ]
), ),
className="mb-4" className="mb-4"
+12
View File
@@ -191,6 +191,18 @@ class DimDate(Dimensions):
except Exception as e: except Exception as e:
logging.error(f"Failed to create a new column to indicate if the date is a weekday or weekend: {e}") logging.error(f"Failed to create a new column to indicate if the date is a weekday or weekend: {e}")
return return
# Create a primary key string (YYYYMMDD) by concatenating year, zero-padded month, and zero-padded day with no separators
try:
dates_df = dates_df.with_columns([
(pl.col('year').cast(pl.Utf8) +
pl.col('month').cast(pl.Utf8).str.zfill(2) +
pl.col('day').cast(pl.Utf8).str.zfill(2)
).alias('date_id')
])
except Exception as e:
logging.error(f"Failed to create the primary key column: {e}")
return
# Write the DataFrame to a new parquet file # Write the DataFrame to a new parquet file
logging.info("Writing the transformed dates DataFrame to parquet file") logging.info("Writing the transformed dates DataFrame to parquet file")
try: try:
+14 -3
View File
@@ -27,12 +27,23 @@ class FactTransactions(Facts):
# Transform the DataFrame # Transform the DataFrame
logging.info("Transforming the transactions DataFrame") logging.info("Transforming the transactions DataFrame")
try:
# Parse the date column from a "%Y-%m-%d" string into a Date so the .dt accessors can be used on it
transactions_df = transactions_df.with_columns([
pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d").alias("date")
])
except Exception as e:
logging.error(f"Failed to covert the date to date format: {e}")
return
try: try:
transactions_df = ( transactions_df = (
transactions_df transactions_df
.with_columns([ .with_columns([
pl.col("id").alias("transaction_id"), pl.col("id").alias("transaction_id"),
pl.col("date").alias("transaction_date"), (pl.col("date").dt.year().cast(pl.Utf8) +
pl.col("date").dt.month().cast(pl.Utf8).str.zfill(2) +
pl.col("date").dt.day().cast(pl.Utf8).str.zfill(2)).alias("transaction_date"),
pl.col("amount").alias("transaction_amount"), pl.col("amount").alias("transaction_amount"),
pl.col("memo").alias("transaction_memo"), pl.col("memo").alias("transaction_memo"),
pl.col("cleared").alias("transaction_cleared"), pl.col("cleared").alias("transaction_cleared"),
@@ -45,7 +56,7 @@ class FactTransactions(Facts):
]) ])
.with_columns([ .with_columns([
pl.col("memo").fill_null("unknown"), pl.col("memo").fill_null("unknown"),
(pl.col("amount") / 100).alias("transaction_amount"), (pl.col("amount") / 1000).alias("transaction_amount"),
]) ])
.drop([ .drop([
"transfer_transaction_id", "matched_transaction_id", "import_id", "transfer_transaction_id", "matched_transaction_id", "import_id",
@@ -98,7 +109,7 @@ class FactScheduledTransactions(Facts):
]) ])
.with_columns([ .with_columns([
pl.col("memo").fill_null("unknown"), pl.col("memo").fill_null("unknown"),
(pl.col("amount") / 100).alias("scheduled_transaction_amount"), (pl.col("amount") / 1000).alias("scheduled_transaction_amount"),
]) ])
.drop([ .drop([
"subtransactions", "deleted","flag_name","account_name", "subtransactions", "deleted","flag_name","account_name",
+1 -1
View File
@@ -130,7 +130,7 @@ Then move the files back in one at a time oldest to newest and run again for eac
df = df.with_columns( df = df.with_columns(
pl.when(pl.col(col).is_null()) pl.when(pl.col(col).is_null())
.then(pl.lit("null")) .then(pl.lit("null"))
.otherwise(pl.col(col).map_elements(lambda x: str(x) if x is not None else "null")) .otherwise(pl.col(col).map_elements(lambda x: str(x) if x is not None else "null", return_dtype=pl.Utf8))
.alias(col) .alias(col)
) )
return df return df