changes to make dash app work

This commit is contained in:
Jake Pullen
2024-08-27 15:12:44 +01:00
parent 173c0594a8
commit 7b80b52998
4 changed files with 126 additions and 34 deletions
+12
View File
@@ -191,6 +191,18 @@ class DimDate(Dimensions):
except Exception as e:
logging.error(f"Failed to create a new column to indicate if the date is a weekday or weekend: {e}")
return
# Create a primary key by concatenating year, month, and day with no separators
try:
dates_df = dates_df.with_columns([
(pl.col('year').cast(pl.Utf8) +
pl.col('month').cast(pl.Utf8).str.zfill(2) +
pl.col('day').cast(pl.Utf8).str.zfill(2)
).alias('date_id')
])
except Exception as e:
logging.error(f"Failed to create the primary key column: {e}")
return
# Write the DataFrame to a new parquet file
logging.info("Writing the transformed dates DataFrame to parquet file")
try:
+14 -3
View File
@@ -27,12 +27,23 @@ class FactTransactions(Facts):
# Transform the DataFrame
logging.info("Transforming the transactions DataFrame")
try:
# Parse the date column from "YYYY-MM-DD" strings into a Date (not Datetime) column
transactions_df = transactions_df.with_columns([
pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d").alias("date")
])
except Exception as e:
logging.error(f"Failed to covert the date to date format: {e}")
return
try:
transactions_df = (
transactions_df
.with_columns([
pl.col("id").alias("transaction_id"),
pl.col("date").alias("transaction_date"),
(pl.col("date").dt.year().cast(pl.Utf8) +
pl.col("date").dt.month().cast(pl.Utf8).str.zfill(2) +
pl.col("date").dt.day().cast(pl.Utf8).str.zfill(2)).alias("transaction_date"),
pl.col("amount").alias("transaction_amount"),
pl.col("memo").alias("transaction_memo"),
pl.col("cleared").alias("transaction_cleared"),
@@ -45,7 +56,7 @@ class FactTransactions(Facts):
])
.with_columns([
pl.col("memo").fill_null("unknown"),
(pl.col("amount") / 100).alias("transaction_amount"),
(pl.col("amount") / 1000).alias("transaction_amount"),
])
.drop([
"transfer_transaction_id", "matched_transaction_id", "import_id",
@@ -98,7 +109,7 @@ class FactScheduledTransactions(Facts):
])
.with_columns([
pl.col("memo").fill_null("unknown"),
(pl.col("amount") / 100).alias("scheduled_transaction_amount"),
(pl.col("amount") / 1000).alias("scheduled_transaction_amount"),
])
.drop([
"subtransactions", "deleted","flag_name","account_name",
+1 -1
View File
@@ -130,7 +130,7 @@ Then move the files back in one at a time oldest to newest and run again for eac
df = df.with_columns(
pl.when(pl.col(col).is_null())
.then(pl.lit("null"))
.otherwise(pl.col(col).map_elements(lambda x: str(x) if x is not None else "null"))
.otherwise(pl.col(col).map_elements(lambda x: str(x) if x is not None else "null", return_dtype=pl.Utf8))
.alias(col)
)
return df