starting to build the warehouse

This commit is contained in:
Jake Pullen
2024-08-09 14:23:52 +01:00
parent 33c9496eb0
commit 165464820b
9 changed files with 67 additions and 7 deletions
+1
View File
@@ -5,3 +5,4 @@ server_knowledge_cache.json
data/* data/*
.venv/* .venv/*
__pycache__/* __pycache__/*
*/__pycache__/*
+1 -1
View File
@@ -6,7 +6,7 @@ entities:
- transactions - transactions
- scheduled_transactions - scheduled_transactions
base_url: https://api.ynab.com/v1/budgets base_url: https://api.ynab.com/v1/budgets
knowledge_file: server_knowledge_cache.json knowledge_file: data\server_knowledge_cache.json
primary_keys: primary_keys:
accounts: accounts:
unique_id: id unique_id: id
+57 -1
View File
@@ -20,5 +20,61 @@ erDiagram
string account_type_name string account_type_name
} }
CATEGORIES {
int category_id
string category_name
string category_group_name
boolean hidden
text note
decimal budgeted
decimal activity
decimal balance
boolean deleted
}
PAYEES {
int payee_id
string payee_name
boolean deleted
}
DATES {
int date_id
string date
int year
int month
int day
}
TRANSACTIONS {
int transaction_id
int account_id
int category_id
int payee_id
int date_id
decimal amount
boolean cleared
boolean approved
boolean deleted
}
SCHEDULED_TRANSACTIONS {
int scheduled_transaction_id
int account_id
int category_id
int payee_id
int date_id
decimal amount
string frequency
boolean deleted
}
ACCOUNTS ||--o{ ACCOUNT_TYPES : "has type" ACCOUNTS ||--o{ ACCOUNT_TYPES : "has type"
``` TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
TRANSACTIONS ||--o{ PAYEES : "belongs to"
TRANSACTIONS ||--o{ DATES : "occurred on"
SCHEDULED_TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ PAYEES : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ DATES : "scheduled on"
+2 -2
View File
@@ -3,8 +3,8 @@ import dotenv
import logging import logging
import yaml import yaml
from ingest import Ingest from pipeline.ingest import Ingest
from raw_to_base import RawToBase from pipeline.raw_to_base import RawToBase
dotenv.load_dotenv() dotenv.load_dotenv()
View File
+3
View File
@@ -48,6 +48,9 @@ class Ingest:
with open(self.knowledge_file, 'r') as f: with open(self.knowledge_file, 'r') as f:
knowledge_cache = json.load(f) knowledge_cache = json.load(f)
except FileNotFoundError: except FileNotFoundError:
# If the file does not exist, start with an empty cache.
# Also create its parent directory so the file can be saved there later.
os.makedirs(os.path.dirname(self.knowledge_file), exist_ok=True)
knowledge_cache = {} knowledge_cache = {}
knowledge_cache[entity] = server_knowledge knowledge_cache[entity] = server_knowledge
+3 -3
View File
@@ -3,7 +3,7 @@ import polars as pl
entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions'] entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions']
for entity in entities: for entity in entities:
print(f"Processing entity: {entity}") # print(f"Processing entity: {entity}")
file_path = f'data/base/{entity}.parquet' file_path = f'data/base/{entity}.parquet'
# Read the parquet file into a polars DataFrame # Read the parquet file into a polars DataFrame
entity_df = pl.read_parquet(file_path) entity_df = pl.read_parquet(file_path)
@@ -11,5 +11,5 @@ for entity in entities:
print(f"Schema of {entity} DataFrame:") print(f"Schema of {entity} DataFrame:")
print(entity_df.schema) print(entity_df.schema)
# Display the first few rows of the DataFrame # Display the first few rows of the DataFrame
print(f"First few rows of {entity} DataFrame:") # print(f"First few rows of {entity} DataFrame:")
print(entity_df.head()) # print(entity_df.head())