starting to build the warehouse

This commit is contained in:
Jake Pullen
2024-08-09 14:23:52 +01:00
parent 33c9496eb0
commit 165464820b
9 changed files with 67 additions and 7 deletions
+1
View File
@@ -5,3 +5,4 @@ server_knowledge_cache.json
data/*
.venv/*
__pycache__/*
*/__pycache__/*
+1 -1
View File
@@ -6,7 +6,7 @@ entities:
- transactions
- scheduled_transactions
base_url: https://api.ynab.com/v1/budgets
knowledge_file: server_knowledge_cache.json
knowledge_file: data\server_knowledge_cache.json
primary_keys:
accounts:
unique_id: id
+57 -1
View File
@@ -20,5 +20,61 @@ erDiagram
string account_type_name
}
CATEGORIES {
int category_id
string category_name
string category_group_name
boolean hidden
text note
decimal budgeted
decimal activity
decimal balance
boolean deleted
}
PAYEES {
int payee_id
string payee_name
boolean deleted
}
DATES {
int date_id
string date
int year
int month
int day
}
TRANSACTIONS {
int transaction_id
int account_id
int category_id
int payee_id
int date_id
decimal amount
boolean cleared
boolean approved
boolean deleted
}
SCHEDULED_TRANSACTIONS {
int scheduled_transaction_id
int account_id
int category_id
int payee_id
int date_id
decimal amount
string frequency
boolean deleted
}
ACCOUNTS ||--o{ ACCOUNT_TYPES : "has type"
```
TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
TRANSACTIONS ||--o{ PAYEES : "belongs to"
TRANSACTIONS ||--o{ DATES : "occurred on"
SCHEDULED_TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ PAYEES : "belongs to"
SCHEDULED_TRANSACTIONS ||--o{ DATES : "scheduled on"
+2 -2
View File
@@ -3,8 +3,8 @@ import dotenv
import logging
import yaml
from ingest import Ingest
from raw_to_base import RawToBase
from pipeline.ingest import Ingest
from pipeline.raw_to_base import RawToBase
dotenv.load_dotenv()
View File
+3
View File
@@ -48,6 +48,9 @@ class Ingest:
with open(self.knowledge_file, 'r') as f:
knowledge_cache = json.load(f)
except FileNotFoundError:
# If the file does not exist, start from an empty cache
# and make sure its parent directory exists so the file can be saved later
os.makedirs(os.path.dirname(self.knowledge_file), exist_ok=True)
knowledge_cache = {}
knowledge_cache[entity] = server_knowledge
+3 -3
View File
@@ -3,7 +3,7 @@ import polars as pl
entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions']
for entity in entities:
print(f"Processing entity: {entity}")
# print(f"Processing entity: {entity}")
file_path = f'data/base/{entity}.parquet'
# Read the parquet file into a polars DataFrame
entity_df = pl.read_parquet(file_path)
@@ -11,5 +11,5 @@ for entity in entities:
print(f"Schema of {entity} DataFrame:")
print(entity_df.schema)
# Display the first few rows of the DataFrame
print(f"First few rows of {entity} DataFrame:")
print(entity_df.head())
# print(f"First few rows of {entity} DataFrame:")
# print(entity_df.head())