starting to build the warehouse
This commit is contained in:
@@ -5,3 +5,4 @@ server_knowledge_cache.json
|
||||
data/*
|
||||
.venv/*
|
||||
__pycache__/*
|
||||
*/__pycache__/*
|
||||
|
||||
+1
-1
@@ -6,7 +6,7 @@ entities:
|
||||
- transactions
|
||||
- scheduled_transactions
|
||||
base_url: https://api.ynab.com/v1/budgets
|
||||
knowledge_file: server_knowledge_cache.json
|
||||
knowledge_file: data/server_knowledge_cache.json
|
||||
primary_keys:
|
||||
accounts:
|
||||
unique_id: id
|
||||
|
||||
+57
-1
@@ -20,5 +20,61 @@ erDiagram
|
||||
string account_type_name
|
||||
}
|
||||
|
||||
CATEGORIES {
|
||||
int category_id
|
||||
string category_name
|
||||
string category_group_name
|
||||
boolean hidden
|
||||
text note
|
||||
decimal budgeted
|
||||
decimal activity
|
||||
decimal balance
|
||||
boolean deleted
|
||||
}
|
||||
|
||||
PAYEES {
|
||||
int payee_id
|
||||
string payee_name
|
||||
boolean deleted
|
||||
}
|
||||
|
||||
DATES {
|
||||
int date_id
|
||||
string date
|
||||
int year
|
||||
int month
|
||||
int day
|
||||
}
|
||||
|
||||
TRANSACTIONS {
|
||||
int transaction_id
|
||||
int account_id
|
||||
int category_id
|
||||
int payee_id
|
||||
int date_id
|
||||
decimal amount
|
||||
boolean cleared
|
||||
boolean approved
|
||||
boolean deleted
|
||||
}
|
||||
|
||||
SCHEDULED_TRANSACTIONS {
|
||||
int scheduled_transaction_id
|
||||
int account_id
|
||||
int category_id
|
||||
int payee_id
|
||||
int date_id
|
||||
decimal amount
|
||||
string frequency
|
||||
boolean deleted
|
||||
}
|
||||
|
||||
ACCOUNTS }o--|| ACCOUNT_TYPES : "has type"
|
||||
```
|
||||
TRANSACTIONS }o--|| ACCOUNTS : "belongs to"
|
||||
TRANSACTIONS }o--|| CATEGORIES : "belongs to"
|
||||
TRANSACTIONS }o--|| PAYEES : "belongs to"
|
||||
TRANSACTIONS }o--|| DATES : "occurred on"
|
||||
SCHEDULED_TRANSACTIONS }o--|| ACCOUNTS : "belongs to"
|
||||
SCHEDULED_TRANSACTIONS }o--|| CATEGORIES : "belongs to"
|
||||
SCHEDULED_TRANSACTIONS }o--|| PAYEES : "belongs to"
|
||||
SCHEDULED_TRANSACTIONS }o--|| DATES : "scheduled on"
|
||||
@@ -3,8 +3,8 @@ import dotenv
|
||||
import logging
|
||||
import yaml
|
||||
|
||||
from ingest import Ingest
|
||||
from raw_to_base import RawToBase
|
||||
from pipeline.ingest import Ingest
|
||||
from pipeline.raw_to_base import RawToBase
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
@@ -48,6 +48,9 @@ class Ingest:
|
||||
with open(self.knowledge_file, 'r') as f:
|
||||
knowledge_cache = json.load(f)
|
||||
except FileNotFoundError:
|
||||
# If the file does not exist, create an empty cache
|
||||
# also create the file so we can save to it later
|
||||
os.makedirs(os.path.dirname(self.knowledge_file), exist_ok=True)
|
||||
knowledge_cache = {}
|
||||
|
||||
knowledge_cache[entity] = server_knowledge
|
||||
@@ -3,7 +3,7 @@ import polars as pl
|
||||
entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions']
|
||||
|
||||
for entity in entities:
|
||||
print(f"Processing entity: {entity}")
|
||||
# print(f"Processing entity: {entity}")
|
||||
file_path = f'data/base/{entity}.parquet'
|
||||
# Read the parquet file into a polars DataFrame
|
||||
entity_df = pl.read_parquet(file_path)
|
||||
@@ -11,5 +11,5 @@ for entity in entities:
|
||||
print(f"Schema of {entity} DataFrame:")
|
||||
print(entity_df.schema)
|
||||
# Display the first few rows of the DataFrame
|
||||
print(f"First few rows of {entity} DataFrame:")
|
||||
print(entity_df.head())
|
||||
# print(f"First few rows of {entity} DataFrame:")
|
||||
# print(entity_df.head())
|
||||
|
||||
Reference in New Issue
Block a user