starting to build the warehouse
This commit is contained in:
@@ -5,3 +5,4 @@ server_knowledge_cache.json
|
|||||||
data/*
|
data/*
|
||||||
.venv/*
|
.venv/*
|
||||||
__pycache__/*
|
__pycache__/*
|
||||||
|
*/__pycache__/*
|
||||||
|
|||||||
+1
-1
@@ -6,7 +6,7 @@ entities:
|
|||||||
- transactions
|
- transactions
|
||||||
- scheduled_transactions
|
- scheduled_transactions
|
||||||
base_url: https://api.ynab.com/v1/budgets
|
base_url: https://api.ynab.com/v1/budgets
|
||||||
knowledge_file: server_knowledge_cache.json
|
knowledge_file: data/server_knowledge_cache.json
|
||||||
primary_keys:
|
primary_keys:
|
||||||
accounts:
|
accounts:
|
||||||
unique_id: id
|
unique_id: id
|
||||||
|
|||||||
+57
-1
@@ -20,5 +20,61 @@ erDiagram
|
|||||||
string account_type_name
|
string account_type_name
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CATEGORIES {
|
||||||
|
int category_id
|
||||||
|
string category_name
|
||||||
|
string category_group_name
|
||||||
|
boolean hidden
|
||||||
|
text note
|
||||||
|
decimal budgeted
|
||||||
|
decimal activity
|
||||||
|
decimal balance
|
||||||
|
boolean deleted
|
||||||
|
}
|
||||||
|
|
||||||
|
PAYEES {
|
||||||
|
int payee_id
|
||||||
|
string payee_name
|
||||||
|
boolean deleted
|
||||||
|
}
|
||||||
|
|
||||||
|
DATES {
|
||||||
|
int date_id
|
||||||
|
string date
|
||||||
|
int year
|
||||||
|
int month
|
||||||
|
int day
|
||||||
|
}
|
||||||
|
|
||||||
|
TRANSACTIONS {
|
||||||
|
int transaction_id
|
||||||
|
int account_id
|
||||||
|
int category_id
|
||||||
|
int payee_id
|
||||||
|
int date_id
|
||||||
|
decimal amount
|
||||||
|
boolean cleared
|
||||||
|
boolean approved
|
||||||
|
boolean deleted
|
||||||
|
}
|
||||||
|
|
||||||
|
SCHEDULED_TRANSACTIONS {
|
||||||
|
int scheduled_transaction_id
|
||||||
|
int account_id
|
||||||
|
int category_id
|
||||||
|
int payee_id
|
||||||
|
int date_id
|
||||||
|
decimal amount
|
||||||
|
string frequency
|
||||||
|
boolean deleted
|
||||||
|
}
|
||||||
|
|
||||||
ACCOUNTS ||--o{ ACCOUNT_TYPES : "has type"
|
ACCOUNTS ||--o{ ACCOUNT_TYPES : "has type"
|
||||||
```
|
TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
|
||||||
|
TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
|
||||||
|
TRANSACTIONS ||--o{ PAYEES : "belongs to"
|
||||||
|
TRANSACTIONS ||--o{ DATES : "occurred on"
|
||||||
|
SCHEDULED_TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
|
||||||
|
SCHEDULED_TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
|
||||||
|
SCHEDULED_TRANSACTIONS ||--o{ PAYEES : "belongs to"
|
||||||
|
SCHEDULED_TRANSACTIONS ||--o{ DATES : "scheduled on"
|
||||||
@@ -3,8 +3,8 @@ import dotenv
|
|||||||
import logging
|
import logging
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from ingest import Ingest
|
from pipeline.ingest import Ingest
|
||||||
from raw_to_base import RawToBase
|
from pipeline.raw_to_base import RawToBase
|
||||||
|
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
|||||||
@@ -48,6 +48,9 @@ class Ingest:
|
|||||||
with open(self.knowledge_file, 'r') as f:
|
with open(self.knowledge_file, 'r') as f:
|
||||||
knowledge_cache = json.load(f)
|
knowledge_cache = json.load(f)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
# If the file does not exist, create an empty cache
|
||||||
|
# also create its parent directory so the cache file can be saved later
|
||||||
|
os.makedirs(os.path.dirname(self.knowledge_file), exist_ok=True)
|
||||||
knowledge_cache = {}
|
knowledge_cache = {}
|
||||||
|
|
||||||
knowledge_cache[entity] = server_knowledge
|
knowledge_cache[entity] = server_knowledge
|
||||||
@@ -3,7 +3,7 @@ import polars as pl
|
|||||||
entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions']
|
entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions']
|
||||||
|
|
||||||
for entity in entities:
|
for entity in entities:
|
||||||
print(f"Processing entity: {entity}")
|
# print(f"Processing entity: {entity}")
|
||||||
file_path = f'data/base/{entity}.parquet'
|
file_path = f'data/base/{entity}.parquet'
|
||||||
# Read the parquet file into a polars DataFrame
|
# Read the parquet file into a polars DataFrame
|
||||||
entity_df = pl.read_parquet(file_path)
|
entity_df = pl.read_parquet(file_path)
|
||||||
@@ -11,5 +11,5 @@ for entity in entities:
|
|||||||
print(f"Schema of {entity} DataFrame:")
|
print(f"Schema of {entity} DataFrame:")
|
||||||
print(entity_df.schema)
|
print(entity_df.schema)
|
||||||
# Display the first few rows of the DataFrame
|
# Display the first few rows of the DataFrame
|
||||||
print(f"First few rows of {entity} DataFrame:")
|
# print(f"First few rows of {entity} DataFrame:")
|
||||||
print(entity_df.head())
|
# print(entity_df.head())
|
||||||
|
|||||||
Reference in New Issue
Block a user