starting to build the warehouse

2024-08-09 14:23:52 +01:00
parent 33c9496eb0
commit 165464820b
9 changed files with 67 additions and 7 deletions
@@ -5,3 +5,4 @@ server_knowledge_cache.json
 data/*
 .venv/*
 __pycache__/*
+*/__pycache__/*
@@ -6,7 +6,7 @@ entities:
  - transactions
  - scheduled_transactions
 base_url: https://api.ynab.com/v1/budgets
-knowledge_file: server_knowledge_cache.json
+knowledge_file: data/server_knowledge_cache.json
 primary_keys:
  accounts:
    unique_id: id
@@ -20,5 +20,61 @@ erDiagram
        string account_type_name
    }
    
+    CATEGORIES {
+        int category_id
+        string category_name
+        string category_group_name
+        boolean hidden
+        text note
+        decimal budgeted
+        decimal activity
+        decimal balance
+        boolean deleted
+    }
+    
+    PAYEES {
+        int payee_id
+        string payee_name
+        boolean deleted
+    }
+    
+    DATES {
+        int date_id
+        string date
+        int year
+        int month
+        int day
+    }
+    
+    TRANSACTIONS {
+        int transaction_id
+        int account_id
+        int category_id
+        int payee_id
+        int date_id
+        decimal amount
+        boolean cleared
+        boolean approved
+        boolean deleted
+    }
+    
+    SCHEDULED_TRANSACTIONS {
+        int scheduled_transaction_id
+        int account_id
+        int category_id
+        int payee_id
+        int date_id
+        decimal amount
+        string frequency
+        boolean deleted
+    }
+    
    ACCOUNTS ||--o{ ACCOUNT_TYPES : "has type"
-```
+    TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
+    TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
+    TRANSACTIONS ||--o{ PAYEES : "belongs to"
+    TRANSACTIONS ||--o{ DATES : "occurred on"
+    SCHEDULED_TRANSACTIONS ||--o{ ACCOUNTS : "belongs to"
+    SCHEDULED_TRANSACTIONS ||--o{ CATEGORIES : "belongs to"
+    SCHEDULED_TRANSACTIONS ||--o{ PAYEES : "belongs to"
+    SCHEDULED_TRANSACTIONS ||--o{ DATES : "scheduled on"
@@ -3,8 +3,8 @@ import dotenv
 import logging
 import yaml

-from ingest import Ingest
-from raw_to_base import RawToBase
+from pipeline.ingest import Ingest
+from pipeline.raw_to_base import RawToBase

 dotenv.load_dotenv()

@@ -48,6 +48,9 @@ class Ingest:
            with open(self.knowledge_file, 'r') as f:
                knowledge_cache = json.load(f)
        except FileNotFoundError:
+            # If the file does not exist, start from an empty cache and
+            # create its parent directory so the cache can be saved later
+            os.makedirs(os.path.dirname(self.knowledge_file), exist_ok=True)
            knowledge_cache = {}
        
        knowledge_cache[entity] = server_knowledge
@@ -3,7 +3,7 @@ import polars as pl
 entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions']

 for entity in entities:
-    print(f"Processing entity: {entity}")
+   # print(f"Processing entity: {entity}")
    file_path = f'data/base/{entity}.parquet'
    # Read the parquet file into a polars DataFrame
    entity_df = pl.read_parquet(file_path)
@@ -11,5 +11,5 @@ for entity in entities:
    print(f"Schema of {entity} DataFrame:")
    print(entity_df.schema)
    # Display the first few rows of the DataFrame
-    print(f"First few rows of {entity} DataFrame:")
-    print(entity_df.head())
+   # print(f"First few rows of {entity} DataFrame:")
+   # print(entity_df.head())