From 31b82dc1d00c4ae8d4f9d7649a25faeefd74d98e Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Sun, 28 Jul 2024 10:45:59 +0100 Subject: [PATCH] fine tuning injestion --- ReadMe.md | 22 ++++++++++ __pycache__/injest.cpython-310.pyc | Bin 0 -> 3420 bytes injest.py | 63 ++++++++++++++++------------- main.py | 10 +++++ 4 files changed, 68 insertions(+), 27 deletions(-) create mode 100644 ReadMe.md create mode 100644 __pycache__/injest.cpython-310.pyc diff --git a/ReadMe.md b/ReadMe.md new file mode 100644 index 0000000..57695d2 --- /dev/null +++ b/ReadMe.md @@ -0,0 +1,22 @@ +# Budget Management System + +This project is a Budget Management System that fetches and caches budget-related data from an API. It organizes the data into various categories and handles rate limits to ensure smooth operation. + +## Project Structure + +## Setup + +1. **Set up environment variables:** + + Create a `.env` file in the root directory and add your API token and budget ID: + ``` + API_TOKEN=your_api_token + BUDGET_ID=your_budget_id + ``` + +## Usage + +## Contributing + +## License + diff --git a/__pycache__/injest.cpython-310.pyc b/__pycache__/injest.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..340d397d3d76d0049b66c72df460227dc92e5a20 GIT binary patch literal 3420 zcmd1j<>g{vU|>QW#Pga~Pr^G-DJan9mf& z1g4pzn87ql6pK4U3R4Pm3quNXGgB063S%&XCd*5ZseZ{I5*ahW%(r1+U`S;EnHj~D z!kEI;!Vtxr!kog=!Vtxh!kWU?!Vtxp!k)s>!Vtxl!kNO=!Vtxt!kxm?!Vtxg!W+z> z$#;t_GcPN(xFi{5Cd>{H8{`LPkRMVQ7#M08Qy3)~(im$P;z4XCD4P*vR}DiJ!)%7R zOqq;ed8TxR6xKx$HggSQ3L8`{OATWRyCg#mLp*B@!veO2AQpQK!vc6)%B)H($;{8IVs}bSEJ`g>&}6&C zk(ivEUz%4^e2XhNu_QG;zbG@c_!e7keqKpN@h!H3#LCpv;#)i=MTvREiOC?{#ka(Y zlQU9NN^??E;!(sok`jwk<4cQjZt-R3<(KEArlhCFr)B1(YH}8V0;7n9fq~%`dq!$v zN@`K@EsoT@lFX9K)Z!u@1_p*AUXY|nPJUuaJVJMJVsb|6Eq-(X>9o|6lyp(vb zig>V5mGLQwC5f6$w^)i(bJA{cL*gYqGcPTFB}0)Y0|Ub^HT{hI+*JLn#Ozf4qSS)? zV*Rwtyu`fZ)cBP041I_Oy@JYH9P#m)d6^~g@tmLt6kuRr;9=xp6k!CBT#Q_dJd9QR zP}MM%$&9ebXJB9e#XkoF0|PkzJs21mN*J;j7ckW@WHB#fEK(|A0kKjTo0rqUxdQs~TLz3Q7z5 zzZmtaSoOf6!C3whl>SySf--DMYF>#Z=Pj0!%-qyl9K|I?X&_dSBq&^@K!h|X%s6rr zvr|(ti;9b6Kw`2WF_x6l+yYH@NT9Mo(%mfRbR1AtJNTTCn6kz0G6k=5RUnLKX zC0sgDVu}?xrsgm(FjUEwFw`()F+!6da|zP|<{IXOjI}Jtp`FKJ$WSbf9M<6sdE6`v zB`lR1phO8ulC><640(*@3^hzOEH#Wa3{Z2Kz;Vf3!j#2Y!%_n-QgOtkJF@xUxMcM! zk^lvgB8X4|5y}h<44Q0^SOAq1MWCoCk^{wupj&26s$YJITYhO?ifd6(eo>JINHr@s zhKe*nY6OZ?i^@`q5Q!NQ34$P_(PBat8lLDnAkhJGOfkr9OpJVtJd85`SsHklI2f7! zb1?HUfultQHCnJ6gHlznF)%QIauT?x$N^UsSq#v8#8ksnq+h~R!d%0&fF*@#A!9RB z3{x$0Eej}2B^YX1K}AOja}Dckh7=}no?@BHx{#5P0hF7v8H#jESQoITuz^HCAq&D_ z5q6jeINwC*1|^oH>iT5nW|kBwfKmq!NI(HBV5LwcqEM7tSejZ~QmjyvnwyxJmzkHY z_fiC;(8{r(peR2vIU_SKU7;wkBvl~?WQv}GbADcNW=d+2LP26_F-S74w5TK_wFqvM zUX?tuy$X<8K_RnPAvd!aq(e`Wqlga_d(7#nCAXM!6ANxJXXcgMVl6Jn$t=0Wo|B)R zo|%_^iv?7^-(oLMECRVplMNj2w>XMYiwpAeic@b1f}9l(c2+#Zl3Tnm|HS7erlj6t z1xwx%f_p9=CJ)J?;3NVnGH(eaXQU=)$0JPC162<~;Oc>eQG}6$QG$_!k<(KOak# zI5a7s>p`g~K;winAJqES3eVHH;}t z3z=$}z$%z)7(tBzP)SoCv zVMo%zoWcR-F^MzOGM8{=vDPrxFl2G3aP~6QveYmv;DMFgS-dG+*-S;TC5#LBQn*33 zL7E>mEDQJ-GSsrxu%_@xGAv+C;a$jB%LZ~I+=YBKY_l2WGK2Kgu%$4RG8Rp)VXI-v zW-gjs!>~Z0hOvfiArm^NX7g`7>TPait z_~k3)rItgSn3i9pP$i+K5S*P^Pyli}xGAInDZunJg^Tz>hM9x15Nk?mQfc}v7Eq8B zfr~w3kT6$rVnJp}VoqjND!2`Hi!-S-B|WtyJ~O4r0;GTgo)JJfvB(}I#$8;JSW;RX zpPUcMI;r3i7hD`0f@GNs@(YSUxk8fzoDm@fFdu5D-C`~+$|(Xh?GYY^SCXLMN6U9b zpd1ELUJNpsi&2D;k5P(If{}$$jFFE~fQgHdi;0VojfsPak4b`2fSHGhgPDhsk5Pn) z^>wn!wZZeSUa#76n-2G9E==X9Bdq%96UTcU^z`TNSi|yWHLAuLGe}O0^)-7 dVl(s>hYiGipyGm|7}R(Mo6N|;$iXPY3;=I)V5k59 literal 0 HcmV?d00001 diff --git a/injest.py b/injest.py index 59e2c1e..f1ad6f9 100644 --- a/injest.py +++ b/injest.py @@ -1,8 +1,28 @@ -class injest: - def __init__(self): +import os +import time +import json +import logging +import requests + +class injest: + def __init__(self, injest_info): + self.API_TOKEN = injest_info['API_TOKEN'], + self.BUDGET_ID = injest_info['BUDGET_ID'], + self.headers = {'Authorization': f'Bearer {self.API_TOKEN}'}, + self.entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions'], + self.base_url = injest_info['base_url'], + self.knowledge_file = injest_info['knowledge_file'] + self.knowledge_cache = self.load_knowledge_cache() + self.fetch_and_cache_entity_data() + + def load_knowledge_cache(self): + if os.path.exists(self.knowledge_file): + with open(self.knowledge_file, 'r') as f: + return json.load(f) + return {} - def update_entity_data_cache(entity, data): + def update_entity_data_cache(self,entity, data): current_time = time.strftime('%Y%m%d%H%M%S') directory = f'data/{entity}' # Directory name is the entity's name if not os.path.exists(directory): @@ -11,20 +31,19 @@ class injest: with open(entity_file, 'w') as f: json.dump(data, f, indent=4) - def update_server_knowledge_cache(entity, server_knowledge): - knowledge_file = 'server_knowledge_cache.json' + def update_server_knowledge_cache(self,entity, server_knowledge): try: - with open(knowledge_file, 'r') as f: + with open(self.knowledge_file, 'r') as f: knowledge_cache = json.load(f) except FileNotFoundError: knowledge_cache = {} knowledge_cache[entity] = server_knowledge - with open(knowledge_file, 'w') as f: + with open(self.knowledge_file, 'w') as f: json.dump(knowledge_cache, f, indent=4) - def check_rate_limit(response): + def check_rate_limit(self,response): rate_limit_header = response.headers.get('X-Rate-Limit') if rate_limit_header: requests_made, limit = map(int, rate_limit_header.split('/')) @@ -36,28 +55,18 @@ class injest: else: logging.warning("X-Rate-Limit header is missing.") - def fetch_and_cache_entity_data(budget_id): - entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions'] - base_url = 'https://api.ynab.com/v1/budgets' - knowledge_file = 'server_knowledge_cache.json' - - # Load existing server knowledge cache - try: - with open(knowledge_file, 'r') as f: - knowledge_cache = json.load(f) - except FileNotFoundError: - knowledge_cache = {} - - for entity in entities: - last_knowledge = knowledge_cache.get(entity, 0) + def fetch_and_cache_entity_data(self): + for entity in self.entities: + logging.debug(f'entity type is {type(entity)}') + last_knowledge = self.knowledge_cache.get(entity, 0) logging.debug(f'Last Knowledge of {entity.capitalize()}: {last_knowledge}') - url = f'{base_url}/{budget_id}/{entity}' + url = f'{self.base_url}/{self.budget_id}/{entity}' if last_knowledge: logging.info(f'Fetching {entity} data since last knowledge: {last_knowledge}') url = url + f'?last_knowledge_of_server={last_knowledge}' - response = requests.get(url, headers=headers) - check_rate_limit(response) # Check and handle rate limit + response = requests.get(url, headers=self.headers) + self.check_rate_limit(response) # Check and handle rate limit if response.status_code == 429: # HTTP 429 Too Many Requests logging.error("Rate limit exceeded. Pausing until the limit is reset.") @@ -72,11 +81,11 @@ class injest: # Check if there is new server knowledge if server_knowledge is not None and server_knowledge != last_knowledge: # Update server knowledge cache - update_server_knowledge_cache(entity, server_knowledge) + self.update_server_knowledge_cache(entity, server_knowledge) # Update entity data cache without server knowledge entity_data = data['data'] entity_data.pop('server_knowledge', None) # Remove server knowledge if exists - update_entity_data_cache(entity, entity_data) + self.update_entity_data_cache(entity, entity_data) else: logging.info(f"No new data for {entity}. Skipping cache update.") diff --git a/main.py b/main.py index 9014473..9bb9ead 100644 --- a/main.py +++ b/main.py @@ -13,3 +13,13 @@ BUDGET_ID = os.getenv('BUDGET_ID') headers = {'Authorization': f'Bearer {API_TOKEN}'} logging.basicConfig(level=logging.DEBUG) +injest_info = {} +#entities = ['accounts', 'categories', 'months', 'payees', 'transactions', 'scheduled_transactions'] +#injest_info['entities'] = entities +injest_info['base_url'] = 'https://api.ynab.com/v1/budgets' +injest_info['knowledge_file'] = 'server_knowledge_cache.json' +injest_info['API_TOKEN'] = API_TOKEN +injest_info['BUDGET_ID'] = BUDGET_ID + + +injest(injest_info)#.fetch_and_cache_entity_data()