Bug fix no more request limit (#18)

* added tests

* Removed method no longer used due to YNAB API changes
This commit is contained in:
Jake-Pullen
2025-04-04 18:49:44 +01:00
committed by GitHub
parent d155a4c907
commit 5af82e5753
8 changed files with 432 additions and 63 deletions
+32 -44
View File
@@ -4,13 +4,11 @@ import json
import logging
import requests
import sys
import yaml
from typing import Dict, Any
import config.exit_codes as ec
class Ingest:
def __init__(self, config: Dict[str, Any]):
"""
Initialize the Ingest class with the provided configuration.
@@ -22,19 +20,9 @@ class Ingest:
self.entities = config['entities']
self.raw_data_path = config['raw_data_path']
self.headers = {'Authorization': f'Bearer {self.api_token}'}
self.knowledge_cache = self.load_knowledge_cache()
self.MAX_RETRIES = config['REQUESTS_MAX_RETRIES']
self.RETRY_DELAY = config['REQUESTS_RETRY_DELAY']
self.fetch_and_cache_entity_data()
def load_knowledge_cache(self) -> Dict[str, Any]:
    """
    Return the parsed contents of the knowledge file.

    Reads ``self.knowledge_file`` as JSON. When no file exists at that
    path an empty dict is returned instead.
    """
    cache_path = self.knowledge_file

    # Guard clause: nothing on disk yet means there is nothing to load.
    if not os.path.exists(cache_path):
        return {}

    with open(cache_path, 'r') as cache_file:
        cached = json.load(cache_file)
    return cached
def save_entity_data_to_raw(self, entity: str, data: Dict[str, Any]):
"""
@@ -50,8 +38,18 @@ class Ingest:
with open(entity_file, 'w') as f:
json.dump(data, f, indent=4)
except Exception as e:
logging.error(f"Error saving {entity} data: {e}")
logging.error(f"Failed to save data for {entity} to {entity_file}")
raise e
def load_knowledge_cache(self) -> Dict[str, Any]:
    """
    Load the knowledge cache from the file if it exists.

    When ``self.knowledge_file`` does not exist, its parent directory is
    created (so a later write to the same path can succeed) and an empty
    dict is returned. Otherwise the file is parsed as JSON and the result
    is returned.

    Raises whatever ``open`` / ``json.load`` raise for an unreadable or
    malformed file; those errors are deliberately not swallowed here.
    """
    if not os.path.exists(self.knowledge_file):
        parent_dir = os.path.dirname(self.knowledge_file)
        # A bare filename has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError; there is no directory to create in that case.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        return {}

    with open(self.knowledge_file, 'r', encoding='utf-8') as f:
        return json.load(f)
def update_server_knowledge_cache(self, entity: str, server_knowledge: Any):
"""
@@ -64,31 +62,21 @@ class Ingest:
logging.info(f"Knowledge file not found. Creating a new one at {self.knowledge_file}. This is normal for the first run.")
os.makedirs(os.path.dirname(self.knowledge_file), exist_ok=True)
knowledge_cache = {}
knowledge_cache[entity] = server_knowledge
with open(self.knowledge_file, 'w') as f:
json.dump(knowledge_cache, f, indent=4)
def check_rate_limit(self, response: "requests.Response") -> bool:
    """
    Check and handle the rate limit based on the response headers.

    Reads the ``X-Rate-Limit`` header, which this code expects in the form
    ``"<requests_made>/<limit>"``, and logs how many requests remain.

    Returns True when at most one request remains, so the caller should
    stop issuing further requests. Returns False in every other case,
    including when the header is missing or cannot be parsed.
    """
    rate_limit_header = response.headers.get('X-Rate-Limit')
    if not rate_limit_header:
        logging.warning("X-Rate-Limit header is missing.")
        return False

    try:
        requests_made, limit = map(int, rate_limit_header.split('/'))
    except ValueError:
        # Covers non-numeric parts and the wrong number of '/' separated
        # fields; an unreadable header must not abort the ingestion.
        logging.warning(f"Could not parse X-Rate-Limit header: {rate_limit_header!r}")
        return False

    remaining_requests = limit - requests_made
    logging.info(f"Rate Limit: {remaining_requests}/{limit} requests remaining.")

    if remaining_requests < 20:
        logging.warning("Approaching rate limit. Consider pausing further requests.")

    # "<= 1" rather than "== 1": a count that has already reached zero (or
    # gone negative) must stop further requests as well.
    if remaining_requests <= 1:
        logging.error("Rate limit exceeded. ending requests here and moving on with what we have.")
        return True  # tells the caller to break out of any more ingestions

    return False
knowledge_cache = self.load_knowledge_cache()
knowledge_cache[entity] = server_knowledge
try:
with open(self.knowledge_file, 'w') as f:
json.dump(knowledge_cache, f, indent=4)
except Exception as e:
logging.error(f"Failed to update knowledge cache for {entity} in {self.knowledge_file}")
raise e
def handle_response(self, response) -> bool:
if response.status_code == 400:
logging.error("Bad request. The request could not be understood by the API due to malformed syntax or validation errors.")
@@ -100,14 +88,14 @@ class Ingest:
logging.error("Forbidden. Access is denied.")
sys.exit(ec.FORBIDDEN)
elif response.status_code == 404:
logging.error("Not found. The specified URI does not exist.")
logging.error("Not found. The specified URL does not exist.")
sys.exit(ec.NOT_FOUND)
elif response.status_code == 409:
logging.error("Conflict. The resource cannot be saved due to a conflict.")
sys.exit(ec.CONFLICT)
elif response.status_code == 429:
logging.error("Too many requests. You have made too many requests in a short amount of time.")
return True
return True
elif response.status_code == 500:
logging.error("Internal server error. The API experienced an unexpected error.")
return True
@@ -118,7 +106,7 @@ class Ingest:
response.raise_for_status()
return False
def fetch_and_cache_entity_data(self):
def start_ingestion(self):
"""
Fetch and cache data for all entities.
"""
@@ -128,11 +116,13 @@ class Ingest:
logging.warning(f"Raw data exists for {entity} processing any raw data we already have.")
break # break here instead of continue as we dont want to update our server knowledge cache and potentially miss data.
last_knowledge = self.knowledge_cache.get(entity, 0)
knowledge_cache = self.load_knowledge_cache()
last_knowledge = knowledge_cache.get(entity, 0)
#logging.debug(f'Last Knowledge of {entity}: {last_knowledge}')
logging.info(f'Fetching {entity} data since last knowledge: {last_knowledge}')
url = f'{self.base_url}/{self.budget_id}/{entity}?last_knowledge_of_server={last_knowledge}'
response = None
for attempt in range(self.MAX_RETRIES):
try:
response = requests.get(url, headers=self.headers)
@@ -146,11 +136,12 @@ class Ingest:
else:
logging.error("Max retries reached. Exiting.")
sys.exit(ec.REQUESTS_ERROR)
data = response.json()
logging.debug(f'response data: {data}')
server_knowledge = data['data'].get('server_knowledge')
logging.debug(f'{entity} new server knowledge: {server_knowledge}')
if server_knowledge is not None and server_knowledge != last_knowledge:
self.update_server_knowledge_cache(entity, server_knowledge)
entity_data = data['data']
@@ -158,6 +149,3 @@ class Ingest:
self.save_entity_data_to_raw(entity, entity_data)
else:
logging.info(f"No new data for {entity}. Skipping cache update.")
if self.check_rate_limit(response):
break # break out here and continue processing the data we have.