From be2c89bcc2ade99019e4d4caa5c1e5e768016560 Mon Sep 17 00:00:00 2001
From: Jake Pullen <hello@jake-is.me>
Date: Tue, 11 Nov 2025 21:32:16 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=94=97=20I=20Am=20Speed?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                      |  3 +-
 main.py                        | 17 +++++------
 modules/__init__.py            |  9 +++++-
 modules/batch_nimrod.py        |  6 ++--
 modules/combine_timeseries.py  | 14 ++++-----
 modules/generate_timeseries.py | 52 +++++++++++++++-------------------
 6 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/config.py b/config.py
index 6037fd0..1583872 100644
--- a/config.py
+++ b/config.py
@@ -3,5 +3,4 @@ class Config:
     ASC_TOP_FOLDER = "./asc_files"
     CSV_TOP_FOLDER = "./csv_files"
     COMBINED_FOLDER = "./combined_files"
-    AREAS_FILE = 'areas.csv'
-
+    AREAS_FILE = "areas.csv"
diff --git a/main.py b/main.py
index 5b38218..14a866b 100644
--- a/main.py
+++ b/main.py
@@ -25,32 +25,33 @@ if __name__ == "__main__":
 
     batch = BatchNimrod(Config)
     timeseries = GenerateTimeseries(Config)
-    combiner= CombineTimeseries(Config, locations)
+    combiner = CombineTimeseries(Config, locations)
 
     start = time.time()
     logging.info("Starting to process DAT to ASC")
-    batch_checkpoint = time.time()
-    if dat_file_count != asc_file_count:
+    if len(dat_file_count) != len(asc_file_count):
         batch.process_nimrod_files()
+        batch_checkpoint = time.time()
         elapsed_time = batch_checkpoint - start
         logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
     else:
         logging.info("No need to process DAT files, skipping...")
+        batch_checkpoint = time.time()
         time.sleep(1)
 
     for place in locations:
-        logging.info(f'{place[0]} started generating timeseries data.')
+        logging.info(f"{place[0]} started generating timeseries data.")
         timeseries.extract_cropped_rain_data(place)
         place_checkpoint = time.time()
         since_asc_create = place_checkpoint - batch_checkpoint
         elapsed_time = place_checkpoint - start
         logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
-        logging.info(f'total time so far {elapsed_time:.2f} seconds')
+        logging.info(f"total time so far {elapsed_time:.2f} seconds")
 
-    logging.info('combining CSVs into groups')
+    logging.info("combining CSVs into groups")
     combiner.combine_csv_files()
-    logging.info('CSVs combined!')
+    logging.info("CSVs combined!")
     end = time.time()
     elapsed_time = end - start
 
-    logging.info(f'All Complete total time {elapsed_time:.2f} seconds')
\ No newline at end of file
+    logging.info(f"All Complete total time {elapsed_time:.2f} seconds")
diff --git a/modules/__init__.py b/modules/__init__.py
index dc088df..6c3385d 100644
--- a/modules/__init__.py
+++ b/modules/__init__.py
@@ -1,4 +1,11 @@
 from .nimrod import Nimrod
 from .batch_nimrod import BatchNimrod
 from .generate_timeseries import GenerateTimeseries
-from .combine_timeseries import CombineTimeseries
\ No newline at end of file
+from .combine_timeseries import CombineTimeseries
+
+__all__ = [
+    "Nimrod",
+    "BatchNimrod",
+    "GenerateTimeseries",
+    "CombineTimeseries",
+]
diff --git a/modules/batch_nimrod.py b/modules/batch_nimrod.py
index 46964db..8961f1e 100644
--- a/modules/batch_nimrod.py
+++ b/modules/batch_nimrod.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import logging
 
 
-class BatchNimrod():
+class BatchNimrod:
     def __init__(self, config) -> None:
         self.config = config
 
@@ -32,7 +32,7 @@ class BatchNimrod():
 
                 with open(out_file_path, "w") as outfile:
                     image.extract_asc(outfile)
-                
+
                 # delete dat file here
 
                 logging.debug(f"Successfully processed: {in_file_full}")
@@ -44,4 +44,4 @@ class BatchNimrod():
             except Nimrod.PayloadReadError as e:
                 logging.error(f"Failed to load the raster data in {in_file_full}")
                 logging.error(e)
-                continue
\ No newline at end of file
+                continue
diff --git a/modules/combine_timeseries.py b/modules/combine_timeseries.py
index 29ff408..1ba7d59 100644
--- a/modules/combine_timeseries.py
+++ b/modules/combine_timeseries.py
@@ -1,5 +1,6 @@
 import pandas as pd
 
+
 class CombineTimeseries:
     def __init__(self, config, locations):
         self.config = config
@@ -7,7 +8,6 @@ class CombineTimeseries:
         self.grouped_locations = {}
         self.build_location_groups()
 
-    
     def build_location_groups(self):
         for location in self.locations:
             group = location[4]  # output group is at index 4
@@ -15,19 +15,17 @@ class CombineTimeseries:
                 self.grouped_locations[group] = []
             self.grouped_locations[group].append(location)
 
-    
-    
     def combine_csv_files(self):
         for group, loc_list in self.grouped_locations.items():
             combined_df = None
             for loc in loc_list:
-                csv_to_load = f'./csv_files/{loc[0]}_timeseries_data.csv'
+                csv_to_load = f"./csv_files/{loc[0]}_timeseries_data.csv"
                 df = pd.read_csv(csv_to_load, index_col=0)
                 if combined_df is None:
                     combined_df = df
                 else:
-                    combined_df = combined_df.join(df, how='inner')
-            output_file = f'{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv'
+                    combined_df = combined_df.join(df, how="inner")
+            output_file = (
+                f"{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv"
+            )
             combined_df.to_csv(output_file)
-
-
diff --git a/modules/generate_timeseries.py b/modules/generate_timeseries.py
index 881e042..8eeb2ef 100644
--- a/modules/generate_timeseries.py
+++ b/modules/generate_timeseries.py
@@ -1,8 +1,9 @@
 from __future__ import division, print_function
 import numpy as np
-import glob
+from pathlib import Path
 import pandas as pd
 from datetime import datetime
+import os
 
 
 class GenerateTimeseries:
@@ -22,7 +23,6 @@ class GenerateTimeseries:
             header_data = [float(f.__next__().split()[1]) for x in range(6)]
         return header_data
 
-
     def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
         """Calculate crop coordinates based on header data
 
@@ -45,7 +45,7 @@ class GenerateTimeseries:
         ncols_basin = 2  # hardcoded, likely to change?
 
         cellres_radar = radar_header[4]
-        cellres_basin = 1000 # 1km
+        cellres_basin = 1000  # 1km
 
         xp = x0_basin - x0_radar
         yp = y0_basin - y0_radar
@@ -59,10 +59,8 @@ class GenerateTimeseries:
         start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
         end_row = np.ceil(nrows_radar - (yp / cellres_radar))
 
-        #print(start_col, start_row, end_col, end_row)
         return int(start_col), int(start_row), int(end_col), int(end_row)
 
-
     def extract_cropped_rain_data(self, location):
         """Extract cropped rain data and create rainfall timeseries
 
@@ -72,29 +70,24 @@ class GenerateTimeseries:
         rainfile = []
         datetime_list = []
 
-        for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
-            # print(f)
-            radar_header = self._read_ascii_header(f)
+        # Only "*.asc" rasters, as before; os.listdir() would return every entry.
+        for file_path in Path(self.config.ASC_TOP_FOLDER).glob("*.asc"):
+
+            radar_header = self._read_ascii_header(str(file_path))
+
+            # Calculate crop coordinates
             start_col, start_row, end_col, end_row = self._calculate_crop_coords(
                 location, radar_header
             )
 
-            start_col = int(round(start_col))
-            start_row = int(round(start_row))
-            end_col = int(round(end_col))
-            end_row = int(round(end_row))
-
-            cur_rawgrid = np.genfromtxt(
-                f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
-            )
+            cur_rawgrid = np.loadtxt(file_path, skiprows=6, dtype=float, delimiter=None)
 
             cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
-            # Flatten the cropped rain data into a 1D array
-            cur_rainrow = cur_croppedrain.flatten()
-            rainfile.append(cur_rainrow[2]/32)
+
+            rainfile.append(cur_croppedrain.flatten()[2] / 32)
 
             # Extract datetime from filename
-            filename = f.split("/")[-1]  # Get just the filename
+            filename = os.path.basename(file_path)  # Get just the filename
             date_str = filename[:8]  # YYYYMMDD
             time_str = filename[8:12]  # HHMM
 
@@ -102,15 +95,16 @@ class GenerateTimeseries:
             parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
             datetime_list.append(parsed_date)
 
-        rainfile_arr = np.vstack(rainfile)
-
         # Create DataFrame with datetime index
-        df = pd.DataFrame(rainfile_arr, index=datetime_list)
-        # sort the dataframe into date order 
+        df = pd.DataFrame({"rainfall": rainfile}, index=datetime_list)
+
+        # Sort the dataframe into date order
         sorted_df = df.sort_index()
-        # add headers 
-        header_row = [location[1]]
-        file_name = f"csv_files/{location[0]}_timeseries_data.csv"
-        sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
-
 
+        sorted_df.to_csv(
+            f"csv_files/{location[0]}_timeseries_data.csv",
+            sep=",",
+            float_format="%1.4f",
+            header=[location[1]],
+            index_label="datetime",
+        )