chore: 🔧 More cleaning

2025-11-11 11:54:28 +00:00
parent 71af242dcb
commit b7d0f6cd99
6 changed files with 130 additions and 171 deletions
@@ -1,36 +1,6 @@
 import yaml
 import logging
 class Config:
-    def __init__(self) -> None:
+    DAT_TOP_FOLDER = "./dat_files"
-        self.IN_TOP_FOLDER = "./dat_files"
+    ASC_TOP_FOLDER = "./asc_files"
-        self.OUT_TOP_FOLDER = "./asc_files"
+    CSV_TOP_FOLDER = "./csv_files"
-        self.CSV_TOP_FOLER = "./csv_files"
+    AREAS_FILE = 'areas.csv'
        self.AREAS_FILE = 'areas.csv'
    def load_areas(self) -> dict:
        """
        Load configuration from YAML file.
        Returns:
            dict: Configuration dictionary containing bounding box information.
        Raises:
            FileNotFoundError: If the config.yaml file is not found.
            yaml.YAMLError: If there's an error parsing the YAML file.
        """
        try:
            with open(, "r") as file:
                config = yaml.safe_load(file)
                return config.get("bounding_box_info", {})
        except FileNotFoundError:
            logging.error(
                f"Config file {CONFIG_FILE} not found. Using default configuration."
            )
            return {}
        except yaml.YAMLError as e:
            logging.error(f"Error parsing YAML file: {e}")
            return {}
@@ -1,3 +0,0 @@
 IN_TOP_FOLDER: "./dat_files"
 OUT_TOP_FOLDER: "./asc_files"
 CSV_TOP_FOLER: "./csv_files"
@@ -1,46 +1,48 @@
 import logging
-import yaml
+import time
 import os
 from pathlib import Path
-CONFIG_FILE = "config.yaml"
+from config import Config
 from modules import BatchNimrod, GenerateTimeseries
 logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
-def load_config() -> dict:
+if __name__ == "__main__":
-    """
+    os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
-    Load configuration from YAML file.
+    os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
    dat_file_count = [f for f in os.listdir(Path(Config.DAT_TOP_FOLDER))]
    asc_file_count = [f for f in os.listdir(Path(Config.ASC_TOP_FOLDER))]
-    Returns:
+    locations = [
-        dict: Configuration dictionary containing bounding box information.
+        # loc name, loc id, x loc,   y loc,  resolution
        ["BRICSC", "TM0816", 608500, 216500, 1000],  
        ["HEACSC", "TF6842", 568500, 342500, 1000], 
    ]
-    Raises:
+    batch = BatchNimrod(Config)
-        FileNotFoundError: If the config.yaml file is not found.
+    timeseries = GenerateTimeseries(Config)
        yaml.YAMLError: If there's an error parsing the YAML file.
    """
    try:
        with open(CONFIG_FILE, "r") as file:
            config = yaml.safe_load(file)
            return config.get("bounding_box_info", {})
    except FileNotFoundError:
        logging.error(
            f"Config file {CONFIG_FILE} not found. Using default configuration."
        )
        return {}
    except yaml.YAMLError as e:
        logging.error(f"Error parsing YAML file: {e}")
        return {}
    start = time.time()
    logging.info("Starting to process DAT to ASC")
    if dat_file_count != asc_file_count:
        batch.process_nimrod_files()
        batch_checkpoint = time.time()
        elapsed_time = batch_checkpoint - start
        logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
    else:
        logging.info("No need to process DAT files, skipping...")
        time.sleep(1)
-os.makedirs(Path(OUT_TOP_FOLDER), exist_ok=True)
+    for place in locations:
-os.makedirs(Path(CSV_TOP_FOLDER), exist_ok=True)
+        logging.info(f'{place[0]} started generating timeseries data.')
        timeseries.extract_cropped_rain_data(place)
        place_checkpoint = time.time()
        since_asc_create = place_checkpoint - batch_checkpoint
        elapsed_time = place_checkpoint - start
        logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
        logging.info(f'total time so far {elapsed_time:.2f} seconds')
-
+    logging.info(f'All Complete')
 # if __name__ == "__main__":
 #     start = time.time()
 #     process_nimrod_files()
 #     end = time.time()
 #     elapsed_time = end - start
 #     logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
@@ -1,2 +1,3 @@
 from .nimrod import Nimrod
-from .batch_nimrod import process_nimrod_files
+from .batch_nimrod import BatchNimrod
 from .generate_timeseries import GenerateTimeseries
@@ -13,22 +13,22 @@ class BatchNimrod():
        Process all Nimrod files in the input directory, applying bounding box clipping
        and exporting to ASC format.
-        This function reads all files from IN_TOP_FOLDER, applies the appropriate bounding
+        This function reads all files from DAT_TOP_FOLDER, applies the appropriate bounding
        box for each area, and exports clipped raster data to ASC_TOP_FOLDER.
        """
        # Read all file names in the folder
-        files_to_process = [f for f in os.listdir(Path(self.config.IN_TOP_FOLDER))]
+        files_to_process = [f for f in os.listdir(Path(self.config.DAT_TOP_FOLDER))]
        logging.info(f"Processing {len(files_to_process)} files...")
-        for in_file in os.listdir(Path(self.config.IN_TOP_FOLDER)):
+        for in_file in os.listdir(Path(self.config.DAT_TOP_FOLDER)):
-            in_file_full = Path(self.config.IN_TOP_FOLDER, in_file)
+            in_file_full = Path(self.config.DAT_TOP_FOLDER, in_file)
            try:
                image = Nimrod(open(in_file_full, "rb"))
                out_file_name = f"{image.get_validity_time()}.asc"
-                out_file_path = Path(self.config.OUT_TOP_FOLDER, out_file_name)
+                out_file_path = Path(self.config.ASC_TOP_FOLDER, out_file_name)
                with open(out_file_path, "w") as outfile:
                    image.extract_asc(outfile)
@@ -4,12 +4,12 @@ import glob
 import pandas as pd
 from datetime import datetime
 # Configuration
 asc_path = "asc_files/"
 asc_wildcard_file = "*.asc"
 asc_mult_source = asc_path + asc_wildcard_file
-def read_ascii_header(ascii_raster_file: str) -> list:
+class GenerateTimeseries:
    def __init__(self, config):
        self.config = config
    def _read_ascii_header(self, ascii_raster_file: str) -> list:
        """Reads header information from an ASCII DEM
        Args:
@@ -23,7 +23,7 @@ def read_ascii_header(ascii_raster_file: str) -> list:
        return header_data
-def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
+    def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
        """Calculate crop coordinates based on header data
        Args:
@@ -41,8 +41,8 @@ def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
        nrows_radar = radar_header[1]
-    nrows_basin = 2  # hardcoded, we always expect 2 rows
+        nrows_basin = 2  # hardcoded, likely to change?
-    ncols_basin = 2  # hardcoded, we always expect 2 columns
+        ncols_basin = 2  # hardcoded, likely to change?
        cellres_radar = radar_header[4]
        cellres_basin = basin_header[4]
@@ -63,21 +63,19 @@ def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
        return int(start_col), int(start_row), int(end_col), int(end_row)
-def extract_cropped_rain_data(location):
+    def extract_cropped_rain_data(self, location):
        """Extract cropped rain data and create rainfall timeseries
        Returns:
            None
        """
        rainfile = []
    # Create datetime list
        datetime_list = []
-    print(location)
+
-    for f in glob.iglob(asc_mult_source):
+        for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
            # print(f)
-        radar_header = read_ascii_header(f)
+            radar_header = self._read_ascii_header(f)
-        start_col, start_row, end_col, end_row = calculate_crop_coords(
+            start_col, start_row, end_col, end_row = self._calculate_crop_coords(
                location, radar_header
            )
@@ -93,11 +91,10 @@ def extract_cropped_rain_data(location):
            cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
            # Flatten the cropped rain data into a 1D array
            cur_rainrow = cur_croppedrain.flatten()
-        rainfile.append(cur_rainrow)
+            rainfile.append(cur_rainrow[2]/32)
            # Extract datetime from filename
            filename = f.split("/")[-1]  # Get just the filename
        # 20240929 0015
            date_str = filename[:8]  # YYYYMMDD
            time_str = filename[8:12]  # HHMM
@@ -112,16 +109,8 @@ def extract_cropped_rain_data(location):
        # sort the dataframe into date order 
        sorted_df = df.sort_index()
        # add headers 
-    header_row = ['rainfall_1', 'rainfall_2', 'rainfall_3', 'rainfall_4']
+        header_row = [location[1]]
        file_name = f"csv_files/{location[0]}_timeseries_data.csv"
        sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
 if __name__ == "__main__":
    locations = [
        # loc name, loc id, x loc,   y loc,  resolution
        ["BRICSC", "TM0816", 608500, 216500, 1000],  
        ["HEACSC", "TF6842", 568500, 342500, 1000], 
    ]
    for place in locations:
        extract_cropped_rain_data(place)