From be2c89bcc2ade99019e4d4caa5c1e5e768016560 Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Tue, 11 Nov 2025 21:32:16 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=94=97=20I=20Am=20Speed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 3 +- main.py | 17 +++++------ modules/__init__.py | 9 +++++- modules/batch_nimrod.py | 6 ++-- modules/combine_timeseries.py | 14 ++++----- modules/generate_timeseries.py | 52 +++++++++++++++------------------- 6 files changed, 50 insertions(+), 51 deletions(-) diff --git a/config.py b/config.py index 6037fd0..1583872 100644 --- a/config.py +++ b/config.py @@ -3,5 +3,4 @@ class Config: ASC_TOP_FOLDER = "./asc_files" CSV_TOP_FOLDER = "./csv_files" COMBINED_FOLDER = "./combined_files" - AREAS_FILE = 'areas.csv' - + AREAS_FILE = "areas.csv" diff --git a/main.py b/main.py index 5b38218..14a866b 100644 --- a/main.py +++ b/main.py @@ -25,32 +25,33 @@ if __name__ == "__main__": batch = BatchNimrod(Config) timeseries = GenerateTimeseries(Config) - combiner= CombineTimeseries(Config, locations) + combiner = CombineTimeseries(Config, locations) start = time.time() logging.info("Starting to process DAT to ASC") - batch_checkpoint = time.time() - if dat_file_count != asc_file_count: + if len(dat_file_count) != len(asc_file_count): batch.process_nimrod_files() + batch_checkpoint = time.time() elapsed_time = batch_checkpoint - start logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds") else: logging.info("No need to process DAT files, skipping...") + batch_checkpoint = time.time() time.sleep(1) for place in locations: - logging.info(f'{place[0]} started generating timeseries data.') + logging.info(f"{place[0]} started generating timeseries data.") timeseries.extract_cropped_rain_data(place) place_checkpoint = time.time() since_asc_create = place_checkpoint - batch_checkpoint elapsed_time = place_checkpoint - start logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds") - logging.info(f'total time so far {elapsed_time:.2f} seconds') + logging.info(f"total time so far {elapsed_time:.2f} seconds") - logging.info('combining CSVs into groups') + logging.info("combining CSVs into groups") combiner.combine_csv_files() - logging.info('CSVs combined!') + logging.info("CSVs combined!") end = time.time() elapsed_time = end - start - logging.info(f'All Complete total time {elapsed_time:.2f} seconds') \ No newline at end of file + logging.info(f"All Complete total time {elapsed_time:.2f} seconds") diff --git a/modules/__init__.py b/modules/__init__.py index dc088df..6c3385d 100644 --- a/modules/__init__.py +++ b/modules/__init__.py @@ -1,4 +1,11 @@ from .nimrod import Nimrod from .batch_nimrod import BatchNimrod from .generate_timeseries import GenerateTimeseries -from .combine_timeseries import CombineTimeseries \ No newline at end of file +from .combine_timeseries import CombineTimeseries + +__all__ = [ + "Nimrod", + "BatchNimrod", + "GenerateTimeseries", + "CombineTimeseries" +] \ No newline at end of file diff --git a/modules/batch_nimrod.py b/modules/batch_nimrod.py index 46964db..8961f1e 100644 --- a/modules/batch_nimrod.py +++ b/modules/batch_nimrod.py @@ -4,7 +4,7 @@ from pathlib import Path import logging -class BatchNimrod(): +class BatchNimrod: def __init__(self, config) -> None: self.config = config @@ -32,7 +32,7 @@ class BatchNimrod(): with open(out_file_path, "w") as outfile: image.extract_asc(outfile) - + # delete dat file here logging.debug(f"Successfully processed: {in_file_full}") @@ -44,4 +44,4 @@ class BatchNimrod(): except Nimrod.PayloadReadError as e: logging.error(f"Failed to load the raster data in {in_file_full}") logging.error(e) - continue \ No newline at end of file + continue diff --git a/modules/combine_timeseries.py b/modules/combine_timeseries.py index 29ff408..1ba7d59 100644 --- a/modules/combine_timeseries.py +++ b/modules/combine_timeseries.py @@ -1,5 +1,6 @@ import pandas as pd + class CombineTimeseries: def __init__(self, config, locations): self.config = config @@ -7,7 +8,6 @@ class CombineTimeseries: self.grouped_locations = {} self.build_location_groups() - def build_location_groups(self): for location in self.locations: group = location[4] # output group is at index 4 @@ -15,19 +15,17 @@ class CombineTimeseries: self.grouped_locations[group] = [] self.grouped_locations[group].append(location) - - def combine_csv_files(self): for group, loc_list in self.grouped_locations.items(): combined_df = None for loc in loc_list: - csv_to_load = f'./csv_files/{loc[0]}_timeseries_data.csv' + csv_to_load = f"./csv_files/{loc[0]}_timeseries_data.csv" df = pd.read_csv(csv_to_load, index_col=0) if combined_df is None: combined_df = df else: - combined_df = combined_df.join(df, how='inner') - output_file = f'{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv' + combined_df = combined_df.join(df, how="inner") + output_file = ( + f"{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv" + ) combined_df.to_csv(output_file) - - diff --git a/modules/generate_timeseries.py b/modules/generate_timeseries.py index 881e042..8eeb2ef 100644 --- a/modules/generate_timeseries.py +++ b/modules/generate_timeseries.py @@ -1,8 +1,9 @@ from __future__ import division, print_function import numpy as np -import glob +from pathlib import Path import pandas as pd from datetime import datetime +import os class GenerateTimeseries: @@ -22,7 +23,6 @@ class GenerateTimeseries: header_data = [float(f.__next__().split()[1]) for x in range(6)] return header_data - def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple: """Calculate crop coordinates based on header data @@ -45,7 +45,7 @@ class GenerateTimeseries: ncols_basin = 2 # hardcoded, likely to change? cellres_radar = radar_header[4] - cellres_basin = 1000 # 1km + cellres_basin = 1000 # 1km xp = x0_basin - x0_radar yp = y0_basin - y0_radar @@ -59,10 +59,8 @@ class GenerateTimeseries: start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar)) end_row = np.ceil(nrows_radar - (yp / cellres_radar)) - #print(start_col, start_row, end_col, end_row) return int(start_col), int(start_row), int(end_col), int(end_row) - def extract_cropped_rain_data(self, location): """Extract cropped rain data and create rainfall timeseries @@ -72,29 +70,24 @@ class GenerateTimeseries: rainfile = [] datetime_list = [] - for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'): - # print(f) - radar_header = self._read_ascii_header(f) + for file_name in os.listdir(Path(self.config.ASC_TOP_FOLDER)): + file_path = Path(self.config.ASC_TOP_FOLDER, file_name) + + radar_header = self._read_ascii_header(str(file_path)) + + # Calculate crop coordinates start_col, start_row, end_col, end_row = self._calculate_crop_coords( location, radar_header ) - start_col = int(round(start_col)) - start_row = int(round(start_row)) - end_col = int(round(end_col)) - end_row = int(round(end_row)) - - cur_rawgrid = np.genfromtxt( - f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False - ) + cur_rawgrid = np.loadtxt(file_path, skiprows=6, dtype=float, delimiter=None) cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col] - # Flatten the cropped rain data into a 1D array - cur_rainrow = cur_croppedrain.flatten() - rainfile.append(cur_rainrow[2]/32) + + rainfile.append(cur_croppedrain.flatten()[2] / 32) # Extract datetime from filename - filename = f.split("/")[-1] # Get just the filename + filename = os.path.basename(file_path) # Get just the filename date_str = filename[:8] # YYYYMMDD time_str = filename[8:12] # HHMM @@ -102,15 +95,16 @@ class GenerateTimeseries: parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M") datetime_list.append(parsed_date) - rainfile_arr = np.vstack(rainfile) - # Create DataFrame with datetime index - df = pd.DataFrame(rainfile_arr, index=datetime_list) - # sort the dataframe into date order + df = pd.DataFrame({"rainfall": rainfile}, index=datetime_list) + + # Sort the dataframe into date order sorted_df = df.sort_index() - # add headers - header_row = [location[1]] - file_name = f"csv_files/{location[0]}_timeseries_data.csv" - sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime') - + sorted_df.to_csv( + f"csv_files/{location[0]}_timeseries_data.csv", + sep=",", + float_format="%1.4f", + header=[location[1]], + index_label="datetime", + )