chore: 🔧 More cleaning

This commit is contained in:
2025-11-11 11:54:28 +00:00
parent 71af242dcb
commit b7d0f6cd99
6 changed files with 130 additions and 171 deletions
+4 -34
View File
@@ -1,36 +1,6 @@
import yaml
import logging
class Config:
    """Static folder and file locations shared by the processing pipeline.

    Every value is a class attribute, so the class itself can be passed
    around as the configuration object (e.g. ``BatchNimrod(Config)``) and
    read as ``Config.ASC_TOP_FOLDER`` without creating an instance.
    """

    # Folder that holds the input DAT files to be converted.
    DAT_TOP_FOLDER = "./dat_files"
    # Folder the converted ASC grids are written to and later read from.
    ASC_TOP_FOLDER = "./asc_files"
    # Folder created for the generated CSV output.
    CSV_TOP_FOLDER = "./csv_files"
    # Name of the areas file.
    # NOTE(review): no reader of this value is visible here - confirm usage.
    AREAS_FILE = 'areas.csv'
-3
View File
@@ -1,3 +0,0 @@
IN_TOP_FOLDER: "./dat_files"
OUT_TOP_FOLDER: "./asc_files"
CSV_TOP_FOLER: "./csv_files"
+36 -34
View File
@@ -1,46 +1,48 @@
import logging import logging
import yaml import time
import os
from pathlib import Path
CONFIG_FILE = "config.yaml" from config import Config
from modules import BatchNimrod, GenerateTimeseries
logging.basicConfig( logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
) )
if __name__ == "__main__":
    # Entry point: convert the DAT files to ASC grids when needed, then build
    # one rainfall timeseries per configured location.
    os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
    os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)

    # Compare how many files each folder holds, not the name lists themselves:
    # the ASC outputs are named after each image's validity time, so the two
    # name lists would differ even after every DAT file has been converted and
    # the skip branch below would never be taken.
    dat_file_count = len(os.listdir(Path(Config.DAT_TOP_FOLDER)))
    asc_file_count = len(os.listdir(Path(Config.ASC_TOP_FOLDER)))

    locations = [
        # loc name, loc id, x loc, y loc, resolution
        ["BRICSC", "TM0816", 608500, 216500, 1000],
        ["HEACSC", "TF6842", 568500, 342500, 1000],
    ]

    batch = BatchNimrod(Config)
    timeseries = GenerateTimeseries(Config)

    start = time.time()
    logging.info("Starting to process DAT to ASC")
    if dat_file_count != asc_file_count:
        batch.process_nimrod_files()
        batch_checkpoint = time.time()
        elapsed_time = batch_checkpoint - start
        logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
    else:
        logging.info("No need to process DAT files, skipping...")
        time.sleep(1)
        # The checkpoint must exist on this path too; the per-location
        # timings below subtract it and would otherwise raise NameError.
        batch_checkpoint = time.time()

    for place in locations:
        logging.info(f'{place[0]} started generating timeseries data.')
        timeseries.extract_cropped_rain_data(place)
        place_checkpoint = time.time()
        # Time since the ASC stage finished (or was skipped), not per place.
        since_asc_create = place_checkpoint - batch_checkpoint
        elapsed_time = place_checkpoint - start
        logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
        logging.info(f'total time so far {elapsed_time:.2f} seconds')
    logging.info('All Complete')
# if __name__ == "__main__":
# start = time.time()
# process_nimrod_files()
# end = time.time()
# elapsed_time = end - start
# logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
+2 -1
View File
@@ -1,2 +1,3 @@
from .nimrod import Nimrod from .nimrod import Nimrod
from .batch_nimrod import process_nimrod_files from .batch_nimrod import BatchNimrod
from .generate_timeseries import GenerateTimeseries
+5 -5
View File
@@ -13,22 +13,22 @@ class BatchNimrod():
Process all Nimrod files in the input directory, applying bounding box clipping Process all Nimrod files in the input directory, applying bounding box clipping
and exporting to ASC format. and exporting to ASC format.
This function reads all files from IN_TOP_FOLDER, applies the appropriate bounding This function reads all files from DAT_TOP_FOLDER, applies the appropriate bounding
box for each area, and exports clipped raster data to OUT_TOP_FOLDER. box for each area, and exports clipped raster data to ASC_TOP_FOLDER.
""" """
# Read all file names in the folder # Read all file names in the folder
files_to_process = [f for f in os.listdir(Path(self.config.IN_TOP_FOLDER))] files_to_process = [f for f in os.listdir(Path(self.config.DAT_TOP_FOLDER))]
logging.info(f"Processing {len(files_to_process)} files...") logging.info(f"Processing {len(files_to_process)} files...")
for in_file in os.listdir(Path(self.config.IN_TOP_FOLDER)): for in_file in os.listdir(Path(self.config.DAT_TOP_FOLDER)):
in_file_full = Path(self.config.IN_TOP_FOLDER, in_file) in_file_full = Path(self.config.DAT_TOP_FOLDER, in_file)
try: try:
image = Nimrod(open(in_file_full, "rb")) image = Nimrod(open(in_file_full, "rb"))
out_file_name = f"{image.get_validity_time()}.asc" out_file_name = f"{image.get_validity_time()}.asc"
out_file_path = Path(self.config.OUT_TOP_FOLDER, out_file_name) out_file_path = Path(self.config.ASC_TOP_FOLDER, out_file_name)
with open(out_file_path, "w") as outfile: with open(out_file_path, "w") as outfile:
image.extract_asc(outfile) image.extract_asc(outfile)
+15 -26
View File
@@ -4,12 +4,12 @@ import glob
import pandas as pd import pandas as pd
from datetime import datetime from datetime import datetime
# Configuration
asc_path = "asc_files/"
asc_wildcard_file = "*.asc"
asc_mult_source = asc_path + asc_wildcard_file
def read_ascii_header(ascii_raster_file: str) -> list: class GenerateTimeseries:
def __init__(self, config):
self.config = config
def _read_ascii_header(self, ascii_raster_file: str) -> list:
"""Reads header information from an ASCII DEM """Reads header information from an ASCII DEM
Args: Args:
@@ -23,7 +23,7 @@ def read_ascii_header(ascii_raster_file: str) -> list:
return header_data return header_data
def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple: def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
"""Calculate crop coordinates based on header data """Calculate crop coordinates based on header data
Args: Args:
@@ -41,8 +41,8 @@ def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
nrows_radar = radar_header[1] nrows_radar = radar_header[1]
nrows_basin = 2 # hardcoded, we always expect 2 rows nrows_basin = 2 # hardcoded, likely to change?
ncols_basin = 2 # hardcoded, we always expect 2 columns ncols_basin = 2 # hardcoded, likely to change?
cellres_radar = radar_header[4] cellres_radar = radar_header[4]
cellres_basin = basin_header[4] cellres_basin = basin_header[4]
@@ -63,21 +63,19 @@ def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
return int(start_col), int(start_row), int(end_col), int(end_row) return int(start_col), int(start_row), int(end_col), int(end_row)
def extract_cropped_rain_data(location): def extract_cropped_rain_data(self, location):
"""Extract cropped rain data and create rainfall timeseries """Extract cropped rain data and create rainfall timeseries
Returns: Returns:
None None
""" """
rainfile = [] rainfile = []
# Create datetime list
datetime_list = [] datetime_list = []
print(location)
for f in glob.iglob(asc_mult_source): for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
# print(f) # print(f)
radar_header = read_ascii_header(f) radar_header = self._read_ascii_header(f)
start_col, start_row, end_col, end_row = calculate_crop_coords( start_col, start_row, end_col, end_row = self._calculate_crop_coords(
location, radar_header location, radar_header
) )
@@ -93,11 +91,10 @@ def extract_cropped_rain_data(location):
cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col] cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
# Flatten the cropped rain data into a 1D array # Flatten the cropped rain data into a 1D array
cur_rainrow = cur_croppedrain.flatten() cur_rainrow = cur_croppedrain.flatten()
rainfile.append(cur_rainrow) rainfile.append(cur_rainrow[2]/32)
# Extract datetime from filename # Extract datetime from filename
filename = f.split("/")[-1] # Get just the filename filename = f.split("/")[-1] # Get just the filename
# 20240929 0015
date_str = filename[:8] # YYYYMMDD date_str = filename[:8] # YYYYMMDD
time_str = filename[8:12] # HHMM time_str = filename[8:12] # HHMM
@@ -112,16 +109,8 @@ def extract_cropped_rain_data(location):
# sort the dataframe into date order # sort the dataframe into date order
sorted_df = df.sort_index() sorted_df = df.sort_index()
# add headers # add headers
header_row = ['rainfall_1', 'rainfall_2', 'rainfall_3', 'rainfall_4'] header_row = [location[1]]
file_name = f"csv_files/{location[0]}_timeseries_data.csv" file_name = f"csv_files/{location[0]}_timeseries_data.csv"
sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime') sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
if __name__ == "__main__":
locations = [
# loc name, loc id, x loc, y loc, resolution
["BRICSC", "TM0816", 608500, 216500, 1000],
["HEACSC", "TF6842", 568500, 342500, 1000],
]
for place in locations:
extract_cropped_rain_data(place)