chore: 🔧 More cleaning

2025-11-11 11:54:28 +00:00
parent 71af242dcb
commit b7d0f6cd99
6 changed files with 130 additions and 171 deletions
+4 -34
@@ -1,36 +1,6 @@
-import yaml
-import logging
 class Config:
-    def __init__(self) -> None:
-        self.IN_TOP_FOLDER = "./dat_files"
-        self.OUT_TOP_FOLDER = "./asc_files"
-        self.CSV_TOP_FOLER = "./csv_files"
-        self.AREAS_FILE = 'areas.csv'
-    def load_areas(self) -> dict:
-        """
-        Load configuration from YAML file.
-        Returns:
-            dict: Configuration dictionary containing bounding box information.
-        Raises:
-            FileNotFoundError: If the config.yaml file is not found.
-            yaml.YAMLError: If there's an error parsing the YAML file.
-        """
-        try:
-            with open(, "r") as file:
-                config = yaml.safe_load(file)
-                return config.get("bounding_box_info", {})
-        except FileNotFoundError:
-            logging.error(
-                f"Config file {CONFIG_FILE} not found. Using default configuration."
-            )
-            return {}
-        except yaml.YAMLError as e:
-            logging.error(f"Error parsing YAML file: {e}")
-            return {}
+    DAT_TOP_FOLDER = "./dat_files"
+    ASC_TOP_FOLDER = "./asc_files"
+    CSV_TOP_FOLDER = "./csv_files"
+    AREAS_FILE = 'areas.csv'
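For orientation, a minimal sketch of how the simplified Config is consumed elsewhere; the module name config is assumed from the "from config import Config" import in the entry script below:

    from config import Config

    # The paths are plain class attributes now; no instance and no YAML parsing needed.
    print(Config.DAT_TOP_FOLDER)   # "./dat_files"  - raw Nimrod DAT inputs
    print(Config.ASC_TOP_FOLDER)   # "./asc_files"  - exported ASC rasters
    print(Config.CSV_TOP_FOLDER)   # "./csv_files"  - generated timeseries CSVs
    print(Config.AREAS_FILE)       # "areas.csv"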
-3
@@ -1,3 +0,0 @@
-IN_TOP_FOLDER: "./dat_files"
-OUT_TOP_FOLDER: "./asc_files"
-CSV_TOP_FOLER: "./csv_files"
+36 -34
@@ -1,46 +1,48 @@
 import logging
-import yaml
+import time
 import os
 from pathlib import Path
-CONFIG_FILE = "config.yaml"
+from config import Config
+from modules import BatchNimrod, GenerateTimeseries
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
-def load_config() -> dict:
-    """
-    Load configuration from YAML file.
-    Returns:
-        dict: Configuration dictionary containing bounding box information.
-    Raises:
-        FileNotFoundError: If the config.yaml file is not found.
-        yaml.YAMLError: If there's an error parsing the YAML file.
-    """
-    try:
-        with open(CONFIG_FILE, "r") as file:
-            config = yaml.safe_load(file)
-            return config.get("bounding_box_info", {})
-    except FileNotFoundError:
-        logging.error(
-            f"Config file {CONFIG_FILE} not found. Using default configuration."
-        )
-        return {}
-    except yaml.YAMLError as e:
-        logging.error(f"Error parsing YAML file: {e}")
-        return {}
-os.makedirs(Path(OUT_TOP_FOLDER), exist_ok=True)
-os.makedirs(Path(CSV_TOP_FOLDER), exist_ok=True)
-# if __name__ == "__main__":
-#     start = time.time()
-#     process_nimrod_files()
-#     end = time.time()
-#     elapsed_time = end - start
-#     logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
+if __name__ == "__main__":
+    os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
+    os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
+    dat_file_count = [f for f in os.listdir(Path(Config.DAT_TOP_FOLDER))]
+    asc_file_count = [f for f in os.listdir(Path(Config.ASC_TOP_FOLDER))]
+    locations = [
+        # loc name, loc id, x loc, y loc, resolution
+        ["BRICSC", "TM0816", 608500, 216500, 1000],
+        ["HEACSC", "TF6842", 568500, 342500, 1000],
+    ]
+    batch = BatchNimrod(Config)
+    timeseries = GenerateTimeseries(Config)
+    start = time.time()
+    logging.info("Starting to process DAT to ASC")
+    if dat_file_count != asc_file_count:
+        batch.process_nimrod_files()
+        batch_checkpoint = time.time()
+        elapsed_time = batch_checkpoint - start
+        logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
+    else:
+        logging.info("No need to process DAT files, skipping...")
+        time.sleep(1)
+    for place in locations:
+        logging.info(f'{place[0]} started generating timeseries data.')
+        timeseries.extract_cropped_rain_data(place)
+        place_checkpoint = time.time()
+        since_asc_create = place_checkpoint - batch_checkpoint
+        elapsed_time = place_checkpoint - start
+        logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
+        logging.info(f'total time so far {elapsed_time:.2f} seconds')
+    logging.info(f'All Complete')
+2 -1
@@ -1,2 +1,3 @@
 from .nimrod import Nimrod
-from .batch_nimrod import process_nimrod_files
+from .batch_nimrod import BatchNimrod
+from .generate_timeseries import GenerateTimeseries
+5 -5
@@ -13,22 +13,22 @@ class BatchNimrod():
         Process all Nimrod files in the input directory, applying bounding box clipping
         and exporting to ASC format.
-        This function reads all files from IN_TOP_FOLDER, applies the appropriate bounding
+        This function reads all files from DAT_TOP_FOLDER, applies the appropriate bounding
         box for each area, and exports clipped raster data to OUT_TOP_FOLDER.
         """
         # Read all file names in the folder
-        files_to_process = [f for f in os.listdir(Path(self.config.IN_TOP_FOLDER))]
+        files_to_process = [f for f in os.listdir(Path(self.config.DAT_TOP_FOLDER))]
         logging.info(f"Processing {len(files_to_process)} files...")
-        for in_file in os.listdir(Path(self.config.IN_TOP_FOLDER)):
-            in_file_full = Path(self.config.IN_TOP_FOLDER, in_file)
+        for in_file in os.listdir(Path(self.config.DAT_TOP_FOLDER)):
+            in_file_full = Path(self.config.DAT_TOP_FOLDER, in_file)
             try:
                 image = Nimrod(open(in_file_full, "rb"))
                 out_file_name = f"{image.get_validity_time()}.asc"
-                out_file_path = Path(self.config.OUT_TOP_FOLDER, out_file_name)
+                out_file_path = Path(self.config.ASC_TOP_FOLDER, out_file_name)
                 with open(out_file_path, "w") as outfile:
                     image.extract_asc(outfile)
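The .asc files written here (and read back by GenerateTimeseries below) are assumed to follow the standard ESRI ASCII grid layout, where the first six lines are the header; _read_ascii_header keeps the second token of each line, so radar_header comes out as [ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value]. The values in this sketch are illustrative only:

    ncols         500
    nrows         500
    xllcorner     600000
    yllcorner     200000
    cellsize      1000
    NODATA_value  -1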
+81 -92
@@ -4,124 +4,113 @@ import glob
 import pandas as pd
 from datetime import datetime
-# Configuration
-asc_path = "asc_files/"
-asc_wildcard_file = "*.asc"
-asc_mult_source = asc_path + asc_wildcard_file
-def read_ascii_header(ascii_raster_file: str) -> list:
-    """Reads header information from an ASCII DEM
-    Args:
-        ascii_raster_file (str): Path to the ASCII raster file
-    Returns:
-        list: Header data as a list of floats
-    """
-    with open(ascii_raster_file) as f:
-        header_data = [float(f.__next__().split()[1]) for x in range(6)]
-    return header_data
+class GenerateTimeseries:
+    def __init__(self, config):
+        self.config = config
+    def _read_ascii_header(self, ascii_raster_file: str) -> list:
+        """Reads header information from an ASCII DEM
+        Args:
+            ascii_raster_file (str): Path to the ASCII raster file
+        Returns:
+            list: Header data as a list of floats
+        """
+        with open(ascii_raster_file) as f:
+            header_data = [float(f.__next__().split()[1]) for x in range(6)]
+        return header_data
-def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
-    """Calculate crop coordinates based on header data
-    Args:
-        basin_header (list): Basin header data
-        radar_header (list): Radar header data
-    Returns:
-        tuple: (start_col, start_row, end_col, end_row) as integers
-    """
-    y0_radar = radar_header[3]
-    x0_radar = radar_header[2]
-    y0_basin = basin_header[3]
-    x0_basin = basin_header[2]
-    nrows_radar = radar_header[1]
-    nrows_basin = 2  # hardcoded, we always expect 2 rows
-    ncols_basin = 2  # hardcoded, we always expect 2 columns
-    cellres_radar = radar_header[4]
-    cellres_basin = basin_header[4]
-    xp = x0_basin - x0_radar
-    yp = y0_basin - y0_radar
-    xpp = ncols_basin * cellres_basin
-    ypp = nrows_basin * cellres_basin
-    start_col = np.floor(xp / cellres_radar)
-    end_col = np.ceil((xpp + xp) / cellres_radar)
-    start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
-    end_row = np.ceil(nrows_radar - (yp / cellres_radar))
-    #print(start_col, start_row, end_col, end_row)
-    return int(start_col), int(start_row), int(end_col), int(end_row)
+    def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
+        """Calculate crop coordinates based on header data
+        Args:
+            basin_header (list): Basin header data
+            radar_header (list): Radar header data
+        Returns:
+            tuple: (start_col, start_row, end_col, end_row) as integers
+        """
+        y0_radar = radar_header[3]
+        x0_radar = radar_header[2]
+        y0_basin = basin_header[3]
+        x0_basin = basin_header[2]
+        nrows_radar = radar_header[1]
+        nrows_basin = 2  # hardcoded, likely to change?
+        ncols_basin = 2  # hardcoded, likely to change?
+        cellres_radar = radar_header[4]
+        cellres_basin = basin_header[4]
+        xp = x0_basin - x0_radar
+        yp = y0_basin - y0_radar
+        xpp = ncols_basin * cellres_basin
+        ypp = nrows_basin * cellres_basin
+        start_col = np.floor(xp / cellres_radar)
+        end_col = np.ceil((xpp + xp) / cellres_radar)
+        start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
+        end_row = np.ceil(nrows_radar - (yp / cellres_radar))
+        #print(start_col, start_row, end_col, end_row)
+        return int(start_col), int(start_row), int(end_col), int(end_row)
-def extract_cropped_rain_data(location):
-    """Extract cropped rain data and create rainfall timeseries
-    Returns:
-        None
-    """
-    rainfile = []
-    # Create datetime list
-    datetime_list = []
-    print(location)
-    for f in glob.iglob(asc_mult_source):
-        # print(f)
-        radar_header = read_ascii_header(f)
-        start_col, start_row, end_col, end_row = calculate_crop_coords(
-            location, radar_header
-        )
-        start_col = int(round(start_col))
-        start_row = int(round(start_row))
-        end_col = int(round(end_col))
-        end_row = int(round(end_row))
-        cur_rawgrid = np.genfromtxt(
-            f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
-        )
-        cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
-        # Flatten the cropped rain data into a 1D array
-        cur_rainrow = cur_croppedrain.flatten()
-        rainfile.append(cur_rainrow)
-        # Extract datetime from filename
-        filename = f.split("/")[-1]  # Get just the filename
-        # 20240929 0015
-        date_str = filename[:8]  # YYYYMMDD
-        time_str = filename[8:12]  # HHMM
-        # Parse datetime
-        parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
-        datetime_list.append(parsed_date)
-    rainfile_arr = np.vstack(rainfile)
-    # Create DataFrame with datetime index
-    df = pd.DataFrame(rainfile_arr, index=datetime_list)
-    # sort the dataframe into date order
-    sorted_df = df.sort_index()
-    # add headers
-    header_row = ['rainfall_1', 'rainfall_2', 'rainfall_3', 'rainfall_4']
-    file_name = f"csv_files/{location[0]}_timeseries_data.csv"
-    sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
+    def extract_cropped_rain_data(self, location):
+        """Extract cropped rain data and create rainfall timeseries
+        Returns:
+            None
+        """
+        rainfile = []
+        datetime_list = []
+        for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
+            # print(f)
+            radar_header = self._read_ascii_header(f)
+            start_col, start_row, end_col, end_row = self._calculate_crop_coords(
+                location, radar_header
+            )
+            start_col = int(round(start_col))
+            start_row = int(round(start_row))
+            end_col = int(round(end_col))
+            end_row = int(round(end_row))
+            cur_rawgrid = np.genfromtxt(
+                f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
+            )
+            cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
+            # Flatten the cropped rain data into a 1D array
+            cur_rainrow = cur_croppedrain.flatten()
+            rainfile.append(cur_rainrow[2]/32)
+            # Extract datetime from filename
+            filename = f.split("/")[-1]  # Get just the filename
+            date_str = filename[:8]  # YYYYMMDD
+            time_str = filename[8:12]  # HHMM
+            # Parse datetime
+            parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
+            datetime_list.append(parsed_date)
+        rainfile_arr = np.vstack(rainfile)
+        # Create DataFrame with datetime index
+        df = pd.DataFrame(rainfile_arr, index=datetime_list)
+        # sort the dataframe into date order
+        sorted_df = df.sort_index()
+        # add headers
+        header_row = [location[1]]
+        file_name = f"csv_files/{location[0]}_timeseries_data.csv"
+        sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
-if __name__ == "__main__":
-    locations = [
-        # loc name, loc id, x loc, y loc, resolution
-        ["BRICSC", "TM0816", 608500, 216500, 1000],
-        ["HEACSC", "TF6842", 568500, 342500, 1000],
-    ]
-    for place in locations:
-        extract_cropped_rain_data(place)
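To make the crop arithmetic in _calculate_crop_coords concrete, a small worked sketch for the BRICSC location follows; the radar header values are hypothetical (matching the illustrative ASC header above) and chosen only to keep the numbers easy to follow:

    import numpy as np

    # Hypothetical radar header, in _read_ascii_header order:
    # [ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value]
    radar_header = [500, 500, 600000, 200000, 1000, -1]
    # Location row as used in the entry script: [loc name, loc id, x loc, y loc, resolution]
    basin = ["BRICSC", "TM0816", 608500, 216500, 1000]

    xp = basin[2] - radar_header[2]    # 8500 m east of the radar grid origin
    yp = basin[3] - radar_header[3]    # 16500 m north of the radar grid origin
    xpp = 2 * basin[4]                 # 2 hardcoded columns * 1000 m = 2000 m
    ypp = 2 * basin[4]                 # 2 hardcoded rows * 1000 m = 2000 m

    start_col = int(np.floor(xp / radar_header[4]))                            # 8
    end_col = int(np.ceil((xpp + xp) / radar_header[4]))                       # 11
    start_row = int(np.floor(radar_header[1] - (yp + ypp) / radar_header[4]))  # 481
    end_row = int(np.ceil(radar_header[1] - yp / radar_header[4]))             # 484

    # Rows are counted down from the top of the grid, so the slice
    # cur_rawgrid[481:484, 8:11] picks a 3x3 block of cells around BRICSC.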