chore: 🔧 More cleaning
@@ -1,36 +1,6 @@
-import yaml
-import logging
-
 class Config:
-    def __init__(self) -> None:
-        self.IN_TOP_FOLDER = "./dat_files"
-        self.OUT_TOP_FOLDER = "./asc_files"
-        self.CSV_TOP_FOLER = "./csv_files"
-        self.AREAS_FILE = 'areas.csv'
-
-
-
-    def load_areas(self) -> dict:
-        """
-        Load configuration from YAML file.
-
-        Returns:
-            dict: Configuration dictionary containing bounding box information.
-
-        Raises:
-            FileNotFoundError: If the config.yaml file is not found.
-            yaml.YAMLError: If there's an error parsing the YAML file.
-        """
-        try:
-            with open(CONFIG_FILE, "r") as file:
-                config = yaml.safe_load(file)
-            return config.get("bounding_box_info", {})
-        except FileNotFoundError:
-            logging.error(
-                f"Config file {CONFIG_FILE} not found. Using default configuration."
-            )
-            return {}
-        except yaml.YAMLError as e:
-            logging.error(f"Error parsing YAML file: {e}")
-            return {}
+    DAT_TOP_FOLDER = "./dat_files"
+    ASC_TOP_FOLDER = "./asc_files"
+    CSV_TOP_FOLDER = "./csv_files"
+    AREAS_FILE = 'areas.csv'
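Note: the reworked Config is used as a plain class object and is never instantiated. A minimal sketch of the pattern; the Consumer class here is a hypothetical stand-in for BatchNimrod / GenerateTimeseries further down:

    from pathlib import Path

    class Config:
        # Class attributes double as namespaced constants; no __init__ needed.
        DAT_TOP_FOLDER = "./dat_files"

    class Consumer:
        def __init__(self, config):
            # The class object itself is passed in, mirroring BatchNimrod(Config).
            self.config = config

        def list_inputs(self) -> list:
            # Attribute lookups resolve on the class object (Config.DAT_TOP_FOLDER).
            return [f.name for f in Path(self.config.DAT_TOP_FOLDER).iterdir()]

    consumer = Consumer(Config)

Passing the class rather than an instance also keeps the constants easy to override in tests by subclassing Config.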
@@ -1,3 +0,0 @@
-IN_TOP_FOLDER: "./dat_files"
-OUT_TOP_FOLDER: "./asc_files"
-CSV_TOP_FOLER: "./csv_files"
@@ -1,46 +1,48 @@
 import logging
-import yaml
-
-
+import time
+import os
+from pathlib import Path
 
-CONFIG_FILE = "config.yaml"
-
+from config import Config
+from modules import BatchNimrod, GenerateTimeseries
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
 
-def load_config() -> dict:
-    """
-    Load configuration from YAML file.
-
-    Returns:
-        dict: Configuration dictionary containing bounding box information.
-
-    Raises:
-        FileNotFoundError: If the config.yaml file is not found.
-        yaml.YAMLError: If there's an error parsing the YAML file.
-    """
-    try:
-        with open(CONFIG_FILE, "r") as file:
-            config = yaml.safe_load(file)
-        return config.get("bounding_box_info", {})
-    except FileNotFoundError:
-        logging.error(
-            f"Config file {CONFIG_FILE} not found. Using default configuration."
-        )
-        return {}
-    except yaml.YAMLError as e:
-        logging.error(f"Error parsing YAML file: {e}")
-        return {}
-
-
-os.makedirs(Path(OUT_TOP_FOLDER), exist_ok=True)
-os.makedirs(Path(CSV_TOP_FOLDER), exist_ok=True)
-
-# if __name__ == "__main__":
-#     start = time.time()
-#     process_nimrod_files()
-#     end = time.time()
-#     elapsed_time = end - start
-#     logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
+if __name__ == "__main__":
+    os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
+    os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
+    dat_file_count = [f for f in os.listdir(Path(Config.DAT_TOP_FOLDER))]
+    asc_file_count = [f for f in os.listdir(Path(Config.ASC_TOP_FOLDER))]
+
+    locations = [
+        # loc name, loc id, x loc, y loc, resolution
+        ["BRICSC", "TM0816", 608500, 216500, 1000],
+        ["HEACSC", "TF6842", 568500, 342500, 1000],
+    ]
+
+    batch = BatchNimrod(Config)
+    timeseries = GenerateTimeseries(Config)
+
+    start = time.time()
+    logging.info("Starting to process DAT to ASC")
+    if dat_file_count != asc_file_count:
+        batch.process_nimrod_files()
+        batch_checkpoint = time.time()
+        elapsed_time = batch_checkpoint - start
+        logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
+    else:
+        logging.info("No need to process DAT files, skipping...")
+        time.sleep(1)
+
+    for place in locations:
+        logging.info(f'{place[0]} started generating timeseries data.')
+        timeseries.extract_cropped_rain_data(place)
+        place_checkpoint = time.time()
+        since_asc_create = place_checkpoint - batch_checkpoint
+        elapsed_time = place_checkpoint - start
+        logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
+        logging.info(f'total time so far {elapsed_time:.2f} seconds')
+
+    logging.info(f'All Complete')
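Note on the reprocessing guard above: dat_file_count and asc_file_count hold lists of file names, not counts, and the .dat inputs and .asc outputs are named differently, so the two lists will practically always compare unequal and trigger a rerun. A count-based guard would behave as the variable names suggest; a minimal sketch under the same folder layout (not part of this commit):

    import os
    from pathlib import Path

    # Compare how many files sit on each side of the pipeline rather than
    # comparing two lists whose entries never share names.
    dat_count = len(os.listdir(Path("./dat_files")))
    asc_count = len(os.listdir(Path("./asc_files")))
    if dat_count != asc_count:
        print("reprocessing needed")  # stand-in for batch.process_nimrod_files()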
@@ -1,2 +1,3 @@
 from .nimrod import Nimrod
-from .batch_nimrod import process_nimrod_files
+from .batch_nimrod import BatchNimrod
+from .generate_timeseries import GenerateTimeseries
@@ -13,22 +13,22 @@ class BatchNimrod():
         Process all Nimrod files in the input directory, applying bounding box clipping
         and exporting to ASC format.
 
-        This function reads all files from IN_TOP_FOLDER, applies the appropriate bounding
+        This function reads all files from DAT_TOP_FOLDER, applies the appropriate bounding
         box for each area, and exports clipped raster data to OUT_TOP_FOLDER.
         """
         # Read all file names in the folder
-        files_to_process = [f for f in os.listdir(Path(self.config.IN_TOP_FOLDER))]
+        files_to_process = [f for f in os.listdir(Path(self.config.DAT_TOP_FOLDER))]
 
         logging.info(f"Processing {len(files_to_process)} files...")
 
-        for in_file in os.listdir(Path(self.config.IN_TOP_FOLDER)):
-            in_file_full = Path(self.config.IN_TOP_FOLDER, in_file)
+        for in_file in os.listdir(Path(self.config.DAT_TOP_FOLDER)):
+            in_file_full = Path(self.config.DAT_TOP_FOLDER, in_file)
 
             try:
                 image = Nimrod(open(in_file_full, "rb"))
 
                 out_file_name = f"{image.get_validity_time()}.asc"
-                out_file_path = Path(self.config.OUT_TOP_FOLDER, out_file_name)
+                out_file_path = Path(self.config.ASC_TOP_FOLDER, out_file_name)
 
                 with open(out_file_path, "w") as outfile:
                     image.extract_asc(outfile)
@@ -4,124 +4,113 @@ import glob
 import pandas as pd
 from datetime import datetime
 
-# Configuration
-asc_path = "asc_files/"
-asc_wildcard_file = "*.asc"
-asc_mult_source = asc_path + asc_wildcard_file
-
-
-def read_ascii_header(ascii_raster_file: str) -> list:
-    """Reads header information from an ASCII DEM
-
-    Args:
-        ascii_raster_file (str): Path to the ASCII raster file
-
-    Returns:
-        list: Header data as a list of floats
-    """
-    with open(ascii_raster_file) as f:
-        header_data = [float(f.__next__().split()[1]) for x in range(6)]
-    return header_data
-
-
-def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
-    """Calculate crop coordinates based on header data
-
-    Args:
-        basin_header (list): Basin header data
-        radar_header (list): Radar header data
-
-    Returns:
-        tuple: (start_col, start_row, end_col, end_row) as integers
-    """
-    y0_radar = radar_header[3]
-    x0_radar = radar_header[2]
-
-    y0_basin = basin_header[3]
-    x0_basin = basin_header[2]
-
-    nrows_radar = radar_header[1]
-
-    nrows_basin = 2  # hardcoded, we always expect 2 rows
-    ncols_basin = 2  # hardcoded, we always expect 2 columns
-
-    cellres_radar = radar_header[4]
-    cellres_basin = basin_header[4]
-
-    xp = x0_basin - x0_radar
-    yp = y0_basin - y0_radar
-
-    xpp = ncols_basin * cellres_basin
-    ypp = nrows_basin * cellres_basin
-
-    start_col = np.floor(xp / cellres_radar)
-    end_col = np.ceil((xpp + xp) / cellres_radar)
-
-    start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
-    end_row = np.ceil(nrows_radar - (yp / cellres_radar))
-
-    #print(start_col, start_row, end_col, end_row)
-    return int(start_col), int(start_row), int(end_col), int(end_row)
-
-
-def extract_cropped_rain_data(location):
-    """Extract cropped rain data and create rainfall timeseries
-
-    Returns:
-        None
-    """
-    rainfile = []
-
-    # Create datetime list
-    datetime_list = []
-    print(location)
-    for f in glob.iglob(asc_mult_source):
-        # print(f)
-        radar_header = read_ascii_header(f)
-        start_col, start_row, end_col, end_row = calculate_crop_coords(
-            location, radar_header
-        )
-
-        start_col = int(round(start_col))
-        start_row = int(round(start_row))
-        end_col = int(round(end_col))
-        end_row = int(round(end_row))
-
-        cur_rawgrid = np.genfromtxt(
-            f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
-        )
-
-        cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
-        # Flatten the cropped rain data into a 1D array
-        cur_rainrow = cur_croppedrain.flatten()
-        rainfile.append(cur_rainrow)
-
-        # Extract datetime from filename
-        filename = f.split("/")[-1]  # Get just the filename
-        # 20240929 0015
-        date_str = filename[:8]  # YYYYMMDD
-        time_str = filename[8:12]  # HHMM
-
-        # Parse datetime
-        parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
-        datetime_list.append(parsed_date)
-
-    rainfile_arr = np.vstack(rainfile)
-
-    # Create DataFrame with datetime index
-    df = pd.DataFrame(rainfile_arr, index=datetime_list)
-    # sort the dataframe into date order
-    sorted_df = df.sort_index()
-    # add headers
-    header_row = ['rainfall_1', 'rainfall_2', 'rainfall_3', 'rainfall_4']
-    file_name = f"csv_files/{location[0]}_timeseries_data.csv"
-    sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
-
-
-if __name__ == "__main__":
-    locations = [
-        # loc name, loc id, x loc, y loc, resolution
-        ["BRICSC", "TM0816", 608500, 216500, 1000],
-        ["HEACSC", "TF6842", 568500, 342500, 1000],
-    ]
-    for place in locations:
-        extract_cropped_rain_data(place)
+class GenerateTimeseries:
+    def __init__(self, config):
+        self.config = config
+
+    def _read_ascii_header(self, ascii_raster_file: str) -> list:
+        """Reads header information from an ASCII DEM
+
+        Args:
+            ascii_raster_file (str): Path to the ASCII raster file
+
+        Returns:
+            list: Header data as a list of floats
+        """
+        with open(ascii_raster_file) as f:
+            header_data = [float(f.__next__().split()[1]) for x in range(6)]
+        return header_data
+
+    def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
+        """Calculate crop coordinates based on header data
+
+        Args:
+            basin_header (list): Basin header data
+            radar_header (list): Radar header data
+
+        Returns:
+            tuple: (start_col, start_row, end_col, end_row) as integers
+        """
+        y0_radar = radar_header[3]
+        x0_radar = radar_header[2]
+
+        y0_basin = basin_header[3]
+        x0_basin = basin_header[2]
+
+        nrows_radar = radar_header[1]
+
+        nrows_basin = 2  # hardcoded, likely to change?
+        ncols_basin = 2  # hardcoded, likely to change?
+
+        cellres_radar = radar_header[4]
+        cellres_basin = basin_header[4]
+
+        xp = x0_basin - x0_radar
+        yp = y0_basin - y0_radar
+
+        xpp = ncols_basin * cellres_basin
+        ypp = nrows_basin * cellres_basin
+
+        start_col = np.floor(xp / cellres_radar)
+        end_col = np.ceil((xpp + xp) / cellres_radar)
+
+        start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
+        end_row = np.ceil(nrows_radar - (yp / cellres_radar))
+
+        #print(start_col, start_row, end_col, end_row)
+        return int(start_col), int(start_row), int(end_col), int(end_row)
+
+    def extract_cropped_rain_data(self, location):
+        """Extract cropped rain data and create rainfall timeseries
+
+        Returns:
+            None
+        """
+        rainfile = []
+        datetime_list = []
+
+        for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
+            # print(f)
+            radar_header = self._read_ascii_header(f)
+            start_col, start_row, end_col, end_row = self._calculate_crop_coords(
+                location, radar_header
+            )
+
+            start_col = int(round(start_col))
+            start_row = int(round(start_row))
+            end_col = int(round(end_col))
+            end_row = int(round(end_row))
+
+            cur_rawgrid = np.genfromtxt(
+                f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
+            )
+
+            cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
+            # Flatten the cropped rain data into a 1D array
+            cur_rainrow = cur_croppedrain.flatten()
+            rainfile.append(cur_rainrow[2]/32)
+
+            # Extract datetime from filename
+            filename = f.split("/")[-1]  # Get just the filename
+            date_str = filename[:8]  # YYYYMMDD
+            time_str = filename[8:12]  # HHMM
+
+            # Parse datetime
+            parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
+            datetime_list.append(parsed_date)
+
+        rainfile_arr = np.vstack(rainfile)
+
+        # Create DataFrame with datetime index
+        df = pd.DataFrame(rainfile_arr, index=datetime_list)
+        # sort the dataframe into date order
+        sorted_df = df.sort_index()
+        # add headers
+        header_row = [location[1]]
+        file_name = f"csv_files/{location[0]}_timeseries_data.csv"
+        sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
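For intuition, the crop window maths in _calculate_crop_coords can be checked by hand. A worked sketch using the BRICSC row from main; the radar origin and row count are illustrative assumptions (in the pipeline they come from the .asc header):

    import numpy as np

    # Assumed radar grid: origin (0, 0), 2175 rows, 1000 m cells (illustrative only).
    x0_radar, y0_radar, nrows_radar, cellres_radar = 0, 0, 2175, 1000
    # BRICSC location row: x 608500, y 216500, 1000 m resolution, 2x2 cells.
    x0_basin, y0_basin, cellres_basin = 608500, 216500, 1000
    nrows_basin = ncols_basin = 2

    xp, yp = x0_basin - x0_radar, y0_basin - y0_radar         # offsets from radar origin
    xpp, ypp = ncols_basin * cellres_basin, nrows_basin * cellres_basin  # window extent

    start_col = int(np.floor(xp / cellres_radar))                        # 608
    end_col = int(np.ceil((xpp + xp) / cellres_radar))                   # 611
    start_row = int(np.floor(nrows_radar - (yp + ypp) / cellres_radar))  # 1956
    end_row = int(np.ceil(nrows_radar - yp / cellres_radar))             # 1959
    print(start_col, start_row, end_col, end_row)

The floor/ceil pair widens the window to whole radar cells (3 x 3 here), and cur_rainrow[2]/32 in the new method then samples one cell of the flattened window; dividing by 32 matches the Nimrod convention of storing rain rates as 32 times mm/h.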