chore: 🧹 Cleaning and organising

This commit is contained in:
2025-11-11 08:02:34 +00:00
parent 9ee4fa1e8c
commit 71af242dcb
8 changed files with 146 additions and 134 deletions
+2
View File
@@ -0,0 +1,2 @@
from .nimrod import Nimrod
from .batch_nimrod import process_nimrod_files
+44
View File
@@ -0,0 +1,44 @@
from modules.nimrod import Nimrod
import os
from pathlib import Path
import logging
class BatchNimrod():
def __init__(self, config) -> None:
self.config = config
def process_nimrod_files(self) -> None:
"""
Process all Nimrod files in the input directory, applying bounding box clipping
and exporting to ASC format.
This function reads all files from IN_TOP_FOLDER, applies the appropriate bounding
box for each area, and exports clipped raster data to OUT_TOP_FOLDER.
"""
# Read all file names in the folder
files_to_process = [f for f in os.listdir(Path(self.config.IN_TOP_FOLDER))]
logging.info(f"Processing {len(files_to_process)} files...")
for in_file in os.listdir(Path(self.config.IN_TOP_FOLDER)):
in_file_full = Path(self.config.IN_TOP_FOLDER, in_file)
try:
image = Nimrod(open(in_file_full, "rb"))
out_file_name = f"{image.get_validity_time()}.asc"
out_file_path = Path(self.config.OUT_TOP_FOLDER, out_file_name)
with open(out_file_path, "w") as outfile:
image.extract_asc(outfile)
logging.debug(f"Successfully processed: {in_file_full}")
except Nimrod.HeaderReadError as e:
logging.error(f"Failed to read file {in_file_full}, is it corrupt?")
logging.error(e)
continue
except Nimrod.PayloadReadError as e:
logging.error(f"Failed to load the raster data in {in_file_full}")
logging.error(e)
continue
+127
View File
@@ -0,0 +1,127 @@
from __future__ import division, print_function
import numpy as np
import glob
import pandas as pd
from datetime import datetime
# Configuration
asc_path = "asc_files/"
asc_wildcard_file = "*.asc"
asc_mult_source = asc_path + asc_wildcard_file
def read_ascii_header(ascii_raster_file: str) -> list:
"""Reads header information from an ASCII DEM
Args:
ascii_raster_file (str): Path to the ASCII raster file
Returns:
list: Header data as a list of floats
"""
with open(ascii_raster_file) as f:
header_data = [float(f.__next__().split()[1]) for x in range(6)]
return header_data
def calculate_crop_coords(basin_header: list, radar_header: list) -> tuple:
"""Calculate crop coordinates based on header data
Args:
basin_header (list): Basin header data
radar_header (list): Radar header data
Returns:
tuple: (start_col, start_row, end_col, end_row) as integers
"""
y0_radar = radar_header[3]
x0_radar = radar_header[2]
y0_basin = basin_header[3]
x0_basin = basin_header[2]
nrows_radar = radar_header[1]
nrows_basin = 2 # hardcoded, we always expect 2 rows
ncols_basin = 2 # hardcoded, we always expect 2 columns
cellres_radar = radar_header[4]
cellres_basin = basin_header[4]
xp = x0_basin - x0_radar
yp = y0_basin - y0_radar
xpp = ncols_basin * cellres_basin
ypp = nrows_basin * cellres_basin
start_col = np.floor(xp / cellres_radar)
end_col = np.ceil((xpp + xp) / cellres_radar)
start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
end_row = np.ceil(nrows_radar - (yp / cellres_radar))
#print(start_col, start_row, end_col, end_row)
return int(start_col), int(start_row), int(end_col), int(end_row)
def extract_cropped_rain_data(location):
"""Extract cropped rain data and create rainfall timeseries
Returns:
None
"""
rainfile = []
# Create datetime list
datetime_list = []
print(location)
for f in glob.iglob(asc_mult_source):
# print(f)
radar_header = read_ascii_header(f)
start_col, start_row, end_col, end_row = calculate_crop_coords(
location, radar_header
)
start_col = int(round(start_col))
start_row = int(round(start_row))
end_col = int(round(end_col))
end_row = int(round(end_row))
cur_rawgrid = np.genfromtxt(
f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
)
cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
# Flatten the cropped rain data into a 1D array
cur_rainrow = cur_croppedrain.flatten()
rainfile.append(cur_rainrow)
# Extract datetime from filename
filename = f.split("/")[-1] # Get just the filename
# 20240929 0015
date_str = filename[:8] # YYYYMMDD
time_str = filename[8:12] # HHMM
# Parse datetime
parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
datetime_list.append(parsed_date)
rainfile_arr = np.vstack(rainfile)
# Create DataFrame with datetime index
df = pd.DataFrame(rainfile_arr, index=datetime_list)
# sort the dataframe into date order
sorted_df = df.sort_index()
# add headers
header_row = ['rainfall_1', 'rainfall_2', 'rainfall_3', 'rainfall_4']
file_name = f"csv_files/{location[0]}_timeseries_data.csv"
sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
if __name__ == "__main__":
locations = [
# loc name, loc id, x loc, y loc, resolution
["BRICSC", "TM0816", 608500, 216500, 1000],
["HEACSC", "TF6842", 568500, 342500, 1000],
]
for place in locations:
extract_cropped_rain_data(place)
+518
View File
@@ -0,0 +1,518 @@
#!/usr/bin/python3
"""
Extract data from UK Met Office Rain Radar NIMROD image files.
Parse NIMROD format image files, display header data and allow extraction of
raster image to an ESRI ASCII (.asc) format file. A bounding box may be
specified to clip the image to the area of interest. Can be imported as a
Python module or run directly as a command line script.
Author: Richard Thomas
Version: 1.0 (13 April 2015)
Public Repository: https://github.com/richard-thomas/MetOffice_NIMROD
Command line usage:
python nimrod.py [-h] [-q] [-x] [-bbox XMIN XMAX YMIN YMAX] [infile] [outfile]
positional arguments:
infile (Uncompressed) NIMROD input filename
outfile Output raster filename (*.asc)
optional arguments:
-h, --help show this help message and exit
-q, --query Display metadata
-x, --extract Extract raster file in ASC format
-bbox XMIN XMAX YMIN YMAX
Bounding box to clip raster data to
Note that any bounding box must be specified in the same units and projection
as the input file. The bounding box does not need to be contained by the input
raster but must intersect it.
Example command line usage:
python nimrod.py -bbox 279906 285444 283130 290440
-xq 200802252000_nimrod_ng_radar_rainrate_composite_1km_merged_UK_zip
plynlimon_catchments_rainfall.asc
Example Python module usage:
import nimrod
a = nimrod.Nimrod(open(
'200802252000_nimrod_ng_radar_rainrate_composite_1km_merged_UK_zip'))
a.query()
a.extract_asc(open('full_raster.asc', 'w'))
a.apply_bbox(279906, 285444, 283130, 290440)
a.query()
a.extract_asc(open('clipped_raster.asc', 'w'))
Notes:
1. Valid for v1.7 and v2.6-4 of NIMROD file specification
2. Assumes image origin is top left (i.e. that header[24] = 0)
3. Tested on UK composite 1km and 5km data, under Linux and Windows XP
4. Further details of NIMROD data and software at the NERC BADC website:
http://badc.nerc.ac.uk/browse/badc/ukmo-nimrod/
Copyright (c) 2015 Richard Thomas
(Nimrod.__init__() method based on read_nimrod.py by Charles Kilburn Aug 2008)
This program is free software: you can redistribute it and/or modify
it under the terms of the Artistic License 2.0 as published by the
Open Source Initiative (http://opensource.org/licenses/Artistic-2.0)
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
"""
import sys
import struct
import array
import argparse
from typing import IO
class Nimrod:
"""
Reading, querying and processing of NIMROD format rainfall data files.
This class provides functionality to parse NIMROD format files, display
header information, and extract raster data to ESRI ASCII format files.
Attributes:
hdr_element (List): List of all header elements indexed by element number
nrows (int): Number of rows in the raster image
ncols (int): Number of columns in the raster image
n_data_specific_reals (int): Number of data-specific real header entries
n_data_specific_ints (int): Number of data-specific integer header entries
y_top (float): Top northing coordinate
y_pixel_size (float): Size of pixel in y-direction
x_left (float): Left easting coordinate
x_pixel_size (float): Size of pixel in x-direction
x_right (float): Right easting coordinate
y_bottom (float): Bottom northing coordinate
data (array.array): Raster data array
"""
class RecordLenError(Exception):
"""
Exception Type: NIMROD record length read from file not as expected.
Attributes:
message (str): Error message describing the problem
"""
def __init__(self, actual: int, expected: int, location: str) -> None:
"""
Initialize the RecordLenError with details about the error.
Args:
actual (int): Actual record length read from file
expected (int): Expected record length
location (str): Description of position in file where error occurred
"""
self.message = "Incorrect record length %d bytes (expected %d) at %s." % (
actual,
expected,
location,
)
class HeaderReadError(Exception):
"""
Exception Type: Read error whilst parsing NIMROD header elements.
This exception is raised when there are issues reading the header data
from a NIMROD file.
"""
pass
class PayloadReadError(Exception):
"""
Exception Type: Read error whilst parsing NIMROD raster data.
This exception is raised when there are issues reading the raster data
payload from a NIMROD file.
"""
pass
class BboxRangeError(Exception):
"""
Exception Type: Bounding box specified out of range of raster image.
This exception is raised when a bounding box is specified that does not
intersect with the raster image data.
"""
pass
def __init__(self, infile: IO[bytes]) -> None:
"""
Parse all header and data info from a NIMROD data file into this object.
Args:
infile (IO[bytes]): NIMROD file object opened for binary reading
Raises:
RecordLenError: NIMROD record length read from file not as expected
HeaderReadError: Read error whilst parsing NIMROD header elements
PayloadReadError: Read error whilst parsing NIMROD raster data
"""
def check_record_len(infile: IO[bytes], expected: int, location: str) -> None:
"""
Check record length in C struct is as expected.
Args:
infile (IO[bytes]): file to read from
expected (int): expected value of record length read
location (str): description of position in file (for reporting)
Raises:
HeaderReadError: Read error whilst reading record length
RecordLenError: Unexpected NIMROD record length read from file
"""
# Unpack length from C struct (Big Endian, 4-byte long)
try:
(record_length,) = struct.unpack(">l", infile.read(4))
except Exception:
raise Nimrod.HeaderReadError
if record_length != expected:
raise Nimrod.RecordLenError(record_length, expected, location)
# Header should always be a fixed length record
check_record_len(infile, 512, "header start")
try:
# Read first 31 2-byte integers (header fields 1-31)
gen_ints = array.array("h")
data = infile.read(31 * 2) # 31 integers * 2 bytes each
gen_ints.frombytes(data)
gen_ints.byteswap()
# Read next 28 4-byte floats (header fields 32-59)
gen_reals = array.array("f")
data = infile.read(28 * 4) # 28 floats * 4 bytes each
gen_reals.frombytes(data)
gen_reals.byteswap()
# Read next 45 4-byte floats (header fields 60-104)
spec_reals = array.array("f")
data = infile.read(45 * 4) # 45 floats * 4 bytes each
spec_reals.frombytes(data)
spec_reals.byteswap()
# Read next 56 characters (header fields 105-107)
characters = infile.read(56)
# Read next 51 2-byte integers (header fields 108-)
spec_ints = array.array("h")
data = infile.read(51 * 2) # 51 integers * 2 bytes each
spec_ints.frombytes(data)
spec_ints.byteswap()
except Exception:
infile.close()
raise Nimrod.HeaderReadError
check_record_len(infile, 512, "header end")
# Extract strings and make duplicate entries to give meaningful names
chars = characters.decode("utf-8")
self.units = chars[0:8]
self.data_source = chars[8:32]
self.title = chars[32:55]
# Store header values in a list so they can be indexed by "element
# number" shown in NIMROD specification (starts at 1)
self.hdr_element = [None] # Dummy value at element 0
self.hdr_element.extend(gen_ints)
self.hdr_element.extend(gen_reals)
self.hdr_element.extend(spec_reals)
self.hdr_element.extend([self.units])
self.hdr_element.extend([self.data_source])
self.hdr_element.extend([self.title])
self.hdr_element.extend(spec_ints)
# Duplicate some of values to give more meaningful names
self.nrows = self.hdr_element[16]
self.ncols = self.hdr_element[17]
self.n_data_specific_reals = self.hdr_element[22]
self.n_data_specific_ints = self.hdr_element[23] + 1
# Note "+ 1" because header value is count from element 109
self.y_top = self.hdr_element[34]
self.y_pixel_size = self.hdr_element[35]
self.x_left = self.hdr_element[36]
self.x_pixel_size = self.hdr_element[37]
# Calculate other image bounds (note these are pixel centres)
self.x_right = self.x_left + self.x_pixel_size * (self.ncols - 1)
self.y_bottom = self.y_top - self.y_pixel_size * (self.nrows - 1)
# Read payload (actual raster data)
array_size = self.ncols * self.nrows
check_record_len(infile, array_size * 2, "data start")
self.data = array.array("h")
try:
data = infile.read(array_size * 2)
self.data.frombytes(data)
self.data.byteswap()
except Exception:
infile.close()
raise Nimrod.PayloadReadError
check_record_len(infile, array_size * 2, "data end")
infile.close()
def get_validity_time(self) -> str:
"""
Extract validity time from NIMROD file header and format as string.
Returns:
str: Validity time formatted as 'YYYYMMDDHHMM'
"""
return "%4.4d%2.2d%2.2d%2.2d%2.2d" % (
self.hdr_element[1],
self.hdr_element[2],
self.hdr_element[3],
self.hdr_element[4],
self.hdr_element[5],
)
def query(self) -> None:
"""
Print complete NIMROD file header information.
This method displays all header information from the parsed NIMROD file
in a formatted manner including general headers, data-specific headers,
and character headers along with validity time and coordinate ranges.
"""
print("NIMROD file raw header fields listed by element number:")
print("General (Integer) header entries:")
for i in range(1, 32):
print(i, "\t", self.hdr_element[i])
print("General (Real) header entries:")
for i in range(32, 60):
print(i, "\t", self.hdr_element[i])
print("Data Specific (Real) header entries (%d):" % self.n_data_specific_reals)
for i in range(60, 60 + self.n_data_specific_reals):
print(i, "\t", self.hdr_element[i])
print(
"Data Specific (Integer) header entries (%d):" % self.n_data_specific_ints
)
for i in range(108, 108 + self.n_data_specific_ints):
print(i, "\t", self.hdr_element[i])
print("Character header entries:")
print(" 105 Units: ", self.units)
print(" 106 Data source: ", self.data_source)
print(" 107 Title of field: ", self.title)
# Print out info & header fields
# Note that ranges are given to the edge of each pixel
print(
"\nValidity Time: %2.2d:%2.2d on %2.2d/%2.2d/%4.4d"
% (
self.hdr_element[4],
self.hdr_element[5],
self.hdr_element[3],
self.hdr_element[2],
self.hdr_element[1],
)
)
print(
"Easting range: %.1f - %.1f (at pixel steps of %.1f)"
% (
self.x_left - self.x_pixel_size / 2,
self.x_right + self.x_pixel_size / 2,
self.x_pixel_size,
)
)
print(
"Northing range: %.1f - %.1f (at pixel steps of %.1f)"
% (
self.y_bottom - self.y_pixel_size / 2,
self.y_top + self.y_pixel_size / 2,
self.y_pixel_size,
)
)
print("Image size: %d rows x %d cols" % (self.nrows, self.ncols))
def apply_bbox(self, xmin: float, xmax: float, ymin: float, ymax: float) -> None:
"""
Clip raster data to all pixels that intersect specified bounding box.
Note that existing object data is modified and all header values
affected are appropriately adjusted. Because pixels are specified by
their centre points, a bounding box that comes within half a pixel
width of the raster edge will intersect with the pixel.
Args:
xmin (float): Most negative easting or longitude of bounding box
xmax (float): Most positive easting or longitude of bounding box
ymin (float): Most negative northing or latitude of bounding box
ymax (float): Most positive northing or latitude of bounding box
Raises:
BboxRangeError: Bounding box specified out of range of raster image
"""
# Check if there is no overlap of bounding box with raster
if (
xmin > self.x_right + self.x_pixel_size / 2
or xmax < self.x_left - self.x_pixel_size / 2
or ymin > self.y_top + self.y_pixel_size / 2
or ymax < self.y_bottom - self.x_pixel_size / 2
):
raise Nimrod.BboxRangeError
# Limit bounds to within raster image
xmin = max(xmin, self.x_left)
xmax = min(xmax, self.x_right)
ymin = max(ymin, self.y_bottom)
ymax = min(ymax, self.y_top)
# Calculate min and max pixel index in each row and column to use
# Note addition of 0.5 as x_left location is centre of pixel
# ('int' truncates floats towards zero)
xMinPixelId = int((xmin - self.x_left) / self.x_pixel_size + 0.5)
xMaxPixelId = int((xmax - self.x_left) / self.x_pixel_size + 0.5)
# For y (northings), note the first data row stored is most north
yMinPixelId = int((self.y_top - ymax) / self.y_pixel_size + 0.5)
yMaxPixelId = int((self.y_top - ymin) / self.y_pixel_size + 0.5)
bbox_data = []
for i in range(yMinPixelId, yMaxPixelId + 1):
bbox_data.extend(
self.data[
i * self.ncols + xMinPixelId : i * self.ncols + xMaxPixelId + 1
]
)
# Update object where necessary
self.data = bbox_data
self.x_right = self.x_left + xMaxPixelId * self.x_pixel_size
self.x_left += xMinPixelId * self.x_pixel_size
self.ncols = xMaxPixelId - xMinPixelId + 1
self.y_bottom = self.y_top - yMaxPixelId * self.y_pixel_size
self.y_top -= yMinPixelId * self.y_pixel_size
self.nrows = yMaxPixelId - yMinPixelId + 1
self.hdr_element[16] = self.nrows
self.hdr_element[17] = self.ncols
self.hdr_element[34] = self.y_top
self.hdr_element[36] = self.x_left
def extract_asc(self, outfile: IO[str]) -> None:
"""
Write raster data to an ESRI ASCII (.asc) format file.
Args:
outfile (IO[str]): file object opened for writing text
Raises:
ValueError: If x_pixel_size != y_pixel_size (non-square pixels)
"""
# As ESRI ASCII format only supports square pixels, warn if not so
if self.x_pixel_size != self.y_pixel_size:
print(
"Warning: x_pixel_size(%d) != y_pixel_size(%d)"
% (self.x_pixel_size, self.y_pixel_size)
)
# Write header to output file. Note that data is valid at the centre
# of each pixel so "xllcenter" rather than "xllcorner" must be used
outfile.write("ncols %d\n" % self.ncols)
outfile.write("nrows %d\n" % self.nrows)
outfile.write("xllcenter %d\n" % self.x_left)
outfile.write("yllcenter %d\n" % self.y_bottom)
outfile.write("cellsize %.1f\n" % self.y_pixel_size)
outfile.write("nodata_value %.1f\n" % self.hdr_element[38])
# Write raster data to output file
for i in range(self.nrows):
for j in range(self.ncols - 1):
outfile.write("%d " % self.data[i * self.ncols + j])
outfile.write("%d\n" % self.data[i * self.ncols + self.ncols - 1])
outfile.close()
# -------------------------------------------------------------------------------
# Handle if called as a command line script
# (And as an example of how to invoke class methods from an importing module)
# -------------------------------------------------------------------------------
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Extract information and data from a NIMROD format file",
epilog="""Note that any bounding box must be specified in the same
units and projection as the input file. The bounding box
does not need to be contained by the input raster but
must intersect it.""",
)
parser.add_argument("-q", "--query", action="store_true", help="Display metadata")
parser.add_argument(
"-x", "--extract", action="store_true", help="Extract raster file in ASC format"
)
parser.add_argument(
"infile",
nargs="?",
type=argparse.FileType("rb"),
default=sys.stdin,
help="(Uncompressed) NIMROD input filename",
)
parser.add_argument(
"outfile",
nargs="?",
type=argparse.FileType("w"),
default=sys.stdout,
help="Output raster filename (*.asc)",
)
parser.add_argument(
"-bbox",
type=float,
nargs=4,
metavar=("XMIN", "XMAX", "YMIN", "YMAX"),
help="Bounding box to clip raster data to",
)
args = parser.parse_args()
if not args.query and not args.extract:
parser.print_help()
sys.exit(1)
# Initialise data object by reading NIMROD file
# (Only trap record length exception as others self-explanatory)
try:
rainfall_data = Nimrod(args.infile)
except Nimrod.RecordLenError as error:
sys.stderr.write("ERROR: %s\n" % error.message)
sys.exit(1)
if args.bbox:
print("Trimming NIMROD raster to bounding box...")
try:
rainfall_data.apply_bbox(*args.bbox)
except Nimrod.BboxRangeError:
sys.stderr.write("ERROR: bounding box not within raster image.\n")
sys.exit(1)
# Perform query after any bounding box trimming to allow sanity checking of
# size of resulting image
if args.query:
rainfall_data.query()
if args.extract:
print("Extracting NIMROD raster to ASC file...")
print(
" Outputting data array (%d rows x %d cols = %d pixels)"
% (
rainfall_data.nrows,
rainfall_data.ncols,
rainfall_data.nrows * rainfall_data.ncols,
)
)
rainfall_data.extract_asc(args.outfile)