Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
d386317957
|
|||
|
1c6418e044
|
@@ -10,6 +10,8 @@ wheels/
|
||||
.venv
|
||||
|
||||
dat_other/*
|
||||
tar_files/*
|
||||
gz_files/*
|
||||
dat_files/*
|
||||
asc_files/*
|
||||
csv_files/*
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# UK Met Office Rain Radar NIMROD Data Processor
|
||||
|
||||
This project provides tools for processing UK Met Office Rain Radar NIMROD image files. It allows extraction of raster data from NIMROD .dat format files and conversion to ESRI ASCII (.asc) format. It also allows the creation of timeseries data from the ASC files.
|
||||
This project provides tools for processing UK Met Office Rain Radar NIMROD image files. It allows extraction of raster data from NIMROD .dat format files and conversion to ESRI ASCII (.asc) format. It also allows the creation of timeseries data from the ASC files, formatted for Infoworks ICM.
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -9,15 +9,22 @@ The project consists of a main pipeline workflow that processes multiple modules
|
||||
- `main.py`: Main pipeline orchestrator that calls on the modules as needed
|
||||
- `batch_nimrod.py`: Module for batch processing multiple NIMROD files with configurable bounding boxes
|
||||
- `generate_timeseries.py`: Module for extracting cropped rain data and creating rainfall timeseries
|
||||
- `extract.py`: Module for extracting the dat files from the .gz.tar files that are downloaded from source
|
||||
|
||||
## Features
|
||||
|
||||
### main.py
|
||||
|
||||
- Orchestrates the entire workflow pipeline
|
||||
- Uncompresses the packed .gz.tar files to DAT files
|
||||
- Processes DAT files to ASC format
|
||||
- Generates timeseries data for specified locations
|
||||
- Combines grouped CSV files into consolidated datasets
|
||||
- Generates timeseries data for specified locations
|
||||
- Combines grouped CSV files into consolidated datasets formatted for Infoworks ICM
|
||||
|
||||
### extract.py
|
||||
|
||||
- First unpacks each .gz.tar file into its 288 .gz files (one day of data)
|
||||
- Converts all .gz files to .dat files ready for processing.
|
||||
|
||||
### batch_nimrod.py
|
||||
|
||||
@@ -44,24 +51,28 @@ It is recommended to use UV for environment and package handling.
|
||||
|
||||
1. Ensure all required packages are installed `uv sync`
|
||||
1. Adjust the config.py file to match your needs.
|
||||
1. Ensure your .dat files are in the DAT_TOP_FOLDER (as per config location)
|
||||
1. Ensure your .gz.tar files are in the TAR_TOP_FOLDER (as per config location)
|
||||
1. Ensure your zone csv files are in the ZONE_FOLDER (as per config location)
|
||||
1. Run the main pipeline with `uv run main.py`. Note that you must set the environment variable `PYTHON_GIL=0` first.
|
||||
1. Find the output in the COMBINED_FOLDER (as per config location)
|
||||
|
||||
The main pipeline will:
|
||||
|
||||
1. Process DAT files to ASC format if needed
|
||||
1. Uncompress the .gz.tar files ready for processing
|
||||
1. Process DAT files to ASC format
|
||||
1. Generate timeseries data for specified locations
|
||||
1. Combine grouped CSV files into consolidated datasets
|
||||
1. Combine grouped locations into consolidated datasets
|
||||
|
||||
## Configuration
|
||||
|
||||
The `config.py` file defines folder paths:
|
||||
The `config.py` file defines folder paths and file deletion options:
|
||||
|
||||
- DAT_TOP_FOLDER: "./dat_files"
|
||||
- ASC_TOP_FOLDER: "./asc_files"
|
||||
- COMBINED_FOLDER: "./combined_files"
|
||||
- TAR_TOP_FOLDER = "./tar_files"
|
||||
- GZ_TOP_FOLDER = "./gz_files"
|
||||
- DAT_TOP_FOLDER = "./dat_files"
|
||||
- ASC_TOP_FOLDER = "./asc_files"
|
||||
- COMBINED_FOLDER = "./combined_files"
|
||||
- ZONE_FOLDER = "./zone_inputs"
|
||||
|
||||
Example of how the zone csv files should look:
|
||||
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
class Config:
    """Folder locations and clean-up switches shared by the pipeline modules.

    Every value is a plain class attribute, so callers read them directly
    from the class (``Config.TAR_TOP_FOLDER``) without instantiating it.
    """

    # Folder holding the downloaded .gz.tar archives.
    TAR_TOP_FOLDER = "./tar_files"
    # Folder the archives are unpacked into (one sub-folder per archive).
    GZ_TOP_FOLDER = "./gz_files"
    # Folder holding the uncompressed NIMROD .dat files.
    DAT_TOP_FOLDER = "./dat_files"
    # Folder for the ESRI ASCII (.asc) files.
    ASC_TOP_FOLDER = "./asc_files"
    # Folder where the final combined output is written.
    COMBINED_FOLDER = "./combined_files"

    # Folder holding the zone csv input files.
    ZONE_FOLDER = "./zone_inputs"

    # Clean-up switches: when True, the intermediate files of that stage are
    # removed once they have been processed.
    # NOTE(review): the code that reads the dat/asc switches is not visible
    # here - confirm their exact effect against the consuming modules.
    delete_tar_after_processing = False
    delete_gz_after_processing = True
    delete_dat_after_processing = True
    delete_asc_after_processing = True
|
||||
|
||||
@@ -6,12 +6,13 @@ import concurrent.futures
|
||||
from pathlib import Path
|
||||
|
||||
from config import Config
|
||||
from modules import BatchNimrod, GenerateTimeseries
|
||||
from modules import BatchNimrod, GenerateTimeseries, Extract
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
|
||||
|
||||
def process_pipeline(dat_file):
|
||||
# 1. Process DAT to ASC
|
||||
asc_file = batch._process_single_file(dat_file)
|
||||
@@ -22,9 +23,21 @@ def process_pipeline(dat_file):
|
||||
file_results = timeseries.process_asc_file(asc_file, locations)
|
||||
return file_results
|
||||
|
||||
|
||||
def initialise_folders():
    """Create the pipeline's working folders if they do not exist yet.

    Covers the ASC, combined, GZ, DAT and TAR folders named in ``Config``.
    Existing folders are left untouched.
    """
    folder_list = [
        Config.ASC_TOP_FOLDER,
        Config.COMBINED_FOLDER,
        Config.GZ_TOP_FOLDER,
        Config.DAT_TOP_FOLDER,
        Config.TAR_TOP_FOLDER,
    ]
    for path in folder_list:
        # parents=True so a nested configured path (e.g. "./data/asc_files")
        # is created in full instead of raising FileNotFoundError.
        Path(path).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
|
||||
os.makedirs(Path(Config.COMBINED_FOLDER), exist_ok=True)
|
||||
initialise_folders()
|
||||
|
||||
locations = []
|
||||
zones = set()
|
||||
@@ -44,6 +57,7 @@ if __name__ == "__main__":
|
||||
logging.info(f"Count of 1km Grids: {len(locations)}")
|
||||
logging.info(f"Count of Zones: {len(zones)}")
|
||||
|
||||
extraction = Extract(Config)
|
||||
batch = BatchNimrod(Config)
|
||||
timeseries = GenerateTimeseries(Config, locations)
|
||||
|
||||
@@ -55,6 +69,9 @@ if __name__ == "__main__":
|
||||
# Initialize results structure
|
||||
results = {loc[0]: {"dates": [], "values": []} for loc in locations}
|
||||
|
||||
logging.info("Extracting tar and gz files")
|
||||
extraction.run_extraction()
|
||||
|
||||
# Get list of DAT files
|
||||
dat_files = [
|
||||
f for f in os.listdir(Path(Config.DAT_TOP_FOLDER)) if not f.startswith(".")
|
||||
|
||||
+2
-5
@@ -1,9 +1,6 @@
|
||||
from .nimrod import Nimrod
|
||||
from .batch_nimrod import BatchNimrod
|
||||
from .generate_timeseries import GenerateTimeseries
|
||||
from .extract import Extract
|
||||
|
||||
__all__ = [
|
||||
"Nimrod",
|
||||
"BatchNimrod",
|
||||
"GenerateTimeseries",
|
||||
]
|
||||
__all__ = ["Nimrod", "BatchNimrod", "GenerateTimeseries", "Extract"]
|
||||
|
||||
Executable
+62
@@ -0,0 +1,62 @@
|
||||
import tarfile
|
||||
import gzip
|
||||
import shutil
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class Extract:
|
||||
# Directory containing .tar files
|
||||
def __init__(self, Config):
|
||||
self.config = Config
|
||||
|
||||
def _extract_tar(self):
|
||||
for tar_file in os.listdir(self.config.TAR_TOP_FOLDER):
|
||||
# only handle .tar files
|
||||
if not tar_file.endswith(".tar"):
|
||||
pass
|
||||
|
||||
tar_path = Path(self.config.TAR_TOP_FOLDER, tar_file)
|
||||
|
||||
# Create a folder for extracted tar contents
|
||||
extract_folder = Path(
|
||||
self.config.GZ_TOP_FOLDER, tar_file.replace(".tar", "")
|
||||
)
|
||||
Path(extract_folder).mkdir(exist_ok=True)
|
||||
|
||||
# Extract .tar file
|
||||
with tarfile.open(tar_path, "r") as tar:
|
||||
tar.extractall(path=extract_folder)
|
||||
|
||||
if self.config.delete_tar_after_processing:
|
||||
os.remove(tar_path)
|
||||
|
||||
def _extract_gz(self):
|
||||
for root, _, files in os.walk(self.config.GZ_TOP_FOLDER):
|
||||
for file in files:
|
||||
# only handle .gz files
|
||||
if not file.endswith(".dat.gz"):
|
||||
pass # adjust if extension differs
|
||||
gz_path = Path(root, file)
|
||||
dat_path = Path(self.config.DAT_TOP_FOLDER, file.replace(".gz", ""))
|
||||
|
||||
# Unzip .gz file
|
||||
with gzip.open(gz_path, "rb") as f_in:
|
||||
with open(dat_path, "wb") as f_out:
|
||||
shutil.copyfileobj(f_in, f_out)
|
||||
|
||||
if self.config.delete_gz_after_processing:
|
||||
os.remove(gz_path)
|
||||
|
||||
try:
|
||||
shutil.rmtree(self.config.GZ_TOP_FOLDER)
|
||||
print("processing complete and GZ files deleted")
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
print(
|
||||
f"processing complete but GZ folder delete failed. Please delete manually ({self.config.GZ_TOP_FOLDER})"
|
||||
)
|
||||
|
||||
def run_extraction(self):
|
||||
self._extract_tar()
|
||||
self._extract_gz()
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "met-office"
|
||||
version = "1.1.1"
|
||||
version = "1.2.0"
|
||||
description = "Convert .dat nimrod files to .asc files"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.14"
|
||||
|
||||
Reference in New Issue
Block a user