chore: ⚙️ added option to delete processing files
This commit is contained in:
@@ -1,101 +0,0 @@
|
||||
# UK Met Office Rain Radar NIMROD Data Processor
|
||||
|
||||
This project provides tools for processing UK Met Office Rain Radar NIMROD image files. It allows extraction of raster data from NIMROD .dat format files and conversion to ESRI ASCII (.asc) format with optional bounding box clipping.
|
||||
|
||||
## Overview
|
||||
|
||||
The project consists of a main pipeline workflow that runs multiple modules in sequence:
|
||||
- `main.py`: Main pipeline orchestrator that calls on the modules as needed
|
||||
- `batch_nimrod.py`: Module for batch processing multiple NIMROD files with configurable bounding boxes
|
||||
- `generate_timeseries.py`: Module for extracting cropped rain data and creating rainfall timeseries
|
||||
- `combine_timeseries.py`: Module for combining grouped timeseries CSVs into consolidated datasets
|
||||
|
||||
## Features
|
||||
|
||||
### main.py
|
||||
- Orchestrates the entire workflow pipeline
|
||||
- Processes DAT files to ASC format
|
||||
- Generates timeseries data for specified locations
|
||||
- Combines grouped CSV files into consolidated datasets
|
||||
|
||||
### batch_nimrod.py
|
||||
- Process multiple NIMROD dat files
|
||||
- Automatically extract datetime from file data
|
||||
- Export clipped raster data to ASC format
|
||||
|
||||
### generate_timeseries.py
|
||||
- Extract cropped rain data based on specified locations
|
||||
- Create rainfall timeseries CSVs for each location
|
||||
- Parse datetime from filename and create proper datetime index
|
||||
|
||||
### combine_timeseries.py
|
||||
- Combine multiple timeseries CSV files into grouped datasets
|
||||
- Group locations by specified output groups
|
||||
- Create consolidated CSV files for each group
|
||||
|
||||
## Usage
|
||||
|
||||
It is recommended to use UV for environment and package handling.
|
||||
[Link to uv install](https://docs.astral.sh/uv/getting-started/installation/)
|
||||
|
||||
|
||||
### Main Pipeline (main.py)
|
||||
```bash
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
The main pipeline will:
|
||||
1. Process DAT files to ASC format if needed
|
||||
2. Generate timeseries data for specified locations
|
||||
3. Combine grouped CSV files into consolidated datasets
|
||||
|
||||
## Configuration
|
||||
|
||||
The `config.py` file defines folder paths:
|
||||
- DAT_TOP_FOLDER: "./dat_files"
|
||||
- ASC_TOP_FOLDER: "./asc_files"
|
||||
- CSV_TOP_FOLDER: "./csv_files"
|
||||
- COMBINED_FOLDER: "./combined_files"
|
||||
|
||||
The `main.py` script defines locations and their properties:
|
||||
- Location name (e.g., "BRICSC")
|
||||
- Location ID (e.g., "TM0816")
|
||||
- X coordinate (e.g., 608500)
|
||||
- Y coordinate (e.g., 216500)
|
||||
- Output group (e.g., 1)
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
dat_files/
|
||||
└──*.dat files
|
||||
|
||||
asc_files/
|
||||
└──*.asc files
|
||||
|
||||
csv_files/
|
||||
├── TQ1234_timeseries_data.csv
|
||||
├── ...
|
||||
└── TQ5678_timeseries_data.csv
|
||||
combined_files/
|
||||
├── zone_1_timeseries_data.csv
|
||||
├── ...
|
||||
└── zone_50_timeseries_data.csv
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.12+
|
||||
- [UV Installed](https://docs.astral.sh/uv/getting-started/installation/)
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
[Richard Thomas - Original Nimrod dat to asc file conversion](https://github.com/richard-thomas/MetOffice_NIMROD)
|
||||
[Declan Valters - building the timeseries from the asc files](https://github.com/dvalters/NIMROD-toolbox)
|
||||
|
||||
## Version update 2025
|
||||
|
||||
Updated by Jake Pullen for use by Anglian Water.
|
||||
Added the batch_nimrod module to convert large amounts of files
|
||||
Cleaned up the original code and added docstrings & type hints
|
||||
Added main pipeline workflow that calls on the modules as needed to take the dat files and create grouped timeseries data CSVs
|
||||
@@ -5,4 +5,6 @@ class Config:
|
||||
COMBINED_FOLDER = "./combined_files"
|
||||
ZONE_FOLDER = "./zone_inputs"
|
||||
|
||||
delete_dat_after_processing = False
|
||||
delete_dat_after_processing = False
|
||||
delete_asc_after_processing = True
|
||||
delete_csv_after_combining = True
|
||||
@@ -28,7 +28,7 @@ if __name__ == "__main__":
|
||||
easting = int(row[2]) # Easting column
|
||||
northing = int(row[3]) # Northing column
|
||||
zone = int(row[6]) # ZoneID column
|
||||
|
||||
|
||||
locations.append([zone_id, easting, northing, zone])
|
||||
|
||||
# # testing locations, can be removed.
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import polars as pd
|
||||
import os
|
||||
|
||||
|
||||
class CombineTimeseries:
|
||||
@@ -25,7 +26,12 @@ class CombineTimeseries:
|
||||
combined_df = df
|
||||
else:
|
||||
combined_df = combined_df.join(df, on='datetime')
|
||||
|
||||
if self.config.delete_csv_after_combining:
|
||||
os.remove(csv_to_load)
|
||||
|
||||
output_file = (
|
||||
f"{self.config.COMBINED_FOLDER}/zone_{group}_timeseries_data.csv"
|
||||
)
|
||||
combined_df.write_csv(output_file)
|
||||
sorted_df = combined_df.sort('datetime')
|
||||
sorted_df.write_csv(output_file)
|
||||
|
||||
@@ -116,7 +116,12 @@ class GenerateTimeseries:
|
||||
'date': parsed_date,
|
||||
'value': val
|
||||
})
|
||||
|
||||
if self.config.delete_asc_after_processing:
|
||||
os.remove(file_path)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing file {file_name}: {e}")
|
||||
|
||||
@@ -444,7 +444,6 @@ class Nimrod:
|
||||
# (And as an example of how to invoke class methods from an importing module)
|
||||
# -------------------------------------------------------------------------------
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Extract information and data from a NIMROD format file",
|
||||
|
||||
Reference in New Issue
Block a user