From 9aaf8a5e88ad231a19f8ccb17b0a655a732d035a Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Mon, 15 Dec 2025 10:13:11 +0000 Subject: [PATCH] Now deleting existing combined csv files after confirmation at start. --- README.MD | 2 +- main.py | 11 ++++------- pyproject.toml | 4 ++-- uv.lock | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/README.MD b/README.MD index 5c672dc..7f8e489 100644 --- a/README.MD +++ b/README.MD @@ -15,7 +15,7 @@ The project consists of a main pipeline workflow that processes multiple modules ### main.py -- **Startup Safety Check**: Scans the `COMBINED_FOLDER` at startup and warns the user if existing files are found, offering a chance to abort to prevent accidental data mixing. +- **Startup Safety Check**: Scans the `COMBINED_FOLDER` at startup and warns the user if existing files are found, deleting them if the user chooses to continue. - **Batch Processing**: Processes input tar files in configurable batches to manage resource usage. - **End-to-End Processing**: Extracts GZ files, processes DAT/ASC, and appends to CSV in a single thread per file. - **Concurrency**: Uses multi-threading to process individual GZ files within a batch concurrently. diff --git a/main.py b/main.py index 4a52b93..a8a33ae 100644 --- a/main.py +++ b/main.py @@ -92,13 +92,16 @@ if __name__ == "__main__": f"Found {len(existing_combined)} files in {Config.COMBINED_FOLDER}" ) logging.warning( - "You may want to remove these before continuing to avoid duplicates or messy data." + "If you continue, these WILL BE DELETED. Please make sure you have them saved." ) logging.warning("!" * 80) response = input("Continue? (Y/N): ").strip().lower() if response != "y": logging.info("Aborting...") exit(0) + else: + shutil.rmtree(Path(Config.COMBINED_FOLDER)) # Delete everything including the directory + Path(Config.COMBINED_FOLDER).mkdir() extraction = Extract(Config) batch = BatchNimrod(Config) @@ -130,12 +133,6 @@ if __name__ == "__main__": # 1.
Extract batch (TAR -> GZ) logging.info("Extracting tar files for batch") extraction.extract_tar_batch(batch_files) - # Note: We do NOT run extract_gz_batch anymore. We will find GZ files and process them. - - # Get list of GZ files (recursively or flat?) - # extract_tar_batch puts them in GZ_TOP_FOLDER/tar_name_without_ext - # So we need to look there. - # Ideally we know where we put them. gz_files_to_process = [] for tar_file in batch_files: diff --git a/pyproject.toml b/pyproject.toml index 46833a1..f12af06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "met-office" -version = "1.3.1" -description = "Convert .dat nimrod files to .asc files" +version = "1.3.2" +description = "Convert nimrod files to .csv timeseries" readme = "README.md" requires-python = ">=3.14" dependencies = [ diff --git a/uv.lock b/uv.lock index 6d8e156..12c8c03 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.14" [[package]] name = "met-office" -version = "1.3.1" +version = "1.3.2" source = { virtual = "." } dependencies = [ { name = "numpy" },