From 9aaf8a5e88ad231a19f8ccb17b0a655a732d035a Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Mon, 15 Dec 2025 10:13:11 +0000 Subject: [PATCH] Now deleting existing combined csv files after confirmation at start. --- README.MD | 2 +- main.py | 11 ++++------- pyproject.toml | 4 ++-- uv.lock | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/README.MD b/README.MD index 5c672dc..7f8e489 100644 --- a/README.MD +++ b/README.MD @@ -15,7 +15,7 @@ The project consists of a main pipeline workflow that processes multiple modules ### main.py -- **Startup Safety Check**: Scans the `COMBINED_FOLDER` at startup and warns the user if existing files are found, offering a chance to abort to prevent accidental data mixing. +- **Startup Safety Check**: Scans the `COMBINED_FOLDER` at startup and warns the user if existing files are found, deleting them if the user chooses to continue. - **Batch Processing**: Processes input tar files in configurable batches to manage resource usage. - **End-to-End Processing**: Extracts GZ files, processes DAT/ASC, and appends to CSV in a single thread per file. - **Concurrency**: Uses multi-threading to process individual GZ files within a batch concurrently. diff --git a/main.py b/main.py index 4a52b93..a8a33ae 100644 --- a/main.py +++ b/main.py @@ -92,13 +92,16 @@ if __name__ == "__main__": f"Found {len(existing_combined)} files in {Config.COMBINED_FOLDER}" ) logging.warning( - "You may want to remove these before continuing to avoid duplicates or messy data." + "If you continue, these WILL BE DELETED. Please make sure you have them saved." ) logging.warning("!" * 80) response = input("Continue? (Y/N): ").strip().lower() if response != "y": logging.info("Aborting...") exit(0) + else: + shutil.rmtree(Path(Config.COMBINED_FOLDER)) # Delete everything including the directory + Path(Config.COMBINED_FOLDER).mkdir() extraction = Extract(Config) batch = BatchNimrod(Config) @@ -130,12 +133,6 @@ if __name__ == "__main__": # 1.
Extract batch (TAR -> GZ) logging.info("Extracting tar files for batch") extraction.extract_tar_batch(batch_files) - # Note: We do NOT run extract_gz_batch anymore. We will find GZ files and process them. - - # Get list of GZ files (recursively or flat?) - # extract_tar_batch puts them in GZ_TOP_FOLDER/tar_name_without_ext - # So we need to look there. - # Ideally we know where we put them. gz_files_to_process = [] for tar_file in batch_files: diff --git a/pyproject.toml b/pyproject.toml index 46833a1..f12af06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "met-office" -version = "1.3.1" -description = "Convert .dat nimrod files to .asc files" +version = "1.3.2" +description = "Convert nimrod files to .csv timeseries" readme = "README.md" requires-python = ">=3.14" dependencies = [ diff --git a/uv.lock b/uv.lock index 6d8e156..12c8c03 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.14" [[package]] name = "met-office" -version = "1.3.1" +version = "1.3.2" source = { virtual = "." } dependencies = [ { name = "numpy" },