feat: ✨ Add reading of zone info from CSV

2025-11-12 12:02:58 +00:00
parent be2c89bcc2
commit e38d21598f
9 changed files with 150 additions and 108 deletions
@@ -17,10 +17,10 @@ class BatchNimrod:
        box for each area, and exports clipped raster data to OUT_TOP_FOLDER.
        """
        # Read all file names in the folder
-        files_to_process = [f for f in os.listdir(Path(self.config.DAT_TOP_FOLDER))]
-
-        logging.info(f"Processing {len(files_to_process)} files...")
+        files_to_process = len([f for f in os.listdir(Path(self.config.DAT_TOP_FOLDER))])

+        logging.info(f"Processing {files_to_process} files...")
+        file_counter = 0
        for in_file in os.listdir(Path(self.config.DAT_TOP_FOLDER)):
            in_file_full = Path(self.config.DAT_TOP_FOLDER, in_file)

@@ -33,9 +33,13 @@ class BatchNimrod:
                with open(out_file_path, "w") as outfile:
                    image.extract_asc(outfile)

-                # delete dat file here
+                if self.config.delete_dat_after_processing:
+                    os.remove(in_file_full)

+                file_counter += 1
                logging.debug(f"Successfully processed: {in_file_full}")
+                if file_counter %10 == 0:
+                    logging.info(f'processed {file_counter} out of {files_to_process} files')

            except Nimrod.HeaderReadError as e:
                logging.error(f"Failed to read file {in_file_full}, is it corrupt?")
@@ -1,4 +1,4 @@
-import pandas as pd
+import polars as pd


 class CombineTimeseries:
@@ -10,7 +10,7 @@ class CombineTimeseries:

    def build_location_groups(self):
        for location in self.locations:
-            group = location[4]  # output group is at index 4
+            group = location[3]  # zone number
            if group not in self.grouped_locations:
                self.grouped_locations[group] = []
            self.grouped_locations[group].append(location)
@@ -20,12 +20,12 @@ class CombineTimeseries:
            combined_df = None
            for loc in loc_list:
                csv_to_load = f"./csv_files/{loc[0]}_timeseries_data.csv"
-                df = pd.read_csv(csv_to_load, index_col=0)
+                df = pd.read_csv(csv_to_load)
                if combined_df is None:
                    combined_df = df
                else:
-                    combined_df = combined_df.join(df, how="inner")
+                    combined_df = combined_df.join(df, on='datetime')
            output_file = (
-                f"{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv"
+                f"{self.config.COMBINED_FOLDER}/zone_{group}_timeseries_data.csv"
            )
-            combined_df.to_csv(output_file)
+            combined_df.write_csv(output_file)
@@ -1,7 +1,7 @@
 from __future__ import division, print_function
 import numpy as np
 from pathlib import Path
-import pandas as pd
+import polars as pd
 from datetime import datetime
 import os

@@ -36,8 +36,8 @@ class GenerateTimeseries:
        y0_radar = radar_header[3]
        x0_radar = radar_header[2]

-        y0_basin = basin_header[3]
-        x0_basin = basin_header[2]
+        y0_basin = basin_header[2]
+        x0_basin = basin_header[1]

        nrows_radar = radar_header[1]

@@ -96,15 +96,17 @@ class GenerateTimeseries:
            datetime_list.append(parsed_date)

        # Create DataFrame with datetime index
-        df = pd.DataFrame({"rainfall": rainfile}, index=datetime_list)
+        df = pd.DataFrame({"datetime": datetime_list, location[0]: rainfile})

        # Sort the dataframe into date order
-        sorted_df = df.sort_index()
+        sorted_df = df.sort("datetime")

-        sorted_df.to_csv(
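+        # Polars has no index. NOTE(review): this overwrites the sorted "datetime" column with datetime_list in its original order, then flags it sorted - confirm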
+        sorted_df = sorted_df.with_columns(
+            pd.Series(datetime_list).alias("datetime")
+        ).set_sorted("datetime")
+
+        sorted_df.write_csv(
            f"csv_files/{location[0]}_timeseries_data.csv",
-            sep=",",
-            float_format="%1.4f",
-            header=[location[1]],
-            index_label="datetime",
+            float_precision=4
        )