feat: working PoC

This commit is contained in:
2025-11-11 16:41:06 +00:00
parent b7d0f6cd99
commit 19dbfc1958
6 changed files with 48 additions and 6 deletions
+1
View File
@@ -12,4 +12,5 @@ wheels/
dat_files/* dat_files/*
asc_files/* asc_files/*
csv_files/* csv_files/*
combined_files/*
*.tar.gz *.tar.gz
+1
View File
@@ -2,5 +2,6 @@ class Config:
DAT_TOP_FOLDER = "./dat_files" DAT_TOP_FOLDER = "./dat_files"
ASC_TOP_FOLDER = "./asc_files" ASC_TOP_FOLDER = "./asc_files"
CSV_TOP_FOLDER = "./csv_files" CSV_TOP_FOLDER = "./csv_files"
COMBINED_FOLDER = "./combined_files"
AREAS_FILE = 'areas.csv' AREAS_FILE = 'areas.csv'
+9 -4
View File
@@ -4,7 +4,7 @@ import os
from pathlib import Path from pathlib import Path
from config import Config from config import Config
from modules import BatchNimrod, GenerateTimeseries from modules import BatchNimrod, GenerateTimeseries, CombineTimeseries
logging.basicConfig( logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -13,17 +13,19 @@ logging.basicConfig(
if __name__ == "__main__": if __name__ == "__main__":
os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True) os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True) os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
os.makedirs(Path(Config.COMBINED_FOLDER), exist_ok=True)
dat_file_count = [f for f in os.listdir(Path(Config.DAT_TOP_FOLDER))] dat_file_count = [f for f in os.listdir(Path(Config.DAT_TOP_FOLDER))]
asc_file_count = [f for f in os.listdir(Path(Config.ASC_TOP_FOLDER))] asc_file_count = [f for f in os.listdir(Path(Config.ASC_TOP_FOLDER))]
locations = [ locations = [
# loc name, loc id, x loc, y loc, resolution # loc name, loc id, x loc, y loc, output group
["BRICSC", "TM0816", 608500, 216500, 1000], ["BRICSC", "TM0816", 608500, 216500, 1],
["HEACSC", "TF6842", 568500, 342500, 1000], ["HEACSC", "TF6842", 568500, 342500, 1],
] ]
batch = BatchNimrod(Config) batch = BatchNimrod(Config)
timeseries = GenerateTimeseries(Config) timeseries = GenerateTimeseries(Config)
combiner= CombineTimeseries(Config, locations)
start = time.time() start = time.time()
logging.info("Starting to process DAT to ASC") logging.info("Starting to process DAT to ASC")
@@ -44,5 +46,8 @@ if __name__ == "__main__":
elapsed_time = place_checkpoint - start elapsed_time = place_checkpoint - start
logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds") logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
logging.info(f'total time so far {elapsed_time:.2f} seconds') logging.info(f'total time so far {elapsed_time:.2f} seconds')
combiner.combine_csv_files()
logging.info(f'All Complete') logging.info(f'All Complete')
+2 -1
View File
@@ -1,3 +1,4 @@
from .nimrod import Nimrod from .nimrod import Nimrod
from .batch_nimrod import BatchNimrod from .batch_nimrod import BatchNimrod
from .generate_timeseries import GenerateTimeseries from .generate_timeseries import GenerateTimeseries
from .combine_timeseries import CombineTimeseries
+34
View File
@@ -0,0 +1,34 @@
import pandas as pd
class CombineTimeseries:
    """Combine per-location timeseries CSVs into one CSV file per output group.

    Locations are rows of the form
    ``[name, id, x, y, output_group]``; every location sharing the same
    ``output_group`` value has its timeseries columns joined (inner join on
    the CSV index) into ``group_<group>_timeseries_data.csv``.
    """

    def __init__(self, config, locations):
        """Store config/locations and pre-compute the group buckets.

        config    -- object exposing CSV_TOP_FOLDER and COMBINED_FOLDER paths
        locations -- list of location rows; output group is at index 4
        """
        self.config = config
        self.locations = locations
        self.grouped_locations = {}
        self.build_location_groups()

    def build_location_groups(self):
        """Bucket ``self.locations`` by output group into grouped_locations."""
        for location in self.locations:
            group = location[4]  # output group is at index 4
            # setdefault replaces the manual "if group not in dict" dance
            self.grouped_locations.setdefault(group, []).append(location)

    def combine_csv_files(self):
        """Join each group's per-location CSVs and write one combined CSV.

        Reads ``<CSV_TOP_FOLDER>/<name>_timeseries_data.csv`` for every
        location in a group (previously the folder was hardcoded to
        './csv_files', ignoring Config.CSV_TOP_FOLDER), inner-joins them on
        the index column, and writes the result to COMBINED_FOLDER.
        """
        for group, loc_list in self.grouped_locations.items():
            print(f"Group {group}:")
            combined_df = None
            for loc in loc_list:
                # Fix: use the configured CSV folder instead of a hardcoded path,
                # matching how COMBINED_FOLDER is already read from config below.
                csv_to_load = f'{self.config.CSV_TOP_FOLDER}/{loc[0]}_timeseries_data.csv'
                df = pd.read_csv(csv_to_load, index_col=0)
                if combined_df is None:
                    combined_df = df
                else:
                    # inner join keeps only timestamps present in every location
                    combined_df = combined_df.join(df, how='inner')
            # Guard: an empty group would otherwise crash on None.to_csv()
            if combined_df is not None:
                output_file = f'{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv'
                combined_df.to_csv(output_file)
+1 -1
View File
@@ -45,7 +45,7 @@ class GenerateTimeseries:
ncols_basin = 2 # hardcoded, likely to change? ncols_basin = 2 # hardcoded, likely to change?
cellres_radar = radar_header[4] cellres_radar = radar_header[4]
cellres_basin = basin_header[4] cellres_basin = 1000 # 1km
xp = x0_basin - x0_radar xp = x0_basin - x0_radar
yp = y0_basin - y0_radar yp = y0_basin - y0_radar