diff --git a/.gitignore b/.gitignore index cd5441d..fed3f49 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ wheels/ dat_files/* asc_files/* csv_files/* +combined_files/* *.tar.gz \ No newline at end of file diff --git a/config.py b/config.py index 62b0c04..6037fd0 100644 --- a/config.py +++ b/config.py @@ -2,5 +2,6 @@ class Config: DAT_TOP_FOLDER = "./dat_files" ASC_TOP_FOLDER = "./asc_files" CSV_TOP_FOLDER = "./csv_files" + COMBINED_FOLDER = "./combined_files" AREAS_FILE = 'areas.csv' diff --git a/main.py b/main.py index ee8fd0d..119f110 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ import os from pathlib import Path from config import Config -from modules import BatchNimrod, GenerateTimeseries +from modules import BatchNimrod, GenerateTimeseries, CombineTimeseries logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" @@ -13,17 +13,19 @@ logging.basicConfig( if __name__ == "__main__": os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True) os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True) + os.makedirs(Path(Config.COMBINED_FOLDER), exist_ok=True) dat_file_count = [f for f in os.listdir(Path(Config.DAT_TOP_FOLDER))] asc_file_count = [f for f in os.listdir(Path(Config.ASC_TOP_FOLDER))] locations = [ - # loc name, loc id, x loc, y loc, resolution - ["BRICSC", "TM0816", 608500, 216500, 1000], - ["HEACSC", "TF6842", 568500, 342500, 1000], + # loc name, loc id, x loc, y loc, output group + ["BRICSC", "TM0816", 608500, 216500, 1], + ["HEACSC", "TF6842", 568500, 342500, 1], ] batch = BatchNimrod(Config) timeseries = GenerateTimeseries(Config) + combiner= CombineTimeseries(Config, locations) start = time.time() logging.info("Starting to process DAT to ASC") @@ -44,5 +46,8 @@ if __name__ == "__main__": elapsed_time = place_checkpoint - start logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds") logging.info(f'total time so far {elapsed_time:.2f} seconds') + + combiner.combine_csv_files() + 
import logging
from pathlib import Path

import pandas as pd

logger = logging.getLogger(__name__)


class CombineTimeseries:
    """Merge per-location timeseries CSVs into one CSV per output group.

    Each entry of ``locations`` is a sequence whose first item is the
    location name (used to find ``<name>_timeseries_data.csv``) and whose
    item at index 4 is the output group the location belongs to.

    ``config`` must expose ``CSV_TOP_FOLDER`` (where the per-location CSVs
    are read from) and ``COMBINED_FOLDER`` (where the combined CSVs are
    written).
    """

    def __init__(self, config, locations):
        self.config = config
        self.locations = locations
        self.grouped_locations = {}
        self.build_location_groups()

    def build_location_groups(self):
        """Rebuild ``grouped_locations`` as ``{group: [location, ...]}``.

        The mapping is reset first so that calling this method again (for
        example after ``locations`` has been changed) does not append
        duplicate entries to the existing groups.
        """
        self.grouped_locations = {}
        for location in self.locations:
            group = location[4]  # output group is at index 4
            self.grouped_locations.setdefault(group, []).append(location)

    def combine_csv_files(self):
        """Write one combined CSV per output group into ``COMBINED_FOLDER``.

        For every group the location CSVs are read from ``CSV_TOP_FOLDER``
        with their first column as the index and inner-joined on that
        index, so only index values present for every location in the
        group are kept. The result is written to
        ``group_<group>_timeseries_data.csv``.

        If a later location's CSV repeats a column name that is already
        present, the later column is suffixed with ``_<location name>``
        rather than aborting the whole run with a ``ValueError``.
        """
        # Read from the configured folder rather than a hard-coded path so
        # the input location stays in sync with the rest of the pipeline.
        csv_folder = Path(self.config.CSV_TOP_FOLDER)
        combined_folder = Path(self.config.COMBINED_FOLDER)

        for group, loc_list in self.grouped_locations.items():
            logger.info("Combining %d location(s) for group %s", len(loc_list), group)
            combined_df = None
            for loc in loc_list:
                csv_to_load = csv_folder / f"{loc[0]}_timeseries_data.csv"
                df = pd.read_csv(csv_to_load, index_col=0)
                if combined_df is None:
                    combined_df = df
                else:
                    # rsuffix only affects overlapping column names; distinct
                    # names are joined unchanged.
                    combined_df = combined_df.join(
                        df, how="inner", rsuffix=f"_{loc[0]}"
                    )
            output_file = combined_folder / f"group_{group}_timeseries_data.csv"
            combined_df.to_csv(output_file)
cellres_radar = radar_header[4] - cellres_basin = basin_header[4] + cellres_basin = 1000 # 1km xp = x0_basin - x0_radar yp = y0_basin - y0_radar