feat: 🔗 I Am Speed

2025-11-11 21:32:16 +00:00
parent a40e6d2872
commit be2c89bcc2
6 changed files with 50 additions and 51 deletions
+1 -2
View File
@@ -3,5 +3,4 @@ class Config:
     ASC_TOP_FOLDER = "./asc_files"
     CSV_TOP_FOLDER = "./csv_files"
     COMBINED_FOLDER = "./combined_files"
-    AREAS_FILE = 'areas.csv'
+    AREAS_FILE = "areas.csv"
+9 -8
View File
@@ -25,32 +25,33 @@ if __name__ == "__main__":
     batch = BatchNimrod(Config)
     timeseries = GenerateTimeseries(Config)
-    combiner= CombineTimeseries(Config, locations)
+    combiner = CombineTimeseries(Config, locations)
     start = time.time()
     logging.info("Starting to process DAT to ASC")
-    batch_checkpoint = time.time()
-    if dat_file_count != asc_file_count:
+    if len(dat_file_count) != len(asc_file_count):
         batch.process_nimrod_files()
+        batch_checkpoint = time.time()
         elapsed_time = batch_checkpoint - start
         logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
     else:
         logging.info("No need to process DAT files, skipping...")
+        batch_checkpoint = time.time()
     time.sleep(1)
     for place in locations:
-        logging.info(f'{place[0]} started generating timeseries data.')
+        logging.info(f"{place[0]} started generating timeseries data.")
         timeseries.extract_cropped_rain_data(place)
         place_checkpoint = time.time()
         since_asc_create = place_checkpoint - batch_checkpoint
         elapsed_time = place_checkpoint - start
         logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
-        logging.info(f'total time so far {elapsed_time:.2f} seconds')
-    logging.info('combining CSVs into groups')
+        logging.info(f"total time so far {elapsed_time:.2f} seconds")
+    logging.info("combining CSVs into groups")
     combiner.combine_csv_files()
-    logging.info('CSVs combined!')
+    logging.info("CSVs combined!")
     end = time.time()
     elapsed_time = end - start
-    logging.info(f'All Complete total time {elapsed_time:.2f} seconds')
+    logging.info(f"All Complete total time {elapsed_time:.2f} seconds")
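
Note: the skip check now compares len(dat_file_count) with len(asc_file_count), which implies those names hold sequences of file paths rather than pre-computed integers. A minimal sketch of how such lists might be gathered before this block runs, assuming a pathlib scan of the input and output folders (the folder names, glob patterns, and helper function are hypothetical, not part of this commit):

from pathlib import Path

def list_source_files(dat_folder: str, asc_folder: str) -> tuple[list[Path], list[Path]]:
    # Hypothetical helper: list raw .dat radar files and already-converted .asc grids.
    dat_files = sorted(Path(dat_folder).glob("*.dat"))
    asc_files = sorted(Path(asc_folder).glob("*.asc"))
    return dat_files, asc_files

# The main script then skips BatchNimrod when every DAT already has an ASC counterpart.
# "./dat_files" is an assumed folder name; only ASC_TOP_FOLDER appears in the shown config.
dat_file_count, asc_file_count = list_source_files("./dat_files", "./asc_files")
if len(dat_file_count) != len(asc_file_count):
    print("DAT to ASC conversion needed")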
+7
View File
@@ -2,3 +2,10 @@ from .nimrod import Nimrod
 from .batch_nimrod import BatchNimrod
 from .generate_timeseries import GenerateTimeseries
 from .combine_timeseries import CombineTimeseries
+
+__all__ = [
+    "Nimrod",
+    "BatchNimrod",
+    "GenerateTimeseries",
+    "CombineTimeseries"
+]
+1 -1
View File
@@ -4,7 +4,7 @@ from pathlib import Path
 import logging
 
 
-class BatchNimrod():
+class BatchNimrod:
     def __init__(self, config) -> None:
         self.config = config
+6 -8
View File
@@ -1,5 +1,6 @@
 import pandas as pd
 
+
 class CombineTimeseries:
     def __init__(self, config, locations):
         self.config = config
@@ -7,7 +8,6 @@ class CombineTimeseries:
         self.grouped_locations = {}
         self.build_location_groups()
 
-
     def build_location_groups(self):
         for location in self.locations:
             group = location[4]  # output group is at index 4
@@ -15,19 +15,17 @@ class CombineTimeseries:
                 self.grouped_locations[group] = []
             self.grouped_locations[group].append(location)
 
     def combine_csv_files(self):
         for group, loc_list in self.grouped_locations.items():
             combined_df = None
             for loc in loc_list:
-                csv_to_load = f'./csv_files/{loc[0]}_timeseries_data.csv'
+                csv_to_load = f"./csv_files/{loc[0]}_timeseries_data.csv"
                 df = pd.read_csv(csv_to_load, index_col=0)
                 if combined_df is None:
                     combined_df = df
                 else:
-                    combined_df = combined_df.join(df, how='inner')
-            output_file = f'{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv'
+                    combined_df = combined_df.join(df, how="inner")
+            output_file = (
+                f"{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv"
+            )
             combined_df.to_csv(output_file)
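
Note: combine_csv_files joins each location's CSV on its datetime index with how="inner", so a group's combined file only keeps timestamps present in every member CSV. A small self-contained illustration of that join behaviour (the column names, timestamps, and values are invented for the example):

import pandas as pd

idx_a = pd.to_datetime(["2025-01-01 00:00", "2025-01-01 00:05", "2025-01-01 00:10"])
idx_b = pd.to_datetime(["2025-01-01 00:05", "2025-01-01 00:10"])

site_a = pd.DataFrame({"site_a": [0.1, 0.2, 0.3]}, index=idx_a)
site_b = pd.DataFrame({"site_b": [0.4, 0.5]}, index=idx_b)

# Inner join keeps only the timestamps shared by both frames,
# mirroring how combine_csv_files() merges the per-location CSVs.
combined = site_a.join(site_b, how="inner")
print(combined)  # two rows (00:05 and 00:10), columns site_a and site_b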
+22 -28
View File
@@ -1,8 +1,9 @@
 from __future__ import division, print_function
 import numpy as np
-import glob
+from pathlib import Path
 import pandas as pd
 from datetime import datetime
+import os
 
 
 class GenerateTimeseries:
@@ -22,7 +23,6 @@ class GenerateTimeseries:
         header_data = [float(f.__next__().split()[1]) for x in range(6)]
         return header_data
 
-
     def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
         """Calculate crop coordinates based on header data
@@ -59,10 +59,8 @@ class GenerateTimeseries:
         start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
         end_row = np.ceil(nrows_radar - (yp / cellres_radar))
-
-        #print(start_col, start_row, end_col, end_row)
         return int(start_col), int(start_row), int(end_col), int(end_row)
 
     def extract_cropped_rain_data(self, location):
         """Extract cropped rain data and create rainfall timeseries
@@ -72,29 +70,24 @@ class GenerateTimeseries:
         rainfile = []
         datetime_list = []
-        for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
-            # print(f)
-            radar_header = self._read_ascii_header(f)
-            # Calculate crop coordinates
+        for file_name in os.listdir(Path(self.config.ASC_TOP_FOLDER)):
+            file_path = Path(self.config.ASC_TOP_FOLDER, file_name)
+            radar_header = self._read_ascii_header(str(file_path))
             start_col, start_row, end_col, end_row = self._calculate_crop_coords(
                 location, radar_header
             )
-            start_col = int(round(start_col))
-            start_row = int(round(start_row))
-            end_col = int(round(end_col))
-            end_row = int(round(end_row))
-            cur_rawgrid = np.genfromtxt(
-                f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
-            )
+            cur_rawgrid = np.loadtxt(file_path, skiprows=6, dtype=float, delimiter=None)
             cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
-            # Flatten the cropped rain data into a 1D array
-            cur_rainrow = cur_croppedrain.flatten()
-            rainfile.append(cur_rainrow[2]/32)
+            rainfile.append(cur_croppedrain.flatten()[2] / 32)
 
             # Extract datetime from filename
-            filename = f.split("/")[-1]  # Get just the filename
+            filename = os.path.basename(file_path)  # Get just the filename
             date_str = filename[:8]  # YYYYMMDD
             time_str = filename[8:12]  # HHMM
@@ -102,15 +95,16 @@ class GenerateTimeseries:
             parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
             datetime_list.append(parsed_date)
 
-        rainfile_arr = np.vstack(rainfile)
         # Create DataFrame with datetime index
-        df = pd.DataFrame(rainfile_arr, index=datetime_list)
-        # sort the dataframe into date order
+        df = pd.DataFrame({"rainfall": rainfile}, index=datetime_list)
+
+        # Sort the dataframe into date order
         sorted_df = df.sort_index()
-        # add headers
-        header_row = [location[1]]
-        file_name = f"csv_files/{location[0]}_timeseries_data.csv"
-        sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
+        sorted_df.to_csv(
+            f"csv_files/{location[0]}_timeseries_data.csv",
+            sep=",",
+            float_format="%1.4f",
+            header=[location[1]],
+            index_label="datetime",
+        )
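
Note: the main change in this file replaces np.genfromtxt, a flexible but generally slower parser that handles missing and invalid values, with np.loadtxt for reading the ASC grids, and drops the intermediate flatten/vstack bookkeeping. A rough, self-contained way to compare the two readers on a synthetic ASCII grid; this is only a sketch, and the grid size and repeat count are arbitrary rather than measurements from this repository:

import timeit

import numpy as np

# Write a synthetic ESRI ASCII-style grid: 6 header lines followed by a numeric block.
rows, cols = 500, 500  # kept small so the demo runs quickly; real radar grids may be larger
header = "\n".join(f"key{i} 0" for i in range(6))
grid = np.random.default_rng(0).integers(0, 1000, size=(rows, cols))
with open("demo.asc", "w") as f:
    f.write(header + "\n")
    np.savetxt(f, grid, fmt="%d")

loadtxt_t = timeit.timeit(
    lambda: np.loadtxt("demo.asc", skiprows=6, dtype=float), number=5
)
genfromtxt_t = timeit.timeit(
    lambda: np.genfromtxt("demo.asc", skip_header=6, filling_values=0.0), number=5
)
print(f"loadtxt:    {loadtxt_t:.2f} s for 5 reads")
print(f"genfromtxt: {genfromtxt_t:.2f} s for 5 reads")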