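feat: 🔗 I Am Speed (faster timeseries generation: np.loadtxt replaces np.genfromtxt, pathlib/os.listdir replaces glob, __all__ added, quotes and formatting normalised)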
This commit is contained in:
@@ -3,5 +3,4 @@ class Config:
|
|||||||
ASC_TOP_FOLDER = "./asc_files"
|
ASC_TOP_FOLDER = "./asc_files"
|
||||||
CSV_TOP_FOLDER = "./csv_files"
|
CSV_TOP_FOLDER = "./csv_files"
|
||||||
COMBINED_FOLDER = "./combined_files"
|
COMBINED_FOLDER = "./combined_files"
|
||||||
AREAS_FILE = 'areas.csv'
|
AREAS_FILE = "areas.csv"
|
||||||
|
|
||||||
|
|||||||
@@ -29,28 +29,29 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
logging.info("Starting to process DAT to ASC")
|
logging.info("Starting to process DAT to ASC")
|
||||||
batch_checkpoint = time.time()
|
if len(dat_file_count) != len(asc_file_count):
|
||||||
if dat_file_count != asc_file_count:
|
|
||||||
batch.process_nimrod_files()
|
batch.process_nimrod_files()
|
||||||
|
batch_checkpoint = time.time()
|
||||||
elapsed_time = batch_checkpoint - start
|
elapsed_time = batch_checkpoint - start
|
||||||
logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
|
logging.info(f"DAT to ASC completed in {elapsed_time:.2f} seconds")
|
||||||
else:
|
else:
|
||||||
logging.info("No need to process DAT files, skipping...")
|
logging.info("No need to process DAT files, skipping...")
|
||||||
|
batch_checkpoint = time.time()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
for place in locations:
|
for place in locations:
|
||||||
logging.info(f'{place[0]} started generating timeseries data.')
|
logging.info(f"{place[0]} started generating timeseries data.")
|
||||||
timeseries.extract_cropped_rain_data(place)
|
timeseries.extract_cropped_rain_data(place)
|
||||||
place_checkpoint = time.time()
|
place_checkpoint = time.time()
|
||||||
since_asc_create = place_checkpoint - batch_checkpoint
|
since_asc_create = place_checkpoint - batch_checkpoint
|
||||||
elapsed_time = place_checkpoint - start
|
elapsed_time = place_checkpoint - start
|
||||||
logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
|
logging.info(f"{place[0]} completed in {since_asc_create:.2f} seconds")
|
||||||
logging.info(f'total time so far {elapsed_time:.2f} seconds')
|
logging.info(f"total time so far {elapsed_time:.2f} seconds")
|
||||||
|
|
||||||
logging.info('combining CSVs into groups')
|
logging.info("combining CSVs into groups")
|
||||||
combiner.combine_csv_files()
|
combiner.combine_csv_files()
|
||||||
logging.info('CSVs combined!')
|
logging.info("CSVs combined!")
|
||||||
end = time.time()
|
end = time.time()
|
||||||
elapsed_time = end - start
|
elapsed_time = end - start
|
||||||
|
|
||||||
logging.info(f'All Complete total time {elapsed_time:.2f} seconds')
|
logging.info(f"All Complete total time {elapsed_time:.2f} seconds")
|
||||||
|
|||||||
@@ -2,3 +2,10 @@ from .nimrod import Nimrod
|
|||||||
from .batch_nimrod import BatchNimrod
|
from .batch_nimrod import BatchNimrod
|
||||||
from .generate_timeseries import GenerateTimeseries
|
from .generate_timeseries import GenerateTimeseries
|
||||||
from .combine_timeseries import CombineTimeseries
|
from .combine_timeseries import CombineTimeseries
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Nimrod",
|
||||||
|
"BatchNimrod",
|
||||||
|
"GenerateTimeseries",
|
||||||
|
"CombineTimeseries"
|
||||||
|
]
|
||||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
class BatchNimrod():
|
class BatchNimrod:
|
||||||
def __init__(self, config) -> None:
|
def __init__(self, config) -> None:
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
class CombineTimeseries:
|
class CombineTimeseries:
|
||||||
def __init__(self, config, locations):
|
def __init__(self, config, locations):
|
||||||
self.config = config
|
self.config = config
|
||||||
@@ -7,7 +8,6 @@ class CombineTimeseries:
|
|||||||
self.grouped_locations = {}
|
self.grouped_locations = {}
|
||||||
self.build_location_groups()
|
self.build_location_groups()
|
||||||
|
|
||||||
|
|
||||||
def build_location_groups(self):
|
def build_location_groups(self):
|
||||||
for location in self.locations:
|
for location in self.locations:
|
||||||
group = location[4] # output group is at index 4
|
group = location[4] # output group is at index 4
|
||||||
@@ -15,19 +15,17 @@ class CombineTimeseries:
|
|||||||
self.grouped_locations[group] = []
|
self.grouped_locations[group] = []
|
||||||
self.grouped_locations[group].append(location)
|
self.grouped_locations[group].append(location)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def combine_csv_files(self):
|
def combine_csv_files(self):
|
||||||
for group, loc_list in self.grouped_locations.items():
|
for group, loc_list in self.grouped_locations.items():
|
||||||
combined_df = None
|
combined_df = None
|
||||||
for loc in loc_list:
|
for loc in loc_list:
|
||||||
csv_to_load = f'./csv_files/{loc[0]}_timeseries_data.csv'
|
csv_to_load = f"./csv_files/{loc[0]}_timeseries_data.csv"
|
||||||
df = pd.read_csv(csv_to_load, index_col=0)
|
df = pd.read_csv(csv_to_load, index_col=0)
|
||||||
if combined_df is None:
|
if combined_df is None:
|
||||||
combined_df = df
|
combined_df = df
|
||||||
else:
|
else:
|
||||||
combined_df = combined_df.join(df, how='inner')
|
combined_df = combined_df.join(df, how="inner")
|
||||||
output_file = f'{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv'
|
output_file = (
|
||||||
|
f"{self.config.COMBINED_FOLDER}/group_{group}_timeseries_data.csv"
|
||||||
|
)
|
||||||
combined_df.to_csv(output_file)
|
combined_df.to_csv(output_file)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
from __future__ import division, print_function
|
from __future__ import division, print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import glob
|
from pathlib import Path
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
class GenerateTimeseries:
|
class GenerateTimeseries:
|
||||||
@@ -22,7 +23,6 @@ class GenerateTimeseries:
|
|||||||
header_data = [float(f.__next__().split()[1]) for x in range(6)]
|
header_data = [float(f.__next__().split()[1]) for x in range(6)]
|
||||||
return header_data
|
return header_data
|
||||||
|
|
||||||
|
|
||||||
def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
|
def _calculate_crop_coords(self, basin_header: list, radar_header: list) -> tuple:
|
||||||
"""Calculate crop coordinates based on header data
|
"""Calculate crop coordinates based on header data
|
||||||
|
|
||||||
@@ -59,10 +59,8 @@ class GenerateTimeseries:
|
|||||||
start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
|
start_row = np.floor(nrows_radar - ((yp + ypp) / cellres_radar))
|
||||||
end_row = np.ceil(nrows_radar - (yp / cellres_radar))
|
end_row = np.ceil(nrows_radar - (yp / cellres_radar))
|
||||||
|
|
||||||
#print(start_col, start_row, end_col, end_row)
|
|
||||||
return int(start_col), int(start_row), int(end_col), int(end_row)
|
return int(start_col), int(start_row), int(end_col), int(end_row)
|
||||||
|
|
||||||
|
|
||||||
def extract_cropped_rain_data(self, location):
|
def extract_cropped_rain_data(self, location):
|
||||||
"""Extract cropped rain data and create rainfall timeseries
|
"""Extract cropped rain data and create rainfall timeseries
|
||||||
|
|
||||||
@@ -72,29 +70,24 @@ class GenerateTimeseries:
|
|||||||
rainfile = []
|
rainfile = []
|
||||||
datetime_list = []
|
datetime_list = []
|
||||||
|
|
||||||
for f in glob.iglob(f'{self.config.ASC_TOP_FOLDER}/*.asc'):
|
for file_name in os.listdir(Path(self.config.ASC_TOP_FOLDER)):
|
||||||
# print(f)
|
file_path = Path(self.config.ASC_TOP_FOLDER, file_name)
|
||||||
radar_header = self._read_ascii_header(f)
|
|
||||||
|
radar_header = self._read_ascii_header(str(file_path))
|
||||||
|
|
||||||
|
# Calculate crop coordinates
|
||||||
start_col, start_row, end_col, end_row = self._calculate_crop_coords(
|
start_col, start_row, end_col, end_row = self._calculate_crop_coords(
|
||||||
location, radar_header
|
location, radar_header
|
||||||
)
|
)
|
||||||
|
|
||||||
start_col = int(round(start_col))
|
cur_rawgrid = np.loadtxt(file_path, skiprows=6, dtype=float, delimiter=None)
|
||||||
start_row = int(round(start_row))
|
|
||||||
end_col = int(round(end_col))
|
|
||||||
end_row = int(round(end_row))
|
|
||||||
|
|
||||||
cur_rawgrid = np.genfromtxt(
|
|
||||||
f, skip_header=6, filling_values=0.0, loose=True, invalid_raise=False
|
|
||||||
)
|
|
||||||
|
|
||||||
cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
|
cur_croppedrain = cur_rawgrid[start_row:end_row, start_col:end_col]
|
||||||
# Flatten the cropped rain data into a 1D array
|
|
||||||
cur_rainrow = cur_croppedrain.flatten()
|
rainfile.append(cur_croppedrain.flatten()[2] / 32)
|
||||||
rainfile.append(cur_rainrow[2]/32)
|
|
||||||
|
|
||||||
# Extract datetime from filename
|
# Extract datetime from filename
|
||||||
filename = f.split("/")[-1] # Get just the filename
|
filename = os.path.basename(file_path) # Get just the filename
|
||||||
date_str = filename[:8] # YYYYMMDD
|
date_str = filename[:8] # YYYYMMDD
|
||||||
time_str = filename[8:12] # HHMM
|
time_str = filename[8:12] # HHMM
|
||||||
|
|
||||||
@@ -102,15 +95,16 @@ class GenerateTimeseries:
|
|||||||
parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
|
parsed_date = datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
|
||||||
datetime_list.append(parsed_date)
|
datetime_list.append(parsed_date)
|
||||||
|
|
||||||
rainfile_arr = np.vstack(rainfile)
|
|
||||||
|
|
||||||
# Create DataFrame with datetime index
|
# Create DataFrame with datetime index
|
||||||
df = pd.DataFrame(rainfile_arr, index=datetime_list)
|
df = pd.DataFrame({"rainfall": rainfile}, index=datetime_list)
|
||||||
# sort the dataframe into date order
|
|
||||||
|
# Sort the dataframe into date order
|
||||||
sorted_df = df.sort_index()
|
sorted_df = df.sort_index()
|
||||||
# add headers
|
|
||||||
header_row = [location[1]]
|
|
||||||
file_name = f"csv_files/{location[0]}_timeseries_data.csv"
|
|
||||||
sorted_df.to_csv(file_name, sep=",", float_format="%1.4f", header=header_row, index_label='datetime')
|
|
||||||
|
|
||||||
|
|
||||||
|
sorted_df.to_csv(
|
||||||
|
f"csv_files/{location[0]}_timeseries_data.csv",
|
||||||
|
sep=",",
|
||||||
|
float_format="%1.4f",
|
||||||
|
header=[location[1]],
|
||||||
|
index_label="datetime",
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user