fix: 🐛 bring the combine into the write to csv step
This commit is contained in:
@@ -14,7 +14,7 @@ logging.basicConfig(
|
||||
|
||||
if __name__ == "__main__":
|
||||
os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
|
||||
os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
|
||||
#os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
|
||||
os.makedirs(Path(Config.COMBINED_FOLDER), exist_ok=True)
|
||||
|
||||
locations = []
|
||||
@@ -92,11 +92,11 @@ if __name__ == "__main__":
|
||||
|
||||
logging.info("Writing CSV files...")
|
||||
timeseries.write_results_to_csv(results, locations)
|
||||
results.clear()
|
||||
# results.clear()
|
||||
|
||||
logging.info("combining CSVs into groups")
|
||||
combiner.combine_csv_files()
|
||||
logging.info("CSVs combined!")
|
||||
# logging.info("combining CSVs into groups")
|
||||
# combiner.combine_csv_files()
|
||||
# logging.info("CSVs combined!")
|
||||
end = time.time()
|
||||
elapsed_time = end - start
|
||||
|
||||
|
||||
@@ -170,24 +170,98 @@ class GenerateTimeseries:
|
||||
executor.shutdown(wait=False, cancel_futures=True)
|
||||
raise
|
||||
|
||||
# def write_results_to_csv(self, results, locations):
|
||||
# """Write extracted data to CSV files for each location.
|
||||
|
||||
# Args:
|
||||
# results (dict): Aggregated results {zone_id: {'dates': [], 'values': []}}
|
||||
# locations (list): List of location data
|
||||
# """
|
||||
# for location in locations:
|
||||
# grid_square = location[0]
|
||||
# zone = location[3]
|
||||
# data = results[grid_square]
|
||||
|
||||
# if not data['dates']:
|
||||
# print(f"No data found for {grid_square}")
|
||||
# continue
|
||||
|
||||
# df = pd.DataFrame({"datetime": data['dates'], grid_square: data['values']})
|
||||
|
||||
# # Sort the dataframe into date order
|
||||
# sorted_df = df.sort("datetime")
|
||||
|
||||
# # Format datetime column
|
||||
# sorted_df = sorted_df.with_columns(
|
||||
# pd.col("datetime").dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
# )
|
||||
|
||||
# output_path = Path(self.config.CSV_TOP_FOLDER) / f"{zone}_timeseries_data.csv"
|
||||
# sorted_df.write_csv(
|
||||
# output_path,
|
||||
# float_precision=4
|
||||
# )
|
||||
# logging.info("All CSV files written.")
|
||||
|
||||
def write_results_to_csv(self, results, locations):
    """Write one combined, timestamp-aligned CSV per zone.

    Every location (grid square) that belongs to a zone becomes one column
    of that zone's CSV. The rows are the sorted union of all timestamps seen
    in the zone; a grid square with no reading at a timestamp gets a null
    in that row. Zones with no timestamps at all are skipped with a warning.

    Args:
        results (dict): Aggregated results
            {zone_id: {'dates': [], 'values': []}}. ``dates[i]`` is treated
            as the timestamp of ``values[i]``.
        locations (list): List of location data
            [zone_id, easting, northing, zone].

    Raises:
        KeyError: If a location's zone_id has no entry in ``results``.
    """
    # Group every location's series under its zone. The whole series
    # (dates AND values) is kept so values can be aligned by timestamp;
    # aligning by position shifts readings onto the wrong rows as soon as
    # one grid square is missing a timestamp that another one has.
    zone_series = {}
    for loc in locations:
        zone_id = loc[0]
        zone_name = loc[3]
        zone_series.setdefault(zone_name, {})[zone_id] = results[zone_id]

    for zone_name, series_by_id in zone_series.items():
        # Sorted union of the timestamps of every grid square in the zone.
        dates = sorted(
            {date for series in series_by_id.values() for date in series['dates']}
        )
        if not dates:
            # Nothing to write; an empty datetime column cannot be formatted.
            logging.warning("No data found for zone %s", zone_name)
            continue

        df_dict = {"datetime": dates}
        for grid_square, series in series_by_id.items():
            # Look values up by their own timestamp; gaps become None.
            # If a timestamp repeats within one series, the last value wins.
            value_by_date = dict(zip(series['dates'], series['values']))
            df_dict[grid_square] = [value_by_date.get(date) for date in dates]

        # NOTE(review): `pd` appears to be polars imported under the alias
        # `pd` (with_columns / pd.col / write_csv) - confirm against the
        # file's imports. No re-sort is needed: `dates` is already sorted.
        df = pd.DataFrame(df_dict)
        formatted_df = df.with_columns(
            pd.col("datetime").dt.strftime("%Y-%m-%d %H:%M:%S")
        )

        output_path = Path(self.config.COMBINED_FOLDER) / f"{zone_name}_timeseries_data.csv"
        formatted_df.write_csv(output_path, float_precision=4)

    logging.info("All CSV files written.")
|
||||
Reference in New Issue
Block a user