fix: 🐛 move the CSV combine step into the write-to-CSV step
This commit is contained in:
@@ -14,7 +14,7 @@ logging.basicConfig(
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
|
os.makedirs(Path(Config.ASC_TOP_FOLDER), exist_ok=True)
|
||||||
os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
|
#os.makedirs(Path(Config.CSV_TOP_FOLDER), exist_ok=True)
|
||||||
os.makedirs(Path(Config.COMBINED_FOLDER), exist_ok=True)
|
os.makedirs(Path(Config.COMBINED_FOLDER), exist_ok=True)
|
||||||
|
|
||||||
locations = []
|
locations = []
|
||||||
@@ -92,11 +92,11 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
logging.info("Writing CSV files...")
|
logging.info("Writing CSV files...")
|
||||||
timeseries.write_results_to_csv(results, locations)
|
timeseries.write_results_to_csv(results, locations)
|
||||||
results.clear()
|
# results.clear()
|
||||||
|
|
||||||
logging.info("combining CSVs into groups")
|
# logging.info("combining CSVs into groups")
|
||||||
combiner.combine_csv_files()
|
# combiner.combine_csv_files()
|
||||||
logging.info("CSVs combined!")
|
# logging.info("CSVs combined!")
|
||||||
end = time.time()
|
end = time.time()
|
||||||
elapsed_time = end - start
|
elapsed_time = end - start
|
||||||
|
|
||||||
|
|||||||
@@ -170,24 +170,98 @@ class GenerateTimeseries:
|
|||||||
executor.shutdown(wait=False, cancel_futures=True)
|
executor.shutdown(wait=False, cancel_futures=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
# def write_results_to_csv(self, results, locations):
|
||||||
|
# """Write extracted data to CSV files for each location.
|
||||||
|
|
||||||
|
# Args:
|
||||||
|
# results (dict): Aggregated results {zone_id: {'dates': [], 'values': []}}
|
||||||
|
# locations (list): List of location data
|
||||||
|
# """
|
||||||
|
# for location in locations:
|
||||||
|
# grid_square = location[0]
|
||||||
|
# zone = location[3]
|
||||||
|
# data = results[grid_square]
|
||||||
|
|
||||||
|
# if not data['dates']:
|
||||||
|
# print(f"No data found for {grid_square}")
|
||||||
|
# continue
|
||||||
|
|
||||||
|
# df = pd.DataFrame({"datetime": data['dates'], grid_square: data['values']})
|
||||||
|
|
||||||
|
# # Sort the dataframe into date order
|
||||||
|
# sorted_df = df.sort("datetime")
|
||||||
|
|
||||||
|
# # Format datetime column
|
||||||
|
# sorted_df = sorted_df.with_columns(
|
||||||
|
# pd.col("datetime").dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
# )
|
||||||
|
|
||||||
|
# output_path = Path(self.config.CSV_TOP_FOLDER) / f"{zone}_timeseries_data.csv"
|
||||||
|
# sorted_df.write_csv(
|
||||||
|
# output_path,
|
||||||
|
# float_precision=4
|
||||||
|
# )
|
||||||
|
# logging.info("All CSV files written.")
|
||||||
|
|
||||||
def write_results_to_csv(self, results, locations):
|
def write_results_to_csv(self, results, locations):
|
||||||
"""Write extracted data to CSV files for each location.
|
"""Write extracted data to CSV files for each zone.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
results (dict): Aggregated results {zone_id: {'dates': [], 'values': []}}
|
results (dict): Aggregated results {zone_id: {'dates': [], 'values': []}}
|
||||||
locations (list): List of location data
|
locations (list): List of location data [zone_id, easting, northing, zone]
|
||||||
"""
|
"""
|
||||||
for location in locations:
|
# Map zone_id -> zone
|
||||||
zone_id = location[0]
|
zone_map = {loc[0]: loc[3] for loc in locations}
|
||||||
data = results[zone_id]
|
|
||||||
|
|
||||||
if not data['dates']:
|
# Group results by zone and collect all unique dates
|
||||||
print(f"No data found for {zone_id}")
|
zone_data = {}
|
||||||
continue
|
for loc in locations:
|
||||||
|
zone_id = loc[0]
|
||||||
|
zone_name = loc[3]
|
||||||
|
|
||||||
df = pd.DataFrame({"datetime": data['dates'], zone_id: data['values']})
|
if zone_name not in zone_data:
|
||||||
|
zone_data[zone_name] = {'dates': [], 'values': {}}
|
||||||
|
|
||||||
# Sort the dataframe into date order
|
zone_data[zone_name]['values'][zone_id] = results[zone_id]['values']
|
||||||
|
zone_data[zone_name]['dates'].extend(results[zone_id]['dates'])
|
||||||
|
|
||||||
|
# Get unique sorted dates across all zones
|
||||||
|
for zone_name, data in zone_data.items():
|
||||||
|
data['dates'] = sorted(set(data['dates']))
|
||||||
|
|
||||||
|
# Now write one CSV per zone with aligned timestamps
|
||||||
|
for zone_name, data in zone_data.items():
|
||||||
|
dates = data['dates']
|
||||||
|
values_dict = data['values']
|
||||||
|
|
||||||
|
# Create aligned DataFrame
|
||||||
|
df_dict = {"datetime": dates}
|
||||||
|
for grid_square, values in values_dict.items():
|
||||||
|
# Align values to the common dates
|
||||||
|
aligned_values = []
|
||||||
|
value_iter = iter(values)
|
||||||
|
date_iter = iter(dates)
|
||||||
|
|
||||||
|
current_date = next(date_iter, None)
|
||||||
|
current_value = next(value_iter, None)
|
||||||
|
|
||||||
|
for expected_date in dates:
|
||||||
|
if current_date == expected_date:
|
||||||
|
aligned_values.append(current_value)
|
||||||
|
try:
|
||||||
|
current_date = next(date_iter)
|
||||||
|
current_value = next(value_iter)
|
||||||
|
except StopIteration:
|
||||||
|
current_date = None
|
||||||
|
current_value = None
|
||||||
|
else:
|
||||||
|
aligned_values.append(None) # Missing value
|
||||||
|
|
||||||
|
df_dict[grid_square] = aligned_values
|
||||||
|
|
||||||
|
df = pd.DataFrame(df_dict)
|
||||||
|
|
||||||
|
# Sort by datetime (already sorted)
|
||||||
sorted_df = df.sort("datetime")
|
sorted_df = df.sort("datetime")
|
||||||
|
|
||||||
# Format datetime column
|
# Format datetime column
|
||||||
@@ -195,9 +269,7 @@ class GenerateTimeseries:
|
|||||||
pd.col("datetime").dt.strftime("%Y-%m-%d %H:%M:%S")
|
pd.col("datetime").dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
)
|
)
|
||||||
|
|
||||||
output_path = Path(self.config.CSV_TOP_FOLDER) / f"{zone_id}_timeseries_data.csv"
|
output_path = Path(self.config.COMBINED_FOLDER) / f"{zone_name}_timeseries_data.csv"
|
||||||
sorted_df.write_csv(
|
sorted_df.write_csv(output_path, float_precision=4)
|
||||||
output_path,
|
|
||||||
float_precision=4
|
|
||||||
)
|
|
||||||
logging.info("All CSV files written.")
|
logging.info("All CSV files written.")
|
||||||
Reference in New Issue
Block a user