diff --git a/CMakeLists.txt b/CMakeLists.txt index a320eab3a0..28d71f09b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -372,7 +372,6 @@ target_link_libraries(ngen NGen::parallel NGen::state_save_restore NGen::bmi_protocols - NGen::state_save_restore ) if(NGEN_WITH_SQLITE) diff --git a/Dockerfile b/Dockerfile index 8b721f514a..f7b574b896 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,10 +7,10 @@ ARG ORG=ngwpc ARG NGEN_FORCING_IMAGE_TAG=latest ARG NGEN_FORCING_IMAGE=ghcr.io/${ORG}/ngen-bmi-forcing:${NGEN_FORCING_IMAGE_TAG} -FROM ${NGEN_FORCING_IMAGE} AS base +#FROM ${NGEN_FORCING_IMAGE} AS base # Uncomment when building locally -#FROM ngen-bmi-forcing AS base +FROM ngen-bmi-forcing AS base # OCI Metadata Arguments ARG NGEN_FORCING_IMAGE @@ -471,6 +471,11 @@ RUN --mount=type=cache,target=/root/.cache/pip,id=pip-cache \ cd extern/topoflow-glacier; \ pip install . +RUN --mount=type=cache,target=/root/.cache/pip,id=pip-cache \ + set -eux; \ + cd extern/topoflow-glacier; \ + pip install . + RUN set -eux && \ mkdir --parents /ngencerf/data/ngen-run-logs/ && \ mkdir --parents /ngen-app/bin/ && \ diff --git a/extern/t-route b/extern/t-route index e2f7d5dcb7..13536b5c9b 160000 --- a/extern/t-route +++ b/extern/t-route @@ -1 +1 @@ -Subproject commit e2f7d5dcb7efcc684523f759b438ad250543ccdd +Subproject commit 13536b5c9bbe4906c798344d150499d56d09f368 diff --git a/include/core/NgenSimulation.hpp b/include/core/NgenSimulation.hpp index 3189045edc..62744e4588 100644 --- a/include/core/NgenSimulation.hpp +++ b/include/core/NgenSimulation.hpp @@ -83,6 +83,23 @@ class NgenSimulation private: void advance_models_one_output_step(); + /** Set T-route input that may require merging results from other MPI processes. The T-route values will only be set for the MPI rank 0 process. + * + * If MPI is running with multiple processes, blocking MPI calls will be made to merge the results. + * @param simulation_values Pointer to vector of simulation results + * @param feature_indexes Pointer to the map between feature IDs and the relative timestep index + * @param id_var_name T-route BMI var name for the feature IDs. `feature_indexes` will be converted to a list and passed to this variable + * @param value_var_name T-route BMI var name for the simulation results + * @param features Features colection used to filter out remote sender nexuses when merging values + */ + void set_troute_inputs( + const std::vector *simulation_values, + const std::unordered_map *feature_indexes, + const std::string id_var_name, + const std::string value_var_name, + const NgenSimulation::hy_features_t &features + ); + int simulation_step_; std::shared_ptr sim_time_; diff --git a/include/core/nexus/HY_PointHydroNexusRemote.hpp b/include/core/nexus/HY_PointHydroNexusRemote.hpp index aa3857d67b..c9cfafd613 100644 --- a/include/core/nexus/HY_PointHydroNexusRemote.hpp +++ b/include/core/nexus/HY_PointHydroNexusRemote.hpp @@ -72,7 +72,6 @@ class HY_PointHydroNexusRemote : public HY_PointHydroNexus communication_type get_communicator_type() { return type; } private: - void post_receives(); void process_communications(); int world_rank; diff --git a/include/realizations/catchment/Bmi_Module_Formulation.hpp b/include/realizations/catchment/Bmi_Module_Formulation.hpp index 150bd2ac38..318fafe064 100644 --- a/include/realizations/catchment/Bmi_Module_Formulation.hpp +++ b/include/realizations/catchment/Bmi_Module_Formulation.hpp @@ -7,8 +7,6 @@ #include "Bmi_Adapter.hpp" #include #include "bmi_utilities.hpp" - -#include #include "bmi/protocols.hpp" using data_access::MEAN; @@ -59,11 +57,6 @@ namespace realization { /** * Get the collection of forcing output property names this instance can provide. - * - * This is part of the @ref ForcingProvider interface. This interface must be implemented for items of this - * type to be usable as "forcing" providers for situations when some other object needs to receive as an input - * (i.e., one of its forcings) a data property output from this object. - * * For this type, this is the collection of BMI output variables, plus any aliases included in the formulation * config's output variable mapping. * diff --git a/include/realizations/catchment/Bmi_Multi_Formulation.hpp b/include/realizations/catchment/Bmi_Multi_Formulation.hpp index 1eb2487917..3387f9fa1e 100644 --- a/include/realizations/catchment/Bmi_Multi_Formulation.hpp +++ b/include/realizations/catchment/Bmi_Multi_Formulation.hpp @@ -627,7 +627,7 @@ namespace realization { // Since this is a nested formulation, support usage of the '{{id}}' syntax for init config file paths. Catchment_Formulation::config_pattern_substitution(properties, BMI_REALIZATION_CFG_PARAM_REQ__INIT_CONFIG, - "{{id}}", id); + "{{id}}", Catchment_Formulation::config_pattern_id_replacement(id)); // Call create_formulation to perform the rest of the typical initialization steps for the formulation. mod->create_formulation(properties); diff --git a/include/realizations/catchment/Catchment_Formulation.hpp b/include/realizations/catchment/Catchment_Formulation.hpp index 31522e953c..b9ababa3b2 100644 --- a/include/realizations/catchment/Catchment_Formulation.hpp +++ b/include/realizations/catchment/Catchment_Formulation.hpp @@ -32,6 +32,12 @@ namespace realization { static void config_pattern_substitution(geojson::PropertyMap &properties, const std::string &key, const std::string &pattern, const std::string &replacement); + /**Remove leading non-numeric characters from the ID string. + * + * This may be needed to correct NGEN adding an identifying prefix to the ID with system file names without the prefix. + */ + static std::string config_pattern_id_replacement(const std::string &id); + /** * Get a header line appropriate for a file made up of entries from this type's implementation of * ``get_output_line_for_timestep``. diff --git a/include/realizations/catchment/Formulation_Manager.hpp b/include/realizations/catchment/Formulation_Manager.hpp index 89383e792d..746129b90e 100644 --- a/include/realizations/catchment/Formulation_Manager.hpp +++ b/include/realizations/catchment/Formulation_Manager.hpp @@ -170,13 +170,18 @@ namespace realization { for (std::pair catchment_config : *possible_catchment_configs) { ss.str(""); ss << "Processing catchment: " << catchment_config.first << std::endl; LOG(ss.str(), LogLevel::DEBUG); + // ensure catchment's ID starts with "cat-" so it can be found in the fabric + std::string catchment_id = catchment_config.first; + if (strncmp(catchment_id.c_str(), "cat-", 4) != 0) { + catchment_id = "cat-" + catchment_id; + } - int catchment_index = fabric->find(catchment_config.first); + int catchment_index = fabric->find(catchment_id); if (catchment_index == -1) { #ifndef NGEN_QUIET ss.str(""); ss << "Formulation_Manager::read: Cannot create formulation for catchment " - << catchment_config.first + << catchment_id << " that isn't identified in the hydrofabric or requested subset" << std::endl; LOG(ss.str(), LogLevel::WARNING); #endif @@ -200,7 +205,7 @@ namespace realization { this->add_formulation( this->construct_formulation_from_config( simulation_time_config, - catchment_config.first, + catchment_id, catchment_formulation, output_stream ) @@ -553,7 +558,7 @@ namespace realization { global_copy.formulation.parameters, BMI_REALIZATION_CFG_PARAM_REQ__INIT_CONFIG, "{{id}}", - identifier + Catchment_Formulation::config_pattern_id_replacement(identifier) ); } else { ss.str(""); ss << "init_config is present but empty for identifier: " << identifier << std::endl; @@ -665,7 +670,9 @@ namespace realization { // Replace {{id}} if present if (id_index != std::string::npos) { - filepattern = filepattern.replace(id_index, sizeof("{{id}}") - 1, identifier); + // account generate the regex to search for the ID with or without a prefix + std::string pattern_id = Catchment_Formulation::config_pattern_id_replacement(identifier); + filepattern = filepattern.replace(id_index, sizeof("{{id}}") - 1, pattern_id); } // Compile the file pattern as a regex diff --git a/src/NGen.cpp b/src/NGen.cpp index b4b8d14978..dd4ab6b58e 100644 --- a/src/NGen.cpp +++ b/src/NGen.cpp @@ -448,11 +448,12 @@ int run_ngen(int argc, char* argv[], int mpi_num_procs, int mpi_rank) { #if NGEN_WITH_SQLITE3 try { nexus_collection = ngen::geopackage::read(nexusDataFile, "nexus", nexus_subset_ids); - } catch (...) { + } catch (std::exception &e) { // Handle all exceptions std::string msg = "Geopackage error occurred reading nexuses: " + nexusDataFile; LOG(msg,LogLevel::FATAL); - throw std::runtime_error(msg); + LOG(LogLevel::FATAL, e.what()); + throw; } #else LOG(LogLevel::FATAL, "SQLite3 support required to read GeoPackage files."); @@ -480,11 +481,12 @@ int run_ngen(int argc, char* argv[], int mpi_num_procs, int mpi_rank) { try { catchment_collection = ngen::geopackage::read(catchmentDataFile, "divides", catchment_subset_ids); - } catch (...) { + } catch (std::exception &e) { // Handle all exceptions std::string msg = "Geopackage error occurred reading divides: " + catchmentDataFile; LOG(msg,LogLevel::FATAL); - throw std::runtime_error(msg); + LOG(LogLevel::FATAL, e.what()); + throw; } #else diff --git a/src/core/Layer.cpp b/src/core/Layer.cpp index 432b918aa3..5de09354c5 100644 --- a/src/core/Layer.cpp +++ b/src/core/Layer.cpp @@ -46,10 +46,6 @@ void ngen::Layer::update_models(boost::span catchment_outflows, +" at feature id "+id; throw std::runtime_error(msg); } -#if NGEN_WITH_ROUTING - int results_index = catchment_indexes[id]; - catchment_outflows[results_index] += response; -#endif // NGEN_WITH_ROUTING if (r_c->get_output_header_count() > 0) { // only write output if config specifies output values std::string output = std::to_string(output_time_index)+","+current_timestamp+","+ @@ -57,16 +53,31 @@ void ngen::Layer::update_models(boost::span catchment_outflows, r_c->write_output(output); } //TODO put this somewhere else. For now, just trying to ensure we get m^3/s into nexus output - double area; + double area_sq_km; try { - area = catchment_data->get_feature(id)->get_property("areasqkm").as_real_number(); + area_sq_km = catchment_data->get_feature(id)->get_property("areasqkm").as_real_number(); } catch(std::invalid_argument &e) { - area = catchment_data->get_feature(id)->get_property("area_sqkm").as_real_number(); + area_sq_km = catchment_data->get_feature(id)->get_property("area_sqkm").as_real_number(); } - double response_m_s = response * (area * 1000000); + double area_sq_m = area_sq_km * 1'000'000; //TODO put this somewhere else as well, for now, an implicit assumption is that a module's get_response returns //m/timestep +#if NGEN_WITH_ROUTING + // t-route NHF takes in catchment results in (m^3/s) + // depth (m) x area (m^2) / dt (seconds) + int results_index = catchment_indexes[id]; + catchment_outflows[results_index] += + // response is meters per timestep (m/t) + response + // divide by timestep seconds to get to meters per second: (m/t) * (t/s) = (m/s) + / simulation_time.get_output_interval_seconds() + // multiply by square meters: (m/s) * (m^2) = (m^3/s) + * area_sq_m; +#endif // NGEN_WITH_ROUTING + // NOTE: the conversion below loos like it's missing a conversion from per timestep to per second + // Maintaining the current code in case there's a step later that accounts for this + double response_m_s = response * area_sq_m; //since we are operating on a 1 hour (3600s) dt, we need to scale the output appropriately //so no response is m^2/hr...m^2/hr * 1hr/3600s = m^3/hr double response_m_h = response_m_s / 3600.0; diff --git a/src/core/NgenSimulation.cpp b/src/core/NgenSimulation.cpp index 9addca80d4..f2caad5831 100644 --- a/src/core/NgenSimulation.cpp +++ b/src/core/NgenSimulation.cpp @@ -218,98 +218,79 @@ double NgenSimulation::get_nexus_outflow(int nexus_index, int timestep_index) co return nexus_downstream_flows_[timestep_index * nexus_indexes_.size() + nexus_index]; } -void NgenSimulation::run_routing(NgenSimulation::hy_features_t &features, std::string const& t_route_config_file_with_path) -{ -#if NGEN_WITH_ROUTING - std::vector *routing_nexus_downflows = &nexus_downstream_flows_; - std::unordered_map *routing_nexus_indexes = &nexus_indexes_; - - size_t number_of_timesteps = sim_time_->get_total_output_times(); - if (nexus_downstream_flows_.size() != number_of_timesteps * nexus_indexes_.size()) { - std::string msg = "Routing input data in NgenSimulation::nexus_downstream_flows_ does not reflect a full-duration run"; - LOG(msg, LogLevel::FATAL); - throw std::runtime_error(msg); - } - +void NgenSimulation::set_troute_inputs( + const std::vector *simulation_values, + const std::unordered_map *feature_indexes, + const std::string id_var_name, + const std::string value_var_name, + const NgenSimulation::hy_features_t &features +) { #if NGEN_WITH_MPI - std::vector all_nexus_downflows; - std::unordered_map all_nexus_indexes; - - if (mpi_num_procs_ > 1) { - std::vector local_nexus_ids; - for (const auto& nexus : nexus_indexes_) { - local_nexus_ids.push_back(nexus.first); + std::vector all_values; + std::unordered_map all_indexes; + if (this->mpi_num_procs_ > 1) { + size_t number_of_timesteps = sim_time_->get_total_output_times(); + // create a list of local IDs + std::vector local_ids; + for (const auto& id_pair : *feature_indexes) { + local_ids.push_back(id_pair.first); } - // MPI_Gather all nexus IDs into a single vector - std::vector all_nexus_ids = parallel::gather_strings(local_nexus_ids, mpi_rank_, mpi_num_procs_); + // MPI_Gather all IDs into a single vector + std::vector all_ids = parallel::gather_strings(local_ids, mpi_rank_, mpi_num_procs_); if (mpi_rank_ == 0) { // filter to only the unique IDs - std::sort(all_nexus_ids.begin(), all_nexus_ids.end()); - all_nexus_ids.erase( - std::unique(all_nexus_ids.begin(), all_nexus_ids.end()), - all_nexus_ids.end() + std::sort(all_ids.begin(), all_ids.end()); + all_ids.erase( + std::unique(all_ids.begin(), all_ids.end()), + all_ids.end() ); } - // MPI_Broadcast so all processes share the nexus IDs - all_nexus_ids = std::move(parallel::broadcast_strings(all_nexus_ids, mpi_rank_, mpi_num_procs_)); + // MPI_Broadcast so all processes share the IDs + all_ids = std::move(parallel::broadcast_strings(all_ids, mpi_rank_, mpi_num_procs_)); // MPI_Reduce to collect the results from processes - if (mpi_rank_ == 0) { - all_nexus_downflows.resize(number_of_timesteps * all_nexus_ids.size(), 0.0); + if (this->mpi_rank_ == 0) { + all_values.resize(number_of_timesteps * all_ids.size(), 0.0); } std::vector local_buffer(number_of_timesteps); std::vector receive_buffer(number_of_timesteps, 0.0); - for (int i = 0; i < all_nexus_ids.size(); ++i) { - std::string nexus_id = all_nexus_ids[i]; - if (nexus_indexes_.find(nexus_id) != nexus_indexes_.end() && !features.is_remote_sender_nexus(nexus_id)) { + for (int i = 0; i < all_ids.size(); ++i) { + std::string current_id = all_ids[i]; + if (feature_indexes->find(current_id) != feature_indexes->end() && !features.is_remote_sender_nexus(current_id)) { // if this process has the id and receives/records data, copy the values to the buffer - int nexus_index = nexus_indexes_[nexus_id]; + int id_index = feature_indexes->at(current_id); for (int step = 0; step < number_of_timesteps; ++step) { - int offset = step * nexus_indexes_.size() + nexus_index; - local_buffer[step] = nexus_downstream_flows_[offset]; + int offset = step * feature_indexes->size() + id_index; + local_buffer[step] = simulation_values->at(offset); } } else { // if this process does not have the id, fill with 0 to make sure it doesn't affect reduce sum std::fill(local_buffer.begin(), local_buffer.end(), 0.0); } MPI_Reduce(local_buffer.data(), receive_buffer.data(), number_of_timesteps, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - if (mpi_rank_ == 0) { - // copy reduce values to a combined downflows vector - all_nexus_indexes[nexus_id] = i; + if (this->mpi_rank_ == 0) { + // copy reduce values to a combined values vector + all_indexes[current_id] = i; for (int step = 0; step < number_of_timesteps; ++step) { - int offset = step * all_nexus_ids.size() + i; - all_nexus_downflows[offset] = receive_buffer[step]; + int offset = step * all_ids.size() + i; + all_values[offset] = receive_buffer[step]; receive_buffer[step] = 0.0; } } } - if (mpi_rank_ == 0) { - // update root's local data for running t-route below - routing_nexus_indexes = &all_nexus_indexes; - routing_nexus_downflows = &all_nexus_downflows; + if (this->mpi_rank_ == 0) { + // change rank 0's indexes and values to the MPI merged results for setting below + simulation_values = &all_values; + feature_indexes = &all_indexes; } } #endif // NGEN_WITH_MPI - - if (mpi_rank_ == 0) { // Run t-route from single process - LOG(LogLevel::INFO, "Running T-Route on nexus outflows."); - - // Note: Currently, delta_time is set in the t-route yaml configuration file, and the - // number_of_timesteps is determined from the total number of nexus outputs in t-route. - // It is recommended to still pass these values to the routing_py_adapter object in - // case a future implementation needs these two values from the ngen framework. - int delta_time = sim_time_->get_output_interval_seconds(); - - // model for routing - if (this->py_troute_ == NULL) { - this->make_troute(t_route_config_file_with_path); - } - this->py_troute_->set_value_unchecked("ngen_dt", &delta_time, 1); - +#if NGEN_WITH_ROUTING + if (this->mpi_rank_ == 0) { // set up nexus id indexes - std::vector nexus_df_index(routing_nexus_indexes->size()); - for (const auto& key_value : *routing_nexus_indexes) { + std::vector df_index(feature_indexes->size()); + for (const auto& key_value : *feature_indexes) { int id_index = key_value.second; // Convert string ID into numbers for T-route index @@ -320,21 +301,63 @@ void NgenSimulation::run_routing(NgenSimulation::hy_features_t &features, std::s id_as_int = std::stoi(numbers); } if (id_as_int == -1) { - std::string error_msg = "Cannot convert the nexus ID to an integer: " + key_value.first; + std::string error_msg = "Cannot convert the ID to an integer: " + key_value.first; LOG(LogLevel::FATAL, error_msg); throw std::runtime_error(error_msg); } - nexus_df_index[id_index] = id_as_int; + df_index[id_index] = id_as_int; } // use unchecked messaging to allow the BMI to change its container size - py_troute_->set_value_unchecked("land_surface_water_source__id", nexus_df_index.data(), nexus_df_index.size()); - py_troute_->set_value_unchecked("land_surface_water_source__volume_flow_rate", routing_nexus_downflows->data(), routing_nexus_downflows->size()); - // run the T-Route model and create outputs through Update - py_troute_->Update(); + py_troute_->set_value_unchecked(id_var_name, df_index.data(), df_index.size()); + py_troute_->set_value_unchecked(value_var_name, simulation_values->data(), simulation_values->size()); } #endif // NGEN_WITH_ROUTING } +void NgenSimulation::run_routing(NgenSimulation::hy_features_t &features, std::string const& t_route_config_file_with_path) +{ +#if NGEN_WITH_ROUTING + size_t number_of_timesteps = sim_time_->get_total_output_times(); + if (nexus_downstream_flows_.size() != number_of_timesteps * nexus_indexes_.size()) { + std::string msg = "Routing input data in NgenSimulation::nexus_downstream_flows_ does not reflect a full-duration run"; + LOG(msg, LogLevel::FATAL); + throw std::runtime_error(msg); + } + if (mpi_rank_ == 0) { // Run t-route from single process + LOG(LogLevel::INFO, "Running T-Route on simulation outputs."); + + // Note: Currently, delta_time is set in the t-route yaml configuration file, and the + // number_of_timesteps is determined from the total number of nexus outputs in t-route. + // It is recommended to still pass these values to the routing_py_adapter object in + // case a future implementation needs these two values from the ngen framework. + int delta_time = sim_time_->get_output_interval_seconds(); + + // model for routing + if (this->py_troute_ == NULL) { + this->make_troute(t_route_config_file_with_path); + } + this->py_troute_->set_value_unchecked("ngen_dt", &delta_time, 1); + } + // set the inputs from catchment and nexus results + this->set_troute_inputs( + &this->nexus_downstream_flows_, + &this->nexus_indexes_, + "land_surface_water_source__id", + "land_surface_water_source__volume_flow_rate", + features + ); + this->set_troute_inputs( + &this->catchment_outflows_, + &this->catchment_indexes_, + "catchment_water_source__id", + "catchment_water_source__volume_flow_rate", + features + ); + if (this->mpi_rank_ == 0) + this->py_troute_->Update(); +#endif // NGEN_WITH_ROUTING +} + size_t NgenSimulation::get_num_output_times() const { return sim_time_->get_total_output_times(); diff --git a/src/geopackage/read.cpp b/src/geopackage/read.cpp index 50bc062526..1a17a0050b 100644 --- a/src/geopackage/read.cpp +++ b/src/geopackage/read.cpp @@ -32,12 +32,6 @@ std::shared_ptr ngen::geopackage::read( // Check for malicious/invalid layer input check_table_name(layer); std::vector features; - if (ids.size() > 0) - features.reserve(ids.size()); - double min_x = std::numeric_limits::infinity(); - double min_y = std::numeric_limits::infinity(); - double max_x = -std::numeric_limits::infinity(); - double max_y = -std::numeric_limits::infinity(); LOG(LogLevel::DEBUG, "Establishing connection to geopackage %s.", gpkg_path.c_str()); ngen::sqlite::database db{gpkg_path}; @@ -62,103 +56,133 @@ std::shared_ptr ngen::geopackage::read( throw std::runtime_error(errmsg); } - // Introspect if the layer is divides to see which ID field is in use - std::string id_column = "id"; - if(layer == "divides"){ - try { - //TODO: A bit primitive. Actually introspect the schema somehow? https://www.sqlite.org/c3ref/funclist.html - auto query_get_first_row = db.query("SELECT divide_id FROM " + layer + " LIMIT 1"); - id_column = "divide_id"; - } - catch (const std::exception& e){ - #ifndef NGEN_QUIET - // output debug info on what is read exactly - read_ss << "WARN: Using legacy ID column \"id\" in layer " << layer << " is DEPRECATED and may stop working at any time." << std::endl; - LOG(read_ss.str(), LogLevel::WARNING); read_ss.str(""); - #endif - } + std::string id_column; + std::string feature_query; + if (layer == "divides") { + id_column = "div_id"; + feature_query = + "SELECT " + "('cat-' || divides.div_id) AS id, " + "('nex-' || flowpaths.dn_nex_id) AS toid, " + "flowpaths.slope AS So, " + "divides.area_sqkm AS areasqkm, " // faster for later code to rename the field here + "divides.geom AS geom " + "FROM divides " + "LEFT JOIN flowpaths " + "ON divides.div_id = flowpaths.div_id"; + } else if (layer == "nexus") { + id_column = "nex_id"; + feature_query = + "SELECT " + "('nex-' || nexus.nex_id) AS id, " + "CASE " + "WHEN flowpaths.div_id IS NULL THEN 'terminal' " + "ELSE ('cat-' || flowpaths.div_id) " + "END AS toid, " + "CASE " + "WHEN flowpaths.slope IS NULL THEN 0.0 " + "ELSE flowpaths.slope " + "END AS So, " + "nexus.geom AS geom " + "FROM nexus " + "LEFT JOIN flowpaths " + "ON nexus.dn_fp_id = flowpaths.fp_id"; + } else { + Logger::LogAndThrow("Geopackage read only accepts layers `divides` and `nexus`. The layer entered was " + layer); } - // execute sub-queries if the number of IDs gets too long or once if ids.size() == 0 - int bind_limit = 900; - boost::span id_span(ids); - for (int i = 0; i < ids.size() || (i == 0 && ids.size() == 0); i += bind_limit) { - int span_size = (i + bind_limit >= ids.size()) ? (ids.size() - i) : bind_limit; - boost::span sub_ids = id_span.subspan(i, span_size); - - // Layer exists, getting statement for it - // - // this creates a string in the form: - // WHERE id IN (?, ?, ?, ...) - // so that it can be bound by SQLite. - // This is safer than trying to concatenate - // the IDs together. - std::string joined_ids = ""; - if (!sub_ids.empty()) { - joined_ids = " WHERE "+id_column+" IN (?"; - for (size_t i = 1; i < sub_ids.size(); i++) { - joined_ids += ", ?"; + std::string joined_ids = ""; + if (!ids.empty()) { + bool non_sentinel_found = false; + std::stringstream filter; + filter << " WHERE " << layer << '.' << id_column << " IN ("; + for (const auto &filter_id : ids) { + size_t sep_index = filter_id.find('-'); + if (sep_index == std::string::npos) { + sep_index = 0; + } else { + sep_index++; } - joined_ids += ")"; - } - - // Get number of features - auto query_get_layer_count = db.query("SELECT COUNT(*) FROM " + layer + joined_ids, sub_ids); - query_get_layer_count.next(); - const int layer_feature_count = query_get_layer_count.get(0); - - #ifndef NGEN_QUIET - // output debug info on what is read exactly - read_ss << "Reading " << layer_feature_count << " features from layer " << layer << " using ID column `"<< id_column << "`"; - if (!sub_ids.empty()) { - read_ss << " (id subset:"; - for (auto& id : sub_ids) { - read_ss << " " << id; + int id_num = std::atoi(filter_id.c_str() + sep_index); + if (id_num <= 0) { + // check if the failed item is a fake terminal and igore if it is + std::string terminal = "wb-TERMINAL_SENTINEL-"; + if (strncmp(filter_id.c_str(), terminal.c_str(), terminal.length()) != 0) { + Logger::LogAndThrow("Could not convert input " + layer + " ID into a number: " + filter_id); + } + } else { + if (non_sentinel_found) // only add comma after finding at least one non-sentinel + filter << ','; + non_sentinel_found = true; + filter << id_num; } - read_ss << ")"; } - read_ss << std::endl; - LOG(read_ss.str(), LogLevel::DEBUG); read_ss.str(""); - #endif - - // Get layer feature metadata (geometry column name + type) - auto query_get_layer_geom_meta = db.query("SELECT column_name FROM gpkg_geometry_columns WHERE table_name = ?", layer); - query_get_layer_geom_meta.next(); - const std::string layer_geometry_column = query_get_layer_geom_meta.get(0); - - // Get layer - LOG(LogLevel::DEBUG, "Reading %d features from layer %s.", layer_feature_count, layer.c_str()); - auto query_get_layer = db.query("SELECT * FROM " + layer + joined_ids, sub_ids); - query_get_layer.next(); - - // build features out of layer query - if (ids.size() == 0) - features.reserve(layer_feature_count); - while(!query_get_layer.done()) { - geojson::Feature feature = build_feature( - query_get_layer, - id_column, - layer_geometry_column - ); - - features.push_back(feature); - query_get_layer.next(); + if (non_sentinel_found) { + filter << ')'; + joined_ids = filter.str(); + } else { + // if all IDs were sentinels, just make the query return nothing + joined_ids = " WHERE 1=0"; } + } + + // Get number of features + auto query_get_layer_count = db.query("SELECT COUNT(*) FROM " + layer + joined_ids); + query_get_layer_count.next(); + const int layer_feature_count = query_get_layer_count.get(0); + features.reserve(layer_feature_count); + if (!ids.empty() && ids.size() != layer_feature_count) { + LOG(LogLevel::WARNING, "The number of input IDs (%d) does not equal the number of features with those IDs in the geopackage (%d) for layer %s.", + ids.size(), layer_feature_count, layer.c_str()); + } - // get layer bounding box from features - // - // GeoPackage contains a bounding box in the SQLite DB, - // however, it is in the SRS of the GPKG. By creating - // the bbox after the features are built, the projection - // is already done. This also should be fairly cheap to do. - for (const auto& feature : features) { - const auto& bbox = feature->get_bounding_box(); - min_x = bbox[0] < min_x ? bbox[0] : min_x; - min_y = bbox[1] < min_y ? bbox[1] : min_y; - max_x = bbox[2] > max_x ? bbox[2] : max_x; - max_y = bbox[3] > max_y ? bbox[3] : max_y; + #ifndef NGEN_QUIET + // output debug info on what is read exactly + read_ss << "Reading " << layer_feature_count << " features from layer " << layer << " using ID column `"<< id_column << "`"; + if (!ids.empty()) { + read_ss << " (id subset:"; + for (auto& id : ids) { + read_ss << " " << id; } + read_ss << ")"; + } + read_ss << std::endl; + LOG(read_ss.str(), LogLevel::DEBUG); read_ss.str(""); + #endif + + // Get layer + LOG(LogLevel::DEBUG, "Reading %d features from layer %s.", layer_feature_count, layer.c_str()); + auto query_get_layer = db.query(feature_query + joined_ids); + query_get_layer.next(); + + // build features out of layer query + while(!query_get_layer.done()) { + geojson::Feature feature = build_feature( + query_get_layer, + "id", + "geom" + ); + + features.push_back(feature); + query_get_layer.next(); + } + // get layer bounding box from features + // + // GeoPackage contains a bounding box in the SQLite DB, + // however, it is in the SRS of the GPKG. By creating + // the bbox after the features are built, the projection + // is already done. This also should be fairly cheap to do. + double min_x = std::numeric_limits::infinity(); + double min_y = std::numeric_limits::infinity(); + double max_x = -std::numeric_limits::infinity(); + double max_y = -std::numeric_limits::infinity(); + for (const auto& feature : features) { + const auto& bbox = feature->get_bounding_box(); + min_x = bbox[0] < min_x ? bbox[0] : min_x; + min_y = bbox[1] < min_y ? bbox[1] : min_y; + max_x = bbox[2] > max_x ? bbox[2] : max_x; + max_y = bbox[3] > max_y ? bbox[3] : max_y; } auto fc = std::make_shared( diff --git a/src/partitionGenerator.cpp b/src/partitionGenerator.cpp index 16c6b438b7..d58f0d6d50 100644 --- a/src/partitionGenerator.cpp +++ b/src/partitionGenerator.cpp @@ -433,11 +433,12 @@ int main(int argc, char* argv[]) { #if NGEN_WITH_SQLITE3 try { - catchment_collection = ngen::geopackage::read(catchmentDataFile, "divides", catchment_subset_ids); - } catch (...) { + catchment_collection = ngen::geopackage::read(catchmentDataFile, "divides", catchment_subset_ids); + } catch (std::exception &e) { // Handle all exceptions std::string msg = "Geopackage error occurred reading divides: " + catchmentDataFile; - LOG(msg,LogLevel::FATAL, msg); + LOG(LogLevel::FATAL, msg); + LOG(LogLevel::FATAL, e.what()); throw std::runtime_error(msg); } #else @@ -474,14 +475,15 @@ int main(int argc, char* argv[]) #if NGEN_WITH_SQLITE3 try { global_nexus_collection = ngen::geopackage::read(nexusDataFile, "nexus", nexus_subset_ids); - } catch (...) { + } catch (std::exception &e) { // Handle all exceptions std::string msg = "Geopackage error occurred reading nexuses: " + nexusDataFile; LOG(msg,LogLevel::FATAL); - throw std::runtime_error(msg); + LOG(LogLevel::FATAL, e.what()); + throw; } #else - LOG(msg,LogLevel::FATAL, "SQLite3 support required to read GeoPackage files."); + LOG(LogLevel::FATAL, "SQLite3 support required to read GeoPackage files."); throw std::runtime_error("SQLite3 support required to read GeoPackage files."); #endif } diff --git a/src/realizations/catchment/Catchment_Formulation.cpp b/src/realizations/catchment/Catchment_Formulation.cpp index 00b22e0cf0..71c2972d0d 100644 --- a/src/realizations/catchment/Catchment_Formulation.cpp +++ b/src/realizations/catchment/Catchment_Formulation.cpp @@ -59,6 +59,17 @@ namespace realization { // LOG(ss.str(), LogLevel::DEBUG); } + std::string Catchment_Formulation::config_pattern_id_replacement(const std::string &id) { + size_t index = id.find_last_of('-'); + if (index != std::string::npos && ++index < id.length()) { + // check if first character after the last hyphen is a number + if (static_cast(id[index]) - static_cast('0') <= 9) { + return id.substr(index); + } + } + return id; + } + std::string Catchment_Formulation::get_output_header_line(std::string delimiter) const { return "Total Discharge"; } diff --git a/src/state_save_restore/CMakeLists.txt b/src/state_save_restore/CMakeLists.txt index b068d6d4ab..099e46124d 100644 --- a/src/state_save_restore/CMakeLists.txt +++ b/src/state_save_restore/CMakeLists.txt @@ -14,4 +14,3 @@ target_include_directories(state_save_restore PUBLIC ${PROJECT_SOURCE_DIR}/include ) -