From 59000b25494dfdbfcaae1bad32d206aaf5a78078 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Thu, 9 Jan 2025 12:28:20 -0800 Subject: [PATCH 1/2] traverser: clear error messages before traversal operations Recent debugging revealed that the traverser error message wasn't getting cleared after some traversal operations, leading to spurious or error logging that can be unrelated to the error cause. Start each public traverser API call with clear_err_message () to ensure error messages are correctly attributed to the cause. --- resource/traversers/dfu.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/resource/traversers/dfu.cpp b/resource/traversers/dfu.cpp index f331c3123..97996c7ec 100644 --- a/resource/traversers/dfu.cpp +++ b/resource/traversers/dfu.cpp @@ -312,6 +312,9 @@ bool dfu_traverser_t::is_initialized () const int dfu_traverser_t::initialize () { + // Clear the error message to disambiguate errors + clear_err_message (); + int rc = 0; vtx_t root; if (!get_graph () || !get_graph_db () || !get_match_cb ()) { @@ -350,6 +353,9 @@ int dfu_traverser_t::run (Jobspec::Jobspec &jobspec, int64_t jobid, int64_t *at) { + // Clear the error message to disambiguate errors + clear_err_message (); + subsystem_t dom = get_match_cb ()->dom_subsystem (); graph_duration_t graph_duration = get_graph_db ()->metadata.graph_duration; if (!get_graph () || !get_graph_db () @@ -413,6 +419,9 @@ int dfu_traverser_t::run (const std::string &str, int64_t at, uint64_t duration) { + // Clear the error message to disambiguate errors + clear_err_message (); + if (!get_match_cb () || !get_graph () || !get_graph_db () || !reader || at < 0) { errno = EINVAL; return -1; @@ -435,11 +444,16 @@ int dfu_traverser_t::run (const std::string &str, int dfu_traverser_t::find (std::shared_ptr &writers, const std::string &criteria) { + // Clear the error message to disambiguate errors + clear_err_message (); return detail::dfu_impl_t::find (writers, criteria); } int dfu_traverser_t::remove (int64_t jobid) { + // Clear the error message to disambiguate errors + clear_err_message (); + subsystem_t dom = get_match_cb ()->dom_subsystem (); if (!get_graph () || !get_graph_db () || get_graph_db ()->metadata.roots.find (dom) == get_graph_db ()->metadata.roots.end () @@ -457,6 +471,9 @@ int dfu_traverser_t::remove (const std::string &R_to_cancel, int64_t jobid, bool &full_cancel) { + // Clear the error message to disambiguate errors + clear_err_message (); + subsystem_t dom = get_match_cb ()->dom_subsystem (); if (!get_graph () || !get_graph_db () || get_graph_db ()->metadata.roots.find (dom) == get_graph_db ()->metadata.roots.end () @@ -471,11 +488,15 @@ int dfu_traverser_t::remove (const std::string &R_to_cancel, int dfu_traverser_t::mark (const std::string &root_path, resource_pool_t::status_t status) { + // Clear the error message to disambiguate errors + clear_err_message (); return detail::dfu_impl_t::mark (root_path, status); } int dfu_traverser_t::mark (std::set &ranks, resource_pool_t::status_t status) { + // Clear the error message to disambiguate errors + clear_err_message (); return detail::dfu_impl_t::mark (ranks, status); } From 73222a7d60df48e8e6e2d56b649670acdcb00bd2 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Tue, 17 Dec 2024 23:29:36 -0800 Subject: [PATCH 2/2] resource: remove error message clearing The public traverser API functions now clear the traverser error message prior to traversal operations. Remove the function calls in resource to clear the error message. --- resource/modules/resource_match.cpp | 3 --- resource/reapi/bindings/c++/reapi_cli_impl.hpp | 2 -- resource/utilities/command.cpp | 5 ----- 3 files changed, 10 deletions(-) diff --git a/resource/modules/resource_match.cpp b/resource/modules/resource_match.cpp index c66905b90..13c0b7251 100644 --- a/resource/modules/resource_match.cpp +++ b/resource/modules/resource_match.cpp @@ -1201,7 +1201,6 @@ static int mark_now (std::shared_ptr &ctx, "%s: traverser::mark: %s", __FUNCTION__, ctx->traverser->err_message ().c_str ()); - ctx->traverser->clear_err_message (); goto done; } flux_log (ctx->h, @@ -2685,7 +2684,6 @@ static int run_find (std::shared_ptr &ctx, "%s: %s", __FUNCTION__, ctx->traverser->err_message ().c_str ()); - ctx->traverser->clear_err_message (); } goto error; } @@ -2956,7 +2954,6 @@ static void set_status_request_cb (flux_t *h, "%s: traverser::mark: %s", __FUNCTION__, ctx->traverser->err_message ().c_str ()); - ctx->traverser->clear_err_message (); errmsg = "Failed to set status of resource vertex"; goto error; } diff --git a/resource/reapi/bindings/c++/reapi_cli_impl.hpp b/resource/reapi/bindings/c++/reapi_cli_impl.hpp index 1d81410fe..e9fda8306 100644 --- a/resource/reapi/bindings/c++/reapi_cli_impl.hpp +++ b/resource/reapi/bindings/c++/reapi_cli_impl.hpp @@ -682,7 +682,6 @@ int resource_query_t::remove_job (const uint64_t jobid) } } else { m_err_msg += traverser->err_message (); - traverser->clear_err_message (); } return rc; } @@ -716,7 +715,6 @@ int resource_query_t::remove_job (const uint64_t jobid, const std::string &R, bo } } else { m_err_msg += traverser->err_message (); - traverser->clear_err_message (); } return rc; diff --git a/resource/utilities/command.cpp b/resource/utilities/command.cpp index 782d0dd0f..94e9ecc06 100644 --- a/resource/utilities/command.cpp +++ b/resource/utilities/command.cpp @@ -115,7 +115,6 @@ static int do_remove (std::shared_ptr &ctx, int64_t jobid) } } else { std::cout << ctx->traverser->err_message (); - ctx->traverser->clear_err_message (); } return rc; } @@ -135,7 +134,6 @@ static int do_partial_remove (std::shared_ptr &ctx, } } else { std::cout << ctx->traverser->err_message (); - ctx->traverser->clear_err_message (); } return rc; } @@ -295,7 +293,6 @@ static int run_match (std::shared_ptr &ctx, if (ctx->traverser->err_message () != "") { std::cerr << "ERROR: " << ctx->traverser->err_message (); - ctx->traverser->clear_err_message (); } if ((rc = ctx->writers->emit (o)) < 0) { std::cerr << "ERROR: match writer emit: " << strerror (errno) << std::endl; @@ -436,7 +433,6 @@ static int update_run (std::shared_ptr &ctx, std::cerr << "ERROR: traverser run () returned error " << std::endl; if (ctx->traverser->err_message () != "") { std::cerr << "ERROR: " << ctx->traverser->err_message (); - ctx->traverser->clear_err_message (); } } } @@ -619,7 +615,6 @@ int cmd_find (std::shared_ptr &ctx, std::vector if ((rc = ctx->traverser->find (ctx->writers, criteria)) < 0) { if (ctx->traverser->err_message () != "") { std::cerr << "ERROR: " << ctx->traverser->err_message (); - ctx->traverser->clear_err_message (); } goto done; }