Skip to content

Commit

Permalink
chore: more error tracking metrics (#27276)
Browse files — browse the repository at this point in the history
  • Loading branch information
daibhin authored Jan 6, 2025
1 parent e31a3ce commit b279355
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 11 deletions.
4 changes: 4 additions & 0 deletions rust/cymbal/src/frames/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use sqlx::PgPool;
use crate::{
config::Config,
error::UnhandledError,
metric_consts::{FRAME_CACHE_HITS, FRAME_CACHE_MISSES, FRAME_DB_HITS},
symbol_store::{saving::SymbolSetRecord, Catalog},
};

Expand Down Expand Up @@ -35,13 +36,16 @@ impl Resolver {
catalog: &Catalog,
) -> Result<Frame, UnhandledError> {
if let Some(result) = self.cache.get(&frame.frame_id()) {
metrics::counter!(FRAME_CACHE_HITS).increment(1);
return Ok(result.contents);
}
metrics::counter!(FRAME_CACHE_MISSES).increment(1);

if let Some(result) =
ErrorTrackingStackFrame::load(pool, team_id, &frame.frame_id(), self.result_ttl).await?
{
self.cache.insert(frame.frame_id(), result.clone());
metrics::counter!(FRAME_DB_HITS).increment(1);
return Ok(result.contents);
}

Expand Down
12 changes: 9 additions & 3 deletions rust/cymbal/src/issue_resolution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use uuid::Uuid;

use crate::{
error::UnhandledError,
metric_consts::ISSUE_CREATED,
types::{FingerprintedErrProps, OutputErrProps},
};

Expand Down Expand Up @@ -74,12 +75,17 @@ impl Issue {
self.description
)
.fetch_one(executor)
.await?;

.await?
// TODO - I'm fairly sure the Option here is a bug in sqlx, so the `unwrap_or` fallback
// should never be hit; nonetheless, I'm not 100% sure the "no rows" case actually
// means the insert was not done.
Ok(did_insert.unwrap_or(false))
.unwrap_or(false);

if did_insert {
metrics::counter!(ISSUE_CREATED).increment(1);
}

Ok(did_insert)
}
}

Expand Down
2 changes: 2 additions & 0 deletions rust/cymbal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use common_types::ClickHouseEvent;
use error::{EventError, UnhandledError};
use fingerprinting::generate_fingerprint;
use issue_resolution::resolve_issue;
use metric_consts::FRAME_RESOLUTION;
use tracing::warn;
use types::{Exception, RawErrProps, Stacktrace};

Expand Down Expand Up @@ -114,6 +115,7 @@ async fn process_exception(
// thrown at the wall), with some cross-group concurrency.
handles.push(tokio::spawn(async move {
context.worker_liveness.report_healthy().await;
metrics::counter!(FRAME_RESOLUTION).increment(1);
let res = context
.resolver
.resolve(&frame, team_id, &context.pool, &context.catalog)
Expand Down
5 changes: 3 additions & 2 deletions rust/cymbal/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use cymbal::{
config::Config,
hack::kafka::{send_keyed_iter_to_kafka, RecvErr},
handle_event,
metric_consts::{ERRORS, EVENT_RECEIVED, MAIN_LOOP_TIME, STACK_PROCESSED},
metric_consts::{ERRORS, EVENT_PROCESSED, EVENT_RECEIVED, MAIN_LOOP_TIME},
};
use envconfig::Envconfig;
use tokio::task::JoinHandle;
Expand Down Expand Up @@ -100,6 +100,8 @@ async fn main() {
}
};

metrics::counter!(EVENT_PROCESSED).increment(1);

output.push(event);
offsets.push(offset);
}
Expand All @@ -117,7 +119,6 @@ async fn main() {
offset.store().unwrap();
}

metrics::counter!(STACK_PROCESSED).increment(1);
whole_loop.label("finished", "true").fin();
}
}
14 changes: 9 additions & 5 deletions rust/cymbal/src/metric_consts.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,33 @@
pub const ERRORS: &str = "cymbal_errors";
pub const EVENT_RECEIVED: &str = "cymbal_event_received";
pub const STACK_PROCESSED: &str = "cymbal_stack_track_processed";
pub const BASIC_FETCHES: &str = "cymbal_basic_fetches";
pub const EVENT_PROCESSED: &str = "cymbal_event_processed";
pub const SOURCEMAP_HEADER_FOUND: &str = "cymbal_sourcemap_header_found";
pub const SOURCEMAP_BODY_REF_FOUND: &str = "cymbal_sourcemap_body_ref_found";
pub const SOURCEMAP_NOT_FOUND: &str = "cymbal_sourcemap_not_found";
pub const SOURCEMAP_BODY_FETCHES: &str = "cymbal_sourcemap_body_fetches";
pub const STORE_CACHE_HITS: &str = "cymbal_store_cache_hits";
pub const STORE_CACHE_MISSES: &str = "cymbal_store_cache_misses";
pub const STORE_CACHED_BYTES: &str = "cymbal_store_cached_bytes";
pub const STORE_CACHE_SIZE: &str = "cymbal_store_cache_size";
pub const STORE_CACHE_EVICTIONS: &str = "cymbal_store_cache_evictions";
pub const STORE_CACHE_EVICTION_RUNS: &str = "cymbal_store_cache_eviction_runs";
pub const MAIN_LOOP_TIME: &str = "cymbal_main_loop_time";
pub const PER_FRAME_TIME: &str = "cymbal_per_frame_time";
pub const PER_STACK_TIME: &str = "cymbal_per_stack_time";
pub const PER_FRAME_GROUP_TIME: &str = "cymbal_per_frame_group_time";
pub const SYMBOL_SET_DB_FETCHES: &str = "cymbal_symbol_set_db_fetches";
pub const SYMBOL_SET_DB_HITS: &str = "cymbal_symbol_set_db_hits";
pub const SYMBOL_SET_DB_MISSES: &str = "cymbal_symbol_set_db_misses";
pub const SYMBOL_SET_SAVED: &str = "cymbal_symbol_set_saved";
pub const SAVED_SYMBOL_SET_LOADED: &str = "cymbal_saved_symbol_set_loaded";
pub const SAVED_SYMBOL_SET_ERROR_RETURNED: &str = "cymbal_saved_symbol_set_error_returned";
pub const SYMBOL_SET_FETCH_RETRY: &str = "cymbal_symbol_set_fetch_retry";
pub const FRAME_RESOLUTION: &str = "cymbal_frame_resolution";
pub const FRAME_RESOLVED: &str = "cymbal_frame_resolved";
pub const FRAME_CACHE_HITS: &str = "cymbal_frame_cache_hits";
pub const FRAME_CACHE_MISSES: &str = "cymbal_frame_cache_misses";
pub const FRAME_DB_HITS: &str = "cymbal_frame_db_hits";
pub const FRAME_NOT_RESOLVED: &str = "cymbal_frame_not_resolved";
pub const S3_FETCH: &str = "cymbal_s3_fetch";
pub const S3_PUT: &str = "cymbal_s3_put";
pub const SOURCEMAP_FETCH: &str = "cymbal_sourcemap_fetch";
pub const SAVE_SYMBOL_SET: &str = "cymbal_save_symbol_set";
pub const SOURCEMAP_PARSE: &str = "cymbal_sourcemap_parse";
pub const ISSUE_CREATED: &str = "cymbal_issue_created";
8 changes: 7 additions & 1 deletion rust/cymbal/src/symbol_store/saving.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use crate::{
error::{Error, FrameError, UnhandledError},
metric_consts::{
SAVED_SYMBOL_SET_ERROR_RETURNED, SAVED_SYMBOL_SET_LOADED, SAVE_SYMBOL_SET,
SYMBOL_SET_FETCH_RETRY, SYMBOL_SET_SAVED,
SYMBOL_SET_DB_FETCHES, SYMBOL_SET_DB_HITS, SYMBOL_SET_DB_MISSES, SYMBOL_SET_FETCH_RETRY,
SYMBOL_SET_SAVED,
},
};

Expand Down Expand Up @@ -136,7 +137,10 @@ where
async fn fetch(&self, team_id: i32, r: Self::Ref) -> Result<Self::Fetched, Error> {
let set_ref = r.to_string();
info!("Fetching symbol set data for {}", set_ref);
metrics::counter!(SYMBOL_SET_DB_FETCHES).increment(1);

if let Some(record) = SymbolSetRecord::load(&self.pool, team_id, &set_ref).await? {
metrics::counter!(SYMBOL_SET_DB_HITS).increment(1);
if let Some(storage_ptr) = record.storage_ptr {
info!("Found s3 saved symbol set data for {}", set_ref);
let data = self.s3_client.get(&self.bucket, &storage_ptr).await?;
Expand Down Expand Up @@ -171,6 +175,8 @@ where
metrics::counter!(SYMBOL_SET_FETCH_RETRY).increment(1);
}

metrics::counter!(SYMBOL_SET_DB_MISSES).increment(1);

match self.inner.fetch(team_id, r).await {
// NOTE: We don't save the data here, because we want to save it only after parsing
Ok(data) => {
Expand Down

0 comments on commit b279355

Please sign in to comment.