Skip to content

Commit

Permalink
[QC-1037] IncreasingEntries fault tolerance (#2020)
Browse files Browse the repository at this point in the history
* [QC-1037] IncreasingEntries: tolerate a number of bad cycles

* [QC-1037] IncreasingEntries: tolerate a number of bad cycles

* Update Modules/Common/src/IncreasingEntries.cxx

Co-authored-by: Piotr Konopka <[email protected]>

* change name of parameter

* doc

* fix format

* format

---------

Co-authored-by: Piotr Konopka <[email protected]>
  • Loading branch information
Barthelemy and knopers8 authored Nov 1, 2023
1 parent c518497 commit 2f86036
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 26 deletions.
18 changes: 14 additions & 4 deletions Modules/Common/include/Common/IncreasingEntries.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace o2::quality_control_modules::common
{

/// \brief Check if the number of Entries has increased or not
/// If it does not increase, the quality is bad.
/// If it does not increase over the past N cycles (N=1 by default), the quality is bad.
/// The behaviour can be modified with the customParameter "mustIncrease". If set to "false",
/// it will actually have a bad quality if the number of entries increases.
class IncreasingEntries : public o2::quality_control::checker::CheckInterface
Expand All @@ -42,13 +42,23 @@ class IncreasingEntries : public o2::quality_control::checker::CheckInterface
std::string getAcceptedType() override;

private:
std::map<std::string, double> mLastEntries;
std::map<std::string, double> mLastEntries; // moName -> number of entries

// count the number of faults we have seen in a row for each object
std::map<std::string, size_t> mMoFaultCount; // moName -> number of faults in a row

// the pave text with the error message
std::shared_ptr<TPaveText> mPaveText;
bool mMustIncrease = true;

// store the faults to beautify them later
std::vector<std::string> mFaultyObjectsNames;

ClassDefOverride(IncreasingEntries, 2);
// decides whether the number of entries must increase or it must remain the same
bool mMustIncrease = true;
// The number of cycles during which the number of entries did not move until we set the quality bad.
int mBadCyclesLimit = 1;

ClassDefOverride(IncreasingEntries, 3);
};

} // namespace o2::quality_control_modules::common
Expand Down
48 changes: 27 additions & 21 deletions Modules/Common/src/IncreasingEntries.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,23 @@ namespace o2::quality_control_modules::common

/// \brief Reads the custom parameters and builds the message displayed on faulty plots.
///
/// Custom parameters read from mCustomParameters:
///  - "mustIncrease": decoded with decodeBool; defaults to true when absent.
///  - "nBadCyclesLimit": parsed with stoi; defaults to 1 when absent.
///
/// The pave text is created once and filled with a message that depends on mMustIncrease
/// and on the configured number of cycles.
void IncreasingEntries::configure()
{
  auto option = mCustomParameters.atOptional("mustIncrease");
  mMustIncrease = option.has_value() ? decodeBool(option.value()) : true; // if not there, default behaviour
  ILOG(Debug, Support) << "mustIncrease: " << mMustIncrease << ENDM;

  option = mCustomParameters.atOptional("nBadCyclesLimit");
  mBadCyclesLimit = option.has_value() ? stoi(option.value()) : 1;
  ILOG(Debug, Support) << "nBadCyclesLimit: " << mBadCyclesLimit << ENDM;

  mPaveText = make_shared<TPaveText>(1, 0.125, 0.6, 0, "NDC");
  mPaveText->SetFillColor(kRed);
  mPaveText->SetMargin(0);
  mPaveText->AddText(mMustIncrease ? "Number of Entries has *not* changed" : "Number of Entries has *changed*");

  // std::string has no operator+ taking an int, so the limit is converted explicitly.
  // AddText expects a C string, hence the c_str() call.
  const std::string cyclesText = "in the past " + std::to_string(mBadCyclesLimit) + " cycle(s)";
  mPaveText->AddText(cyclesText.c_str());
}

Expand All @@ -68,19 +66,27 @@ Quality IncreasingEntries::check(std::map<std::string, std::shared_ptr<MonitorOb
continue;
}

double previousNumberEntries = mLastEntries.count(moName) > 0 ? mLastEntries.at(moName) : 0;
double currentNumberEntries = histo->GetEntries();
const double previousNumberEntries = mLastEntries.count(moName) > 0 ? mLastEntries.at(moName) : 0;
const double currentNumberEntries = histo->GetEntries();
size_t faultCount = mMoFaultCount.count(moName) > 0 ? mMoFaultCount.at(moName) : 0;

if (mMustIncrease && previousNumberEntries == currentNumberEntries) {
result = Quality::Bad;
result.addReason(FlagReasonFactory::NoDetectorData(), "Number of entries stopped increasing.");
mFaultyObjectsNames.push_back(mo->getName());
} else if (!mMustIncrease && previousNumberEntries != currentNumberEntries) {
if (mMustIncrease == (previousNumberEntries == currentNumberEntries)) {
faultCount++;
} else {
faultCount = 0;
}

if (faultCount >= mBadCyclesLimit) {
result = Quality::Bad;
result.addReason(FlagReasonFactory::Unknown(), "Number of entries has increased.");
mFaultyObjectsNames.push_back(mo->getName());
if (mMustIncrease) {
result.addReason(FlagReasonFactory::NoDetectorData(), "Number of entries stopped increasing.");
} else {
result.addReason(FlagReasonFactory::Unknown(), "Number of entries has increased.");
}
}

mMoFaultCount[moName] = faultCount;
mLastEntries[moName] = currentNumberEntries;
}
return result;
Expand Down
14 changes: 13 additions & 1 deletion doc/Advanced.md
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,7 @@ One can also enable publishing metrics related to CPU/memory usage. To do so, us

## Common check `IncreasingEntries`

This check make sures that the number of entries has increased in the past cycle. If not it will display a pavetext
This check makes sure that the number of entries has increased in the past cycle(s). If not, it will display a pavetext
on the plot and set the quality to bad.

If you use `SetBinContent` the number of entries does not increase creating a false positive. Please call `ResetStats()`
Expand All @@ -1491,6 +1491,18 @@ The behaviour of the check can be inverted by setting the customparameter "mustI
}
```

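The number of consecutive faulty cycles that are tolerated before the quality is set to bad (cycles without an increase in the number of entries or, when `mustIncrease` is `false`, cycles in which the number of entries changed) can be set with the custom parameter `nBadCyclesLimit`: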
```
"extendedCheckParameters": {
"default": {
"default": {
"nBadCyclesLimit": "3"
}
}
}
```
In the example above, the quality goes to bad when there are 3 cycles in a row with no increase in the number of entries.

## Update the shmem segment size of a detector

In consul go to `o2/runtime/aliecs/defaults` and modify the file corresponding to the detector: [det]_qc_shm_segment_size
Expand Down

0 comments on commit 2f86036

Please sign in to comment.