Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove redundant data members from InputFileCatalog to reduce memory use #47013

Merged
merged 4 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions FWCore/Catalog/interface/InputFileCatalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace edm {
class FileCatalogItem {
public:
FileCatalogItem(std::vector<std::string> const& pfns, std::string const& lfn) : pfns_(pfns), lfn_(lfn) {}
FileCatalogItem(std::vector<std::string> pfns, std::string lfn) : pfns_(std::move(pfns)), lfn_(std::move(lfn)) {}

std::string const& fileName(unsigned iCatalog) const { return pfns_[iCatalog]; }
std::string const& logicalFileName() const { return lfn_; }
Expand All @@ -33,7 +33,7 @@ namespace edm {

class InputFileCatalog {
public:
InputFileCatalog(std::vector<std::string> const& fileNames,
InputFileCatalog(std::vector<std::string> fileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound = false,
//switching between two catalog types
Expand All @@ -42,19 +42,19 @@ namespace edm {

~InputFileCatalog();
std::vector<FileCatalogItem> const& fileCatalogItems() const { return fileCatalogItems_; }
std::vector<std::string> const& logicalFileNames() const { return logicalFileNames_; }
std::vector<std::string> fileNames(unsigned iCatalog) const;
bool empty() const { return fileCatalogItems_.empty(); }
static bool isPhysical(std::string const& name) { return (name.empty() || name.find(':') != std::string::npos); }

private:
void init(std::string const& override, bool useLFNasPFNifLFNnotFound, edm::CatalogType catType);
void init(std::vector<std::string> logicalFileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType);
void findFile(std::string const& lfn,
std::vector<std::string>& pfns,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType);
std::vector<std::string> logicalFileNames_;
std::vector<std::string> fileNames_;
std::vector<FileCatalogItem> fileCatalogItems_;
edm::propagate_const<std::unique_ptr<FileLocator>> overrideFileLocator_;

Expand Down
30 changes: 15 additions & 15 deletions FWCore/Catalog/src/InputFileCatalog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

namespace edm {

InputFileCatalog::InputFileCatalog(std::vector<std::string> const& fileNames,
InputFileCatalog::InputFileCatalog(std::vector<std::string> fileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType)
: logicalFileNames_(fileNames), fileNames_(fileNames), fileCatalogItems_(), overrideFileLocator_() {
init(override, useLFNasPFNifLFNnotFound, catType);
: fileCatalogItems_(), overrideFileLocator_() {
init(std::move(fileNames), override, useLFNasPFNifLFNnotFound, catType);
}

InputFileCatalog::~InputFileCatalog() {}
Expand All @@ -36,7 +36,8 @@ namespace edm {
return tmp;
}

void InputFileCatalog::init(std::string const& inputOverride,
void InputFileCatalog::init(std::vector<std::string> logicalFileNames,
std::string const& inputOverride,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType) {
typedef std::vector<std::string>::iterator iter;
Expand Down Expand Up @@ -123,32 +124,31 @@ namespace edm {
throw ex;
}

for (iter it = fileNames_.begin(), lt = logicalFileNames_.begin(), itEnd = fileNames_.end(); it != itEnd;
++it, ++lt) {
boost::trim(*it);
for (auto& lfn : logicalFileNames) {
boost::trim(lfn);
std::vector<std::string> pfns;
if (it->empty()) {
if (lfn.empty()) {
cms::Exception ex("FileCatalog");
ex << "An empty string specified in the fileNames parameter for input source";
ex.addContext("Calling edm::InputFileCatalog::init()");
throw ex;
}
if (isPhysical(*it)) {
if (it->back() == ':') {
if (isPhysical(lfn)) {
if (lfn.back() == ':') {
cms::Exception ex("FileCatalog");
ex << "An empty physical file name specified in the fileNames parameter for input source";
ex.addContext("Calling edm::InputFileCatalog::init()");
throw ex;
}
pfns.push_back(*it);
pfns.push_back(lfn);
// Clear the LFN.
lt->clear();
lfn.clear();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the clear? The lifetime of the container should make this go away at the end of the function anyway. In fact, why have maybePfn anyway?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the clear? The lifetime of the container should make this go away at the end of the function anyway.

The lfn is moved to fileCatalogItems_ member at the end of the loop, line 153. The code has a convention to have the LFN empty when the input file name was a PFN. (I would not change that behavior in this PR, but we could think about it for longer term)

In fact, why have maybePfn anyway?

Staring at the code more now I agree, the use of maybePfn could be replaced with lfn (the lfn gets trimmed anyway in the else block).

} else {
boost::trim(*lt);
findFile(*lt, pfns, useLFNasPFNifLFNnotFound, catType);
findFile(lfn, pfns, useLFNasPFNifLFNnotFound, catType);
}
lfn.shrink_to_fit(); // try to release memory

fileCatalogItems_.push_back(FileCatalogItem(pfns, *lt));
fileCatalogItems_.emplace_back(std::move(pfns), std::move(lfn));
}
}

Expand Down
2 changes: 1 addition & 1 deletion FWCore/Catalog/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<use name="FWCore/Catalog"/>
<use name="FWCore/ServiceRegistry"/>
<use name="catch2"/>
<bin name="edmCatalogFileLocator_t" file="FileLocator_t.cpp">
<bin name="TestFWCoreCatalog" file="FileLocator_t.cpp,InputFileCatalog_t.cpp">
</bin>
94 changes: 11 additions & 83 deletions FWCore/Catalog/test/FileLocator_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,96 +2,24 @@
#include "FWCore/Catalog/interface/SiteLocalConfig.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
#include <filesystem>

#include "TestSiteLocalConfig.h"

#include <filesystem>
#include <string>

#define CATCH_CONFIG_MAIN
#include "catch.hpp"

namespace {
class TestSiteLocalConfig : public edm::SiteLocalConfig {
public:
//constructor using trivial data catalogs
TestSiteLocalConfig(std::vector<std::string> catalogs) : m_trivialCatalogs(std::move(catalogs)) {}
//constructor using Rucio data catalogs
TestSiteLocalConfig(std::vector<edm::CatalogAttributes> catalogs) : m_catalogs(std::move(catalogs)) {}
std::vector<std::string> const& trivialDataCatalogs() const final { return m_trivialCatalogs; }
std::vector<edm::CatalogAttributes> const& dataCatalogs() const final { return m_catalogs; }
std::filesystem::path const storageDescriptionPath(const edm::CatalogAttributes& aDataCatalog) const final {
return std::filesystem::path();
}

std::string const lookupCalibConnect(std::string const& input) const final { return std::string(); }
std::string const rfioType(void) const final { return std::string(); }

std::string const* sourceCacheTempDir() const final { return nullptr; }
double const* sourceCacheMinFree() const final { return nullptr; }
std::string const* sourceCacheHint() const final { return nullptr; }
std::string const* sourceCloneCacheHint() const final { return nullptr; }
std::string const* sourceReadHint() const final { return nullptr; }
unsigned int const* sourceTTreeCacheSize() const final { return nullptr; }
unsigned int const* sourceTimeout() const final { return nullptr; }
bool enablePrefetching() const final { return false; }
unsigned int debugLevel() const final { return 0; }
std::vector<std::string> const* sourceNativeProtocols() const final { return nullptr; }
struct addrinfo const* statisticsDestination() const final { return nullptr; }
std::set<std::string> const* statisticsInfo() const final { return nullptr; }
std::string const& siteName(void) const final { return m_emptyString; }
std::string const& subSiteName(void) const final { return m_emptyString; }
bool useLocalConnectString() const final { return false; }
std::string const& localConnectPrefix() const final { return m_emptyString; }
std::string const& localConnectSuffix() const final { return m_emptyString; }

private:
std::vector<std::string> m_trivialCatalogs;
std::vector<edm::CatalogAttributes> m_catalogs;
std::filesystem::path m_storageDescription_path;
std::string m_emptyString;
};
} // namespace

TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]") {
//catalog for testing "prefix"
edm::CatalogAttributes aCatalog;
aCatalog.site = "T1_US_FNAL";
aCatalog.subSite = "T1_US_FNAL";
aCatalog.storageSite = "T1_US_FNAL";
aCatalog.volume = "American_Federation";
aCatalog.protocol = "XRootD";
std::vector<edm::CatalogAttributes> tmp{aCatalog};
//catalog for testing "rules"
aCatalog.site = "T1_US_FNAL";
aCatalog.subSite = "T1_US_FNAL";
aCatalog.storageSite = "T1_US_FNAL";
aCatalog.volume = "FNAL_dCache_EOS";
aCatalog.protocol = "XRootD";
tmp.push_back(aCatalog);
//catalog for testing chained "rules"
aCatalog.site = "T1_US_FNAL";
aCatalog.subSite = "T1_US_FNAL";
aCatalog.storageSite = "T1_US_FNAL";
aCatalog.volume = "FNAL_dCache_EOS";
aCatalog.protocol = "root";
tmp.push_back(aCatalog);

//create the services
edm::ServiceToken tempToken(
edm::ServiceRegistry::createContaining(std::unique_ptr<edm::SiteLocalConfig>(new TestSiteLocalConfig(tmp))));

std::string CMSSW_BASE(std::getenv("CMSSW_BASE"));
std::string CMSSW_RELEASE_BASE(std::getenv("CMSSW_RELEASE_BASE"));
std::string file_name("/src/FWCore/Catalog/test/storage.json");
std::string full_file_name = std::filesystem::exists((CMSSW_BASE + file_name).c_str())
? CMSSW_BASE + file_name
: CMSSW_RELEASE_BASE + file_name;
TEST_CASE("FileLocator with Rucio data catalog", "[FWCore/Catalog]") {
edm::ServiceToken tempToken = edmtest::catalog::makeTestSiteLocalConfigToken();

SECTION("prefix") {
edm::ServiceRegistry::Operate operate(tempToken);
//empty catalog
edm::CatalogAttributes tmp_cat;
//use the first catalog provided by site local config
edm::FileLocator fl(tmp_cat, 0, full_file_name);
edm::FileLocator fl(tmp_cat, 0);
CHECK("root://cmsxrootd.fnal.gov/store/group/bha/bho" ==
fl.pfn("/store/group/bha/bho", edm::CatalogType::RucioCatalog));
}
Expand All @@ -100,7 +28,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
//empty catalog
edm::CatalogAttributes tmp_cat;
//use the second catalog provided by site local config
edm::FileLocator fl(tmp_cat, 1, full_file_name);
edm::FileLocator fl(tmp_cat, 1);
const std::array<const char*, 7> lfn = {{"/bha/bho",
"bha",
"file:bha",
Expand All @@ -119,7 +47,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
//empty catalog
edm::CatalogAttributes tmp_cat;
//use the third catalog provided by site local config above
edm::FileLocator fl(tmp_cat, 2, full_file_name);
edm::FileLocator fl(tmp_cat, 2);
const std::array<const char*, 7> lfn = {{"/bha/bho",
"bha",
"file:bha",
Expand All @@ -139,7 +67,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
}
}

TEST_CASE("FileLocator", "[filelocator]") {
TEST_CASE("FileLocator with TrivialFileCatalog", "[FWCore/Catalog]") {
std::string CMSSW_BASE(std::getenv("CMSSW_BASE"));
std::string CMSSW_RELEASE_BASE(std::getenv("CMSSW_RELEASE_BASE"));
std::string file_name("/src/FWCore/Catalog/test/simple_catalog.xml");
Expand All @@ -149,8 +77,8 @@ TEST_CASE("FileLocator", "[filelocator]") {

//create the services
std::vector<std::string> tmp{std::string("trivialcatalog_file:") + full_file_name + "?protocol=xrd"};
edm::ServiceToken tempToken(
edm::ServiceRegistry::createContaining(std::unique_ptr<edm::SiteLocalConfig>(new TestSiteLocalConfig(tmp))));
edm::ServiceToken tempToken(edm::ServiceRegistry::createContaining(
std::unique_ptr<edm::SiteLocalConfig>(std::make_unique<edmtest::catalog::TestSiteLocalConfig>(tmp))));

//make the services available
SECTION("standard") {
Expand Down
139 changes: 139 additions & 0 deletions FWCore/Catalog/test/InputFileCatalog_t.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "FWCore/Catalog/interface/InputFileCatalog.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"

#include "TestSiteLocalConfig.h"

#include "catch.hpp"

TEST_CASE("InputFileCatalog with Rucio data catalog", "[FWCore/Catalog]") {
edm::ServiceToken tempToken = edmtest::catalog::makeTestSiteLocalConfigToken();

SECTION("Empty") {
edm::ServiceRegistry::Operate operate(tempToken);
edm::InputFileCatalog catalog({}, "");
REQUIRE(catalog.empty());
}

SECTION("isPhysical") {
REQUIRE(edm::InputFileCatalog::isPhysical(""));
REQUIRE(not edm::InputFileCatalog::isPhysical("/foo/bar"));
REQUIRE(edm::InputFileCatalog::isPhysical("file:/foo/bar"));
}

SECTION("Default behavior") {
edm::ServiceRegistry::Operate operate(tempToken);

edm::InputFileCatalog catalog(std::vector<std::string>{"/store/foo/bar", " file:/foo/bar", "root://foobar "}, "");

SECTION("fileNames") {
SECTION("Catalog 0") {
auto const names = catalog.fileNames(0);
REQUIRE(names.size() == 3);
CHECK(names[0] == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(names[1] == "file:/foo/bar");
CHECK(names[2] == "root://foobar");
}
// The fileNames() works only for catalog 0
// Catalog 1 or 2 leads to a crash because the input file list
// is a mixture of LFNs and PFNs
// This isn't really good behavior...
}

SECTION("fileCatalogItems") {
auto const& items = catalog.fileCatalogItems();
REQUIRE(items.size() == 3);
CHECK(items[0].logicalFileName() == "/store/foo/bar");
CHECK(items[1].logicalFileName() == "");
CHECK(items[2].logicalFileName() == "");

REQUIRE(items[0].fileNames().size() == 3);
REQUIRE(items[1].fileNames().size() == 1);
REQUIRE(items[2].fileNames().size() == 1);

SECTION("Catalog 0") {
CHECK(items[0].fileName(0) == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(items[1].fileName(0) == "file:/foo/bar");
CHECK(items[2].fileName(0) == "root://foobar");
}
SECTION("Catalog 1") {
CHECK(items[0].fileName(1) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
}
SECTION("Catalog 2") { CHECK(items[0].fileName(2) == "root://host.domain//pnfs/cms/store/foo/bar"); }
}
}

SECTION("Override catalog") {
edm::ServiceRegistry::Operate operate(tempToken);

edm::InputFileCatalog catalog(std::vector<std::string>{"/store/foo/bar", " file:/foo/bar", "root://foobar "},
"T1_US_FNAL,,T1_US_FNAL,FNAL_dCache_EOS,XRootD");

SECTION("fileNames") {
auto const names = catalog.fileNames(0);
REQUIRE(names.size() == 3);
CHECK(names[0] == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
CHECK(names[1] == "file:/foo/bar");
CHECK(names[2] == "root://foobar");
}

SECTION("fileCatalogItems") {
auto const& items = catalog.fileCatalogItems();
REQUIRE(items.size() == 3);

CHECK(items[0].logicalFileName() == "/store/foo/bar");
CHECK(items[1].logicalFileName() == "");
CHECK(items[2].logicalFileName() == "");

REQUIRE(items[0].fileNames().size() == 1);
REQUIRE(items[1].fileNames().size() == 1);
REQUIRE(items[2].fileNames().size() == 1);

CHECK(items[0].fileName(0) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
CHECK(items[1].fileName(0) == "file:/foo/bar");
CHECK(items[2].fileName(0) == "root://foobar");
}
}

SECTION("useLFNasPFNifLFNnotFound") {
edm::ServiceRegistry::Operate operate(tempToken);

edm::InputFileCatalog catalog(
std::vector<std::string>{"/store/foo/bar", "/tmp/foo/bar", "root://foobar "}, "", true);

SECTION("fileNames") {
SECTION("Catalog 0") {
auto const names = catalog.fileNames(0);
REQUIRE(names.size() == 3);
CHECK(names[0] == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(names[1] == "/tmp/foo/bar");
CHECK(names[2] == "root://foobar");
}
}

SECTION("fileCatalogItems") {
auto const& items = catalog.fileCatalogItems();
REQUIRE(items.size() == 3);
CHECK(items[0].logicalFileName() == "/store/foo/bar");
CHECK(items[1].logicalFileName() == "/tmp/foo/bar");
CHECK(items[2].logicalFileName() == "");

REQUIRE(items[0].fileNames().size() == 3);
REQUIRE(items[1].fileNames().size() == 3);
REQUIRE(items[2].fileNames().size() == 1);

SECTION("Catalog 0") {
CHECK(items[0].fileName(0) == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(items[1].fileName(0) == "/tmp/foo/bar");
CHECK(items[2].fileName(0) == "root://foobar");
}
SECTION("Catalog 1") {
CHECK(items[0].fileName(1) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
CHECK(items[1].fileName(1) == "/tmp/foo/bar");
}
SECTION("Catalog 2") {
CHECK(items[0].fileName(2) == "root://host.domain//pnfs/cms/store/foo/bar");
CHECK(items[1].fileName(2) == "/tmp/foo/bar");
}
}
}
}
Loading