From 0928a3ff6ad55f8bf1d4c9f9d37d59b43b93981b Mon Sep 17 00:00:00 2001 From: Patrick Brosi Date: Sat, 18 Jan 2025 17:16:25 +0100 Subject: [PATCH] better variable names in shapevl, add some additional metrics, add option --report-level for full reports --- src/shapevl/Collector.cpp | 129 ++++++++++++++++++++++++++++-------- src/shapevl/Collector.h | 14 ++-- src/shapevl/ShapevlMain.cpp | 17 +++-- 3 files changed, 124 insertions(+), 36 deletions(-) diff --git a/src/shapevl/Collector.cpp b/src/shapevl/Collector.cpp index 588e40b..7305f51 100644 --- a/src/shapevl/Collector.cpp +++ b/src/shapevl/Collector.cpp @@ -6,6 +6,7 @@ #include #include #include + #include "ad/cppgtfs/gtfs/Feed.h" #include "pfaedle/Def.h" #include "shapevl/Collector.h" @@ -53,7 +54,10 @@ double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT, } } + double accFd = 0; double fd = 0; + double d = 0; + double lenDiff = 0; // A "segment" is a path from station s_i to station s_{i+1} @@ -113,18 +117,62 @@ double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT, auto newLCutS = util::geo::simplify(newLCut, f * (0.5 / util::geo::M_PER_DEG)); - auto old = _dCache.find(oldLCutS); + auto old = _accFdCache.find(oldLCutS); + if (old != _accFdCache.end()) { + auto match = old->second.find(newLCutS); + if (match != old->second.end()) { + accFd = match->second; + } else { + accFd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); + _accFdCache[oldLCutS][newLCutS] = accFd; + } + } else { + accFd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); + _accFdCache[oldLCutS][newLCutS] = accFd; + } + + old = _fdCache.find(oldLCutS); + if (old != _fdCache.end()) { + auto match = old->second.find(newLCutS); + if (match != old->second.end()) { + fd = match->second; + } else { + fd = util::geo::frechetDistHav(oldLCutS, newLCutS, SEGL); + _fdCache[oldLCutS][newLCutS] = fd; + } + } else { + fd = util::geo::frechetDistHav(oldLCutS, newLCutS, SEGL); + 
_fdCache[oldLCutS][newLCutS] = fd; + } + + old = _dCache.find(oldLCutS); if (old != _dCache.end()) { auto match = old->second.find(newLCutS); if (match != old->second.end()) { - fd = match->second; + d = match->second; /* cache hit: _dCache now stores util::geo::dist, so fill d, not fd */ } else { - fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); - _dCache[oldLCutS][newLCutS] = fd; + d = util::geo::dist(oldLCutS, newLCutS); + _dCache[oldLCutS][newLCutS] = d; } } else { - fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); - _dCache[oldLCutS][newLCutS] = fd; + d = util::geo::dist(oldLCutS, newLCutS); + _dCache[oldLCutS][newLCutS] = d; + } + + old = _lenDiffCache.find(oldLCutS); + if (old != _lenDiffCache.end()) { + auto match = old->second.find(newLCutS); + if (match != old->second.end()) { + lenDiff = match->second; /* cache hit: reuse the cached length difference, leave fd untouched */ + } else { + lenDiff = + fabs(util::geo::latLngLen(oldLCutS) - util::geo::latLngLen(newLCutS)); + _lenDiffCache[oldLCutS][newLCutS] = lenDiff; + } + } else { + lenDiff = + fabs(util::geo::latLngLen(oldLCutS) - util::geo::latLngLen(newLCutS)); + _lenDiffCache[oldLCutS][newLCutS] = lenDiff; } auto dA = getDa(oldSegs, newSegs); @@ -141,11 +189,11 @@ double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT, return 0; } - _fdSum += fd / totL; + _accFdSum += accFd / totL; _unmatchedSegSum += unmatchedSegments; _unmatchedSegLengthSum += unmatchedSegmentsLength; - double avgFd = fd / totL; + double avgFd = accFd / totL; double AN = static_cast(unmatchedSegments) / static_cast(oldSegs.size()); double AL = unmatchedSegmentsLength / totL; @@ -167,17 +215,41 @@ double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT, << totL << " = " << AL << " d_f = " << avgFd; if (_reportOut) { - (*_reportOut) << std::fixed << std::setprecision(6); - (*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" << avgFd - << "\t" << util::geo::getWKT(oldSegs) << "\t" - << util::geo::getWKT(newSegs) << "\t" << oldT->getRoute()->getShortName() << "\t"; - - for (const auto& st : oldT->getStopTimes()) { - 
(*_reportOut) << st.getStop()->getName() << "\t" - << st.getStop()->getLat() << "\t" - << st.getStop()->getLng() << "\t"; + if (_reportLevel == 0) { + (*_reportOut) << std::fixed << std::setprecision(6); + (*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" + << avgFd << "\t" << oldT->getRoute()->getShortName() + << "\t"; + } else if (_reportLevel == 1) { + (*_reportOut) << std::fixed << std::setprecision(6); + (*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" + << avgFd << "\t" << util::geo::getWKT(oldSegs) << "\t" + << util::geo::getWKT(newSegs) << "\t" + << oldT->getRoute()->getShortName() << "\t"; + + for (const auto& st : oldT->getStopTimes()) { + (*_reportOut) << st.getStop()->getName() << "\t" + << st.getStop()->getLat() << "\t" + << st.getStop()->getLng() << "\t"; + } + } else if (_reportLevel == 2) { + (*_reportOut) << std::fixed << std::setprecision(6); + (*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" + << avgFd << "\t" << fd << "\t" + << d + << "\t" + << lenDiff + << "\t" << util::geo::getWKT(oldSegs) << "\t" + << util::geo::getWKT(newSegs) << "\t" + << oldT->getRoute()->getShortName() << "\t"; + + for (const auto& st : oldT->getStopTimes()) { + (*_reportOut) << st.getStop()->getName() << "\t" + << st.getStop()->getLat() << "\t" + << st.getStop()->getLng() << "\t"; + } } -(*_reportOut) << "\n"; + (*_reportOut) << "\n"; } return avgFd; @@ -198,9 +270,8 @@ std::vector Collector::segmentize( cuts.push_back(st.getShapeDistanceTravelled()); } - - size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[0]) - - dists.begin(); + size_t to = + std::upper_bound(dists.begin(), dists.end(), cuts[0]) - dists.begin(); POINT lastP; if (to >= dists.size()) { @@ -210,13 +281,15 @@ std::vector Collector::segmentize( } else { double progr = (cuts[0] - dists[to - 1]) / (dists[to] - dists[to - 1]); lastP = shape[to - 1]; - lastP.setX(lastP.getX() + progr * (shape[to].getX() - shape[to-1].getX())); - 
lastP.setY(lastP.getY() + progr * (shape[to].getY() - shape[to-1].getY())); + lastP.setX(lastP.getX() + + progr * (shape[to].getX() - shape[to - 1].getX())); + lastP.setY(lastP.getY() + + progr * (shape[to].getY() - shape[to - 1].getY())); } for (size_t i = 1; i < cuts.size(); i++) { - size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[i]) - - dists.begin(); + size_t to = + std::upper_bound(dists.begin(), dists.end(), cuts[i]) - dists.begin(); POINT curP; if (to >= dists.size()) { @@ -226,8 +299,10 @@ std::vector Collector::segmentize( } else { curP = shape[to - 1]; double progr = (cuts[i] - dists[to - 1]) / (dists[to] - dists[to - 1]); - curP.setX(curP.getX() + progr * (shape[to].getX() - shape[to-1].getX())); - curP.setY(curP.getY() + progr * (shape[to].getY() - shape[to-1].getY())); + curP.setX(curP.getX() + + progr * (shape[to].getX() - shape[to - 1].getX())); + curP.setY(curP.getY() + + progr * (shape[to].getY() - shape[to - 1].getY())); } auto curL = pl.getSegment(lastP, curP).getLine(); @@ -262,7 +337,7 @@ LINE Collector::getLine(const Shape* s, std::vector* dists) { const std::set& Collector::getResults() const { return _results; } // _____________________________________________________________________________ -double Collector::getAvgDist() const { return _fdSum / _results.size(); } +double Collector::getAvgDist() const { return _accFdSum / _results.size(); } // _____________________________________________________________________________ void Collector::printCsv(std::ostream* os, diff --git a/src/shapevl/Collector.h b/src/shapevl/Collector.h index 1680c1c..4315e03 100644 --- a/src/shapevl/Collector.h +++ b/src/shapevl/Collector.h @@ -47,10 +47,10 @@ struct lineCmp { */ class Collector { public: - Collector(std::ostream* reportOut) + Collector(std::ostream* reportOut, int reportLevel) : _trips(0), _noOrigShp(0), - _fdSum(0), + _accFdSum(0), _unmatchedSegSum(0), _unmatchedSegLengthSum(0), _an0(0), @@ -60,7 +60,7 @@ class Collector { _an50(0), 
_an70(0), _an90(0), - _reportOut(reportOut) {} + _reportOut(reportOut), _reportLevel(reportLevel) {} // Add a shape found by our tool newS for a trip t with newly calculated // station dist values with the old shape oldS @@ -91,8 +91,11 @@ class Collector { private: std::set _results; - std::map, lineCmp> _dCache; + std::map, lineCmp> _accFdCache; std::map, lineCmp> _dACache; + std::map, lineCmp> _fdCache; + std::map, lineCmp> _dCache; + std::map, lineCmp> _lenDiffCache; size_t _trips; size_t _noOrigShp; @@ -100,7 +103,7 @@ class Collector { std::vector _distDiffs; std::vector _hopDists; - double _fdSum; + double _accFdSum; size_t _unmatchedSegSum; double _unmatchedSegLengthSum; @@ -114,6 +117,7 @@ class Collector { size_t _an90 = 0; std::ostream* _reportOut; + int _reportLevel = 1; std::pair getDa(const std::vector& a, const std::vector& b); diff --git a/src/shapevl/ShapevlMain.cpp b/src/shapevl/ShapevlMain.cpp index bade8d9..0a16f53 100644 --- a/src/shapevl/ShapevlMain.cpp +++ b/src/shapevl/ShapevlMain.cpp @@ -27,11 +27,12 @@ void printHelp(int argc, char** argv) { << " [-f ] -g [-s] " << "\n"; std::cout - << "\nAllowed arguments:\n -g Ground truth GTFS file\n"; + << "\nAllowed arguments:\n -g Ground truth GTFS file\n"; std::cout << " -s Only output summary\n"; std::cout << " --json Output JSON\n"; - std::cout << " --avg Take avg of all inputs (only for --json)\n"; + std::cout << " --avg Take avg of all inputs (only for --json)\n"; std::cout << " -f Output full reports (per feed) to \n"; + std::cout << " -l Full report level (0 - 2), default 1\n"; std::cout << " -m MOTs to match (GTFS MOT or string, default: all)\n"; } @@ -135,6 +136,7 @@ int main(int argc, char** argv) { bool json = false; bool avg = false; bool unique = false; + int reportLevel = 1; for (int i = 1; i < argc; i++) { std::string cur = argv[i]; @@ -155,6 +157,13 @@ int main(int argc, char** argv) { unique = true; } else if (cur == "--avg") { avg = true; + } else if (cur == "-l") { + if (++i >= 
argc) { + LOG(ERROR) << "Missing argument for report level (-l)"; + exit(1); + } + /* atoi() yields 0 for non-numeric input, which selects report level 0 */ + reportLevel = atoi(argv[i]); } else if (cur == "-f") { if (++i >= argc) { LOG(ERROR) << "Missing argument for full reports (-f)."; @@ -185,9 +194,9 @@ int main(int argc, char** argv) { reportStreams.back().open(fullReportPath + "/" + util::split(feedPath, '/').back() + ".fullreport.tsv"); - evalColls.push_back({&reportStreams.back()}); + evalColls.push_back({&reportStreams.back(), reportLevel}); } else { - evalColls.push_back({0}); + evalColls.push_back({0, reportLevel}); } count++; }