From 4fc795c96633ec9866b5d0cf7976d080e5aa0434 Mon Sep 17 00:00:00 2001 From: jamescowens Date: Tue, 24 Aug 2021 18:37:47 -0400 Subject: [PATCH 01/14] Scraper enhanced thread safety This implements qualifiers required for advanced static thread safety analysis by Clang and other advanced compilers. Note that a few minor thread safety issues were caught and fixed preparing this commit. Also, thread safety was implemented around the scraper globals and the AppCache reads. Even though these are seldom changed and there is little chance of an issue, they should be completely thread-safe as they are accessed by multiple threads. Note that some lambdas were used to do quick locked reads against the scraper globals in some of the scraper functions to facilitate use of the global in existing if statements without further change. This is a little ugly and some of the lambdas are repeated. This will be cleaned up when the scraper is restructured into classes later. --- src/gridcoin/gridcoin.cpp | 15 +- src/gridcoin/quorum.cpp | 49 +- src/gridcoin/researcher.cpp | 3 + src/gridcoin/scraper/fwd.h | 121 +--- src/gridcoin/scraper/scraper.cpp | 920 +++++++++++++++++-------- src/gridcoin/scraper/scraper.h | 107 ++- src/gridcoin/scraper/scraper_net.cpp | 266 +++---- src/gridcoin/scraper/scraper_net.h | 110 +-- src/gridcoin/superblock.h | 3 + src/main.cpp | 80 ++- src/sync.cpp | 4 +- src/test/gridcoin/superblock_tests.cpp | 7 +- 12 files changed, 1014 insertions(+), 671 deletions(-) diff --git a/src/gridcoin/gridcoin.cpp b/src/gridcoin/gridcoin.cpp index 71d88f34e3..9abf647af8 100644 --- a/src/gridcoin/gridcoin.cpp +++ b/src/gridcoin/gridcoin.cpp @@ -20,6 +20,7 @@ using namespace GRC; +extern CCriticalSection cs_ScraperGlobals; extern bool fExplorer; extern unsigned int nScraperSleep; extern unsigned int nActiveBeforeSB; @@ -276,10 +277,14 @@ void ThreadScraperSubscriber(void* parg) //! void InitializeScraper(ThreadHandlerPtr threads) { - // Default to 300 sec (5 min), clamp to 60 minimum, 600 maximum - converted to milliseconds. - nScraperSleep = std::clamp(gArgs.GetArg("-scrapersleep", 300), 60, 600) * 1000; - // Default to 14400 sec (4 hrs), clamp to 300 minimum, 86400 maximum (meaning active all of the time). - nActiveBeforeSB = std::clamp(gArgs.GetArg("-activebeforesb", 14400), 300, 86400); + { + LOCK(cs_ScraperGlobals); + + // Default to 300 sec (5 min), clamp to 60 minimum, 600 maximum - converted to milliseconds. + nScraperSleep = std::clamp(gArgs.GetArg("-scrapersleep", 300), 60, 600) * 1000; + // Default to 14400 sec (4 hrs), clamp to 300 minimum, 86400 maximum (meaning active all of the time). + nActiveBeforeSB = std::clamp(gArgs.GetArg("-activebeforesb", 14400), 300, 86400); + } // Run the scraper or subscriber housekeeping thread, but not both. The // subscriber housekeeping thread checks if the flag for the scraper thread @@ -311,6 +316,8 @@ void InitializeScraper(ThreadHandlerPtr threads) //! 
void InitializeExplorerFeatures() { + LOCK(cs_ScraperGlobals); + fExplorer = gArgs.GetBoolArg("-scraper", false) && gArgs.GetBoolArg("-explorer", false); } diff --git a/src/gridcoin/quorum.cpp b/src/gridcoin/quorum.cpp index 9873f131c3..15c56c3473 100644 --- a/src/gridcoin/quorum.cpp +++ b/src/gridcoin/quorum.cpp @@ -29,6 +29,7 @@ Superblock ScraperGetSuperblockContract( bool bContractDirectFromStatsUpdate = false, bool bFromHousekeeping = false); +extern CCriticalSection cs_ScraperGlobals; extern CCriticalSection cs_ConvergedScraperStatsCache; extern ConvergedScraperStats ConvergedScraperStatsCache; @@ -609,8 +610,12 @@ class SuperblockValidator return Result::UNKNOWN; } - if (m_superblock.Age(GetAdjustedTime()) > SCRAPER_CMANIFEST_RETENTION_TIME) { - return Result::HISTORICAL; + { + LOCK(cs_ScraperGlobals); + + if (m_superblock.Age(GetAdjustedTime()) > SCRAPER_CMANIFEST_RETENTION_TIME) { + return Result::HISTORICAL; + } } if (use_cache) { @@ -1037,23 +1042,35 @@ class SuperblockValidator // If the manifest for the beacon list is now empty, we cannot // proceed, but ProjectResolver should always select manifests // with a beacon list part: - if (manifest->vParts.empty()) { - LogPrintf("ValidateSuperblock(): beacon list part missing."); - return; - } - convergence.AddPart("BeaconList", manifest->vParts[0]); + // Note using fine-grained locking here to avoid lock-order issues and + // also to deal with Clang potential false positive below. + std::vector::iterator verified_beacons_entry_iter; + { + LOCK(manifest->cs_manifest); - // Find the offset of the verified beacons project part. Typically - // this exists at vParts offset 1 when a scraper verified at least - // one pending beacon. If it doesn't exist, omit the part from the - // reconstructed convergence: - const auto verified_beacons_entry_iter = std::find_if( - manifest->projects.begin(), - manifest->projects.end(), - [](const CScraperManifest::dentry& entry) { + if (manifest->vParts.empty()) { + LogPrintf("ValidateSuperblock(): beacon list part missing."); + return; + } + + convergence.AddPart("BeaconList", manifest->vParts[0]); + + // Find the offset of the verified beacons project part. Typically + // this exists at vParts offset 1 when a scraper verified at least + // one pending beacon. If it doesn't exist, omit the part from the + // reconstructed convergence: + verified_beacons_entry_iter = std::find_if( + manifest->projects.begin(), + manifest->projects.end(), + [](const CScraperManifest::dentry& entry) { return entry.project == "VerifiedBeacons"; }); + } + + // The manifest must be unlocked above and then relocked after cs_mapParts to avoid possible + // deadlock due to lock order. 
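The comment above captures the lock-order discipline this patch adopts: CSplitBlob::cs_mapParts is always acquired before a manifest's cs_manifest, so a cs_manifest lock that is already held must be released before the pair can be taken together. A minimal sketch of that convention, assuming the CCriticalSection/LOCK/LOCK2 primitives from sync.h that the patch already uses (the mutex names below are illustrative only):

#include "sync.h"   // CCriticalSection, LOCK, LOCK2

CCriticalSection cs_outer;   // stands in for CSplitBlob::cs_mapParts
CCriticalSection cs_inner;   // stands in for CScraperManifest::cs_manifest

void TakeBoth()
{
    // LOCK2 acquires its arguments left to right, so every path that needs
    // both critical sections takes cs_outer before cs_inner.
    LOCK2(cs_outer, cs_inner);
    // ... work on state guarded by both ...
}

void Caller()
{
    {
        LOCK(cs_inner);
        // ... fine-grained work under cs_inner only ...
    } // cs_inner is released here, before the combined acquisition below;
      // holding it across TakeBoth() would invert the order and risk deadlock.

    TakeBoth();
}

Builds configured with DEBUG_LOCKORDER flag inconsistent acquisition orders at runtime, which is how this kind of inversion is usually caught.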
+ LOCK2(CSplitBlob::cs_mapParts, manifest->cs_manifest); if (verified_beacons_entry_iter == manifest->projects.end()) { LogPrintf("ValidateSuperblock(): verified beacon project missing."); @@ -1229,6 +1246,8 @@ class SuperblockValidator const CScraperManifest_shared_ptr manifest = iter->second; + LOCK(manifest->cs_manifest); + for (const auto& entry : manifest->projects) { auto project_option = TallyProject(entry.project, scraper_id); diff --git a/src/gridcoin/researcher.cpp b/src/gridcoin/researcher.cpp index aa0202d227..314963de4a 100644 --- a/src/gridcoin/researcher.cpp +++ b/src/gridcoin/researcher.cpp @@ -29,6 +29,7 @@ using namespace GRC; extern CCriticalSection cs_main; +extern CCriticalSection cs_ScraperGlobals; extern std::string msMiningErrors; extern unsigned int nActiveBeforeSB; @@ -1086,6 +1087,8 @@ void Researcher::RunRenewBeaconJob() // window begins nActiveBeforeSB seconds before the next superblock. // This is four hours by default unless overridden by protocol entry. // + LOCK(cs_ScraperGlobals); + if (!Quorum::SuperblockNeeded(pindexBest->nTime + nActiveBeforeSB)) { TRY_LOCK(pwalletMain->cs_wallet, locked_wallet); diff --git a/src/gridcoin/scraper/fwd.h b/src/gridcoin/scraper/fwd.h index c53e4ef508..d25cd59f2f 100644 --- a/src/gridcoin/scraper/fwd.h +++ b/src/gridcoin/scraper/fwd.h @@ -47,12 +47,6 @@ enum class scraperSBvalidationtype ProjectLevelConvergence }; - -/********************* -* Global Vars * -*********************/ - - typedef std::string ScraperID; // The inner map is sorted in descending order of time. The pair is manifest hash, content hash. typedef std::multimap, std::greater > mCSManifest; @@ -75,66 +69,13 @@ typedef std::map mConvergedManifestPart_ptrs; struct ConvergedManifest { // Empty converged manifest constructor - ConvergedManifest() - { - nContentHash = {}; - ConsensusBlock = {}; - timestamp = 0; - bByParts = false; - - CScraperConvergedManifest_ptr = nullptr; - - ConvergedManifestPartPtrsMap = {}; - - mIncludedScraperManifests = {}; - - nUnderlyingManifestContentHash = {}; - - vIncludedScrapers = {}; - vExcludedScrapers = {}; - vScrapersNotPublishing = {}; - - mIncludedScrapersbyProject = {}; - mIncludedProjectsbyScraper = {}; - - mScraperConvergenceCountbyProject = {}; - - vExcludedProjects = {}; - } + ConvergedManifest(); // For constructing a dummy converged manifest from a single manifest - ConvergedManifest(CScraperManifest_shared_ptr& in) - { - ConsensusBlock = in->ConsensusBlock; - timestamp = GetAdjustedTime(); - bByParts = false; - - CScraperConvergedManifest_ptr = in; - - PopulateConvergedManifestPartPtrsMap(); - - ComputeConvergedContentHash(); - - nUnderlyingManifestContentHash = in->nContentHash; - } + ConvergedManifest(CScraperManifest_shared_ptr& in); // Call operator to update an already initialized ConvergedManifest with a passed in CScraperManifest - bool operator()(const CScraperManifest_shared_ptr& in) - { - ConsensusBlock = in->ConsensusBlock; - timestamp = GetAdjustedTime(); - bByParts = false; - - CScraperConvergedManifest_ptr = in; - - bool bConvergedContentHashMatches = PopulateConvergedManifestPartPtrsMap(); - - ComputeConvergedContentHash(); - - nUnderlyingManifestContentHash = in->nContentHash; - - return bConvergedContentHashMatches; - } + bool operator()(const CScraperManifest_shared_ptr& in); // IMPORTANT... nContentHash is NOT the hash of part hashes in the order of vParts unlike CScraper::manifest. // It is the hash of the data in the ConvergedManifestPartsMap in the order of the key. 
It represents @@ -174,61 +115,9 @@ struct ConvergedManifest // --------- project std::vector vExcludedProjects; - bool PopulateConvergedManifestPartPtrsMap() - { - if (CScraperConvergedManifest_ptr == nullptr) return false; - - int iPartNum = 0; - CDataStream ss(SER_NETWORK,1); - WriteCompactSize(ss, CScraperConvergedManifest_ptr->vParts.size()); - uint256 nContentHashCheck; - - for (const auto& iter : CScraperConvergedManifest_ptr->vParts) - { - std::string sProject; - - if (iPartNum == 0) - sProject = "BeaconList"; - else - sProject = CScraperConvergedManifest_ptr->projects[iPartNum-1].project; - - // Copy the pointer to the CPart into the map. This is ok, because the parts will be held - // until the CScraperManifest in this object is destroyed and all of the manifest refs to the part - // are gone. - ConvergedManifestPartPtrsMap.insert(std::make_pair(sProject, iter)); + bool PopulateConvergedManifestPartPtrsMap(); - // Serialize the hash to doublecheck the content hash. - ss << iter->hash; - - iPartNum++; - } - - ss << CScraperConvergedManifest_ptr->ConsensusBlock; - - nContentHashCheck = Hash(ss.begin(), ss.end()); - - if (nContentHashCheck != CScraperConvergedManifest_ptr->nContentHash) - { - LogPrintf("ERROR: PopulateConvergedManifestPartPtrsMap(): Selected Manifest content hash check failed! " - "nContentHashCheck = %s and nContentHash = %s.", - nContentHashCheck.GetHex(), CScraperConvergedManifest_ptr->nContentHash.GetHex()); - return false; - } - - return true; - } - - void ComputeConvergedContentHash() - { - CDataStream ss(SER_NETWORK,1); - - for (const auto& iter : ConvergedManifestPartPtrsMap) - { - ss << iter.second->data; - } - - nContentHash = Hash(ss.begin(), ss.end()); - } + void ComputeConvergedContentHash(); }; diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index cecf21aae5..b58650df5e 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -44,7 +44,10 @@ fs::path pathScraper = {}; extern CWallet* pwalletMain; +// Thread safety CCriticalSection cs_Scraper; +CCriticalSection cs_ScraperGlobals; +CCriticalSection cs_mScrapersExt; CCriticalSection cs_StructScraperFileManifest; CCriticalSection cs_ConvergedScraperStatsCache; CCriticalSection cs_TeamIDMap; @@ -57,6 +60,71 @@ std::vector vauthenicationetags; int64_t ndownloadsize = 0; int64_t nuploadsize = 0; +/********************* +* Global Defaults * +*********************/ + +// These can get overridden by the GetArgs in init.cpp or ScraperApplyAppCacheEntries. +// The appcache entries will take precedence. + +// The amount of time to wait between scraper loop runs. This is in +// milliseconds. +unsigned int nScraperSleep GUARDED_BY(cs_ScraperGlobals) = 300000; +// The amount of time before SB is due to start scraping. This is in +// seconds. +unsigned int nActiveBeforeSB GUARDED_BY(cs_ScraperGlobals) = 14400; + +// Explorer mode flag. Only effective if scraper is active. +bool fExplorer GUARDED_BY(cs_ScraperGlobals) = false; + +// These can be overridden by ScraperApplyAppCacheEntries(). + +// The flag to control whether non-current statistics files are retained. +bool SCRAPER_RETAIN_NONCURRENT_FILES GUARDED_BY(cs_ScraperGlobals) = true; +// Define 48 hour retention time for stats files, current or not. +int64_t SCRAPER_FILE_RETENTION_TIME GUARDED_BY(cs_ScraperGlobals) = 48 * 3600; +// Define extended file retention time for explorer mode. 
+int64_t EXPLORER_EXTENDED_FILE_RETENTION_TIME GUARDED_BY(cs_ScraperGlobals) = 168 * 3600; +// Define whether prior CScraperManifests are kept. +bool SCRAPER_CMANIFEST_RETAIN_NONCURRENT GUARDED_BY(cs_ScraperGlobals) = true; +// Define CManifest scraper object retention time. +int64_t SCRAPER_CMANIFEST_RETENTION_TIME GUARDED_BY(cs_ScraperGlobals) = 48 * 3600; +bool SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES GUARDED_BY(cs_ScraperGlobals) = false; + +// These are atomics so no explicit locking required. +std::atomic MAG_ROUND = 0.01; +std::atomic NETWORK_MAGNITUDE = 115000; +std::atomic CPID_MAG_LIMIT = GRC::Magnitude::MAX; + +// This settings below are important. This sets the minimum number of scrapers +// that must be available to form a convergence. Above this minimum, the ratio +// is followed. For example, if there are 4 scrapers, a ratio of 0.6 would require +// CEILING(0.6 * 4) = 3. See NumScrapersForSupermajority below. +// If there is only 1 scraper available, and the minimum is 2, then a convergence +// will not happen. Setting this below 2 will allow convergence to happen without +// cross checking, and is undesirable, because the scrapers are not supposed to be +// trusted entities. +unsigned int SCRAPER_CONVERGENCE_MINIMUM GUARDED_BY(cs_ScraperGlobals) = 2; +// 0.6 seems like a reasonable standard for agreement. It will require... +// 2 out of 3, 3 out of 4, 3 out of 5, 4 out of 6, 5 out of 7, 5 out of 8, etc. +double SCRAPER_CONVERGENCE_RATIO GUARDED_BY(cs_ScraperGlobals) = 0.6; +// By Project Fallback convergence rule as a ratio of projects converged vs whitelist. +// For 20 whitelisted projects this means up to five can be excluded and a contract formed. +double CONVERGENCE_BY_PROJECT_RATIO GUARDED_BY(cs_ScraperGlobals) = 0.75; +// Allow non-scraper nodes to download stats? +bool ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD GUARDED_BY(cs_ScraperGlobals) = false; +// Misbehaving scraper node banscore +unsigned int SCRAPER_MISBEHAVING_NODE_BANSCORE GUARDED_BY(cs_ScraperGlobals) = 0; +// Require team membership in team whitelist. +bool REQUIRE_TEAM_WHITELIST_MEMBERSHIP GUARDED_BY(cs_ScraperGlobals) = false; +// Default team whitelist +std::string TEAM_WHITELIST GUARDED_BY(cs_ScraperGlobals) = "Gridcoin"; +// This is the period after the deauthorizing of a scraper before the nodes will start +// to assign banscore to nodes sending unauthorized manifests. +int64_t SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD GUARDED_BY(cs_ScraperGlobals) = 300; + +AppCacheSectionExt mScrapersExt GUARDED_BY(cs_mScrapersExt) = {}; + enum class logattribute { // Can't use ERROR here because it is defined already in windows.h. @@ -122,6 +190,7 @@ void _log(logattribute eType, const std::string& sCall, const std::string& sMess template void ApplyCache(const std::string& key, T& result); +AppCacheSection GetScrapersCache(); void ScraperApplyAppCacheEntries(); void Scraper(bool bSingleShot = false); void ScraperSingleShot(); @@ -635,10 +704,11 @@ void _log(logattribute eType, const std::string& sCall, const std::string& sMess ScraperLogger& log = ScraperLogInstance(); log.output(sOut); + + // Send to UI for log window. + uiInterface.NotifyScraperEvent(scrapereventtypes::Log, CT_NEW, sOut); } - // Send to UI for log window. - uiInterface.NotifyScraperEvent(scrapereventtypes::Log, CT_NEW, sOut); // Critical, warning, and errors get sent to main debug log regardless of whether category is turned on. Info does not. 
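The scraper defaults declared earlier in this hunk are now annotated GUARDED_BY(cs_ScraperGlobals), which is what lets Clang's -Wthread-safety analysis verify at compile time that every access happens under the lock, while the std::atomic values intentionally carry no annotation because they need no lock. A minimal sketch of what the analysis enforces, assuming the annotation macros and LOCK from sync.h (names are illustrative):

#include "sync.h"    // CCriticalSection, LOCK, GUARDED_BY
#include <atomic>

CCriticalSection cs_example;
int nGuarded GUARDED_BY(cs_example) = 0;
std::atomic<int> nAtomic{0};         // safe to touch without the lock

int ReadWithoutLock()
{
    return nGuarded;   // clang: warning: reading variable 'nGuarded'
                       //        requires holding mutex 'cs_example'
}

int ReadWithLock()
{
    LOCK(cs_example);                    // RAII lock the analysis understands
    return nGuarded + nAtomic.load();    // no warning
}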
if (eType != logattribute::INFO) LogPrintf(std::string(sType + ": Scraper: <" + sCall + ">: %s").c_str(), sMessage); @@ -722,6 +792,8 @@ std::vector GetTeamWhiteList() { std::string delimiter; + LOCK(cs_ScraperGlobals); + // Due to a delimiter changeout from "|" to "<>" we must check to see if "<>" is in use // in the protocol string. if (TEAM_WHITELIST.find("<>") != std::string::npos) @@ -914,11 +986,11 @@ void ApplyCache(const std::string& key, T& result) { if (entry.value == "false" || entry.value == "0") { - result = boost::lexical_cast(false); + result = false; } else if (entry.value == "true" || entry.value == "1") { - result = boost::lexical_cast(true); + result = true; } else { @@ -936,55 +1008,77 @@ void ApplyCache(const std::string& key, T& result) } } - void ScraperApplyAppCacheEntries() { - // If there are AppCache entries for the defaults in scraper.h override them. For the first two, this will also - // override any GetArgs supplied from the command line, which is appropriate as network policy should take precedence. - - ApplyCache("scrapersleep", nScraperSleep); - ApplyCache("activebeforesb", nActiveBeforeSB); - - ApplyCache("SCRAPER_RETAIN_NONCURRENT_FILES", SCRAPER_RETAIN_NONCURRENT_FILES); - ApplyCache("SCRAPER_FILE_RETENTION_TIME", SCRAPER_FILE_RETENTION_TIME); - ApplyCache("EXPLORER_EXTENDED_FILE_RETENTION_TIME", EXPLORER_EXTENDED_FILE_RETENTION_TIME); - ApplyCache("SCRAPER_CMANIFEST_RETAIN_NONCURRENT", SCRAPER_CMANIFEST_RETAIN_NONCURRENT); - ApplyCache("SCRAPER_CMANIFEST_RETENTION_TIME", SCRAPER_CMANIFEST_RETENTION_TIME); - ApplyCache("SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES", SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES); - ApplyCache("MAG_ROUND", MAG_ROUND); - ApplyCache("NETWORK_MAGNITUDE", NETWORK_MAGNITUDE); - ApplyCache("CPID_MAG_LIMIT", CPID_MAG_LIMIT); - ApplyCache("SCRAPER_CONVERGENCE_MINIMUM", SCRAPER_CONVERGENCE_MINIMUM); - ApplyCache("SCRAPER_CONVERGENCE_RATIO", SCRAPER_CONVERGENCE_RATIO); - ApplyCache("CONVERGENCE_BY_PROJECT_RATIO", CONVERGENCE_BY_PROJECT_RATIO); - ApplyCache("ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD", ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD); - ApplyCache("SCRAPER_MISBEHAVING_NODE_BANSCORE", SCRAPER_MISBEHAVING_NODE_BANSCORE); - ApplyCache("REQUIRE_TEAM_WHITELIST_MEMBERSHIP", REQUIRE_TEAM_WHITELIST_MEMBERSHIP); - ApplyCache("TEAM_WHITELIST", TEAM_WHITELIST); - ApplyCache("SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD", SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD); - - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "scrapersleep = " + ToString(nScraperSleep)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "activebeforesb = " + ToString(nActiveBeforeSB)); - - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_RETAIN_NONCURRENT_FILES = " + ToString(SCRAPER_RETAIN_NONCURRENT_FILES)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_FILE_RETENTION_TIME = " + ToString(SCRAPER_FILE_RETENTION_TIME)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "EXPLORER_EXTENDED_FILE_RETENTION_TIME = " + ToString(EXPLORER_EXTENDED_FILE_RETENTION_TIME)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_RETAIN_NONCURRENT = " + ToString(SCRAPER_CMANIFEST_RETAIN_NONCURRENT)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_RETENTION_TIME = " + ToString(SCRAPER_CMANIFEST_RETENTION_TIME)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES = " + 
ToString(SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "MAG_ROUND = " + ToString(MAG_ROUND)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "NETWORK_MAGNITUDE = " + ToString(NETWORK_MAGNITUDE)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "CPID_MAG_LIMIT = " + ToString(CPID_MAG_LIMIT)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CONVERGENCE_MINIMUM = " + ToString(SCRAPER_CONVERGENCE_MINIMUM)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CONVERGENCE_RATIO = " + ToString(SCRAPER_CONVERGENCE_RATIO)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "CONVERGENCE_BY_PROJECT_RATIO = " + ToString(CONVERGENCE_BY_PROJECT_RATIO)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD = " + ToString(ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_MISBEHAVING_NODE_BANSCORE = " + ToString(SCRAPER_MISBEHAVING_NODE_BANSCORE)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "REQUIRE_TEAM_WHITELIST_MEMBERSHIP = " + ToString(REQUIRE_TEAM_WHITELIST_MEMBERSHIP)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "TEAM_WHITELIST = " + TEAM_WHITELIST); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD = " + ToString(SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD)); - - AppCacheSection mScrapers = ReadCacheSection(Section::SCRAPER); + { + LOCK(cs_ScraperGlobals); + _log(logattribute::INFO, "LOCK", "cs_ScraperGlobals"); + + // If there are AppCache entries for the defaults in scraper.h override them. For the first two, this will also + // override any GetArgs supplied from the command line, which is appropriate as network policy should take precedence. + + ApplyCache("scrapersleep", nScraperSleep); + ApplyCache("activebeforesb", nActiveBeforeSB); + + ApplyCache("SCRAPER_RETAIN_NONCURRENT_FILES", SCRAPER_RETAIN_NONCURRENT_FILES); + ApplyCache("SCRAPER_FILE_RETENTION_TIME", SCRAPER_FILE_RETENTION_TIME); + ApplyCache("EXPLORER_EXTENDED_FILE_RETENTION_TIME", EXPLORER_EXTENDED_FILE_RETENTION_TIME); + ApplyCache("SCRAPER_CMANIFEST_RETAIN_NONCURRENT", SCRAPER_CMANIFEST_RETAIN_NONCURRENT); + ApplyCache("SCRAPER_CMANIFEST_RETENTION_TIME", SCRAPER_CMANIFEST_RETENTION_TIME); + ApplyCache("SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES", SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES); + + // We have to use regular doubles in the ApplyCache because the template will not work with atomics since they are + // not copyable. Here we use the implicit load with the = operator for assignment to the atomic variable. The + // purpose of this is that the functions using the below are called many, many times in the scraper, and want to + // avoid the heavyweight LOCKs for these. The slight loss in atomicity in the transfer from the filled in local + // double to assignment to the atomic is not important. 
+ double mag_round = 0.0; + double network_magnitude = 0.0; + double cpid_mag_limit = 0.0; + + ApplyCache("MAG_ROUND", mag_round); + if (mag_round > 0.0) MAG_ROUND = mag_round; + + ApplyCache("NETWORK_MAGNITUDE", network_magnitude); + if (network_magnitude > 0.0) NETWORK_MAGNITUDE = network_magnitude; + + ApplyCache("CPID_MAG_LIMIT", cpid_mag_limit); + if (cpid_mag_limit > 0.0) CPID_MAG_LIMIT = cpid_mag_limit; + + ApplyCache("SCRAPER_CONVERGENCE_MINIMUM", SCRAPER_CONVERGENCE_MINIMUM); + ApplyCache("SCRAPER_CONVERGENCE_RATIO", SCRAPER_CONVERGENCE_RATIO); + ApplyCache("CONVERGENCE_BY_PROJECT_RATIO", CONVERGENCE_BY_PROJECT_RATIO); + ApplyCache("ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD", ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD); + ApplyCache("SCRAPER_MISBEHAVING_NODE_BANSCORE", SCRAPER_MISBEHAVING_NODE_BANSCORE); + ApplyCache("REQUIRE_TEAM_WHITELIST_MEMBERSHIP", REQUIRE_TEAM_WHITELIST_MEMBERSHIP); + ApplyCache("TEAM_WHITELIST", TEAM_WHITELIST); + ApplyCache("SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD", SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD); + + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "scrapersleep = " + ToString(nScraperSleep)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "activebeforesb = " + ToString(nActiveBeforeSB)); + + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_RETAIN_NONCURRENT_FILES = " + ToString(SCRAPER_RETAIN_NONCURRENT_FILES)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_FILE_RETENTION_TIME = " + ToString(SCRAPER_FILE_RETENTION_TIME)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "EXPLORER_EXTENDED_FILE_RETENTION_TIME = " + ToString(EXPLORER_EXTENDED_FILE_RETENTION_TIME)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_RETAIN_NONCURRENT = " + ToString(SCRAPER_CMANIFEST_RETAIN_NONCURRENT)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_RETENTION_TIME = " + ToString(SCRAPER_CMANIFEST_RETENTION_TIME)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES = " + ToString(SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "MAG_ROUND = " + ToString(MAG_ROUND)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "NETWORK_MAGNITUDE = " + ToString(NETWORK_MAGNITUDE)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "CPID_MAG_LIMIT = " + ToString(CPID_MAG_LIMIT)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CONVERGENCE_MINIMUM = " + ToString(SCRAPER_CONVERGENCE_MINIMUM)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CONVERGENCE_RATIO = " + ToString(SCRAPER_CONVERGENCE_RATIO)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "CONVERGENCE_BY_PROJECT_RATIO = " + ToString(CONVERGENCE_BY_PROJECT_RATIO)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD = " + ToString(ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_MISBEHAVING_NODE_BANSCORE = " + ToString(SCRAPER_MISBEHAVING_NODE_BANSCORE)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "REQUIRE_TEAM_WHITELIST_MEMBERSHIP = " + ToString(REQUIRE_TEAM_WHITELIST_MEMBERSHIP)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "TEAM_WHITELIST = " + TEAM_WHITELIST); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD = " + 
ToString(SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD)); + + _log(logattribute::INFO, "ENDLOCK", "cs_ScraperGlobals"); + } + + AppCacheSection mScrapers = GetScrapersCache(); _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "For information - authorized scraper address list"); for (auto const& entry : mScrapers) @@ -994,7 +1088,60 @@ void ScraperApplyAppCacheEntries() } } +AppCacheSection GetScrapersCache() +{ + //LOCK(cs_main); + + return ReadCacheSection(Section::SCRAPER); +} + +AppCacheSectionExt GetExtendedScrapersCache() +{ + AppCacheSection mScrapers = GetScrapersCache(); + + // For the IsManifestAuthorized() function... + /* We cannot use the AppCacheSection mScrapers in the raw, because there are two ways to deauthorize scrapers. + * The first way is to change the value of an existing entry to false. This works fine with mScrapers. The second way is to + * issue an addkey delete key. This will remove the key entirely, therefore deauthorizing the scraper. We need to preserve + * the key entry of the deleted record and when it was deleted to calculate a grace period. Why? To ensure that + * we do not generate islanding in the network in the case of a scraper deauthorization, we must apply a grace period + * after the timestamp of the marking of false/deletion, or from the time when the wallet came in sync, whichever is greater, before + * we start assigning a banscore to nodes that send/forward unauthorized manifests. This is because not all nodes + * may receive and accept the block that contains the transaction that modifies or deletes the scraper appcache entry + * at the same time, so there is a chance a node could send/forward an unauthorized manifest between when the scraper + * is deauthorized and the block containing that deauthorization is received by the sending node. + */ + + // So we are going to make use of AppCacheEntryExt and mScrapersExt, which are just like the normal AppCache structure, except they + // have an explicit deleted boolean. + + // First, walk the mScrapersExt map and see if it contains an entry that does not exist in mScrapers. If so, + // update the entry's value and timestamp and mark deleted. + LOCK(cs_mScrapersExt); + + for (auto const& entry : mScrapersExt) + { + const auto& iter = mScrapers.find(entry.first); + + if (iter == mScrapers.end()) + { + // Mark entry in mScrapersExt as deleted at the current adjusted time. The value is changed + // to false, because if it is deleted, it is also not authorized. + mScrapersExt[entry.first] = AppCacheEntryExt {"false", GetAdjustedTime(), true}; + } + + } + + // Now insert/update entries from mScrapers into mScrapersExt. + for (auto const& entry : mScrapers) + { + mScrapersExt[entry.first] = AppCacheEntryExt {entry.second.value, entry.second.timestamp, false}; + } + + // Return a copy of the global on purpose so the cs_mScrapersExt can be a short term lock. This map is very small. + return mScrapersExt; +} // This is the "main" scraper function. // It will be instantiated as a separate thread if -scraper is specified as a startup argument, @@ -1048,14 +1195,8 @@ void Scraper(bool bSingleShot) // UnauthorizedCScraperManifests should only be seen on the first invocation after getting in sync // See the comment on the function. 
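GetScrapersCache() and GetExtendedScrapersCache() above hold their locks only long enough to merge and copy the map, then hand the caller an independent snapshot, so callers never touch the guarded global without the lock. A minimal sketch of that copy-out accessor pattern (container and names are illustrative):

#include "sync.h"
#include <map>
#include <string>

CCriticalSection cs_cache;
std::map<std::string, std::string> g_cache GUARDED_BY(cs_cache);

std::map<std::string, std::string> GetCacheSnapshot()
{
    LOCK(cs_cache);    // held only for the duration of the copy
    return g_cache;    // the copy is made under the lock; callers then use it lock-free
}

The copy costs an allocation per call, which is acceptable here because the scraper map is small and read infrequently.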
- LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); - ScraperDeleteUnauthorizedCScraperManifests(); - // End LOCK(CScraperManifest::cs_mapManifest) - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); - // End LOCK(cs_Scraper) _log(logattribute::INFO, "ENDLOCK", "cs_Scraper"); } @@ -1065,17 +1206,24 @@ void Scraper(bool bSingleShot) CBitcoinAddress AddressOut; CKey KeyOut; + // These are to ensure thread-safety of these globals and keep the locking scope to a minimum. These will go away + // when the scraper is restructured into a class. + auto active_before_SB = []() { LOCK(cs_ScraperGlobals); return nActiveBeforeSB; }; + auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; }; + // Give nActiveBeforeSB seconds before superblock needed before we sync // Note there is a small while loop here to cull incoming manifests from // other scrapers while this one is quiescent. An unlikely but possible // situation because the nActiveBeforeSB may be set differently on other // scrapers. // If Scraper is called in singleshot mode, then skip the wait. - if (!bSingleShot && sbage <= (86400 - nActiveBeforeSB) && sbage >= 0) + if (!bSingleShot && sbage <= (86400 - active_before_SB()) && sbage >= 0) { // Don't let nBeforeSBSleep go less than zero, which could happen without max if wallet // started with sbage already older than 86400 - nActiveBeforeSB. - int64_t nBeforeSBSleep = std::max(86400 - nActiveBeforeSB - sbage, (int64_t) 0); + int64_t nBeforeSBSleep = std::max(86400 - active_before_SB() - sbage, (int64_t) 0); while (GetAdjustedTime() - nScraperThreadStartTime < nBeforeSBSleep) { @@ -1107,7 +1255,7 @@ void Scraper(bool bSingleShot) sbage = SuperblockAge(); _log(logattribute::INFO, "Scraper", "Superblock not needed. age=" + ToString(sbage)); - _log(logattribute::INFO, "Scraper", "Sleeping for " + ToString(nScraperSleep / 1000) +" seconds"); + _log(logattribute::INFO, "Scraper", "Sleeping for " + ToString(scraper_sleep() / 1000) +" seconds"); if (!MilliSleep(nScraperSleep)) return; } @@ -1163,13 +1311,13 @@ void Scraper(bool bSingleShot) // If team filtering is set by policy then pull down and retrieve team IDs as needed. This loads the TeamIDMap global. // Note that the call(s) to ScraperDirectoryAndConfigSanity() above will preload the team ID map from the persisted file // if it exists, so this will minimize the work that DownloadProjectTeamFiles() has to do, unless explorer mode (fExplorer) is true. - if (REQUIRE_TEAM_WHITELIST_MEMBERSHIP || fExplorer) DownloadProjectTeamFiles(projectWhitelist); + if (require_team_whitelist_membership() || explorer_mode()) DownloadProjectTeamFiles(projectWhitelist); DownloadProjectRacFilesByCPID(projectWhitelist); // If explorer mode is set (fExplorer is true), then download host files. These are currently not use for any other processing, // so there is no corresponding Process function for the host files. - if (fExplorer) DownloadProjectHostFiles(projectWhitelist); + if (explorer_mode()) DownloadProjectHostFiles(projectWhitelist); _log(logattribute::INFO, "Scraper", "download size so far: " + ToString(ndownloadsize) + " upload size so far: " + ToString(nuploadsize)); @@ -1226,6 +1374,8 @@ void Scraper(bool bSingleShot) // the function that called the singleshot. 
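The commit message calls out the locked-read lambdas used in Scraper() above: each invocation takes cs_ScraperGlobals, copies the value out, and releases the lock, so existing if statements and expressions can keep using the guarded globals without restructuring. A minimal sketch of the pattern (the global and names here are illustrative):

#include "sync.h"

CCriticalSection cs_globals_example;
unsigned int nSleepMillis GUARDED_BY(cs_globals_example) = 300000;

void LoopBody()
{
    // Lock, copy, unlock on every call: easy to drop into existing
    // conditionals, at the price of re-acquiring the lock each time it runs.
    auto sleep_millis = []() {
        LOCK(cs_globals_example);
        return nSleepMillis;
    };

    if (sleep_millis() > 60000) {
        // ... the lock is no longer held here; only the copied value is used ...
    }
}

As the commit message notes, this is a stopgap until the scraper is restructured into classes, at which point these reads can become ordinary accessors on the owning object.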
if (!bSingleShot) { + LOCK(cs_Scraper); + ScraperHousekeeping(); _log(logattribute::INFO, "Scraper", "Sleeping for " + ToString(nScraperSleep / 1000) +" seconds"); @@ -1266,6 +1416,8 @@ void ScraperSubscriber() "Note that this does NOT mean the subscriber is active. This simply does housekeeping " "functions."); + auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; + while(!fShutdown) { // Only proceed if wallet is in sync. Check every 8 seconds since no callback is available. @@ -1289,14 +1441,8 @@ void ScraperSubscriber() // UnauthorizedCScraperManifests should only be seen on the first invocation after getting in sync // See the comment on the function. - LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); - ScraperDeleteUnauthorizedCScraperManifests(); - // END LOCK(CScraperManifest::cs_mapManifest) - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); - ScraperHousekeeping(); // END LOCK(cs_Scraper) @@ -1304,7 +1450,7 @@ void ScraperSubscriber() } // Use the same sleep interval configured for the scraper. - _log(logattribute::INFO, "ScraperSubscriber", "Sleeping for " + ToString(nScraperSleep / 1000) +" seconds"); + _log(logattribute::INFO, "ScraperSubscriber", "Sleeping for " + ToString(scraper_sleep() / 1000) +" seconds"); if (!MilliSleep(nScraperSleep)) return; } @@ -1312,17 +1458,19 @@ void ScraperSubscriber() UniValue testnewsb(const UniValue& params, bool fHelp); -bool ScraperHousekeeping() +bool ScraperHousekeeping() EXCLUSIVE_LOCKS_REQUIRED(cs_Scraper) { // Periodically generate converged manifests and generate SB contract and store in cache. Superblock superblock; - { - // Lock both cs_Scraper and cs_StructScraperFileManifest. - LOCK2(cs_Scraper, cs_StructScraperFileManifest); + superblock = ScraperGetSuperblockContract(true, false, true); - superblock = ScraperGetSuperblockContract(true, false, true); + if (!superblock.WellFormed()) + { + _log(logattribute::WARNING, "ScraperHousekeeping", "Superblock is not well formed. m_version = " + + ToString(superblock.m_version) + ", m_cpids.size() = " + ToString(superblock.m_cpids.size()) + + ", m_projects.size() = " + ToString(superblock.m_projects.size())); } { @@ -1367,6 +1515,12 @@ bool ScraperHousekeeping() // A lock on cs_Scraper should be taken before calling this function. bool ScraperDirectoryAndConfigSanity() { + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + auto scraper_retain_noncurrent_files = []() { LOCK(cs_ScraperGlobals); return SCRAPER_RETAIN_NONCURRENT_FILES; }; + auto scraper_file_retention_time = []() { LOCK(cs_ScraperGlobals); return SCRAPER_FILE_RETENTION_TIME; }; + auto explorer_extended_file_retention_time = []() { LOCK(cs_ScraperGlobals); return EXPLORER_EXTENDED_FILE_RETENTION_TIME; }; + auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; }; + ScraperApplyAppCacheEntries(); // Check to see if the Scraper directory exists and is a directory. If not create it. @@ -1447,7 +1601,7 @@ bool ScraperDirectoryAndConfigSanity() { ScraperFileManifestMap::iterator entry_copy = entry++; - int64_t nFileRetentionTime = fExplorer ? EXPLORER_EXTENDED_FILE_RETENTION_TIME : SCRAPER_FILE_RETENTION_TIME; + int64_t nFileRetentionTime = explorer_mode() ? 
explorer_extended_file_retention_time() : scraper_file_retention_time(); if (LogInstance().WillLogCategory(BCLog::LogFlags::NOISY)) { @@ -1456,7 +1610,7 @@ bool ScraperDirectoryAndConfigSanity() if (!fs::exists(pathScraper / entry_copy->first) || ((GetAdjustedTime() - entry_copy->second.timestamp) > nFileRetentionTime) - || (!SCRAPER_RETAIN_NONCURRENT_FILES && entry_copy->second.current == false)) + || (!scraper_retain_noncurrent_files() && entry_copy->second.current == false)) { _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Removing stale or orphan manifest entry: " + entry_copy->first); DeleteScraperFileManifestEntry(entry_copy->second); @@ -1469,7 +1623,7 @@ bool ScraperDirectoryAndConfigSanity() // If network policy is set to filter on whitelisted teams, then load team ID map from file. This will prevent the heavyweight // team file downloads for projects whose team IDs have already been found and stored, unless explorer mode (fExplorer) is true. - if (REQUIRE_TEAM_WHITELIST_MEMBERSHIP) + if (require_team_whitelist_membership()) { LOCK(cs_TeamIDMap); _log(logattribute::INFO, "LOCK", "cs_TeamIDMap"); @@ -1571,8 +1725,10 @@ bool UserpassPopulated() bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) { + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + // If fExplorer is false then skip processing. (This should not be called anyway, but return immediately just in case. - if (!fExplorer) + if (!explorer_mode()) { _log(logattribute::INFO, "DownloadProjectHostFiles", "Not in explorer mode. Skipping host file download and processing."); return false; @@ -1687,6 +1843,9 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) { + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; }; + if (!projectWhitelist.Populated()) { _log(logattribute::CRITICAL, "DownloadProjectTeamFiles", "Whitelist is not populated"); @@ -1715,7 +1874,7 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) // If fExplorer is false, which means we do not need to retain team files, and there are no TeamID entries missing, // then skip processing altogether. - if (!fExplorer && !fProjTeamIDsMissing) + if (!explorer_mode() && !fProjTeamIDsMissing) { _log(logattribute::INFO, "DownloadProjectTeamFiles", "Correct team whitelist entries already in the team ID map for " + prjs.m_name + " project. Skipping team file download and processing."); @@ -1791,7 +1950,7 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) } - if (fExplorer) + if (explorer_mode()) { // Use eTag versioning ON THE DISK with eTag versioned team files per project. team_file_name = prjs.m_name + "-" + sTeamETag + "-team.gz"; @@ -1846,11 +2005,11 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) // If in explorer mode and new file downloaded, save team xml files to file manifest map with exclude from CSManifest flag set to true. // If not in explorer mode, this is not necessary, because the team xml file is just temporary and can be discarded after // processing. 
- if (fExplorer && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true); + if (explorer_mode() && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true); // If require team whitelist is set and bETagChanged is true, then process the file. This also populates/updated the team whitelist TeamIDs // in the TeamIDMap and the ETag entries in the ProjTeamETags map. - if (REQUIRE_TEAM_WHITELIST_MEMBERSHIP && bETagChanged) ProcessProjectTeamFile(prjs.m_name, team_file, sTeamETag); + if (require_team_whitelist_membership() && bETagChanged) ProcessProjectTeamFile(prjs.m_name, team_file, sTeamETag); _log(logattribute::INFO, "ENDLOCK", "cs_TeamIDMap"); } @@ -1863,6 +2022,8 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) // TeamIDMap and ProjTeamETags. bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, const std::string& etag) EXCLUSIVE_LOCKS_REQUIRED(cs_TeamIDMap) { + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + std::map mTeamIdsForProject; // If passed an empty file, immediately return false. @@ -1957,7 +2118,7 @@ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, co _log(logattribute::INFO, "ProcessProjectTeamFile", "Stored Team ID entries."); // If not explorer mode, delete input file after processing. - if (!fExplorer && fs::exists(file)) fs::remove(file); + if (!explorer_mode() && fs::exists(file)) fs::remove(file); _log(logattribute::INFO, "ProcessProjectTeamFile", "Finished processing " + file.filename().string()); @@ -1971,6 +2132,8 @@ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, co bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) { + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + if (!projectWhitelist.Populated()) { _log(logattribute::CRITICAL, "DownloadProjectRacFiles", "Whitelist is not populated"); @@ -2084,7 +2247,7 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) processed_rac_file_name = prjs.m_name + "-" + sRacETag + ".csv" + ".gz"; processed_rac_file = pathScraper / processed_rac_file_name; - if (fExplorer) + if (explorer_mode()) { // Use eTag versioning for source file. rac_file_name = prjs.m_name + "-" + sRacETag + "-user.gz"; @@ -2124,7 +2287,7 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) } // If in explorer mode, save user (rac) source xml files to file manifest map with exclude from CSManifest flag set to true. - if (fExplorer) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true); + if (explorer_mode()) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true); // Now that the source file is handled, process the file. ProcessProjectRacFileByCPID(prjs.m_name, rac_file, sRacETag, Consensus, GlobalVerifiedBeaconsCopy, IncomingVerifiedBeacons); @@ -2186,6 +2349,9 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil BeaconConsensus& Consensus, ScraperVerifiedBeacons& GlobalVerifiedBeaconsCopy, ScraperVerifiedBeacons& IncomingVerifiedBeacons) { + auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; + auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; }; + // Set fileerror flag to true until made false by the completion of one successful injection of user stats into stream. 
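Several functions in this patch, for example ScraperHousekeeping() and ProcessProjectTeamFile() above, now declare EXCLUSIVE_LOCKS_REQUIRED(...), which moves the locking obligation to the caller and lets Clang verify it instead of relying on a comment. A minimal sketch, assuming the annotation macros from sync.h (names illustrative):

#include "sync.h"

CCriticalSection cs_state;
int nState GUARDED_BY(cs_state) = 0;

// Documents, and lets Clang enforce, that the caller must already hold cs_state.
void BumpState() EXCLUSIVE_LOCKS_REQUIRED(cs_state)
{
    ++nState;          // no LOCK here; the caller provides it
}

void Caller()
{
    BumpState();       // clang: warning: calling function 'BumpState'
                       //        requires holding mutex 'cs_state' exclusively

    LOCK(cs_state);
    BumpState();       // OK: cs_state is held for the rest of the scope
}

This replaces the older convention of a "a lock needs to be taken before calling this function" comment, which the analysis cannot check.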
bool bfileerror = true; @@ -2220,7 +2386,7 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil std::map mTeamIDsForProject = {}; // Take a lock on cs_TeamIDMap to populate local whitelist TeamID vector for this project. - if (REQUIRE_TEAM_WHITELIST_MEMBERSHIP) + if (require_team_whitelist_membership()) { LOCK(cs_TeamIDMap); _log(logattribute::INFO, "LOCK", "cs_TeamIDMap"); @@ -2307,7 +2473,7 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil } // Only do this if team membership filtering is specified by network policy. - if (REQUIRE_TEAM_WHITELIST_MEMBERSHIP) + if (require_team_whitelist_membership()) { // Set initial flag for whether user is on team whitelist to false. bool bOnTeamWhitelist = false; @@ -2399,7 +2565,7 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil } // If not in explorer mode, no need to retain source file. - if (!fExplorer) fs::remove(file); + if (!explorer_mode()) fs::remove(file); // Here, regardless of explorer mode, save processed rac files to file manifest map with exclude from CSManifest flag set to false. AlignScraperFileManifestEntries(gzetagfile, "user", project, false); @@ -2814,6 +2980,9 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi { ScraperFileManifestEntry NewRecord; + auto scraper_retain_noncurrent_files = []() { LOCK(cs_ScraperGlobals); return SCRAPER_RETAIN_NONCURRENT_FILES; }; + auto explorer_extended_file_retention_time = []() { LOCK(cs_ScraperGlobals); return EXPLORER_EXTENDED_FILE_RETENTION_TIME; }; + std::string file_name = file.filename().string(); NewRecord.filename = file_name; @@ -2855,8 +3024,8 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi // If filetype records are older than EXPLORER_EXTENDED_FILE_RETENTION_TIME delete record, or if fScraperRetainNonCurrentFiles is false, // delete all non-current records, including the one just marked non-current. (EXPLORER_EXTENDED_FILE_RETENTION_TIME rather // then SCRAPER_FILE_RETENTION_TIME is used, because this section is only active if fExplorer is true.) - if (entry_copy->second.filetype == filetype && (((GetAdjustedTime() - entry_copy->second.timestamp) > EXPLORER_EXTENDED_FILE_RETENTION_TIME) - || (entry_copy->second.project == sProject && entry_copy->second.current == false && !SCRAPER_RETAIN_NONCURRENT_FILES))) + if (entry_copy->second.filetype == filetype && (((GetAdjustedTime() - entry_copy->second.timestamp) > explorer_extended_file_retention_time()) + || (entry_copy->second.project == sProject && entry_copy->second.current == false && !scraper_retain_noncurrent_files()))) { DeleteScraperFileManifestEntry(entry_copy->second); } @@ -3329,7 +3498,7 @@ bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats) // Note the following is VERY inelegant. It CAPS the CPID magnitude to CPID_MAG_LIMIT. // No attempt to renormalize the magnitudes due to this cap is done at this time. This means // The total magnitude across projects will NOT match the total across all CPIDs and the network. 
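The hunk that follows caps each CPID's magnitude with std::min against CPID_MAG_LIMIT, which this patch turns into a std::atomic<double>. Passing an atomic to a type-deducing template such as std::min generally needs an explicit load (or an explicit template argument), since the compiler cannot deduce a single type from std::atomic<double> and double; a short sketch under that assumption (names and the limit value are illustrative):

#include <algorithm>
#include <atomic>

std::atomic<double> CPID_MAG_LIMIT_example{1000.0};   // illustrative value only

double CapMagnitude(double dMag)
{
    // std::min(CPID_MAG_LIMIT_example, dMag) would fail to deduce a common type,
    // so load the atomic explicitly (std::min<double>(...) would also work).
    return std::min(CPID_MAG_LIMIT_example.load(), dMag);
}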
- mByCPID_entry->second.first.statsvalue.dMag = std::min(CPID_MAG_LIMIT, mByCPID_entry->second.first.statsvalue.dMag); + mByCPID_entry->second.first.statsvalue.dMag = std::min(CPID_MAG_LIMIT, mByCPID_entry->second.first.statsvalue.dMag); // Increment number of projects tallied ++mByCPID_entry->second.second; } @@ -3347,7 +3516,7 @@ bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats) // Note the following is VERY inelegant. It CAPS the CPID magnitude to CPID_MAG_LIMIT. // No attempt to renormalize the magnitudes due to this cap is done at this time. This means // The total magnitude across projects will NOT match the total across all CPIDs and the network. - CPIDStatsEntry.statsvalue.dMag = std::min(CPID_MAG_LIMIT, byCPIDbyProjectEntry.second.statsvalue.dMag); + CPIDStatsEntry.statsvalue.dMag = std::min(CPID_MAG_LIMIT, byCPIDbyProjectEntry.second.statsvalue.dMag); // This is the first project encountered, because otherwise there would already be an entry. mByCPID[CPID] = std::make_pair(CPIDStatsEntry, 1); @@ -3691,6 +3860,9 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) // This is from the map find above. const CScraperManifest_shared_ptr manifest = pair->second; + LOCK(manifest->cs_manifest); + _log(logattribute::INFO, "LOCK", "cs_manifest"); + // Write out to files the parts. Note this assumes one-to-one part to file. Needs to // be fixed for more than one part per file. int iPartNum = 0; @@ -3731,6 +3903,7 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) iPartNum++; } + _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); return true; @@ -3751,6 +3924,8 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) // headaches. bool IsScraperAuthorized() { + LOCK(cs_ScraperGlobals); + return ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD; } @@ -3760,15 +3935,7 @@ bool IsScraperAuthorized() bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& KeyOut) { - AppCacheSection mScrapers = {}; - { - LOCK(cs_main); - _log(logattribute::INFO, "LOCK", "cs_main"); - - mScrapers = ReadCacheSection(Section::SCRAPER); - - _log(logattribute::INFO, "ENDLOCK", "cs_main"); - } + AppCacheSection mScrapers = GetScrapersCache(); std::string sScraperAddressFromConfig = gArgs.GetArg("-scraperkey", "false"); @@ -3913,15 +4080,11 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& // for a scraper is more than five minutes in the future (accounts for clock skew) then the publishing rate // of the scraper is deemed too high. This is actually used in CScraperManifest::IsManifestAuthorized to ban // a scraper that is abusing the network by sending too many manifests over a very short period of time. 
-bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey) +bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { mmCSManifestsBinnedByScraper mMapCSManifestBinnedByScraper; - { - LOCK(CScraperManifest::cs_mapManifest); - - mMapCSManifestBinnedByScraper = BinCScraperManifestsByScraper(); - } + mMapCSManifestBinnedByScraper = BinCScraperManifestsByScraper(); CKeyID ManifestKeyID = PubKey.GetID(); @@ -3948,6 +4111,8 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub std::multimap> mScraperManifests; + auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; + // Insert manifest referenced by the argument first (the "incoming" manifest). Note that it may NOT have the most recent time. // This is followed by the rest so that we have a unified map with the incoming in the right order. mScraperManifests.insert(std::make_pair(nTime, sManifestAddress)); @@ -3974,7 +4139,7 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub // Go till 10 intervals (between samples) OR time interval reaches 5 expected scraper updates at 3 nScraperSleep scraper cycles per update, // whichever occurs first. - if (nIntervals == 10 || (nCurrentTime - nBeginTime) >= nScraperSleep * 3 * 5 / 1000) break; + if (nIntervals == 10 || (nCurrentTime - nBeginTime) >= scraper_sleep() * 3 * 5 / 1000) break; } // Do not allow the most recent manifest from a scraper to be more than five minutes into the future from GetAdjustedTime. (This takes @@ -4001,7 +4166,7 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub // nScraperSleep is in milliseconds. If the average interval is less than 25% of nScraperSleep in seconds, ban the scraper. // Note that this is a factor of 24 faster than the expected rate given usual project update velocity. - if (nAvgTimeBetweenManifests < nScraperSleep / 8000) + if (nAvgTimeBetweenManifests < scraper_sleep() / 8000) { _log(logattribute::CRITICAL, "IsScraperMaximumManifestPublishingRateExceeded", "Scraper " + sManifestAddress + " has published too many manifests in too short a time:\n" + @@ -4027,34 +4192,62 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub // authorized scraper list in the AppCache. If it passes the flag will be set to true. If it fails, the manifest will be deleted. All manifests // must be checked, because we have to deal with another condition where a scraper is deauthorized by network policy. This means manifests may // not be authorized even if the bCheckedAuthorized is true from a prior check. - -// A lock needs to be taken on CScraperManifest::cs_mapManifest before calling this function. unsigned int ScraperDeleteUnauthorizedCScraperManifests() { unsigned int nDeleted = 0; + LOCK(CScraperManifest::cs_mapManifest); + _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); + for (auto iter = CScraperManifest::mapManifest.begin(); iter != CScraperManifest::mapManifest.end(); ) { CScraperManifest_shared_ptr manifest = iter->second; + // We have to copy out the nTime and pubkey from the selected manifest, because the IsManifestAuthorized call + // chain traverses the map and locks the cs_manifests in turn, which creats a deadlock potential if the cs_manifest + // lock is already held on one of the manifests. 
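To make the publishing-rate check above concrete: with the default nScraperSleep of 300000 ms, the sampling window is capped at nScraperSleep * 3 * 5 / 1000 = 4500 seconds (five expected updates at three scraper cycles each), and the ban threshold is nScraperSleep / 8000 = 37.5 seconds average between manifests. A normally behaving scraper publishes roughly once every 3 * 300 = 900 seconds, so the threshold sits at 900 / 37.5 = 24 times the expected rate, which is the factor of 24 the code comment refers to.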
+ int64_t nTime = 0; + CPubKey pubkey; + { + LOCK(manifest->cs_manifest); + _log(logattribute::INFO, "LOCK", "cs_manifest"); + + nTime = manifest->nTime; + pubkey = manifest->pubkey; + + _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); + } + // We are not going to do anything with the banscore here, but it is an out parameter of IsManifestAuthorized. unsigned int banscore_out = 0; - if (CScraperManifest::IsManifestAuthorized(manifest->nTime, manifest->pubkey, banscore_out)) + if (CScraperManifest::IsManifestAuthorized(nTime, pubkey, banscore_out)) { + LOCK(manifest->cs_manifest); + manifest->bCheckedAuthorized = true; ++iter; + + _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } else { + LOCK(manifest->cs_manifest); + _log(logattribute::INFO, "LOCK", "cs_manifest"); + _log(logattribute::WARNING, "ScraperDeleteUnauthorizedCScraperManifests", "Deleting unauthorized manifest with hash " + iter->first.GetHex()); // Delete from CScraperManifest map (also advances iter to the next valid element). Immediate flag is set, because there should be // no pending delete retention grace for this. iter = CScraperManifest::DeleteManifest(iter, true); nDeleted++; + + _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } } + // End LOCK(CScraperManifest::cs_mapManifest) + _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); + return nDeleted; } @@ -4067,25 +4260,30 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS auto manifest = std::shared_ptr(new CScraperManifest()); - // The manifest name is the authorized address of the scraper. - manifest->sCManifestName = Address.ToString(); - - // Also store local sCManifestName, because the manifest will be std::moved by addManifest. - std::string sCManifestName = Address.ToString(); - - manifest->nTime = StructScraperFileManifest.timestamp; - - // Also store local nTime, because the manifest will be std::moved by addManifest. - int64_t nTime = StructScraperFileManifest.timestamp; - - manifest->ConsensusBlock = StructScraperFileManifest.nConsensusBlockHash; + std::string sCManifestName; + int64_t nTime = 0; // This will have to be changed to support files bigger than 32 MB, where more than one // part per object will be required. int iPartNum = 0; - // Inject the BeaconList part. { + LOCK2(CSplitBlob::cs_mapParts, manifest->cs_manifest); + _log(logattribute::INFO, "LOCK2", "cs_mapParts, cs_manifest"); + + // The manifest name is the authorized address of the scraper. + manifest->sCManifestName = Address.ToString(); + + // Also store local sCManifestName, because the manifest will be std::moved by addManifest. + sCManifestName = Address.ToString(); + + manifest->nTime = StructScraperFileManifest.timestamp; + + // Also store local nTime, because the manifest will be std::moved by addManifest. + nTime = StructScraperFileManifest.timestamp; + + manifest->ConsensusBlock = StructScraperFileManifest.nConsensusBlockHash; + // Read in BeaconList fs::path inputfile = "BeaconList.csv.gz"; fs::path inputfilewpath = pathScraper / inputfile; @@ -4127,120 +4325,124 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS manifest->addPartData(std::move(part)); iPartNum++; - } - // Inject the VerifiedBeaconList as a "project" called VerifiedBeacons. This is inelegant, but - // will maintain compatibility with older nodes. The older nodes will simply ignore this extra part - // because it will never match any whitelisted project. Only include it if it is not empty. 
- { - LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); + // Inject the VerifiedBeaconList as a "project" called VerifiedBeacons. This is inelegant, but + // will maintain compatibility with older nodes. The older nodes will simply ignore this extra part + // because it will never match any whitelisted project. Only include it if it is not empty. + { + LOCK(cs_VerifiedBeacons); + _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); - ScraperVerifiedBeacons& ScraperVerifiedBeacons = GetVerifiedBeacons(); + ScraperVerifiedBeacons& ScraperVerifiedBeacons = GetVerifiedBeacons(); - if (!ScraperVerifiedBeacons.mVerifiedMap.empty()) - { - CScraperManifest::dentry ProjectEntry; + if (!ScraperVerifiedBeacons.mVerifiedMap.empty()) + { + CScraperManifest::dentry ProjectEntry; - ProjectEntry.project = "VerifiedBeacons"; - ProjectEntry.LastModified = ScraperVerifiedBeacons.timestamp; - ProjectEntry.current = true; + ProjectEntry.project = "VerifiedBeacons"; + ProjectEntry.LastModified = ScraperVerifiedBeacons.timestamp; + ProjectEntry.current = true; - // For now each object will only have one part. - ProjectEntry.part1 = iPartNum; - ProjectEntry.partc = 0; - ProjectEntry.GridcoinTeamID = -1; //Not used anymore + // For now each object will only have one part. + ProjectEntry.part1 = iPartNum; + ProjectEntry.partc = 0; + ProjectEntry.GridcoinTeamID = -1; //Not used anymore - ProjectEntry.last = 1; + ProjectEntry.last = 1; - manifest->projects.push_back(ProjectEntry); + manifest->projects.push_back(ProjectEntry); - CDataStream part(SER_NETWORK, 1); + CDataStream part(SER_NETWORK, 1); - part << ScraperVerifiedBeacons.mVerifiedMap; + part << ScraperVerifiedBeacons.mVerifiedMap; - manifest->addPartData(std::move(part)); + manifest->addPartData(std::move(part)); - iPartNum++; + iPartNum++; + } + + _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); - } + for (auto const& entry : StructScraperFileManifest.mScraperFileManifest) + { + auto scraper_cmanifest_include_noncurrent_proj_files = + []() { LOCK(cs_ScraperGlobals); return SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES; }; + // If SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES is false, only include current files to send across the network. + // Also continue (exclude) if it is a non-publishable entry (excludefromcsmanifest is true). + if ((!scraper_cmanifest_include_noncurrent_proj_files() && !entry.second.current) || entry.second.excludefromcsmanifest) + continue; - for (auto const& entry : StructScraperFileManifest.mScraperFileManifest) - { - // If SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES is false, only include current files to send across the network. - // Also continue (exclude) if it is a non-publishable entry (excludefromcsmanifest is true). 
- if ((!SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES && !entry.second.current) || entry.second.excludefromcsmanifest) - continue; + fs::path inputfile = entry.first; - fs::path inputfile = entry.first; + //_log(logattribute::INFO, "ScraperSendFileManifestContents", "Input file for CScraperManifest is " + inputfile.string()); - //_log(logattribute::INFO, "ScraperSendFileManifestContents", "Input file for CScraperManifest is " + inputfile.string()); + fs::path inputfilewpath = pathScraper / inputfile; - fs::path inputfilewpath = pathScraper / inputfile; + // open input file, and associate with CAutoFile + FILE *file = fsbridge::fopen(inputfilewpath, "rb"); + CAutoFile filein(file, SER_DISK, CLIENT_VERSION); - // open input file, and associate with CAutoFile - FILE *file = fsbridge::fopen(inputfilewpath, "rb"); - CAutoFile filein(file, SER_DISK, CLIENT_VERSION); + if (filein.IsNull()) + { + _log(logattribute::ERR, "ScraperSendFileManifestContents", "Failed to open file (" + inputfile.string() + ")"); + return false; + } - if (filein.IsNull()) - { - _log(logattribute::ERR, "ScraperSendFileManifestContents", "Failed to open file (" + inputfile.string() + ")"); - return false; - } + // use file size to size memory buffer + int dataSize = fs::file_size(inputfilewpath); + std::vector vchData; + vchData.resize(dataSize); - // use file size to size memory buffer - int dataSize = fs::file_size(inputfilewpath); - std::vector vchData; - vchData.resize(dataSize); + // read data from file + try + { + filein.read((char *)&vchData[0], dataSize); + } + catch (std::exception &e) + { + _log(logattribute::ERR, "ScraperSendFileManifestContents", "Failed to read file (" + inputfile.string() + ")"); + return false; + } - // read data from file - try - { - filein.read((char *)&vchData[0], dataSize); - } - catch (std::exception &e) - { - _log(logattribute::ERR, "ScraperSendFileManifestContents", "Failed to read file (" + inputfile.string() + ")"); - return false; - } + filein.fclose(); - filein.fclose(); + CScraperManifest::dentry ProjectEntry; - CScraperManifest::dentry ProjectEntry; + ProjectEntry.project = entry.second.project; + std::string sProject = entry.second.project + "-"; - ProjectEntry.project = entry.second.project; - std::string sProject = entry.second.project + "-"; + std::string sinputfile = inputfile.string(); + std::string suffix = ".csv.gz"; - std::string sinputfile = inputfile.string(); - std::string suffix = ".csv.gz"; + // Remove project- + sinputfile.erase(sinputfile.find(sProject), sProject.length()); + // Remove suffix. What is left is the ETag. + ProjectEntry.ETag = sinputfile.erase(sinputfile.find(suffix), suffix.length()); - // Remove project- - sinputfile.erase(sinputfile.find(sProject), sProject.length()); - // Remove suffix. What is left is the ETag. - ProjectEntry.ETag = sinputfile.erase(sinputfile.find(suffix), suffix.length()); + ProjectEntry.LastModified = entry.second.timestamp; - ProjectEntry.LastModified = entry.second.timestamp; + // For now each object will only have one part. + ProjectEntry.part1 = iPartNum; + ProjectEntry.partc = 0; + ProjectEntry.GridcoinTeamID = -1; //Not used anymore - // For now each object will only have one part. 
- ProjectEntry.part1 = iPartNum; - ProjectEntry.partc = 0; - ProjectEntry.GridcoinTeamID = -1; //Not used anymore + ProjectEntry.current = entry.second.current; - ProjectEntry.current = entry.second.current; + ProjectEntry.last = 1; - ProjectEntry.last = 1; + manifest->projects.push_back(ProjectEntry); - manifest->projects.push_back(ProjectEntry); + CDataStream part(vchData, SER_NETWORK, 1); - CDataStream part(vchData, SER_NETWORK, 1); + manifest->addPartData(std::move(part)); - manifest->addPartData(std::move(part)); + iPartNum++; + } - iPartNum++; + _log(logattribute::INFO, "ENDLOCK2", "cs_mapParts, cs_manifest"); } // "Sign" and "send". @@ -4261,6 +4463,135 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS return bAddManifestSuccessful; } +ConvergedManifest::ConvergedManifest() +{ + nContentHash = {}; + ConsensusBlock = {}; + timestamp = 0; + bByParts = false; + + CScraperConvergedManifest_ptr = nullptr; + + ConvergedManifestPartPtrsMap = {}; + + mIncludedScraperManifests = {}; + + nUnderlyingManifestContentHash = {}; + + vIncludedScrapers = {}; + vExcludedScrapers = {}; + vScrapersNotPublishing = {}; + + mIncludedScrapersbyProject = {}; + mIncludedProjectsbyScraper = {}; + + mScraperConvergenceCountbyProject = {}; + + vExcludedProjects = {}; +} + +ConvergedManifest::ConvergedManifest(CScraperManifest_shared_ptr& in) +{ + // Make Clang happy. + LOCK(in->cs_manifest); + + ConsensusBlock = in->ConsensusBlock; + timestamp = GetAdjustedTime(); + bByParts = false; + + CScraperConvergedManifest_ptr = in; + + PopulateConvergedManifestPartPtrsMap(); + + ComputeConvergedContentHash(); + + nUnderlyingManifestContentHash = in->nContentHash; +} + +bool ConvergedManifest::operator()(const CScraperManifest_shared_ptr& in) +{ + LOCK(in->cs_manifest); + + ConsensusBlock = in->ConsensusBlock; + timestamp = GetAdjustedTime(); + bByParts = false; + + CScraperConvergedManifest_ptr = in; + + bool bConvergedContentHashMatches = WITH_LOCK(CScraperConvergedManifest_ptr->cs_manifest, + return PopulateConvergedManifestPartPtrsMap()); + + ComputeConvergedContentHash(); + + nUnderlyingManifestContentHash = in->nContentHash; + + return bConvergedContentHashMatches; +} + +bool ConvergedManifest::PopulateConvergedManifestPartPtrsMap() EXCLUSIVE_LOCKS_REQUIRED(CScraperConvergedManifest_ptr->cs_manifest) +{ + if (CScraperConvergedManifest_ptr == nullptr) return false; + + int iPartNum = 0; + CDataStream ss(SER_NETWORK,1); + WriteCompactSize(ss, CScraperConvergedManifest_ptr->vParts.size()); + uint256 nContentHashCheck; + + for (const auto& iter : CScraperConvergedManifest_ptr->vParts) + { + std::string sProject; + + if (iPartNum == 0) + sProject = "BeaconList"; + else + sProject = CScraperConvergedManifest_ptr->projects[iPartNum-1].project; + + // Copy the pointer to the CPart into the map. This is ok, because the parts will be held + // until the CScraperManifest in this object is destroyed and all of the manifest refs to the part + // are gone. + ConvergedManifestPartPtrsMap.insert(std::make_pair(sProject, iter)); + + // Serialize the hash to doublecheck the content hash. + ss << iter->hash; + + iPartNum++; + } + + ss << CScraperConvergedManifest_ptr->ConsensusBlock; + + nContentHashCheck = Hash(ss.begin(), ss.end()); + + if (nContentHashCheck != CScraperConvergedManifest_ptr->nContentHash) + { + LogPrintf("ERROR: PopulateConvergedManifestPartPtrsMap(): Selected Manifest content hash check failed! 
" + "nContentHashCheck = %s and nContentHash = %s.", + nContentHashCheck.GetHex(), CScraperConvergedManifest_ptr->nContentHash.GetHex()); + return false; + } + + return true; +} + +void ConvergedManifest::ComputeConvergedContentHash() +{ + CDataStream ss(SER_NETWORK,1); + + for (const auto& iter : ConvergedManifestPartPtrsMap) + { + ss << iter.second->data; + } + + nContentHash = Hash(ss.begin(), ss.end()); +} + + + + + + + + + // ------------------------------------ This an out parameter. bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifest) @@ -4364,8 +4695,7 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes } } - // Retrieve the complete list of scrapers from the AppCache to determine scrapers not publishing at all. - AppCacheSection mScrapers = ReadCacheSection(Section::SCRAPER); + AppCacheSection mScrapers = GetScrapersCache(); for (const auto& iScraper : mScrapers) { @@ -4402,7 +4732,7 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes // Select agreed upon (converged) CScraper manifest based on converged hash. auto pair = CScraperManifest::mapManifest.find(convergence->second.second); - const CScraperManifest_shared_ptr& manifest = pair->second; + CScraperManifest_shared_ptr manifest = pair->second; // Fill out the ConvergedManifest structure. Note this assumes one-to-one part to project statistics BLOB. Needs to // be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to one manifest. @@ -4490,10 +4820,14 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project StructConvergedManifest.CScraperConvergedManifest_ptr = std::shared_ptr(new CScraperManifest); + LOCK(StructConvergedManifest.CScraperConvergedManifest_ptr->cs_manifest); + _log(logattribute::INFO, "LOCK", "cs_manifest"); + // We are going to do this for each project in the whitelist. unsigned int iCountSuccessfulConvergedProjects = 0; unsigned int nScraperCount = mMapCSManifestsBinnedByScraper.size(); + _log(logattribute::INFO, __func__, "Number of projects in the whitelist = " + ToString(projectWhitelist.size())); _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", "Number of Scrapers with manifests = " + ToString(nScraperCount)); for (const auto& iWhitelistProject : projectWhitelist) @@ -4509,8 +4843,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project std::multimap>::iterator ProjectConvergence; { - LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); + LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); + _log(logattribute::INFO, "LOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); // For the selected project in the whitelist, walk each scraper. for (const auto& iter : mMapCSManifestsBinnedByScraper) @@ -4525,6 +4859,9 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project auto pair = CScraperManifest::mapManifest.find(nCSManifestHash); CScraperManifest_shared_ptr manifest = pair->second; + LOCK(manifest->cs_manifest); + _log(logattribute::INFO, "LOCK", "manifest->cs_manifest"); + // Find the part number in the manifest that corresponds to the whitelisted project. // Once we find a part that corresponds to the selected project in the given manifest, then break, // because there can only be one part in a manifest corresponding to a given project. 
@@ -4579,9 +4916,12 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } } + + _log(logattribute::INFO, "ENDLOCK", "manifest->cs_manifest"); } } - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); + + _log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); } // Walk the time map (backwards in time because the sort order is descending), and select the first @@ -4604,7 +4944,15 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project + " agreeing."); // Get the actual part ----------------- by object hash. - auto iPart = CSplitBlob::mapParts.find(std::get<0>(iter.second)); + + std::map::iterator iPart; + { + LOCK(CSplitBlob::cs_mapParts); + _log(logattribute::INFO, "LOCK", "CSplitBlob::cs_mapParts"); + + iPart = CSplitBlob::mapParts.find(std::get<0>(iter.second)); + _log(logattribute::INFO, "ENDLOCK", "CSplitBlob::cs_mapParts"); + } uint256 nContentHashCheck = Hash(iPart->second.data.begin(), iPart->second.data.end()); @@ -4648,9 +4996,16 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } } // projectWhitelist for loop + _log(logattribute::INFO, __func__, "StructConvergedManifest.ConvergedManifestPartPtrsMap.size() = " + + ToString(StructConvergedManifest.ConvergedManifestPartPtrsMap.size())); + + auto convergence_by_project_ratio = [](){ LOCK(cs_ScraperGlobals); return CONVERGENCE_BY_PROJECT_RATIO; }; + // If we meet the rule of CONVERGENCE_BY_PROJECT_RATIO, then proceed to fill out the rest of the map. - if ((double)iCountSuccessfulConvergedProjects / (double)projectWhitelist.size() >= CONVERGENCE_BY_PROJECT_RATIO) + if ((double)iCountSuccessfulConvergedProjects / (double)projectWhitelist.size() >= convergence_by_project_ratio()) { + AppCacheSection mScrapers = GetScrapersCache(); + // Fill out the rest of the ConvergedManifest structure. Note this assumes one-to-one part to project statistics BLOB. Needs to // be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to one manifest. @@ -4659,20 +5014,23 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // could have used a different BeaconList (subject to the consensus ladder). It makes sense to use the "newest" one that is associated // with a manifest that has the newest part associated with a successful part (project) level convergence. - LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); + LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); + _log(logattribute::INFO, "LOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); // Select manifest based on provided hash. auto pair = CScraperManifest::mapManifest.find(nManifestHashForConvergedBeaconList); CScraperManifest_shared_ptr manifest = pair->second; + LOCK(manifest->cs_manifest); + _log(logattribute::INFO, "LOCK", "manifest->cs_manifest"); + // Bail if BeaconList is not found or empty. if (pair == CScraperManifest::mapManifest.end() || manifest->vParts[0]->data.size() == 0) { - _log(logattribute::WARNING, "ScraperConstructConvergedManifestByProject", "BeaconList was not found in the converged manifests from the scrapers. 
\n" - "Falling back to attempt convergence by project."); + _log(logattribute::WARNING, "ScraperConstructConvergedManifestByProject", "BeaconList was not found in the converged manifests from the scrapers."); bConvergenceSuccessful = false; + _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } else { @@ -4699,20 +5057,20 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project StructConvergedManifest.ConvergedManifestPartPtrsMap.insert(std::make_pair("VerifiedBeacons", manifest->vParts[nPart])); } + _log(logattribute::INFO, __func__, "After BeaconList and VerifiedBeacons insert StructConvergedManifest.ConvergedManifestPartPtrsMap.size() = " + + ToString(StructConvergedManifest.ConvergedManifestPartPtrsMap.size())); + StructConvergedManifest.ConsensusBlock = nConvergedConsensusBlock; // At this point all of the projects that meet convergence rules, along with the // BeaconList and the VerfiedBeacons are now in the ConvergedManifestPartPtrsMap. // We also need to populate them into the underlying CScraperConvergedManifest, because // that manifest will hold the references to the part pointers to ensure they don't disappear - // until the converged manifest is removed from the global cache. + // until the converged manifest is removed from the global cache. Note that the projects vector + // of project dentrys is not filled out, because it is actually not used in this context. // The BeaconList is element 0, do that first. - { - LOCK(CSplitBlob::cs_mapParts); - _log(logattribute::INFO, "LOCK", "CSplitBlob::cs_mapParts"); - auto iter = StructConvergedManifest.ConvergedManifestPartPtrsMap.find("BeaconList"); StructConvergedManifest.CScraperConvergedManifest_ptr->addPart(iter->second->hash); @@ -4734,8 +5092,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project StructConvergedManifest.CScraperConvergedManifest_ptr->addPart(iter->second->hash); } } - - _log(logattribute::INFO, "ENDLOCK", "CSplitBlob::cs_mapParts"); } StructConvergedManifest.ComputeConvergedContentHash(); @@ -4790,9 +5146,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } } - // Retrieve the complete list of scrapers from the AppCache to determine scrapers not publishing at all. - AppCacheSection mScrapers = ReadCacheSection(Section::SCRAPER); - for (const auto& iScraper : mScrapers) { // Only include scrapers enabled in protocol. @@ -4809,7 +5162,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } } - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); + _log(logattribute::INFO, "ENDLOCK", "manifest->cs_manifest"); + _log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); } } @@ -4822,11 +5176,12 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } return bConvergenceSuccessful; + + _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } -// A lock should be taken on CScraperManifest::cs_Manifest before calling this function. -mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper() +mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper() EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { mmCSManifestsBinnedByScraper mMapCSManifestsBinnedByScraper; @@ -4835,13 +5190,22 @@ mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper() { CScraperManifest_shared_ptr manifest = iter->second; - // Do not consider manifests that do not have all of their parts. 
- if (!manifest->isComplete()) continue; + std::string sManifestName; + int64_t nTime = 0; + uint256 nHash; + uint256 nContentHash; + { + LOCK(manifest->cs_manifest); + _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); - std::string sManifestName = manifest->sCManifestName; - int64_t nTime = manifest->nTime; - uint256 nHash = *manifest->phash; - uint256 nContentHash = manifest->nContentHash; + // Do not consider manifests that do not have all of their parts. + if (!manifest->isComplete()) continue; + + sManifestName = manifest->sCManifestName; + nTime = manifest->nTime; + nHash = *manifest->phash; + nContentHash = manifest->nContentHash; + } mCSManifest mManifestInner; @@ -4872,8 +5236,8 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Deleting old CScraperManifests."); - LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); + auto scraper_cmanifest_retain_noncurrent = []() { LOCK(cs_ScraperGlobals); return SCRAPER_CMANIFEST_RETAIN_NONCURRENT; }; + auto scraper_cmanifest_retention_time = []() { LOCK(cs_ScraperGlobals); return SCRAPER_CMANIFEST_RETENTION_TIME; }; // First check for unauthorized manifests just in case a scraper has been deauthorized. // This is only done if in sync. @@ -4883,12 +5247,15 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() if (nDeleted) _log(logattribute::WARNING, "ScraperDeleteCScraperManifests", "Deleted " + ToString(nDeleted) + " unauthorized manifests."); } + LOCK(CScraperManifest::cs_mapManifest); + _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); + // Bin by scraper and order by manifest time within scraper bin. mmCSManifestsBinnedByScraper mMapCSManifestsBinnedByScraper = BinCScraperManifestsByScraper(); _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "mMapCSManifestsBinnedByScraper size = " + ToString(mMapCSManifestsBinnedByScraper.size())); - if (!SCRAPER_CMANIFEST_RETAIN_NONCURRENT) + if (!scraper_cmanifest_retain_noncurrent()) { // For each scraper, delete every manifest EXCEPT the latest. for (auto iter = mMapCSManifestsBinnedByScraper.begin(); iter != mMapCSManifestsBinnedByScraper.end(); ++iter) @@ -4917,7 +5284,9 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() { CScraperManifest_shared_ptr manifest = iter->second; - if (GetAdjustedTime() - manifest->nTime > SCRAPER_CMANIFEST_RETENTION_TIME) + LOCK(manifest->cs_manifest); + + if (GetAdjustedTime() - manifest->nTime > scraper_cmanifest_retention_time()) { _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Deleting old CScraperManifest with hash " + iter->first.GetHex()); // Delete from CScraperManifest map @@ -5129,8 +5498,8 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac { Superblock empty_superblock; - // NOTE - OutOfSyncByAge calls PreviousBlockAge(), which takes a lock on cs_main. This is likely a deadlock culprit if called from here - // and the scraper or subscriber loop nearly simultaneously. So we use an atomic flag updated by the scraper or subscriber loop. + auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; + // If not in sync then immediately bail with an empty superblock. 
if (OutOfSyncByAge()) return empty_superblock; @@ -5143,8 +5512,11 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac _log(logattribute::INFO, "LOCK", "cs_ConvergedScraperStatsCache"); // If the cache is less than nScraperSleep in minutes old OR not dirty... - if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (nScraperSleep / 1000) || ConvergedScraperStatsCache.bClean) + if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) || ConvergedScraperStatsCache.bClean) + { bConvergenceUpdateNeeded = false; + _log(logattribute::INFO, __func__, "Cached convergence is fresh, convergence update not needed."); + } // End LOCK(cs_ConvergedScraperStatsCache) _log(logattribute::INFO, "ENDLOCK", "cs_ConvergedScraperStatsCache"); @@ -5187,18 +5559,26 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac // which is not really necessary. { LOCK(cs_ConvergedScraperStatsCache); - _log(logattribute::INFO, "LOCK", "cs_ConvergedScraperStatsCache"); ConvergedScraperStatsCache.AddConvergenceToPastConvergencesMap(); Superblock superblock_Prev = ConvergedScraperStatsCache.NewFormatSuperblock; ConvergedScraperStatsCache.mScraperConvergedStats = mScraperConvergedStats; + ConvergedScraperStatsCache.nTime = GetAdjustedTime(); + ConvergedScraperStatsCache.Convergence = StructConvergedManifest; superblock = Superblock::FromConvergence(ConvergedScraperStatsCache); + if (!superblock.WellFormed()) + { + _log(logattribute::WARNING, __func__, "Superblock is not well formed. m_version = " + + ToString(superblock.m_version) + ", m_cpids.size() = " + ToString(superblock.m_cpids.size()) + + ", m_projects.size() = " + ToString(superblock.m_projects.size())); + } + ConvergedScraperStatsCache.NewFormatSuperblock = superblock; // Mark the cache clean, because it was just updated. @@ -5436,12 +5816,14 @@ UniValue convergencereport(const UniValue& params, bool fHelp) "Display local node report of scraper convergence.\n" ); + auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; + // See if converged stats/contract update needed... bool bConvergenceUpdateNeeded = true; { LOCK(cs_ConvergedScraperStatsCache); - if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (nScraperSleep / 1000) || ConvergedScraperStatsCache.bClean) + if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) || ConvergedScraperStatsCache.bClean) { bConvergenceUpdateNeeded = false; } @@ -5750,7 +6132,7 @@ UniValue scraperreport(const UniValue& params, bool fHelp) global_scraper_net.pushKV("parts_map_size", parts_map_size); { - LOCK(cs_ConvergedScraperStatsCache); + LOCK2(CSplitBlob::cs_mapParts, cs_ConvergedScraperStatsCache); if (ConvergedScraperStatsCache.NewFormatSuperblock.WellFormed()) { @@ -5802,45 +6184,41 @@ UniValue scraperreport(const UniValue& params, bool fHelp) // new maps are all pointers to the already existing parts in the global map. 
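The scraper_sleep lambda above (and the similar scraper_cmanifest_* lambdas earlier) wraps the read of a cs_ScraperGlobals-guarded setting so the lock is held only while the value is copied, yet the result can sit inside an existing expression. WITH_LOCK, used elsewhere in this patch, is essentially the macro form of the same immediately-invoked lambda. A self-contained sketch with std::mutex; all names here (cs_globals, nSleepMilliseconds, WITH_LOCK_SKETCH) are made up for illustration, and the real WITH_LOCK presumably lives in the project's sync facilities:

    #include <mutex>

    std::mutex cs_globals;                     // stands in for cs_ScraperGlobals
    unsigned int nSleepMilliseconds = 300000;  // stands in for nScraperSleep

    // Macro form: evaluate 'code' with 'mtx' held and yield the result as one expression.
    #define WITH_LOCK_SKETCH(mtx, code) \
        ([&]() -> decltype(auto) { std::lock_guard<std::mutex> g(mtx); code; }())

    bool CacheIsFresh(long long now, long long cache_time)
    {
        // Hand-written lambda form: copy the guarded value, drop the lock,
        // then use the copy in the surrounding comparison.
        auto sleep_ms = []() {
            std::lock_guard<std::mutex> g(cs_globals);
            return nSleepMilliseconds;
        };

        return now - cache_time < sleep_ms() / 1000;
    }

    long long SleepSeconds()
    {
        // The same read expressed through the macro sketch.
        return WITH_LOCK_SKETCH(cs_globals, return nSleepMilliseconds) / 1000;
    }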
part_objects_reduced = total_convergences_part_pointer_maps_size; + for (const auto& iter : CSplitBlob::mapParts) { - LOCK(CSplitBlob::cs_mapParts); + UniValue part_reference(UniValue::VOBJ); - for (const auto& iter : CSplitBlob::mapParts) - { - UniValue part_reference(UniValue::VOBJ); + const CSplitBlob::CPart& part = iter.second; - const CSplitBlob::CPart& part = iter.second; + uint64_t part_data_size = part.data.size(); - uint64_t part_data_size = part.data.size(); + total_part_data_size += part_data_size; - total_part_data_size += part_data_size; + uint64_t part_ref_count = part.refs.size(); - uint64_t part_ref_count = part.refs.size(); - - for (const auto&iter2 : part.refs) - { - global_cache_unique_part_references.insert(iter2.first); - } + for (const auto&iter2 : part.refs) + { + global_cache_unique_part_references.insert(iter2.first); + } - total_part_references += part_ref_count; + total_part_references += part_ref_count; - part_reference.pushKV("part_hash", part.hash.ToString()); - part_reference.pushKV("part_data_size", part_data_size); - part_reference.pushKV("ref_count", part_ref_count); + part_reference.pushKV("part_hash", part.hash.ToString()); + part_reference.pushKV("part_data_size", part_data_size); + part_reference.pushKV("ref_count", part_ref_count); - part_references_array.push_back(part_reference); - } + part_references_array.push_back(part_reference); + } - total_unique_part_references_to_manifests = global_cache_unique_part_references.size(); + total_unique_part_references_to_manifests = global_cache_unique_part_references.size(); - part_references.pushKV("part_references", part_references_array); - part_references.pushKV("total_part_references", total_part_references); - part_references.pushKV("total_unique_part_references_to_manifests", - total_unique_part_references_to_manifests); - part_references.pushKV("total_part_data_size", total_part_data_size); + part_references.pushKV("part_references", part_references_array); + part_references.pushKV("total_part_references", total_part_references); + part_references.pushKV("total_unique_part_references_to_manifests", + total_unique_part_references_to_manifests); + part_references.pushKV("total_part_data_size", total_part_data_size); - global_scraper_net.pushKV("global_parts_map_references", part_references); - } + global_scraper_net.pushKV("global_parts_map_references", part_references); converged_scraper_stats_cache.pushKV("current_convergence_publishing_scrapers", current_convergence_publishing_scrapers); diff --git a/src/gridcoin/scraper/scraper.h b/src/gridcoin/scraper/scraper.h index 51b595bdbe..b958d33366 100644 --- a/src/gridcoin/scraper/scraper.h +++ b/src/gridcoin/scraper/scraper.h @@ -24,71 +24,45 @@ #include "gridcoin/scraper/fwd.h" #include "gridcoin/superblock.h" -/********************* -* Global Defaults * -*********************/ - -// These can get overridden by the GetArgs in init.cpp or ScraperApplyAppCacheEntries. -// The appcache entries will take precedence. - -// The amount of time to wait between scraper loop runs. This is in -// milliseconds. -unsigned int nScraperSleep = 300000; -// The amount of time before SB is due to start scraping. This is in -// seconds. -unsigned int nActiveBeforeSB = 14400; - -// Explorer mode flag. Only effective if scraper is active. -bool fExplorer = false; - -// These can be overridden by ScraperApplyAppCacheEntries(). - -// The flag to control whether non-current statistics files are retained. 
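Replacing the separately nested blocks with a single LOCK2(CSplitBlob::cs_mapParts, cs_ConvergedScraperStatsCache) makes the acquisition order visible in one place; the patch leans on a consistent order (map-level locks before a manifest's cs_manifest) so that two threads can never wait on each other's locks in opposite directions. A stand-alone sketch of taking two locks together with std::scoped_lock; the names are placeholders:

    #include <mutex>
    #include <vector>

    std::mutex cs_container;   // container-level lock (stand-in)
    std::mutex cs_object;      // per-object lock (stand-in)
    std::vector<int> container_index;
    std::vector<int> object_parts;

    void AddEntry(int value)
    {
        // Both mutexes are acquired in one statement. std::scoped_lock uses
        // std::lock's deadlock-avoidance underneath, and sticking to one agreed
        // order at every call site keeps the code easy to audit as well.
        std::scoped_lock lock(cs_container, cs_object);

        container_index.push_back(value);
        object_parts.push_back(value);
    }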
-bool SCRAPER_RETAIN_NONCURRENT_FILES = true; -// Define 48 hour retention time for stats files, current or not. -int64_t SCRAPER_FILE_RETENTION_TIME = 48 * 3600; -// Define extended file retention time for explorer mode. -int64_t EXPLORER_EXTENDED_FILE_RETENTION_TIME = 168 * 3600; -// Define whether prior CScraperManifests are kept. -bool SCRAPER_CMANIFEST_RETAIN_NONCURRENT = true; -// Define CManifest scraper object retention time. -int64_t SCRAPER_CMANIFEST_RETENTION_TIME = 48 * 3600; -bool SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES = false; -double MAG_ROUND = 0.01; -double NETWORK_MAGNITUDE = 115000; -double CPID_MAG_LIMIT = GRC::Magnitude::MAX; -// This settings below are important. This sets the minimum number of scrapers -// that must be available to form a convergence. Above this minimum, the ratio -// is followed. For example, if there are 4 scrapers, a ratio of 0.6 would require -// CEILING(0.6 * 4) = 3. See NumScrapersForSupermajority below. -// If there is only 1 scraper available, and the minimum is 2, then a convergence -// will not happen. Setting this below 2 will allow convergence to happen without -// cross checking, and is undesirable, because the scrapers are not supposed to be -// trusted entities. -unsigned int SCRAPER_CONVERGENCE_MINIMUM = 2; -// 0.6 seems like a reasonable standard for agreement. It will require... -// 2 out of 3, 3 out of 4, 3 out of 5, 4 out of 6, 5 out of 7, 5 out of 8, etc. -double SCRAPER_CONVERGENCE_RATIO = 0.6; -// By Project Fallback convergence rule as a ratio of projects converged vs whitelist. -// For 20 whitelisted projects this means up to five can be excluded and a contract formed. -double CONVERGENCE_BY_PROJECT_RATIO = 0.75; -// Allow non-scraper nodes to download stats? -bool ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD = false; -// Misbehaving scraper node banscore -unsigned int SCRAPER_MISBEHAVING_NODE_BANSCORE = 0; -// Require team membership in team whitelist. -bool REQUIRE_TEAM_WHITELIST_MEMBERSHIP = false; -// Default team whitelist -std::string TEAM_WHITELIST = "Gridcoin"; -// This is the period after the deauthorizing of a scraper before the nodes will start -// to assign banscore to nodes sending unauthorized manifests. 
-int64_t SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD = 300; - - -AppCacheSectionExt mScrapersExt = {}; - -CCriticalSection cs_mScrapersExt; - +// Thread safety +extern CCriticalSection cs_Scraper; +extern CCriticalSection cs_ScraperGlobals; +extern CCriticalSection cs_mScrapersExt; +extern CCriticalSection cs_StructScraperFileManifest; +extern CCriticalSection cs_ConvergedScraperStatsCache; +extern CCriticalSection cs_TeamIDMap; +extern CCriticalSection cs_VerifiedBeacons; + +/******************************************** +* Global Defaults (externs for header file) * +*********************************************/ + +extern unsigned int nScraperSleep; +extern unsigned int nActiveBeforeSB; + +extern bool fExplorer; + +extern bool SCRAPER_RETAIN_NONCURRENT_FILES; +extern int64_t SCRAPER_FILE_RETENTION_TIME; +extern int64_t EXPLORER_EXTENDED_FILE_RETENTION_TIME; +extern bool SCRAPER_CMANIFEST_RETAIN_NONCURRENT; +extern int64_t SCRAPER_CMANIFEST_RETENTION_TIME; +extern bool SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES; +extern std::atomic MAG_ROUND; +extern std::atomic NETWORK_MAGNITUDE; +extern std::atomic CPID_MAG_LIMIT; +extern unsigned int SCRAPER_CONVERGENCE_MINIMUM; +extern double SCRAPER_CONVERGENCE_RATIO; +extern double CONVERGENCE_BY_PROJECT_RATIO; +extern bool ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD; +extern unsigned int SCRAPER_MISBEHAVING_NODE_BANSCORE; +extern bool REQUIRE_TEAM_WHITELIST_MEMBERSHIP; +extern std::string TEAM_WHITELIST; +extern int64_t SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD; + +extern CCriticalSection cs_mScrapersExt; + +extern AppCacheSectionExt mScrapersExt; /********************* * Functions * @@ -96,6 +70,7 @@ CCriticalSection cs_mScrapersExt; uint256 GetFileHash(const fs::path& inputfile); ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const ConvergedManifest& StructConvergedManifest); +AppCacheSectionExt GetExtendedScrapersCache(); bool IsScraperAuthorized(); bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& KeyOut); bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey); @@ -137,6 +112,8 @@ double MagRound(double dMag) unsigned int NumScrapersForSupermajority(unsigned int nScraperCount) { + LOCK(cs_ScraperGlobals); + unsigned int nRequired = std::max(SCRAPER_CONVERGENCE_MINIMUM, (unsigned int)std::ceil(SCRAPER_CONVERGENCE_RATIO * nScraperCount)); return nRequired; diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index 2ba8d7fb58..943be5eb4c 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -26,15 +26,15 @@ extern unsigned int SCRAPER_MISBEHAVING_NODE_BANSCORE; extern int64_t SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD; extern int64_t SCRAPER_CMANIFEST_RETENTION_TIME; extern double CONVERGENCE_BY_PROJECT_RATIO; +extern CCriticalSection cs_ScraperGlobals; extern unsigned int nScraperSleep; -extern AppCacheSectionExt mScrapersExt; extern std::atomic g_nTimeBestReceived; extern ConvergedScraperStats ConvergedScraperStatsCache; extern CCriticalSection cs_mScrapersExt; extern CCriticalSection cs_ConvergedScraperStatsCache; +extern AppCacheSectionExt GetExtendedScrapersCache(); extern bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey); -// A lock needs to be taken on cs_mapParts before calling this function. bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) { /* Part of larger hashed blob. Currently only used for scraper data sharing. 
@@ -46,6 +46,8 @@ bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) uint256 hash(Hash(ss.begin(), ss.end())); mapAlreadyAskedFor.erase(CInv(MSG_PART, hash)); + LOCK(cs_mapParts); + auto ipart = mapParts.find(hash); if (ipart != mapParts.end()) @@ -57,10 +59,13 @@ bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) { LogPrint(BCLog::LogFlags::MANIFEST, "received part %s %u refs", hash.GetHex(), (unsigned) part.refs.size()); - part.data = CSerializeData(vRecv.begin(),vRecv.end()); //TODO: replace with move constructor + part.data = CSerializeData(vRecv.begin(), vRecv.end()); //TODO: replace with move constructor for (const auto& ref : part.refs) { CSplitBlob& split = *ref.first; + + LOCK(split.cs_manifest); + ++split.cntPartsRcvd; assert(split.cntPartsRcvd <= split.vParts.size()); if (split.isComplete()) @@ -80,6 +85,8 @@ bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) { if (pfrom) { + LOCK(cs_ScraperGlobals); + pfrom->Misbehaving(SCRAPER_MISBEHAVING_NODE_BANSCORE / 5); LogPrintf("WARNING: CSplitBlob::RecvPart: Spurious part received from %s. Adding %u banscore.", pfrom->addr.ToString(), SCRAPER_MISBEHAVING_NODE_BANSCORE / 5); @@ -88,9 +95,15 @@ bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) } } -// A lock needs to be taken on cs_mapParts before calling this function. +bool CSplitBlob::isComplete() const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest) +{ + return cntPartsRcvd == vParts.size(); +} + void CSplitBlob::addPart(const uint256& ihash) { + LOCK2(cs_mapParts, cs_manifest); + assert(ihash != Hash(vParts.end(),vParts.end())); unsigned n = vParts.size(); @@ -106,9 +119,10 @@ void CSplitBlob::addPart(const uint256& ihash) part.refs.emplace(this, n); } -// A lock needs to be taken on cs_mapParts before calling this function. int CSplitBlob::addPartData(CDataStream&& vData) { + LOCK2(cs_mapParts, cs_manifest); + uint256 hash(Hash(vData.begin(), vData.end())); auto it = mapParts.emplace(hash, CPart(hash)); @@ -147,8 +161,7 @@ CSplitBlob::~CSplitBlob() } } -// A lock needs to be taken on cs_mapParts before calling this function. -void CSplitBlob::UseAsSource(CNode* pfrom) +void CSplitBlob::UseAsSource(CNode* pfrom) EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { if (pfrom) { @@ -163,8 +176,7 @@ void CSplitBlob::UseAsSource(CNode* pfrom) } } -// A lock needs to be taken on cs_mapParts before calling this function. -bool CSplitBlob::SendPartTo(CNode* pto, const uint256& hash) +bool CSplitBlob::SendPartTo(CNode* pto, const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_mapParts) { auto ipart = mapParts.find(hash); @@ -179,8 +191,36 @@ bool CSplitBlob::SendPartTo(CNode* pto, const uint256& hash) return false; } -// A lock needs to be taken on cs_mapManifest before calling this function. -bool CScraperManifest::AlreadyHave(CNode* pfrom, const CInv& inv) +CScraperManifest::CScraperManifest() {} + +CScraperManifest::CScraperManifest(CScraperManifest& manifest) +{ + // The custom copy constructor here is to copy everything except the mutex cs_manifest, which is actually taken during + // the copy. 
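The custom copy constructor introduced here has to work around the fact that a mutex is neither copyable nor movable: the source object's cs_manifest is taken while its plain data members are copied, and the destination simply keeps its own freshly constructed mutex. A minimal illustration with std::mutex and placeholder field names:

    #include <mutex>
    #include <string>
    #include <vector>

    class GuardedBlob {
    public:
        GuardedBlob() = default;

        // Copy everything except the mutex itself; the source stays locked while
        // its fields are read so a concurrent writer cannot produce a torn copy.
        GuardedBlob(const GuardedBlob& other)
        {
            std::lock_guard<std::mutex> lock(other.m_mutex);
            m_name = other.m_name;
            m_parts = other.m_parts;
        }

    private:
        mutable std::mutex m_mutex;   // never copied; each object owns its own
        std::string m_name;
        std::vector<int> m_parts;
    };

The patch's constructor takes a non-const CScraperManifest& and uses LOCK on its cs_manifest; the std::mutex version above expresses the same idea with a mutable member and a const reference.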
+ LOCK(manifest.cs_manifest); + + phash = manifest.phash; + sCManifestName = manifest.sCManifestName; + + pubkey = manifest.pubkey; + signature = manifest.signature; + + projects = manifest.projects; + + BeaconList = manifest.BeaconList; + BeaconList_c = manifest.BeaconList_c; + ConsensusBlock = manifest.ConsensusBlock; + nTime = manifest.nTime; + + nContentHash = manifest.nContentHash; + + bCheckedAuthorized = manifest.bCheckedAuthorized; + + vParts = manifest.vParts; + cntPartsRcvd = manifest.cntPartsRcvd; +} + +bool CScraperManifest::AlreadyHave(CNode* pfrom, const CInv& inv) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { if (MSG_PART == inv.type) { @@ -202,7 +242,8 @@ bool CScraperManifest::AlreadyHave(CNode* pfrom, const CInv& inv) { // Only record UseAsSource if manifest is current to avoid spurious parts. { - LOCK(cs_mapParts); + // The lock order is important here + LOCK2(cs_mapParts, found->second->cs_manifest); if (found->second->IsManifestCurrent()) found->second->UseAsSource(pfrom); } @@ -216,8 +257,7 @@ bool CScraperManifest::AlreadyHave(CNode* pfrom, const CInv& inv) } } -// A lock needs to be taken on cs_mapManifest before calling this. -void CScraperManifest::PushInvTo(CNode* pto) +void CScraperManifest::PushInvTo(CNode* pto) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) { /* send all keys from the index map as inventory */ /* FIXME: advertise only completed manifests */ @@ -227,13 +267,17 @@ void CScraperManifest::PushInvTo(CNode* pto) } } -// A lock needs to be taken on cs_mapManifest before calling this. -bool CScraperManifest::SendManifestTo(CNode* pto, const uint256& hash) +// Clang thread static safety analysis is showing a false positive where it claims cs_mapManifest is not held when +// SendManifestTo is called in ProcessMessage in main. The complaint is on the template specialization of the serialization +// of CScraperManifest in PushMessage. Manual inspection of the code shows the lock is held. +bool CScraperManifest::SendManifestTo(CNode* pto, const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) { auto it = mapManifest.find(hash); if (it == mapManifest.end()) return false; + LOCK(it->second->cs_manifest); + pto->PushMessage("scraperindex", *it->second); return true; @@ -261,8 +305,7 @@ void CScraperManifest::dentry::Unserialize(CDataStream& ss) ss >> last; } -// A lock needs to be taken on cs_mapManifest and cs_mapParts before calling this. -void CScraperManifest::SerializeWithoutSignature(CDataStream& ss) const +void CScraperManifest::SerializeWithoutSignature(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { WriteCompactSize(ss, vParts.size()); for (const CPart* part : vParts) @@ -280,8 +323,7 @@ void CScraperManifest::SerializeWithoutSignature(CDataStream& ss) const } // This is to compare manifest content quickly. We just need the parts and the consensus block. -// A lock needs to be taken on cs_mapManifest and cs_mapParts before calling this. 
-void CScraperManifest::SerializeForManifestCompare(CDataStream& ss) const +void CScraperManifest::SerializeForManifestCompare(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { WriteCompactSize(ss, vParts.size()); for (const CPart* part : vParts) @@ -292,9 +334,10 @@ void CScraperManifest::SerializeForManifestCompare(CDataStream& ss) const ss << ConsensusBlock; } -// A lock needs to be taken on cs_mapManifest and cs_mapParts before calling this. -void CScraperManifest::Serialize(CDataStream& ss) const +void CScraperManifest::Serialize(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_mapParts) { + LOCK(cs_manifest); + SerializeWithoutSignature(ss); ss << signature; } @@ -302,7 +345,7 @@ void CScraperManifest::Serialize(CDataStream& ss) const // This is the complement to IsScraperAuthorizedToBroadcastManifests in the scraper. // It is used to determine whether received manifests are authorized. -bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, unsigned int& banscore_out) +bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, unsigned int& banscore_out) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { bool bIsValid = PubKey.IsValid(); @@ -316,46 +359,7 @@ bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, uns // This is the address corresponding to the manifest public key. std::string sManifestAddress = ManifestAddress.ToString(); - // Now check and see if that address is in the authorized scraper list. - AppCacheSection mScrapers = ReadCacheSection(Section::SCRAPER); - - /* We cannot use the AppCacheSection mScrapers in the raw, because there are two ways to deauthorize scrapers. - * The first way is to change the value of an existing entry to false. This works fine with mScrapers. The second way is to - * issue an addkey delete key. This will remove the key entirely, therefore deauthorizing the scraper. We need to preserve - * the key entry of the deleted record and when it was deleted to calculate a grace period. Why? To ensure that - * we do not generate islanding in the network in the case of a scraper deauthorization, we must apply a grace period - * after the timestamp of the marking of false/deletion, or from the time when the wallet came in sync, whichever is greater, before - * we start assigning a banscore to nodes that send/forward unauthorized manifests. This is because not all nodes - * may receive and accept the block that contains the transaction that modifies or deletes the scraper appcache entry - * at the same time, so there is a chance a node could send/forward an unauthorized manifest between when the scraper - * is deauthorized and the block containing that deauthorization is received by the sending node. - */ - - // So we are going to make use of AppCacheEntryExt and mScrapersExt, which are just like the normal AppCache structure, except they - // have an explicit deleted boolean. - - // First, walk the mScrapersExt map and see if it contains an entry that does not exist in mScrapers. If so, - // update the entry's value and timestamp and mark deleted. - LOCK(cs_mScrapersExt); - - for (auto const& entry : mScrapersExt) - { - const auto& iter = mScrapers.find(entry.first); - - if (iter == mScrapers.end()) - { - // Mark entry in mScrapersExt as deleted at the current adjusted time. The value is changed - // to false, because if it is deleted, it is also not authorized. 
- mScrapersExt[entry.first] = AppCacheEntryExt {"false", GetAdjustedTime(), true}; - } - - } - - // Now insert/update entries from mScrapers into mScrapersExt. - for (auto const& entry : mScrapers) - { - mScrapersExt[entry.first] = AppCacheEntryExt {entry.second.value, entry.second.timestamp, false}; - } + AppCacheSectionExt mScrapersExtended = GetExtendedScrapersCache(); // Now mScrapersExt is up to date. Walk and see if there is an entry with a value of true that matches // manifest address. If so the manifest is authorized. Note that no grace period has to be considered @@ -366,7 +370,7 @@ bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, uns int64_t nLastFalseEntryTime = 0; int64_t nGracePeriodEnd = 0; - for (auto const& entry : mScrapersExt) + for (auto const& entry : mScrapersExtended) { if (entry.second.value == "true" || entry.second.value == "1") { @@ -399,6 +403,8 @@ bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, uns } else { + LOCK(cs_ScraperGlobals); + nGracePeriodEnd = std::max(g_nTimeBestReceived, nLastFalseEntryTime) + SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD; // If the current time is past the grace period end then set SCRAPER_MISBEHAVING_NODE_BANSCORE, otherwise 0. @@ -417,8 +423,7 @@ bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, uns } } -// A lock must be taken on cs_mapManifest before calling this function. -void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out) +void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manifest) { const auto pbegin = ss.begin(); @@ -496,8 +501,14 @@ void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_ // is set to below 0.5, both to prevent a divide by zero exception, and also prevent unreasonably lose limits. So this // means the loosest limit that is allowed is essentially 2 * whitelist + 2. - unsigned int nMaxProjects = static_cast(std::ceil(static_cast(GRC::GetWhitelist().Snapshot().size()) / + unsigned int nMaxProjects = 0; + + { + LOCK(cs_ScraperGlobals); + + nMaxProjects = static_cast(std::ceil(static_cast(GRC::GetWhitelist().Snapshot().size()) / std::max(0.5, CONVERGENCE_BY_PROJECT_RATIO)) + 2); + } if (!OutOfSyncByAge() && projects.size() > nMaxProjects) { @@ -518,25 +529,22 @@ void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_ if (!mkey.SetPubKey(pubkey)) throw error("CScraperManifest: Invalid manifest key"); if (!mkey.Verify(hash, signature)) throw error("CScraperManifest: Invalid manifest signature"); + for (const uint256& ph : vph) { - LOCK(cs_mapParts); - - for (const uint256& ph : vph) - { - addPart(ph); - } + addPart(ph); } } -bool CScraperManifest::IsManifestCurrent() const +bool CScraperManifest::IsManifestCurrent() const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest) { + LOCK(cs_ScraperGlobals); + // This checks to see if the manifest is current, i.e. not about to be deleted. return (nTime >= GetAdjustedTime() - SCRAPER_CMANIFEST_RETENTION_TIME + (int64_t) nScraperSleep / 1000); } -// A lock must be taken on cs_mapManifest before calling this function. 
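The removed block above spelled out the merge at the call site: any remembered key that has disappeared from the current scraper section is re-marked as deauthorized at the current time (so a banscore grace period can be measured from it), and every live entry is inserted or refreshed with its deleted flag cleared. GetExtendedScrapersCache() presumably now performs that merge under its own locks. A rough stand-alone sketch of the merge itself, with simplified stand-in types (Entry, EntryExt and MergeExtended are not the real AppCache names):

    #include <cstdint>
    #include <map>
    #include <string>

    // Simplified stand-ins for the AppCache entry types.
    struct Entry    { std::string value; int64_t timestamp; };
    struct EntryExt { std::string value; int64_t timestamp; bool deleted; };

    std::map<std::string, EntryExt> MergeExtended(const std::map<std::string, Entry>& current,
                                                  std::map<std::string, EntryExt> extended,
                                                  int64_t now)
    {
        // A key we remembered that is no longer in the current section was removed
        // via a delete-key transaction: record it as deauthorized at 'now'.
        for (auto& kv : extended) {
            if (current.find(kv.first) == current.end()) {
                kv.second = EntryExt{"false", now, true};
            }
        }

        // Insert or refresh every live entry; these are not deleted.
        for (const auto& kv : current) {
            extended[kv.first] = EntryExt{kv.second.value, kv.second.timestamp, false};
        }

        return extended;
    }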
-bool CScraperManifest::DeleteManifest(const uint256& nHash, const bool& fImmediate) +bool CScraperManifest::DeleteManifest(const uint256& nHash, const bool& fImmediate) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { bool fDeleted = false; @@ -562,9 +570,8 @@ bool CScraperManifest::DeleteManifest(const uint256& nHash, const bool& fImmedia return fDeleted; } -// A lock must be taken on cs_mapManifest before calling this function. std::map>::iterator CScraperManifest::DeleteManifest(std::map>::iterator& iter, - const bool& fImmediate) + const bool& fImmediate) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { if (!fImmediate) mapPendingDeletedManifest[iter->first] = std::make_pair(GetAdjustedTime(), std::move(iter->second)); @@ -581,11 +588,16 @@ std::map>::iterator CScraperManifest: return iter; } -// A lock must be taken on cs_mapManifest before calling this function. -unsigned int CScraperManifest::DeletePendingDeletedManifests() +unsigned int CScraperManifest::DeletePendingDeletedManifests() EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { unsigned int nDeleted = 0; - int64_t nDeleteThresholdTime = GetAdjustedTime() - nScraperSleep / 1000; + + int64_t nDeleteThresholdTime = 0; + { + LOCK(cs_ScraperGlobals); + + nDeleteThresholdTime = GetAdjustedTime() - nScraperSleep / 1000; + } std::map>>::iterator iter; for (iter = mapPendingDeletedManifest.begin(); iter != mapPendingDeletedManifest.end();) @@ -605,7 +617,6 @@ unsigned int CScraperManifest::DeletePendingDeletedManifests() return nDeleted; } -// A lock must be taken on cs_mapManifest before calling this function. bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) { /* Index object for scraper data. @@ -622,19 +633,26 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) uint256 hash(Hash(vRecv.begin(), vRecv.end())); /* see if we do not already have it */ - if (AlreadyHave(pfrom, CInv(MSG_SCRAPERINDEX, hash))) + if (WITH_LOCK(cs_mapManifest, return AlreadyHave(pfrom, CInv(MSG_SCRAPERINDEX, hash)))) { LogPrint(BCLog::LogFlags::SCRAPER, "INFO: ScraperManifest::RecvManifest: Already have CScraperManifest %s from node %s.", hash.GetHex(), pfrom->addrName); return false; } - const auto it = mapManifest.emplace(hash, std::shared_ptr(new CScraperManifest())); - CScraperManifest& manifest = *it.first->second; - manifest.phash = &it.first->first; + CScraperManifest_shared_ptr manifest = std::shared_ptr(new CScraperManifest()); + + LOCK(cs_mapManifest); + + const auto it = mapManifest.emplace(hash, manifest); + + LOCK2(cs_mapParts, manifest->cs_manifest); + + // The phash in the manifest points to the actual hash which is the index to the element in the map. 
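Pointing phash at the key stored in the map, rather than keeping a second copy of the hash, is safe because std::map is node-based: inserting other elements never invalidates pointers or references to existing keys, and the pointer only dies when that particular element is erased. A tiny stand-alone illustration:

    #include <cassert>
    #include <map>
    #include <string>

    int main()
    {
        std::map<int, std::string> m;
        auto it = m.emplace(7, "manifest").first;

        const int* key_ptr = &it->first;   // points at the key inside the map node

        // Further insertions never invalidate pointers to existing map nodes, so
        // key_ptr stays valid until m.erase(7) (or clear/destruction).
        for (int i = 0; i < 1000; ++i) {
            m.emplace(i + 100, "x");
        }

        assert(*key_ptr == 7);
        return 0;
    }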
+ manifest->phash = &it.first->first; try { - manifest.UnserializeCheck(vRecv, banscore); + manifest->UnserializeCheck(vRecv, banscore); } catch (bool& e) { mapManifest.erase(hash); @@ -669,13 +687,11 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) ConvergedScraperStatsCache.bClean = false; } - LOCK(cs_mapParts); - - LogPrint(BCLog::LogFlags::MANIFEST, "received manifest %s with %u / %u parts", hash.GetHex(),(unsigned)manifest.cntPartsRcvd,(unsigned)manifest.vParts.size()); - if (manifest.isComplete()) + LogPrint(BCLog::LogFlags::MANIFEST, "received manifest %s with %u / %u parts", hash.GetHex(),(unsigned)manifest->cntPartsRcvd,(unsigned)manifest->vParts.size()); + if (manifest->isComplete()) { /* If we already got all the parts in memory, signal completion */ - manifest.Complete(); + manifest->Complete(); } else { @@ -683,36 +699,41 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) // Note: As an additional buffer to prevent spurious part receipts, if the manifest timestamp is within nScraperSleep of expiration (i.e. // about to go on the pending delete list, then do not request missing parts, as it is possible that the manifest will be deleted // by the housekeeping loop in between the receipt of the manifest, request for parts, and receipt of parts otherwise. - if (manifest.IsManifestCurrent()) manifest.UseAsSource(pfrom); + if (manifest->IsManifestCurrent()) manifest->UseAsSource(pfrom); } return true; } -// A lock needs to be taken on cs_mapManifest and cs_mapParts before calling this function. -bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& keySign) +bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& keySign) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) { - m->pubkey = keySign.GetPubKey(); + uint256 hash; - CDataStream sscomp(SER_NETWORK, 1); - CDataStream ss(SER_NETWORK, 1); + { + LOCK(m->cs_manifest); - // serialize the content for comparison purposes and put in manifest. - m->SerializeForManifestCompare(sscomp); - m->nContentHash = Hash(sscomp.begin(), sscomp.end()); + m->pubkey = keySign.GetPubKey(); - /* serialize and hash the object */ - m->SerializeWithoutSignature(ss); + CDataStream sscomp(SER_NETWORK, 1); + CDataStream ss(SER_NETWORK, 1); - /* sign the serialized manifest and append the signature */ - uint256 hash(Hash(ss.begin(), ss.end())); - keySign.Sign(hash, m->signature); + // serialize the content for comparison purposes and put in manifest. 
+ m->SerializeForManifestCompare(sscomp); + m->nContentHash = Hash(sscomp.begin(), sscomp.end()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest contents = %s", m->nContentHash.ToString()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest = %s", hash.ToString()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of signature = %s", Hash(m->signature.begin(), m->signature.end()).GetHex()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: datetime = %s", DateTimeStrFormat("%x %H:%M:%S", m->nTime)); + /* serialize and hash the object */ + m->SerializeWithoutSignature(ss); - LogPrint(BCLog::LogFlags::MANIFEST, "adding new local manifest"); + /* sign the serialized manifest and append the signature */ + hash = Hash(ss.begin(), ss.end()); + keySign.Sign(hash, m->signature); + + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest contents = %s", m->nContentHash.ToString()); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest = %s", hash.ToString()); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of signature = %s", Hash(m->signature.begin(), m->signature.end()).GetHex()); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: datetime = %s", DateTimeStrFormat("%x %H:%M:%S", m->nTime)); + + LogPrint(BCLog::LogFlags::MANIFEST, "adding new local manifest"); + } /* try inserting into map */ const auto it = mapManifest.emplace(hash, std::move(m)); @@ -721,8 +742,12 @@ bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& return false; CScraperManifest& manifest = *it.first->second; + + // Relock the manifest pointed to by the iterator. + LOCK(manifest.cs_manifest); + /* set the hash pointer inside */ - manifest.phash= &it.first->first; + manifest.phash = &it.first->first; // We do not need to do a deserialize check here, because the // manifest originates from THIS node, and the scraper's authorization @@ -745,11 +770,10 @@ bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& return true; } -// A lock needs to be taken on cs_mapManifest and cs_mapParts before calling this function. -void CScraperManifest::Complete() +void CScraperManifest::Complete() EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { /* Notify peers that we have a new manifest */ - LogPrint(BCLog::LogFlags::MANIFEST, "manifest %s complete with %u parts", phash->GetHex(),(unsigned)vParts.size()); + LogPrint(BCLog::LogFlags::MANIFEST, "manifest %s complete with %u parts", phash->GetHex(), (unsigned)vParts.size()); { LOCK(cs_vNodes); for (auto const& pnode : vNodes) @@ -773,8 +797,7 @@ void CScraperManifest::Complete() * getdata it. If it turns out useless, just ban the node. Then getdata the * parts from the node. 
*/ - -UniValue CScraperManifest::ToJson() const +UniValue CScraperManifest::ToJson() const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { UniValue r(UniValue::VOBJ); @@ -810,7 +833,8 @@ UniValue CScraperManifest::ToJson() const return r; } -UniValue CScraperManifest::dentry::ToJson() const + +UniValue CScraperManifest::dentry::ToJson() const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest) { UniValue r(UniValue::VOBJ); @@ -847,7 +871,7 @@ UniValue listmanifests(const UniValue& params, bool fHelp) if (params.size() > 0) bShowDetails = params[0].get_bool(); - LOCK(CScraperManifest::cs_mapManifest); + LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); if (params.size() > 1) { @@ -863,6 +887,8 @@ UniValue listmanifests(const UniValue& params, bool fHelp) const uint256& hash = pair->first; const CScraperManifest& manifest = *pair->second; + LOCK(manifest.cs_manifest); + if (bShowDetails) obj.pushKV(hash.GetHex(), manifest.ToJson()); else @@ -884,6 +910,8 @@ UniValue listmanifests(const UniValue& params, bool fHelp) const uint256& hash = pair.first; const CScraperManifest& manifest = *pair.second; + LOCK(manifest.cs_manifest); + if (bShowDetails) obj.pushKV(hash.GetHex(), manifest.ToJson()); else diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h index 0cf2d67b2b..630fc9c28f 100755 --- a/src/gridcoin/scraper/scraper_net.h +++ b/src/gridcoin/scraper/scraper_net.h @@ -14,6 +14,7 @@ #include "net.h" #include "streams.h" #include "sync.h" +#include "gridcoin/appcache.h" #include @@ -22,7 +23,6 @@ class CSplitBlob { public: - /** Parts of the Split object */ struct CPart { std::set> refs; @@ -35,13 +35,20 @@ class CSplitBlob bool present() const {return !this->data.empty();} }; + // static methods /** Process a message containing Part of Blob. - * @return whether the data was useful - */ + * @return whether the data was useful + */ static bool RecvPart(CNode* pfrom, CDataStream& vRecv); - bool isComplete() const - { return cntPartsRcvd == vParts.size(); } + /** Forward requested Part of Blob. + * @returns whether something was sent + */ + static bool SendPartTo(CNode* pto, const uint256& hash); + + // public methods + /** Boolean that returns whether all parts for the split object have been received. **/ + bool isComplete() const; /** Notification that this Split object is fully received. */ virtual void Complete() = 0; @@ -49,13 +56,6 @@ class CSplitBlob /** Use the node as download source for this Split object. */ void UseAsSource(CNode* pnode); - /** Forward requested Part of Blob. - * @returns whether something was sent - */ - static bool SendPartTo(CNode* pto, const uint256& hash); - - std::vector vParts; - /** Add a part reference to vParts. Creates a CPart if necessary. */ void addPart(const uint256& ihash); @@ -65,50 +65,65 @@ class CSplitBlob /** Unref all parts referenced by this. Removes parts with no references */ virtual ~CSplitBlob(); + // static variables + /** Mutex for mapParts **/ + static CCriticalSection cs_mapParts; + /* We could store the parts in mapRelay and have getdata service for free. */ /** map from part hash to scraper Index, so we can attach incoming Part in Index */ - static std::map mapParts; - size_t cntPartsRcvd =0; + static std::map mapParts GUARDED_BY(cs_mapParts); - static CCriticalSection cs_mapParts; // also protects vParts. + // member variables + /** Guards vParts and other manifest fields of the manifest (derived) class. 
+ * Note that this needs to be mutable so that a lock can be taken internally on cs_manifest on an + * otherwise const qualified member function. + **/ + mutable CCriticalSection cs_manifest; + std::vector vParts GUARDED_BY(cs_manifest); + size_t cntPartsRcvd GUARDED_BY(cs_manifest) = 0; }; /** An objects holding info about the scraper data file we have or are downloading. */ class CScraperManifest : public CSplitBlob { +public: /* constructors */ + CScraperManifest(); + + CScraperManifest(CScraperManifest& manifest); + public: /* static methods */ + /** Mutex protects both mapManifest and MapPendingDeletedManifest **/ + static CCriticalSection cs_mapManifest; /** map from index hash to scraper Index, so we can process Inv messages */ - static std::map> mapManifest; + static std::map> mapManifest GUARDED_BY(cs_mapManifest); + /** map of manifests that are pending deletion */ // ------------ hash -------------- nTime ------- pointer to CScraperManifest - static std::map>> mapPendingDeletedManifest; - - // Protects both mapManifest and MapPendingDeletedManifest - static CCriticalSection cs_mapManifest; + static std::map>> mapPendingDeletedManifest GUARDED_BY(cs_mapManifest); /** Process a message containing Index of Scraper Data. - * @returns whether the data was useful and valid - */ + * @returns whether the data was useful and valid + */ static bool RecvManifest(CNode* pfrom, CDataStream& vRecv); /** Check if we already have this object. - * @returns false only if we need this object - * Additionally sender node is used as fetch source if needed - */ + * @returns false only if we need this object + * Additionally sender node is used as fetch source if needed + */ static bool AlreadyHave(CNode* pfrom, const CInv& inv); /** Send Inv to that node about data files we have. - * Called when a node connects. - */ + * Called when a node connects. + */ static void PushInvTo(CNode* pto); /** Send a manifest of requested hash to node (from mapManifest). 
- * @returns whether something was sent - */ - static bool SendManifestTo(CNode* pfrom, const uint256& hash); + * @returns whether something was sent + */ + static bool SendManifestTo(CNode* pfrom, const uint256& hash); /** Add new manifest object into list of known manifests */ static bool addManifest(std::shared_ptr&& m, CKey& keySign); @@ -128,41 +143,40 @@ class CScraperManifest public: /*==== fields ====*/ - - const uint256* phash; - std::string sCManifestName; - CPubKey pubkey; - std::vector signature; + const uint256* phash GUARDED_BY(cs_manifest); + std::string sCManifestName GUARDED_BY(cs_manifest); + CPubKey pubkey GUARDED_BY(cs_manifest); + std::vector signature GUARDED_BY(cs_manifest); struct dentry { std::string project; std::string ETag; - unsigned int LastModified =0; - int part1 =-1; - unsigned partc =0; - int GridcoinTeamID =-1; - bool current =0; - bool last =0; + unsigned int LastModified = 0; + int part1 = -1; + unsigned partc = 0; + int GridcoinTeamID = -1; + bool current = 0; + bool last = 0; void Serialize(CDataStream& s) const; void Unserialize(CDataStream& s); UniValue ToJson() const; }; - std::vector projects; + std::vector projects GUARDED_BY(cs_manifest); - int BeaconList =-1; - unsigned BeaconList_c =0; - uint256 ConsensusBlock; - int64_t nTime = 0; + int BeaconList GUARDED_BY(cs_manifest) = -1 ; + unsigned BeaconList_c GUARDED_BY(cs_manifest) = 0; + uint256 ConsensusBlock GUARDED_BY(cs_manifest); + int64_t nTime GUARDED_BY(cs_manifest) = 0; - uint256 nContentHash; + uint256 nContentHash GUARDED_BY(cs_manifest); // The bCheckedAuthorized flag is LOCAL only. It is not serialized/deserialized. This // is set during Unserializecheck to false if wallet not in sync, and true if in sync // and scraper ID matches authorized list (i.e. IsManifestAuthorized is true. // The node will walk the mapManifest from - bool bCheckedAuthorized; + bool bCheckedAuthorized GUARDED_BY(cs_manifest); public: /* public methods */ diff --git a/src/gridcoin/superblock.h b/src/gridcoin/superblock.h index cb1c5af7b2..a7cd1eef07 100644 --- a/src/gridcoin/superblock.h +++ b/src/gridcoin/superblock.h @@ -17,6 +17,7 @@ #include extern int64_t SCRAPER_CMANIFEST_RETENTION_TIME; +extern CCriticalSection cs_ScraperGlobals; extern std::vector GetVerifiedBeaconIDs(const ConvergedManifest& StructConvergedManifest); extern std::vector GetVerifiedBeaconIDs(const ScraperPendingBeaconMap& VerifiedBeaconMap); @@ -1623,6 +1624,8 @@ struct ConvergedScraperStats std::map>::iterator iter; for (iter = PastConvergences.begin(); iter != PastConvergences.end(); ) { + LOCK(cs_ScraperGlobals); + // If the convergence entry is older than CManifest retention time, then delete the past convergence // entry, because the underlying CManifest will be deleted by the housekeeping loop using the same // aging. The erase advances the iterator in C++11. 
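The pruning loop above relies on the C++11 guarantee that std::map::erase returns the iterator following the erased element. A minimal, self-contained sketch of that erase-and-advance idiom (generic container and hypothetical names; not the actual PastConvergences types):

    #include <cstdint>
    #include <map>

    // Sketch only: drop entries older than the retention window. std::map::erase(iterator)
    // returns the iterator to the next element (C++11), so the loop advances only when
    // nothing was erased on that pass.
    void PruneOlderThan(std::map<int, int64_t>& timestamps_by_id, int64_t now, int64_t retention)
    {
        for (auto iter = timestamps_by_id.begin(); iter != timestamps_by_id.end(); ) {
            if (now - iter->second > retention) {
                iter = timestamps_by_id.erase(iter);
            } else {
                ++iter;
            }
        }
    }
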
diff --git a/src/main.cpp b/src/main.cpp index 8a688f2232..d009b61af1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3078,7 +3078,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, } /* Notify the peer about statsscraper blobs we have */ - LOCK(CScraperManifest::cs_mapManifest); + LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); CScraperManifest::PushInvTo(pfrom); @@ -3186,17 +3186,25 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, } } - LOCK(cs_main); - CTxDB txdb("r"); for (unsigned int nInv = 0; nInv < vInv.size(); nInv++) { const CInv &inv = vInv[nInv]; - if (fShutdown) - return true; - pfrom->AddInventoryKnown(inv); + if (fShutdown) return true; - bool fAlreadyHave = AlreadyHave(txdb, inv); + // cs_main lock here must be tightly scoped and not be concatenated outside the cs_mapManifest lock, because + // that will lead to a deadlock. In the original position above the for loop, cs_main is taken first here, then + // cs_mapManifest below, while in the scraper thread, ScraperCullAndBinCScraperManifests() first locks + // cs_mapManifest, then calls ScraperDeleteUnauthorizedCScraperManifests(), which calls IsManifestAuthorized(), + // which locks cs_main to read the AppCacheSection for authorized scrapers. + bool fAlreadyHave; + { + LOCK(cs_main); + CTxDB txdb("r"); + + pfrom->AddInventoryKnown(inv); + fAlreadyHave = AlreadyHave(txdb, inv); + } // Check also the scraper data propagation system to see if it needs // this inventory object: @@ -3208,20 +3216,26 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, LogPrint(BCLog::LogFlags::NOISY, " got inventory: %s %s", inv.ToString(), fAlreadyHave ? "have" : "new"); - if (!fAlreadyHave) - pfrom->AskFor(inv); - else if (inv.type == MSG_BLOCK && mapOrphanBlocks.count(inv.hash)) { - pfrom->PushGetBlocks(pindexBest, GetOrphanRoot(mapOrphanBlocks[inv.hash])->GetHash(true)); - } else if (nInv == nLastBlock) { - // In case we are on a very long side-chain, it is possible that we already have - // the last block in an inv bundle sent in response to getblocks. Try to detect - // this situation and push another getblocks to continue. - pfrom->PushGetBlocks(mapBlockIndex[inv.hash], uint256()); - LogPrint(BCLog::LogFlags::NOISY, "force getblock request: %s", inv.ToString()); - } + // Relock cs_main after getting done with the CScraperManifest::AlreadyHave. + { + LOCK(cs_main); + + if (!fAlreadyHave) + pfrom->AskFor(inv); + else if (inv.type == MSG_BLOCK && mapOrphanBlocks.count(inv.hash)) { + pfrom->PushGetBlocks(pindexBest, GetOrphanRoot(mapOrphanBlocks[inv.hash])->GetHash(true)); + } else if (nInv == nLastBlock) { + // In case we are on a very long side-chain, it is possible that we already have + // the last block in an inv bundle sent in response to getblocks. Try to detect + // this situation and push another getblocks to continue. + pfrom->PushGetBlocks(mapBlockIndex[inv.hash], uint256()); + LogPrint(BCLog::LogFlags::NOISY, "force getblock request: %s", inv.ToString()); + } - // Track requests for our stuff - Inventory(inv.hash); + // Track requests for our stuff + Inventory(inv.hash); + + } } } @@ -3303,7 +3317,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, } else if(!pushed && inv.type == MSG_SCRAPERINDEX) { - LOCK(CScraperManifest::cs_mapManifest); + LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); // Do not send manifests while out of sync. 
if (!OutOfSyncByAge())
@@ -3323,8 +3337,22 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
 // but it is an out parameter of IsManifestAuthorized.
 unsigned int banscore_out = 0;
+ // We have to copy out the nTime and pubkey from the selected manifest, because the IsManifestAuthorized call
+ // chain traverses the map and locks the cs_manifests in turn, which creates a deadlock potential if the cs_manifest
+ // lock is already held on one of the manifests.
+ int64_t nTime = 0;
+ CPubKey pubkey;
+ {
+ LOCK(manifest->cs_manifest);
+
+ nTime = manifest->nTime;
+ pubkey = manifest->pubkey;
+
+ }
+
 // Also don't send a manifest that is not current.
- if (CScraperManifest::IsManifestAuthorized(manifest->nTime, manifest->pubkey, banscore_out) && manifest->IsManifestCurrent())
+ if (CScraperManifest::IsManifestAuthorized(nTime, pubkey, banscore_out)
+ && WITH_LOCK(manifest->cs_manifest, return manifest->IsManifestCurrent()))
 {
 CScraperManifest::SendManifestTo(pfrom, inv.hash);
 }
@@ -3662,15 +3690,11 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
 else if (strCommand == "scraperindex")
 {
- LOCK(CScraperManifest::cs_mapManifest);
-
- CScraperManifest::RecvManifest(pfrom,vRecv);
+ CScraperManifest::RecvManifest(pfrom, vRecv);
 }
 else if (strCommand == "part")
 {
- LOCK(CSplitBlob::cs_mapParts);
-
- CSplitBlob::RecvPart(pfrom,vRecv);
+ CSplitBlob::RecvPart(pfrom, vRecv);
 }
diff --git a/src/sync.cpp b/src/sync.cpp
index 4728f7c1e2..13c245548b 100644
--- a/src/sync.cpp
+++ b/src/sync.cpp
@@ -275,7 +275,7 @@ bool LockStackEmpty()
 return it->second.empty();
 }
-bool g_debug_lockorder_abort = false;
-bool g_debug_lockorder_throw_exception = false;
+bool g_debug_lockorder_abort = true;
+bool g_debug_lockorder_throw_exception = true;
 #endif /* DEBUG_LOCKORDER */
diff --git a/src/test/gridcoin/superblock_tests.cpp b/src/test/gridcoin/superblock_tests.cpp
index 348261fec4..37a63c27de 100644
--- a/src/test/gridcoin/superblock_tests.cpp
+++ b/src/test/gridcoin/superblock_tests.cpp
@@ -436,6 +436,8 @@ ConvergedScraperStats GetTestConvergence(
 auto CScraperConvergedManifest_ptr = std::shared_ptr<CScraperManifest>(new CScraperManifest());
+ LOCK(CScraperConvergedManifest_ptr->cs_manifest);
+
 convergence.mScraperConvergedStats = stats.mScraperStats;
 convergence.Convergence.bByParts = by_parts;
@@ -512,11 +514,10 @@ ConvergedScraperStats GetTestConvergence(
 uint256 manifest_hash(Hash(ss.begin(), ss.end()));
 // insert into the global map
- const auto it = CScraperManifest::mapManifest.emplace(manifest_hash, std::move(CScraperConvergedManifest_ptr));
+ const auto it = CScraperManifest::mapManifest.emplace(manifest_hash, CScraperConvergedManifest_ptr);
- CScraperManifest& manifest = *it.first->second;
 /* set the hash pointer inside */
- manifest.phash= &it.first->first;
+ CScraperConvergedManifest_ptr->phash = &it.first->first;
 return convergence;
 }
From a39c563171b724b5698b8eede3c6c13fc7dc3219 Mon Sep 17 00:00:00 2001
From: "James C. 
Owens" Date: Thu, 2 Sep 2021 00:56:39 -0400 Subject: [PATCH 02/14] Remove _log entries for scraper locks and other minor formatting --- src/gridcoin/scraper/scraper.cpp | 266 +------------------------------ 1 file changed, 4 insertions(+), 262 deletions(-) diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index b58650df5e..ef276efd71 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -415,7 +415,6 @@ void UpdateVerifiedBeaconsFromConsensus(BeaconConsensus& Consensus) unsigned int stale = 0; LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); ScraperVerifiedBeacons& ScraperVerifiedBeacons = GetVerifiedBeacons(); @@ -452,8 +451,6 @@ void UpdateVerifiedBeaconsFromConsensus(BeaconConsensus& Consensus) { _log(logattribute::ERR, "UpdateVerifiedBeaconsFromConsensus", "Verified beacons save to disk failed."); } - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } } // anonymous namespace @@ -466,7 +463,6 @@ class ScraperLogger { private: - static CCriticalSection cs_log; static boost::gregorian::date PrevArchiveCheckDate; @@ -474,7 +470,6 @@ class ScraperLogger fsbridge::ofstream logfile; public: - ScraperLogger() { LOCK(cs_log); @@ -726,11 +721,9 @@ void _log(logattribute eType, const std::string& sCall, const std::string& sMess class stringbuilder { protected: - std::stringstream builtstring; public: - void append(const std::string &value) { builtstring << value; @@ -819,7 +812,6 @@ class userpass fsbridge::ifstream userpassfile; public: - userpass() { vuserpass.clear(); @@ -878,7 +870,6 @@ class authdata stringbuilder outdata; public: - authdata(const std::string& project) { std::string outfile = project + "_auth.dat"; @@ -1012,7 +1003,6 @@ void ScraperApplyAppCacheEntries() { { LOCK(cs_ScraperGlobals); - _log(logattribute::INFO, "LOCK", "cs_ScraperGlobals"); // If there are AppCache entries for the defaults in scraper.h override them. For the first two, this will also // override any GetArgs supplied from the command line, which is appropriate as network policy should take precedence. @@ -1074,8 +1064,6 @@ void ScraperApplyAppCacheEntries() _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "REQUIRE_TEAM_WHITELIST_MEMBERSHIP = " + ToString(REQUIRE_TEAM_WHITELIST_MEMBERSHIP)); _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "TEAM_WHITELIST = " + TEAM_WHITELIST); _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD = " + ToString(SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD)); - - _log(logattribute::INFO, "ENDLOCK", "cs_ScraperGlobals"); } AppCacheSection mScrapers = GetScrapersCache(); @@ -1090,8 +1078,6 @@ void ScraperApplyAppCacheEntries() AppCacheSection GetScrapersCache() { - //LOCK(cs_main); - return ReadCacheSection(Section::SCRAPER); } @@ -1149,7 +1135,6 @@ AppCacheSectionExt GetExtendedScrapersCache() // It can also be called in "single shot" mode. void Scraper(bool bSingleShot) { - // Initialize these while still single-threaded. They cannot be initialized during declaration because GetDataDir() // gives the wrong value that early. If they are already initialized then leave them alone (because this function // can be called in singleshot mode. @@ -1188,7 +1173,6 @@ void Scraper(bool bSingleShot) // Also delete any unauthorized CScraperManifests received before the wallet was in sync. 
{ LOCK(cs_Scraper); - _log(logattribute::INFO, "LOCK", "cs_Scraper"); ScraperDirectoryAndConfigSanity(); @@ -1196,9 +1180,6 @@ void Scraper(bool bSingleShot) // See the comment on the function. ScraperDeleteUnauthorizedCScraperManifests(); - - // End LOCK(cs_Scraper) - _log(logattribute::INFO, "ENDLOCK", "cs_Scraper"); } int64_t sbage = SuperblockAge(); @@ -1233,14 +1214,10 @@ void Scraper(bool bSingleShot) // Take a lock on the whole scraper for this... { LOCK(cs_Scraper); - _log(logattribute::INFO, "LOCK", "cs_Scraper"); // The only things we do here while quiescent ScraperDirectoryAndConfigSanity(); ScraperCullAndBinCScraperManifests(); - - // End LOCK(cs_Scraper) - _log(logattribute::INFO, "ENDLOCK", "cs_Scraper"); } // Need the log archive check here, because we don't run housekeeping in this while loop. @@ -1269,7 +1246,6 @@ void Scraper(bool bSingleShot) { // Take a lock on cs_Scraper for the main activity portion of the loop. LOCK(cs_Scraper); - _log(logattribute::INFO, "LOCK", "cs_Scraper"); // Signal stats event to UI. uiInterface.NotifyScraperEvent(scrapereventtypes::Stats, CT_UPDATING, {}); @@ -1280,7 +1256,7 @@ void Scraper(bool bSingleShot) // Delete manifest entries not on whitelist. Take a lock on cs_StructScraperFileManifest for this. { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "download statistics block: cs_StructScraperFileManifest"); + ScraperFileManifestMap::iterator entry; @@ -1294,9 +1270,6 @@ void Scraper(bool bSingleShot) DeleteScraperFileManifestEntry(entry_copy->second); } } - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "download statistics block: cs_StructScraperFileManifest"); } AuthenticationETagClear(); @@ -1332,9 +1305,6 @@ void Scraper(bool bSingleShot) // Signal stats event to UI. uiInterface.NotifyScraperEvent(scrapereventtypes::Stats, CT_NEW, {}); - - // End LOCK(cs_Scraper) - _log(logattribute::INFO, "ENDLOCK", "cs_Scraper"); } // This is the section to send out manifests. Only do if authorized. @@ -1350,7 +1320,6 @@ void Scraper(bool bSingleShot) // Publish and/or local delete CScraperManifests. { LOCK2(cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK2", "manifest send block: cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest"); // If the hash doesn't match (a new one is available), or there are none, then publish a new one. if (nmScraperFileManifestHash != StructScraperFileManifest.nFileManifestMapHash @@ -1364,9 +1333,6 @@ void Scraper(bool bSingleShot) } nmScraperFileManifestHash = StructScraperFileManifest.nFileManifestMapHash; - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK2", "manifest send block: cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest"); } } @@ -1435,7 +1401,6 @@ void ScraperSubscriber() if (!fScraperActive) { LOCK(cs_Scraper); - _log(logattribute::INFO, "LOCK", "cs_Scraper"); ScraperDirectoryAndConfigSanity(); // UnauthorizedCScraperManifests should only be seen on the first invocation after getting in sync @@ -1444,9 +1409,6 @@ void ScraperSubscriber() ScraperDeleteUnauthorizedCScraperManifests(); ScraperHousekeeping(); - - // END LOCK(cs_Scraper) - _log(logattribute::INFO, "ENDLOCK", "cs_Scraper"); } // Use the same sleep interval configured for the scraper. @@ -1539,7 +1501,6 @@ bool ScraperDirectoryAndConfigSanity() // Lock the manifest while it is being manipulated. 
{ LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "align directory with manifest file: cs_StructScraperFileManifest"); if (StructScraperFileManifest.mScraperFileManifest.empty()) { @@ -1616,9 +1577,6 @@ bool ScraperDirectoryAndConfigSanity() DeleteScraperFileManifestEntry(entry_copy->second); } } - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "align directory with manifest file: cs_StructScraperFileManifest"); } // If network policy is set to filter on whitelisted teams, then load team ID map from file. This will prevent the heavyweight @@ -1626,7 +1584,6 @@ bool ScraperDirectoryAndConfigSanity() if (require_team_whitelist_membership()) { LOCK(cs_TeamIDMap); - _log(logattribute::INFO, "LOCK", "cs_TeamIDMap"); if (TeamIDMap.empty()) { @@ -1651,23 +1608,18 @@ bool ScraperDirectoryAndConfigSanity() } } } - - _log(logattribute::INFO, "ENDLOCK", "cs_TeamIDMap"); } // If the verified beacons global has not been loaded from disk, then load it. // Log a warning if unsuccessful. { LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); if (!GetVerifiedBeacons().LoadedFromDisk && !LoadGlobalVerifiedBeacons()) { - _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Initial verified beacon load from file failed. " - "This is not necessarily a problem."); + _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Initial verified beacon load from file " + "failed. This is not necessarily a problem."); } - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } } // if fScraperActive } @@ -1715,10 +1667,6 @@ bool UserpassPopulated() return true; } - - - - /********************** * Project Host Files * **********************/ @@ -1833,10 +1781,6 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) return true; } - - - - /********************** * Project Team Files * **********************/ @@ -1865,7 +1809,6 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) for (const auto& prjs : projectWhitelist) { LOCK(cs_TeamIDMap); - _log(logattribute::INFO, "LOCK", "cs_TeamIDMap"); const auto iter = TeamIDMap.find(prjs.m_name); bool fProjTeamIDsMissing = false; @@ -2010,14 +1953,11 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) // If require team whitelist is set and bETagChanged is true, then process the file. This also populates/updated the team whitelist TeamIDs // in the TeamIDMap and the ETag entries in the ProjTeamETags map. if (require_team_whitelist_membership() && bETagChanged) ProcessProjectTeamFile(prjs.m_name, team_file, sTeamETag); - - _log(logattribute::INFO, "ENDLOCK", "cs_TeamIDMap"); } return true; } - // Note this should be called with a lock held on cs_TeamIDMap, which is intended to protect both // TeamIDMap and ProjTeamETags. bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, const std::string& etag) EXCLUSIVE_LOCKS_REQUIRED(cs_TeamIDMap) @@ -2125,7 +2065,6 @@ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, co return true; } - /********************** * Project RAC Files * **********************/ @@ -2171,14 +2110,11 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) { LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); // This is a copy on purpose. This map is in general // very small, and I want to minimize holding the // lock. 
GlobalVerifiedBeaconsCopy = GetVerifiedBeacons(); - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } // This is a local map scoped to this function for use @@ -2297,7 +2233,6 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) // ProcessProjectRacFileByCPID iterations into the global. { LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); ScraperVerifiedBeacons& GlobalVerifiedBeacons = GetVerifiedBeacons(); @@ -2317,14 +2252,11 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) } } - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } // After processing, update global structure with the timestamp of the latest file in the manifest. { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "user (rac) files struct update post process: cs_StructScraperFileManifest"); int64_t nMaxTime = 0; for (const auto& entry : StructScraperFileManifest.mScraperFileManifest) @@ -2334,16 +2266,11 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) } StructScraperFileManifest.timestamp = nMaxTime; - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "user (rac) files struct update post process: cs_StructScraperFileManifest"); } + return true; } - - - // This version uses a consensus beacon map (and teamid, if team filtering is specified by policy) to filter statistics. bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& file, const std::string& etag, BeaconConsensus& Consensus, ScraperVerifiedBeacons& GlobalVerifiedBeaconsCopy, @@ -2389,11 +2316,8 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil if (require_team_whitelist_membership()) { LOCK(cs_TeamIDMap); - _log(logattribute::INFO, "LOCK", "cs_TeamIDMap"); mTeamIDsForProject = TeamIDMap.find(project)->second; - - _log(logattribute::INFO, "ENDLOCK", "cs_TeamIDMap"); } std::string line; @@ -2690,8 +2614,6 @@ bool LoadBeaconList(const fs::path& file, ScraperBeaconMap& mBeaconMap) return true; } - - bool LoadTeamIDList(const fs::path& file) { fsbridge::ifstream ingzfile(file, std::ios_base::in | std::ios_base::binary); @@ -2762,20 +2684,15 @@ bool LoadTeamIDList(const fs::path& file) } LOCK(cs_TeamIDMap); - _log(logattribute::INFO, "LOCK", "cs_TeamIDMap"); // Insert into whitelist team ID map. if (!sProject.empty()) TeamIDMap[sProject] = mTeamIDsForProject; - - _log(logattribute::INFO, "ENDLOCK", "cs_TeamIDMap"); } return true; } - - bool StoreBeaconList(const fs::path& file) { BeaconConsensus Consensus = GetConsensusBeaconList(); @@ -2787,12 +2704,8 @@ bool StoreBeaconList(const fs::path& file) // Requires a lock. { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "store beacon list - update consensus block hash: cs_StructScraperFileManifest"); StructScraperFileManifest.nConsensusBlockHash = Consensus.nBlockHash; - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "store beacon list - update consensus block hash: cs_StructScraperFileManifest"); } if (fs::exists(file)) @@ -2834,8 +2747,6 @@ bool StoreBeaconList(const fs::path& file) return true; } - - bool StoreTeamIDList(const fs::path& file) { LOCK(cs_TeamIDMap); @@ -2913,8 +2824,6 @@ bool StoreTeamIDList(const fs::path& file) return true; } - - // Insert entry into Manifest. Note that cs_StructScraperFileManifest needs to be taken before calling. 
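// For illustration only, a hypothetical call site that satisfies the EXCLUSIVE_LOCKS_REQUIRED annotation below
// (this mirrors how LoadScraperFileManifest invokes it later in this file):
//
//     {
//         LOCK(cs_StructScraperFileManifest);
//         InsertScraperFileManifestEntry(new_entry); // new_entry is a hypothetical ScraperFileManifestEntry
//     }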
bool InsertScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) { @@ -2959,8 +2868,6 @@ unsigned int DeleteScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXC return ret; } - - // Mark manifest entry non-current. The reason this is encapsulated in a function is // to ensure the rehash is done. Note that cs_StructScraperFileManifest needs to be // taken before calling. @@ -2975,7 +2882,6 @@ bool MarkScraperFileManifestEntryNonCurrent(ScraperFileManifestEntry& entry) EXC return true; } - void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, const std::string& sProject, const bool& excludefromcsmanifest) { ScraperFileManifestEntry NewRecord; @@ -2997,15 +2903,6 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi { LOCK(cs_StructScraperFileManifest); - if (excludefromcsmanifest) - { - _log(logattribute::INFO, "LOCK", "saved manifest for downloaded "+ filetype + " files: AlignScraperFileManifestEntries: cs_StructScraperFileManifest"); - } - else - { - _log(logattribute::INFO, "LOCK", "saved manifest for processed "+ filetype + " files: AlignScraperFileManifestEntries: cs_StructScraperFileManifest"); - } - // Iterate mScraperFileManifest to find any prior filetype records for the same project and change current flag to false, // or delete if older than SCRAPER_FILE_RETENTION_TIME or non-current and fScraperRetainNonCurrentFiles // is false. @@ -3043,20 +2940,9 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi _log(logattribute::ERR, "AlignScraperFileManifestEntries", "StoreScraperFileManifest error occurred"); else _log(logattribute::INFO, "AlignScraperFileManifestEntries", "Stored Manifest"); - - // End LOCK(cs_StructScraperFileManifest) - if (excludefromcsmanifest) - { - _log(logattribute::INFO, "ENDLOCK", "saved manifest for downloaded "+ filetype + " files: AlignScraperFileManifestEntries: cs_StructScraperFileManifest"); - } - else - { - _log(logattribute::INFO, "ENDLOCK", "saved manifest for processed "+ filetype + " files: AlignScraperFileManifestEntries: cs_StructScraperFileManifest"); - } } } - bool LoadScraperFileManifest(const fs::path& file) { fsbridge::ifstream ingzfile(file, std::ios_base::in | std::ios_base::binary); @@ -3151,19 +3037,14 @@ bool LoadScraperFileManifest(const fs::path& file) // global structure. { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "load scraper file manifest - update entry: cs_StructScraperFileManifest"); InsertScraperFileManifestEntry(LoadEntry); - - // End LOCK(cs_StructScraperFileManifest - _log(logattribute::INFO, "ENDLOCK", "load scraper file manifest - update entry: cs_StructScraperFileManifest"); } } return true; } - bool StoreScraperFileManifest(const fs::path& file) { if (fs::exists(file)) @@ -3187,7 +3068,6 @@ bool StoreScraperFileManifest(const fs::path& file) //Lock StructScraperFileManifest during serialize to string. { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "store scraper file manifest to file: cs_StructScraperFileManifest"); // Header. 
stream << "Hash," << "Current," << "Time," << "Project," << "Filename," << "ExcludeFromCSManifest," << "Filetype" << "\n"; @@ -3205,9 +3085,6 @@ bool StoreScraperFileManifest(const fs::path& file) + entry.second.filetype + "\n"; stream << sScraperFileManifestEntry; } - - // end LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "store scraper file manifest to file: cs_StructScraperFileManifest"); } _log(logattribute::INFO, "StoreScraperFileManifest", "Finished processing manifest from map."); @@ -3222,8 +3099,6 @@ bool StoreScraperFileManifest(const fs::path& file) return true; } - - bool StoreStats(const fs::path& file, const ScraperStats& mScraperStats) { if (fs::exists(file)) @@ -3303,8 +3178,6 @@ bool StoreStats(const fs::path& file, const ScraperStats& mScraperStats) * Stats Computations * ************************/ - - bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& file, const double& projectmag, ScraperStats& mScraperStats) { fsbridge::ifstream ingzfile(file, std::ios_base::in | std::ios_base::binary); @@ -3325,8 +3198,6 @@ bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& fi return bResult; } - - bool LoadProjectObjectToStatsByCPID(const std::string& project, const CSerializeData& ProjectData, const double& projectmag, ScraperStats& mScraperStats) { boostio::basic_array_source input_source(&ProjectData[0], ProjectData.size()); @@ -3341,8 +3212,6 @@ bool LoadProjectObjectToStatsByCPID(const std::string& project, const CSerialize return bResult; } - - bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::filtering_istream& sUncompressedIn, const double& projectmag, ScraperStats& mScraperStats) { @@ -3469,7 +3338,6 @@ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::fi mScraperStats[ProjectStatsEntry.statskey] = ProjectStatsEntry; return true; - } // This function takes the mScraperMap core, which is the byCPIDbyProject @@ -3587,16 +3455,12 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() unsigned int nActiveProjects = 0; { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "GetScraperStatsByCurrentFileManifestState - count active projects: cs_StructScraperFileManifest"); for (auto const& entry : StructScraperFileManifest.mScraperFileManifest) { // if (entry.second.current && !entry.second.excludefromcsmanifest) nActiveProjects++; } - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "GetScraperStatsByCurrentFileManifestState - count active projects: cs_StructScraperFileManifest"); } double dMagnitudePerProject = NETWORK_MAGNITUDE / nActiveProjects; @@ -3607,7 +3471,6 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "GetScraperStatsByCurrentFileManifestState - load project file to stats: cs_StructScraperFileManifest"); for (auto const& entry : StructScraperFileManifest.mScraperFileManifest) { @@ -3629,25 +3492,18 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() } } } - - // End LOCK(cs_StructScraperFileManifest) - _log(logattribute::INFO, "ENDLOCK", "GetScraperStatsByCurrentFileManifestState - load project file to stats: cs_StructScraperFileManifest"); } - // Since this function uses the current project files for statistics, it also makes sense to use the current verified beacons map. 
ScraperStatsAndVerifiedBeacons stats_and_verified_beacons; { LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); ScraperVerifiedBeacons& verified_beacons = GetVerifiedBeacons(); stats_and_verified_beacons.mVerifiedMap = verified_beacons.mVerifiedMap; - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } ProcessNetworkWideFromProjectStats(mScraperStats); @@ -3659,7 +3515,6 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() return stats_and_verified_beacons; } - ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const ConvergedManifest& StructConvergedManifest) { _log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Beginning stats processing."); @@ -3803,7 +3658,6 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) { // Check to see if the hash exists in the manifest map, and if not, bail. LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); // Select manifest based on provided hash. auto pair = CScraperManifest::mapManifest.find(nManifestHash); @@ -3818,12 +3672,8 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) // the scraper thread loop, and therefore the directory may not have been set up yet. { LOCK(cs_Scraper); - _log(logattribute::INFO, "LOCK", "cs_Scraper"); ScraperDirectoryAndConfigSanity(); - - // End LOCK(cs_Scraper). - _log(logattribute::INFO, "ENDLOCK", "cs_Scraper"); } fs::path savepath = pathScraper / "manifest_dump"; @@ -3861,7 +3711,6 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) const CScraperManifest_shared_ptr manifest = pair->second; LOCK(manifest->cs_manifest); - _log(logattribute::INFO, "LOCK", "cs_manifest"); // Write out to files the parts. Note this assumes one-to-one part to file. Needs to // be fixed for more than one part per file. @@ -3903,9 +3752,6 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) iPartNum++; } - _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); - return true; } @@ -3967,7 +3813,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& // ... and it exists in the wallet... LOCK(pwalletMain->cs_wallet); - _log(logattribute::INFO, "LOCK", "pwalletMain->cs_wallet"); if (pwalletMain->GetKey(KeyID, KeyOut)) { @@ -3984,7 +3829,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& "Found address " + sScraperAddressFromConfig + " in both the wallet and appcache. \n" "This scraper is authorized to publish manifests."); - _log(logattribute::INFO, "ENDLOCK", "pwalletMain->cs_wallet"); return true; } else @@ -3993,8 +3837,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& "Key not found in the wallet for matching address. Please check that the wallet is unlocked " "(preferably for staking only)."); } - - _log(logattribute::INFO, "ENDLOCK", "pwalletMain->cs_wallet"); } } } @@ -4003,7 +3845,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& else { LOCK(pwalletMain->cs_wallet); - _log(logattribute::INFO, "LOCK", "pwalletMain->cs_wallet"); for (auto const& item : pwalletMain->mapAddressBook) { @@ -4049,7 +3890,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& "Found address " + sScraperAddress + " in both the wallet and appcache. 
\n" "This scraper is authorized to publish manifests."); - _log(logattribute::INFO, "ENDLOCK", "pwalletMain->cs_wallet"); return true; } else @@ -4062,8 +3902,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& } } } - - _log(logattribute::INFO, "ENDLOCK", "pwalletMain->cs_wallet"); } // If we made it here, there is no match or valid key in the wallet @@ -4073,7 +3911,6 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& return false; } - // This function computes the average time between manifests as a function of the last 10 received manifests // plus the nTime provided as the argument. This gives ten intervals for sampling between manifests. If the // average time between manifests is less than 50% of the nScraperSleep interval, or the most recent manifest @@ -4197,7 +4034,6 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() unsigned int nDeleted = 0; LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); for (auto iter = CScraperManifest::mapManifest.begin(); iter != CScraperManifest::mapManifest.end(); ) { @@ -4210,12 +4046,9 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() CPubKey pubkey; { LOCK(manifest->cs_manifest); - _log(logattribute::INFO, "LOCK", "cs_manifest"); nTime = manifest->nTime; pubkey = manifest->pubkey; - - _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } // We are not going to do anything with the banscore here, but it is an out parameter of IsManifestAuthorized. @@ -4227,31 +4060,22 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() manifest->bCheckedAuthorized = true; ++iter; - - _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } else { LOCK(manifest->cs_manifest); - _log(logattribute::INFO, "LOCK", "cs_manifest"); _log(logattribute::WARNING, "ScraperDeleteUnauthorizedCScraperManifests", "Deleting unauthorized manifest with hash " + iter->first.GetHex()); // Delete from CScraperManifest map (also advances iter to the next valid element). Immediate flag is set, because there should be // no pending delete retention grace for this. iter = CScraperManifest::DeleteManifest(iter, true); nDeleted++; - - _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } } - // End LOCK(CScraperManifest::cs_mapManifest) - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); - return nDeleted; } - // A lock needs to be taken on cs_StructScraperFileManifest for this function. // The sCManifestName is the public key of the scraper in address form. bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) @@ -4269,7 +4093,6 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS { LOCK2(CSplitBlob::cs_mapParts, manifest->cs_manifest); - _log(logattribute::INFO, "LOCK2", "cs_mapParts, cs_manifest"); // The manifest name is the authorized address of the scraper. manifest->sCManifestName = Address.ToString(); @@ -4331,7 +4154,6 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS // because it will never match any whitelisted project. Only include it if it is not empty. 
{ LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); ScraperVerifiedBeacons& ScraperVerifiedBeacons = GetVerifiedBeacons(); @@ -4360,8 +4182,6 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS iPartNum++; } - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); } for (auto const& entry : StructScraperFileManifest.mScraperFileManifest) @@ -4441,8 +4261,6 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS iPartNum++; } - - _log(logattribute::INFO, "ENDLOCK2", "cs_mapParts, cs_manifest"); } // "Sign" and "send". @@ -4584,15 +4402,6 @@ void ConvergedManifest::ComputeConvergedContentHash() nContentHash = Hash(ss.begin(), ss.end()); } - - - - - - - - - // ------------------------------------ This an out parameter. bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifest) { @@ -4728,7 +4537,6 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes if (bConvergenceSuccessful) { LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); // Select agreed upon (converged) CScraper manifest based on converged hash. auto pair = CScraperManifest::mapManifest.find(convergence->second.second); @@ -4775,8 +4583,6 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes } } } - - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); } if (!bConvergenceSuccessful) @@ -4802,7 +4608,6 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes uiInterface.NotifyScraperEvent(scrapereventtypes::Convergence, CT_DELETED, {}); return bConvergenceSuccessful; - } // Subordinate function to ScraperConstructConvergedManifest to try to find a convergence at the Project (part) level @@ -4821,7 +4626,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project StructConvergedManifest.CScraperConvergedManifest_ptr = std::shared_ptr(new CScraperManifest); LOCK(StructConvergedManifest.CScraperConvergedManifest_ptr->cs_manifest); - _log(logattribute::INFO, "LOCK", "cs_manifest"); // We are going to do this for each project in the whitelist. unsigned int iCountSuccessfulConvergedProjects = 0; @@ -4844,7 +4648,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project { LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); - _log(logattribute::INFO, "LOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); // For the selected project in the whitelist, walk each scraper. for (const auto& iter : mMapCSManifestsBinnedByScraper) @@ -4860,7 +4663,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project CScraperManifest_shared_ptr manifest = pair->second; LOCK(manifest->cs_manifest); - _log(logattribute::INFO, "LOCK", "manifest->cs_manifest"); // Find the part number in the manifest that corresponds to the whitelisted project. 
// Once we find a part that corresponds to the selected project in the given manifest, then break, @@ -4916,12 +4718,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } } - - _log(logattribute::INFO, "ENDLOCK", "manifest->cs_manifest"); } } - - _log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); } // Walk the time map (backwards in time because the sort order is descending), and select the first @@ -4948,10 +4746,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project std::map::iterator iPart; { LOCK(CSplitBlob::cs_mapParts); - _log(logattribute::INFO, "LOCK", "CSplitBlob::cs_mapParts"); iPart = CSplitBlob::mapParts.find(std::get<0>(iter.second)); - _log(logattribute::INFO, "ENDLOCK", "CSplitBlob::cs_mapParts"); } uint256 nContentHashCheck = Hash(iPart->second.data.begin(), iPart->second.data.end()); @@ -5015,14 +4811,12 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // with a manifest that has the newest part associated with a successful part (project) level convergence. LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); - _log(logattribute::INFO, "LOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); // Select manifest based on provided hash. auto pair = CScraperManifest::mapManifest.find(nManifestHashForConvergedBeaconList); CScraperManifest_shared_ptr manifest = pair->second; LOCK(manifest->cs_manifest); - _log(logattribute::INFO, "LOCK", "manifest->cs_manifest"); // Bail if BeaconList is not found or empty. if (pair == CScraperManifest::mapManifest.end() || manifest->vParts[0]->data.size() == 0) @@ -5030,7 +4824,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project _log(logattribute::WARNING, "ScraperConstructConvergedManifestByProject", "BeaconList was not found in the converged manifests from the scrapers."); bConvergenceSuccessful = false; - _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } else { @@ -5161,9 +4954,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } } } - - _log(logattribute::INFO, "ENDLOCK", "manifest->cs_manifest"); - _log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts"); } } @@ -5176,11 +4966,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } return bConvergenceSuccessful; - - _log(logattribute::INFO, "ENDLOCK", "cs_manifest"); } - mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper() EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { mmCSManifestsBinnedByScraper mMapCSManifestsBinnedByScraper; @@ -5196,7 +4983,6 @@ mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper() EXCLUSIVE_LOCKS_REQ uint256 nContentHash; { LOCK(manifest->cs_manifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); // Do not consider manifests that do not have all of their parts. 
if (!manifest->isComplete()) continue; @@ -5228,7 +5014,6 @@ mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper() EXCLUSIVE_LOCKS_REQ return mMapCSManifestsBinnedByScraper; } - mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() { // Apply the SCRAPER_CMANIFEST_RETAIN_NONCURRENT bool and if false delete any existing @@ -5248,7 +5033,6 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() } LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); // Bin by scraper and order by manifest time within scraper bin. mmCSManifestsBinnedByScraper mMapCSManifestsBinnedByScraper = BinCScraperManifestsByScraper(); @@ -5300,11 +5084,8 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() // SCRAPER_CMANIFEST_RETENTION_TIME as well. { LOCK(cs_ConvergedScraperStatsCache); - _log(logattribute::INFO, "LOCK", "cs_ConvergedScraperStatsCache"); ConvergedScraperStatsCache.DeleteOldConvergenceFromPastConvergencesMap(); - - _log(logattribute::INFO, "ENDLOCK", "cs_ConvergedScraperStatsCache"); } unsigned int nPendingDeleted = 0; @@ -5322,13 +5103,9 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() // that large. (The lock on CScraperManifest::cs_mapManifest is still held from above.) mMapCSManifestsBinnedByScraper = BinCScraperManifestsByScraper(); - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); - return mMapCSManifestsBinnedByScraper; } - - // ---------------------------------------------- In ---------------------------------------- Out bool LoadBeaconListFromConvergedManifest(const ConvergedManifest& StructConvergedManifest, ScraperBeaconMap& mBeaconMap) { @@ -5429,7 +5206,6 @@ std::vector GetVerifiedBeaconIDs(const ScraperPendingBeaconMap& Verifie return result; } - ScraperStatsAndVerifiedBeacons GetScraperStatsAndVerifiedBeacons(const ConvergedScraperStats &stats) { ScraperStatsAndVerifiedBeacons stats_and_verified_beacons; @@ -5467,29 +5243,21 @@ ScraperPendingBeaconMap GetVerifiedBeaconsForReport(bool from_global) if (from_global) { LOCK(cs_VerifiedBeacons); - _log(logattribute::INFO, "LOCK", "cs_VerifiedBeacons"); // An intentional copy. VerifiedBeacons = GetVerifiedBeacons().mVerifiedMap; - - _log(logattribute::INFO, "ENDLOCK", "cs_VerifiedBeacons"); - } else { LOCK(cs_ConvergedScraperStatsCache); - _log(logattribute::INFO, "LOCK", "cs_ConvergedScraperStatsCache"); // An intentional copy. VerifiedBeacons = GetScraperStatsAndVerifiedBeacons(ConvergedScraperStatsCache).mVerifiedMap; - - _log(logattribute::INFO, "ENDLOCK", "cs_ConvergedScraperStatsCache"); } return VerifiedBeacons; } - /*********************** * Subscriber * ************************/ @@ -5509,7 +5277,6 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac bool bConvergenceUpdateNeeded = true; { LOCK(cs_ConvergedScraperStatsCache); - _log(logattribute::INFO, "LOCK", "cs_ConvergedScraperStatsCache"); // If the cache is less than nScraperSleep in minutes old OR not dirty... 
if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) || ConvergedScraperStatsCache.bClean) @@ -5517,9 +5284,6 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac bConvergenceUpdateNeeded = false; _log(logattribute::INFO, __func__, "Cached convergence is fresh, convergence update not needed."); } - - // End LOCK(cs_ConvergedScraperStatsCache) - _log(logattribute::INFO, "ENDLOCK", "cs_ConvergedScraperStatsCache"); } ConvergedManifest StructConvergedManifest; @@ -5604,12 +5368,8 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac if (superblock_Prev.WellFormed()) // If the current is empty and the previous was not empty, then the contract has been deleted. uiInterface.NotifyScraperEvent(scrapereventtypes::SBContract, CT_DELETED, {}); - - // End LOCK(cs_ConvergedScraperStatsCache) - _log(logattribute::INFO, "ENDLOCK", "cs_ConvergedScraperStatsCache"); } - _log(logattribute::INFO, "ScraperGetSuperblockContract", "Superblock object generated from convergence"); return superblock; @@ -5645,7 +5405,6 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac // If we are here, we are using cached information. LOCK(cs_ConvergedScraperStatsCache); - _log(logattribute::INFO, "LOCK", "cs_ConvergedScraperStatsCache"); superblock = ConvergedScraperStatsCache.NewFormatSuperblock; @@ -5655,10 +5414,6 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac uiInterface.NotifyScraperEvent(scrapereventtypes::SBContract, CT_UPDATED, {}); _log(logattribute::INFO, "ScraperGetSuperblockContract", "Superblock object from cached converged stats"); - - // End LOCK(cs_ConvergedScraperStatsCache) - _log(logattribute::INFO, "ENDLOCK", "cs_ConvergedScraperStatsCache"); - } return superblock; @@ -5682,12 +5437,9 @@ UniValue sendscraperfilemanifest(const UniValue& params, bool fHelp) if (IsScraperAuthorizedToBroadcastManifests(AddressOut, KeyOut)) { LOCK(cs_StructScraperFileManifest); - _log(logattribute::INFO, "LOCK", "cs_StructScraperFileManifest"); ret = ScraperSendFileManifestContents(AddressOut, KeyOut); uiInterface.NotifyScraperEvent(scrapereventtypes::Manifest, CT_NEW, {}); - - _log(logattribute::INFO, "ENDLOCK", "cs_StructScraperFileManifest"); } else ret = false; @@ -5695,8 +5447,6 @@ UniValue sendscraperfilemanifest(const UniValue& params, bool fHelp) return UniValue(ret); } - - UniValue savescraperfilemanifest(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1 ) @@ -5710,7 +5460,6 @@ UniValue savescraperfilemanifest(const UniValue& params, bool fHelp) return UniValue(ret); } - UniValue deletecscrapermanifest(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1 ) @@ -5720,16 +5469,12 @@ UniValue deletecscrapermanifest(const UniValue& params, bool fHelp) ); LOCK(CScraperManifest::cs_mapManifest); - _log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest"); bool ret = CScraperManifest::DeleteManifest(uint256S(params[0].get_str()), true); - _log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest"); - return UniValue(ret); } - UniValue archivelog(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1 ) @@ -5803,7 +5548,6 @@ UniValue ConvergedScraperStatsToJson(ConvergedScraperStats& ConvergedScraperStat return ret; } - UniValue convergencereport(const UniValue& params, bool fHelp) { if (fHelp || params.size() > 1) @@ -6027,7 +5771,6 @@ UniValue testnewsb(const UniValue& params, bool fHelp) 
bPastConvergencesEmpty = false; } - } if (!bPastConvergencesEmpty) @@ -6244,4 +5987,3 @@ UniValue scraperreport(const UniValue& params, bool fHelp) return ret; } - From 7f8a3dce50358cb9246aa8067c9b082fa7b28bfd Mon Sep 17 00:00:00 2001 From: "James C. Owens" Date: Fri, 3 Sep 2021 22:51:14 -0400 Subject: [PATCH 03/14] Enhance scraperreport --- src/gridcoin/scraper/scraper.cpp | 129 ++++++++++++++++++++++++++--- src/gridcoin/scraper/scraper_net.h | 2 +- src/gridcoin/superblock.h | 19 +---- 3 files changed, 121 insertions(+), 29 deletions(-) diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index ef276efd71..099a370960 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -5850,6 +5850,7 @@ UniValue scraperreport(const UniValue& params, bool fHelp) UniValue converged_scraper_stats_cache(UniValue::VOBJ); uint64_t manifest_map_size = 0; + uint64_t pending_deleted_manifest_map_size = 0; uint64_t parts_map_size = 0; UniValue part_references(UniValue::VOBJ); @@ -5859,34 +5860,36 @@ UniValue scraperreport(const UniValue& params, bool fHelp) std::set global_cache_unique_part_references; { + // This lock order is required to avoid potential deadlocks between this function and other threads. LOCK(CScraperManifest::cs_mapManifest); + LOCK2(CSplitBlob::cs_mapParts, cs_ConvergedScraperStatsCache); manifest_map_size = CScraperManifest::mapManifest.size(); - } - - global_scraper_net.pushKV("manifest_map_size", manifest_map_size); + pending_deleted_manifest_map_size = CScraperManifest::mapPendingDeletedManifest.size(); - { - LOCK(CSplitBlob::cs_mapParts); + global_scraper_net.pushKV("manifest_map_size", manifest_map_size); + global_scraper_net.pushKV("pending_deleted_manifest_map_size", pending_deleted_manifest_map_size); parts_map_size = CSplitBlob::mapParts.size(); - } - - global_scraper_net.pushKV("parts_map_size", parts_map_size); - - { - LOCK2(CSplitBlob::cs_mapParts, cs_ConvergedScraperStatsCache); if (ConvergedScraperStatsCache.NewFormatSuperblock.WellFormed()) { uint64_t current_convergence_publishing_scrapers = 0; uint64_t current_convergence_part_pointer_map_size = 0; uint64_t past_convergence_map_size = 0; + uint64_t number_of_convergences_by_parts = 0; int64_t part_objects_reduced = 0; uint64_t total_part_references = 0; uint64_t total_unique_part_references_to_manifests = 0; + uint64_t total_part_references_to_manifests_not_in_manifest_maps = 0; + uint64_t total_part_references_to_manifests_with_null_phashes = 0; + uint64_t total_part_references_to_csplitblobs_not_valid_manifests = 0; uint64_t total_part_data_size = 0; + UniValue manifests_not_in_manifest_maps(UniValue::VARR); + UniValue manifests_with_null_phashes(UniValue::VARR); + UniValue csplitblobs_invalid_manifests(UniValue::VARR); + current_convergence_publishing_scrapers = ConvergedScraperStatsCache.Convergence.vIncludedScrapers.size() + ConvergedScraperStatsCache.Convergence.vExcludedScrapers.size(); @@ -5897,6 +5900,15 @@ UniValue scraperreport(const UniValue& params, bool fHelp) past_convergence_map_size = ConvergedScraperStatsCache.PastConvergences.size(); + // Count the number of convergences that are by part (project). Note that these WILL NOT be in the + // manifest maps, because they are composite manifests that are LOCAL ONLY. If the convergences + // are at the manifest level, then the CScraperManifest_shared_ptr CScraperConvergedManifest_ptr + // will point to a manifest that IS ALREADY IN THE mapManifest. 
+ if (ConvergedScraperStatsCache.Convergence.bByParts) ++number_of_convergences_by_parts; + + // Finish adding the number of convergences by parts below in the for loop for the PastConvergences so we + // don't have to traverse twice. + // This next section will form a set of unique pointers in the global cache // and also add the pointers up arithmetically. The difference is the efficiency gain // from using pointers rather than copies into the global cache. @@ -5909,6 +5921,9 @@ UniValue scraperreport(const UniValue& params, bool fHelp) for (const auto& iter : ConvergedScraperStatsCache.PastConvergences) { + // This increments if the past convergence is by parts because these will NOT be in the manifest maps. + if (iter.second.second.bByParts) ++number_of_convergences_by_parts; + for (const auto& iter2 : iter.second.second.ConvergedManifestPartPtrsMap) { global_cache_unique_parts.insert(iter2.second); @@ -5918,6 +5933,8 @@ UniValue scraperreport(const UniValue& params, bool fHelp) iter.second.second.ConvergedManifestPartPtrsMap.size(); } + global_scraper_net.pushKV("number_of_convergences_by_parts", number_of_convergences_by_parts); + uint64_t total_convergences_part_unique_pointer_maps_size = 0; total_convergences_part_unique_pointer_maps_size = global_cache_unique_parts.size(); @@ -5955,12 +5972,98 @@ UniValue scraperreport(const UniValue& params, bool fHelp) total_unique_part_references_to_manifests = global_cache_unique_part_references.size(); + for (auto iter : global_cache_unique_part_references) + { + // For scraper purposes, there should not be any CSplitBlobs that are not actually a manifest. + // Casting the CSplitBlob pointer to CScraperManifest and then checking the hash pointed at by the + // phash against the manifest maps... + CScraperManifest* manifest_ptr = dynamic_cast(iter); + + if (manifest_ptr != nullptr) { //valid manifest + LOCK(manifest_ptr->cs_manifest); + + if (manifest_ptr->phash != nullptr) // pointer to index hash is not null... find in map(s) + { + auto manifest_found = CScraperManifest::mapManifest.find(*(manifest_ptr->phash)); + + // Is it in mapManifest? + if (manifest_found == CScraperManifest::mapManifest.end()) + { + auto pending_deleted_manifest_found = + CScraperManifest::mapPendingDeletedManifest.find(*(manifest_ptr->phash)); + + // mapPendingDeletedManifest? + if (pending_deleted_manifest_found == CScraperManifest::mapPendingDeletedManifest.end()) + { + manifests_not_in_manifest_maps.push_back(manifest_ptr->ToJson()); + + ++total_part_references_to_manifests_not_in_manifest_maps; + } // In mapPendingDeletedManifest? + } // In mapManifest? + } // valid manifest but null pointer to index hash + else + { + // The current convergence (i.e. a by parts convergence in the local global cache but not in + // the published maps? + if (ConvergedScraperStatsCache.Convergence.CScraperConvergedManifest_ptr.get() != manifest_ptr) + { + bool found = false; + + // Past convergence? + for (const auto& past : ConvergedScraperStatsCache.PastConvergences) + { + if (past.second.second.CScraperConvergedManifest_ptr.get() == manifest_ptr) + { + found = true; + break; + } + } + + if (!found) + { + manifests_with_null_phashes.push_back(manifest_ptr->ToJson()); + + ++total_part_references_to_manifests_with_null_phashes; + } // In a past convergence? + } // In the current convergence? + } + } + else // not a valid manifest + { + // If after casting the CScraperManifest* is a nullptr, then that means this CSplitBlob is not actually + // a manifest. 
Right now, since the parts system is only being used for manifests, this should be zero. + // If the CSplitBlob is used as the parent class for other things besides manifests, then this will + // naturally not be zero. + LOCK(iter->cs_manifest); + + UniValue csplitblobs_invalid_manifest(UniValue::VOBJ); + + csplitblobs_invalid_manifest.pushKV("vParts.size()", (uint64_t) iter->vParts.size()); + csplitblobs_invalid_manifest.pushKV("cntPartsRcvd", (uint64_t) iter->cntPartsRcvd); + + csplitblobs_invalid_manifests.push_back(csplitblobs_invalid_manifest); + + ++total_part_references_to_csplitblobs_not_valid_manifests; + } + } + part_references.pushKV("part_references", part_references_array); part_references.pushKV("total_part_references", total_part_references); - part_references.pushKV("total_unique_part_references_to_manifests", - total_unique_part_references_to_manifests); + part_references.pushKV("total_unique_part_references_to_manifests", total_unique_part_references_to_manifests); + part_references.pushKV("total_part_references_to_manifests_not_in_manifest_maps", + total_part_references_to_manifests_not_in_manifest_maps); + part_references.pushKV("total_part_references_to_manifests_with_null_phashes", + total_part_references_to_manifests_with_null_phashes); + part_references.pushKV("total_part_references_to_csplitblobs_invalid_manifests", + total_part_references_to_csplitblobs_not_valid_manifests); part_references.pushKV("total_part_data_size", total_part_data_size); + part_references.pushKV("manifests_not_in_manifest_maps", manifests_not_in_manifest_maps); + part_references.pushKV("manifests_with_null_phashes", manifests_with_null_phashes); + part_references.pushKV("csplitblobs_invalid_manifests", csplitblobs_invalid_manifests); + global_scraper_net.pushKV("total_manifests_in_maps", manifest_map_size + + pending_deleted_manifest_map_size + number_of_convergences_by_parts); + global_scraper_net.pushKV("parts_map_size", parts_map_size); global_scraper_net.pushKV("global_parts_map_references", part_references); converged_scraper_stats_cache.pushKV("current_convergence_publishing_scrapers", diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h index 630fc9c28f..ec79e528ff 100755 --- a/src/gridcoin/scraper/scraper_net.h +++ b/src/gridcoin/scraper/scraper_net.h @@ -143,7 +143,7 @@ class CScraperManifest public: /*==== fields ====*/ - const uint256* phash GUARDED_BY(cs_manifest); + const uint256* phash GUARDED_BY(cs_manifest) = nullptr; std::string sCManifestName GUARDED_BY(cs_manifest); CPubKey pubkey GUARDED_BY(cs_manifest); std::vector signature GUARDED_BY(cs_manifest); diff --git a/src/gridcoin/superblock.h b/src/gridcoin/superblock.h index a7cd1eef07..3d9a9d8593 100644 --- a/src/gridcoin/superblock.h +++ b/src/gridcoin/superblock.h @@ -1556,21 +1556,10 @@ struct hash // This is part of the scraper but is put here, because it needs the complete NN:Superblock class. 
struct ConvergedScraperStats { - ConvergedScraperStats() : Convergence(), NewFormatSuperblock() - { - bClean = false; - bMinHousekeepingComplete = false; - - nTime = 0; - mScraperConvergedStats = {}; - PastConvergences = {}; - } + ConvergedScraperStats() : Convergence(), NewFormatSuperblock() { /* All defaults */ } ConvergedScraperStats(const int64_t nTime_in, const ConvergedManifest& Convergence) : Convergence(Convergence) { - bClean = false; - bMinHousekeepingComplete = false; - nTime = nTime_in; mScraperConvergedStats = GetScraperStatsByConvergedManifest(Convergence).mScraperStats; @@ -1579,7 +1568,7 @@ struct ConvergedScraperStats // Flag to indicate cache is clean or dirty (i.e. state change of underlying statistics has occurred. // This flag is marked true in ScraperGetSuperblockContract() and false on receipt or deletion of // statistics objects. - bool bClean; + bool bClean = false; // This flag tracks the completion of at least one iteration of the housekeeping loop. The purpose of this flag // is to ensure enough time has gone by after a (re)start of the wallet that a complete set of manifests/parts @@ -1590,9 +1579,9 @@ struct ConvergedScraperStats // before the superblock is received. This has the effect of allowing a grace period of nScraperSleep after the // wallet start where an incoming superblock will allowed with Result::UNKNOWN, rather than rejected with // Result::INVALID. - bool bMinHousekeepingComplete; + bool bMinHousekeepingComplete = false; - int64_t nTime; + int64_t nTime = 0; ScraperStats mScraperConvergedStats; ConvergedManifest Convergence; From 2582463ece9166b3ad4447af42681752d27c70e5 Mon Sep 17 00:00:00 2001 From: "James C. Owens" Date: Sat, 4 Sep 2021 15:45:54 -0400 Subject: [PATCH 04/14] Make changes to eliminate suspected publishing of invalid manifests --- src/gridcoin/scraper/scraper.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index 099a370960..c348703a7a 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -139,9 +139,9 @@ struct ScraperFileManifestEntry std::string filename; // Filename std::string project; uint256 hash; // hash of file - int64_t timestamp; - bool current; - bool excludefromcsmanifest; + int64_t timestamp = 0; + bool current = true; + bool excludefromcsmanifest = true; std::string filetype; }; @@ -152,7 +152,7 @@ struct ScraperFileManifest ScraperFileManifestMap mScraperFileManifest; uint256 nFileManifestMapHash; uint256 nConsensusBlockHash; - int64_t timestamp; + int64_t timestamp = 0; }; // Both TeamIDMap and ProjTeamETags are protected by cs_TeamIDMap. @@ -1321,9 +1321,10 @@ void Scraper(bool bSingleShot) { LOCK2(cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest); - // If the hash doesn't match (a new one is available), or there are none, then publish a new one. - if (nmScraperFileManifestHash != StructScraperFileManifest.nFileManifestMapHash - || !CScraperManifest::mapManifest.size()) + // If the hash is valid and doesn't match (a new one is available), or there are none, then publish a new one. + if (!StructScraperFileManifest.nFileManifestMapHash.IsNull() + && (nmScraperFileManifestHash != StructScraperFileManifest.nFileManifestMapHash + || !CScraperManifest::mapManifest.size())) { _log(logattribute::INFO, "Scraper", "Publishing new CScraperManifest."); From 2e1d981cb1e61404b8a7cee6315e3a18ea4285d1 Mon Sep 17 00:00:00 2001 From: "James C. 
Owens" Date: Sat, 4 Sep 2021 21:28:16 -0400 Subject: [PATCH 05/14] Fix phash in pending deleted manifest entries --- src/gridcoin/scraper/scraper_net.cpp | 141 +++++++++++++++++++-------- src/gridcoin/scraper/scraper_net.h | 4 +- 2 files changed, 103 insertions(+), 42 deletions(-) diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index 943be5eb4c..546da671e0 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -37,11 +37,11 @@ extern bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubK bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) { - /* Part of larger hashed blob. Currently only used for scraper data sharing. - * retrieve parent object from mapBlobParts - * notify object or ignore if no object found - * erase from mapAlreadyAskedFor - */ + /* Part of larger hashed blob. Currently only used for scraper data sharing. + * retrieve parent object from mapBlobParts + * notify object or ignore if no object found + * erase from mapAlreadyAskedFor + */ auto& ss = vRecv; uint256 hash(Hash(ss.begin(), ss.end())); mapAlreadyAskedFor.erase(CInv(MSG_PART, hash)); @@ -59,7 +59,7 @@ bool CSplitBlob::RecvPart(CNode* pfrom, CDataStream& vRecv) { LogPrint(BCLog::LogFlags::MANIFEST, "received part %s %u refs", hash.GetHex(), (unsigned) part.refs.size()); - part.data = CSerializeData(vRecv.begin(), vRecv.end()); //TODO: replace with move constructor + part.data = CSerializeData(vRecv.begin(), vRecv.end()); for (const auto& ref : part.refs) { CSplitBlob& split = *ref.first; @@ -224,7 +224,6 @@ bool CScraperManifest::AlreadyHave(CNode* pfrom, const CInv& inv) EXCLUSIVE_LOCK { if (MSG_PART == inv.type) { - //TODO: move return false; } @@ -257,7 +256,8 @@ bool CScraperManifest::AlreadyHave(CNode* pfrom, const CInv& inv) EXCLUSIVE_LOCK } } -void CScraperManifest::PushInvTo(CNode* pto) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) +void CScraperManifest::PushInvTo(CNode* pto) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, + CSplitBlob::cs_mapParts) { /* send all keys from the index map as inventory */ /* FIXME: advertise only completed manifests */ @@ -270,7 +270,8 @@ void CScraperManifest::PushInvTo(CNode* pto) EXCLUSIVE_LOCKS_REQUIRED(CScraperMa // Clang thread static safety analysis is showing a false positive where it claims cs_mapManifest is not held when // SendManifestTo is called in ProcessMessage in main. The complaint is on the template specialization of the serialization // of CScraperManifest in PushMessage. Manual inspection of the code shows the lock is held. 
-bool CScraperManifest::SendManifestTo(CNode* pto, const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) +bool CScraperManifest::SendManifestTo(CNode* pto, const uint256& hash) +EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) { auto it = mapManifest.find(hash); @@ -305,7 +306,8 @@ void CScraperManifest::dentry::Unserialize(CDataStream& ss) ss >> last; } -void CScraperManifest::SerializeWithoutSignature(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) +void CScraperManifest::SerializeWithoutSignature(CDataStream& ss) const +EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { WriteCompactSize(ss, vParts.size()); for (const CPart* part : vParts) @@ -323,7 +325,8 @@ void CScraperManifest::SerializeWithoutSignature(CDataStream& ss) const EXCLUSIV } // This is to compare manifest content quickly. We just need the parts and the consensus block. -void CScraperManifest::SerializeForManifestCompare(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) +void CScraperManifest::SerializeForManifestCompare(CDataStream& ss) const +EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { WriteCompactSize(ss, vParts.size()); for (const CPart* part : vParts) @@ -345,7 +348,8 @@ void CScraperManifest::Serialize(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED // This is the complement to IsScraperAuthorizedToBroadcastManifests in the scraper. // It is used to determine whether received manifests are authorized. -bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, unsigned int& banscore_out) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) +bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, unsigned int& banscore_out) +EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { bool bIsValid = PubKey.IsValid(); @@ -405,7 +409,8 @@ bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, uns { LOCK(cs_ScraperGlobals); - nGracePeriodEnd = std::max(g_nTimeBestReceived, nLastFalseEntryTime) + SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD; + nGracePeriodEnd = std::max(g_nTimeBestReceived, nLastFalseEntryTime) + + SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD; // If the current time is past the grace period end then set SCRAPER_MISBEHAVING_NODE_BANSCORE, otherwise 0. 
if (nGracePeriodEnd < GetAdjustedTime()) @@ -423,7 +428,8 @@ bool CScraperManifest::IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, uns } } -void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manifest) +void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out) +EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manifest) { const auto pbegin = ss.begin(); @@ -523,7 +529,8 @@ void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_ uint256 hash = Hash(pbegin, ss.begin()); ss >> signature; - LogPrint(BCLog::LogFlags::MANIFEST, "CScraperManifest::UnserializeCheck: hash of signature = %s", Hash(signature.begin(), signature.end()).GetHex()); + LogPrint(BCLog::LogFlags::MANIFEST, "CScraperManifest::UnserializeCheck: hash of signature = %s", + Hash(signature.begin(), signature.end()).GetHex()); CKey mkey; if (!mkey.SetPubKey(pubkey)) throw error("CScraperManifest: Invalid manifest key"); @@ -544,15 +551,35 @@ bool CScraperManifest::IsManifestCurrent() const EXCLUSIVE_LOCKS_REQUIRED(CSplit } -bool CScraperManifest::DeleteManifest(const uint256& nHash, const bool& fImmediate) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) +bool CScraperManifest::DeleteManifest(const uint256& nHash, const bool& fImmediate) +EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { bool fDeleted = false; auto iter = mapManifest.find(nHash); - if(iter != mapManifest.end()) + if (iter != mapManifest.end()) { - if (!fImmediate) mapPendingDeletedManifest[nHash] = std::make_pair(GetAdjustedTime(), std::move(iter->second)); + if (!fImmediate) + { + auto iter2 = mapPendingDeletedManifest.insert(std::make_pair(nHash, std::make_pair(GetAdjustedTime(), + iter->second))); + if (!iter2.second) + { + LogPrint("WARN: %s: Manifest insertion attempt into pending deleted map failed because an entry with the same " + "hash = %s, already exists. This should not happen.", __func__, nHash.GetHex()); + } + else + { + // Since phash in the manifest is actually a pointer, we need to change it to point to the key of the + // mapPendingDeletedManifest key entry, since the old pointer is now invalid. + CScraperManifest_shared_ptr manifest = iter2.first->second.second; + + LOCK(manifest->cs_manifest); + + manifest->phash = &iter2.first->first; + } + } mapManifest.erase(nHash); @@ -567,19 +594,40 @@ bool CScraperManifest::DeleteManifest(const uint256& nHash, const bool& fImmedia fDeleted = true; } + // Note that this will be false if an entry was not found and deleted in the mapManifest. return fDeleted; } -std::map>::iterator CScraperManifest::DeleteManifest(std::map>::iterator& iter, - const bool& fImmediate) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) +std::map>::iterator +CScraperManifest::DeleteManifest(std::map>::iterator& iter, + const bool& fImmediate) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { - if (!fImmediate) mapPendingDeletedManifest[iter->first] = std::make_pair(GetAdjustedTime(), std::move(iter->second)); + if (!fImmediate) + { + auto iter2 = mapPendingDeletedManifest.insert(std::make_pair(iter->first, std::make_pair(GetAdjustedTime(), + iter->second))); + if (!iter2.second) + { + LogPrint("WARN: %s: Manifest insertion attempt into pending deleted map failed because an entry with the same " + "hash = %s, already exists. 
This should not happen.", __func__, iter->first.GetHex()); + } + else + { + // Since phash in the manifest is actually a pointer, we need to change it to point to the key of the + // mapPendingDeletedManifest key entry, since the old pointer is now invalid. + CScraperManifest_shared_ptr manifest = iter2.first->second.second; + + LOCK(manifest->cs_manifest); + + manifest->phash = &iter2.first->first; + } + } iter = mapManifest.erase(iter); // lock cs_ConvergedScraperStatsCache and mark ConvergedScraperStatsCache dirty because a manifest has been deleted - // that could have been used in the cached convergence, so the convergence may change. This is not conditional, because the - // iterator must be valid. + // that could have been used in the cached convergence, so the convergence may change. This is not conditional, because + // the iterator must be valid. { LOCK(cs_ConvergedScraperStatsCache); @@ -619,14 +667,14 @@ unsigned int CScraperManifest::DeletePendingDeletedManifests() EXCLUSIVE_LOCKS_R bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) { - /* Index object for scraper data. + /* Index object for scraper data. * deserialize message * hash * see if we do not already have it * validate the message * populate the maps * request parts - */ + */ unsigned int banscore = 0; /* hash the object */ @@ -635,7 +683,8 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) /* see if we do not already have it */ if (WITH_LOCK(cs_mapManifest, return AlreadyHave(pfrom, CInv(MSG_SCRAPERINDEX, hash)))) { - LogPrint(BCLog::LogFlags::SCRAPER, "INFO: ScraperManifest::RecvManifest: Already have CScraperManifest %s from node %s.", hash.GetHex(), pfrom->addrName); + LogPrint(BCLog::LogFlags::SCRAPER, "INFO: ScraperManifest::RecvManifest: Already have CScraperManifest %s from " + "node %s.", hash.GetHex(), pfrom->addrName); return false; } @@ -660,8 +709,8 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) if (pfrom) { - LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore by %u.", - hash.GetHex(), pfrom->addr.ToString(), banscore); + LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore " + "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore); pfrom->Misbehaving(banscore); } return false; @@ -672,8 +721,8 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) if (pfrom) { - LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore by %u.", - hash.GetHex(), pfrom->addr.ToString(), banscore); + LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. 
Increasing banscore " + "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore); pfrom->Misbehaving(banscore); } return false; @@ -687,7 +736,9 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) ConvergedScraperStatsCache.bClean = false; } - LogPrint(BCLog::LogFlags::MANIFEST, "received manifest %s with %u / %u parts", hash.GetHex(),(unsigned)manifest->cntPartsRcvd,(unsigned)manifest->vParts.size()); + LogPrint(BCLog::LogFlags::MANIFEST, "received manifest %s with %u / %u parts", hash.GetHex(), + (unsigned) manifest->cntPartsRcvd, (unsigned) manifest->vParts.size()); + if (manifest->isComplete()) { /* If we already got all the parts in memory, signal completion */ @@ -696,15 +747,17 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) else { /* else request missing parts from the sender */ - // Note: As an additional buffer to prevent spurious part receipts, if the manifest timestamp is within nScraperSleep of expiration (i.e. - // about to go on the pending delete list, then do not request missing parts, as it is possible that the manifest will be deleted - // by the housekeeping loop in between the receipt of the manifest, request for parts, and receipt of parts otherwise. + // Note: As an additional buffer to prevent spurious part receipts, if the manifest timestamp is within nScraperSleep + // of expiration (i.e. about to go on the pending delete list, then do not request missing parts, as it is possible + // that the manifest will be deleted by the housekeeping loop in between the receipt of the manifest, request for + // parts, and receipt of parts otherwise. if (manifest->IsManifestCurrent()) manifest->UseAsSource(pfrom); } return true; } -bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& keySign) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) +bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& keySign) +EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) { uint256 hash; @@ -727,16 +780,21 @@ bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& hash = Hash(ss.begin(), ss.end()); keySign.Sign(hash, m->signature); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest contents = %s", m->nContentHash.ToString()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest = %s", hash.ToString()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of signature = %s", Hash(m->signature.begin(), m->signature.end()).GetHex()); - LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: datetime = %s", DateTimeStrFormat("%x %H:%M:%S", m->nTime)); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest contents = %s", + m->nContentHash.ToString()); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of manifest = %s", + hash.ToString()); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: hash of signature = %s", + Hash(m->signature.begin(), m->signature.end()).GetHex()); + LogPrint(BCLog::LogFlags::MANIFEST, "INFO: CScraperManifest::addManifest: datetime = %s", + DateTimeStrFormat("%x %H:%M:%S", m->nTime)); LogPrint(BCLog::LogFlags::MANIFEST, "adding new local manifest"); } /* try inserting into map */ - const auto it = mapManifest.emplace(hash, std::move(m)); + const auto it = mapManifest.emplace(hash, m); + /* Already exists, do nothing */ if (it.second == false) 
return false; @@ -782,7 +840,8 @@ void CScraperManifest::Complete() EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manife } } - LogPrint(BCLog::LogFlags::SCRAPER, "INFO: CScraperManifest::Complete(): from %s with hash %s", sCManifestName, phash->GetHex()); + LogPrint(BCLog::LogFlags::SCRAPER, "INFO: CScraperManifest::Complete(): from %s with hash %s", + sCManifestName, phash->GetHex()); } /* how? diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h index ec79e528ff..31d021948a 100755 --- a/src/gridcoin/scraper/scraper_net.h +++ b/src/gridcoin/scraper/scraper_net.h @@ -71,7 +71,7 @@ class CSplitBlob /* We could store the parts in mapRelay and have getdata service for free. */ /** map from part hash to scraper Index, so we can attach incoming Part in Index */ - static std::map mapParts GUARDED_BY(cs_mapParts); + static std::map mapParts GUARDED_BY(cs_mapParts); // member variables /** Guards vParts and other manifest fields of the manifest (derived) class. @@ -143,7 +143,9 @@ class CScraperManifest public: /*==== fields ====*/ + /** Local only (not serialized) pointer to hash (index) field of mapManifest **/ const uint256* phash GUARDED_BY(cs_manifest) = nullptr; + std::string sCManifestName GUARDED_BY(cs_manifest); CPubKey pubkey GUARDED_BY(cs_manifest); std::vector signature GUARDED_BY(cs_manifest); From 8cdf74bb9f2f72f05b1fc53b4e0e75c2cff48e89 Mon Sep 17 00:00:00 2001 From: jamescowens Date: Sat, 4 Sep 2021 22:09:30 -0400 Subject: [PATCH 06/14] Streamline ConvergedManifest constructor code --- src/gridcoin/scraper/fwd.h | 6 +++--- src/gridcoin/scraper/scraper.cpp | 29 +---------------------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/src/gridcoin/scraper/fwd.h b/src/gridcoin/scraper/fwd.h index d25cd59f2f..d5c5d1e9f2 100644 --- a/src/gridcoin/scraper/fwd.h +++ b/src/gridcoin/scraper/fwd.h @@ -82,10 +82,10 @@ struct ConvergedManifest // the composite convergence by taking parts piecewise in the case of the fallback to bByParts (project) level. 
uint256 nContentHash; uint256 ConsensusBlock; - int64_t timestamp; - bool bByParts; + int64_t timestamp = 0; + bool bByParts = false; - CScraperManifest_shared_ptr CScraperConvergedManifest_ptr; + CScraperManifest_shared_ptr CScraperConvergedManifest_ptr = nullptr; mConvergedManifestPart_ptrs ConvergedManifestPartPtrsMap; diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index c348703a7a..1b41bd1d34 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -4282,32 +4282,7 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS return bAddManifestSuccessful; } -ConvergedManifest::ConvergedManifest() -{ - nContentHash = {}; - ConsensusBlock = {}; - timestamp = 0; - bByParts = false; - - CScraperConvergedManifest_ptr = nullptr; - - ConvergedManifestPartPtrsMap = {}; - - mIncludedScraperManifests = {}; - - nUnderlyingManifestContentHash = {}; - - vIncludedScrapers = {}; - vExcludedScrapers = {}; - vScrapersNotPublishing = {}; - - mIncludedScrapersbyProject = {}; - mIncludedProjectsbyScraper = {}; - - mScraperConvergenceCountbyProject = {}; - - vExcludedProjects = {}; -} +ConvergedManifest::ConvergedManifest() { /* Use all defaults */ } ConvergedManifest::ConvergedManifest(CScraperManifest_shared_ptr& in) { @@ -4316,7 +4291,6 @@ ConvergedManifest::ConvergedManifest(CScraperManifest_shared_ptr& in) ConsensusBlock = in->ConsensusBlock; timestamp = GetAdjustedTime(); - bByParts = false; CScraperConvergedManifest_ptr = in; @@ -4333,7 +4307,6 @@ bool ConvergedManifest::operator()(const CScraperManifest_shared_ptr& in) ConsensusBlock = in->ConsensusBlock; timestamp = GetAdjustedTime(); - bByParts = false; CScraperConvergedManifest_ptr = in; From de4b040483c4db2a23b688afbdd23f28a0d21efe Mon Sep 17 00:00:00 2001 From: jamescowens Date: Sun, 5 Sep 2021 00:29:12 -0400 Subject: [PATCH 07/14] Remove move semantics from addManifest There is no reason to use move for a shared smart pointer. Let the pointer control block deal with the references. 
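
As a minimal illustrative sketch (not taken from this codebase; the names are hypothetical), the reasoning above is that passing a std::shared_ptr by value only bumps the reference count, so the caller's pointer stays valid after the call, whereas moving it leaves the caller's copy empty and any later use of it is a bug:

    #include <cassert>
    #include <memory>

    struct Manifest { int payload = 0; };

    // By value: the callee shares ownership; the caller's pointer is untouched.
    void addByValue(std::shared_ptr<Manifest> m) { /* e.g. keep a copy in a map */ }

    // By move: the callee takes ownership; the caller's pointer becomes empty.
    void addByMove(std::shared_ptr<Manifest>&& m) { auto kept = std::move(m); }

    int main()
    {
        auto manifest = std::make_shared<Manifest>();

        addByValue(manifest);
        assert(manifest != nullptr); // still safe for the caller to keep using

        addByMove(std::move(manifest));
        assert(manifest == nullptr); // moved-from shared_ptr is empty; further use would be a bug
        return 0;
    }

The extra atomic increment from the by-value copy is negligible, and it removes the risk of the caller continuing to hold a moved-from (empty) pointer.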
--- src/gridcoin/scraper/scraper.cpp | 2 +- src/gridcoin/scraper/scraper_net.cpp | 2 +- src/gridcoin/scraper/scraper_net.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index 1b41bd1d34..0f482e81c8 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -4268,7 +4268,7 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); - bool bAddManifestSuccessful = CScraperManifest::addManifest(std::move(manifest), Key); + bool bAddManifestSuccessful = CScraperManifest::addManifest(manifest, Key); if (bAddManifestSuccessful) _log(logattribute::INFO, "ScraperSendFileManifestContents", "addManifest (send) from this scraper (address " diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index 546da671e0..6bdf79adbf 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -756,7 +756,7 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) return true; } -bool CScraperManifest::addManifest(std::shared_ptr&& m, CKey& keySign) +bool CScraperManifest::addManifest(std::shared_ptr m, CKey& keySign) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) { uint256 hash; diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h index 31d021948a..fab8216225 100755 --- a/src/gridcoin/scraper/scraper_net.h +++ b/src/gridcoin/scraper/scraper_net.h @@ -126,7 +126,7 @@ class CScraperManifest static bool SendManifestTo(CNode* pfrom, const uint256& hash); /** Add new manifest object into list of known manifests */ - static bool addManifest(std::shared_ptr&& m, CKey& keySign); + static bool addManifest(std::shared_ptr m, CKey& keySign); /** Validate whether received manifest is authorized */ static bool IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, unsigned int& banscore_out); From 10db19fef1e0c51ff8c955941f3e9659dbe111bc Mon Sep 17 00:00:00 2001 From: jamescowens Date: Sun, 5 Sep 2021 13:56:42 -0400 Subject: [PATCH 08/14] Code and documentation cleanup Co-authored-by: div72 <60045611+div72@users.noreply.github.com> --- src/gridcoin/scraper/fwd.h | 133 ++- src/gridcoin/scraper/scraper.cpp | 1400 +++++++++++++++++--------- src/gridcoin/scraper/scraper.h | 137 ++- src/gridcoin/scraper/scraper_net.cpp | 52 +- src/gridcoin/scraper/scraper_net.h | 65 +- src/main.cpp | 8 +- 6 files changed, 1233 insertions(+), 562 deletions(-) diff --git a/src/gridcoin/scraper/fwd.h b/src/gridcoin/scraper/fwd.h index d5c5d1e9f2..2f62c02f84 100644 --- a/src/gridcoin/scraper/fwd.h +++ b/src/gridcoin/scraper/fwd.h @@ -18,6 +18,7 @@ * Scraper ENUMS * *********************/ +/** Defines the object type of the stats entry */ enum class statsobjecttype { NetworkWide, @@ -26,6 +27,7 @@ enum class statsobjecttype byCPIDbyProject }; +/** Defines the event type in the scraper system */ enum class scrapereventtypes { OutOfSync, @@ -37,6 +39,7 @@ enum class scrapereventtypes Sleep }; +/** Defines the validation type of the convergence achieved by the subscriber */ enum class scraperSBvalidationtype { Invalid, @@ -47,86 +50,131 @@ enum class scraperSBvalidationtype ProjectLevelConvergence }; +/** Currently the scraperID is a string. */ typedef std::string ScraperID; -// The inner map is sorted in descending order of time. The pair is manifest hash, content hash. 
+/** The inner map is sorted in descending order of time. The pair is manifest hash, content hash. */ typedef std::multimap, std::greater > mCSManifest; -// This is sCManifestName, which is the string version of the originating scraper pubkey. -// See the ScraperID typedef above. +/** This is sCManifestName, which is the string version of the originating scraper pubkey. */ typedef std::map mmCSManifestsBinnedByScraper; +/** Make the smart shared pointer a little less awkward for CScraperManifest */ typedef std::shared_ptr CScraperManifest_shared_ptr; -// Note the CParts pointed to by this map are safe to access, because the pointers are guaranteed valid -// as long as the holding CScraperManifests (both in the CScaperManifest global map, and this cache) -// still exist. So the safety of these pointers is coincident with the lifespan of CScraperManifests -// that have reference to them. If you have questions about this, you should review the CSplitBlob abstract -// class, which is the base class of the CScraperManifest class, and provides the mechanisms for part -// control. Note that two LOCKS are used to protect the integrity of the underlying global maps, -// CScraperManifest::cs_mapManifest and CSplitBlob::cs_mapParts. -// -------------- Project -- Converged Part Pointer +/** Note the CParts pointed to by this map are safe to access, because the pointers are guaranteed valid + * as long as the holding CScraperManifests (both in the CScaperManifest global map, and this cache) + * still exist. So the safety of these pointers is coincident with the lifespan of CScraperManifests + * that have reference to them. If you have questions about this, you should review the CSplitBlob abstract + * class, which is the base class of the CScraperManifest class, and provides the mechanisms for part + * control. Note that two LOCKS are used to protect the integrity of the underlying global maps, + * CScraperManifest::cs_mapManifest and CSplitBlob::cs_mapParts. + * ---------- Project -- Converged Part Pointer + * std::map mConvergedManifestPart_ptrs + */ typedef std::map mConvergedManifestPart_ptrs; +/** Used for a "convergence", which is the result of the subscriber comparing published manifests from the scrapers in + * accordance with the rules of convergence, where the rules have been met. The convergence is used as the basis of + * constructing a superblock. */ struct ConvergedManifest { - // Empty converged manifest constructor + /** Empty converged manifest constructor */ ConvergedManifest(); - // For constructing a dummy converged manifest from a single manifest + /** For constructing a dummy converged manifest from a single manifest */ ConvergedManifest(CScraperManifest_shared_ptr& in); - // Call operator to update an already initialized ConvergedManifest with a passed in CScraperManifest + /** Call operator to update an already initialized ConvergedManifest with a passed in CScraperManifest */ bool operator()(const CScraperManifest_shared_ptr& in); - // IMPORTANT... nContentHash is NOT the hash of part hashes in the order of vParts unlike CScraper::manifest. - // It is the hash of the data in the ConvergedManifestPartsMap in the order of the key. It represents - // the composite convergence by taking parts piecewise in the case of the fallback to bByParts (project) level. + /** IMPORTANT... nContentHash is NOT the hash of part hashes in the order of vParts unlike CScraper::manifest. + * It is the hash of the data in the ConvergedManifestPartsMap in the order of the key. 
It represents + * the composite convergence by taking parts piecewise in the case of the fallback to bByParts (project) level. + */ uint256 nContentHash; + /** The block on which the convergence was formed. */ uint256 ConsensusBlock; + /** The time of the convergence. */ int64_t timestamp = 0; + /** Flag indicating whether the convergence was formed at the project level. If the rules of convergence cannot + * be met at the whole manifest level (which could happen if a project were not available in some manifests, for + * example), then in the fallback to put together a convergence from matching at the project (part) level, this flag + * will be set if a convergence is formed that way. + */ bool bByParts = false; + /** The shared pointer to the CScraperManifest that underlies the convergence. If the convergence was at the manifest + * level, this will be the manifest selected as the representation of the equivalent manifests used to determine the + * convergence. If the convergence is at the parts (project) level, then this will point to a synthesized (non-published) + * manifest which only has the vParts vector filled out, and which content is the parts selected to form the byParts + * (project) level convergence. Note that this synthesized manifest is LOCAL ONLY and will not be added to mapManifests + * or published to other nodes. Convergences in general are the responsibility of the subscriber, not the publisher. + */ CScraperManifest_shared_ptr CScraperConvergedManifest_ptr = nullptr; + /** A map to the pointers of the parts used to form the convergence. This will be essentially the same as the vParts + * vector in the CScraperManifest pointed to by the CScraperConvergedManifest_ptr. + */ mConvergedManifestPart_ptrs ConvergedManifestPartPtrsMap; - // Used when convergence is at the manifest level (normal) + /** A map of the manifests by hash (key) that formed this convergence. This is used when convergence is at the manifest + * level (normal). + */ std::map mIncludedScraperManifests; - // The below is the manifest content hash for the underlying manifests that comprise the convergence. This - // will only be populated if the convergence is at the manifest level (bByParts == false). In that case, each - // manifest's content in the convergence must be the same. If the convergence is by project, this does not - // make sense to populate. See the above comment. + /** The below is the manifest content hash for the underlying manifests that comprise the convergence. This + * will only be populated if the convergence is at the manifest level (bByParts == false). In that case, each + * manifest's content in the convergence must be the same. If the convergence is by project, this does not + * make sense to populate. + */ uint256 nUnderlyingManifestContentHash; - // Used when convergence is at the manifest level (normal) and also at the part (project) level for - // scrapers that are not part of any part (project) level convergence. + /** The publishing scrapers included in the convergence. If the convergence is at the project level, a scraper in this + * vector would have to be included in at least one project level match for the synthesized convergence. + */ std::vector vIncludedScrapers; + /** The publishing scrapers excluded from the convergence. If the convergence is at the project level, a scraper in this + * vector would not have been included in ANY project level match for the synthesized convergence. + */ std::vector vExcludedScrapers; + /** The scrapers not publishing (i.e. 
no manifests present with the retention period) when the convergence was formed. */ std::vector vScrapersNotPublishing; - // Used when convergence is at the project (bByParts) level (fallback) - // ----- Project --------- ScraperID + /** Used when convergence is at the project (bByParts) level (fallback) + * -------------- Project --- ScraperID + * std::multimap mIncludedScrapersbyProject + */ std::multimap mIncludedScrapersbyProject; - // ----- ScraperID ------- Project + /** Used when convergence is at the project (bByParts) level (fallback) + * ------------- ScraperID --- Project + * std::multimap mIncludedProjectsbyScraper + */ std::multimap mIncludedProjectsbyScraper; - // When bByParts (project) level convergence occurs, this records the count of scrapers in the - // convergences by project. + /** When bByParts (project) level convergence occurs, this records the count of scrapers in the + * convergences by project. + */ std::map mScraperConvergenceCountbyProject; - // --------- project + /** The projects excluded from the convergence. Since the convergence rules REQUIRE a fallback to project level + * convergence if the trial convergence formed at the manifest level excludes a project, this vector should only have + * an entry if bByParts is also true. + */ std::vector vExcludedProjects; + /** Populates the part pointers map in the convergence */ bool PopulateConvergedManifestPartPtrsMap(); + /** Computes the converged content hash */ void ComputeConvergedContentHash(); }; +/** Used for the key of the statistics map(s) in the scraper */ struct ScraperObjectStatsKey { statsobjecttype objecttype; std::string objectID; }; +/** Used for the value of the stats entries in the statistics map(s) in the scraper */ struct ScraperObjectStatsValue { double dTC; @@ -136,12 +184,14 @@ struct ScraperObjectStatsValue double dMag; }; +/** Used for the stats entries in the statistics map(s) in the scraper */ struct ScraperObjectStats { ScraperObjectStatsKey statskey; ScraperObjectStatsValue statsvalue; }; +/** Comparison for the statistics map entry ordering */ struct ScraperObjectStatsKeyComp { bool operator() ( ScraperObjectStatsKey a, ScraperObjectStatsKey b ) const @@ -150,17 +200,20 @@ struct ScraperObjectStatsKeyComp } }; +/** Definition of the scraper statistics map(s) */ typedef std::map ScraperStats; -// This is modeled after AppCacheEntry/Section but named separately. +/** modeled after AppCacheEntry/Section but named separately. */ struct ScraperBeaconEntry { std::string value; //!< Value of entry. int64_t timestamp; //!< Timestamp of entry. }; +/** Definition of the scraper beacon map */ typedef std::map ScraperBeaconMap; +/** Small structure to define the fields and (un)serialization for pending beacon entries */ struct ScraperPendingBeaconEntry { std::string cpid; @@ -184,9 +237,14 @@ struct ScraperPendingBeaconEntry } }; -// --- Base58 encoded public key ---- cpid, timestamp +/** - Base58 encoded public key - {cpid, timestamp, keyid} + * std::map ScraperPendingBeaconMap + */ typedef std::map ScraperPendingBeaconMap; +/** Used to hold the block hash, scraper beacon map and pending beacon map at the ladder consensus point. This will be used + * as appropriate in the convergence formed. 
+ */ struct BeaconConsensus { uint256 nBlockHash; @@ -194,6 +252,7 @@ struct BeaconConsensus ScraperPendingBeaconMap mPendingMap; }; +/** Small structure to define the fields for verified beacons and (un)serialization */ struct ScraperVerifiedBeacons { // Initialize the timestamp to the current adjusted time. @@ -217,13 +276,18 @@ struct ScraperVerifiedBeacons } }; +/** A combination of scraper stats and verified beacons. For convenience in the interface between the scraper and the + * quorum/superblock code. + */ struct ScraperStatsAndVerifiedBeacons { ScraperStats mScraperStats; ScraperPendingBeaconMap mVerifiedMap; }; -// Extended AppCache structures similar to those in AppCache.h, except a deleted flag is provided +/** Extended AppCache structure similar to those in AppCache.h, except a deleted flag is provided. This will be + * reimplemented in the future with a custom contract handler since the appcache is being retired. + */ struct AppCacheEntryExt { std::string value; // Value of entry. @@ -231,6 +295,9 @@ struct AppCacheEntryExt bool deleted; // Deleted flag. }; +/** Extended AppCache map typedef similar to those in AppCache.h, except a deleted flag is provided. This will be + * reimplemented in the future with a custom contract handler since the appcache is being retired. + */ typedef std::unordered_map AppCacheSectionExt; #endif // GRIDCOIN_SCRAPER_FWD_H diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index 0f482e81c8..f554287f6b 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -42,19 +42,56 @@ namespace boostio = boost::iostreams; fs::path pathDataDir = {}; fs::path pathScraper = {}; +// Externals extern CWallet* pwalletMain; // Thread safety +/** + * @brief Protects the scraper at large in portions of the scraper processing that need to be single threaded. The holding + * of this lock in general should be minimized. + */ CCriticalSection cs_Scraper; +/** + * @brief Protects the scraper globals + */ CCriticalSection cs_ScraperGlobals; +/** + * @brief Protects the extended scraper app cache global map. + */ CCriticalSection cs_mScrapersExt; +/** + * @brief Protects the main scraper file manifest structure. This is the primary global state machine for the scraper on the + * file side. + */ CCriticalSection cs_StructScraperFileManifest; +/** + * @brief Protects the global converged scraper stats cache, which is populated by periodic runs of the scraper and/or + * subscriber loop, and is used to validate superblocks. + */ CCriticalSection cs_ConvergedScraperStatsCache; +/** + * @brief Protects the team ID map + */ CCriticalSection cs_TeamIDMap; +/** + * @brief Protects the global map for verified beacons, g_verified_beacons + */ CCriticalSection cs_VerifiedBeacons; + +/** + * @brief Flag that indicates whether the scraper is supposed to be active + */ bool fScraperActive = false; +/** + * @brief Vector of usernames and passwords for access to project sites which require logins to meet GPDR requirements + */ std::vector> vuserpass; +/** + * @brief Vector of team IDs for whitelisted teams across the different whitelisted projects. When the team requirement is + * imposed this is important to correlate teams, since each project uses an independent ID for team names. I.e. Gridcoin + * in one project will have, in general, a different ID, than another project. 
+ */ std::vector> vprojectteamids; std::vector vauthenicationetags; int64_t ndownloadsize = 0; @@ -64,67 +101,80 @@ int64_t nuploadsize = 0; * Global Defaults * *********************/ -// These can get overridden by the GetArgs in init.cpp or ScraperApplyAppCacheEntries. -// The appcache entries will take precedence. +// These can be overridden by the GetArgs in init.cpp (i.e. config file or command line args) or ScraperApplyAppCacheEntries. +// The appcache entries will take precedence over command line args. -// The amount of time to wait between scraper loop runs. This is in -// milliseconds. +/** The amount of time to wait between scraper loop runs. This is in + * milliseconds. + */ unsigned int nScraperSleep GUARDED_BY(cs_ScraperGlobals) = 300000; -// The amount of time before SB is due to start scraping. This is in -// seconds. +/** The amount of time before SB is due to start scraping. This is in + * seconds. + */ unsigned int nActiveBeforeSB GUARDED_BY(cs_ScraperGlobals) = 14400; -// Explorer mode flag. Only effective if scraper is active. +/** Explorer mode flag. Only effective if scraper is active. */ bool fExplorer GUARDED_BY(cs_ScraperGlobals) = false; -// These can be overridden by ScraperApplyAppCacheEntries(). +// These can be overridden by ScraperApplyAppCacheEntries() and are likely consensus affecting parameters. -// The flag to control whether non-current statistics files are retained. +/** The flag to control whether non-current statistics files are retained. */ bool SCRAPER_RETAIN_NONCURRENT_FILES GUARDED_BY(cs_ScraperGlobals) = true; -// Define 48 hour retention time for stats files, current or not. +/** Define 48 hour retention time for stats files, current or not. */ int64_t SCRAPER_FILE_RETENTION_TIME GUARDED_BY(cs_ScraperGlobals) = 48 * 3600; -// Define extended file retention time for explorer mode. +/** Define extended file retention time for explorer mode. */ int64_t EXPLORER_EXTENDED_FILE_RETENTION_TIME GUARDED_BY(cs_ScraperGlobals) = 168 * 3600; -// Define whether prior CScraperManifests are kept. +/** Define whether prior CScraperManifests are kept. */ bool SCRAPER_CMANIFEST_RETAIN_NONCURRENT GUARDED_BY(cs_ScraperGlobals) = true; -// Define CManifest scraper object retention time. +/** Define CManifest scraper object retention time. */ int64_t SCRAPER_CMANIFEST_RETENTION_TIME GUARDED_BY(cs_ScraperGlobals) = 48 * 3600; +/** Define whether non-current proj files are included published manifests. */ bool SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES GUARDED_BY(cs_ScraperGlobals) = false; // These are atomics so no explicit locking required. +/** Define significance level for magnitude rounding in the scraper. */ std::atomic MAG_ROUND = 0.01; +/** Define network-wide total magnitude */ std::atomic NETWORK_MAGNITUDE = 115000; +/** Define magnitude limit for CPID magnitude entry. */ std::atomic CPID_MAG_LIMIT = GRC::Magnitude::MAX; -// This settings below are important. This sets the minimum number of scrapers -// that must be available to form a convergence. Above this minimum, the ratio -// is followed. For example, if there are 4 scrapers, a ratio of 0.6 would require -// CEILING(0.6 * 4) = 3. See NumScrapersForSupermajority below. -// If there is only 1 scraper available, and the minimum is 2, then a convergence -// will not happen. Setting this below 2 will allow convergence to happen without -// cross checking, and is undesirable, because the scrapers are not supposed to be -// trusted entities. +// The settings below are consensus critical. 
+/** + * This sets the minimum number of scrapers that must be available to form a convergence. + * Above this minimum, the ratio is followed. For example, if there are 4 scrapers, a ratio + * of 0.6 would require CEILING(0.6 * 4) = 3. See NumScrapersForSupermajority below. + * If there is only 1 scraper available, and the minimum is 2, then a convergence + * will not happen. Setting this below 2 will allow convergence to happen without + * cross checking, and is undesirable, because the scrapers are not supposed to be + * trusted entities. + */ unsigned int SCRAPER_CONVERGENCE_MINIMUM GUARDED_BY(cs_ScraperGlobals) = 2; -// 0.6 seems like a reasonable standard for agreement. It will require... -// 2 out of 3, 3 out of 4, 3 out of 5, 4 out of 6, 5 out of 7, 5 out of 8, etc. +/** 0.6 seems like a reasonable standard for agreement. It will require... + * 2 out of 3, 3 out of 4, 3 out of 5, 4 out of 6, 5 out of 7, 5 out of 8, etc. + */ double SCRAPER_CONVERGENCE_RATIO GUARDED_BY(cs_ScraperGlobals) = 0.6; -// By Project Fallback convergence rule as a ratio of projects converged vs whitelist. -// For 20 whitelisted projects this means up to five can be excluded and a contract formed. +/** By Project Fallback convergence rule as a ratio of projects converged vs whitelist. + * For 20 whitelisted projects this means up to five can be excluded and a contract formed. + */ double CONVERGENCE_BY_PROJECT_RATIO GUARDED_BY(cs_ScraperGlobals) = 0.75; -// Allow non-scraper nodes to download stats? +/** Allow non-scraper nodes to download stats */ bool ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD GUARDED_BY(cs_ScraperGlobals) = false; -// Misbehaving scraper node banscore +/** Misbehaving scraper node banscore */ unsigned int SCRAPER_MISBEHAVING_NODE_BANSCORE GUARDED_BY(cs_ScraperGlobals) = 0; -// Require team membership in team whitelist. +/** Require team membership in team whitelist */ bool REQUIRE_TEAM_WHITELIST_MEMBERSHIP GUARDED_BY(cs_ScraperGlobals) = false; -// Default team whitelist +/** Default team whitelist. Remember this will be overridden by appcache entries. */ std::string TEAM_WHITELIST GUARDED_BY(cs_ScraperGlobals) = "Gridcoin"; -// This is the period after the deauthorizing of a scraper before the nodes will start -// to assign banscore to nodes sending unauthorized manifests. +/** This is the period after the deauthorizing of a scraper in seconds before the nodes will start + * to assign banscore to nodes sending unauthorized manifests. + */ int64_t SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD GUARDED_BY(cs_ScraperGlobals) = 300; +/** Map that holds extended app cache entries for scrapers, which includes deleted entries. */ AppCacheSectionExt mScrapersExt GUARDED_BY(cs_mScrapersExt) = {}; +/** Enum for scraper log attributes */ enum class logattribute { // Can't use ERROR here because it is defined already in windows.h. @@ -134,6 +184,7 @@ enum class logattribute CRITICAL }; +/** Defines scraper file manifest entry. These are the entries for individual project stats file downloads. */ struct ScraperFileManifestEntry { std::string filename; // Filename @@ -145,8 +196,16 @@ struct ScraperFileManifestEntry std::string filetype; }; +/** + * @brief Defines the scaper file manifest map. + * --------- filename ---ScraperFileManifestEntry + * std::map ScraperFileManifestMap + */ typedef std::map ScraperFileManifestMap; +/** Defines a structure that combines the ScraperFileManifestMap along with a map hash, the block hash of the + * consensus block, and the time that the above fields were updated. 
+ */ struct ScraperFileManifest { ScraperFileManifestMap mScraperFileManifest; @@ -156,85 +215,344 @@ struct ScraperFileManifest }; // Both TeamIDMap and ProjTeamETags are protected by cs_TeamIDMap. -// --------------- project -------------team name -- teamID +/** Stores the team IDs for each team keyed by project. (Team ID's are different for the same team across different + * projects.) + * --------- project -------------team name -- teamID + * std::map> mTeamIDs + */ typedef std::map> mTeamIDs; mTeamIDs TeamIDMap GUARDED_BY(cs_TeamIDMap); -// ProjTeamETags is not persisted to disk. There would be little to be gained by doing so. The scrapers are restarted very -// rarely, and on restart, this would only save downloading team files for those projects that have one or TeamIDs missing AND -// an ETag had NOT changed since the last pull. Not worth the complexity. -// --------------- project ---- eTag +/** ProjTeamETags is not persisted to disk. There would be little to be gained by doing so. The scrapers are restarted very + * rarely, and on restart, this would only save downloading team files for those projects that have one or TeamIDs missing + * AND an ETag had NOT changed since the last pull, IF REQUIRE_TEAM_WHITELIST_MEMBERSHIP is true. Not worth the complexity. + * ----------- project ---- eTag + * std::map mProjectTeamETags + */ typedef std::map mProjectTeamETags; +/** Global map that holds the team etag entries for each project. */ mProjectTeamETags ProjTeamETags GUARDED_BY(cs_TeamIDMap); +/** Gets a vector of teams that are whitelisted. This is only used when REQUIRE_TEAM_WHITELIST_MEMBERSHIP is true. */ std::vector GetTeamWhiteList(); -std::string urlsanity(const std::string& s, const std::string& type); +/** Global that stores file manifest state for the scraper */ ScraperFileManifest StructScraperFileManifest GUARDED_BY(cs_StructScraperFileManifest) = {}; // Although scraper_net.h declares these maps, we define them here instead of // in scraper_net.cpp to ensure that the executable destroys these objects in // order. They need to be destroyed after ConvergedScraperStatsCache: -// +/** Protects CSplitBlob::mapParts */ CCriticalSection CSplitBlob::cs_mapParts; +/** Protects CScraperManifest::mapManifest and CScraperManifest::mapPendingDeletedManifest */ CCriticalSection CScraperManifest::cs_mapManifest; +/** Protects CSplitBlob::mapParts */ std::map CSplitBlob::mapParts; +/** Global that stores published/received manifests via smart shared pointers indexed by manifest hash */ std::map> CScraperManifest::mapManifest; -// Global cache for converged scraper stats. Access must be with the lock cs_ConvergedScraperStatsCache taken. +/** Global cache for converged scraper stats. */ ConvergedScraperStats ConvergedScraperStatsCache GUARDED_BY(cs_ConvergedScraperStatsCache) = {}; +/** + * @brief Scraper loggger function + * @param eType + * @param sCall + * @param sMessage + */ void _log(logattribute eType, const std::string& sCall, const std::string& sMessage); template +/** + * @brief Applies app cache entries from the protocol appcache section to the scraper global variables + * @param key + * @param result + */ void ApplyCache(const std::string& key, T& result); +// Internal functions for scraper/subscriber operation. +/** + * @brief Gets scrapers AppCacheSection + * @return AppCacheSection + */ AppCacheSection GetScrapersCache(); +/** + * @brief Applies protocol app cache entries to the scraper globals + */ void ScraperApplyAppCacheEntries(); +/** + * @brief The scraper "main". 
This function is the main loop for scraper operation. + * @param bSingleShot. Set true for a singleshot pass at statistics collection, false (default) for normal, while loop + * operation. + */ void Scraper(bool bSingleShot = false); +/** + * @brief Function to invoke the scraper statistics download from project sites in single shot mode. + */ void ScraperSingleShot(); +/** + * @brief Function that is responsible for various housekeeping functions of the scraper/subscriber. This currently + * includes generating periodic convergences/superblocks, cleaning up DeletePendingDeletedManifests, and running the + * testnewsb function (if the scraper log category is enabled). + * @return Currently returns true. The boolean is reserved for future overall status of housekeeping. + */ bool ScraperHousekeeping(); +/** + * @brief Checks if vuserpass is populated and if empty, populates it + * @return bool true if populated + */ bool UserpassPopulated(); +/** + * @brief Checks the scraper directory structure and files against the manifest and aligns/corrects as appropriate. Also + * loads the TeamIDMap from file if REQUIRE_TEAM_WHITELIST_MEMBERSHIP is enabled and TeamIDs were saved to disk and loads + * the VerifiedBeacons from disk. + * @return + */ bool ScraperDirectoryAndConfigSanity(); +/** + * @brief Stores the current beacon map to the provided file path + * @param file + * @return bool true if successful + */ bool StoreBeaconList(const fs::path& file); +/** + * @brief Stores the current TeamID map to the provided file path + * @param file + * @return bool true if successful + */ bool StoreTeamIDList(const fs::path& file); +/** + * @brief Loads the beacons from the provided file path to the provided mBeaconMap out parameter + * @param file + * @param mBeaconMap + * @return bool true if successful + */ bool LoadBeaconList(const fs::path& file, ScraperBeaconMap& mBeaconMap); +/** + * @brief Loads the beacons from the provided ConvergedManifest to the provided mBeaconMap out parameter + * @param StructConvergedManifest + * @param mBeaconMap + * @return bool true if successful + */ bool LoadBeaconListFromConvergedManifest(const ConvergedManifest& StructConvergedManifest, ScraperBeaconMap& mBeaconMap); +/** + * @brief Loads the team ID's from the provided file to the TeamIDMap global + * @param file + * @return bool true if successful + */ bool LoadTeamIDList(const fs::path& file); -std::vector split(const std::string& s, const std::string& delim); +/** + * @brief Gets the hash of the mScraperFileManifest map in the StructScraperFileManifest. This hash is a fingerprint of + * the state of the file entries in the manifest and a change in the hash determines when it is time to publish a new + * manifest to the network if the scraper is active. + * @return bool true if successful + */ uint256 GetmScraperFileManifestHash(); +/** + * @brief Stores the mScraperFileManifest map to disk + * @param file + * @return bool true if successful + */ bool StoreScraperFileManifest(const fs::path& file); +/** + * @brief Loads the mScraperFileManifest map in the StructScraperFileManifest from disk. This is done on scraper startup + * to restore last state from disk. + * @param file + * @return bool true if successful + */ bool LoadScraperFileManifest(const fs::path& file); +/** + * @brief Inserts a file manifest entry from a newly downloaded statistics file into the mScraperFileManifest map and + * also recomputes the mScraperFileManifest map hash and stores the hash in StructScraperFileManifest.nFileManifestMapHash. 
+ * @param entry + * @return bool true if successful + */ bool InsertScraperFileManifestEntry(ScraperFileManifestEntry& entry); +/** + * @brief Deletes an entry from the mScraperFileManifest map and also deletes the corresponding file from disk, if it exists. + * Also updates the mScraperFileManifest map hash and stores the hash in StructScraperFileManifest.nFileManifestMapHash. + * @param entry + * @return unsigned int of the number of elements erased + */ unsigned int DeleteScraperFileManifestEntry(ScraperFileManifestEntry& entry); +/** + * @brief Marks a file manifest entry non-current in the mScraperFileManifest map and updates the + * StructScraperFileManifest.nFileManifestMapHash. + * @param entry + * @return + */ bool MarkScraperFileManifestEntryNonCurrent(ScraperFileManifestEntry& entry); -void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, const std::string& sProject, const bool& excludefromcsmanifest); +/** + * @brief Aligns the file manifest entries in the mScraperFileManifest map to the files present on disk. Deletes either/both + * files and/or entries that are not present and have matching hashes in both. + * @param file + * @param filetype + * @param sProject + * @param excludefromcsmanifest + */ +void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, const std::string& sProject, + const bool& excludefromcsmanifest); +/** + * @brief Constructs the scraper statistics from the current state of the scraper, which is all of the in scope files at the + * time the function is called + * @return ScraperStatsAndVerifiedBeacons + */ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState(); +/** + * @brief Computes the scraper statistics from a single CScraperManifest. This function should only be used as part of the + * superblock validation in bv11+. + * @param manifest + * @return ScraperStatsAndVerifiedBeacons + */ ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifest_shared_ptr& manifest); -bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& file, const double& projectmag, ScraperStats& mScraperStats); -bool LoadProjectObjectToStatsByCPID(const std::string& project, const CSerializeData& ProjectData, const double& projectmag, ScraperStats& mScraperStats); +/** + * @brief Loads a project manifest file from disk and computes statistics for that project + * @param project + * @param file + * @param projectmag + * @param mScraperStats + * @return bool true if successful + */ +bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& file, const double& projectmag, + ScraperStats& mScraperStats); +/** + * @brief Computes statistics from a provided project object + * @param project + * @param ProjectData + * @param projectmag + * @param mScraperStats + * @return bool true if successful + */ +bool LoadProjectObjectToStatsByCPID(const std::string& project, const CSerializeData& ProjectData, const double& projectmag, + ScraperStats& mScraperStats); +/** + * @brief Computes statistics from a provided project data stream. This is used by LoadProjectFileToStatsByCPID. 
+ * @param project + * @param sUncompressedIn + * @param projectmag + * @param mScraperStats + * @return bool true if successful + */ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::filtering_istream& sUncompressedIn, const double& projectmag, ScraperStats& mScraperStats); +/** + * @brief Once the project statistics have been computed for all of the whitelisted projects, this function is called + * to compute network-wide statistics, and also compute the magnitudes, which cannot be computed until all projects are + * processed. + * @param mScraperStats + * @return bool true if successful + */ bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats); +/** + * @brief Stores the provided mScraperStats statistics map to file. + * @param file + * @param mScraperStats + * @return bool true if successful + */ bool StoreStats(const fs::path& file, const ScraperStats& mScraperStats); +/** + * @brief Saves a CScraperManifest contents to a subdirectory of the scraper data directory which is the left 7 digits + * of the manifest hash + * @param nManifestHash + * @return bool true if successful + */ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash); +/** + * @brief Publish a CScraperManifest to the network + * @param Address + * @param Key + * @return bool true if successful + */ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key); +/** + * @brief Sorts the inventory of CScraperManifests by scraper and orders by manifest time, which is important for + * convergence determination + * @return mmCSManifestsBinnedByScraper + */ mmCSManifestsBinnedByScraper BinCScraperManifestsByScraper(); +/** + * @brief Sorts the inventory of CScraperManifests by scraper and orders by manifest time, which is important for + * convergence determination. Also culls old manifest that do not meet retention rules. + * @return mmCSManifestsBinnedByScraper + */ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests(); +/** + * @brief A DoS prevention function that deletes manifests received that are not authorized. + * @return unsigned int of the number of unauthorized manifests deleted + * + * This function is necessary because some CScraperManifest messages are likely to be received before the wallet is in sync. + * Therefore, they cannot be checked at that time by the deserialize check. Instead, while the wallet is not in sync, the + * local CScraperManifest flag bCheckedAuthorized will be set to false on any manifests received during that time. Once the + * wallet is in sync, this function will be called and will walk the mapManifest and check all Manifests to ensure the + * PubKey in the manifest is in the authorized scraper list in the AppCache. If it passes the flag will be set to true. If + * it fails, the manifest will be deleted. All manifests must be checked, because we have to deal with another condition + * where a scraper is deauthorized by network policy. This means manifests may not be authorized even if the + * bCheckedAuthorized is true from a prior check. 
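To make the walk described in the comment above concrete, the following is a minimal sketch of the idea only, using placeholder names (ManifestStub, authorized_keys) rather than the actual CScraperManifest types, locks, and AppCache calls:

#include <map>
#include <memory>
#include <set>
#include <string>

struct ManifestStub {
    std::string pubkey_id;            // stand-in for the manifest's signing public key
    bool checked_authorized = false;  // stand-in for bCheckedAuthorized
};

// Walk the manifest map: mark entries whose key is currently authorized, erase the rest.
unsigned int DeleteUnauthorized(std::map<std::string, std::shared_ptr<ManifestStub>>& manifests,
                                const std::set<std::string>& authorized_keys)
{
    unsigned int deleted = 0;

    for (auto it = manifests.begin(); it != manifests.end(); ) {
        if (authorized_keys.count(it->second->pubkey_id)) {
            it->second->checked_authorized = true;  // re-verified against the current policy
            ++it;
        } else {
            it = manifests.erase(it);               // unauthorized, or deauthorized since receipt
            ++deleted;
        }
    }

    return deleted;
}

Re-checking every entry on each pass, rather than trusting a previously set flag, is what covers the deauthorization case called out above.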
+ */ unsigned int ScraperDeleteUnauthorizedCScraperManifests(); +/** + * @brief Attempts to construct a converged manifest from the inventory of CScraperManifests on the node + * @param StructConvergedManifest (out parameter) + * @return bool true if successful + */ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifest); +/** + * @brief Attempts to construct a converged manifest at the project level from the projects which are contained in the + * inventory of CScraperManifests on the node. Note that this function is called by ScraperConstructConvergedManifest if the + * manifest level convergence is unsuccessful + * @param projectWhitelist + * @param mMapCSManifestsBinnedByScraper + * @param StructConvergedManifest (out parameter) + * @return bool true if successful + */ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& projectWhitelist, - mmCSManifestsBinnedByScraper& mMapCSManifestsBinnedByScraper, ConvergedManifest& StructConvergedManifest); - + mmCSManifestsBinnedByScraper& mMapCSManifestsBinnedByScraper, + ConvergedManifest& StructConvergedManifest); +/** + * @brief Downloads the project host statistics files and stores in the scraper data directory. This is used in explorer + * mode. + * @param projectWhitelist + * @return bool true if successful + */ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist); +/** + * @brief Download the project team files and stores in the scraper data directory. This is used when + * REQUIRE_TEAM_WHITELIST_MEMBERSHIP is true OR explorer mode is enabled. + * @param projectWhitelist + * @return bool true if successful + */ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist); +/** + * @brief Process project team file and populate TeamIDMap global + * @param project + * @param file + * @param etag + * @return bool true if successful + */ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, const std::string& etag); +/** + * @brief Download project RAC (user) files (which have CPID level statistics) for each project on the provided whitelist. + * @param projectWhitelist + * @return bool true if successful + */ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist); +/** + * @brief Process a project RAC (user) file (which has CPID level statistics) into a filtered statistcs file + * @param project + * @param file + * @param etag + * @param Consensus + * @param GlobalVerifiedBeaconsCopy + * @param IncomingVerifiedBeacons + * @return bool true if successful + */ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& file, const std::string& etag, BeaconConsensus& Consensus, ScraperVerifiedBeacons& GlobalVerifiedBeaconsCopy, ScraperVerifiedBeacons& IncomingVerifiedBeacons); -bool AuthenticationETagUpdate(const std::string& project, const std::string& etag); +/** + * @brief Clears the authentication ETag auth.dat file + */ void AuthenticationETagClear(); // Need to access from rpcblockchain.cpp @@ -340,24 +658,24 @@ BeaconConsensus GetConsensusBeaconList() return consensus; } -// A global map for verified beacons. This map is updated by ProcessProjectRacFileByCPID. -// As ProcessProjectRacFileByCPID is called in the loop for each whitelisted projects, -// a single match across any project will inject a record into this map. If multiple -// projects match, the key will match and the [] method is used, so the latest entry will be -// the only one to survive, which is fine. We only need one. 
- -// This map has to be global because the scraper function is reentrant. +/** A global map for verified beacons. This map is updated by ProcessProjectRacFileByCPID. + * As ProcessProjectRacFileByCPID is called in the loop for each whitelisted projects, + * a single match across any project will inject a record into this map. If multiple + * projects match, the key will match and the [] method is used, so the latest entry will be + * the only one to survive, which is fine. We only need one. + * + * This map has to be global because the scraper function is reentrant. + */ ScraperVerifiedBeacons g_verified_beacons GUARDED_BY(cs_VerifiedBeacons); -// Use of this global should be protected by a lock on cs_VerifiedBeacons -ScraperVerifiedBeacons& GetVerifiedBeacons() +ScraperVerifiedBeacons& GetVerifiedBeacons() EXCLUSIVE_LOCKS_REQUIRED(cs_VerifiedBeacons) { // Return global return g_verified_beacons; } -// A lock must be taken on cs_VerifiedBeacons before calling this function. -bool StoreGlobalVerifiedBeacons() +/** Stores the verified beacons map to disk */ +bool StoreGlobalVerifiedBeacons() EXCLUSIVE_LOCKS_REQUIRED(cs_VerifiedBeacons) { fs::path file = pathScraper / "VerifiedBeacons.dat"; @@ -378,8 +696,8 @@ bool StoreGlobalVerifiedBeacons() return true; } -// A lock must be taken on cs_VerifiedBeacons before calling this function. -bool LoadGlobalVerifiedBeacons() +/** Loads the verified beacons from disk into the global map */ +bool LoadGlobalVerifiedBeacons() EXCLUSIVE_LOCKS_REQUIRED(cs_VerifiedBeacons) { fs::path file = pathScraper / "VerifiedBeacons.dat"; @@ -402,14 +720,15 @@ bool LoadGlobalVerifiedBeacons() return true; } -// Check to see if any Verified Beacons have been removed from the pending beacon list. -// Removal from the pending beacon list can only happen by the pending entry expiring without -// verification, or alternatively, being marked as active when the SB is staked, which then -// removes the beacon from the pending list. If the scraper is shut down for a while and -// restarted after a significant amount of time, the verified beacons loaded from disk -// may contain stale entries. These will be immediately taken care of by comparing to the -// pending beacon list. This function also stores the state of the verified beacons on disk -// so that call does not have to be done separately. +/** Check to see if any Verified Beacons have been removed from the pending beacon list. + * Removal from the pending beacon list can only happen by the pending entry expiring without + * verification, or alternatively, being marked as active when the SB is staked, which then + * removes the beacon from the pending list. If the scraper is shut down for a while and + * restarted after a significant amount of time, the verified beacons loaded from disk + * may contain stale entries. These will be immediately taken care of by comparing to the + * pending beacon list. This function also stores the state of the verified beacons on disk + * so that call does not have to be done separately. 
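As a rough, illustration-only sketch of the pruning pass this comment describes (simplified key and value types; the real code operates on ScraperVerifiedBeacons under cs_VerifiedBeacons and then persists the surviving map):

#include <cstdint>
#include <map>
#include <set>
#include <string>

// Drop verified entries whose key no longer appears in the pending beacon set;
// such entries have either expired or were activated by a staked superblock.
unsigned int PruneStaleVerifiedBeacons(std::map<std::string, int64_t>& verified,
                                       const std::set<std::string>& pending_keys)
{
    unsigned int stale = 0;

    for (auto it = verified.begin(); it != verified.end(); ) {
        if (pending_keys.count(it->first) == 0) {
            it = verified.erase(it);
            ++stale;
        } else {
            ++it;
        }
    }

    // The real function follows this with the store-to-disk call, so persistence
    // happens in the same pass as noted above.
    return stale;
}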
+ */ void UpdateVerifiedBeaconsFromConsensus(BeaconConsensus& Consensus) { unsigned int stale = 0; @@ -454,14 +773,12 @@ void UpdateVerifiedBeaconsFromConsensus(BeaconConsensus& Consensus) } } // anonymous namespace -/********************** -* Scraper Logger * -**********************/ - +/** + * Scraper logger class + */ class ScraperLogger { - private: static CCriticalSection cs_log; @@ -528,7 +845,8 @@ class ScraperLogger ssPrevArchiveCheckDate << PrevArchiveCheckDate; // Goes in main log only and not subject to category. - LogPrint(BCLog::LogFlags::VERBOSE, "INFO: ScraperLogger: ArchiveCheckDate %s, PrevArchiveCheckDate %s", ssArchiveCheckDate.str(), ssPrevArchiveCheckDate.str()); + LogPrint(BCLog::LogFlags::VERBOSE, "INFO: ScraperLogger: ArchiveCheckDate %s, PrevArchiveCheckDate %s", + ssArchiveCheckDate.str(), ssPrevArchiveCheckDate.str()); fs::path LogArchiveDir = pathDataDir / "logarchive"; @@ -654,6 +972,10 @@ class ScraperLogger } }; +/** + * @brief Global singleton instance of the scraper logger + * @return + */ ScraperLogger& ScraperLogInstance() { // This is similar to Bitcoin's newer approach. @@ -661,9 +983,13 @@ ScraperLogger& ScraperLogInstance() return *scraperlogger; } +/** Protects the scraper logger singleton */ CCriticalSection ScraperLogger::cs_log; boost::gregorian::date ScraperLogger::PrevArchiveCheckDate = boost::posix_time::from_time_t(GetAdjustedTime()).date(); +/** Accessor function to make scraper log entries using the scraper logger. Also shunts a subset of entries to the + * debug log as appropriate. + */ void _log(logattribute eType, const std::string& sCall, const std::string& sMessage) { std::string sType; @@ -712,12 +1038,7 @@ void _log(logattribute eType, const std::string& sCall, const std::string& sMess } - - -/********************** -* String Builder EXP * -**********************/ - +/** A small utility class for building strings */ class stringbuilder { protected: @@ -771,9 +1092,10 @@ class stringbuilder }; -/********************* -* Whitelist Data * -*********************/ +/** + * @brief Returns the age of the current superblock + * @return int64_t representing the age in seconds of the current superblock + */ int64_t SuperblockAge() { LOCK(cs_main); @@ -781,6 +1103,10 @@ int64_t SuperblockAge() return Quorum::CurrentSuperblock().Age(GetAdjustedTime()); } +/** + * @brief Gets a vector of whitelisted teams from the scraper global TEAM_WHITELIST. 
+ * @return std::vector of whitelisted teams + */ std::vector GetTeamWhiteList() { std::string delimiter; @@ -801,10 +1127,10 @@ std::vector GetTeamWhiteList() return split(TEAM_WHITELIST, delimiter); } -/********************* -* Userpass Data * -*********************/ +/** Username and password data utility class for accessing project stats that have implemented usernam and password + * protection for stats downloads to satisfy GDPR requirements + */ class userpass { private: @@ -858,10 +1184,7 @@ class userpass } }; -/********************* -* Auth Data * -*********************/ - +/** Authentication ETag data utility class */ class authdata { private: @@ -918,7 +1241,8 @@ class authdata catch (std::exception& ex) { - _log(logattribute::CRITICAL, "auth_data_export", "Failed to export auth data due to exception (" + std::string(ex.what()) + ")"); + _log(logattribute::CRITICAL, "auth_data_export", "Failed to export auth data due to exception (" + + std::string(ex.what()) + ")"); return false; } @@ -1044,26 +1368,45 @@ void ScraperApplyAppCacheEntries() ApplyCache("TEAM_WHITELIST", TEAM_WHITELIST); ApplyCache("SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD", SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "scrapersleep = " + ToString(nScraperSleep)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "activebeforesb = " + ToString(nActiveBeforeSB)); - - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_RETAIN_NONCURRENT_FILES = " + ToString(SCRAPER_RETAIN_NONCURRENT_FILES)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_FILE_RETENTION_TIME = " + ToString(SCRAPER_FILE_RETENTION_TIME)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "EXPLORER_EXTENDED_FILE_RETENTION_TIME = " + ToString(EXPLORER_EXTENDED_FILE_RETENTION_TIME)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_RETAIN_NONCURRENT = " + ToString(SCRAPER_CMANIFEST_RETAIN_NONCURRENT)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_RETENTION_TIME = " + ToString(SCRAPER_CMANIFEST_RETENTION_TIME)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES = " + ToString(SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "MAG_ROUND = " + ToString(MAG_ROUND)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "NETWORK_MAGNITUDE = " + ToString(NETWORK_MAGNITUDE)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "CPID_MAG_LIMIT = " + ToString(CPID_MAG_LIMIT)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CONVERGENCE_MINIMUM = " + ToString(SCRAPER_CONVERGENCE_MINIMUM)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_CONVERGENCE_RATIO = " + ToString(SCRAPER_CONVERGENCE_RATIO)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "CONVERGENCE_BY_PROJECT_RATIO = " + ToString(CONVERGENCE_BY_PROJECT_RATIO)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD = " + ToString(ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_MISBEHAVING_NODE_BANSCORE = " + ToString(SCRAPER_MISBEHAVING_NODE_BANSCORE)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "REQUIRE_TEAM_WHITELIST_MEMBERSHIP = " + ToString(REQUIRE_TEAM_WHITELIST_MEMBERSHIP)); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "TEAM_WHITELIST = " 
+ TEAM_WHITELIST); - _log(logattribute::INFO, "ScraperApplyAppCacheEntries", "SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD = " + ToString(SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "scrapersleep = " + ToString(nScraperSleep)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "activebeforesb = " + ToString(nActiveBeforeSB)); + + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_RETAIN_NONCURRENT_FILES = " + ToString(SCRAPER_RETAIN_NONCURRENT_FILES)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_FILE_RETENTION_TIME = " + ToString(SCRAPER_FILE_RETENTION_TIME)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "EXPLORER_EXTENDED_FILE_RETENTION_TIME = " + ToString(EXPLORER_EXTENDED_FILE_RETENTION_TIME)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_CMANIFEST_RETAIN_NONCURRENT = " + ToString(SCRAPER_CMANIFEST_RETAIN_NONCURRENT)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_CMANIFEST_RETENTION_TIME = " + ToString(SCRAPER_CMANIFEST_RETENTION_TIME)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES = " + ToString(SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "MAG_ROUND = " + ToString(MAG_ROUND)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "NETWORK_MAGNITUDE = " + ToString(NETWORK_MAGNITUDE)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "CPID_MAG_LIMIT = " + ToString(CPID_MAG_LIMIT)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_CONVERGENCE_MINIMUM = " + ToString(SCRAPER_CONVERGENCE_MINIMUM)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_CONVERGENCE_RATIO = " + ToString(SCRAPER_CONVERGENCE_RATIO)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "CONVERGENCE_BY_PROJECT_RATIO = " + ToString(CONVERGENCE_BY_PROJECT_RATIO)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD = " + ToString(ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_MISBEHAVING_NODE_BANSCORE = " + ToString(SCRAPER_MISBEHAVING_NODE_BANSCORE)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "REQUIRE_TEAM_WHITELIST_MEMBERSHIP = " + ToString(REQUIRE_TEAM_WHITELIST_MEMBERSHIP)); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "TEAM_WHITELIST = " + TEAM_WHITELIST); + _log(logattribute::INFO, "ScraperApplyAppCacheEntries", + "SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD = " + ToString(SCRAPER_DEAUTHORIZED_BANSCORE_GRACE_PERIOD)); } AppCacheSection mScrapers = GetScrapersCache(); @@ -1072,7 +1415,8 @@ void ScraperApplyAppCacheEntries() for (auto const& entry : mScrapers) { _log(logattribute::INFO, "ScraperApplyAppCacheEntries", - "Scraper entry: " + entry.first + ", " + entry.second.value + ", " + DateTimeStrFormat("%x %H:%M:%S", entry.second.timestamp)); + "Scraper entry: " + entry.first + ", " + entry.second.value + ", " + + DateTimeStrFormat("%x %H:%M:%S", entry.second.timestamp)); } } @@ -1088,19 +1432,19 @@ AppCacheSectionExt GetExtendedScrapersCache() // For the IsManifestAuthorized() function... /* We cannot use the AppCacheSection mScrapers in the raw, because there are two ways to deauthorize scrapers. - * The first way is to change the value of an existing entry to false. This works fine with mScrapers. 
The second way is to - * issue an addkey delete key. This will remove the key entirely, therefore deauthorizing the scraper. We need to preserve - * the key entry of the deleted record and when it was deleted to calculate a grace period. Why? To ensure that + * The first way is to change the value of an existing entry to false. This works fine with mScrapers. The second way + * is to issue an addkey delete key. This will remove the key entirely, therefore deauthorizing the scraper. We need to + * preserve the key entry of the deleted record and when it was deleted to calculate a grace period. Why? To ensure that * we do not generate islanding in the network in the case of a scraper deauthorization, we must apply a grace period - * after the timestamp of the marking of false/deletion, or from the time when the wallet came in sync, whichever is greater, before - * we start assigning a banscore to nodes that send/forward unauthorized manifests. This is because not all nodes - * may receive and accept the block that contains the transaction that modifies or deletes the scraper appcache entry - * at the same time, so there is a chance a node could send/forward an unauthorized manifest between when the scraper - * is deauthorized and the block containing that deauthorization is received by the sending node. + * after the timestamp of the marking of false/deletion, or from the time when the wallet came in sync, whichever is + * greater, before we start assigning a banscore to nodes that send/forward unauthorized manifests. This is because not + * all nodes may receive and accept the block that contains the transaction that modifies or deletes the scraper appcache + * entry at the same time, so there is a chance a node could send/forward an unauthorized manifest between when the + * scraper is deauthorized and the block containing that deauthorization is received by the sending node. */ - // So we are going to make use of AppCacheEntryExt and mScrapersExt, which are just like the normal AppCache structure, except they - // have an explicit deleted boolean. + // So we are going to make use of AppCacheEntryExt and mScrapersExt, which are just like the normal AppCache structure, + // except they have an explicit deleted boolean. // First, walk the mScrapersExt map and see if it contains an entry that does not exist in mScrapers. If so, // update the entry's value and timestamp and mark deleted. @@ -1260,13 +1604,15 @@ void Scraper(bool bSingleShot) ScraperFileManifestMap::iterator entry; - for (entry = StructScraperFileManifest.mScraperFileManifest.begin(); entry != StructScraperFileManifest.mScraperFileManifest.end(); ) + for (entry = StructScraperFileManifest.mScraperFileManifest.begin(); + entry != StructScraperFileManifest.mScraperFileManifest.end(); ) { ScraperFileManifestMap::iterator entry_copy = entry++; if (!projectWhitelist.Contains(entry_copy->second.project)) { - _log(logattribute::INFO, "Scraper", "Removing manifest entry for non-whitelisted project: " + entry_copy->first); + _log(logattribute::INFO, "Scraper", "Removing manifest entry for non-whitelisted project: " + + entry_copy->first); DeleteScraperFileManifestEntry(entry_copy->second); } } @@ -1281,22 +1627,25 @@ void Scraper(bool bSingleShot) else _log(logattribute::INFO, "Scraper", "Stored Beacon List"); - // If team filtering is set by policy then pull down and retrieve team IDs as needed. This loads the TeamIDMap global. 
- // Note that the call(s) to ScraperDirectoryAndConfigSanity() above will preload the team ID map from the persisted file - // if it exists, so this will minimize the work that DownloadProjectTeamFiles() has to do, unless explorer mode (fExplorer) is true. + // If team filtering is set by policy then pull down and retrieve team IDs as needed. This loads the TeamIDMap + // global. Note that the call(s) to ScraperDirectoryAndConfigSanity() above will preload the team ID map from + // the persisted file if it exists, so this will minimize the work that DownloadProjectTeamFiles() has to do, + // unless explorer mode (fExplorer) is true. if (require_team_whitelist_membership() || explorer_mode()) DownloadProjectTeamFiles(projectWhitelist); DownloadProjectRacFilesByCPID(projectWhitelist); - // If explorer mode is set (fExplorer is true), then download host files. These are currently not use for any other processing, - // so there is no corresponding Process function for the host files. + // If explorer mode is set (fExplorer is true), then download host files. These are currently not use for any + // other processing, so there is no corresponding Process function for the host files. if (explorer_mode()) DownloadProjectHostFiles(projectWhitelist); - _log(logattribute::INFO, "Scraper", "download size so far: " + ToString(ndownloadsize) + " upload size so far: " + ToString(nuploadsize)); + _log(logattribute::INFO, "Scraper", "download size so far: " + ToString(ndownloadsize) + " upload size so far: " + + ToString(nuploadsize)); ScraperStats mScraperStats = GetScraperStatsByCurrentFileManifestState().mScraperStats; - _log(logattribute::INFO, "Scraper", "mScraperStats has the following number of elements: " + ToString(mScraperStats.size())); + _log(logattribute::INFO, "Scraper", "mScraperStats has the following number of elements: " + + ToString(mScraperStats.size())); if (!StoreStats(pathScraper / "Stats.csv.gz", mScraperStats)) _log(logattribute::ERR, "Scraper", "StoreStats error occurred"); @@ -1321,7 +1670,8 @@ void Scraper(bool bSingleShot) { LOCK2(cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest); - // If the hash is valid and doesn't match (a new one is available), or there are none, then publish a new one. + // If the hash is valid and doesn't match (a new one is available), or there are none, then publish a new + // one. if (!StructScraperFileManifest.nFileManifestMapHash.IsNull() && (nmScraperFileManifestHash != StructScraperFileManifest.nFileManifestMapHash || !CScraperManifest::mapManifest.size())) @@ -1380,8 +1730,8 @@ void ScraperSubscriber() _log(logattribute::INFO, "Scraper", "Using data directory " + pathScraper.string()); _log(logattribute::INFO, "ScraperSubscriber", "Starting scraper subscriber housekeeping thread. \n" - "Note that this does NOT mean the subscriber is active. This simply does housekeeping " - "functions."); + "Note that this does NOT mean the subscriber is active. This simply does " + "housekeeping functions."); auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; @@ -1446,7 +1796,8 @@ bool ScraperHousekeeping() EXCLUSIVE_LOCKS_REQUIRED(cs_Scraper) // Make sure deleted manifests pending permanent deletion are culled. 
nPendingDeleted = CScraperManifest::DeletePendingDeletedManifests(); - _log(logattribute::INFO, "ScraperHousekeeping", "Permanently deleted " + ToString(nPendingDeleted) + " manifest(s) pending permanent deletion."); + _log(logattribute::INFO, "ScraperHousekeeping", "Permanently deleted " + ToString(nPendingDeleted) + + " manifest(s) pending permanent deletion."); _log(logattribute::INFO, "ScraperHousekeeping", "Size of mapPendingDeletedManifest after delete = " + ToString(CScraperManifest::mapPendingDeletedManifest.size())); } @@ -1481,7 +1832,8 @@ bool ScraperDirectoryAndConfigSanity() auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; auto scraper_retain_noncurrent_files = []() { LOCK(cs_ScraperGlobals); return SCRAPER_RETAIN_NONCURRENT_FILES; }; auto scraper_file_retention_time = []() { LOCK(cs_ScraperGlobals); return SCRAPER_FILE_RETENTION_TIME; }; - auto explorer_extended_file_retention_time = []() { LOCK(cs_ScraperGlobals); return EXPLORER_EXTENDED_FILE_RETENTION_TIME; }; + auto explorer_extended_file_retention_time = []() { LOCK(cs_ScraperGlobals); + return EXPLORER_EXTENDED_FILE_RETENTION_TIME; }; auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; }; ScraperApplyAppCacheEntries(); @@ -1534,13 +1886,15 @@ bool ScraperDirectoryAndConfigSanity() if (LogInstance().WillLogCategory(BCLog::LogFlags::NOISY)) { - _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", "Iterating through directory - checking file " + filename); + _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", + "Iterating through directory - checking file " + filename); } if (entry == StructScraperFileManifest.mScraperFileManifest.end()) { fs::remove(dir.path()); - _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Removing orphan file not in Manifest: " + filename); + _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", + "Removing orphan file not in Manifest: " + filename); continue; } @@ -1549,7 +1903,8 @@ bool ScraperDirectoryAndConfigSanity() { if (entry->second.hash != GetFileHash(dir)) { - _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", "File failed hash check. Removing file."); + _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", + "File failed hash check. Removing file."); fs::remove(dir.path()); } } @@ -1559,29 +1914,34 @@ bool ScraperDirectoryAndConfigSanity() // Now iterate through the Manifest map and remove entries with no file, or entries and files older than // nRetentionTime, whether they are current or not, and remove non-current files regardless of time //if fScraperRetainNonCurrentFiles is false. - for (entry = StructScraperFileManifest.mScraperFileManifest.begin(); entry != StructScraperFileManifest.mScraperFileManifest.end(); ) + for (entry = StructScraperFileManifest.mScraperFileManifest.begin(); + entry != StructScraperFileManifest.mScraperFileManifest.end(); ) { ScraperFileManifestMap::iterator entry_copy = entry++; - int64_t nFileRetentionTime = explorer_mode() ? explorer_extended_file_retention_time() : scraper_file_retention_time(); + int64_t nFileRetentionTime = explorer_mode() ? 
+ explorer_extended_file_retention_time() : scraper_file_retention_time(); if (LogInstance().WillLogCategory(BCLog::LogFlags::NOISY)) { - _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", "Iterating through map - checking map entry " + entry_copy->first); + _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", + "Iterating through map - checking map entry " + entry_copy->first); } if (!fs::exists(pathScraper / entry_copy->first) || ((GetAdjustedTime() - entry_copy->second.timestamp) > nFileRetentionTime) || (!scraper_retain_noncurrent_files() && entry_copy->second.current == false)) { - _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Removing stale or orphan manifest entry: " + entry_copy->first); + _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", + "Removing stale or orphan manifest entry: " + entry_copy->first); DeleteScraperFileManifestEntry(entry_copy->second); } } } - // If network policy is set to filter on whitelisted teams, then load team ID map from file. This will prevent the heavyweight - // team file downloads for projects whose team IDs have already been found and stored, unless explorer mode (fExplorer) is true. + // If network policy is set to filter on whitelisted teams, then load team ID map from file. This will prevent + // the heavyweight team file downloads for projects whose team IDs have already been found and stored, unless + // explorer mode (fExplorer) is true. if (require_team_whitelist_membership()) { LOCK(cs_TeamIDMap); @@ -1590,7 +1950,8 @@ bool ScraperDirectoryAndConfigSanity() { _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", "Loading team IDs"); if (!LoadTeamIDList(pathScraper / "TeamIDs.csv.gz")) - _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Unable to load team IDs. This is normal for first time startup."); + _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", + "Unable to load team IDs. This is normal for first time startup."); else { _log(logattribute::INFO, "ScraperDirectoryAndConfigSanity", "Loaded team IDs file into map."); @@ -1618,8 +1979,8 @@ bool ScraperDirectoryAndConfigSanity() if (!GetVerifiedBeacons().LoadedFromDisk && !LoadGlobalVerifiedBeacons()) { - _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", "Initial verified beacon load from file " - "failed. This is not necessarily a problem."); + _log(logattribute::WARNING, "ScraperDirectoryAndConfigSanity", + "Initial verified beacon load from file failed. This is not necessarily a problem."); } } } // if fScraperActive @@ -1640,10 +2001,6 @@ void AuthenticationETagClear() fs::remove(file); } -/********************** -* Populate UserPass * -**********************/ - bool UserpassPopulated() { if (vuserpass.empty()) @@ -1663,15 +2020,12 @@ bool UserpassPopulated() } } - _log(logattribute::INFO, "UserPassPopulated", "Userpass is populated; Contains " + ToString(vuserpass.size()) + " projects"); + _log(logattribute::INFO, "UserPassPopulated", "Userpass is populated; Contains " + + ToString(vuserpass.size()) + " projects"); return true; } -/********************** -* Project Host Files * -**********************/ - bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) { auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; @@ -1679,7 +2033,8 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) // If fExplorer is false then skip processing. (This should not be called anyway, but return immediately just in case. 
if (!explorer_mode()) { - _log(logattribute::INFO, "DownloadProjectHostFiles", "Not in explorer mode. Skipping host file download and processing."); + _log(logattribute::INFO, "DownloadProjectHostFiles", + "Not in explorer mode. Skipping host file download and processing."); return false; } @@ -1690,7 +2045,8 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) return false; } - _log(logattribute::INFO, "DownloadProjectHostFiles", "Whitelist is populated; Contains " + ToString(projectWhitelist.size()) + " projects"); + _log(logattribute::INFO, "DownloadProjectHostFiles", "Whitelist is populated; Contains " + + ToString(projectWhitelist.size()) + " projects"); if (!UserpassPopulated()) { @@ -1728,7 +2084,8 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) } catch (const std::runtime_error& e) { - _log(logattribute::ERR, "DownloadProjectHostFiles", "Failed to pull host header file for " + prjs.m_name + ": " + e.what()); + _log(logattribute::ERR, "DownloadProjectHostFiles", "Failed to pull host header file for " + + prjs.m_name + ": " + e.what()); continue; } @@ -1748,7 +2105,8 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) ad.setoutputdata("host", prjs.m_name, sHostETag); if (!ad.xport()) - _log(logattribute::CRITICAL, "DownloadProjectHostFiles", "Failed to export etag for " + prjs.m_name + " to authentication file"); + _log(logattribute::CRITICAL, "DownloadProjectHostFiles", "Failed to export etag for " + + prjs.m_name + " to authentication file"); } std::string host_file_name; @@ -1771,7 +2129,8 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) } catch(const std::runtime_error& e) { - _log(logattribute::ERR, "DownloadProjectHostFiles", "Failed to download project host file for " + prjs.m_name + ": " + e.what()); + _log(logattribute::ERR, "DownloadProjectHostFiles", "Failed to download project host file for " + + prjs.m_name + ": " + e.what()); continue; } @@ -1782,10 +2141,6 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist) return true; } -/********************** -* Project Team Files * -**********************/ - bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) { auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; @@ -1798,7 +2153,8 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) return false; } - _log(logattribute::INFO, "DownloadProjectTeamFiles", "Whitelist is populated; Contains " + ToString(projectWhitelist.size()) + " projects"); + _log(logattribute::INFO, "DownloadProjectTeamFiles", "Whitelist is populated; Contains " + + ToString(projectWhitelist.size()) + " projects"); if (!UserpassPopulated()) { @@ -1820,7 +2176,8 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) // then skip processing altogether. if (!explorer_mode() && !fProjTeamIDsMissing) { - _log(logattribute::INFO, "DownloadProjectTeamFiles", "Correct team whitelist entries already in the team ID map for " + _log(logattribute::INFO, "DownloadProjectTeamFiles", + "Correct team whitelist entries already in the team ID map for " + prjs.m_name + " project. 
Skipping team file download and processing."); continue; } @@ -1852,7 +2209,8 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) } catch (const std::runtime_error& e) { - _log(logattribute::ERR, "DownloadProjectTeamFiles", "Failed to pull team header file for " + prjs.m_name + ": " + e.what()); + _log(logattribute::ERR, "DownloadProjectTeamFiles", "Failed to pull team header file for " + + prjs.m_name + ": " + e.what()); continue; } @@ -1872,7 +2230,8 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) ad.setoutputdata("team", prjs.m_name, sTeamETag); if (!ad.xport()) - _log(logattribute::CRITICAL, "DownloadProjectTeamFiles", "Failed to export etag for " + prjs.m_name + " to authentication file"); + _log(logattribute::CRITICAL, "DownloadProjectTeamFiles", "Failed to export etag for " + + prjs.m_name + " to authentication file"); } std::string team_file_name; @@ -1881,9 +2240,9 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) bool bETagChanged = false; // Detect change in ETag from in memory versioning. - // ProjTeamETags is not persisted to disk. There would be little to be gained by doing so. The scrapers are restarted very - // rarely, and on restart, this would only save downloading team files for those projects that have one or TeamIDs missing AND - // an ETag had NOT changed since the last pull. Not worth the complexity. + // ProjTeamETags is not persisted to disk. There would be little to be gained by doing so. The scrapers are + // restarted very rarely, and on restart, this would only save downloading team files for those projects that have + // one or TeamIDs missing AND an ETag had NOT changed since the last pull. Not worth the complexity. auto const& iPrevETag = ProjTeamETags.find(prjs.m_name); if (iPrevETag == ProjTeamETags.end() || iPrevETag->second != sTeamETag) @@ -1893,7 +2252,6 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) _log(logattribute::INFO, "DownloadProjectTeamFiles", "Team header file ETag has changed for " + prjs.m_name); } - if (explorer_mode()) { // Use eTag versioning ON THE DISK with eTag versioned team files per project. @@ -1914,13 +2272,14 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) else { // Not in explorer mode... - // No versioning ON THE DISK for the individual team files for a given project. However, if the eTag pulled from the header - // does not match the entry in ProjTeamETags, then download the file and process. Note that this combined with the size check - // above means that the size of the inner map for the mTeamIDs for this project already doesn't match, which means either there - // were teams that cannot be associated (-1 entries in the file), or there was an addition to or deletion from the team - // whitelist. Either way if the ETag has changed under this condition, the -1 entries may be subject to change so the team file - // must be downloaded and processed to see if it has and update. If the ETag matches what was in the map, then the state - // has not changed since the team file was last processed, and no need to download and process again. + // No versioning ON THE DISK for the individual team files for a given project. However, if the eTag pulled from + // the header does not match the entry in ProjTeamETags, then download the file and process. 
Note that this + // combined with the size check above means that the size of the inner map for the mTeamIDs for this project + // already doesn't match, which means either there were teams that cannot be associated (-1 entries in the file), + // or there was an addition to or deletion from the team whitelist. Either way if the ETag has changed under this + // condition, the -1 entries may be subject to change so the team file must be downloaded and processed to see if + // it has an update. If the ETag matches what was in the map, then the state has not changed since the team file + // was last processed, and no need to download and process again. team_file_name = prjs.m_name + "-team.gz"; team_file = pathScraper / team_file_name; @@ -1941,27 +2300,27 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist) } catch(const std::runtime_error& e) { - _log(logattribute::ERR, "DownloadProjectTeamFiles", "Failed to download project team file for " + prjs.m_name + ": " + e.what()); + _log(logattribute::ERR, "DownloadProjectTeamFiles", + "Failed to download project team file for " + prjs.m_name + ": " + e.what()); continue; } } - // If in explorer mode and new file downloaded, save team xml files to file manifest map with exclude from CSManifest flag set to true. - // If not in explorer mode, this is not necessary, because the team xml file is just temporary and can be discarded after - // processing. + // If in explorer mode and new file downloaded, save team xml files to file manifest map with exclude from CSManifest + // flag set to true. If not in explorer mode, this is not necessary, because the team xml file is just temporary and + // can be discarded after processing. if (explorer_mode() && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true); - // If require team whitelist is set and bETagChanged is true, then process the file. This also populates/updated the team whitelist TeamIDs - // in the TeamIDMap and the ETag entries in the ProjTeamETags map. + // If require team whitelist is set and bETagChanged is true, then process the file. This also populates/updates the + // team whitelist TeamIDs in the TeamIDMap and the ETag entries in the ProjTeamETags map. if (require_team_whitelist_membership() && bETagChanged) ProcessProjectTeamFile(prjs.m_name, team_file, sTeamETag); } return true; } -// Note this should be called with a lock held on cs_TeamIDMap, which is intended to protect both -// TeamIDMap and ProjTeamETags.
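The note removed just above stated the locking contract for ProcessProjectTeamFile in prose; the patch now states it with a Clang thread-safety attribute on the function itself so the compiler can enforce it. A minimal, self-contained illustration of that pattern follows; cs_example and g_example are placeholders rather than actual scraper globals, and sync.h refers to the codebase's existing locking header:

#include "sync.h"  // CCriticalSection, LOCK, GUARDED_BY, EXCLUSIVE_LOCKS_REQUIRED

CCriticalSection cs_example;
int g_example GUARDED_BY(cs_example) = 0;

// Callers must already hold cs_example; -Wthread-safety flags any call site that does not.
int ReadExample() EXCLUSIVE_LOCKS_REQUIRED(cs_example)
{
    return g_example;
}

void BumpExample()
{
    LOCK(cs_example);    // satisfies the requirement for the annotated call below
    ++g_example;
    (void)ReadExample();
}

Unlike the removed comment, the annotation cannot silently go stale: if a caller forgets the lock, static analysis fails instead of the contract being violated at runtime.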
-bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, const std::string& etag) EXCLUSIVE_LOCKS_REQUIRED(cs_TeamIDMap) +bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, const std::string& etag) +EXCLUSIVE_LOCKS_REQUIRED(cs_TeamIDMap) { auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; @@ -1975,7 +2334,8 @@ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, co if (!ingzfile) { - _log(logattribute::ERR, "ProcessProjectTeamFile", "Failed to open team gzip file (" + file.filename().string() + ")"); + _log(logattribute::ERR, "ProcessProjectTeamFile", + "Failed to open team gzip file (" + file.filename().string() + ")"); return false; } @@ -2048,7 +2408,8 @@ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, co ProjTeamETags[project] = etag; if (mTeamIdsForProject.size() < vTeamWhiteList.size()) - _log(logattribute::WARNING, "ProcessProjectTeamFile", "Unable to determine team IDs for one or more whitelisted teams. This is not necessarily an error."); + _log(logattribute::WARNING, "ProcessProjectTeamFile", + "Unable to determine team IDs for one or more whitelisted teams. This is not necessarily an error."); // The below is not an ideal implementation, because the entire map is going to be written out to disk each time. // The TeamIDs file is actually very small though, and this primitive implementation will suffice. @@ -2066,10 +2427,6 @@ bool ProcessProjectTeamFile(const std::string& project, const fs::path& file, co return true; } -/********************** -* Project RAC Files * -**********************/ - bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) { auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; }; @@ -2081,7 +2438,8 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) return false; } - _log(logattribute::INFO, "DownloadProjectRacFiles", "Whitelist is populated; Contains " + ToString(projectWhitelist.size()) + " projects"); + _log(logattribute::INFO, "DownloadProjectRacFiles", "Whitelist is populated; Contains " + + ToString(projectWhitelist.size()) + " projects"); if (!UserpassPopulated()) { @@ -2151,7 +2509,8 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) sRacETag = http.GetEtag(prjs.StatsUrl("user"), userpass); } catch (const std::runtime_error& e) { - _log(logattribute::ERR, "DownloadProjectRacFiles", "Failed to pull rac header file for " + prjs.m_name + ": " + e.what()); + _log(logattribute::ERR, "DownloadProjectRacFiles", "Failed to pull rac header file for " + + prjs.m_name + ": " + e.what()); continue; } @@ -2172,7 +2531,8 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) ad.setoutputdata("user", prjs.m_name, sRacETag); if (!ad.xport()) - _log(logattribute::CRITICAL, "DownloadProjectRacFiles", "Failed to export etag for " + prjs.m_name + " to authentication file"); + _log(logattribute::CRITICAL, "DownloadProjectRacFiles", "Failed to export etag for " + + prjs.m_name + " to authentication file"); } std::string rac_file_name; @@ -2199,7 +2559,8 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) } else { - // No versioning for source file. If file exists delete it and download anew, unless processed file already present. + // No versioning for source file. If file exists delete it and download anew, unless processed file already + // present. 
rac_file_name = prjs.m_name + "-user.gz"; rac_file = pathScraper / rac_file_name; @@ -2219,15 +2580,18 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist) } catch(const std::runtime_error& e) { - _log(logattribute::ERR, "DownloadProjectRacFiles", "Failed to download project rac file for " + prjs.m_name + ": " + e.what()); + _log(logattribute::ERR, "DownloadProjectRacFiles", "Failed to download project rac file for " + + prjs.m_name + ": " + e.what()); continue; } - // If in explorer mode, save user (rac) source xml files to file manifest map with exclude from CSManifest flag set to true. + // If in explorer mode, save user (rac) source xml files to file manifest map with exclude from CSManifest flag set + // to true. if (explorer_mode()) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true); // Now that the source file is handled, process the file. - ProcessProjectRacFileByCPID(prjs.m_name, rac_file, sRacETag, Consensus, GlobalVerifiedBeaconsCopy, IncomingVerifiedBeacons); + ProcessProjectRacFileByCPID(prjs.m_name, rac_file, sRacETag, Consensus, + GlobalVerifiedBeaconsCopy, IncomingVerifiedBeacons); } // for prjs : projectWhitelist // Get the global verified beacons and copy the incoming verified beacons from the @@ -2442,8 +2806,8 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil if (bfileerror) { - _log(logattribute::WARNING, "ProcessProjectRacFileByCPID", "Data processing of " + file.string() + " yielded no CPIDs with stats; " - "file may have been truncated. Removing source file."); + _log(logattribute::WARNING, "ProcessProjectRacFileByCPID", "Data processing of " + file.string() + + " yielded no CPIDs with stats; file may have been truncated. Removing source file."); ingzfile.close(); outgzfile.flush(); @@ -2479,7 +2843,8 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil fs::path temp = gzetagfile; size_t fileb = fs::file_size(temp); - _log(logattribute::INFO, "ProcessProjectRacFileByCPID", "Processed new rac file " + file.string() + "(" + ToString(filea) + " -> " + ToString(fileb) + ")"); + _log(logattribute::INFO, "ProcessProjectRacFileByCPID", "Processed new rac file " + + file.string() + "(" + ToString(filea) + " -> " + ToString(fileb) + ")"); ndownloadsize += (int64_t)filea; nuploadsize += (int64_t)fileb; @@ -2492,7 +2857,8 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil // If not in explorer mode, no need to retain source file. if (!explorer_mode()) fs::remove(file); - // Here, regardless of explorer mode, save processed rac files to file manifest map with exclude from CSManifest flag set to false. + // Here, regardless of explorer mode, save processed rac files to file manifest map with exclude from CSManifest flag + // set to false. AlignScraperFileManifestEntries(gzetagfile, "user", project, false); _log(logattribute::INFO, "ProcessProjectRacFileByCPID", "Complete Process"); @@ -2500,7 +2866,6 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil return true; } - uint256 GetFileHash(const fs::path& inputfile) { // open input file, and associate with CAutoFile @@ -2535,8 +2900,6 @@ uint256 GetFileHash(const fs::path& inputfile) return nHash; } - -// Note that cs_StructScraperFileManifest needs to be taken before calling. 
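The note removed above documented the lock requirement for GetmScraperFileManifestHash, which follows and now carries the EXCLUSIVE_LOCKS_REQUIRED annotation instead. Its purpose, per the new header comment earlier in this file, is a fingerprint of the file manifest map: every entry is folded into one digest so that any insert, delete, or flag change alters the hash and signals that a new manifest should be published. A deliberately simplified stand-in using only the standard library is sketched below; the real code produces a uint256 with the codebase's own hashing machinery:

#include <cstdint>
#include <functional>
#include <map>
#include <string>

struct FileEntryStub {
    std::string hash_hex;   // stand-in for the entry's uint256 file hash
    int64_t timestamp = 0;
    bool current = false;
};

// Fold every key and field into one digest; any change to the map changes the result.
size_t FingerprintManifestMap(const std::map<std::string, FileEntryStub>& manifest)
{
    size_t seed = 0;
    auto mix = [&seed](size_t v) { seed ^= v + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2); };

    for (const auto& kv : manifest) {
        mix(std::hash<std::string>{}(kv.first));
        mix(std::hash<std::string>{}(kv.second.hash_hex));
        mix(std::hash<int64_t>{}(kv.second.timestamp));
        mix(std::hash<bool>{}(kv.second.current));
    }

    return seed;
}

Because std::map iterates in key order, the digest is deterministic for a given map state, which is what makes the "hash doesn't match, publish a new manifest" check in Scraper() meaningful.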
uint256 GetmScraperFileManifestHash() EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) { uint256 nHash; @@ -2568,10 +2931,6 @@ uint256 GetmScraperFileManifestHash() EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperF return nHash; } -/*********************** -* Persistence * -************************/ - bool LoadBeaconList(const fs::path& file, ScraperBeaconMap& mBeaconMap) { fsbridge::ifstream ingzfile(file, std::ios_base::in | std::ios_base::binary); @@ -2698,8 +3057,10 @@ bool StoreBeaconList(const fs::path& file) { BeaconConsensus Consensus = GetConsensusBeaconList(); - _log(logattribute::INFO, "StoreBeaconList", "ReadCacheSection element count: " + ToString(GetBeaconRegistry().Beacons().size())); - _log(logattribute::INFO, "StoreBeaconList", "mBeaconMap element count: " + ToString(Consensus.mBeaconMap.size())); + _log(logattribute::INFO, "StoreBeaconList", "ReadCacheSection element count: " + + ToString(GetBeaconRegistry().Beacons().size())); + _log(logattribute::INFO, "StoreBeaconList", "mBeaconMap element count: " + + ToString(Consensus.mBeaconMap.size())); // Update block hash for block at consensus height to StructScraperFileManifest. // Requires a lock. @@ -2825,7 +3186,6 @@ bool StoreTeamIDList(const fs::path& file) return true; } -// Insert entry into Manifest. Note that cs_StructScraperFileManifest needs to be taken before calling. bool InsertScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) { // This less readable form is so we know whether the element already existed or not. @@ -2838,7 +3198,8 @@ bool InsertScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXCLUSIVE_L { StructScraperFileManifest.nFileManifestMapHash = GetmScraperFileManifestHash(); - _log(logattribute::INFO, "InsertScraperFileManifestEntry", "Inserted File Manifest Entry and stored modified nFileManifestMapHash."); + _log(logattribute::INFO, "InsertScraperFileManifestEntry", + "Inserted File Manifest Entry and stored modified nFileManifestMapHash."); } } @@ -2846,8 +3207,8 @@ bool InsertScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXCLUSIVE_L return ret.second; } -// Delete entry from Manifest and corresponding file if it exists. Note that cs_StructScraperFileManifest needs to be taken before calling. -unsigned int DeleteScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) +unsigned int DeleteScraperFileManifestEntry(ScraperFileManifestEntry& entry) +EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) { unsigned int ret; @@ -2862,33 +3223,35 @@ unsigned int DeleteScraperFileManifestEntry(ScraperFileManifestEntry& entry) EXC { StructScraperFileManifest.nFileManifestMapHash = GetmScraperFileManifestHash(); - _log(logattribute::INFO, "DeleteScraperFileManifestEntry", "Deleted File Manifest Entry and stored modified nFileManifestMapHash."); + _log(logattribute::INFO, "DeleteScraperFileManifestEntry", + "Deleted File Manifest Entry and stored modified nFileManifestMapHash."); } // Returns number of elements erased, either 0 or 1. return ret; } -// Mark manifest entry non-current. The reason this is encapsulated in a function is -// to ensure the rehash is done. Note that cs_StructScraperFileManifest needs to be -// taken before calling. 
-bool MarkScraperFileManifestEntryNonCurrent(ScraperFileManifestEntry& entry) EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) +bool MarkScraperFileManifestEntryNonCurrent(ScraperFileManifestEntry& entry) +EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) { entry.current = false; StructScraperFileManifest.nFileManifestMapHash = GetmScraperFileManifestHash(); - _log(logattribute::INFO, "DeleteScraperFileManifestEntry", "Marked File Manifest Entry non-current and stored modified nFileManifestMapHash."); + _log(logattribute::INFO, "DeleteScraperFileManifestEntry", + "Marked File Manifest Entry non-current and stored modified nFileManifestMapHash."); return true; } -void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, const std::string& sProject, const bool& excludefromcsmanifest) +void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, + const std::string& sProject, const bool& excludefromcsmanifest) { ScraperFileManifestEntry NewRecord; auto scraper_retain_noncurrent_files = []() { LOCK(cs_ScraperGlobals); return SCRAPER_RETAIN_NONCURRENT_FILES; }; - auto explorer_extended_file_retention_time = []() { LOCK(cs_ScraperGlobals); return EXPLORER_EXTENDED_FILE_RETENTION_TIME; }; + auto explorer_extended_file_retention_time = []() { LOCK(cs_ScraperGlobals); + return EXPLORER_EXTENDED_FILE_RETENTION_TIME; }; std::string file_name = file.filename().string(); @@ -2904,35 +3267,43 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi { LOCK(cs_StructScraperFileManifest); - // Iterate mScraperFileManifest to find any prior filetype records for the same project and change current flag to false, - // or delete if older than SCRAPER_FILE_RETENTION_TIME or non-current and fScraperRetainNonCurrentFiles + // Iterate mScraperFileManifest to find any prior filetype records for the same project and change current flag + // to false, or delete if older than SCRAPER_FILE_RETENTION_TIME or non-current and fScraperRetainNonCurrentFiles // is false. ScraperFileManifestMap::iterator entry; - for (entry = StructScraperFileManifest.mScraperFileManifest.begin(); entry != StructScraperFileManifest.mScraperFileManifest.end(); ) + for (entry = StructScraperFileManifest.mScraperFileManifest.begin(); + entry != StructScraperFileManifest.mScraperFileManifest.end(); ) { ScraperFileManifestMap::iterator entry_copy = entry++; - if (entry_copy->second.project == sProject && entry_copy->second.current == true && entry_copy->second.filetype == filetype) + if (entry_copy->second.project == sProject && entry_copy->second.current == true + && entry_copy->second.filetype == filetype) { - _log(logattribute::INFO, "AlignScraperFileManifestEntries", "Marking old project manifest "+ filetype + " entry as current = false."); + _log(logattribute::INFO, "AlignScraperFileManifestEntries", + "Marking old project manifest "+ filetype + " entry as current = false."); MarkScraperFileManifestEntryNonCurrent(entry_copy->second); } - // If filetype records are older than EXPLORER_EXTENDED_FILE_RETENTION_TIME delete record, or if fScraperRetainNonCurrentFiles is false, - // delete all non-current records, including the one just marked non-current. (EXPLORER_EXTENDED_FILE_RETENTION_TIME rather - // then SCRAPER_FILE_RETENTION_TIME is used, because this section is only active if fExplorer is true.) 
- if (entry_copy->second.filetype == filetype && (((GetAdjustedTime() - entry_copy->second.timestamp) > explorer_extended_file_retention_time()) - || (entry_copy->second.project == sProject && entry_copy->second.current == false && !scraper_retain_noncurrent_files()))) + // If filetype records are older than EXPLORER_EXTENDED_FILE_RETENTION_TIME delete record, or if + // fScraperRetainNonCurrentFiles is false, delete all non-current records, including the one just marked + // non-current. (EXPLORER_EXTENDED_FILE_RETENTION_TIME rather then SCRAPER_FILE_RETENTION_TIME is used, because + // this section is only active if fExplorer is true.) + if (entry_copy->second.filetype == filetype + && (((GetAdjustedTime() - entry_copy->second.timestamp) > explorer_extended_file_retention_time()) + || (entry_copy->second.project == sProject && entry_copy->second.current == false + && !scraper_retain_noncurrent_files()))) { DeleteScraperFileManifestEntry(entry_copy->second); } } if (!InsertScraperFileManifestEntry(NewRecord)) - _log(logattribute::WARNING, "AlignScraperFileManifestEntries", "Manifest entry already exists for " + NewRecord.hash.ToString() + " " + file_name); + _log(logattribute::WARNING, "AlignScraperFileManifestEntries", "Manifest entry already exists for " + + NewRecord.hash.ToString() + " " + file_name); else - _log(logattribute::INFO, "AlignScraperFileManifestEntries", "Created manifest entry for " + NewRecord.hash.ToString() + " " + file_name); + _log(logattribute::INFO, "AlignScraperFileManifestEntries", "Created manifest entry for " + + NewRecord.hash.ToString() + " " + file_name); // The below is not an ideal implementation, because the entire map is going to be written out to disk each time. // The manifest file is actually very small though, and this primitive implementation will suffice. @@ -3071,7 +3442,14 @@ bool StoreScraperFileManifest(const fs::path& file) LOCK(cs_StructScraperFileManifest); // Header. 
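The loop above uses the copy-then-advance iterator idiom so that DeleteScraperFileManifestEntry can erase the current element without invalidating the loop iterator. A minimal sketch of the same idiom on a plain std::map with hypothetical contents:

#include <map>
#include <string>

// Standalone sketch of the erase-while-iterating idiom: the iterator is advanced *before* the
// copy is possibly erased, so the loop iterator never dangles.
void PruneSketch(std::map<std::string, int>& entries)
{
    for (auto it = entries.begin(); it != entries.end(); )
    {
        auto it_copy = it++;          // advance first, keep a handle to the current element

        if (it_copy->second < 0) {    // stand-in for the retention / non-current checks
            entries.erase(it_copy);   // erasing it_copy does not touch 'it'
        }
    }
}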
- stream << "Hash," << "Current," << "Time," << "Project," << "Filename," << "ExcludeFromCSManifest," << "Filetype" << "\n"; + stream << "Hash," + << "Current," + << "Time," + << "Project," + << "Filename," + << "ExcludeFromCSManifest," + << "Filetype" + << "\n"; for (auto const& entry : StructScraperFileManifest.mScraperFileManifest) { @@ -3175,17 +3553,15 @@ bool StoreStats(const fs::path& file, const ScraperStats& mScraperStats) return true; } -/*********************** -* Stats Computations * -************************/ - -bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& file, const double& projectmag, ScraperStats& mScraperStats) +bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& file, + const double& projectmag, ScraperStats& mScraperStats) { fsbridge::ifstream ingzfile(file, std::ios_base::in | std::ios_base::binary); if (!ingzfile) { - _log(logattribute::ERR, "LoadProjectFileToStatsByCPID", "Failed to open project user stats gzip file (" + file.string() + ")"); + _log(logattribute::ERR, "LoadProjectFileToStatsByCPID", + "Failed to open project user stats gzip file (" + file.string() + ")"); return false; } @@ -3199,7 +3575,8 @@ bool LoadProjectFileToStatsByCPID(const std::string& project, const fs::path& fi return bResult; } -bool LoadProjectObjectToStatsByCPID(const std::string& project, const CSerializeData& ProjectData, const double& projectmag, ScraperStats& mScraperStats) +bool LoadProjectObjectToStatsByCPID(const std::string& project, const CSerializeData& ProjectData, + const double& projectmag, ScraperStats& mScraperStats) { boostio::basic_array_source input_source(&ProjectData[0], ProjectData.size()); boostio::stream> ingzss(input_source); @@ -3293,7 +3670,8 @@ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::fi dProjectRAC += statsentry.statsvalue.dRAC; } - _log(logattribute::INFO, "LoadProjectObjectToStatsByCPID", "There are " + ToString(mScraperStats.size()) + " CPID entries for " + project); + _log(logattribute::INFO, "LoadProjectObjectToStatsByCPID", + "There are " + ToString(mScraperStats.size()) + " CPID entries for " + project); // The mScraperStats here is scoped to only this project so we do not need project filtering here. ScraperStats::iterator entry; @@ -3333,7 +3711,8 @@ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::fi } //Compute AvgRAC for project across CPIDs and set. - (nCPIDCount > 0) ? ProjectStatsEntry.statsvalue.dAvgRAC = ProjectStatsEntry.statsvalue.dRAC / nCPIDCount : ProjectStatsEntry.statsvalue.dAvgRAC = 0.0; + (nCPIDCount > 0) ? ProjectStatsEntry.statsvalue.dAvgRAC = ProjectStatsEntry.statsvalue.dRAC / nCPIDCount : + ProjectStatsEntry.statsvalue.dAvgRAC = 0.0; // Insert project level map entry. mScraperStats[ProjectStatsEntry.statskey] = ProjectStatsEntry; @@ -3341,11 +3720,6 @@ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::fi return true; } -// This function takes the mScraperMap core, which is the byCPIDbyProject -// entries composed by the ProcessProjectStatsFromStreamByCPID above for -// each project and roles the entries up into byCPID and the single network- -// wide entry. -// ---------------------------------------------- In/Out bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats) { // -------- CPID ----------------- stats entry ---- # of projects @@ -3367,7 +3741,8 @@ bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats) // Note the following is VERY inelegant. 
It CAPS the CPID magnitude to CPID_MAG_LIMIT. // No attempt to renormalize the magnitudes due to this cap is done at this time. This means // The total magnitude across projects will NOT match the total across all CPIDs and the network. - mByCPID_entry->second.first.statsvalue.dMag = std::min(CPID_MAG_LIMIT, mByCPID_entry->second.first.statsvalue.dMag); + mByCPID_entry->second.first.statsvalue.dMag = + std::min(CPID_MAG_LIMIT, mByCPID_entry->second.first.statsvalue.dMag); // Increment number of projects tallied ++mByCPID_entry->second.second; } @@ -3385,7 +3760,8 @@ bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats) // Note the following is VERY inelegant. It CAPS the CPID magnitude to CPID_MAG_LIMIT. // No attempt to renormalize the magnitudes due to this cap is done at this time. This means // The total magnitude across projects will NOT match the total across all CPIDs and the network. - CPIDStatsEntry.statsvalue.dMag = std::min(CPID_MAG_LIMIT, byCPIDbyProjectEntry.second.statsvalue.dMag); + CPIDStatsEntry.statsvalue.dMag = + std::min(CPID_MAG_LIMIT, byCPIDbyProjectEntry.second.statsvalue.dMag); // This is the first project encountered, because otherwise there would already be an entry. mByCPID[CPID] = std::make_pair(CPIDStatsEntry, 1); @@ -3444,8 +3820,6 @@ bool ProcessNetworkWideFromProjectStats(ScraperStats& mScraperStats) return true; } -// Note that this function essentially constructs the scraper stats from the current state of the scraper, which is all of the current files at the time -// the function is called. ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() { _log(logattribute::INFO, "GetScraperStatsByCurrentFileManifestState", "Beginning stats processing."); @@ -3482,7 +3856,8 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() fs::path file = pathScraper / entry.second.filename; ScraperStats mProjectScraperStats; - _log(logattribute::INFO, "GetScraperStatsByCurrentFileManifestState", "Processing stats for project: " + project); + _log(logattribute::INFO, "GetScraperStatsByCurrentFileManifestState", + "Processing stats for project: " + project); LoadProjectFileToStatsByCPID(project, file, dMagnitudePerProject, mProjectScraperStats); @@ -3495,7 +3870,8 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState() } } - // Since this function uses the current project files for statistics, it also makes sense to use the current verified beacons map. + // Since this function uses the current project files for statistics, it also makes sense to use the current verified + // beacons map. 
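A small sketch of the per-CPID magnitude cap described in the comments above, rolling hypothetical project-level magnitudes up to the CPID level; the constant below is a stand-in for CPID_MAG_LIMIT, not its actual value:

#include <algorithm>
#include <map>
#include <string>

constexpr double kMagLimitSketch = 100.0; // hypothetical stand-in for CPID_MAG_LIMIT

double RollUpCpidMagnitude(const std::multimap<std::string, double>& project_mags_by_cpid,
                           const std::string& cpid)
{
    double total = 0.0;

    auto range = project_mags_by_cpid.equal_range(cpid);
    for (auto it = range.first; it != range.second; ++it) {
        total += it->second;
    }

    // Cap the rolled-up magnitude; no renormalization is attempted, so the total across
    // projects will not necessarily match the total across CPIDs (as the comment above notes).
    return std::min(kMagLimitSketch, total);
}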
ScraperStatsAndVerifiedBeacons stats_and_verified_beacons; @@ -3549,14 +3925,16 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const Converge stats_and_verified_beacons.mVerifiedMap = VerifiedBeaconMap; unsigned int nActiveProjects = StructConvergedManifest.ConvergedManifestPartPtrsMap.size() - exclude_parts_from_count; - _log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Number of active projects in converged manifest = " + ToString(nActiveProjects)); + _log(logattribute::INFO, "GetScraperStatsByConvergedManifest", + "Number of active projects in converged manifest = " + ToString(nActiveProjects)); double dMagnitudePerProject = NETWORK_MAGNITUDE / nActiveProjects; ScraperStats mScraperStats; - for (auto entry = StructConvergedManifest.ConvergedManifestPartPtrsMap.begin(); entry != StructConvergedManifest.ConvergedManifestPartPtrsMap.end(); ++entry) + for (auto entry = StructConvergedManifest.ConvergedManifestPartPtrsMap.begin(); + entry != StructConvergedManifest.ConvergedManifestPartPtrsMap.end(); ++entry) { std::string project = entry->first; ScraperStats mProjectScraperStats; @@ -3585,7 +3963,6 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const Converge return stats_and_verified_beacons; } -// This function should only be used as part of the superblock validation in bv11+. ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifest_shared_ptr& manifest) { _log(logattribute::INFO, "GetScraperStatsFromSingleManifest", "Beginning stats processing."); @@ -3622,12 +3999,15 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifes stats_and_verified_beacons.mVerifiedMap = VerifiedBeaconMap; - unsigned int nActiveProjects = StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.size() - exclude_parts_from_count; - _log(logattribute::INFO, "GetScraperStatsFromSingleManifest", "Number of active projects in converged manifest = " + ToString(nActiveProjects)); + unsigned int nActiveProjects = StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.size() + - exclude_parts_from_count; + _log(logattribute::INFO, "GetScraperStatsFromSingleManifest", + "Number of active projects in converged manifest = " + ToString(nActiveProjects)); double dMagnitudePerProject = NETWORK_MAGNITUDE / nActiveProjects; - for (auto entry = StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.begin(); entry != StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.end(); ++entry) + for (auto entry = StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.begin(); + entry != StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.end(); ++entry) { std::string project = entry->first; ScraperStats mProjectScraperStats; @@ -3665,7 +4045,8 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) if (pair == CScraperManifest::mapManifest.end()) { - _log(logattribute::ERR, "ScraperSaveCScraperManifestToFiles", "Specified manifest hash does not exist. Save unsuccessful."); + _log(logattribute::ERR, "ScraperSaveCScraperManifestToFiles", + "Specified manifest hash does not exist. Save unsuccessful."); return false; } @@ -3756,19 +4137,6 @@ bool ScraperSaveCScraperManifestToFiles(uint256 nManifestHash) return true; } -// The idea here is that there are two levels of authorization. The first level is whether any -// node can operate as a "scraper", in other words, download the stats files themselves. 
-// The second level, which is the IsScraperAuthorizedToBroadcastManifests() function, -// is to authorize a particular node to actually be able to publish manifests. -// The second function is intended to override the first, with the first being a network wide -// policy. So to be clear, if the network wide policy has IsScraperAuthorized() set to false -// then ONLY nodes that have IsScraperAuthorizedToBroadcastManifests() can download stats at all. -// If IsScraperAuthorized() is set to true, then you have two levels of operation allowed. -// Nodes can run -scraper and download stats for themselves. They will only be able to publish -// manifests if for that node IsScraperAuthorizedToBroadcastManifests() evaluates to true. -// This allows flexibility in network policy, and will allow us to convert from a scraper based -// approach to convergence back to individual node stats download and convergence without a lot of -// headaches. bool IsScraperAuthorized() { LOCK(cs_ScraperGlobals); @@ -3776,9 +4144,6 @@ bool IsScraperAuthorized() return ALLOW_NONSCRAPER_NODE_STATS_DOWNLOAD; } -// This checks to see if the local node is authorized to publish manifests. Note that this code could be -// modified to bypass this check, so messages sent will also be validated on receipt by the complement -// to this function, IsManifestAuthorized(CKey& Key) in the CScraperManifest class. bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& KeyOut) { @@ -3793,7 +4158,8 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& // If the address (entry) exists in the config and appcache... if (sScraperAddressFromConfig != "false" && entry != mScrapers.end()) { - _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "Entry from config/command line found in AppCache."); + _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", + "Entry from config/command line found in AppCache."); // ... and is enabled... if (entry->second.value == "true" || entry->second.value == "1") @@ -3810,7 +4176,8 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& if (address.IsValid()) { _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "The address is valid."); - _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "(Doublecheck) The address is " + address.ToString()); + _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", + "(Doublecheck) The address is " + address.ToString()); // ... and it exists in the wallet... LOCK(pwalletMain->cs_wallet); @@ -3820,7 +4187,8 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& // ... and the key returned from the wallet is valid and matches the provided public key... assert(KeyOut.IsValid()); - _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "The wallet key for the address is valid."); + _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", + "The wallet key for the address is valid."); AddressOut = address; @@ -3873,7 +4241,8 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& if (address.IsValid()) { _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "The address is valid."); - _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "(Doublecheck) The address is " + address.ToString()); + _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", + "(Doublecheck) The address is " + address.ToString()); // ... 
and it exists in the wallet... (It SHOULD here... it came from the map...) if (pwalletMain->GetKey(KeyID, KeyOut)) @@ -3881,7 +4250,8 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& // ... and the key returned from the wallet is valid ... assert(KeyOut.IsValid()); - _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", "The wallet key for the address is valid."); + _log(logattribute::INFO, "IsScraperAuthorizedToBroadcastManifests", + "The wallet key for the address is valid."); AddressOut = address; @@ -3896,8 +4266,8 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& else { _log(logattribute::WARNING, "IsScraperAuthorizedToBroadcastManifests", - "Key not found in the wallet for matching address. Please check that the wallet is unlocked " - "(preferably for staking only)."); + "Key not found in the wallet for matching address. Please check that the wallet is " + "unlocked (preferably for staking only)."); } } } @@ -3907,18 +4277,14 @@ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& // If we made it here, there is no match or valid key in the wallet - _log(logattribute::WARNING, "IsScraperAuthorizedToBroadcastManifests", "No key found in wallet that matches authorized scrapers in appcache."); + _log(logattribute::WARNING, "IsScraperAuthorizedToBroadcastManifests", + "No key found in wallet that matches authorized scrapers in appcache."); return false; } -// This function computes the average time between manifests as a function of the last 10 received manifests -// plus the nTime provided as the argument. This gives ten intervals for sampling between manifests. If the -// average time between manifests is less than 50% of the nScraperSleep interval, or the most recent manifest -// for a scraper is more than five minutes in the future (accounts for clock skew) then the publishing rate -// of the scraper is deemed too high. This is actually used in CScraperManifest::IsManifestAuthorized to ban -// a scraper that is abusing the network by sending too many manifests over a very short period of time. -bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) +bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey) +EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest) { mmCSManifestsBinnedByScraper mMapCSManifestBinnedByScraper; @@ -3951,8 +4317,8 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub auto scraper_sleep = []() { LOCK(cs_ScraperGlobals); return nScraperSleep; }; - // Insert manifest referenced by the argument first (the "incoming" manifest). Note that it may NOT have the most recent time. - // This is followed by the rest so that we have a unified map with the incoming in the right order. + // Insert manifest referenced by the argument first (the "incoming" manifest). Note that it may NOT have the most + // recent time. This is followed by the rest so that we have a unified map with the incoming in the right order. mScraperManifests.insert(std::make_pair(nTime, sManifestAddress)); // Insert the rest of the manifests for the scraper matching the public key. 
@@ -3975,14 +4341,15 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub // Set the beginning time of the interval to the time at this element nBeginTime = iManifest.first; - // Go till 10 intervals (between samples) OR time interval reaches 5 expected scraper updates at 3 nScraperSleep scraper cycles per update, - // whichever occurs first. + // Go till 10 intervals (between samples) OR time interval reaches 5 expected scraper updates at 3 nScraperSleep + // scraper cycles per update, whichever occurs first. if (nIntervals == 10 || (nCurrentTime - nBeginTime) >= scraper_sleep() * 3 * 5 / 1000) break; } - // Do not allow the most recent manifest from a scraper to be more than five minutes into the future from GetAdjustedTime. (This takes - // into account reasonable clock skew between the scraper and this node, but prevents future dating manifests to try and fool the rate calculation.) - // Note that this is regardless of the minimum sample size below. + // Do not allow the most recent manifest from a scraper to be more than five minutes into the future from + // GetAdjustedTime. (This takes into account reasonable clock skew between the scraper and this node, but prevents + // future dating manifests to try and fool the rate calculation. Note that this is regardless of the minimum sample + // size below. if (nEndTime - nCurrentTime > 300) { _log(logattribute::CRITICAL, "IsScraperMaximumManifestPublishingRateExceeded", "Scraper " + sManifestAddress + @@ -3991,19 +4358,20 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub return true; } - // We are not going to allow less than 5 intervals in the sample. If it is less than 5 intervals, it has either passed the break - // condition above (or even slower), or there really are very few published total, in which the sample size is too small to judge - // the rate. + // We are not going to allow less than 5 intervals in the sample. If it is less than 5 intervals, it has either + // passed the break condition above (or even slower), or there really are very few published total, in which the sample + // size is too small to judge the rate. if (nIntervals < 5) return false; - - // nTotalTime cannot be negative because of the sort order of mScraperManifests, and nIntervals is protected against being zero - // by the above conditional. + // nTotalTime cannot be negative because of the sort order of mScraperManifests, and nIntervals is protected against + // being zero by the above conditional. nTotalTime = nEndTime - nBeginTime; nAvgTimeBetweenManifests = nTotalTime / nIntervals; - // nScraperSleep is in milliseconds. If the average interval is less than 25% of nScraperSleep in seconds, ban the scraper. - // Note that this is a factor of 24 faster than the expected rate given usual project update velocity. + // nScraperSleep is in milliseconds. If the average interval is less than 25% of nScraperSleep in seconds, ban the + // scraper. Note that this is a factor of 24 faster than the expected rate given usual project update velocity. The + // chance of a false positive is very remote. If a scraper is publishing at a rate that trips this, it is doing something + // wrong. 
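A condensed, standalone sketch of the rate test walked through above: sample up to ten intervals between manifest times, reject a future-dated newest manifest, require at least five intervals, then compare the average interval against a fraction of the scraper sleep period. The data layout and exact thresholds below are illustrative stand-ins, not the code from this hunk:

#include <cstdint>
#include <functional>
#include <map>

bool RateExceededSketch(const std::multimap<int64_t, int, std::greater<int64_t>>& manifests_by_time,
                        int64_t now, int64_t scraper_sleep_ms)
{
    int64_t end_time = 0;
    int64_t begin_time = 0;
    int intervals = 0;

    for (const auto& entry : manifests_by_time)
    {
        if (intervals == 0) end_time = entry.first;  // newest manifest time
        else begin_time = entry.first;               // extend the sampling window backwards

        ++intervals;
        if (intervals > 10) break;
    }
    --intervals; // n samples give n - 1 intervals

    if (end_time - now > 300) return true;  // newest manifest more than five minutes in the future
    if (intervals < 5) return false;        // sample too small to judge the rate

    int64_t avg_between = (end_time - begin_time) / intervals; // seconds

    return avg_between < scraper_sleep_ms / 8000; // scraper_sleep_ms is in milliseconds
}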
if (nAvgTimeBetweenManifests < scraper_sleep() / 8000) { _log(logattribute::CRITICAL, "IsScraperMaximumManifestPublishingRateExceeded", "Scraper " + sManifestAddress + @@ -4023,13 +4391,6 @@ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& Pub } } -// This function is necessary because some CScraperManifest messages are likely to be received before the wallet is in sync. Therefore, they -// cannot be checked at that time by the deserialize check. Instead, while the wallet is not in sync, the local CScraperManifest flag -// bCheckedAuthorized will be set to false on any manifests received during that time. Once the wallet is in sync, this function will be -// called and will walk the mapManifest and check all Manifests to ensure the PubKey in the manifest is in the -// authorized scraper list in the AppCache. If it passes the flag will be set to true. If it fails, the manifest will be deleted. All manifests -// must be checked, because we have to deal with another condition where a scraper is deauthorized by network policy. This means manifests may -// not be authorized even if the bCheckedAuthorized is true from a prior check. unsigned int ScraperDeleteUnauthorizedCScraperManifests() { unsigned int nDeleted = 0; @@ -4041,7 +4402,7 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() CScraperManifest_shared_ptr manifest = iter->second; // We have to copy out the nTime and pubkey from the selected manifest, because the IsManifestAuthorized call - // chain traverses the map and locks the cs_manifests in turn, which creats a deadlock potential if the cs_manifest + // chain traverses the map and locks the cs_manifests in turn, which creates a deadlock potential if the cs_manifest // lock is already held on one of the manifests. int64_t nTime = 0; CPubKey pubkey; @@ -4066,9 +4427,10 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() { LOCK(manifest->cs_manifest); - _log(logattribute::WARNING, "ScraperDeleteUnauthorizedCScraperManifests", "Deleting unauthorized manifest with hash " + iter->first.GetHex()); - // Delete from CScraperManifest map (also advances iter to the next valid element). Immediate flag is set, because there should be - // no pending delete retention grace for this. + _log(logattribute::WARNING, "ScraperDeleteUnauthorizedCScraperManifests", + "Deleting unauthorized manifest with hash " + iter->first.GetHex()); + // Delete from CScraperManifest map (also advances iter to the next valid element). Immediate flag is set, + // because there should be no pending delete retention grace for this. iter = CScraperManifest::DeleteManifest(iter, true); nDeleted++; } @@ -4077,9 +4439,8 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() return nDeleted; } -// A lock needs to be taken on cs_StructScraperFileManifest for this function. -// The sCManifestName is the public key of the scraper in address form. -bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) +bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) +EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) { // This "broadcasts" the current ScraperFileManifest contents to the network. 
@@ -4190,15 +4551,14 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS auto scraper_cmanifest_include_noncurrent_proj_files = []() { LOCK(cs_ScraperGlobals); return SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES; }; - // If SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES is false, only include current files to send across the network. - // Also continue (exclude) if it is a non-publishable entry (excludefromcsmanifest is true). - if ((!scraper_cmanifest_include_noncurrent_proj_files() && !entry.second.current) || entry.second.excludefromcsmanifest) + // If SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES is false, only include current files to send across the + // network. Also continue (exclude) if it is a non-publishable entry (excludefromcsmanifest is true). + if ((!scraper_cmanifest_include_noncurrent_proj_files() && !entry.second.current) + || entry.second.excludefromcsmanifest) continue; fs::path inputfile = entry.first; - //_log(logattribute::INFO, "ScraperSendFileManifestContents", "Input file for CScraperManifest is " + inputfile.string()); - fs::path inputfilewpath = pathScraper / inputfile; // open input file, and associate with CAutoFile @@ -4207,7 +4567,8 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS if (filein.IsNull()) { - _log(logattribute::ERR, "ScraperSendFileManifestContents", "Failed to open file (" + inputfile.string() + ")"); + _log(logattribute::ERR, "ScraperSendFileManifestContents", + "Failed to open file (" + inputfile.string() + ")"); return false; } @@ -4223,7 +4584,8 @@ bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) EXCLUS } catch (std::exception &e) { - _log(logattribute::ERR, "ScraperSendFileManifestContents", "Failed to read file (" + inputfile.string() + ")"); + _log(logattribute::ERR, "ScraperSendFileManifestContents", + "Failed to read file (" + inputfile.string() + ")"); return false; } @@ -4320,7 +4682,8 @@ bool ConvergedManifest::operator()(const CScraperManifest_shared_ptr& in) return bConvergedContentHashMatches; } -bool ConvergedManifest::PopulateConvergedManifestPartPtrsMap() EXCLUSIVE_LOCKS_REQUIRED(CScraperConvergedManifest_ptr->cs_manifest) +bool ConvergedManifest::PopulateConvergedManifestPartPtrsMap() +EXCLUSIVE_LOCKS_REQUIRED(CScraperConvergedManifest_ptr->cs_manifest) { if (CScraperConvergedManifest_ptr == nullptr) return false; @@ -4393,7 +4756,8 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes unsigned int nScraperCount = mMapCSManifestsBinnedByScraper.size(); - _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Number of Scrapers with manifests = " + ToString(nScraperCount)); + _log(logattribute::INFO, "ScraperConstructConvergedManifest", + "Number of Scrapers with manifests = " + ToString(nScraperCount)); for (const auto& iter : mMapCSManifestsBinnedByScraper) { @@ -4445,7 +4809,8 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes // Find the first one of equivalent content manifests. 
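For reference, a standalone sketch of the supermajority convergence count used in the loop that follows: manifests are binned by content hash, and a convergence is declared once enough scrapers agree on one hash. The two-thirds rule below is an assumed stand-in for NumScrapersForSupermajority, and the hash type is simplified to a string:

#include <map>
#include <string>

unsigned int SupermajoritySketch(unsigned int scraper_count)
{
    return (2 * scraper_count + 2) / 3; // ceil(2/3 * n); assumed rule for this sketch only
}

bool FindConvergenceSketch(const std::multimap<std::string, std::string>& manifests_by_content,
                           const std::string& content_hash, unsigned int scraper_count)
{
    // count() gives the number of scrapers whose manifest content matches this hash.
    return manifests_by_content.count(content_hash) >= SupermajoritySketch(scraper_count);
}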
convergence = mManifestsBinnedbyContent.find(iter.second); - _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Found convergence on manifest " + convergence->second.second.GetHex() + _log(logattribute::INFO, "ScraperConstructConvergedManifest", + "Found convergence on manifest " + convergence->second.second.GetHex() + " at " + DateTimeStrFormat("%x %H:%M:%S", iter.first) + " with " + ToString(nIdenticalContentManifestCount) + " scrapers out of " + ToString(nScraperCount) + " agreeing."); @@ -4461,20 +4826,24 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes StructConvergedManifest.mIncludedScraperManifests[iter2->second.first] = iter2->second.second; } - // Record scrapers that are not part of the convergence by iterating through the top level of the double map (which is keyed by ScraperID) + // Record scrapers that are not part of the convergence by iterating through the top level of the double map + // (which is keyed by ScraperID) for (const auto& iScraper : mMapCSManifestsBinnedByScraper) { // If the scraper is not found in the mIncludedScraperManifests, then it was not part of the convergence. - if (StructConvergedManifest.mIncludedScraperManifests.find(iScraper.first) == StructConvergedManifest.mIncludedScraperManifests.end()) + if (StructConvergedManifest.mIncludedScraperManifests.find(iScraper.first) == + StructConvergedManifest.mIncludedScraperManifests.end()) { StructConvergedManifest.vExcludedScrapers.push_back(iScraper.first); - _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Scraper " + iScraper.first + " not in convergence."); + _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Scraper " + + iScraper.first + " not in convergence."); } else { StructConvergedManifest.vIncludedScrapers.push_back(iScraper.first); // Scraper was in the convergence. 
- _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Scraper " + iScraper.first + " in convergence."); + _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Scraper " + + iScraper.first + " in convergence."); } } @@ -4486,20 +4855,24 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes if (iScraper.second.value == "true" || iScraper.second.value == "1") { - if (std::find(std::begin(StructConvergedManifest.vExcludedScrapers), std::end(StructConvergedManifest.vExcludedScrapers), iScraper.first) - == std::end(StructConvergedManifest.vExcludedScrapers) - && std::find(std::begin(StructConvergedManifest.vIncludedScrapers), std::end(StructConvergedManifest.vIncludedScrapers), iScraper.first) - == std::end(StructConvergedManifest.vIncludedScrapers)) + if (std::find(std::begin(StructConvergedManifest.vExcludedScrapers), + std::end(StructConvergedManifest.vExcludedScrapers), + iScraper.first) == std::end(StructConvergedManifest.vExcludedScrapers) + && std::find(std::begin(StructConvergedManifest.vIncludedScrapers), + std::end(StructConvergedManifest.vIncludedScrapers), + iScraper.first) == std::end(StructConvergedManifest.vIncludedScrapers)) { StructConvergedManifest.vScrapersNotPublishing.push_back(iScraper.first); - _log(logattribute::INFO, "ScraperConstructConvergedManifest", "Scraper " + iScraper.first + " authorized but not publishing."); + _log(logattribute::INFO, "ScraperConstructConvergedManifest", + "Scraper " + iScraper.first + " authorized but not publishing."); } } } bConvergenceSuccessful = true; - // Note this break is VERY important, it prevents considering essentially the same manifest that meets convergence multiple times. + // Note this break is VERY important, it prevents considering essentially the same manifest that meets + // convergence multiple times. break; } } @@ -4517,22 +4890,26 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes CScraperManifest_shared_ptr manifest = pair->second; // Fill out the ConvergedManifest structure. Note this assumes one-to-one part to project statistics BLOB. Needs to - // be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to one manifest. + // be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to one + // manifest. bool bConvergedContentHashMatches = StructConvergedManifest(manifest); if (!bConvergedContentHashMatches) { bConvergenceSuccessful = false; - _log(logattribute::ERR, "ScraperConstructConvergedManifest", "Selected Converged Manifest content hash check failed!"); + _log(logattribute::ERR, "ScraperConstructConvergedManifest", + "Selected Converged Manifest content hash check failed!"); // Reinitialize StructConvergedManifest StructConvergedManifest = {}; } else // Content matches so we have a confirmed convergence. { - // Determine if there is an excluded project. If so, set convergence back to false and drop back to project level to try and recover project by project. + // Determine if there is an excluded project. If so, set convergence back to false and drop back to project level + // to try and recover project by project. 
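A minimal sketch of the fallback test applied below: a manifest-level convergence is kept only if every whitelisted project, plus the BeaconList, appears in the converged part map; otherwise the convergence is discarded and the code retries project by project. Names are hypothetical stand-ins for the whitelist snapshot and ConvergedManifestPartPtrsMap keys:

#include <set>
#include <string>
#include <vector>

bool ConvergenceCoversWhitelistSketch(const std::vector<std::string>& whitelist,
                                      const std::set<std::string>& converged_part_keys)
{
    for (const auto& project : whitelist)
    {
        if (converged_part_keys.count(project) == 0) {
            return false; // one missing project is enough to fall back; stop scanning
        }
    }

    return converged_part_keys.count("BeaconList") != 0;
}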
for (const auto& iProjects : projectWhitelist) { - if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find(iProjects.m_name) == StructConvergedManifest.ConvergedManifestPartPtrsMap.end()) + if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find(iProjects.m_name) + == StructConvergedManifest.ConvergedManifestPartPtrsMap.end()) { _log(logattribute::WARNING, "ScraperConstructConvergedManifest", "Project " + iProjects.m_name @@ -4541,18 +4918,22 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes bConvergenceSuccessful = false; - // Since we are falling back to project level and discarding this convergence, no need to process any more once one missed project is found. + // Since we are falling back to project level and discarding this convergence, no need to process any + // more once one missed project is found. break; } - if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find("BeaconList") == StructConvergedManifest.ConvergedManifestPartPtrsMap.end()) + if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find("BeaconList") + == StructConvergedManifest.ConvergedManifestPartPtrsMap.end()) { - _log(logattribute::WARNING, "ScraperConstructConvergedManifest", "BeaconList was not found in the converged manifests from the scrapers. \n" + _log(logattribute::WARNING, "ScraperConstructConvergedManifest", + "BeaconList was not found in the converged manifests from the scrapers. \n" "Falling back to attempt convergence by project."); bConvergenceSuccessful = false; - // Since we are falling back to project level and discarding this convergence, no need to process any more if BeaconList is missing. + // Since we are falling back to project level and discarding this convergence, no need to process any + // more if BeaconList is missing. break; } } @@ -4567,10 +4948,11 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes StructConvergedManifest = {}; // Try to form a convergence by project objects (parts)... - bConvergenceSuccessful = ScraperConstructConvergedManifestByProject(projectWhitelist, mMapCSManifestsBinnedByScraper, StructConvergedManifest); + bConvergenceSuccessful = ScraperConstructConvergedManifestByProject(projectWhitelist, mMapCSManifestsBinnedByScraper, + StructConvergedManifest); - // If we have reached here. All attempts at convergence have failed. Reinitialize StructConvergedManifest to eliminate stale or - // partially filled-in data. + // If we have reached here. All attempts at convergence have failed. Reinitialize StructConvergedManifest to + // eliminate stale or partially filled-in data. if (!bConvergenceSuccessful) StructConvergedManifest = {}; } @@ -4606,14 +4988,16 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project unsigned int nScraperCount = mMapCSManifestsBinnedByScraper.size(); _log(logattribute::INFO, __func__, "Number of projects in the whitelist = " + ToString(projectWhitelist.size())); - _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", "Number of Scrapers with manifests = " + ToString(nScraperCount)); + _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", + "Number of Scrapers with manifests = " + ToString(nScraperCount)); for (const auto& iWhitelistProject : projectWhitelist) { - // Do a map for unique ProjectObject times ordered by descending time then content hash. Note that for Project Objects (Parts), - // the content hash is the object hash. 
We also need the consensus block here, because we are "composing" the manifest by - // parts, so we will need to choose the latest consensus block by manifest time. This will occur naturally below if tracked in - // this manner. We will also want the BeaconList from the associated manifest. + // Do a map for unique ProjectObject times ordered by descending time then content hash. Note that for Project + // Objects (Parts), the content hash is the object hash. We also need the consensus block here, because we are + // "composing" the manifest by parts, so we will need to choose the latest consensus block by manifest time. This + // will occur naturally below if tracked in this manner. We will also want the BeaconList from the associated + // manifest. // ------ manifest time --- object hash - consensus block hash - manifest hash. std::multimap, std::greater> mProjectObjectsBinnedByTime; // and also by project object (content) hash, then scraperID and project. @@ -4661,13 +5045,17 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project nProjectObjectHash = manifest->vParts[nPart]->hash; // Insert into mManifestsBinnedByTime multimap. - mProjectObjectsBinnedByTime.insert(std::make_pair(nProjectObjectTime, std::make_tuple(nProjectObjectHash, manifest->ConsensusBlock, *manifest->phash))); - - // Even though this is a multimap on purpose because we are going to count occurrences of the same key, - // We need to prevent the insertion of a second entry with the same content from the same scraper. This is - // even more true here at the part level than at the manifest level, because if both SCRAPER_CMANIFEST_RETAIN_NONCURRENT - // and SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES are true, then there can be many references - // to the same part by different manifests of the same scraper in addition to across scrapers. + mProjectObjectsBinnedByTime.insert(std::make_pair(nProjectObjectTime, + std::make_tuple(nProjectObjectHash, + manifest->ConsensusBlock, + *manifest->phash))); + + // Even though this is a multimap on purpose because we are going to count occurrences of the same + // key, We need to prevent the insertion of a second entry with the same content from the same + // scraper. This is even more true here at the part level than at the manifest level, because if + // both SCRAPER_CMANIFEST_RETAIN_NONCURRENT and SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES are + // true, then there can be many references to the same part by different manifests of the same + // scraper in addition to across scrapers. auto range = mProjectObjectsBinnedbyContent.equal_range(nProjectObjectHash); bool bAlreadyExists = false; for (auto iter3 = range.first; iter3 != range.second; ++iter3) @@ -4681,15 +5069,13 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project { // Insert into mProjectObjectsBinnedbyContent -------- content hash ------------------- ScraperID -------- Project. 
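A standalone sketch of the duplicate guard described above: before recording another (content hash, scraper, project) tuple, the existing entries for that content hash are scanned so the same scraper is never counted twice toward a part-level convergence. Types are simplified stand-ins for the real maps:

#include <map>
#include <string>
#include <utility>

void InsertPartRecordSketch(std::multimap<std::string, std::pair<std::string, std::string>>& by_content,
                            const std::string& content_hash,
                            const std::string& scraper_id,
                            const std::string& project)
{
    auto range = by_content.equal_range(content_hash);

    for (auto it = range.first; it != range.second; ++it)
    {
        if (it->second.first == scraper_id) {
            return; // this scraper already referenced an identical part; do not count it twice
        }
    }

    by_content.insert(std::make_pair(content_hash, std::make_pair(scraper_id, project)));
}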
mProjectObjectsBinnedbyContent.insert(std::make_pair(nProjectObjectHash, std::make_pair(iter.first, iWhitelistProject.m_name))); - //_log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", "mProjectObjectsBinnedbyContent insert " - // + nProjectObjectHash.GetHex() + ", " + iter.first + ", " + iWhitelistProject.m_name); - _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", "mProjectObjectsBinnedbyContent insert, timestamp " - + DateTimeStrFormat("%x %H:%M:%S", manifest->nTime) - + ", content hash "+ nProjectObjectHash.GetHex() - + ", scraper ID " + iter.first - + ", project " + iWhitelistProject.m_name - + ", manifest hash " + nCSManifestHash.GetHex()); - + _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", + "mProjectObjectsBinnedbyContent insert, timestamp " + + DateTimeStrFormat("%x %H:%M:%S", manifest->nTime) + + ", content hash "+ nProjectObjectHash.GetHex() + + ", scraper ID " + iter.first + + ", project " + iWhitelistProject.m_name + + ", manifest hash " + nCSManifestHash.GetHex()); } } } @@ -4701,8 +5087,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project for (const auto& iter : mProjectObjectsBinnedByTime) { // Notice the below is NOT using the time. We switch to the content only. The time is only used to make sure - // we test the convergence of the project objects in time order, but once a content hash is selected based on the time, - // only the content hash is used to count occurrences in the multimap, because the times for the same + // we test the convergence of the project objects in time order, but once a content hash is selected based on + // the time, only the content hash is used to count occurrences in the multimap, because the times for the same // project object (part hash) will be different across different manifests and different scrapers. unsigned int nIdenticalContentManifestCount = mProjectObjectsBinnedbyContent.count(std::get<0>(iter.second)); if (nIdenticalContentManifestCount >= NumScrapersForSupermajority(nScraperCount)) @@ -4710,7 +5096,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // Find the first one of equivalent parts ------------------ by object hash. ProjectConvergence = mProjectObjectsBinnedbyContent.find(std::get<0>(iter.second)); - _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", "Found convergence on project object " + ProjectConvergence->first.GetHex() + _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", + "Found convergence on project object " + ProjectConvergence->first.GetHex() + " for project " + iWhitelistProject.m_name + " with " + ToString(nIdenticalContentManifestCount) + " scrapers out of " + ToString(nScraperCount) + " agreeing."); @@ -4728,14 +5115,16 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project if (nContentHashCheck != iPart->first) { - _log(logattribute::ERR, "ScraperConstructConvergedManifestByProject", "Selected Converged Project Object content hash check failed! nContentHashCheck = " + _log(logattribute::ERR, "ScraperConstructConvergedManifestByProject", + "Selected Converged Project Object content hash check failed! 
nContentHashCheck = " + nContentHashCheck.GetHex() + " and nContentHash = " + iPart->first.GetHex()); break; } auto ProjectConvergenceRange = mProjectObjectsBinnedbyContent.equal_range(std::get<0>(iter.second)); - // Record included scrapers included for the project level convergence keyed by project and the reverse. A multimap is convenient here for both. + // Record included scrapers included for the project level convergence keyed by project and the reverse. + // A multimap is convenient here for both. for (auto iter2 = ProjectConvergenceRange.first; iter2 != ProjectConvergenceRange.second; ++iter2) { // ------------------------------------------------------------------------- project -------------- ScraperID. @@ -4745,12 +5134,15 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project } // Put Project Object (Part) in StructConvergedManifest keyed by project. - StructConvergedManifest.ConvergedManifestPartPtrsMap.insert(std::make_pair(iWhitelistProject.m_name, &(iPart->second))); - - // If the indirectly referenced manifest has a consensus time that is greater than already recorded, replace with that time, and also - // change the consensus block to the referred to consensus block. (Note that this is scoped at even above the individual project level, so - // the result after iterating through all projects will be the latest manifest time and consensus block that corresponds to any of the - // parts that meet convergence.) We will also get the manifest hash too, so we can retrieve the associated BeaconList that was used. + StructConvergedManifest.ConvergedManifestPartPtrsMap.insert(std::make_pair(iWhitelistProject.m_name, + &(iPart->second))); + + // If the indirectly referenced manifest has a consensus time that is greater than already recorded, replace + // with that time, and also change the consensus block to the referred to consensus block. (Note that this + // is scoped at even above the individual project level, so the result after iterating through all projects + // will be the latest manifest time and consensus block that corresponds to any of the parts that meet + // convergence.) We will also get the manifest hash too, so we can retrieve the associated BeaconList + // that was used. if (iter.first > nConvergedConsensusTime) { nConvergedConsensusTime = iter.first; @@ -4760,7 +5152,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project iCountSuccessfulConvergedProjects++; - // Note this break is VERY important, it prevents considering essentially the same project object that meets convergence multiple times. + // Note this break is VERY important, it prevents considering essentially the same project object that meets + // convergence multiple times. break; } } @@ -4776,13 +5169,15 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project { AppCacheSection mScrapers = GetScrapersCache(); - // Fill out the rest of the ConvergedManifest structure. Note this assumes one-to-one part to project statistics BLOB. Needs to - // be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to one manifest. + // Fill out the rest of the ConvergedManifest structure. Note this assumes one-to-one part to project statistics + // BLOB. Needs to be fixed for more than one part per BLOB. This is easy in this case, because it is all + // from/referring to one manifest. - // Lets use the BeaconList from the manifest referred to by nManifestHashForConvergedBeaconList. 
Technically there is no exact answer to - // the BeaconList that should be used in the convergence when putting it together at the individual part level, because each project part - // could have used a different BeaconList (subject to the consensus ladder). It makes sense to use the "newest" one that is associated - // with a manifest that has the newest part associated with a successful part (project) level convergence. + // Lets use the BeaconList from the manifest referred to by nManifestHashForConvergedBeaconList. Technically there + // is no exact answer to the BeaconList that should be used in the convergence when putting it together at the + // individual part level, because each project part could have used a different BeaconList (subject to the consensus + // ladder). It makes sense to use the "newest" one that is associated with a manifest that has the newest part + // associated with a successful part (project) level convergence. LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); @@ -4795,7 +5190,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // Bail if BeaconList is not found or empty. if (pair == CScraperManifest::mapManifest.end() || manifest->vParts[0]->data.size() == 0) { - _log(logattribute::WARNING, "ScraperConstructConvergedManifestByProject", "BeaconList was not found in the converged manifests from the scrapers."); + _log(logattribute::WARNING, "ScraperConstructConvergedManifestByProject", + "BeaconList was not found in the converged manifests from the scrapers."); bConvergenceSuccessful = false; } @@ -4821,11 +5217,13 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // converged manifest if it is. if (nPart > 0) { - StructConvergedManifest.ConvergedManifestPartPtrsMap.insert(std::make_pair("VerifiedBeacons", manifest->vParts[nPart])); + StructConvergedManifest.ConvergedManifestPartPtrsMap.insert(std::make_pair("VerifiedBeacons", + manifest->vParts[nPart])); } - _log(logattribute::INFO, __func__, "After BeaconList and VerifiedBeacons insert StructConvergedManifest.ConvergedManifestPartPtrsMap.size() = " - + ToString(StructConvergedManifest.ConvergedManifestPartPtrsMap.size())); + _log(logattribute::INFO, __func__, + "After BeaconList and VerifiedBeacons insert StructConvergedManifest.ConvergedManifestPartPtrsMap.size() = " + + ToString(StructConvergedManifest.ConvergedManifestPartPtrsMap.size())); StructConvergedManifest.ConsensusBlock = nConvergedConsensusBlock; @@ -4876,25 +5274,30 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // Fill out the excluded projects vector and the included scraper count (by project) map for (const auto& iProjects : projectWhitelist) { - if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find(iProjects.m_name) == StructConvergedManifest.ConvergedManifestPartPtrsMap.end()) + if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find(iProjects.m_name) + == StructConvergedManifest.ConvergedManifestPartPtrsMap.end()) { // Project in whitelist was not in the map, so it goes in the exclusion vector. 
StructConvergedManifest.vExcludedProjects.push_back(iProjects.m_name); _log(logattribute::WARNING, "ScraperConstructConvergedManifestByProject", "Project " + iProjects.m_name - + " was excluded because there was no convergence from the scrapers for this project at the project level."); + + " was excluded because there was no convergence from the scrapers for" + " this project at the project level."); continue; } - unsigned int nScraperConvergenceCount = StructConvergedManifest.mIncludedScrapersbyProject.count(iProjects.m_name); - StructConvergedManifest.mScraperConvergenceCountbyProject.insert(std::make_pair(iProjects.m_name, nScraperConvergenceCount)); + unsigned int nScraperConvergenceCount = + StructConvergedManifest.mIncludedScrapersbyProject.count(iProjects.m_name); + StructConvergedManifest.mScraperConvergenceCountbyProject.insert(std::make_pair(iProjects.m_name, + nScraperConvergenceCount)); _log(logattribute::INFO, "ScraperConstructConvergedManifestByProject", "Project " + iProjects.m_name + ": " + ToString(nScraperConvergenceCount) + " scraper(s) converged"); } - // Fill out the included and excluded scraper vector for scrapers that did not participate in any project level convergence. + // Fill out the included and excluded scraper vector for scrapers that did not participate in any project + // level convergence. for (const auto& iScraper : mMapCSManifestsBinnedByScraper) { if (StructConvergedManifest.mIncludedProjectsbyScraper.count(iScraper.first)) @@ -4918,13 +5321,16 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // Only include scrapers enabled in protocol. if (iScraper.second.value == "true" || iScraper.second.value == "1") { - if (std::find(std::begin(StructConvergedManifest.vExcludedScrapers), std::end(StructConvergedManifest.vExcludedScrapers), iScraper.first) - == std::end(StructConvergedManifest.vExcludedScrapers) - && std::find(std::begin(StructConvergedManifest.vIncludedScrapers), std::end(StructConvergedManifest.vIncludedScrapers), iScraper.first) - == std::end(StructConvergedManifest.vIncludedScrapers)) + if (std::find(std::begin(StructConvergedManifest.vExcludedScrapers), + std::end(StructConvergedManifest.vExcludedScrapers), + iScraper.first) == std::end(StructConvergedManifest.vExcludedScrapers) + && std::find(std::begin(StructConvergedManifest.vIncludedScrapers), + std::end(StructConvergedManifest.vIncludedScrapers), + iScraper.first) == std::end(StructConvergedManifest.vIncludedScrapers)) { StructConvergedManifest.vScrapersNotPublishing.push_back(iScraper.first); - _log(logattribute::INFO, "ScraperConstructConvergedManifesByProject", "Scraper " + iScraper.first + " authorized but not publishing."); + _log(logattribute::INFO, "ScraperConstructConvergedManifesByProject", + "Scraper " + iScraper.first + " authorized but not publishing."); } } } @@ -5003,7 +5409,11 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() if (!OutOfSyncByAge()) { unsigned int nDeleted = ScraperDeleteUnauthorizedCScraperManifests(); - if (nDeleted) _log(logattribute::WARNING, "ScraperDeleteCScraperManifests", "Deleted " + ToString(nDeleted) + " unauthorized manifests."); + if (nDeleted) + { + _log(logattribute::WARNING, "ScraperDeleteCScraperManifests", + "Deleted " + ToString(nDeleted) + " unauthorized manifests."); + } } LOCK(CScraperManifest::cs_mapManifest); @@ -5011,7 +5421,8 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() // Bin by scraper and order by manifest time within scraper bin. 
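A minimal sketch of the retention rule used in the culling code below: the inner map is ordered by manifest time descending, so skipping its first element preserves the newest manifest per scraper while everything older is removed. Types and names are simplified stand-ins for the binned manifest maps:

#include <cstdint>
#include <functional>
#include <map>
#include <string>

using InnerByTimeDesc = std::map<int64_t, std::string, std::greater<int64_t>>;

void CullNonCurrentSketch(std::map<std::string, InnerByTimeDesc>& binned_by_scraper)
{
    for (auto& scraper : binned_by_scraper)
    {
        InnerByTimeDesc& inner = scraper.second;
        if (inner.empty()) continue;

        // ++inner.begin() skips the newest (first) entry; everything older is culled.
        inner.erase(++inner.begin(), inner.end());
    }
}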
mmCSManifestsBinnedByScraper mMapCSManifestsBinnedByScraper = BinCScraperManifestsByScraper(); - _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "mMapCSManifestsBinnedByScraper size = " + ToString(mMapCSManifestsBinnedByScraper.size())); + _log(logattribute::INFO, "ScraperDeleteCScraperManifests", + "mMapCSManifestsBinnedByScraper size = " + ToString(mMapCSManifestsBinnedByScraper.size())); if (!scraper_cmanifest_retain_noncurrent()) { @@ -5020,15 +5431,17 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() { mCSManifest mManifestInner = iter->second; - _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "mManifestInner size = " + ToString(mManifestInner.size()) + + _log(logattribute::INFO, "ScraperDeleteCScraperManifests", + "mManifestInner size = " + ToString(mManifestInner.size()) + " for " + iter->first + " scraper"); - // This preserves the LATEST CScraperManifest entry for the given scraper, because the inner map is in descending order, - // and the first element is therefore the LATEST, and is skipped. + // This preserves the LATEST CScraperManifest entry for the given scraper, because the inner map is in + // descending order, and the first element is therefore the LATEST, and is skipped. for (auto iter_inner = ++mManifestInner.begin(); iter_inner != mManifestInner.end(); ++iter_inner) { - _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Deleting non-current manifest " + iter_inner->second.first.GetHex() + _log(logattribute::INFO, "ScraperDeleteCScraperManifests", + "Deleting non-current manifest " + iter_inner->second.first.GetHex() + " from scraper source " + iter->first); // Delete from CScraperManifest map @@ -5046,7 +5459,8 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() if (GetAdjustedTime() - manifest->nTime > scraper_cmanifest_retention_time()) { - _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Deleting old CScraperManifest with hash " + iter->first.GetHex()); + _log(logattribute::INFO, "ScraperDeleteCScraperManifests", + "Deleting old CScraperManifest with hash " + iter->first.GetHex()); // Delete from CScraperManifest map iter = CScraperManifest::DeleteManifest(iter); } @@ -5069,7 +5483,8 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests() // Clear old CScraperManifests out of mapPendingDeletedManifest. nPendingDeleted = CScraperManifest::DeletePendingDeletedManifests(); - _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Permanently deleted " + ToString(nPendingDeleted) + " manifest(s) pending permanent deletion."); + _log(logattribute::INFO, "ScraperDeleteCScraperManifests", + "Permanently deleted " + ToString(nPendingDeleted) + " manifest(s) pending permanent deletion."); _log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Size of mapPendingDeletedManifest = " + ToString(CScraperManifest::mapPendingDeletedManifest.size())); @@ -5124,7 +5539,8 @@ bool LoadBeaconListFromConvergedManifest(const ConvergedManifest& StructConverge mBeaconMap[key] = LoadEntry; } - _log(logattribute::INFO, "LoadBeaconListFromConvergedManifest", "mBeaconMap element count: " + ToString(mBeaconMap.size())); + _log(logattribute::INFO, "LoadBeaconListFromConvergedManifest", + "mBeaconMap element count: " + ToString(mBeaconMap.size())); // We used to return false if the beacon map had no entries, but this is a valid // condition if all beacons have expired. So return true. 
(False is returned above @@ -5204,7 +5620,6 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsAndVerifiedBeacons(const Converged return stats_and_verified_beacons; } -// This is for rpc report functions. ScraperPendingBeaconMap GetPendingBeaconsForReport() { return GetConsensusBeaconList().mPendingMap; @@ -5232,11 +5647,10 @@ ScraperPendingBeaconMap GetVerifiedBeaconsForReport(bool from_global) return VerifiedBeacons; } -/*********************** -* Subscriber * -************************/ +// Subscriber -Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContractDirectFromStatsUpdate, bool bFromHousekeeping) +Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContractDirectFromStatsUpdate, + bool bFromHousekeeping) { Superblock empty_superblock; @@ -5245,15 +5659,17 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac // If not in sync then immediately bail with an empty superblock. if (OutOfSyncByAge()) return empty_superblock; - // Check the age of the ConvergedScraperStats cache. If less than nScraperSleep / 1000 old (for seconds) or clean, then simply report back the cache contents. - // This prevents the relatively heavyweight stats computations from running too often. The time here may not exactly align with - // the scraper loop if it is running, but that is ok. The scraper loop updates the time in the cache too. + // Check the age of the ConvergedScraperStats cache. If less than nScraperSleep / 1000 old (for seconds) or clean, + // then simply report back the cache contents. This prevents the relatively heavyweight stats computations from + // running too often. The time here may not exactly align with the scraper loop if it is running, but that is ok. + // The scraper loop updates the time in the cache too. bool bConvergenceUpdateNeeded = true; { LOCK(cs_ConvergedScraperStatsCache); // If the cache is less than nScraperSleep in minutes old OR not dirty... - if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) || ConvergedScraperStatsCache.bClean) + if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) + || ConvergedScraperStatsCache.bClean) { bConvergenceUpdateNeeded = false; _log(logattribute::INFO, __func__, "Cached convergence is fresh, convergence update not needed."); @@ -5278,11 +5694,14 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac // ScraperConstructConvergedManifest also culls old CScraperManifests. If no convergence, then // you can't make a SB core and you can't make a contract, so return the empty string. Also check // to make sure the BeaconMap has been populated properly. 
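A standalone sketch of the cache-freshness gate discussed above: the heavyweight convergence recomputation is skipped when the cached result is younger than one scraper sleep period (milliseconds converted to seconds) or is still marked clean. The struct and names below are hypothetical stand-ins for ConvergedScraperStatsCache and nScraperSleep:

#include <cstdint>

struct ConvergenceCacheSketch
{
    int64_t nTime = 0;   // time of the last convergence
    bool bClean = true;  // true when nothing has changed since the last convergence
};

bool ConvergenceUpdateNeededSketch(const ConvergenceCacheSketch& cache,
                                   int64_t adjusted_time, int64_t scraper_sleep_ms)
{
    if (adjusted_time - cache.nTime < scraper_sleep_ms / 1000 || cache.bClean) {
        return false; // cache is fresh enough (or clean); report the cached contents instead
    }

    return true;
}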
- if (ScraperConstructConvergedManifest(StructConvergedManifest) && LoadBeaconListFromConvergedManifest(StructConvergedManifest, mBeaconMap)) + if (ScraperConstructConvergedManifest(StructConvergedManifest) + && LoadBeaconListFromConvergedManifest(StructConvergedManifest, mBeaconMap)) { - ScraperStats mScraperConvergedStats = GetScraperStatsByConvergedManifest(StructConvergedManifest).mScraperStats; + ScraperStats mScraperConvergedStats = + GetScraperStatsByConvergedManifest(StructConvergedManifest).mScraperStats; - _log(logattribute::INFO, "ScraperGetSuperblockContract", "mScraperStats has the following number of elements: " + ToString(mScraperConvergedStats.size())); + _log(logattribute::INFO, "ScraperGetSuperblockContract", + "mScraperStats has the following number of elements: " + ToString(mScraperConvergedStats.size())); if (bStoreConvergedStats) { @@ -5330,7 +5749,8 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac { if (superblock_Prev.WellFormed()) { - // If the current is not empty and the previous is not empty and not the same, then there is an updated contract. + // If the current is not empty and the previous is not empty and not the same, then there is + // an updated contract. if (superblock.GetHash() != superblock_Prev.GetHash()) uiInterface.NotifyScraperEvent(scrapereventtypes::SBContract, CT_UPDATED, {}); } @@ -5393,16 +5813,20 @@ Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats, bool bContrac return superblock; } -/*********************** -* RPC Functions * -************************/ +// RPC functions +/** + * @brief Publishes a CScraperManifest to the network from the current file manifest IF the node is authorized. + * @param params + * @param fHelp + * @return bool true if successful + */ UniValue sendscraperfilemanifest(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 0 ) throw std::runtime_error( "sendscraperfilemanifest\n" - "Send a CScraperManifest object with the ScraperFileManifest.\n" + "Send a CScraperManifest object from the current ScraperFileManifest.\n" ); CBitcoinAddress AddressOut; @@ -5421,12 +5845,18 @@ UniValue sendscraperfilemanifest(const UniValue& params, bool fHelp) return UniValue(ret); } +/** + * @brief Saves a CScraperManifest to disk + * @param params Takes a single parameter which is the hash of the manifest to save to disk. + * @param fHelp + * @return bool true if successful + */ UniValue savescraperfilemanifest(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1 ) throw std::runtime_error( "savescraperfilemanifest <hash>\n" - "Send a CScraperManifest object with the ScraperFileManifest.\n" + "Saves a CScraperManifest object to disk.\n" ); bool ret = ScraperSaveCScraperManifestToFiles(uint256S(params[0].get_str())); @@ -5434,6 +5864,14 @@ UniValue savescraperfilemanifest(const UniValue& params, bool fHelp) return UniValue(ret); } +/** + * @brief Deletes a CScraperManifest entry from the global mapManifest map. Note this is immediate and does not + * create a grace period entry in the pending deleted manifest map. It also will not delete the underlying manifest object + * if a shared pointer to the manifest object is also held by the global convergence cache. + * @param params Takes a single parameter which is the hash of the manifest to delete.
+ * @param fHelp + * @return bool true if successful + */ UniValue deletecscrapermanifest(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1 ) @@ -5449,6 +5887,12 @@ UniValue deletecscrapermanifest(const UniValue& params, bool fHelp) return UniValue(ret); } +/** + * @brief Immediately archives the specified log, either the debug.log or scraper.log + * @param params Takes a single parameter specifying the log to archive, debug or scraper. + * @param fHelp + * @return bool true if successful + */ UniValue archivelog(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1 ) @@ -5475,7 +5919,11 @@ UniValue archivelog(const UniValue& params, bool fHelp) return UniValue(ret); } -// Helper function to convergencereport to provide detailed convergence cache output. +/** + * @brief Outputs the contents of the input ConvergedScraperStats to JSON + * @param ConvergedScraperStatsIn + * @return JSON representation of input ConvergedScraperStats + */ UniValue ConvergedScraperStatsToJson(ConvergedScraperStats& ConvergedScraperStatsIn) { UniValue ret(UniValue::VOBJ); @@ -5484,12 +5932,15 @@ UniValue ConvergedScraperStatsToJson(ConvergedScraperStat current_convergence.pushKV("current_cache_clean_flag", ConvergedScraperStatsIn.bClean); current_convergence.pushKV("current_cache_timestamp", ConvergedScraperStatsIn.nTime); - current_convergence.pushKV("current_cache_datetime", DateTimeStrFormat("%x %H:%M:%S UTC", ConvergedScraperStatsIn.nTime)); + current_convergence.pushKV("current_cache_datetime", + DateTimeStrFormat("%x %H:%M:%S UTC", ConvergedScraperStatsIn.nTime)); current_convergence.pushKV("convergence_content_hash", ConvergedScraperStatsIn.Convergence.nContentHash.ToString()); - current_convergence.pushKV("superblock_from_current_convergence_quorum_hash", ConvergedScraperStatsIn.NewFormatSuperblock.GetHash().ToString()); - current_convergence.pushKV("superblock_from_current_convergence", SuperblockToJson(ConvergedScraperStatsIn.NewFormatSuperblock)); + current_convergence.pushKV("superblock_from_current_convergence_quorum_hash", + ConvergedScraperStatsIn.NewFormatSuperblock.GetHash().ToString()); + current_convergence.pushKV("superblock_from_current_convergence", + SuperblockToJson(ConvergedScraperStatsIn.NewFormatSuperblock)); UniValue past_convergences_array(UniValue::VARR); @@ -5522,6 +5973,12 @@ UniValue ConvergedScraperStatsToJson(ConvergedScraperStat return ret; } +/** + * @brief Reports on the state of the convergence on the local node. + * @param params bool true to provide detailed output + * @param fHelp + * @return JSON report of convergence state with optional details + */ UniValue convergencereport(const UniValue& params, bool fHelp) { if (fHelp || params.size() > 1) @@ -5541,7 +5998,8 @@ UniValue convergencereport(const UniValue& params, bool fHelp) { LOCK(cs_ConvergedScraperStatsCache); - if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) || ConvergedScraperStatsCache.bClean) + if (GetAdjustedTime() - ConvergedScraperStatsCache.nTime < (scraper_sleep() / 1000) + || ConvergedScraperStatsCache.bClean) { bConvergenceUpdateNeeded = false; } @@ -5615,12 +6073,20 @@ UniValue convergencereport(const UniValue& params, bool fHelp) return result; } +/** + * @brief Tests superblock formation + * @param params unsigned int to specify the number of bits for the reduced hash hint to force more duplicates to check.
This + * is clamped between 4 and 32, with 32 as the default, which is the normal hint bits. + * @param fHelp + * @return report of test results + */ UniValue testnewsb(const UniValue& params, bool fHelp) { if (fHelp || params.size() > 1 ) throw std::runtime_error( "testnewsb [hint bits]\n" - "Test the new Superblock class. Optional parameter of the number of bits for the reduced hash hint for uncached test.\n" + "Tests superblock formation. Optional parameter of the number of bits for the reduced hash hint for" + " uncached test.\n" "This is limited to a range of 4 to 32, with 32 as the default (which is the normal hint bits).\n" ); @@ -5641,12 +6107,14 @@ UniValue testnewsb(const UniValue& params, bool fHelp) return error; } - _log(logattribute::INFO, "testnewsb", "Size of the PastConvergences map = " + ToString(ConvergedScraperStatsCache.PastConvergences.size())); + _log(logattribute::INFO, "testnewsb", + "Size of the PastConvergences map = " + ToString(ConvergedScraperStatsCache.PastConvergences.size())); res.pushKV("Size of the PastConvergences map", ToString(ConvergedScraperStatsCache.PastConvergences.size())); } // Contract binary pack/unpack check... - _log(logattribute::INFO, "testnewsb", "Checking compatibility with binary SB pack/unpack by packing then unpacking, then comparing to the original"); + _log(logattribute::INFO, "testnewsb", + "Checking compatibility with binary SB pack/unpack by packing then unpacking, then comparing to the original"); SuperblockPtr NewFormatSuperblock = SuperblockPtr::Empty(); QuorumHash nNewFormatSuperblockHash; @@ -5660,7 +6128,9 @@ UniValue testnewsb(const UniValue& params, bool fHelp) Superblock::FromConvergence(ConvergedScraperStatsCache), pindexBest); - _log(logattribute::INFO, "testnewsb", "ConvergedScraperStatsCache.Convergence.bByParts = " + ToString(ConvergedScraperStatsCache.Convergence.bByParts)); + _log(logattribute::INFO, "testnewsb", + "ConvergedScraperStatsCache.Convergence.bByParts = " + + ToString(ConvergedScraperStatsCache.Convergence.bByParts)); } _log(logattribute::INFO, "testnewsb", "m_projects size = " + ToString(NewFormatSuperblock->m_projects.size())); @@ -5678,10 +6148,16 @@ UniValue testnewsb(const UniValue& params, bool fHelp) nNewFormatSuperblockReducedContentHashFromConvergenceHint = NewFormatSuperblock->m_convergence_hint; nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint = NewFormatSuperblock->m_manifest_content_hint; - res.pushKV("nNewFormatSuperblockReducedContentHashFromConvergenceHint", (uint64_t) nNewFormatSuperblockReducedContentHashFromConvergenceHint); - _log(logattribute::INFO, "testnewsb", "nNewFormatSuperblockReducedContentHashFromConvergenceHint = " + ToString(nNewFormatSuperblockReducedContentHashFromConvergenceHint)); - res.pushKV("nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint", (uint64_t) nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint); - _log(logattribute::INFO, "testnewsb", "nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint = " + ToString(nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint)); + res.pushKV("nNewFormatSuperblockReducedContentHashFromConvergenceHint", + (uint64_t) nNewFormatSuperblockReducedContentHashFromConvergenceHint); + _log(logattribute::INFO, "testnewsb", + "nNewFormatSuperblockReducedContentHashFromConvergenceHint = " + + ToString(nNewFormatSuperblockReducedContentHashFromConvergenceHint)); + res.pushKV("nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint", + (uint64_t) 
nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint); + _log(logattribute::INFO, "testnewsb", + "nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint = " + + ToString(nNewFormatSuperblockReducedContentHashFromUnderlyingManifestHint)); // Log the number of bits used to force key collisions. _log(logattribute::INFO, "testnewsb", "nReducedCacheBits = " + ToString(nReducedCacheBits)); @@ -5729,7 +6205,8 @@ UniValue testnewsb(const UniValue& params, bool fHelp) { int i = GetRandInt(PastConvergencesSize - 1); - _log(logattribute::INFO, "testnewsb", "ValidateSuperblock random past RandomPastConvergedManifest index " + ToString(i) + " selected."); + _log(logattribute::INFO, "testnewsb", + "ValidateSuperblock random past RandomPastConvergedManifest index " + ToString(i) + " selected."); res.pushKV("ValidateSuperblock random past RandomPastConvergedManifest index selected", i); std::advance(iPastSB, i); @@ -5749,7 +6226,8 @@ UniValue testnewsb(const UniValue& params, bool fHelp) if (!bPastConvergencesEmpty) { - ScraperStatsAndVerifiedBeacons RandomPastSBStatsAndVerifiedBeacons = GetScraperStatsByConvergedManifest(RandomPastConvergedManifest); + ScraperStatsAndVerifiedBeacons RandomPastSBStatsAndVerifiedBeacons = + GetScraperStatsByConvergedManifest(RandomPastConvergedManifest); Superblock RandomPastSB = Superblock::FromStats(RandomPastSBStatsAndVerifiedBeacons); @@ -5773,7 +6251,8 @@ UniValue testnewsb(const UniValue& params, bool fHelp) } else { - RandomPastSB.m_manifest_content_hint = RandomPastConvergedManifest.nUnderlyingManifestContentHash.GetUint64() >> 32; + RandomPastSB.m_manifest_content_hint = + RandomPastConvergedManifest.nUnderlyingManifestContentHash.GetUint64() >> 32; } // @@ -5797,12 +6276,14 @@ UniValue testnewsb(const UniValue& params, bool fHelp) if (Quorum::ValidateSuperblock(RandomPastSBPtr, false, nReducedCacheBits)) { - _log(logattribute::INFO, "testnewsb", "ValidateSuperblock validation against random past (without using cache) passed"); + _log(logattribute::INFO, "testnewsb", + "ValidateSuperblock validation against random past (without using cache) passed"); res.pushKV("ValidateSuperblock validation against random past (without using cache)", "passed"); } else { - _log(logattribute::INFO, "testnewsb", "ValidateSuperblock validation against random past (without using cache) failed"); + _log(logattribute::INFO, "testnewsb", + "ValidateSuperblock validation against random past (without using cache) failed"); res.pushKV("ValidateSuperblock validation against random past (without using cache)", "failed"); } } @@ -5810,6 +6291,13 @@ UniValue testnewsb(const UniValue& params, bool fHelp) return res; } +/** + * @brief Generates a comprehensive report of the scraper convergence, manifest and parts objects. This report is mainly + * used for integrity checking of the scraper's internal operation + * @param params none + * @param fHelp + * @return JSON report of scraper status + */ UniValue scraperreport(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 0 ) diff --git a/src/gridcoin/scraper/scraper.h b/src/gridcoin/scraper/scraper.h index b958d33366..06c90ba88e 100644 --- a/src/gridcoin/scraper/scraper.h +++ b/src/gridcoin/scraper/scraper.h @@ -24,7 +24,7 @@ #include "gridcoin/scraper/fwd.h" #include "gridcoin/superblock.h" -// Thread safety +// Thread safety. See scraper.cpp for documentation. 
extern CCriticalSection cs_Scraper; extern CCriticalSection cs_ScraperGlobals; extern CCriticalSection cs_mScrapersExt; @@ -68,22 +68,114 @@ extern AppCacheSectionExt mScrapersExt; * Functions * *********************/ +/** + * @brief Returns the hash of the provided input file path. If the file path cannot be resolved or an exception occurs + * in processing the file, a null hash is returned. + * @param inputfile + * @return uint256 hash + */ uint256 GetFileHash(const fs::path& inputfile); +/** + * @brief Provides the computed scraper stats and verified beacons from the input converged manifest + * @param StructConvergedManifest + * @return ScraperStatsAndVerifiedBeacons + */ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const ConvergedManifest& StructConvergedManifest); +/** + * @brief Gets a copy of the extended scrapers cache global. This global is an extension of the appcache in that it + * retains deleted entries with a deleted flag. + * @return AppCacheSectionExt + */ AppCacheSectionExt GetExtendedScrapersCache(); +/** + * @brief Returns whether this node is authorized to download statistics. + * @return bool + */ bool IsScraperAuthorized(); +/** + * @brief Returns whether this node is authorized to broadcast statistics manifests to the network as a scraper. + * @param AddressOut + * @param KeyOut + * @return bool + * + * The idea here is that there are two levels of authorization. The first level is whether any + * node can operate as a "scraper", in other words, download the stats files themselves. + * The second level, which is the IsScraperAuthorizedToBroadcastManifests() function, + * is to authorize a particular node to actually be able to publish manifests. + * The second function is intended to override the first, with the first being a network wide + * policy. So to be clear, if the network wide policy has IsScraperAuthorized() set to false + * then ONLY nodes that have IsScraperAuthorizedToBroadcastManifests() can download stats at all. + * If IsScraperAuthorized() is set to true, then you have two levels of operation allowed. + * Nodes can run -scraper and download stats for themselves. They will only be able to publish + * manifests if for that node IsScraperAuthorizedToBroadcastManifests() evaluates to true. + * This allows flexibility in network policy, and will allow us to convert from a scraper based + * approach to convergence back to individual node stats download and convergence without a lot of + * headaches. + * + * This function checks to see if the local node is authorized to publish manifests. Note that this code could be + * modified to bypass this check, so messages sent will also be validated on receipt by the complement + * to this function, IsManifestAuthorized(CKey& Key) in the CScraperManifest class. + */ bool IsScraperAuthorizedToBroadcastManifests(CBitcoinAddress& AddressOut, CKey& KeyOut); +/** + * @brief Returns whether the scraper with the input public key at the input time has exceeded the maximum allowable + * manifest publishing rate. This is a DoS function and is used to issue misbehavior points, which could result in banning + * the node that has exceeded the max publishing rate. + * @param nTime + * @param PubKey + * @return bool + * + * This function computes the average time between manifests as a function of the last 10 received manifests + * plus the nTime provided as the argument. This gives ten intervals for sampling between manifests. 
If the + * average time between manifests is less than 50% of the nScraperSleep interval, or the most recent manifest + * for a scraper is more than five minutes in the future (accounts for clock skew) then the publishing rate + * of the scraper is deemed too high. This is actually used in CScraperManifest::IsManifestAuthorized to ban + * a scraper that is abusing the network by sending too many manifests over a very short period of time. + */ bool IsScraperMaximumManifestPublishingRateExceeded(int64_t& nTime, CPubKey& PubKey); -GRC::Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats = false, bool bContractDirectFromStatsUpdate = false, bool bFromHousekeeping = false); -scraperSBvalidationtype ValidateSuperblock(const GRC::Superblock& NewFormatSuperblock, bool bUseCache = true, unsigned int nReducedCacheBits = 32); +/** + * @brief Generates a superblock (contract) from the current convergence. It will construct/update the convergence if needed. + * @param bStoreConvergedStats + * @param bContractDirectFromStatsUpdate + * @param bFromHousekeeping + * @return GRC::Superblock + */ +GRC::Superblock ScraperGetSuperblockContract(bool bStoreConvergedStats = false, bool bContractDirectFromStatsUpdate = false, + bool bFromHousekeeping = false); +/** + * @brief Gets the verified beacon ID's from the input converged manifest (overloaded) + * @param StructConvergedManifest + * @return std::vector of beacon ID's + */ std::vector GetVerifiedBeaconIDs(const ConvergedManifest& StructConvergedManifest); +/** + * @brief Gets the verified beacon ID's from the input VerifiedBeaconMap (overloaded) + * @param VerifiedBeaconMap + * @return std::vector of beacon ID's + */ std::vector GetVerifiedBeaconIDs(const ScraperPendingBeaconMap& VerifiedBeaconMap); +/** + * @brief Returns the scraper stats and verified beacons in one structure from the input ConvergedScraperStats + * @param stats + * @return ScraperStatsAndVerifiedBeacons + */ ScraperStatsAndVerifiedBeacons GetScraperStatsAndVerifiedBeacons(const ConvergedScraperStats &stats); +/** + * @brief Returns a map of pending beacons + * @return ScraperPendingBeaconMap of pending beacons + */ ScraperPendingBeaconMap GetPendingBeaconsForReport(); +/** + * @brief Returns a map of verified beacons + * @param from_global + * @return ScraperPendingBeaconMap of verified beacons + */ ScraperPendingBeaconMap GetVerifiedBeaconsForReport(bool from_global = false); +/** Vector of strings that correspond with the statsobjecttype ENUM class */ static std::vector vstatsobjecttypestrings = { "NetWorkWide", "byCPID", "byProject", "byCPIDbyProject" }; +/** Vector of strings that correspond with the scraperSBvalidationtype ENUM class */ static std::vector scraperSBvalidationtypestrings = { "Invalid", "Unknown", @@ -93,43 +185,50 @@ static std::vector scraperSBvalidationtypestrings = { "ProjectLevelConvergence" }; - +/** + * @brief Returns text that corresponds to the input statsobjecttype + * @param StatsObjType + * @return std::string + */ const std::string GetTextForstatsobjecttype(statsobjecttype StatsObjType) { return vstatsobjecttypestrings[static_cast(StatsObjType)]; } +/** + * @brief Returns text that corresponds to the input scraperSBvalidationtype + * @param ScraperSBValidationType + * @return std::string + */ const std::string GetTextForscraperSBvalidationtype(scraperSBvalidationtype ScraperSBValidationType) { return scraperSBvalidationtypestrings[static_cast(ScraperSBValidationType)]; } - +/** + * @brief Rounds the double floating point magnitude according 
to the global parameter MAG_ROUND. + * @param dMag + * @return double + */ double MagRound(double dMag) { return round(dMag / MAG_ROUND) * MAG_ROUND; } +/** + * @brief Returns the number of scrapers required for a supermajority when determining a convergence. This is a CONSENSUS + * critical function + * @param nScraperCount + * @return unsigned int + */ unsigned int NumScrapersForSupermajority(unsigned int nScraperCount) { LOCK(cs_ScraperGlobals); - unsigned int nRequired = std::max(SCRAPER_CONVERGENCE_MINIMUM, (unsigned int)std::ceil(SCRAPER_CONVERGENCE_RATIO * nScraperCount)); + unsigned int nRequired = std::max(SCRAPER_CONVERGENCE_MINIMUM, + (unsigned int)std::ceil(SCRAPER_CONVERGENCE_RATIO * nScraperCount)); return nRequired; } -/********************* -* Scraper * -*********************/ - -// For version 2 of the scraper we will restructure into a class. For now this is a placeholder. -/* - * class scraper -{ -public: - scraper(); -}; -*/ - #endif // GRIDCOIN_SCRAPER_SCRAPER_H diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index 6bdf79adbf..a19787b0cf 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -667,20 +667,22 @@ unsigned int CScraperManifest::DeletePendingDeletedManifests() EXCLUSIVE_LOCKS_R bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) { - /* Index object for scraper data. - * deserialize message - * hash - * see if we do not already have it - * validate the message - * populate the maps - * request parts - */ + // General procedure here: + // + // Index object for scraper data. + // deserialize message + // hash + // see if we do not already have it + // validate the message + // populate the maps + // request parts + unsigned int banscore = 0; - /* hash the object */ + // hash the object uint256 hash(Hash(vRecv.begin(), vRecv.end())); - /* see if we do not already have it */ + // see if we do not already have it if (WITH_LOCK(cs_mapManifest, return AlreadyHave(pfrom, CInv(MSG_SCRAPERINDEX, hash)))) { LogPrint(BCLog::LogFlags::SCRAPER, "INFO: ScraperManifest::RecvManifest: Already have CScraperManifest %s from " @@ -741,12 +743,12 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) if (manifest->isComplete()) { - /* If we already got all the parts in memory, signal completion */ + // If we already got all the parts in memory, signal completion... manifest->Complete(); } else { - /* else request missing parts from the sender */ + // ... else request missing parts from the sender // Note: As an additional buffer to prevent spurious part receipts, if the manifest timestamp is within nScraperSleep // of expiration (i.e. 
about to go on the pending delete list, then do not request missing parts, as it is possible // that the manifest will be deleted by the housekeeping loop in between the receipt of the manifest, request for @@ -773,10 +775,10 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) m->SerializeForManifestCompare(sscomp); m->nContentHash = Hash(sscomp.begin(), sscomp.end()); - /* serialize and hash the object */ + // serialize and hash the object m->SerializeWithoutSignature(ss); - /* sign the serialized manifest and append the signature */ + // sign the serialized manifest and append the signature hash = Hash(ss.begin(), ss.end()); keySign.Sign(hash, m->signature); @@ -792,10 +794,10 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) LogPrint(BCLog::LogFlags::MANIFEST, "adding new local manifest"); } - /* try inserting into map */ + // try inserting into map const auto it = mapManifest.emplace(hash, m); - /* Already exists, do nothing */ + // Already exists, do nothing if (it.second == false) return false; @@ -804,7 +806,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) // Relock the manifest pointed to by the iterator. LOCK(manifest.cs_manifest); - /* set the hash pointer inside */ + // set the hash pointer inside manifest.phash = &it.first->first; // We do not need to do a deserialize check here, because the @@ -830,7 +832,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) void CScraperManifest::Complete() EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { - /* Notify peers that we have a new manifest */ + // Notify peers that we have a new manifest LogPrint(BCLog::LogFlags::MANIFEST, "manifest %s complete with %u parts", phash->GetHex(), (unsigned)vParts.size()); { LOCK(cs_vNodes); @@ -844,18 +846,6 @@ void CScraperManifest::Complete() EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manife sCManifestName, phash->GetHex()); } -/* how? - * Should we only request objects that we need? - * Because nodes should only have valid data, download anything they send. - * They should only send what we requested, but we do not know what it is, - * until we have it, let it pass. - * There is 32MiB message size limit. There is a chance we could hit it, so - * splitting is necessary. Index object with list of parts is needed. - * - * If inv about index is received, and we do not know about it yet, just - * getdata it. If it turns out useless, just ban the node. Then getdata the - * parts from the node. -*/ UniValue CScraperManifest::ToJson() const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { UniValue r(UniValue::VOBJ); @@ -909,6 +899,7 @@ UniValue CScraperManifest::dentry::ToJson() const EXCLUSIVE_LOCKS_REQUIRED(CSpli return r; } +/** RPC function to list manifests and optionally provide their contents in JSON form. */ UniValue listmanifests(const UniValue& params, bool fHelp) { if (fHelp || params.size() > 2) @@ -990,6 +981,7 @@ UniValue listmanifests(const UniValue& params, bool fHelp) return obj; } +/** Provides hex string output of part object contents. */ UniValue getmpart(const UniValue& params, bool fHelp) { if (fHelp || params.size() != 1) diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h index fab8216225..200f03b214 100755 --- a/src/gridcoin/scraper/scraper_net.h +++ b/src/gridcoin/scraper/scraper_net.h @@ -18,21 +18,26 @@ #include - -/** Abstract class for blobs that are split into parts. 
*/ +/** Abstract class for blobs that are split into parts. This more complex approach using a parent vanilla parts class will + * allow the parts system to be used for other purposes besides the scrapers if needed in the future. + * polymorphism. + */ class CSplitBlob { public: - /** Parts of the Split object */ + /** Parts of the Split object. For right now in the current implementation for the scraper system, all objects are + * represented by one part. This provides the future capability to have large objects greater than the message size + * limit (currently 32 MiB), but this is not necessary now. + */ struct CPart { - std::set> refs; + std::set> refs; CSerializeData data; uint256 hash; CPart(const uint256& ihash) :hash(ihash) {} CDataStream getReader() const { return CDataStream(data.begin(), data.end(), SER_NETWORK, PROTOCOL_VERSION); } - bool present() const {return !this->data.empty();} + bool present() const { return !this->data.empty(); } }; // static methods @@ -47,7 +52,7 @@ class CSplitBlob static bool SendPartTo(CNode* pto, const uint256& hash); // public methods - /** Boolean that returns whether all parts for the split object have been received. **/ + /** Boolean that returns whether all parts for the split object have been received. */ bool isComplete() const; /** Notification that this Split object is fully received. */ @@ -66,10 +71,9 @@ class CSplitBlob virtual ~CSplitBlob(); // static variables - /** Mutex for mapParts **/ + /** Mutex for mapParts */ static CCriticalSection cs_mapParts; - /* We could store the parts in mapRelay and have getdata service for free. */ /** map from part hash to scraper Index, so we can attach incoming Part in Index */ static std::map mapParts GUARDED_BY(cs_mapParts); @@ -77,7 +81,7 @@ class CSplitBlob /** Guards vParts and other manifest fields of the manifest (derived) class. * Note that this needs to be mutable so that a lock can be taken internally on cs_manifest on an * otherwise const qualified member function. - **/ + */ mutable CCriticalSection cs_manifest; std::vector vParts GUARDED_BY(cs_manifest); @@ -94,7 +98,7 @@ class CScraperManifest CScraperManifest(CScraperManifest& manifest); public: /* static methods */ - /** Mutex protects both mapManifest and MapPendingDeletedManifest **/ + /** Mutex protects both mapManifest and MapPendingDeletedManifest */ static CCriticalSection cs_mapManifest; /** map from index hash to scraper Index, so we can process Inv messages */ @@ -123,7 +127,7 @@ class CScraperManifest /** Send a manifest of requested hash to node (from mapManifest). 
* @returns whether something was sent */ - static bool SendManifestTo(CNode* pfrom, const uint256& hash); + static bool SendManifestTo(CNode* pfrom, const uint256& hash); /** Add new manifest object into list of known manifests */ static bool addManifest(std::shared_ptr m, CKey& keySign); @@ -131,25 +135,29 @@ class CScraperManifest /** Validate whether received manifest is authorized */ static bool IsManifestAuthorized(int64_t& nTime, CPubKey& PubKey, unsigned int& banscore_out); - /** Delete Manifest (key version) **/ + /** Delete Manifest (key version) */ static bool DeleteManifest(const uint256& nHash, const bool& fImmediate = false); - /** Delete Manifest (iterator version) **/ + /** Delete Manifest (iterator version) */ static std::map>::iterator DeleteManifest(std::map>::iterator& iter, const bool& fImmediate = false); - /** Delete PendingDeletedManifests **/ + /** Delete PendingDeletedManifests */ static unsigned int DeletePendingDeletedManifests(); public: /*==== fields ====*/ - /** Local only (not serialized) pointer to hash (index) field of mapManifest **/ + /** LOCAL only (not serialized) pointer to hash (index) field of mapManifest */ const uint256* phash GUARDED_BY(cs_manifest) = nullptr; + /** By convention the string version of the public key on the scraper used to publish the manifest */ std::string sCManifestName GUARDED_BY(cs_manifest); + /** The public key of the private key used by the publishing scraper to sign the manifest */ CPubKey pubkey GUARDED_BY(cs_manifest); + /** The signature on the manifest from the publishing scraper */ std::vector signature GUARDED_BY(cs_manifest); + /** Project "directory" entry in the manifest. The GridcoinTeamID is not used. */ struct dentry { std::string project; std::string ETag; @@ -162,22 +170,30 @@ class CScraperManifest void Serialize(CDataStream& s) const; void Unserialize(CDataStream& s); + + /** Outputs content of dentry in JSON format. Helper function to CScraperManifest::ToJson(). */ UniValue ToJson() const; }; + /** Vector of project entries */ std::vector projects GUARDED_BY(cs_manifest); + /** Part index in the vParts vector for the BeaconList. This should always be zero once populated (the first element). */ int BeaconList GUARDED_BY(cs_manifest) = -1 ; unsigned BeaconList_c GUARDED_BY(cs_manifest) = 0; + /** The block on which the convergence will be formed if this manifest is part of a convergence. */ uint256 ConsensusBlock GUARDED_BY(cs_manifest); + /** The time the manifest was published */ int64_t nTime GUARDED_BY(cs_manifest) = 0; + /** The hash of the manifest's contents (the vparts vector). This hash is used for matching purposes in a convergence. */ uint256 nContentHash GUARDED_BY(cs_manifest); - // The bCheckedAuthorized flag is LOCAL only. It is not serialized/deserialized. This - // is set during Unserializecheck to false if wallet not in sync, and true if in sync - // and scraper ID matches authorized list (i.e. IsManifestAuthorized is true. - // The node will walk the mapManifest from + /** The bCheckedAuthorized flag is LOCAL only. It is not serialized/deserialized. This + * is set during Unserializecheck to false if wallet not in sync, and true if in sync + * and scraper ID matches authorized list (i.e. IsManifestAuthorized is true. 
+ * The node will walk the mapManifest from + */ bool bCheckedAuthorized GUARDED_BY(cs_manifest); public: /* public methods */ @@ -185,14 +201,23 @@ class CScraperManifest /** Hook called when all parts are available */ void Complete() override; - /** Serialize this object for seding over the network. */ + /** Serialize this object for sending over the network. This includes the signature as well as the payload. */ void Serialize(CDataStream& s) const; + /** Serialize without the signature. We need this to generate the (inner) content for the hash to sign with the key. */ void SerializeWithoutSignature(CDataStream& s) const; + /** Serialize the contents (vParts vector) for purposes of content comparison. This is used to fill out the nContentHash, + * which is then included in SerializeWithoutSignature. + */ void SerializeForManifestCompare(CDataStream& ss) const; + /** A combination of unserialization and integrity checking, which includes hash checks, authorization checks, and + * signature checks. + */ void UnserializeCheck(CDataStream& s, unsigned int& banscore_out); + /** Checks to see whether manifest age is current according to the SCRAPER_CMANIFEST_RETENTION_TIME network setting. */ bool IsManifestCurrent() const; + /** Outputs manifest in JSON format. */ UniValue ToJson() const; }; diff --git a/src/main.cpp b/src/main.cpp index d009b61af1..2d21dd90f6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3337,9 +3337,10 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, // but it is an out parameter of IsManifestAuthorized. unsigned int banscore_out = 0; - // We have to copy out the nTime and pubkey from the selected manifest, because the IsManifestAuthorized call - // chain traverses the map and locks the cs_manifests in turn, which creats a deadlock potential if the cs_manifest - // lock is already held on one of the manifests. + // We have to copy out the nTime and pubkey from the selected manifest, because the + // IsManifestAuthorized call chain traverses the map and locks the cs_manifests in turn, + // which creates a deadlock potential if the cs_manifest lock is already held on one of + // the manifests. int64_t nTime = 0; CPubKey pubkey; { @@ -3347,7 +3348,6 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, nTime = manifest->nTime; pubkey = manifest->pubkey; - } // Also don't send a manifest that is not current. From f6f449a0e5ae27a4ec01241724991ba617a056ae Mon Sep 17 00:00:00 2001 From: jamescowens Date: Mon, 6 Sep 2021 14:59:32 -0400 Subject: [PATCH 09/14] Modify SendManifestTo to take a smart shared pointer to CScraperManifest This modifies SendManifestTo to take a smart shared pointer to the CScraperManifest to be sent, rather than doing another lookup of the inv.hash against the mapManifest to find the manifest, which is what the original form of the function did. This avoids another map find. 
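For illustration only (not applied by this patch), a side-by-side sketch of the old and new send paths, using the names that appear in the diff below; the thread-safety annotations are omitted here for brevity:

    // Old shape: the hash was passed in and the manifest was looked up a second time.
    //     auto it = mapManifest.find(hash);
    //     if (it == mapManifest.end()) return false;
    //     LOCK(it->second->cs_manifest);
    //     pto->PushMessage("scraperindex", *it->second);
    //
    // New shape: the shared pointer obtained once by the caller (ProcessMessage) is reused,
    // so the second mapManifest.find() disappears.
    bool CScraperManifest::SendManifestTo(CNode* pto, std::shared_ptr<CScraperManifest> manifest)
    {
        LOCK(manifest->cs_manifest);
        pto->PushMessage("scraperindex", *manifest);
        return true;
    }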
--- src/gridcoin/scraper/scraper_net.cpp | 15 ++++----------- src/gridcoin/scraper/scraper_net.h | 4 ++-- src/main.cpp | 8 +++++++- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index a19787b0cf..e1c2bb0212 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -267,24 +267,16 @@ void CScraperManifest::PushInvTo(CNode* pto) EXCLUSIVE_LOCKS_REQUIRED(CScraperMa } } -// Clang thread static safety analysis is showing a false positive where it claims cs_mapManifest is not held when -// SendManifestTo is called in ProcessMessage in main. The complaint is on the template specialization of the serialization -// of CScraperManifest in PushMessage. Manual inspection of the code shows the lock is held. -bool CScraperManifest::SendManifestTo(CNode* pto, const uint256& hash) +bool CScraperManifest::SendManifestTo(CNode* pto, std::shared_ptr manifest) EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) { - auto it = mapManifest.find(hash); + LOCK(manifest->cs_manifest); - if (it == mapManifest.end()) return false; - - LOCK(it->second->cs_manifest); - - pto->PushMessage("scraperindex", *it->second); + pto->PushMessage("scraperindex", *manifest); return true; } - void CScraperManifest::dentry::Serialize(CDataStream& ss) const { ss << project; @@ -295,6 +287,7 @@ void CScraperManifest::dentry::Serialize(CDataStream& ss) const ss << current; ss << last; } + void CScraperManifest::dentry::Unserialize(CDataStream& ss) { ss >> project; diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h index 200f03b214..e29f04b0e9 100755 --- a/src/gridcoin/scraper/scraper_net.h +++ b/src/gridcoin/scraper/scraper_net.h @@ -124,10 +124,10 @@ class CScraperManifest */ static void PushInvTo(CNode* pto); - /** Send a manifest of requested hash to node (from mapManifest). + /** Send manifest pointed to by the provided smart pointer to node. * @returns whether something was sent */ - static bool SendManifestTo(CNode* pfrom, const uint256& hash); + static bool SendManifestTo(CNode* pfrom, std::shared_ptr manifest); /** Add new manifest object into list of known manifests */ static bool addManifest(std::shared_ptr m, CKey& keySign); diff --git a/src/main.cpp b/src/main.cpp index 2d21dd90f6..8a309c7df3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3354,7 +3354,13 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, if (CScraperManifest::IsManifestAuthorized(nTime, pubkey, banscore_out) && WITH_LOCK(manifest->cs_manifest, return manifest->IsManifestCurrent())) { - CScraperManifest::SendManifestTo(pfrom, inv.hash); + // SendManifestTo takes its own lock on the manifest. Note that the original form of + // SendManifestTo took the inv.hash and did another lookup to find the actual + // manifest in the mapManifest. This is unnecessary since we already have the manifest + // identified above. The new form, which takes a smart shared pointer to the manifest + // as an argument, sends the manifest directly using PushMessage, and avoids another + // map find. 
+ CScraperManifest::SendManifestTo(pfrom, manifest); } } } From 4768399e61756685bb8bde8111c724f2b3f53bbd Mon Sep 17 00:00:00 2001 From: jamescowens Date: Mon, 6 Sep 2021 18:13:28 -0400 Subject: [PATCH 10/14] Scraper thread safety - Part II --- src/gridcoin/scraper/scraper.cpp | 89 ++++++++++++++----------- src/gridcoin/scraper/scraper_net.cpp | 98 +++++++++++++++------------- src/sync.cpp | 4 +- 3 files changed, 106 insertions(+), 85 deletions(-) diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index f554287f6b..4e568e0ff2 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -4440,7 +4440,7 @@ unsigned int ScraperDeleteUnauthorizedCScraperManifests() } bool ScraperSendFileManifestContents(CBitcoinAddress& Address, CKey& Key) -EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) +EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest) { // This "broadcasts" the current ScraperFileManifest contents to the network. @@ -4628,7 +4628,7 @@ EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest) // "Sign" and "send". - LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); + LOCK(CSplitBlob::cs_mapParts); bool bAddManifestSuccessful = CScraperManifest::addManifest(manifest, Key); @@ -4981,8 +4981,6 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project StructConvergedManifest.CScraperConvergedManifest_ptr = std::shared_ptr(new CScraperManifest); - LOCK(StructConvergedManifest.CScraperConvergedManifest_ptr->cs_manifest); - // We are going to do this for each project in the whitelist. unsigned int iCountSuccessfulConvergedProjects = 0; unsigned int nScraperCount = mMapCSManifestsBinnedByScraper.size(); @@ -5236,6 +5234,8 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project // The BeaconList is element 0, do that first. { + LOCK(StructConvergedManifest.CScraperConvergedManifest_ptr->cs_manifest); + auto iter = StructConvergedManifest.ConvergedManifestPartPtrsMap.find("BeaconList"); StructConvergedManifest.CScraperConvergedManifest_ptr->addPart(iter->second->hash); @@ -5834,7 +5834,7 @@ UniValue sendscraperfilemanifest(const UniValue& params, bool fHelp) bool ret; if (IsScraperAuthorizedToBroadcastManifests(AddressOut, KeyOut)) { - LOCK(cs_StructScraperFileManifest); + LOCK2(cs_StructScraperFileManifest, CScraperManifest::cs_mapManifest); ret = ScraperSendFileManifestContents(AddressOut, KeyOut); uiInterface.NotifyScraperEvent(scrapereventtypes::Manifest, CT_NEW, {}); @@ -6323,8 +6323,7 @@ UniValue scraperreport(const UniValue& params, bool fHelp) { // This lock order is required to avoid potential deadlocks between this function and other threads. 
- LOCK(CScraperManifest::cs_mapManifest); - LOCK2(CSplitBlob::cs_mapParts, cs_ConvergedScraperStatsCache); + LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts); manifest_map_size = CScraperManifest::mapManifest.size(); pending_deleted_manifest_map_size = CScraperManifest::mapPendingDeletedManifest.size(); @@ -6334,7 +6333,11 @@ UniValue scraperreport(const UniValue& params, bool fHelp) parts_map_size = CSplitBlob::mapParts.size(); - if (ConvergedScraperStatsCache.NewFormatSuperblock.WellFormed()) + // Note that we would want to, but cannot, hold the cs_ConvergedScraperStatsCache continuously as the outside lock + // during this function, because in other areas of the code, cs_ConvergedScraperStatsCache is taken as the INSIDE + // lock, and this results in a potential deadlock situation. So, cs_ConvergedScraperStatsCache is locked three + // different times here. The third one is the most important, where it is locked AFTER cs_manifest. + if (WITH_LOCK(cs_ConvergedScraperStatsCache, return ConvergedScraperStatsCache.NewFormatSuperblock.WellFormed())) { uint64_t current_convergence_publishing_scrapers = 0; uint64_t current_convergence_part_pointer_map_size = 0; @@ -6352,47 +6355,53 @@ UniValue scraperreport(const UniValue& params, bool fHelp) UniValue manifests_with_null_phashes(UniValue::VARR); UniValue csplitblobs_invalid_manifests(UniValue::VARR); - current_convergence_publishing_scrapers = - ConvergedScraperStatsCache.Convergence.vIncludedScrapers.size() - + ConvergedScraperStatsCache.Convergence.vExcludedScrapers.size(); + uint64_t total_convergences_part_pointer_maps_size = 0; - current_convergence_part_pointer_map_size = - ConvergedScraperStatsCache.Convergence.ConvergedManifestPartPtrsMap.size(); + { + LOCK(cs_ConvergedScraperStatsCache); - past_convergence_map_size = - ConvergedScraperStatsCache.PastConvergences.size(); + current_convergence_publishing_scrapers = + ConvergedScraperStatsCache.Convergence.vIncludedScrapers.size() + + ConvergedScraperStatsCache.Convergence.vExcludedScrapers.size(); - // Count the number of convergences that are by part (project). Note that these WILL NOT be in the - // manifest maps, because they are composite manifests that are LOCAL ONLY. If the convergences - // are at the manifest level, then the CScraperManifest_shared_ptr CScraperConvergedManifest_ptr - // will point to a manifest that IS ALREADY IN THE mapManifest. - if (ConvergedScraperStatsCache.Convergence.bByParts) ++number_of_convergences_by_parts; + current_convergence_part_pointer_map_size = + ConvergedScraperStatsCache.Convergence.ConvergedManifestPartPtrsMap.size(); - // Finish adding the number of convergences by parts below in the for loop for the PastConvergences so we - // don't have to traverse twice. + past_convergence_map_size = + ConvergedScraperStatsCache.PastConvergences.size(); - // This next section will form a set of unique pointers in the global cache - // and also add the pointers up arithmetically. The difference is the efficiency gain - // from using pointers rather than copies into the global cache. - for (const auto& iter : ConvergedScraperStatsCache.Convergence.ConvergedManifestPartPtrsMap) - { - global_cache_unique_parts.insert(iter.second); - } + // Count the number of convergences that are by part (project). Note that these WILL NOT be in the + // manifest maps, because they are composite manifests that are LOCAL ONLY. 
If the convergences + // are at the manifest level, then the CScraperManifest_shared_ptr CScraperConvergedManifest_ptr + // will point to a manifest that IS ALREADY IN THE mapManifest. + if (ConvergedScraperStatsCache.Convergence.bByParts) ++number_of_convergences_by_parts; - uint64_t total_convergences_part_pointer_maps_size = current_convergence_part_pointer_map_size; + // Finish adding the number of convergences by parts below in the for loop for the PastConvergences so we + // don't have to traverse twice. - for (const auto& iter : ConvergedScraperStatsCache.PastConvergences) - { - // This increments if the past convergence is by parts because these will NOT be in the manifest maps. - if (iter.second.second.bByParts) ++number_of_convergences_by_parts; - - for (const auto& iter2 : iter.second.second.ConvergedManifestPartPtrsMap) + // This next section will form a set of unique pointers in the global cache + // and also add the pointers up arithmetically. The difference is the efficiency gain + // from using pointers rather than copies into the global cache. + for (const auto& iter : ConvergedScraperStatsCache.Convergence.ConvergedManifestPartPtrsMap) { - global_cache_unique_parts.insert(iter2.second); + global_cache_unique_parts.insert(iter.second); } - total_convergences_part_pointer_maps_size += - iter.second.second.ConvergedManifestPartPtrsMap.size(); + total_convergences_part_pointer_maps_size = current_convergence_part_pointer_map_size; + + for (const auto& iter : ConvergedScraperStatsCache.PastConvergences) + { + // This increments if the past convergence is by parts because these will NOT be in the manifest maps. + if (iter.second.second.bByParts) ++number_of_convergences_by_parts; + + for (const auto& iter2 : iter.second.second.ConvergedManifestPartPtrsMap) + { + global_cache_unique_parts.insert(iter2.second); + } + + total_convergences_part_pointer_maps_size += + iter.second.second.ConvergedManifestPartPtrsMap.size(); + } } global_scraper_net.pushKV("number_of_convergences_by_parts", number_of_convergences_by_parts); @@ -6465,6 +6474,8 @@ UniValue scraperreport(const UniValue& params, bool fHelp) } // valid manifest but null pointer to index hash else { + LOCK(cs_ConvergedScraperStatsCache); + // The current convergence (i.e. a by parts convergence in the local global cache but not in // the published maps? if (ConvergedScraperStatsCache.Convergence.CScraperConvergedManifest_ptr.get() != manifest_ptr) diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index e1c2bb0212..0a56b09ee2 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -267,8 +267,11 @@ void CScraperManifest::PushInvTo(CNode* pto) EXCLUSIVE_LOCKS_REQUIRED(CScraperMa } } +// The exclusive lock on cs_mapParts is required because the part hashes are serialized as part of the manifest +// serialization. These hashes are contained in the part objects in the mapParts, which is POINTED TO by the manifest +// vParts vector. We need to ensure that the mapParts is not changing while the vParts vector is traversed. 
bool CScraperManifest::SendManifestTo(CNode* pto, std::shared_ptr manifest) -EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts) +EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_mapParts) { LOCK(manifest->cs_manifest); @@ -330,10 +333,9 @@ EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) ss << ConsensusBlock; } -void CScraperManifest::Serialize(CDataStream& ss) const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_mapParts) +void CScraperManifest::Serialize(CDataStream& ss) const +EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest, CSplitBlob::cs_mapParts) { - LOCK(cs_manifest); - SerializeWithoutSignature(ss); ss << signature; } @@ -685,42 +687,44 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) CScraperManifest_shared_ptr manifest = std::shared_ptr(new CScraperManifest()); - LOCK(cs_mapManifest); + LOCK2(cs_mapManifest, cs_mapParts); const auto it = mapManifest.emplace(hash, manifest); - LOCK2(cs_mapParts, manifest->cs_manifest); - - // The phash in the manifest points to the actual hash which is the index to the element in the map. - manifest->phash = &it.first->first; - - try { - manifest->UnserializeCheck(vRecv, banscore); - } catch (bool& e) - { - mapManifest.erase(hash); - LogPrint(BCLog::LogFlags::MANIFEST, "invalid manifest %s received", hash.GetHex()); + LOCK(manifest->cs_manifest); - if (pfrom) + // The phash in the manifest points to the actual hash which is the index to the element in the map. + manifest->phash = &it.first->first; + + try { - LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore " - "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore); - pfrom->Misbehaving(banscore); - } - return false; - } catch(std::ios_base::failure& e) - { - mapManifest.erase(hash); - LogPrint(BCLog::LogFlags::MANIFEST, "invalid manifest %s received", hash.GetHex()); + manifest->UnserializeCheck(vRecv, banscore); + } catch (bool& e) + { + mapManifest.erase(hash); + LogPrint(BCLog::LogFlags::MANIFEST, "invalid manifest %s received", hash.GetHex()); - if (pfrom) + if (pfrom) + { + LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore " + "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore); + pfrom->Misbehaving(banscore); + } + return false; + } catch(std::ios_base::failure& e) { - LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore " - "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore); - pfrom->Misbehaving(banscore); + mapManifest.erase(hash); + LogPrint(BCLog::LogFlags::MANIFEST, "invalid manifest %s received", hash.GetHex()); + + if (pfrom) + { + LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. 
Increasing banscore " + "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore); + pfrom->Misbehaving(banscore); + } + return false; } - return false; } // lock cs_ConvergedScraperStatsCache and mark ConvergedScraperStatsCache dirty because a new manifest is present, @@ -731,6 +735,9 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) ConvergedScraperStatsCache.bClean = false; } + // Relock manifest + LOCK(manifest->cs_manifest); + LogPrint(BCLog::LogFlags::MANIFEST, "received manifest %s with %u / %u parts", hash.GetHex(), (unsigned) manifest->cntPartsRcvd, (unsigned) manifest->vParts.size()); @@ -794,23 +801,26 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, cs_mapParts) if (it.second == false) return false; - CScraperManifest& manifest = *it.first->second; + // Release lock on cs_manifest before taking a lonk on cs_ConvergedScraperStatsCache to avoid potential deadlocks. + { + CScraperManifest& manifest = *it.first->second; - // Relock the manifest pointed to by the iterator. - LOCK(manifest.cs_manifest); + // Relock the manifest pointed to by the iterator. + LOCK(manifest.cs_manifest); - // set the hash pointer inside - manifest.phash = &it.first->first; + // set the hash pointer inside + manifest.phash = &it.first->first; - // We do not need to do a deserialize check here, because the - // manifest originates from THIS node, and the scraper's authorization - // to send has already been checked before the call. - // We also do not need to do a manifest.isComplete to see if all - // parts are available, because they have to be - this manifest was constructed - // on THIS node. + // We do not need to do a deserialize check here, because the + // manifest originates from THIS node, and the scraper's authorization + // to send has already been checked before the call. + // We also do not need to do a manifest.isComplete to see if all + // parts are available, because they have to be - this manifest was constructed + // on THIS node. - // Call manifest complete to notify peers of new manifest. - manifest.Complete(); + // Call manifest complete to notify peers of new manifest. + manifest.Complete(); + } // lock cs_ConvergedScraperStatsCache and mark ConvergedScraperStatsCache dirty because a new manifest is present, // so the convergence may change. diff --git a/src/sync.cpp b/src/sync.cpp index 13c245548b..4728f7c1e2 100644 --- a/src/sync.cpp +++ b/src/sync.cpp @@ -275,7 +275,7 @@ bool LockStackEmpty() return it->second.empty(); } -bool g_debug_lockorder_abort = true; -bool g_debug_lockorder_throw_exception = true; +bool g_debug_lockorder_abort = false; +bool g_debug_lockorder_throw_exception = false; #endif /* DEBUG_LOCKORDER */ From f2ca10ba4fc5e02481ebf4a35c88fa07a37e5a35 Mon Sep 17 00:00:00 2001 From: "James C. Owens" Date: Thu, 16 Sep 2021 23:09:05 -0400 Subject: [PATCH 11/14] Correct regression after rebase on update_scheduler --- src/gridcoin/scraper/scraper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp index 4e568e0ff2..55fd4a5ef4 100755 --- a/src/gridcoin/scraper/scraper.cpp +++ b/src/gridcoin/scraper/scraper.cpp @@ -1578,7 +1578,7 @@ void Scraper(bool bSingleShot) _log(logattribute::INFO, "Scraper", "Superblock not needed. 
age=" + ToString(sbage)); _log(logattribute::INFO, "Scraper", "Sleeping for " + ToString(scraper_sleep() / 1000) +" seconds"); - if (!MilliSleep(nScraperSleep)) return; + if (!MilliSleep(scraper_sleep())) return; } } @@ -1695,8 +1695,8 @@ void Scraper(bool bSingleShot) ScraperHousekeeping(); - _log(logattribute::INFO, "Scraper", "Sleeping for " + ToString(nScraperSleep / 1000) +" seconds"); - if (!MilliSleep(nScraperSleep)) return; + _log(logattribute::INFO, "Scraper", "Sleeping for " + ToString(scraper_sleep() / 1000) +" seconds"); + if (!MilliSleep(scraper_sleep())) return; } else // This will break from the outer while loop if in singleshot mode and end execution after one pass. @@ -1765,7 +1765,7 @@ void ScraperSubscriber() // Use the same sleep interval configured for the scraper. _log(logattribute::INFO, "ScraperSubscriber", "Sleeping for " + ToString(scraper_sleep() / 1000) +" seconds"); - if (!MilliSleep(nScraperSleep)) return; + if (!MilliSleep(scraper_sleep())) return; } } From 7ac548c460f38de0e8b72360f870232263356bb7 Mon Sep 17 00:00:00 2001 From: "James C. Owens" Date: Sat, 18 Sep 2021 10:24:24 -0400 Subject: [PATCH 12/14] Move lock on cs_mapManifest in RecvManifest to avoid a WITH_LOCK --- src/gridcoin/scraper/scraper_net.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp index 0a56b09ee2..ba35b396be 100644 --- a/src/gridcoin/scraper/scraper_net.cpp +++ b/src/gridcoin/scraper/scraper_net.cpp @@ -677,8 +677,10 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) // hash the object uint256 hash(Hash(vRecv.begin(), vRecv.end())); + LOCK(cs_mapManifest); + // see if we do not already have it - if (WITH_LOCK(cs_mapManifest, return AlreadyHave(pfrom, CInv(MSG_SCRAPERINDEX, hash)))) + if (AlreadyHave(pfrom, CInv(MSG_SCRAPERINDEX, hash))) { LogPrint(BCLog::LogFlags::SCRAPER, "INFO: ScraperManifest::RecvManifest: Already have CScraperManifest %s from " "node %s.", hash.GetHex(), pfrom->addrName); @@ -687,7 +689,7 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv) CScraperManifest_shared_ptr manifest = std::shared_ptr(new CScraperManifest()); - LOCK2(cs_mapManifest, cs_mapParts); + LOCK(cs_mapParts); const auto it = mapManifest.emplace(hash, manifest); From f8457655b996b2f1afc6fbe6c0cc52e026b8f486 Mon Sep 17 00:00:00 2001 From: "James C. Owens" Date: Sat, 18 Sep 2021 10:35:06 -0400 Subject: [PATCH 13/14] Change UnserializeCheck to return a bool of success or failure rather than throwing bool. We still must use a try/catch, because the basic deserialization could fail on a malformed incoming message. 
---
 src/gridcoin/scraper/scraper_net.cpp | 42 +++++++++++++++-------------
 src/gridcoin/scraper/scraper_net.h   |  2 +-
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp
index ba35b396be..dc69bdfa87 100644
--- a/src/gridcoin/scraper/scraper_net.cpp
+++ b/src/gridcoin/scraper/scraper_net.cpp
@@ -423,7 +423,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest)
     }
 }
 
-void CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out)
+bool CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out)
 EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manifest)
 {
     const auto pbegin = ss.begin();
@@ -437,7 +437,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manife
     // This will set the bCheckAuthorized flag to false if a message
     // is received while the wallet is not in sync. If in sync and
     // the manifest is authorized, then set the checked flag to true,
-    // otherwise terminate the unserializecheck and throw an error,
+    // otherwise terminate the unserializecheck and return false,
     // which will also result in an increase in banscore, if past the grace period.
     if (OutOfSyncByAge())
     {
@@ -449,7 +449,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manife
     }
     else
     {
-        throw error("CScraperManifest::UnserializeCheck: Unapproved scraper ID");
+        return error("CScraperManifest::UnserializeCheck: Unapproved scraper ID");
     }
 
     // We need to do an additional check here for non-current manifests, because the sending node may not
@@ -457,7 +457,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manife
     // AlreadyHave only checks for whether the manifest is current if it is already in the map.
     if (!IsManifestCurrent())
     {
-        throw error("CScraperManifest::UnserializeCheck: Received non-current manifest.");
+        return error("CScraperManifest::UnserializeCheck: Received non-current manifest.");
     }
 
     ss >> ConsensusBlock;
@@ -466,14 +466,14 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manife
 
     if (BeaconList + BeaconList_c > vph.size())
     {
-        throw error("CScraperManifest::UnserializeCheck: beacon part out of range");
+        return error("CScraperManifest::UnserializeCheck: beacon part out of range");
     }
 
     for (const dentry& prj : projects)
     {
         if (prj.part1 + prj.partc > vph.size())
         {
-            throw error("CScraperManifest::UnserializeCheck: project part out of range");
+            return error("CScraperManifest::UnserializeCheck: project part out of range");
         }
     }
 
@@ -516,7 +516,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manife
 
         // Immediately ban the node from which the manifest was received.
         banscore_out = gArgs.GetArg("-banscore", 100);
-        throw error("CScraperManifest::UnserializeCheck: Too many projects in the manifest.");
+        return error("CScraperManifest::UnserializeCheck: Too many projects in the manifest.");
     }
 
     ss >> nContentHash;
@@ -528,13 +528,15 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manife
              Hash(signature.begin(), signature.end()).GetHex());
 
     CKey mkey;
-    if (!mkey.SetPubKey(pubkey)) throw error("CScraperManifest: Invalid manifest key");
-    if (!mkey.Verify(hash, signature)) throw error("CScraperManifest: Invalid manifest signature");
+    if (!mkey.SetPubKey(pubkey)) return error("CScraperManifest: Invalid manifest key");
+    if (!mkey.Verify(hash, signature)) return error("CScraperManifest: Invalid manifest signature");
 
     for (const uint256& ph : vph)
     {
         addPart(ph);
     }
+
+    return true;
 }
 
 bool CScraperManifest::IsManifestCurrent() const EXCLUSIVE_LOCKS_REQUIRED(CSplitBlob::cs_manifest)
@@ -701,19 +703,19 @@ bool CScraperManifest::RecvManifest(CNode* pfrom, CDataStream& vRecv)
 
     try
     {
-        manifest->UnserializeCheck(vRecv, banscore);
-    } catch (bool& e)
-    {
-        mapManifest.erase(hash);
-        LogPrint(BCLog::LogFlags::MANIFEST, "invalid manifest %s received", hash.GetHex());
-
-        if (pfrom)
+        if (!manifest->UnserializeCheck(vRecv, banscore))
         {
-            LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore "
-                      "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore);
-            pfrom->Misbehaving(banscore);
+            mapManifest.erase(hash);
+            LogPrint(BCLog::LogFlags::MANIFEST, "invalid manifest %s received", hash.GetHex());
+
+            if (pfrom)
+            {
+                LogPrintf("WARNING: CScraperManifest::RecvManifest: Invalid manifest %s received from %s. Increasing banscore "
+                          "by %u.", hash.GetHex(), pfrom->addr.ToString(), banscore);
+                pfrom->Misbehaving(banscore);
+            }
+            return false;
         }
-        return false;
     } catch(std::ios_base::failure& e)
     {
         mapManifest.erase(hash);
diff --git a/src/gridcoin/scraper/scraper_net.h b/src/gridcoin/scraper/scraper_net.h
index e29f04b0e9..a402fb4381 100755
--- a/src/gridcoin/scraper/scraper_net.h
+++ b/src/gridcoin/scraper/scraper_net.h
@@ -212,7 +212,7 @@ class CScraperManifest
 
     /** A combination of unserialization and integrity checking, which includes hash checks, authorization checks, and
      * signature checks. */
-    void UnserializeCheck(CDataStream& s, unsigned int& banscore_out);
+    [[nodiscard]] bool UnserializeCheck(CDataStream& s, unsigned int& banscore_out);
 
     /** Checks to see whether manifest age is current according to the SCRAPER_CMANIFEST_RETENTION_TIME network setting. */
     bool IsManifestCurrent() const;

From bc8832907d63d2240b8c532151a8d799af285e19 Mon Sep 17 00:00:00 2001
From: "James C. Owens"
Date: Sat, 18 Sep 2021 10:40:57 -0400
Subject: [PATCH 14/14] Correct format of std::atomic declarations in scraper.h

---
 src/gridcoin/scraper/scraper.h       | 5 ++---
 src/gridcoin/scraper/scraper_net.cpp | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/gridcoin/scraper/scraper.h b/src/gridcoin/scraper/scraper.h
index 06c90ba88e..caa8465597 100644
--- a/src/gridcoin/scraper/scraper.h
+++ b/src/gridcoin/scraper/scraper.h
@@ -1,7 +1,6 @@
 // Copyright (c) 2014-2021 The Gridcoin developers
 // Distributed under the MIT/X11 software license, see the accompanying
 // file COPYING or https://opensource.org/licenses/mit-license.php.
-
 #ifndef GRIDCOIN_SCRAPER_SCRAPER_H
 #define GRIDCOIN_SCRAPER_SCRAPER_H
 
@@ -49,8 +48,8 @@ extern bool SCRAPER_CMANIFEST_RETAIN_NONCURRENT;
 extern int64_t SCRAPER_CMANIFEST_RETENTION_TIME;
 extern bool SCRAPER_CMANIFEST_INCLUDE_NONCURRENT_PROJ_FILES;
 extern std::atomic<double> MAG_ROUND;
-extern std::atomic <double> NETWORK_MAGNITUDE;
-extern std::atomic <double> CPID_MAG_LIMIT;
+extern std::atomic<double> NETWORK_MAGNITUDE;
+extern std::atomic<double> CPID_MAG_LIMIT;
 extern unsigned int SCRAPER_CONVERGENCE_MINIMUM;
 extern double SCRAPER_CONVERGENCE_RATIO;
 extern double CONVERGENCE_BY_PROJECT_RATIO;
diff --git a/src/gridcoin/scraper/scraper_net.cpp b/src/gridcoin/scraper/scraper_net.cpp
index dc69bdfa87..79f58c6f1e 100644
--- a/src/gridcoin/scraper/scraper_net.cpp
+++ b/src/gridcoin/scraper/scraper_net.cpp
@@ -423,7 +423,7 @@ EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest)
     }
 }
 
-bool CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out)
+[[nodiscard]] bool CScraperManifest::UnserializeCheck(CDataStream& ss, unsigned int& banscore_out)
 EXCLUSIVE_LOCKS_REQUIRED(CScraperManifest::cs_mapManifest, CSplitBlob::cs_manifest)
 {
     const auto pbegin = ss.begin();