Returns the number of removed occurrences.
90 {
91
92 if (rarenessCut < 0 or rarenessCut >= 1)
93 { B2WARNING("pruneGraph: rarenessCut is rubbish: " << rarenessCut << ", stopping prune-process."); return 0; }
94 if (rarenessCut == 0) { B2DEBUG(20, "pruneGraph: nothing to be done, stopping prune-process."); return 0; }
95
97
98
99 std::vector< std::pair<unsigned, std::vector<SubGraph<FilterType>*> >> trunks;
100
101
103 SubGraph<FilterType>& graph = subGraphEntry.second;
104 bool found = false;
105 for (auto& trunk : trunks) {
106 if (graph.checkSharesTrunk(*(trunk.second.at(0)))) {
107 trunk.first += graph.getFound();
108 trunk.second.push_back(&graph);
109 found = true;
110 continue;
111 }
112
113 if (found) continue;
114
115
116
117 trunks.push_back({graph.getFound(), {&graph} });
118 }
119 }
120
121 unsigned long nFoundB4 =
nFoundTotal(), nKilled = 0;
122 unsigned sizeb4 =
size();
123 B2DEBUG(20, "pruneGraph - before pruning: graph of size " << sizeb4 << " has " << trunks.size() << " trunks with " << nFoundB4 <<
124 " total found.");
125
126
127 std::vector<SubGraph<FilterType>*> deadBranches;
128 for (auto& trunk : trunks) {
129 double trunkCut = rarenessCut * double(trunk.first);
130
131
132 std::sort(trunk.second.begin(),
133 trunk.second.end(),
134 [](const SubGraph<FilterType>* a, const SubGraph<FilterType>* b)
135 -> bool { return a->getFound() < b->getFound(); });
136
137
138 auto pos = trunk.second.begin();
139 if (double((**pos).getFound()) >= trunkCut) continue;
140
141
142 while (pos != trunk.second.end()) {
143
144 deadBranches.push_back(*pos);
145 trunkCut -= double((**pos).getFound()) * rarenessCut;
146
147
148 if (double((**pos).getFound()) >= trunkCut) break;
149 ++pos;
150 }
151 }
152
153 if (deadBranches.empty()) { B2DEBUG(20, "pruneGraph: no rare branches found - stopping pruning process."); return 0; }
154
155
156 for (auto* graph : deadBranches) {
157 nKilled += graph->getFound();
159 }
160
161 B2DEBUG(20,
"pruneGraph - after pruning graph with size (before/after " << sizeb4 <<
"/" <<
size() <<
162 ") and nFound (before/after/killed " << nFoundB4 <<
"/" <<
nFoundTotal() <<
"/" << nKilled);
163
164 return nKilled;
165 }
unsigned size() const
Returns the number of subgraphs collected so far.
unsigned long nFoundTotal() const
Returns the total number of occurrences, summed over all subgraphs found.