One trick that can sometimes help in this situation is to sort the vector by type (you should be able to use the knowledge of the type available in the add () function to ensure that it is executed) if the order of the elements fails otherwise. If you are mainly going to iterate over a vector to call a virtual function, this will help the processor branch predictor predict the purpose of the call. Alternatively, you can maintain separate vectors for each type in your manager and iterate over them one by one, which has a similar effect.
The compiler optimizer can also help you with such code, especially if it supports profiled optimization (POGO). Compilers can de-virtualize calls in certain situations or with POGO can do something in the generated assembly to help the processor branch predictor, for example, test the most common types and make a direct call for those who have rejected an indirect call for less common types.
Here are the results of a test program that illustrates the performance benefits of sorting by type, Manager is your version, Manager2 maintains a hash table of vectors indexed by typeid:
Derived1::count = 50043000, Derived2::count = 49957000 class Manager::funcAll took 714ms Derived1::count = 50043000, Derived2::count = 49957000 class Manager2::funcAll took 274ms Derived1::count = 50043000, Derived2::count = 49957000 class Manager2::funcAll took 273ms Derived1::count = 50043000, Derived2::count = 49957000 class Manager::funcAll took 714ms
Test code:
#include <iostream> #include <vector> #include <memory> #include <random> #include <unordered_map> #include <typeindex> #include <chrono> using namespace std; using namespace std::chrono; static const int instanceCount = 100000; static const int funcAllIterations = 1000; static const int numTypes = 2; struct Base { virtual void func() = 0; }; struct Derived1 : Base { static int count; void func() override { ++count; } }; int Derived1::count = 0; struct Derived2 : Base { static int count; void func() override { ++count; } }; int Derived2::count = 0; class Manager { vector<unique_ptr<Base>> items; public: template<class T> void add() { items.emplace_back(new T); } void funcAll() { for (auto& i : items) i->func(); } }; class Manager2 { unordered_map<type_index, vector<unique_ptr<Base>>> items; public: template<class T> void add() { items[type_index(typeid(T))].push_back(make_unique<T>()); } void funcAll() { for (const auto& type : items) { for (auto& i : type.second) { i->func(); } } } }; template<typename Man> void Test() { mt19937 engine; uniform_int_distribution<int> d(0, numTypes - 1); Derived1::count = 0; Derived2::count = 0; Man man; for (auto i = 0; i < instanceCount; ++i) { switch (d(engine)) { case 0: man.add<Derived1>(); break; case 1: man.add<Derived2>(); break; } } auto startTime = high_resolution_clock::now(); for (auto i = 0; i < funcAllIterations; ++i) { man.funcAll(); } auto endTime = high_resolution_clock::now(); cout << "Derived1::count = " << Derived1::count << ", Derived2::count = " << Derived2::count << "\n" << typeid(Man).name() << "::funcAll took " << duration_cast<milliseconds>(endTime - startTime).count() << "ms" << endl; } int main() { Test<Manager>(); Test<Manager2>(); Test<Manager2>(); Test<Manager>(); }
source share