I have the following program similar to my code base. The FunctionState class, which executes some kind of algorithm (potentially in several threads), and the Function class, which controls the use of FunctionState classes and can perform some operations of installing / uninstalling the algorithm.
#include <iostream>
#include <vector>
class FunctionState;
class Function {
public:
virtual FunctionState* NewFunctionState() = 0;
protected:
std::vector<FunctionState*> states;
};
class FunctionState {
public:
FunctionState(Function* func) : mFunc(func) {}
virtual void RunState() = 0;
void ExecuteFunctionLotsAndLotsOfTimes();
private:
Function* mFunc;
};
#define VERY_BIG_NUMBER 10
void FunctionState::ExecuteFunctionLotsAndLotsOfTimes() {
for(int i = 0; i < VERY_BIG_NUMBER; ++i) {
RunState();
}
};
class PrintFunction : public Function {
FunctionState* NewFunctionState();
};
class PrintFunctionState : public FunctionState {
public:
PrintFunctionState(PrintFunction* func) : FunctionState(func) {}
void RunState() override {
std::cout << "in print function state" << '\n';
}
};
FunctionState* PrintFunction::NewFunctionState() {
FunctionState* state = new PrintFunctionState(this);
states.push_back(state);
return state;
}
class AddFunction : public Function {
FunctionState* NewFunctionState();
};
class AddFunctionState : public FunctionState {
public:
AddFunctionState(AddFunction* func) : FunctionState(func), x(0) {}
void RunState() override {
++x;
}
private:
int x;
};
FunctionState* AddFunction::NewFunctionState() {
FunctionState* state = new AddFunctionState(this);
states.push_back(state);
return state;
}
int main() {
Function* func = new PrintFunction();
Function* func2 = new AddFunction();
std::vector<Function*> vec = {func, func2};
for(auto& func : vec) {
func->NewFunctionState()->ExecuteFunctionLotsAndLotsOfTimes();
}
return 0;
}
Now I was profiling my code and saw that there is a hot spot in FunctionState :: ExecuteFunctionLotsAndLotsOfTimes (). The problem is that this function is repeated many times and calls RunState (), a virtual function in the FunctionState class. There, I perform many operations that could potentially infer vtable pointers from L1 cache, as a result of which L1 cache will skip every iteration of the loop.
, . , CRTP. FunctionState , RunState().
, CRTP, Function:
, . - 10 + ( , ).
, RunState(), CRTP, .
CRTP:
#include <iostream>
#include <vector>
class Function;
template<class T>
class FunctionState {
public:
FunctionState(Function* func) : mFunc(func) {}
void RunState() {
static_cast<T*>(this)->RunState();
};
void ExecuteFunctionLotsAndLotsOfTimes();
private:
Function* mFunc;
};
class Function {
public:
virtual FunctionState* NewFunctionState() = 0;
protected:
std::vector<FunctionState*> states;
};
#define VERY_BIG_NUMBER 10
template <typename T>
void FunctionState<T>::ExecuteFunctionLotsAndLotsOfTimes() {
for(int i = 0; i < VERY_BIG_NUMBER; ++i) {
RunState();
}
};
class PrintFunctionState;
class PrintFunction : public Function {
PrintFunctionState* NewFunctionState();
};
class PrintFunctionState : public FunctionState<PrintFunctionState> {
public:
PrintFunctionState(PrintFunction* func) : FunctionState<PrintFunctionState>(func) {}
void RunState() {
std::cout << "in print function state" << '\n';
}
};
PrintFunctionState* PrintFunction::NewFunctionState() {
PrintFunctionState* state = new PrintFunctionState(this);
states.push_back(state);
return state;
}
class AddFunctionState;
class AddFunction : public Function {
AddFunctionState* NewFunctionState();
};
class AddFunctionState : public FunctionState<AddFunctionState> {
public:
AddFunctionState(AddFunction* func) : FunctionState<AddFunctionState>(func), x(0) {}
void RunState() {
++x;
}
private:
int x;
};
AddFunctionState* AddFunction::NewFunctionState() {
AddFunctionState* state = new AddFunctionState(this);
states.push_back(state);
return state;
}
int main() {
Function* func = new PrintFunction();
Function* func2 = new AddFunction();
std::vector<Function*> vec = {func, func2};
for(auto& func : vec) {
func->NewFunctionState()->ExecuteFunctionLotsAndLotsOfTimes();
}
return 0;
}