Skip to content

Commit 7c8793c

Browse files
committedFeb 16, 2015
Performance Improvement: Use a cache which caches result for getFacePositions.
This greatly reduce the number of std::list generated by caching the result, which is always constant for each radius selected. In the callgrind map, you will see original: * 3.3M calls to std::list for 9700 calls to getFacePositions In the modified version, you will see: * 3.3K calls to std::list for 6900 call to getFacePositions Callgrind map is here: #2321 it's a huge performance improvement to l_find_node_near
1 parent ed04e8e commit 7c8793c

File tree

4 files changed

+81
-68
lines changed

4 files changed

+81
-68
lines changed
 

Diff for: ‎src/clientiface.cpp

+5-8
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ void RemoteClient::ResendBlockIfOnWire(v3s16 p)
5959
}
6060
}
6161

62-
void RemoteClient::GetNextBlocks(
62+
void RemoteClient::GetNextBlocks (
6363
ServerEnvironment *env,
6464
EmergeManager * emerge,
6565
float dtime,
@@ -182,18 +182,15 @@ void RemoteClient::GetNextBlocks(
182182
//bool queue_is_full = false;
183183

184184
s16 d;
185-
for(d = d_start; d <= d_max; d++)
186-
{
185+
for(d = d_start; d <= d_max; d++) {
187186
/*
188187
Get the border/face dot coordinates of a "d-radiused"
189188
box
190189
*/
191-
std::list<v3s16> list;
192-
getFacePositions(list, d);
190+
std::vector<v3s16> list = FacePositionCache::getFacePositions(d);
193191

194-
std::list<v3s16>::iterator li;
195-
for(li=list.begin(); li!=list.end(); ++li)
196-
{
192+
std::vector<v3s16>::iterator li;
193+
for(li = list.begin(); li != list.end(); ++li) {
197194
v3s16 p = *li + center;
198195

199196
/*

Diff for: ‎src/script/lua_api/l_env.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -530,9 +530,8 @@ int ModApiEnvMod::l_find_node_near(lua_State *L)
530530
}
531531

532532
for(int d=1; d<=radius; d++){
533-
std::list<v3s16> list;
534-
getFacePositions(list, d);
535-
for(std::list<v3s16>::iterator i = list.begin();
533+
std::vector<v3s16> list = FacePositionCache::getFacePositions(d);
534+
for(std::vector<v3s16>::iterator i = list.begin();
536535
i != list.end(); ++i){
537536
v3s16 p = pos + (*i);
538537
content_t c = env->getMap().getNodeNoEx(p).getContent();

Diff for: ‎src/util/numeric.cpp

+59-55
Original file line numberDiff line numberDiff line change
@@ -20,90 +20,94 @@ with this program; if not, write to the Free Software Foundation, Inc.,
2020
#include "numeric.h"
2121
#include "mathconstants.h"
2222

23-
#include "../log.h"
23+
#include "log.h"
2424
#include "../constants.h" // BS, MAP_BLOCKSIZE
2525
#include <string.h>
2626
#include <iostream>
2727

28+
std::map<u16, std::vector<v3s16> > FacePositionCache::m_cache;
2829
// Calculate the borders of a "d-radius" cube
29-
void getFacePositions(std::list<v3s16> &list, u16 d)
30+
std::vector<v3s16> FacePositionCache::getFacePositions(u16 d)
3031
{
31-
if(d == 0)
32-
{
33-
list.push_back(v3s16(0,0,0));
32+
if (m_cache.find(d) != m_cache.end())
33+
return m_cache[d];
34+
35+
generateFacePosition(d);
36+
return m_cache[d];
37+
38+
}
39+
40+
void FacePositionCache::generateFacePosition(u16 d)
41+
{
42+
m_cache[d] = std::vector<v3s16>();
43+
if(d == 0) {
44+
m_cache[d].push_back(v3s16(0,0,0));
3445
return;
3546
}
36-
if(d == 1)
37-
{
47+
if(d == 1) {
3848
/*
3949
This is an optimized sequence of coordinates.
4050
*/
41-
list.push_back(v3s16( 0, 1, 0)); // top
42-
list.push_back(v3s16( 0, 0, 1)); // back
43-
list.push_back(v3s16(-1, 0, 0)); // left
44-
list.push_back(v3s16( 1, 0, 0)); // right
45-
list.push_back(v3s16( 0, 0,-1)); // front
46-
list.push_back(v3s16( 0,-1, 0)); // bottom
51+
m_cache[d].push_back(v3s16( 0, 1, 0)); // top
52+
m_cache[d].push_back(v3s16( 0, 0, 1)); // back
53+
m_cache[d].push_back(v3s16(-1, 0, 0)); // left
54+
m_cache[d].push_back(v3s16( 1, 0, 0)); // right
55+
m_cache[d].push_back(v3s16( 0, 0,-1)); // front
56+
m_cache[d].push_back(v3s16( 0,-1, 0)); // bottom
4757
// 6
48-
list.push_back(v3s16(-1, 0, 1)); // back left
49-
list.push_back(v3s16( 1, 0, 1)); // back right
50-
list.push_back(v3s16(-1, 0,-1)); // front left
51-
list.push_back(v3s16( 1, 0,-1)); // front right
52-
list.push_back(v3s16(-1,-1, 0)); // bottom left
53-
list.push_back(v3s16( 1,-1, 0)); // bottom right
54-
list.push_back(v3s16( 0,-1, 1)); // bottom back
55-
list.push_back(v3s16( 0,-1,-1)); // bottom front
56-
list.push_back(v3s16(-1, 1, 0)); // top left
57-
list.push_back(v3s16( 1, 1, 0)); // top right
58-
list.push_back(v3s16( 0, 1, 1)); // top back
59-
list.push_back(v3s16( 0, 1,-1)); // top front
58+
m_cache[d].push_back(v3s16(-1, 0, 1)); // back left
59+
m_cache[d].push_back(v3s16( 1, 0, 1)); // back right
60+
m_cache[d].push_back(v3s16(-1, 0,-1)); // front left
61+
m_cache[d].push_back(v3s16( 1, 0,-1)); // front right
62+
m_cache[d].push_back(v3s16(-1,-1, 0)); // bottom left
63+
m_cache[d].push_back(v3s16( 1,-1, 0)); // bottom right
64+
m_cache[d].push_back(v3s16( 0,-1, 1)); // bottom back
65+
m_cache[d].push_back(v3s16( 0,-1,-1)); // bottom front
66+
m_cache[d].push_back(v3s16(-1, 1, 0)); // top left
67+
m_cache[d].push_back(v3s16( 1, 1, 0)); // top right
68+
m_cache[d].push_back(v3s16( 0, 1, 1)); // top back
69+
m_cache[d].push_back(v3s16( 0, 1,-1)); // top front
6070
// 18
61-
list.push_back(v3s16(-1, 1, 1)); // top back-left
62-
list.push_back(v3s16( 1, 1, 1)); // top back-right
63-
list.push_back(v3s16(-1, 1,-1)); // top front-left
64-
list.push_back(v3s16( 1, 1,-1)); // top front-right
65-
list.push_back(v3s16(-1,-1, 1)); // bottom back-left
66-
list.push_back(v3s16( 1,-1, 1)); // bottom back-right
67-
list.push_back(v3s16(-1,-1,-1)); // bottom front-left
68-
list.push_back(v3s16( 1,-1,-1)); // bottom front-right
71+
m_cache[d].push_back(v3s16(-1, 1, 1)); // top back-left
72+
m_cache[d].push_back(v3s16( 1, 1, 1)); // top back-right
73+
m_cache[d].push_back(v3s16(-1, 1,-1)); // top front-left
74+
m_cache[d].push_back(v3s16( 1, 1,-1)); // top front-right
75+
m_cache[d].push_back(v3s16(-1,-1, 1)); // bottom back-left
76+
m_cache[d].push_back(v3s16( 1,-1, 1)); // bottom back-right
77+
m_cache[d].push_back(v3s16(-1,-1,-1)); // bottom front-left
78+
m_cache[d].push_back(v3s16( 1,-1,-1)); // bottom front-right
6979
// 26
7080
return;
7181
}
7282

7383
// Take blocks in all sides, starting from y=0 and going +-y
74-
for(s16 y=0; y<=d-1; y++)
75-
{
84+
for(s16 y=0; y<=d-1; y++) {
7685
// Left and right side, including borders
77-
for(s16 z=-d; z<=d; z++)
78-
{
79-
list.push_back(v3s16(d,y,z));
80-
list.push_back(v3s16(-d,y,z));
81-
if(y != 0)
82-
{
83-
list.push_back(v3s16(d,-y,z));
84-
list.push_back(v3s16(-d,-y,z));
86+
for(s16 z=-d; z<=d; z++) {
87+
m_cache[d].push_back(v3s16(d,y,z));
88+
m_cache[d].push_back(v3s16(-d,y,z));
89+
if(y != 0) {
90+
m_cache[d].push_back(v3s16(d,-y,z));
91+
m_cache[d].push_back(v3s16(-d,-y,z));
8592
}
8693
}
8794
// Back and front side, excluding borders
88-
for(s16 x=-d+1; x<=d-1; x++)
89-
{
90-
list.push_back(v3s16(x,y,d));
91-
list.push_back(v3s16(x,y,-d));
92-
if(y != 0)
93-
{
94-
list.push_back(v3s16(x,-y,d));
95-
list.push_back(v3s16(x,-y,-d));
95+
for(s16 x=-d+1; x<=d-1; x++) {
96+
m_cache[d].push_back(v3s16(x,y,d));
97+
m_cache[d].push_back(v3s16(x,y,-d));
98+
if(y != 0) {
99+
m_cache[d].push_back(v3s16(x,-y,d));
100+
m_cache[d].push_back(v3s16(x,-y,-d));
96101
}
97102
}
98103
}
99104

100105
// Take the bottom and top face with borders
101106
// -d<x<d, y=+-d, -d<z<d
102107
for(s16 x=-d; x<=d; x++)
103-
for(s16 z=-d; z<=d; z++)
104-
{
105-
list.push_back(v3s16(x,-d,z));
106-
list.push_back(v3s16(x,d,z));
108+
for(s16 z=-d; z<=d; z++) {
109+
m_cache[d].push_back(v3s16(x,-d,z));
110+
m_cache[d].push_back(v3s16(x,d,z));
107111
}
108112
}
109113

Diff for: ‎src/util/numeric.h

+15-2
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,23 @@ with this program; if not, write to the Free Software Foundation, Inc.,
2525
#include "../irr_v3d.h"
2626
#include "../irr_aabb3d.h"
2727
#include <list>
28+
#include <map>
29+
#include <vector>
2830
#include <algorithm>
2931

30-
// Calculate the borders of a "d-radius" cube
31-
void getFacePositions(std::list<v3s16> &list, u16 d);
32+
33+
/*
34+
* This class permits to cache getFacePosition call results
35+
* This reduces CPU usage and vector calls
36+
*/
37+
class FacePositionCache
38+
{
39+
public:
40+
static std::vector<v3s16> getFacePositions(u16 d);
41+
private:
42+
static void generateFacePosition(u16 d);
43+
static std::map<u16, std::vector<v3s16> > m_cache;
44+
};
3245

3346
class IndentationRaiser
3447
{

0 commit comments

Comments
 (0)
Please sign in to comment.