Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: feae9527846a
Choose a base ref
...
head repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 193f0f4d647c
Choose a head ref
  • 4 commits
  • 5 files changed
  • 1 contributor

Commits on Aug 7, 2020

  1. Copy the full SHA
    7d00bee View commit details

Commits on Aug 8, 2020

  1. Copy the full SHA
    ab1bf97 View commit details
  2. Significant performance improvements to WaveformArea::PrepareGeometry…

    …() for large waveforms. Moved some preprocessing to compute shader.
    azonenberg committed Aug 8, 2020
    Copy the full SHA
    45857e5 View commit details
  3. Copy the full SHA
    193f0f4 View commit details
4 changes: 3 additions & 1 deletion src/glscopeclient/WaveformArea.h
Original file line number Diff line number Diff line change
@@ -96,7 +96,8 @@ class WaveformRenderData
bool m_geometryOK;

//SSBOs with waveform data
ShaderStorageBuffer m_waveformStorageBuffer;
ShaderStorageBuffer m_waveformXBuffer;
ShaderStorageBuffer m_waveformYBuffer;
ShaderStorageBuffer m_waveformConfigBuffer;
ShaderStorageBuffer m_waveformIndexBuffer;

@@ -373,6 +374,7 @@ class WaveformArea : public Gtk::GLArea
float XAxisUnitsToPixels(int64_t t);
float XAxisUnitsToXPosition(int64_t t);
float PickStepSize(float volts_per_half_span, int min_steps = 2, int max_steps = 5);
size_t BinarySearchForGequal(float* buf, size_t len, float value);

void OnRemoveOverlay(ProtocolDecoder* decode);

165 changes: 104 additions & 61 deletions src/glscopeclient/WaveformArea_rendering.cpp
Original file line number Diff line number Diff line change
@@ -77,9 +77,7 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata)
return;
}

double xscale = pdat->m_timescale * m_group->m_pixelsPerXUnit;
float xoff = (pdat->m_triggerPhase - m_group->m_xAxisOffset) * m_group->m_pixelsPerXUnit;

float xscale = pdat->m_timescale * m_group->m_pixelsPerXUnit;
bool fft = IsFFT();

//Zero voltage level
@@ -96,64 +94,72 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata)
ybase = m_height - (m_overlayPositions[dynamic_cast<ProtocolDecoder*>(channel)] + 10);
}

//Calculate X/Y coordinate of each sample point
//TODO: some of this can probably move to GPU too?
vector<EmptyConstructorWrapper<float>> traceBuffer;
vector<EmptyConstructorWrapper<uint32_t>> indexBuffer;
double offset = channel->GetOffset();
float offset = channel->GetOffset();

//Y axis scaling in shader
float yoff = 0;
float yscale = 1;

//We need to stretch every sample to two samples, one at the very left and one at the very right,
//so interpolation works right.
//TODO: we can probably avoid this by rewriting the compute shader to not interpolate like this
//TODO: only add extra samples if the left and right values are not the same?
size_t realcount = count;
if(digdat)
{
//We need to stretch every sample to two samples, one at the very left and one at the very right,
//so interpolation works right.
//TODO: we can probably avoid this by rewriting the compute shader to not interpolate like this
//TODO: only add extra samples if the left and right values are not the same
size_t realcount = count;
count *= 2;

float* xBuffer = reinterpret_cast<float*>(aligned_alloc(32, count*sizeof(float)));
float* yBuffer = NULL;
bool needToFreeYBuffer = true;
uint32_t* indexBuffer = reinterpret_cast<uint32_t*>(aligned_alloc(32, m_width*sizeof(uint32_t)));

if(digdat)
{
float digheight;
if(channel == m_channel)
digheight = m_height - 5;
else
digheight = 20;

traceBuffer.resize(count*2);
indexBuffer.resize(m_width);
yBuffer = reinterpret_cast<float*>(aligned_alloc(32, count*sizeof(float)));

//#pragma omp parallel for
yoff = ybase;
yscale = digheight;
for(size_t j=0; j<realcount; j++)
{
int64_t off = digdat->m_offsets[j];
traceBuffer[j*4] = off * xscale + xoff;
traceBuffer[j*4 + 2] = (off + digdat->m_durations[j]) * xscale + xoff - 1;
xBuffer[j*2] = off;
xBuffer[j*2 + 1] = off + digdat->m_durations[j];

float y = ybase + ( digdat->m_samples[j] ? digheight: 0 );
traceBuffer[j*4 + 1] = y;
traceBuffer[j*4 + 3] = y;
yBuffer[j*2] = digdat->m_samples[j];
yBuffer[j*2 + 1] = digdat->m_samples[j];
}
}
else
{
traceBuffer.resize(count*2);
indexBuffer.resize(m_width);
float* psamps = reinterpret_cast<float*>(__builtin_assume_aligned(&andat->m_samples[0], 16));
float* pdst = reinterpret_cast<float*>(__builtin_assume_aligned(xBuffer, 32));
int64_t* psrc = reinterpret_cast<int64_t*>(__builtin_assume_aligned(&andat->m_offsets[0], 16));

//Not possible to push this to a compute shader without GL_ARB_gpu_shader_int64,
//which isn't well supported on integrated gfx yet :(
for(size_t j=0; j < count; j++)
pdst[j] = psrc[j];

//TODO: can we push this to a compute shader?
//This doesn't look too SIMD-friendly because the inputs aren't all flops
if(fft)
{
yBuffer = reinterpret_cast<float*>(aligned_alloc(32, count*sizeof(float)));
yscale = 1;
for(size_t j=0; j<count; j++)
{
traceBuffer[j*2] = andat->m_offsets[j] * xscale + xoff;
traceBuffer[j*2 + 1] = DbToYPosition(-70 - (20 * log10(andat->m_samples[j]))); //TODO: don't hard code plot limits
}
yBuffer[j] = DbToYPosition(-70 - (20 * log10(psamps[j]))); //TODO: don't hard code plot limits
}
else
{
for(size_t j=0; j<count; j++)
{
traceBuffer[j*2] = andat->m_offsets[j] * xscale + xoff;
traceBuffer[j*2 + 1] = (m_pixelsPerVolt * (andat->m_samples[j] + offset)) + ybase;
}
yoff = ybase;
yscale = m_pixelsPerVolt;
needToFreeYBuffer = false;
yBuffer = psamps;
}
}

@@ -165,56 +171,92 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata)
//This is necessary since samples may be sparse and have arbitrary spacing between them, so we can't
//trivially map sample indexes to X pixel coordinates.
//TODO: can we parallelize this? move to a compute shader?
size_t nsample = 0;
float xoff = (pdat->m_triggerPhase - m_group->m_xAxisOffset) * m_group->m_pixelsPerXUnit;
for(int j=0; j<m_width; j++)
{
bool hit = false;

//Move forward until we find a sample that starts in the current column
for(; nsample < count-1; nsample ++)
{
//If the next sample ends after the start of the current pixel. stop
float end = traceBuffer[(nsample+1)*2];
if(end >= j)
{
//Start the current column at this sample
indexBuffer[j] = nsample;
hit = true;
break;
}
}

//Default to drawing nothing
if(!hit)
indexBuffer[j] = count;
}
indexBuffer[j] = BinarySearchForGequal(xBuffer, count, (j - xoff) / xscale);

dt = GetTime() - start;
m_indexTime += dt;
start = GetTime();

//Download it
wdata->m_waveformStorageBuffer.Bind();
glBufferData(GL_SHADER_STORAGE_BUFFER, traceBuffer.size()*sizeof(float), &traceBuffer[0], GL_STREAM_DRAW);
wdata->m_waveformXBuffer.Bind();
glBufferData(GL_SHADER_STORAGE_BUFFER, count*sizeof(float), xBuffer, GL_STREAM_DRAW);
wdata->m_waveformYBuffer.Bind();
glBufferData(GL_SHADER_STORAGE_BUFFER, count*sizeof(float), yBuffer, GL_STREAM_DRAW);

//Config stuff
uint32_t config[5];
uint32_t config[10];
float* fconfig = reinterpret_cast<float*>(config);
config[0] = m_height; //windowHeight
config[1] = m_plotRight; //windowWidth
config[2] = count; //depth
config[3] = m_parent->GetTraceAlpha() * 256; //alpha
config[4] = digdat ? 1 : 0; //digital
fconfig[5] = xoff; //xoff
fconfig[6] = xscale; //xscale
fconfig[7] = yoff; //ybase
fconfig[8] = yscale; //yscale
fconfig[9] = offset; //yoff
wdata->m_waveformConfigBuffer.Bind();
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(config), config, GL_STREAM_DRAW);

//Indexing
wdata->m_waveformIndexBuffer.Bind();
glBufferData(GL_SHADER_STORAGE_BUFFER, indexBuffer.size()*sizeof(uint32_t), &indexBuffer[0], GL_STREAM_DRAW);
glBufferData(GL_SHADER_STORAGE_BUFFER, m_width*sizeof(uint32_t), indexBuffer, GL_STREAM_DRAW);

dt = GetTime() - start;
m_downloadTime += dt;

wdata->m_geometryOK = true;

free(xBuffer);
if(needToFreeYBuffer)
free(yBuffer);
free(indexBuffer);
}

/**
@brief Look for a value greater than or equal to "value" in buf and return the index
*/
size_t WaveformArea::BinarySearchForGequal(float* buf, size_t len, float value)
{
size_t pos = len/2;
size_t last_lo = 0;
size_t last_hi = len-1;

//Clip if out of range
if(buf[0] >= value)
return 0;
if(buf[last_hi] < value)
return len;

while(true)
{
LogIndenter li;

//Stop if we've bracketed the target
if( (last_hi - last_lo) <= 1)
break;

//Move down
if(buf[pos] > value)
{
size_t delta = pos - last_lo;
last_hi = pos;
pos = last_lo + delta/2;
}

//Move up
else
{
size_t delta = last_hi - pos;
last_lo = pos;
pos = last_hi - delta/2;
}
}

return last_lo;
}

void WaveformArea::ResetTextureFiltering()
@@ -460,7 +502,8 @@ void WaveformArea::RenderTrace(WaveformRenderData* data)

m_waveformComputeProgram.Bind();
m_waveformComputeProgram.SetImageUniform(data->m_waveformTexture, "outputTex");
data->m_waveformStorageBuffer.BindBase(1);
data->m_waveformXBuffer.BindBase(1);
data->m_waveformYBuffer.BindBase(4);
data->m_waveformConfigBuffer.BindBase(2);
data->m_waveformIndexBuffer.BindBase(3);
m_waveformComputeProgram.DispatchCompute(numGroups, 1, 1);
2 changes: 2 additions & 0 deletions src/glscopeclient/glscopeclient.h
Original file line number Diff line number Diff line change
@@ -75,4 +75,6 @@

double GetTime();

extern bool g_hasAvx512F;

#endif
15 changes: 11 additions & 4 deletions src/glscopeclient/main.cpp
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@
@author Andrew D. Zonenberg
@brief Program entry point
*/

#ifdef _WIN32
#include <windows.h>
#include <shlwapi.h>
@@ -54,6 +54,8 @@ int g_numDecodes = 0;

ScopeApp* g_app = NULL;

bool g_hasAvx512F = false;

void help();

void help()
@@ -159,25 +161,30 @@ int main(int argc, char* argv[])
//Set up logging
g_log_sinks.emplace(g_log_sinks.begin(), new ColoredSTDLogSink(console_verbosity));

//Check CPU features
g_hasAvx512F = __builtin_cpu_supports("avx512f");
if(g_hasAvx512F)
LogDebug("CPU supports AVX512F\n");

//Change to the binary's directory so we can use relative paths for external resources
//FIXME: portability warning: this only works on Linux
#ifdef _WIN32
// Retrieve the file name of the current process image
TCHAR binPath[MAX_PATH];

if( GetModuleFileName(NULL, binPath, MAX_PATH) == 0 )
{
LogError("Error: GetModuleFileName() failed.\n");
return 1;
}

// Remove file name from path
if( !PathRemoveFileSpec(binPath) )
{
LogError("Error: PathRemoveFileSpec() failed.\n");
return 1;
}

// Set it as current working directory
if( SetCurrentDirectory(binPath) == 0 )
{
19 changes: 11 additions & 8 deletions src/glscopeclient/shaders/waveform-compute.glsl
Original file line number Diff line number Diff line change
@@ -3,16 +3,14 @@
//The output texture (for now, only alpha channel is used)
layout(binding=0, rgba32f) uniform image2D outputTex;

//Voltage data
struct data_point
layout(std430, binding=1) buffer waveform_x
{
float x; //x pixel position (fractional)
float voltage; //y value of this sample, in pixels
float xpos[]; //x pixel position (fractional)
};

layout(std430, binding=1) buffer waveform
layout(std430, binding=4) buffer waveform_y
{
data_point data[];
float voltage[]; //y value of this sample, in pixels
};

//Global configuration for the run
@@ -23,6 +21,11 @@ layout(std430, binding=2) buffer config
uint memDepth;
uint alpha_scaled;
uint digital;
float xoff;
float xscale;
float ybase;
float yscale;
float yoff;
};

//Indexes so we know which samples go to which X pixel range
@@ -81,12 +84,12 @@ void main()

//Loop over the waveform, starting at the leftmost point that overlaps this column
uint istart = xind[gl_GlobalInvocationID.x];
vec2 left = vec2(data[istart].x, data[istart].voltage);
vec2 left = vec2(xpos[istart]*xscale + xoff, (voltage[istart] + yoff)*yscale + ybase);
vec2 right;
for(uint i=istart; i<(memDepth-1); i++)
{
//Fetch coordinates of the current and upcoming sample
right = vec2(data[i+1].x, data[i+1].voltage);
right = vec2(xpos[i+1]*xscale + xoff, (voltage[i+1] + yoff)*yscale + ybase);

//If the current point is right of us, stop
if(left.x > gl_GlobalInvocationID.x + 1)