Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 571d3d34eb77
Choose a base ref
...
head repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 4fc69326d1c3
Choose a head ref
  • 3 commits
  • 8 files changed
  • 1 contributor

Commits on May 7, 2021

  1. Initial implementation of dense pack optimizations for waveform rende…

    …ring. About 26% speedup for a dense 128M point waveform. Fixes #328 but probably still more room to tweak.
    azonenberg committed May 7, 2021
    Copy the full SHA
    8918279 View commit details
  2. Copy the full SHA
    4a83a23 View commit details
  3. Switched local structure in compute shader to 1x32 instead of 2x16 fo…

    …r significant (44%) speedups due to better GPU occupancy
    azonenberg committed May 7, 2021
    Copy the full SHA
    4fc6932 View commit details
18 changes: 18 additions & 0 deletions src/glscopeclient/WaveformArea.cpp
Original file line number Diff line number Diff line change
@@ -611,6 +611,7 @@ void WaveformArea::CleanupGLHandles()
m_histogramWaveformComputeProgram.Destroy();
m_digitalWaveformComputeProgram.Destroy();
m_analogWaveformComputeProgram.Destroy();
m_denseAnalogWaveformComputeProgram.Destroy();
m_colormapProgram.Destroy();
m_eyeProgram.Destroy();
m_spectrogramProgram.Destroy();
@@ -651,6 +652,7 @@ void WaveformArea::InitializeWaveformPass()
ComputeShader hwc;
ComputeShader dwc;
ComputeShader awc;
ComputeShader adwc;
if(GLEW_ARB_gpu_shader_int64 && !g_noglint64)
{
if(!hwc.Load(
@@ -671,6 +673,12 @@ void WaveformArea::InitializeWaveformPass()
"shaders/waveform-compute-core.glsl",
NULL))
LogFatal("failed to load analog waveform compute shader, aborting\n");
if(!adwc.Load(
"shaders/waveform-compute-head-dense.glsl",
"shaders/waveform-compute-analog.glsl",
"shaders/waveform-compute-core.glsl",
NULL))
LogFatal("failed to load dense analog waveform compute shader, aborting\n");
}
else
{
@@ -692,6 +700,12 @@ void WaveformArea::InitializeWaveformPass()
"shaders/waveform-compute-core.glsl",
NULL))
LogFatal("failed to load analog waveform compute shader, aborting\n");
if(!adwc.Load(
"shaders/waveform-compute-head-dense-noint64.glsl",
"shaders/waveform-compute-analog.glsl",
"shaders/waveform-compute-core.glsl",
NULL))
LogFatal("failed to load dense analog waveform compute shader, aborting\n");
}

//Link them
@@ -706,6 +720,10 @@ void WaveformArea::InitializeWaveformPass()
m_analogWaveformComputeProgram.Add(awc);
if(!m_analogWaveformComputeProgram.Link())
LogFatal("failed to link analog waveform shader program, aborting\n");

m_denseAnalogWaveformComputeProgram.Add(adwc);
if(!m_denseAnalogWaveformComputeProgram.Link())
LogFatal("failed to link dense analog waveform shader program, aborting\n");
}

void WaveformArea::InitializeColormapPass()
10 changes: 10 additions & 0 deletions src/glscopeclient/WaveformArea.h
Original file line number Diff line number Diff line change
@@ -72,6 +72,15 @@ class WaveformRenderData
bool IsHistogram()
{ return m_channel.m_channel->GetYAxisUnits() == Unit(Unit::UNIT_COUNTS_SCI); }

/**
	@brief Reports whether the waveform returned by GetData(0) on this channel is flagged as dense packed

	@return The waveform's m_densePacked flag, or false if GetData(0) returned no waveform
 */
bool IsDensePacked()
{
	auto waveform = m_channel.m_channel->GetData(0);

	//No waveform on the channel: treat as not dense packed
	if(!waveform)
		return false;

	return waveform->m_densePacked;
}

WaveformArea* m_area;

//The channel of interest
@@ -311,6 +320,7 @@ class WaveformArea : public Gtk::GLArea
void RenderTrace(WaveformRenderData* wdata);
void InitializeWaveformPass();
Program m_analogWaveformComputeProgram;
Program m_denseAnalogWaveformComputeProgram;
Program m_digitalWaveformComputeProgram;
Program m_histogramWaveformComputeProgram;
WaveformRenderData* m_waveformRenderData;
73 changes: 48 additions & 25 deletions src/glscopeclient/WaveformArea_rendering.cpp
Original file line number Diff line number Diff line change
@@ -87,7 +87,7 @@ void WaveformRenderData::MapBuffers(size_t width, bool update_waveform)
}

m_mappedIndexBuffer = (uint32_t*)m_waveformIndexBuffer.Map(width*sizeof(uint32_t));
m_mappedConfigBuffer = (uint32_t*)m_waveformConfigBuffer.Map(sizeof(float)*12);
m_mappedConfigBuffer = (uint32_t*)m_waveformConfigBuffer.Map(sizeof(float)*13);
//We're writing to different offsets in the buffer, not reinterpreting, so this is safe.
//A struct is probably the better long term solution...
//cppcheck-suppress invalidPointerCast
@@ -176,24 +176,27 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata, bool update_wavefo
else
memcpy(wdata->m_mappedYBuffer, &andat->m_samples[0], wdata->m_count*sizeof(float));

//Copy the X axis timestamps, no conversion needed
memcpy(wdata->m_mappedXBuffer, &pdat->m_offsets[0], wdata->m_count*sizeof(int64_t));
//Copy the X axis timestamps, no conversion needed.
//But if dense packed, we can skip this
if(!wdata->IsDensePacked())
memcpy(wdata->m_mappedXBuffer, &pdat->m_offsets[0], wdata->m_count*sizeof(int64_t));
}

//Calculate indexes for rendering.
//This is necessary since samples may be sparse and have arbitrary spacing between them, so we can't
//trivially map sample indexes to X pixel coordinates.
//Calculate indexes for rendering of sparse waveforms
//TODO: can we parallelize this? move to a compute shader?
auto group = wdata->m_area->m_group;
int64_t offset_samples = (group->m_xAxisOffset - pdat->m_triggerPhase) / pdat->m_timescale;
float xscale = (pdat->m_timescale * group->m_pixelsPerXUnit);
for(int j=0; j<wdata->m_area->m_width; j++)
if(!wdata->IsDensePacked())
{
int64_t target = floor(j / xscale) + offset_samples;
wdata->m_mappedIndexBuffer[j] = BinarySearchForGequal(
(int64_t*)&pdat->m_offsets[0],
wdata->m_count,
target-2);
for(int j=0; j<wdata->m_area->m_width; j++)
{
int64_t target = floor(j / xscale) + offset_samples;
wdata->m_mappedIndexBuffer[j] = BinarySearchForGequal(
(int64_t*)&pdat->m_offsets[0],
wdata->m_count,
target-2);
}
}

//Scale alpha by zoom.
@@ -211,18 +214,19 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata, bool update_wavefo
wdata->m_mappedConfigBuffer[2] = height; //windowHeight
wdata->m_mappedConfigBuffer[3] = wdata->m_area->m_plotRight; //windowWidth
wdata->m_mappedConfigBuffer[4] = wdata->m_count; //depth
wdata->m_mappedFloatConfigBuffer[5] = alpha_scaled; //alpha
wdata->m_mappedFloatConfigBuffer[6] = (pdat->m_triggerPhase - fractional_offset) * group->m_pixelsPerXUnit; //xoff
wdata->m_mappedFloatConfigBuffer[7] = pdat->m_timescale * group->m_pixelsPerXUnit; //xscale
wdata->m_mappedFloatConfigBuffer[8] = ybase; //ybase
wdata->m_mappedFloatConfigBuffer[9] = yscale; //yscale
wdata->m_mappedFloatConfigBuffer[10] = channel->GetOffset(); //yoff
wdata->m_mappedConfigBuffer[5] = offset_samples - 2; //offset_samples
wdata->m_mappedFloatConfigBuffer[6] = alpha_scaled; //alpha
wdata->m_mappedFloatConfigBuffer[7] = (pdat->m_triggerPhase - fractional_offset) * group->m_pixelsPerXUnit; //xoff
wdata->m_mappedFloatConfigBuffer[8] = xscale; //xscale
wdata->m_mappedFloatConfigBuffer[9] = ybase; //ybase
wdata->m_mappedFloatConfigBuffer[10] = yscale; //yscale
wdata->m_mappedFloatConfigBuffer[11] = channel->GetOffset(); //yoff

//persistScale
if(!wdata->m_persistence)
wdata->m_mappedFloatConfigBuffer[11] = 0;
wdata->m_mappedFloatConfigBuffer[12] = 0;
else
wdata->m_mappedFloatConfigBuffer[11] = persistDecay;
wdata->m_mappedFloatConfigBuffer[12] = persistDecay;

//Done
wdata->m_geometryOK = true;
@@ -433,7 +437,12 @@ bool WaveformArea::on_render(const Glib::RefPtr<Gdk::GLContext>& /*context*/)
else if(m_waveformRenderData->IsHistogram())
m_histogramWaveformComputeProgram.MemoryBarrier();
else
m_analogWaveformComputeProgram.MemoryBarrier();
{
if(m_waveformRenderData->IsDensePacked())
m_denseAnalogWaveformComputeProgram.MemoryBarrier();
else
m_analogWaveformComputeProgram.MemoryBarrier();
}

//Final compositing of data being drawn to the screen
m_windowFramebuffer.Bind(GL_FRAMEBUFFER);
@@ -612,7 +621,8 @@ void WaveformArea::RenderTrace(WaveformRenderData* data)
return;

//Round thread block size up to next multiple of the local size (must be power of two)
int localSize = 2;
//localSize must match COLS_PER_BLOCK in waveform-compute-core.glsl
int localSize = 1;
int numCols = m_plotRight;
if(0 != (numCols % localSize) )
{
@@ -633,8 +643,16 @@ void WaveformArea::RenderTrace(WaveformRenderData* data)
}
else
{
m_analogWaveformComputeProgram.Bind();
m_analogWaveformComputeProgram.SetImageUniform(data->m_waveformTexture, "outputTex");
if(data->IsDensePacked())
{
m_denseAnalogWaveformComputeProgram.Bind();
m_denseAnalogWaveformComputeProgram.SetImageUniform(data->m_waveformTexture, "outputTex");
}
else
{
m_analogWaveformComputeProgram.Bind();
m_analogWaveformComputeProgram.SetImageUniform(data->m_waveformTexture, "outputTex");
}
}

data->m_waveformXBuffer.BindBase(1);
@@ -647,7 +665,12 @@ void WaveformArea::RenderTrace(WaveformRenderData* data)
else if(data->IsHistogram())
m_histogramWaveformComputeProgram.DispatchCompute(numGroups, 1, 1);
else
m_analogWaveformComputeProgram.DispatchCompute(numGroups, 1, 1);
{
if(data->IsDensePacked())
m_denseAnalogWaveformComputeProgram.DispatchCompute(numGroups, 1, 1);
else
m_analogWaveformComputeProgram.DispatchCompute(numGroups, 1, 1);
}
}

void WaveformArea::RenderTraceColorCorrection(WaveformRenderData* data)
12 changes: 9 additions & 3 deletions src/glscopeclient/shaders/waveform-compute-core.glsl
Original file line number Diff line number Diff line change
@@ -3,8 +3,8 @@
#define MAX_HEIGHT 2048

//Number of columns of pixels per thread block
#define COLS_PER_BLOCK 2
#define ROWS_PER_BLOCK 16
#define COLS_PER_BLOCK 1
#define ROWS_PER_BLOCK 32

//The output texture (for now, only alpha channel is used)
layout(binding=0, rgba32f) uniform image2D outputTex;
@@ -66,7 +66,13 @@ void main()
g_done[gl_LocalInvocationID.x] = false;
g_updating[gl_LocalInvocationID.x] = false;

istart = xind[gl_GlobalInvocationID.x];
#ifdef DENSE_PACK
istart = uint(floor(gl_GlobalInvocationID.x / xscale)) + offset_samples;
#else
istart = xind[gl_GlobalInvocationID.x];
#endif


i = istart;

#ifdef ANALOG_PATH
96 changes: 96 additions & 0 deletions src/glscopeclient/shaders/waveform-compute-head-dense-noint64.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/***********************************************************************************************************************
* *
* glscopeclient *
* *
* Copyright (c) 2012-2021 Andrew D. Zonenberg *
* All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
* following conditions are met: *
* *
* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
* following disclaimer. *
* *
* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
* following disclaimer in the documentation and/or other materials provided with the distribution. *
* *
* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
* derived from this software without specific prior written permission. *
* *
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
* POSSIBILITY OF SUCH DAMAGE. *
* *
***********************************************************************************************************************/

/**
@file
@brief Waveform rendering shader for dense-packed waveforms without GL_ARB_gpu_shader_int64 support
*/

#version 420
#extension GL_ARB_compute_shader : require
#extension GL_ARB_arrays_of_arrays : require
#extension GL_ARB_shader_storage_buffer_object : require

#define DENSE_PACK

//X axis sample positions (SSBO binding 1).
//NOTE: FetchX() in this dense-packed head derives the position from the sample index and does not read xpos[].
//NOTE(review): the block is presumably still declared so the shared core shader and host-side bindings stay
//compatible with the sparse head - confirm against waveform-compute-core.glsl.
layout(std430, binding=1) buffer waveform_x
{
uint xpos[]; //x position, in time ticks
//actually 64-bit little endian signed ints
};

//Global configuration for the run (SSBO binding 2).
//Thirteen consecutive 32-bit words; the host maps sizeof(float)*13 bytes and fills them by word index in
//WaveformArea::PrepareGeometry(), so field order here must match those indexes exactly.
//Host writes visible for words 2..12; words 0..1 are presumably the two halves of innerXoff - confirm on the host side.
layout(std430, binding=2) buffer config
{
uint innerXoff_lo; //actually a 64-bit little endian signed int
uint innerXoff_hi;

uint windowHeight;		//word 2
uint windowWidth;		//word 3
uint memDepth;			//word 4: number of samples in the waveform
uint offset_samples;	//word 5: host stores its 64-bit (offset_samples - 2) through a uint32_t pointer,
						//so only 32 bits of that value reach the shader
float alpha;			//word 6
float xoff;				//word 7
float xscale;			//word 8
float ybase;			//word 9
float yscale;			//word 10
float yoff;				//word 11
float persistScale;		//word 12: host writes 0 when persistence is off, else the decay factor
};

//All this just because most Intel integrated GPUs lack GL_ARB_gpu_shader_int64...
/**
	@brief Computes (i + innerXoff) as a float using only 32-bit integer arithmetic

	innerXoff is a signed 64-bit value supplied as two 32-bit halves (innerXoff_lo / innerXoff_hi).
	In this dense-packed variant the sample index itself is used as the X position, so the position
	has no high half and nothing is read from xpos[].

	@param i	Sample index

	@return		The signed 64-bit sum converted to float
 */
float FetchX(uint i)
{
	//Dense packed: the X position is just the sample index (high half is implicitly zero)
	uint xpos_lo = i;
	uint offset_lo = innerXoff_lo;

	//Sum the low halves
	uint carry;
	uint sum_lo = uaddCarry(xpos_lo, offset_lo, carry);

	//Sum the high halves with carry in
	uint sum_hi = innerXoff_hi + carry;

	//If MSB is 1, we're negative.
	//Calculate the twos complement by flipping all the bits.
	//To complete the complement we need to add 1, but that comes later.
	bool negative = ( (sum_hi & 0x80000000u) == 0x80000000u );
	if(negative)
	{
		sum_lo = ~sum_lo;
		sum_hi = ~sum_hi;
	}

	//Convert back to floating point
	float f = (float(sum_hi) * 4294967296.0) + float(sum_lo);

	//f currently holds ~sum. The magnitude of a negative twos complement value is (~sum + 1),
	//so the signed result is -(f + 1), not (-f + 1): e.g. sum = -1 gives ~sum = 0 and must return -1.
	if(negative)
		f = -(f + 1.0);
	return f;
}
Loading