Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 16bf77be3992
Choose a base ref
...
head repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: bea78c34b555
Choose a head ref
  • 1 commit
  • 3 files changed
  • 1 contributor

Commits on Feb 23, 2020

  1. Moved temporary buffer to shared memory and made a few other tweaks for massive FPS improvements
    azonenberg committed Feb 23, 2020
    Copy the full SHA
    bea78c3 View commit details
Showing with 53 additions and 38 deletions.
  1. +0 −9 glscopeclient/WaveformArea_events.cpp
  2. +14 −8 glscopeclient/WaveformArea_rendering.cpp
  3. +39 −21 glscopeclient/shaders/waveform-compute.glsl
9 changes: 0 additions & 9 deletions glscopeclient/WaveformArea_events.cpp
Original file line number Diff line number Diff line change
@@ -566,15 +566,6 @@ void WaveformArea::OnWaveformDataReady()
m_group->m_xAxisOffset = -eye->GetUIWidth();
}

//Download the waveform to the GPU and kick off the compute shader for rendering it
if(!IsEye() && !IsWaterfall())
{
make_current();
m_geometryOK = PrepareGeometry();
if(m_geometryOK)
RenderTrace();
}

//Update our measurements and redraw the waveform
SetGeometryDirty();
queue_draw();
22 changes: 14 additions & 8 deletions glscopeclient/WaveformArea_rendering.cpp
Original file line number Diff line number Diff line change
@@ -211,7 +211,13 @@ bool WaveformArea::on_render(const Glib::RefPtr<Gdk::GLContext>& /*context*/)
RenderPersistenceOverlay();
*/

//PrepareGeometry() / RenderTrace() are now launched early on, by OnWaveformDataReady()
//Download the waveform to the GPU and kick off the compute shader for rendering it
if(!IsEye() && !IsWaterfall())
{
m_geometryOK = PrepareGeometry();
if(m_geometryOK)
RenderTrace();
}

//Launch software rendering passes and push these to the GPU
ComputeAndDownloadCairoUnderlays();
@@ -350,15 +356,15 @@ void WaveformArea::RenderPersistenceOverlay()

void WaveformArea::RenderTrace()
{
//Round thread count up to next multiple of the local size (must be power of two)
int localSize = 128;
int numThreads = m_plotRight;
if(0 != (numThreads % localSize) )
//Round thread block size up to next multiple of the local size (must be power of two)
int localSize = 2;
int numCols = m_plotRight;
if(0 != (numCols % localSize) )
{
numThreads |= (localSize-1);
numThreads ++;
numCols |= (localSize-1);
numCols ++;
}
int numGroups = numThreads / localSize;
int numGroups = numCols / localSize;

m_waveformComputeProgram.Bind();
m_waveformComputeProgram.SetImageUniform(m_waveformTextureResolved, "outputTex");
60 changes: 39 additions & 21 deletions glscopeclient/shaders/waveform-compute.glsl
Original file line number Diff line number Diff line change
@@ -28,35 +28,53 @@ layout(std430, binding=3) buffer index
uint xind[];
};

layout(local_size_x=128, local_size_y=1, local_size_z=1) in;
//Maximum height of a single waveform, in pixels.
//This is enough for a nearly fullscreen 4K window so should be plenty.
#define MAX_HEIGHT 2048

//Number of columns of pixels per thread block
#define COLS_PER_BLOCK 2

layout(local_size_x=COLS_PER_BLOCK, local_size_y=1, local_size_z=1) in;

//Evaluate the line through "left" with the given slope at horizontal position x.
//The slope is supplied precomputed by the caller; "right" is accepted but not read here.
float InterpolateY(vec2 left, vec2 right, float slope, float x)
{
	float dx = x - left.x;
	return left.y + (dx * slope);
}

//Maximum height of a single waveform, in pixels.
//This is enough for a nearly fullscreen 4K window so should be plenty.
#define MAX_HEIGHT 2048
/*
NEW IDEA
Multiple threads per X coordinate (say, 32 - 1 warp)
Parallel fetch base[i+z] and atomically increment local memory
Each local has a 2D shared array
Assuming 96 KB shared memory, we can fit a total of 24K float32 temp pixels
Assuming 2K max line height, that's up to 12 pixels of width per local
*/

//Shared buffer for the local working buffer
shared float g_workingBuffer[COLS_PER_BLOCK][MAX_HEIGHT];

void main()
{
//Make sure image isn't too big for our hard coded max
//TODO: truncate in this case??
float g_workingBuffer[MAX_HEIGHT];
//Abort if window height is too big, or if we're off the end of the window
if(windowHeight > MAX_HEIGHT)
return;
if(gl_GlobalInvocationID.x > windowWidth)
return;

//Save some constants
float x = gl_GlobalInvocationID.x;
if(x > windowWidth)
return;
float alpha = float(alpha_scaled) / 256;

//Clear column to blank
for(uint y=0; y<windowHeight; y++)
g_workingBuffer[y] = 0;
//Clear column to blank in the first thread of the block
if(gl_LocalInvocationID.y == 0)
{
for(uint y=0; y<windowHeight; y++)
g_workingBuffer[gl_LocalInvocationID.x][y] = 0;
}
barrier();
memoryBarrierShared();

//Loop over the waveform, starting at the leftmost point that overlaps this column
uint istart = xind[gl_GlobalInvocationID.x];
@@ -68,11 +86,11 @@ void main()
right = vec2(data[i+1].x, data[i+1].voltage);

//If the current point is right of us, stop
if(left.x > x+1)
if(left.x > gl_GlobalInvocationID.x + 1)
break;

//If the upcoming point is still left of us, we're not there yet
if(right.x < x)
if(right.x < gl_GlobalInvocationID.x)
{
left = right;
continue;
@@ -84,10 +102,10 @@ void main()

//Interpolate if either end is outside our column
float slope = (right.y - left.y) / (right.x - left.x);
if(left.x < x)
starty = InterpolateY(left, right, slope, x);
if(right.x > x+1)
endy = InterpolateY(left, right, slope, x+1);
if(left.x < gl_GlobalInvocationID.x)
starty = InterpolateY(left, right, slope, gl_GlobalInvocationID.x);
if(right.x > gl_GlobalInvocationID.x + 1)
endy = InterpolateY(left, right, slope, gl_GlobalInvocationID.x + 1);

//Sort Y coordinates from min to max
int ymin = int(min(starty, endy));
@@ -99,7 +117,7 @@ void main()
//Fill in the space between min and max for this segment
for(int y=ymin; y <= ymax; y++)
{
g_workingBuffer[y] += alpha;
g_workingBuffer[gl_LocalInvocationID.x][y] += alpha;
}

//TODO: antialiasing
@@ -108,5 +126,5 @@ void main()

//Copy working buffer to RGB output
for(uint y=0; y<windowHeight; y++)
imageStore(outputTex, ivec2(gl_GlobalInvocationID.x, y), vec4(0, 0, 0, g_workingBuffer[y]));
imageStore(outputTex, ivec2(gl_GlobalInvocationID.x, y), vec4(0, 0, 0, g_workingBuffer[gl_LocalInvocationID.x][y]));
}