Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 917c9569ad0f
Choose a base ref
...
head repository: ngscopeclient/scopehal-apps
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 2a4fb3ef799a
Choose a head ref
  • 2 commits
  • 3 files changed
  • 1 contributor

Commits on Sep 5, 2020

  1. Now require ARB_GPU_shader_int64. Waveform compute shader now takes X…

    … positions as int64_t's.
    azonenberg committed Sep 5, 2020

    Verified

    This commit was signed with the committer’s verified signature.
    lukekarrys Luke Karrys
    Copy the full SHA
    181d444 View commit details
  2. Waveform rendering now performs the timebase offset with int64 precis…

    …ion before converting to float32 for actual rendering. Fixes #162.
    azonenberg committed Sep 5, 2020

    Verified

    This commit was signed with the committer’s verified signature.
    lukekarrys Luke Karrys
    Copy the full SHA
    2a4fb3e View commit details
Showing with 30 additions and 61 deletions.
  1. +2 −3 src/glscopeclient/WaveformArea.h
  2. +22 −54 src/glscopeclient/WaveformArea_rendering.cpp
  3. +6 −4 src/glscopeclient/shaders/waveform-compute.glsl
5 changes: 2 additions & 3 deletions src/glscopeclient/WaveformArea.h
Original file line number Diff line number Diff line change
@@ -109,10 +109,11 @@ class WaveformRenderData
size_t m_count;

//OpenGL-mapped buffers for the data
float* m_mappedXBuffer;
int64_t* m_mappedXBuffer;
float* m_mappedYBuffer;
uint32_t* m_mappedIndexBuffer;
uint32_t* m_mappedConfigBuffer;
int64_t* m_mappedConfigBuffer64;
float* m_mappedFloatConfigBuffer;

//Map all buffers for download
@@ -403,8 +404,6 @@ class WaveformArea : public Gtk::GLArea
float PickStepSize(float volts_per_half_span, int min_steps = 2, int max_steps = 5);
template<class T> size_t BinarySearchForGequal(T* buf, size_t len, T value);
float GetValueAtTime(int64_t time_ps);
void Int64ToFloat(float* dst, int64_t* src, size_t len);
void Int64ToFloatAVX512(float* dst, int64_t* src, size_t len);

void OnRemoveOverlay(StreamDescriptor filter);

76 changes: 22 additions & 54 deletions src/glscopeclient/WaveformArea_rendering.cpp
Original file line number Diff line number Diff line change
@@ -68,11 +68,12 @@ void WaveformRenderData::MapBuffers(size_t width)
m_count *= 2;
}

m_mappedXBuffer = (float*)m_waveformXBuffer.Map(m_count*sizeof(float), GL_READ_WRITE);
m_mappedXBuffer = (int64_t*)m_waveformXBuffer.Map(m_count*sizeof(int64_t), GL_READ_WRITE);
m_mappedYBuffer = (float*)m_waveformYBuffer.Map(m_count*sizeof(float));
m_mappedIndexBuffer = (uint32_t*)m_waveformIndexBuffer.Map(width*sizeof(uint32_t));
m_mappedConfigBuffer = (uint32_t*)m_waveformConfigBuffer.Map(sizeof(float)*10);
m_mappedConfigBuffer = (uint32_t*)m_waveformConfigBuffer.Map(sizeof(float)*12);
m_mappedFloatConfigBuffer = (float*)m_mappedConfigBuffer;
m_mappedConfigBuffer64 = (int64_t*)m_mappedConfigBuffer;
}

void WaveformRenderData::UnmapBuffers()
@@ -165,16 +166,10 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata)
}
else
{
//Need AVX512DQ or AVX512VL for VCTVQQ2PS
//TODO: see if there is any way to speed this up at least a little on AVX2?
if(g_hasAvx512DQ || g_hasAvx512VL)
Int64ToFloatAVX512(wdata->m_mappedXBuffer, reinterpret_cast<int64_t*>(&andat->m_offsets[0]), wdata->m_count);
else
Int64ToFloat(wdata->m_mappedXBuffer, reinterpret_cast<int64_t*>(&andat->m_offsets[0]), wdata->m_count);

yscale = m_pixelsPerVolt;

//Copy the waveform
memcpy(wdata->m_mappedXBuffer, &andat->m_offsets[0], wdata->m_count*sizeof(int64_t));
memcpy(wdata->m_mappedYBuffer, &andat->m_samples[0], wdata->m_count*sizeof(float));
}

@@ -186,9 +181,14 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata)
//This is necessary since samples may be sparse and have arbitrary spacing between them, so we can't
//trivially map sample indexes to X pixel coordinates.
//TODO: can we parallelize this? move to a compute shader?
float xoff = (pdat->m_triggerPhase - m_group->m_xAxisOffset) * m_group->m_pixelsPerXUnit;
float xoff = pdat->m_triggerPhase * m_group->m_pixelsPerXUnit;
for(int j=0; j<m_width; j++)
wdata->m_mappedIndexBuffer[j] = BinarySearchForGequal(wdata->m_mappedXBuffer, wdata->m_count, (j - xoff) / xscale);
{
wdata->m_mappedIndexBuffer[j] = BinarySearchForGequal(
wdata->m_mappedXBuffer,
wdata->m_count,
(j /*- pdat->m_triggerPhase*/ + m_group->m_xAxisOffset) / pdat->m_timescale);
}

dt = GetTime() - start;
m_indexTime += dt;
@@ -200,54 +200,22 @@ void WaveformArea::PrepareGeometry(WaveformRenderData* wdata)

//Config stuff
//TODO: we should be able to only update this stuff if we pan/zoom, without redoing the waveform data itself
wdata->m_mappedConfigBuffer[0] = m_height; //windowHeight
wdata->m_mappedConfigBuffer[1] = m_plotRight; //windowWidth
wdata->m_mappedConfigBuffer[2] = wdata->m_count; //depth
wdata->m_mappedFloatConfigBuffer[3] = alpha_scaled; //alpha
wdata->m_mappedConfigBuffer[4] = digdat ? 1 : 0; //digital
wdata->m_mappedFloatConfigBuffer[5] = xoff; //xoff
wdata->m_mappedFloatConfigBuffer[6] = xscale; //xscale
wdata->m_mappedFloatConfigBuffer[7] = ybase; //ybase
wdata->m_mappedFloatConfigBuffer[8] = yscale; //yscale
wdata->m_mappedFloatConfigBuffer[9] = yoff; //yoff
wdata->m_mappedConfigBuffer64[0] = -m_group->m_xAxisOffset / pdat->m_timescale; //innerXoff
wdata->m_mappedConfigBuffer[2] = m_height; //windowHeight
wdata->m_mappedConfigBuffer[3] = m_plotRight; //windowWidth
wdata->m_mappedConfigBuffer[4] = wdata->m_count; //depth
wdata->m_mappedFloatConfigBuffer[5] = alpha_scaled; //alpha
wdata->m_mappedConfigBuffer[6] = digdat ? 1 : 0; //digital
wdata->m_mappedFloatConfigBuffer[7] = xoff; //xoff
wdata->m_mappedFloatConfigBuffer[8] = xscale; //xscale
wdata->m_mappedFloatConfigBuffer[9] = ybase; //ybase
wdata->m_mappedFloatConfigBuffer[10] = yscale; //yscale
wdata->m_mappedFloatConfigBuffer[11] = yoff; //yoff

//Done
wdata->m_geometryOK = true;
}

/**
@brief Convert an array of int64_t's to floats
*/
void WaveformArea::Int64ToFloat(float* dst, int64_t* src, size_t len)
{
float* pdst = reinterpret_cast<float*>(__builtin_assume_aligned(dst, 32));
int64_t* psrc = reinterpret_cast<int64_t*>(__builtin_assume_aligned(src, 16));

//Not possible to push this to a compute shader without GL_ARB_gpu_shader_int64,
//which isn't well supported on integrated gfx yet :(
for(size_t j=0; j < len; j++)
pdst[j] = psrc[j];
}

__attribute__((target("avx512dq")))
void WaveformArea::Int64ToFloatAVX512(float* dst, int64_t* src, size_t len)
{
//Round length down to multiple of 8 so we can SIMD the loop
size_t len_rounded = len - (len % 8);

//Main unrolled loop
for(size_t j=0; j<len_rounded; j+= 8)
{
__m512i i64x8 = _mm512_load_epi64(src + j);
__m256 f32x8 = _mm512_cvt_roundepi64_ps(i64x8, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
_mm256_store_ps(dst + j, f32x8);
}

//Do anything we missed
for(size_t j=len_rounded; j < len; j++)
dst[j] = src[j];
}

/**
@brief Look for a value greater than or equal to "value" in buf and return the index
*/
10 changes: 6 additions & 4 deletions src/glscopeclient/shaders/waveform-compute.glsl
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#version 430
#version 450
#extension GL_ARB_gpu_shader_int64 : require

//The output texture (for now, only alpha channel is used)
layout(binding=0, rgba32f) uniform image2D outputTex;

layout(std430, binding=1) buffer waveform_x
{
float xpos[]; //x pixel position (fractional)
int64_t xpos[]; //x position, in time ticks
};

layout(std430, binding=4) buffer waveform_y
@@ -16,6 +17,7 @@ layout(std430, binding=4) buffer waveform_y
//Global configuration for the run
layout(std430, binding=2) buffer config
{
int64_t innerXoff;
uint windowHeight;
uint windowWidth;
uint memDepth;
@@ -81,12 +83,12 @@ void main()

//Loop over the waveform, starting at the leftmost point that overlaps this column
uint istart = xind[gl_GlobalInvocationID.x];
vec2 left = vec2(xpos[istart]*xscale + xoff, (voltage[istart] + yoff)*yscale + ybase);
vec2 left = vec2(float(xpos[istart] + innerXoff) * xscale + xoff, (voltage[istart] + yoff)*yscale + ybase);
vec2 right;
for(uint i=istart; i<(memDepth-1); i++)
{
//Fetch coordinates of the current and upcoming sample
right = vec2(xpos[i+1]*xscale + xoff, (voltage[i+1] + yoff)*yscale + ybase);
right = vec2(float(xpos[i+1] + innerXoff)*xscale + xoff, (voltage[i+1] + yoff)*yscale + ybase);

//If the current point is right of us, stop
if(left.x > gl_GlobalInvocationID.x + 1)