Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: ngscopeclient/scopehal
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 91805e83624e
Choose a base ref
...
head repository: ngscopeclient/scopehal
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 50c0c23b4a95
Choose a head ref
  • 1 commit
  • 16 files changed
  • 1 contributor

Commits on May 13, 2021

  1. Reverted last commit because it causes build failures on non-ELF targets like Win32. Apparently gcc can't multiversion on anything non-ELF.
    azonenberg committed May 13, 2021

    Unverified

    This user has not yet uploaded their public signing key.
    Copy the full SHA
    50c0c23 View commit details
92 changes: 65 additions & 27 deletions scopehal/Oscilloscope.cpp
Original file line number Diff line number Diff line change
@@ -635,28 +635,47 @@ void Oscilloscope::Convert8BitSamples(
nsamp = count - i*blocksize;

size_t off = i*blocksize;
DoConvert8BitSamples(
offs + off,
durs + off,
pout + off,
pin + off,
gain,
offset,
nsamp,
ibase + off);
if(g_hasAvx2)
{
Convert8BitSamplesAVX2(
offs + off,
durs + off,
pout + off,
pin + off,
gain,
offset,
nsamp,
ibase + off);
}
else
{
Convert8BitSamplesGeneric(
offs + off,
durs + off,
pout + off,
pin + off,
gain,
offset,
nsamp,
ibase + off);
}
}
}

//Small waveforms get done single threaded to avoid overhead
else
DoConvert8BitSamples(offs, durs, pout, pin, gain, offset, count, ibase);
{
if(g_hasAvx2)
Convert8BitSamplesAVX2(offs, durs, pout, pin, gain, offset, count, ibase);
else
Convert8BitSamplesGeneric(offs, durs, pout, pin, gain, offset, count, ibase);
}
}

/**
@brief Generic backend for Convert8BitSamples()
*/
__attribute__((target("default")))
void Oscilloscope::DoConvert8BitSamples(
void Oscilloscope::Convert8BitSamplesGeneric(
int64_t* offs, int64_t* durs, float* pout, int8_t* pin, float gain, float offset, size_t count, int64_t ibase)
{
for(unsigned int k=0; k<count; k++)
@@ -671,7 +690,7 @@ void Oscilloscope::DoConvert8BitSamples(
@brief Optimized version of Convert8BitSamples()
*/
__attribute__((target("avx2")))
void Oscilloscope::DoConvert8BitSamples(
void Oscilloscope::Convert8BitSamplesAVX2(
int64_t* offs, int64_t* durs, float* pout, int8_t* pin, float gain, float offset, size_t count, int64_t ibase)
{
unsigned int end = count - (count % 32);
@@ -780,7 +799,7 @@ void Oscilloscope::DoConvert8BitSamples(
// Helpers for converting raw 16-bit ADC samples to fp32 waveforms

/**
@brief Converts 16-bit ADC samples to floating point
@brief Converts 8-bit ADC samples to floating point
*/
void Oscilloscope::Convert16BitSamples(
int64_t* offs, int64_t* durs, float* pout, int16_t* pin, float gain, float offset, size_t count, int64_t ibase)
@@ -804,28 +823,47 @@ void Oscilloscope::Convert16BitSamples(
nsamp = count - i*blocksize;

size_t off = i*blocksize;
DoConvert16BitSamples(
offs + off,
durs + off,
pout + off,
pin + off,
gain,
offset,
nsamp,
ibase + off);
if(g_hasAvx2)
{
Convert16BitSamplesAVX2(
offs + off,
durs + off,
pout + off,
pin + off,
gain,
offset,
nsamp,
ibase + off);
}
else
{
Convert16BitSamplesGeneric(
offs + off,
durs + off,
pout + off,
pin + off,
gain,
offset,
nsamp,
ibase + off);
}
}
}

//Small waveforms get done single threaded to avoid overhead
else
DoConvert16BitSamples(offs, durs, pout, pin, gain, offset, count, ibase);
{
if(g_hasAvx2)
Convert16BitSamplesAVX2(offs, durs, pout, pin, gain, offset, count, ibase);
else
Convert16BitSamplesGeneric(offs, durs, pout, pin, gain, offset, count, ibase);
}
}

/**
@brief Converts raw ADC samples to floating point
*/
__attribute__((target("default")))
void Oscilloscope::DoConvert16BitSamples(
void Oscilloscope::Convert16BitSamplesGeneric(
int64_t* offs, int64_t* durs, float* pout, int16_t* pin, float gain, float offset, size_t count, int64_t ibase)
{
for(size_t j=0; j<count; j++)
@@ -837,7 +875,7 @@ void Oscilloscope::DoConvert16BitSamples(
}

__attribute__((target("avx2")))
void Oscilloscope::DoConvert16BitSamples(
void Oscilloscope::Convert16BitSamplesAVX2(
int64_t* offs, int64_t* durs, float* pout, int16_t* pin, float gain, float offset, size_t count, int64_t ibase)
{
size_t end = count - (count % 32);
16 changes: 4 additions & 12 deletions scopehal/Oscilloscope.h
Original file line number Diff line number Diff line change
@@ -775,24 +775,16 @@ class Oscilloscope : public virtual Instrument
protected:
void Convert8BitSamples(
int64_t* offs, int64_t* durs, float* pout, int8_t* pin, float gain, float offset, size_t count, int64_t ibase);

__attribute__((target("default")))
void DoConvert8BitSamples(
void Convert8BitSamplesGeneric(
int64_t* offs, int64_t* durs, float* pout, int8_t* pin, float gain, float offset, size_t count, int64_t ibase);

__attribute__((target("avx2")))
void DoConvert8BitSamples(
void Convert8BitSamplesAVX2(
int64_t* offs, int64_t* durs, float* pout, int8_t* pin, float gain, float offset, size_t count, int64_t ibase);

void Convert16BitSamples(
int64_t* offs, int64_t* durs, float* pout, int16_t* pin, float gain, float offset, size_t count, int64_t ibase);

__attribute__((target("default")))
void DoConvert16BitSamples(
void Convert16BitSamplesGeneric(
int64_t* offs, int64_t* durs, float* pout, int16_t* pin, float gain, float offset, size_t count, int64_t ibase);

__attribute__((target("avx2")))
void DoConvert16BitSamples(
void Convert16BitSamplesAVX2(
int64_t* offs, int64_t* durs, float* pout, int16_t* pin, float gain, float offset, size_t count, int64_t ibase);

public:
19 changes: 16 additions & 3 deletions scopehal/scopehal.cpp
Original file line number Diff line number Diff line change
@@ -71,6 +71,10 @@

using namespace std;

bool g_hasAvx512F = false;
bool g_hasAvx512DQ = false;
bool g_hasAvx512VL = false;
bool g_hasAvx2 = false;
bool g_disableOpenCL = false;

vector<string> g_searchPaths;
@@ -106,11 +110,20 @@ void DetectCPUFeatures()
LogDebug("Detecting CPU features...\n");
LogIndenter li;

if(__builtin_cpu_supports("avx2"))
//Check CPU features
g_hasAvx512F = __builtin_cpu_supports("avx512f");
g_hasAvx512VL = __builtin_cpu_supports("avx512vl");
g_hasAvx512DQ = __builtin_cpu_supports("avx512dq");
g_hasAvx2 = __builtin_cpu_supports("avx2");

if(g_hasAvx2)
LogDebug("* AVX2\n");
if(__builtin_cpu_supports("avx512f"))
if(g_hasAvx512F)
LogDebug("* AVX512F\n");

if(g_hasAvx512DQ)
LogDebug("* AVX512DQ\n");
if(g_hasAvx512VL)
LogDebug("* AVX512VL\n");
LogDebug("\n");
}

5 changes: 5 additions & 0 deletions scopehal/scopehal.h
Original file line number Diff line number Diff line change
@@ -126,6 +126,11 @@ float FreqToPhase(float hz);

uint64_t next_pow2(uint64_t v);

extern bool g_hasAvx512F;
extern bool g_hasAvx512VL;
extern bool g_hasAvx512DQ;
extern bool g_hasAvx2;

#define FS_PER_SECOND 1e15
#define SECONDS_PER_FS 1e-15

14 changes: 10 additions & 4 deletions scopeprotocols/DeEmbedFilter.cpp
Original file line number Diff line number Diff line change
@@ -499,9 +499,16 @@ void DeEmbedFilter::DoRefresh(bool invert)
for(size_t i=npoints_raw; i<npoints; i++)
m_forwardInBuf[i] = 0;

//Actual transformation
//Do the forward FFT
ffts_execute(m_forwardPlan, &m_forwardInBuf[0], &m_forwardOutBuf[0]);
MainLoop(nouts);

//Do the actual filter operation
if(g_hasAvx2)
MainLoopAVX2(nouts);
else
MainLoop(nouts);

//Calculate the inverse FFT
ffts_execute(m_reversePlan, &m_forwardOutBuf[0], &m_reverseOutBuf[0]);

#ifdef HAVE_CLFFT
@@ -596,7 +603,6 @@ void DeEmbedFilter::InterpolateSparameters(float bin_hz, bool invert, size_t nou
}
}

__attribute__((target("default")))
void DeEmbedFilter::MainLoop(size_t nouts)
{
for(size_t i=0; i<nouts; i++)
@@ -615,7 +621,7 @@ void DeEmbedFilter::MainLoop(size_t nouts)
}

__attribute__((target("avx2")))
void DeEmbedFilter::MainLoop(size_t nouts)
void DeEmbedFilter::MainLoopAVX2(size_t nouts)
{
unsigned int end = nouts - (nouts % 8);

5 changes: 1 addition & 4 deletions scopeprotocols/DeEmbedFilter.h
Original file line number Diff line number Diff line change
@@ -96,11 +96,8 @@ class DeEmbedFilter : public Filter
std::vector<float, AlignedAllocator<float, 64> > m_forwardOutBuf;
std::vector<float, AlignedAllocator<float, 64> > m_reverseOutBuf;

__attribute__((target("default")))
void MainLoop(size_t nouts);

__attribute__((target("avx2")))
void MainLoop(size_t nouts);
void MainLoopAVX2(size_t nouts);

#ifdef HAVE_CLFFT
clfftPlanHandle m_clfftForwardPlan;
10 changes: 7 additions & 3 deletions scopeprotocols/EyePattern.cpp
Original file line number Diff line number Diff line change
@@ -476,7 +476,12 @@ void EyePattern::Refresh()
//Optimized inner loop for dense packed waveforms
//We can assume m_offsets[i] = i and m_durations[i] = 0 for all input
if(waveform->m_densePacked)
DensePackedInnerLoop(waveform, clock_edges, data, wend, cend, xmax, ymax, xtimescale, yscale, yoff);
{
if(g_hasAvx2)
DensePackedInnerLoopAVX2(waveform, clock_edges, data, wend, cend, xmax, ymax, xtimescale, yscale, yoff);
else
DensePackedInnerLoop(waveform, clock_edges, data, wend, cend, xmax, ymax, xtimescale, yscale, yoff);
}

//Normal main loop
else
@@ -510,7 +515,7 @@ void EyePattern::Refresh()
}

__attribute__((target("avx2")))
void EyePattern::DensePackedInnerLoop(
void EyePattern::DensePackedInnerLoopAVX2(
AnalogWaveform* waveform,
vector<int64_t>& clock_edges,
int64_t* data,
@@ -691,7 +696,6 @@ void EyePattern::DensePackedInnerLoop(
}
}

__attribute__((target("default")))
void EyePattern::DensePackedInnerLoop(
AnalogWaveform* waveform,
vector<int64_t>& clock_edges,
4 changes: 1 addition & 3 deletions scopeprotocols/EyePattern.h
Original file line number Diff line number Diff line change
@@ -190,7 +190,6 @@ class EyePattern : public Filter
float yoff
);

__attribute__((target("default")))
void DensePackedInnerLoop(
AnalogWaveform* waveform,
std::vector<int64_t>& clock_edges,
@@ -204,8 +203,7 @@ class EyePattern : public Filter
float yoff
);

__attribute__((target("avx2")))
void DensePackedInnerLoop(
void DensePackedInnerLoopAVX2(
AnalogWaveform* waveform,
std::vector<int64_t>& clock_edges,
int64_t* data,
Loading