Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: ngscopeclient/scopehal
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 118fb4f4578d
Choose a base ref
...
head repository: ngscopeclient/scopehal
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 35c590eb001c
Choose a head ref
  • 3 commits
  • 5 files changed
  • 1 contributor

Commits on Nov 28, 2020

  1. Massive optimizations to PeakDetectionFilter. Improved accuracy by in…

    …terpolating results. Avoid double-covering areas we already know don't have peaks in them (50x speedup!)
    azonenberg committed Nov 28, 2020
    Copy the full SHA
    f8b5656 View commit details
  2. Copy the full SHA
    484050c View commit details
  3. Copy the full SHA
    35c590e View commit details
Showing with 181 additions and 32 deletions.
  1. +42 −16 scopehal/PeakDetectionFilter.cpp
  2. +13 −1 scopehal/Unit.cpp
  3. +117 −11 scopeprotocols/FFTFilter.cpp
  4. +5 −3 scopeprotocols/FFTFilter.h
  5. +4 −1 scopeprotocols/JitterSpectrumFilter.cpp
58 changes: 42 additions & 16 deletions scopehal/PeakDetectionFilter.cpp
Original file line number Diff line number Diff line change
@@ -52,34 +52,60 @@ void PeakDetector::FindPeaks(AnalogWaveform* cap, int64_t max_peaks, float searc
{
//Get peak search width in bins
int64_t search_bins = ceil(search_hz / cap->m_timescale);
search_bins = min(search_bins, (int64_t)512); //TODO: reasonable limit
int64_t search_rad = search_bins/2;

//Find peaks (TODO: can we vectorize/multithread this?)
//Start at index 1 so we don't waste a marker on the DC peak
vector<Peak> peaks;
for(ssize_t i=1; i<(ssize_t)nouts; i++)
ssize_t nend = nouts-1;
size_t minpeak = 10; //Skip this many bins at left to avoid false positives on the DC peak
for(ssize_t i=minpeak; i<(ssize_t)nouts; i++)
{
ssize_t max_delta = 0;
float max_value = -FLT_MAX;

for(ssize_t delta = -search_rad; delta <= search_rad; delta ++)
//Locate the peak
ssize_t left = max((ssize_t)minpeak, i - search_rad);
ssize_t right = min(i + search_rad, (ssize_t)nend);
float target = cap->m_samples[i];
bool is_peak = true;
for(ssize_t j=left; j<=right; j++)
{
ssize_t index = i+delta ;
if( (index < 0) || (index >= (ssize_t)nouts) )
if(i == j)
continue;

float amp = cap->m_samples[index];
if(amp > max_value)
if(cap->m_samples[j] >= target)
{
max_value = amp;
max_delta = delta;
//Something higher is to our right.
//It's higher than anything from left to j. This makes it a candidate peak.
//Restart our search from there.
if(j > i)
i = j-1;

is_peak = false;
break;
}
}
if(!is_peak)
continue;

//Do a weighted average of our immediate neighbors to fine tune our position
ssize_t fine_rad = 10;
left = max((ssize_t)1, i - fine_rad);
right = min(i + fine_rad, (ssize_t)nouts);
//LogDebug("peak range: %zu, %zu\n", left, right);
double total = 0;
double count = 0;
for(ssize_t j=left; j<=right; j++)
{
total += cap->m_samples[j] * cap->m_offsets[j];
count += cap->m_samples[j];
}
ssize_t peak_location = total / count;
//LogDebug("Moved peak from %zu to %zd\n", (size_t)cap->m_offsets[i], peak_location);
//LogDebug("Final position: %s\n", Unit(Unit::UNIT_HZ).PrettyPrint(peak_location * cap->m_timescale).c_str());

peaks.push_back(Peak(peak_location, target));

//If the highest point in the search window is at our location, we're a peak
if(max_delta == 0)
peaks.push_back(Peak(cap->m_offsets[i], max_value));
//We know we're the highest point until at least i+search_rad.
//Don't bother searching those points.
i += (search_rad-1);
}

//Sort the peak table and pluck out the requested count
14 changes: 13 additions & 1 deletion scopehal/Unit.cpp
Original file line number Diff line number Diff line change
@@ -73,6 +73,11 @@ string Unit::PrettyPrint(double value, int sigfigs)
else if(fabs(value) < 1e-9)
{
value_rescaled *= 1e9;
scale = "n";
}
else if(fabs(value) < 1e-12)
{
value_rescaled *= 1e12;
scale = "p";
}

@@ -103,11 +108,16 @@ string Unit::PrettyPrint(double value, int sigfigs)
value_rescaled = value / 1e3;
scale = "n";
}
else
else if(fabs(value) >= 1)
{
value_rescaled = value;
scale = "p";
}
else
{
value_rescaled = value * 1e3;
scale = "f";
}
break;

case UNIT_HZ:
@@ -287,6 +297,8 @@ double Unit::ParseString(const string& str)
scale = 1e-9;
else if(c == 'p')
scale = 1e-12;
else if(c == 'f')
scale = 1e-15;

break;
}
128 changes: 117 additions & 11 deletions scopeprotocols/FFTFilter.cpp
Original file line number Diff line number Diff line change
@@ -195,10 +195,10 @@ void FFTFilter::Refresh()
memset(m_rdin + npoints_raw, 0, (npoints - npoints_raw) * sizeof(float));

double ps = din->m_timescale * (din->m_offsets[1] - din->m_offsets[0]);
DoRefresh(din, ps, npoints, nouts);
DoRefresh(din, ps, npoints, nouts, true);
}

void FFTFilter::DoRefresh(AnalogWaveform* din, double ps_per_sample, size_t npoints, size_t nouts)
void FFTFilter::DoRefresh(AnalogWaveform* din, double ps_per_sample, size_t npoints, size_t nouts, bool log_output)
{
//Calculate the FFT
ffts_execute(m_plan, m_rdin, m_rdout);
@@ -216,10 +216,20 @@ void FFTFilter::DoRefresh(AnalogWaveform* din, double ps_per_sample, size_t npoi

//Normalize magnitudes
cap->Resize(nouts);
if(g_hasAvx2)
NormalizeOutputAVX2(cap, nouts, npoints);
if(log_output)
{
if(g_hasAvx2)
NormalizeOutputLogAVX2(cap, nouts, npoints);
else
NormalizeOutputLog(cap, nouts, npoints);
}
else
NormalizeOutput(cap, nouts, npoints);
{
if(g_hasAvx2)
NormalizeOutputLinearAVX2(cap, nouts, npoints);
else
NormalizeOutputLinear(cap, nouts, npoints);
}

//Peak search
FindPeaks(cap);
@@ -232,13 +242,13 @@ void FFTFilter::DoRefresh(AnalogWaveform* din, double ps_per_sample, size_t npoi
// Normalization

/**
@brief Normalize FFT output (unoptimized C++ implementation)
@brief Normalize FFT output and convert to dBm (unoptimized C++ implementation)
*/
void FFTFilter::NormalizeOutput(AnalogWaveform* cap, size_t nouts, size_t npoints)
void FFTFilter::NormalizeOutputLog(AnalogWaveform* cap, size_t nouts, size_t npoints)
{
//assume constant 50 ohms for now
const float impedance = 50;

float scale = 2.0 / npoints;
for(size_t i=0; i<nouts; i++)
{
cap->m_offsets[i] = i;
@@ -247,18 +257,36 @@ void FFTFilter::NormalizeOutput(AnalogWaveform* cap, size_t nouts, size_t npoint
float real = m_rdout[i*2];
float imag = m_rdout[i*2 + 1];

float voltage = sqrtf(real*real + imag*imag) / npoints;
float voltage = sqrtf(real*real + imag*imag) * scale;

//Convert to dBm
cap->m_samples[i] = (10 * log10(voltage*voltage / impedance) + 30);
}
}

/**
@brief Normalize FFT output (optimized AVX2 implementation)
@brief Normalize FFT output and output in native Y-axis units (unoptimized C++ implementation)
*/
void FFTFilter::NormalizeOutputLinear(AnalogWaveform* cap, size_t nouts, size_t npoints)
{
float scale = 2.0 / npoints;
for(size_t i=0; i<nouts; i++)
{
cap->m_offsets[i] = i;
cap->m_durations[i] = 1;

float real = m_rdout[i*2];
float imag = m_rdout[i*2 + 1];

cap->m_samples[i] = sqrtf(real*real + imag*imag) * scale;
}
}

/**
@brief Normalize FFT output and convert to dBm (optimized AVX2 implementation)
*/
__attribute__((target("avx2")))
void FFTFilter::NormalizeOutputAVX2(AnalogWaveform* cap, size_t nouts, size_t npoints)
void FFTFilter::NormalizeOutputLogAVX2(AnalogWaveform* cap, size_t nouts, size_t npoints)
{
int64_t* offs = (int64_t*)&cap->m_offsets[0];
int64_t* durs = (int64_t*)&cap->m_durations[0];
@@ -361,6 +389,84 @@ void FFTFilter::NormalizeOutputAVX2(AnalogWaveform* cap, size_t nouts, size_t np
}
}

/**
@brief Normalize FFT output and keep in native units (optimized AVX2 implementation)
*/
__attribute__((target("avx2")))
void FFTFilter::NormalizeOutputLinearAVX2(AnalogWaveform* cap, size_t nouts, size_t npoints)
{
int64_t* offs = (int64_t*)&cap->m_offsets[0];
int64_t* durs = (int64_t*)&cap->m_durations[0];

size_t end = nouts - (nouts % 8);

int64_t __attribute__ ((aligned(32))) ones_x4[] = {1, 1, 1, 1};
int64_t __attribute__ ((aligned(32))) fours_x4[] = {4, 4, 4, 4};
int64_t __attribute__ ((aligned(32))) count_x4[] = {0, 1, 2, 3};

__m256i all_ones = _mm256_load_si256(reinterpret_cast<__m256i*>(ones_x4));
__m256i all_fours = _mm256_load_si256(reinterpret_cast<__m256i*>(fours_x4));
__m256i counts = _mm256_load_si256(reinterpret_cast<__m256i*>(count_x4));

//double since we only look at positive half
float norm = 2.0f / npoints;
__m256 norm_f = { norm, norm, norm, norm, norm, norm, norm, norm };

float* pout = (float*)&cap->m_samples[0];

//Vectorized processing (8 samples per iteration)
for(size_t k=0; k<end; k += 8)
{
//Fill duration
_mm256_store_si256(reinterpret_cast<__m256i*>(durs + k), all_ones);
_mm256_store_si256(reinterpret_cast<__m256i*>(durs + k + 4), all_ones);

//Fill offset
_mm256_store_si256(reinterpret_cast<__m256i*>(offs + k), counts);
counts = _mm256_add_epi64(counts, all_fours);
_mm256_store_si256(reinterpret_cast<__m256i*>(offs + k + 4), counts);
counts = _mm256_add_epi64(counts, all_fours);

//Read interleaved real/imaginary FFT output (riririri riririri)
__m256 din0 = _mm256_load_ps(m_rdout + k*2);
__m256 din1 = _mm256_load_ps(m_rdout + k*2 + 8);

//Step 1: Shuffle 32-bit values within 128-bit lanes to get rriirrii rriirrii.
din0 = _mm256_permute_ps(din0, 0xd8);
din1 = _mm256_permute_ps(din1, 0xd8);

//Step 2: Shuffle 64-bit values to get rrrriiii rrrriiii.
__m256i block0 = _mm256_permute4x64_epi64(_mm256_castps_si256(din0), 0xd8);
__m256i block1 = _mm256_permute4x64_epi64(_mm256_castps_si256(din1), 0xd8);

//Step 3: Shuffle 128-bit values to get rrrrrrrr iiiiiiii.
__m256 real = _mm256_castsi256_ps(_mm256_permute2x128_si256(block0, block1, 0x20));
__m256 imag = _mm256_castsi256_ps(_mm256_permute2x128_si256(block0, block1, 0x31));

//Actual vector normalization
real = _mm256_mul_ps(real, real);
imag = _mm256_mul_ps(imag, imag);
__m256 sum = _mm256_add_ps(real, imag);
__m256 mag = _mm256_sqrt_ps(sum);
mag = _mm256_mul_ps(mag, norm_f);

//and store the result
_mm256_store_ps(pout + k, mag);
}

//Get any extras we didn't get in the SIMD loop
for(size_t k=end; k<nouts; k++)
{
cap->m_offsets[k] = k;
cap->m_durations[k] = 1;

float real = m_rdout[k*2];
float imag = m_rdout[k*2 + 1];

pout[k] = sqrtf(real*real + imag*imag) / npoints;
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Window functions

8 changes: 5 additions & 3 deletions scopeprotocols/FFTFilter.h
Original file line number Diff line number Diff line change
@@ -78,12 +78,14 @@ class FFTFilter : public PeakDetectionFilter
PROTOCOL_DECODER_INITPROC(FFTFilter)

protected:
void NormalizeOutput(AnalogWaveform* cap, size_t nouts, size_t npoints);
void NormalizeOutputAVX2(AnalogWaveform* cap, size_t nouts, size_t npoints);
void NormalizeOutputLog(AnalogWaveform* cap, size_t nouts, size_t npoints);
void NormalizeOutputLogAVX2(AnalogWaveform* cap, size_t nouts, size_t npoints);
void NormalizeOutputLinear(AnalogWaveform* cap, size_t nouts, size_t npoints);
void NormalizeOutputLinearAVX2(AnalogWaveform* cap, size_t nouts, size_t npoints);

void ReallocateBuffers(size_t npoints_raw, size_t npoints, size_t nouts);

void DoRefresh(AnalogWaveform* din, double ps_per_sample, size_t npoints, size_t nouts);
void DoRefresh(AnalogWaveform* din, double ps_per_sample, size_t npoints, size_t nouts, bool log_output);

size_t m_cachedNumPoints;
size_t m_cachedNumPointsFFT;
5 changes: 4 additions & 1 deletion scopeprotocols/JitterSpectrumFilter.cpp
Original file line number Diff line number Diff line change
@@ -43,6 +43,9 @@ JitterSpectrumFilter::JitterSpectrumFilter(const string& color)
m_xAxisUnit = Unit(Unit::UNIT_HZ);
m_yAxisUnit = Unit(Unit::UNIT_PS);
m_category = CAT_ANALYSIS;

m_range = 1;
m_offset = -0.5;
}

JitterSpectrumFilter::~JitterSpectrumFilter()
@@ -201,5 +204,5 @@ void JitterSpectrumFilter::Refresh()
memset(m_rdin + npoints_raw, 0, (npoints - npoints_raw) * sizeof(float));

//and do the actual FFT processing
DoRefresh(din, ui_width_final, npoints, nouts);
DoRefresh(din, ui_width_final, npoints, nouts, false);
}