Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/artiq
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: e9592105ce8d
Choose a base ref
...
head repository: m-labs/artiq
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 115ea6786068
Choose a head ref
  • 10 commits
  • 8 files changed
  • 1 contributor

Commits on Dec 14, 2016

  1. 2
    Copy the full SHA
    708c25b View commit details
  2. sawg: fix limit regs

    jordens committed Dec 14, 2016
    Copy the full SHA
    4c27029 View commit details
  3. Copy the full SHA
    7be27d7 View commit details
  4. rtio: add support for latency compensation in phy

    * if multiple RTIO channels influence the same data stream and physical
    output channel (see SAWG) differential latency needs to be compensated
    * this is a NOP for phys with zero delay (default)
    * if delay==1, it adds one timestamp-wide register
    * if delay >1, it adds one adder and one register
    * latency compensation using (~10-50 deep) delay lines is about as
    expensive as a single adder+register but very tedious to implement
    jordens committed Dec 14, 2016
    Copy the full SHA
    6cdb96c View commit details
  5. Copy the full SHA
    8381db2 View commit details
  6. fir: force dsp48

    jordens committed Dec 14, 2016
    Copy the full SHA
    641d109 View commit details
  7. Revert "fir: force dsp48"

    This reverts commit 0ad433832d1b6dcd803ffa086ae73b2ee0568326.
    jordens committed Dec 14, 2016
    Copy the full SHA
    61abd99 View commit details
  8. fir: different adder layout

    jordens committed Dec 14, 2016
    Copy the full SHA
    93076b8 View commit details
  9. Revert "fir: different adder layout"

    This reverts commit 6f50e77b409c293c1905f28e69d79403a0803866.
    jordens committed Dec 14, 2016
    Copy the full SHA
    a451b67 View commit details
  10. Copy the full SHA
    115ea67 View commit details
1 change: 0 additions & 1 deletion artiq/examples/phaser/startup_kernel.py
Original file line number Diff line number Diff line change
@@ -10,7 +10,6 @@ def build(self):

@kernel
def run(self):
self.core.reset()
self.ad9154.jesd_enable(0)
self.ad9154.init()
self.clock_setup()
86 changes: 54 additions & 32 deletions artiq/gateware/dsp/fir.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from operator import add
from functools import reduce
from collections import namedtuple
import numpy as np
from migen import *

@@ -38,7 +39,10 @@ def halfgen4(width, n):
class FIR(Module):
"""Full-rate finite impulse response filter.
:param coefficients: integer taps.
Tries to use transposed form (adder chain instead of adder tree)
as much as possible.
:param coefficients: integer taps, increasing delay.
:param width: bit width of input and output.
:param shift: scale factor (as power of two).
"""
@@ -47,37 +51,46 @@ def __init__(self, coefficients, width=16, shift=None):
self.i = Signal((width, True))
self.o = Signal((width, True))
n = len(coefficients)
self.latency = (n + 1)//2 + 2
self.latency = n//2 + 3

###

if shift is None:
shift = bits_for(sum(abs(c) for c in coefficients)) - 1

# Delay line: increasing delay
x = [Signal((width, True)) for _ in range(n)]
self.sync += [xi.eq(xj) for xi, xj in zip(x, [self.i] + x)]

if shift is None:
shift = width - 1

o = Signal((width + shift + 1, True))
self.comb += self.o.eq(o >> shift)
delay = -1
# Make products
o = []
for i, c in enumerate(coefficients):
# simplify for halfband and symmetric filters
if c == 0 or c in coefficients[i + 1:]:
if not c or c in coefficients[:i]:
continue
m = Signal((width + shift, True))
self.sync += m.eq(c*reduce(add, [
xj for xj, cj in zip(x[::-1], coefficients) if cj == c
]))
o.append(m)

# Make sum
self.sync += self.o.eq(reduce(add, o) >> shift)
js = [j for j, cj in enumerate(coefficients) if cj == c]
m = Signal.like(o)
o0, o = o, Signal.like(o)
if delay < js[0]:
self.sync += o0.eq(o + m)
delay += 1
else:
self.comb += o0.eq(o + m)
assert js[0] - delay >= 0
self.sync += m.eq(c*reduce(add, [x[j - delay] for j in js]))
# symmetric rounding
if shift:
self.comb += o.eq((1 << shift - 1) - 1)


class ParallelFIR(Module):
"""Full-rate parallelized finite impulse response filter.
:param coefficients: integer taps.
Tries to use transposed form as much as possible.
:param coefficients: integer taps, increasing delay.
:param parallelism: number of samples per cycle.
:param width: bit width of input and output.
:param shift: scale factor (as power of two).
@@ -86,34 +99,43 @@ def __init__(self, coefficients, parallelism, width=16, shift=None):
self.width = width
self.parallelism = p = parallelism
n = len(coefficients)
# input and output: old to young, decreasing delay
# input and output: old to new, decreasing delay
self.i = [Signal((width, True)) for i in range(p)]
self.o = [Signal((width, True)) for i in range(p)]
self.latency = (n + 1)//2//parallelism + 3 # minus one sample
self.latency = (n + 1)//2//p + 2
# ... plus one sample

###

# Delay line: young to old, increasing delay
if shift is None:
shift = bits_for(sum(abs(c) for c in coefficients)) - 1

# Delay line: increasing delay
x = [Signal((width, True)) for _ in range(n + p - 1)]
self.sync += [xi.eq(xj) for xi, xj in zip(x, self.i[::-1] + x)]

if shift is None:
shift = width - 1

for j in range(p):
for delay in range(p):
o = Signal((width + shift + 1, True))
self.comb += self.o[delay].eq(o >> shift)
# Make products
o = []
for i, c in enumerate(coefficients):
# simplify for halfband and symmetric filters
if c == 0 or c in coefficients[i + 1:]:
if not c or c in coefficients[:i]:
continue
m = Signal((width + shift, True))
self.sync += m.eq(c*reduce(add, [
xj for xj, cj in zip(x[-1 - j::-1], coefficients) if cj == c
]))
o.append(m)
# Make sum
self.sync += self.o[j].eq(reduce(add, o) >> shift)
js = [j + p - 1 for j, cj in enumerate(coefficients)
if cj == c]
m = Signal.like(o)
o0, o = o, Signal.like(o)
if delay + p <= js[0]:
self.sync += o0.eq(o + m)
delay += p
else:
self.comb += o0.eq(o + m)
assert js[0] - delay >= 0
self.sync += m.eq(c*reduce(add, [x[j - delay] for j in js]))
# symmetric rounding
if shift:
self.comb += o.eq((1 << shift - 1) - 1)


def halfgen4_cascade(rate, width, order=None):
117 changes: 57 additions & 60 deletions artiq/gateware/dsp/sawg.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
from misoc.cores.cordic import Cordic

from .accu import PhasedAccu
from .tools import eqh, Delay, SatAddMixin
from .tools import eqh, SatAddMixin
from .spline import Spline
from .fir import ParallelHBFUpsampler, halfgen4_cascade

@@ -14,72 +14,52 @@
_Orders = namedtuple("_Orders", "a p f")


class ParallelDDS(Module):
def __init__(self, widths, parallelism=1, a_delay=0):
self.i = Endpoint([("x", widths.a), ("y", widths.a),
("f", widths.f), ("p", widths.f), ("clr", 1)])
class SplineParallelDUC(Module):
def __init__(self, widths, orders, parallelism=1, **kwargs):
self.parallelism = parallelism
self.widths = widths

###

accu = PhasedAccu(widths.f, parallelism)
cordic = [Cordic(width=widths.a, widthz=widths.p, guard=None,
eval_mode="pipelined") for i in range(parallelism)]
self.xo = [c.xo for c in cordic]
self.yo = [c.yo for c in cordic]
a_delay += accu.latency
xy_delay = Delay(2*widths.a, max(0, a_delay))
z_delay = Delay(parallelism*widths.p, max(0, -a_delay))
self.submodules += accu, xy_delay, z_delay, cordic
self.latency = max(0, a_delay) + cordic[0].latency
self.gain = cordic[0].gain

self.comb += [
xy_delay.i.eq(Cat(self.i.x, self.i.y)),
z_delay.i.eq(Cat(zi[-widths.p:]
for zi in accu.o.payload.flatten())),
eqh(accu.i.p, self.i.p),
accu.i.f.eq(self.i.f),
accu.i.clr.eq(self.i.clr),
accu.i.stb.eq(self.i.stb),
self.i.ack.eq(accu.i.ack),
accu.o.ack.eq(1),
[Cat(c.xi, c.yi).eq(xy_delay.o) for c in cordic],
Cat(c.zi for c in cordic).eq(z_delay.o),
]


class SplineParallelDUC(ParallelDDS):
def __init__(self, widths, orders, **kwargs):
p = Spline(order=orders.p, width=widths.p)
f = Spline(order=orders.f, width=widths.f)
self.f = f.tri(widths.t)
self.p = p.tri(widths.t)
self.submodules += p, f
self.ce = Signal(reset=1)
self.clr = Signal()
super().__init__(widths._replace(p=len(self.p.a0), f=len(self.f.a0)),
**kwargs)
self.latency += f.latency

###
accu = PhasedAccu(len(self.f.a0), parallelism)
cordic = [Cordic(width=widths.a, widthz=len(self.p.a0), guard=None,
eval_mode="pipelined") for i in range(parallelism)]
self.submodules += accu, cordic

assert p.latency == f.latency
self.xi = [c.xi for c in cordic]
self.yi = [c.yi for c in cordic]
self.xo = [c.xo for c in cordic]
self.yo = [c.yo for c in cordic]
self.latency = cordic[0].latency
self.gain = cordic[0].gain
self.f.latency += accu.latency + self.latency
self.p.latency += accu.latency + self.latency

###

assert p.latency == f.latency
self.comb += [
p.o.ack.eq(self.ce),
f.o.ack.eq(self.ce),
eqh(self.i.f, f.o.a0),
eqh(self.i.p, p.o.a0),
self.i.stb.eq(p.o.stb | f.o.stb),
eqh(accu.i.f, f.o.a0),
eqh(accu.i.p, p.o.a0),
accu.i.stb.eq(p.o.stb | f.o.stb),
accu.o.ack.eq(1),
[eqh(c.zi, zi) for c, zi in
zip(cordic, accu.o.payload.flatten())]
]

assert p.latency == 1
self.sync += [
self.i.clr.eq(0),
accu.i.clr.eq(0),
If(p.i.stb,
self.i.clr.eq(self.clr),
accu.i.clr.eq(self.clr),
),
]

@@ -91,12 +71,14 @@ def __init__(self, widths, orders, **kwargs):
self.submodules += a
super().__init__(widths._replace(a=len(self.a.a0)), orders, **kwargs)

self.a.latency += self.latency

###

self.comb += [
a.o.ack.eq(self.ce),
eqh(self.i.x, a.o.a0),
self.i.y.eq(0),
[eqh(x, a.o.a0) for x in self.xi],
[y.eq(0) for y in self.yi],
]


@@ -108,7 +90,7 @@ def __init__(self, width):
Signal((width, True), reset=(1 << width - 1) - 1)]
for i in range(3)]
self.clipped = [Signal(2) for i in range(3)] # TODO
self.i = Endpoint([("addr", bits_for(4 + len(self.limits))),
self.i = Endpoint([("addr", bits_for(1 + 4 + len(self.limits))),
("data", 16)])
self.ce = Signal()

@@ -119,7 +101,7 @@ def __init__(self, width):
pad = Signal()

reg = Array([Cat(div, n), self.clr, self.iq_en, pad] +
[Cat(*l) for l in self.limits])
sum(self.limits, []))

self.comb += [
self.i.ack.eq(1),
@@ -151,12 +133,11 @@ def __init__(self, width=16, parallelism=4, widths=None, orders=None):
hbf = [ParallelHBFUpsampler(coeff, width=width, shift=17)
for i in range(2)]
self.submodules.b = b = SplineParallelDUC(
widths._replace(a=len(a1.xo[0]), f=widths.f - width), orders,
parallelism=parallelism, a_delay=-a1.latency-hbf[0].latency)
widths._replace(a=len(hbf[0].o[0]), f=widths.f - width), orders,
parallelism=parallelism)
cfg = Config(widths.a)
u = Spline(width=widths.a, order=orders.a)
du = Delay(width, a1.latency + hbf[0].latency + b.latency - u.latency)
self.submodules += cfg, u, du, hbf
self.submodules += cfg, u, hbf
self.u = u.tri(widths.t)
self.i = [cfg.i, self.u, a1.a, a1.f, a1.p, a2.a, a2.f, a2.p, b.f, b.p]
self.i_names = "cfg u a1 f1 p1 a2 f2 p2 f0 p0".split()
@@ -166,9 +147,23 @@ def __init__(self, width=16, parallelism=4, widths=None, orders=None):
self.widths = widths
self.orders = orders
self.parallelism = parallelism
self.latency = a1.latency + hbf[0].latency + b.latency + 2
self.cordic_gain = a1.gain*b.gain

self.u.latency += 1
b.p.latency += 2
b.f.latency += 2
a_latency_delta = hbf[0].latency + b.latency + 2
for a in a1, a2:
a.a.latency += a_latency_delta
a.p.latency += a_latency_delta
a.f.latency += a_latency_delta

self.latency = max(_.latency for _ in self.i[1:])
for i in self.i[1:]:
i.latency -= self.latency
assert i.latency <= 0
cfg.i.latency = 0

###

self.comb += [
@@ -177,8 +172,8 @@ def __init__(self, width=16, parallelism=4, widths=None, orders=None):
b.ce.eq(cfg.ce),
u.o.ack.eq(cfg.ce),
Cat(a1.clr, a2.clr, b.clr).eq(cfg.clr),
b.i.x.eq(hbf[0].o[0]), # FIXME: rip up
b.i.y.eq(hbf[1].o[0]),
Cat(b.xi).eq(Cat(hbf[0].o)),
Cat(b.yi).eq(Cat(hbf[1].o)),
]
self.sync += [
hbf[0].i.eq(self.sat_add(a1.xo[0], a2.xo[0],
@@ -187,14 +182,16 @@ def __init__(self, width=16, parallelism=4, widths=None, orders=None):
hbf[1].i.eq(self.sat_add(a1.yo[0], a2.yo[0],
limits=cfg.limits[1],
clipped=cfg.clipped[1])),
eqh(du.i, u.o.a0),
]
# wire up outputs and q_{i,o} exchange
for o, x, y in zip(self.o, b.xo, self.y_in):
self.sync += [
o.eq(self.sat_add(
du.o, Mux(cfg.iq_en[0], x, 0), Mux(cfg.iq_en[1], y, 0),
limits=cfg.limits[2], clipped=cfg.clipped[2])),
u.o.a0[-len(o):],
Mux(cfg.iq_en[0], x, 0),
Mux(cfg.iq_en[1], y, 0),
limits=cfg.limits[2],
clipped=cfg.clipped[2])),
]

def connect_y(self, buddy):
1 change: 1 addition & 0 deletions artiq/gateware/dsp/spline.py
Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@ def tri(self, time_width):
enumerate(self.i.payload.layout[::-1])]
layout.reverse()
i = Endpoint(layout)
i.latency = self.latency
self.comb += [
self.i.stb.eq(i.stb),
i.ack.eq(self.i.ack),
Loading