Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gateware: add jesd204b awg
Browse files Browse the repository at this point in the history
gateware: copy phaser (df3825a)
dsp/tools: update satadd mixin
phaser: no DDS stubs
dsp: accu fix
phaser: cleanup/reduce
jordens committed Sep 2, 2016
1 parent c414026 commit ee75220
Showing 9 changed files with 732 additions and 1 deletion.
Empty file added artiq/gateware/dsp/__init__.py
Empty file.
112 changes: 112 additions & 0 deletions artiq/gateware/dsp/accu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from migen import *
from misoc.interconnect.stream import Endpoint


class Accu(Module):
    """Accumulator with stream-style input and output endpoints.

    Input endpoint ``i`` carries ``p``, ``f`` (both ``width`` bits) and a
    one-bit ``clr``; output endpoint ``o`` carries the accumulator ``z``.

    Parameters
    ----------
    width : int
        Bit width of ``p``, ``f`` and ``z``.
    meta : list
        Accepted but not read anywhere in this constructor.
    """
    def __init__(self, width, meta=[]):
        self.i = Endpoint([("p", width), ("f", width), ("clr", 1)])
        self.o = Endpoint([("z", width)])
        self.latency = 1

        ###

        # registered copies of the last accepted input words
        f = Signal.like(self.i.f)
        p = Signal.like(self.i.p)

        # the input is acknowledged when the output holds no valid word
        # or when that word is being taken in this cycle
        self.comb += self.i.ack.eq(~self.o.stb | self.o.ack)

        # a new input word arrives: reload (or clear) the accumulator
        on_input = [
            self.o.z.eq(self.i.p + Mux(self.i.clr, 0, self.o.z + p)),
            f.eq(self.i.f),
            p.eq(self.i.f - self.i.p),
        ]
        # no new input word: keep integrating the stored increment
        on_idle = [
            self.o.z.eq(self.o.z + f),
        ]

        self.sync += [
            If(self.o.ack,
                self.o.stb.eq(0),
            ),
            # listed after the clear above so that stb.eq(1) wins when
            # both conditions hold in the same cycle
            If(self.i.ack,
                self.o.stb.eq(1),
                If(self.i.stb, *on_input).Else(*on_idle),
            ),
        ]


class MCM(Module):
    """Multiple constant multiplication of one input by ``0 .. n-1``.

    ``o[k]`` is driven combinatorially with ``k * i``, built from shifts
    and additions. Every output is declared with the same width as the
    input.

    Parameters
    ----------
    width : int
        Bit width of the input ``i``.
    constants : sequence of int
        The multipliers. Only the contiguous sequence ``0, 1, ..., n-1``
        with ``n <= 9`` is implemented; any sized iterable holding exactly
        those values (``range``, ``list``, ``tuple``) is accepted.
    """
    def __init__(self, width, constants):
        n = len(constants)
        self.i = i = Signal(width)
        self.o = o = [Signal.like(self.i) for _ in range(n)]

        ###

        # TODO: improve MCM
        # Compare element-wise: ``range(n) == [0, 1, ...]`` is always False
        # in Python 3, which used to reject valid list/tuple arguments.
        assert list(constants) == list(range(n))
        assert n <= 9

        # One thunk per supported multiplier. They are evaluated lazily so
        # that ``o[3]`` is only touched when it actually exists.
        products = (
            lambda: 0,
            lambda: i,
            lambda: i << 1,
            lambda: i + (i << 1),
            lambda: i << 2,
            lambda: i + (i << 2),
            lambda: o[3] << 1,
            lambda: (i << 3) - i,
            lambda: i << 3,
        )
        for oi, product in zip(o, products):
            self.comb += oi.eq(product())


class PhasedAccu(Module):
    """Accumulator producing ``parallelism`` outputs per clock cycle.

    The input endpoint ``i`` carries ``p``, ``f`` and ``clr``; the output
    endpoint ``o`` carries the fields ``z0 .. z{parallelism-1}``. An
    :class:`MCM` instance supplies the multiples ``k * i.f`` used to
    offset the individual outputs and to advance all of them by
    ``parallelism * i.f`` each cycle.

    Parameters
    ----------
    width : int
        Bit width of ``p``, ``f`` and of every output field.
    parallelism : int
        Number of output fields.
    """
    def __init__(self, width, parallelism=8):
        self.i = Endpoint([("p", width), ("f", width), ("clr", 1)])
        self.o = Endpoint([("z{}".format(i), width) for i in
                           range(parallelism)])
        self.parallelism = parallelism
        self.latency = 2

        ###

        # a.o[k] == k * i.f for k in 0 .. parallelism
        a = MCM(width, range(parallelism + 1))
        self.submodules += a
        z = [Signal(width) for i in range(parallelism)]
        o = self.o.payload.flatten()
        load = Signal()
        clr = Signal()
        p = Signal.like(self.i.p)
        f = Signal.like(self.i.f)
        fp = Signal.like(self.i.f)
        self.comb += [
            self.i.ack.eq(self.o.ack),
            a.i.eq(self.i.f),
        ]

        output_free = ~self.o.stb | self.o.ack
        input_taken = self.i.stb & self.i.ack

        self.sync += [
            If(self.o.ack,
                self.o.stb.eq(0),
            ),
            If(output_free,
                self.o.stb.eq(1),
                If(load,
                    # second stage: fold the staged offsets into the
                    # outputs and switch to the new per-cycle increment
                    load.eq(0),
                    [oi.eq(Mux(clr, 0, o[0] + fp) + zi)
                     for oi, zi in zip(o, z)],
                    fp.eq(f),
                ).Else(
                    # steady state: every output advances by fp
                    [oi.eq(oi + fp) for oi in o],
                ),
            ),
            If(input_taken,
                # first stage: stage the per-output offsets and remember
                # the request until it is applied by the load branch above
                [zi.eq(self.i.p - Mux(self.i.clr, 0, p) + aoi)
                 for zi, aoi in zip(z, a.o)],
                clr.eq(self.i.clr),
                p.eq(self.i.p),
                f.eq(a.o[parallelism]),
                load.eq(1),
            ),
        ]
358 changes: 358 additions & 0 deletions artiq/gateware/dsp/cordic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,358 @@
# Copyright 2014-2015 Robert Jordens <jordens@gmail.com>
#
# This file is part of redpid.
#
# redpid is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# redpid is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with redpid. If not, see <http://www.gnu.org/licenses/>.

from math import atan, atanh, log, sqrt, pi

from migen import *


class TwoQuadrantCordic(Module):
"""Coordinate rotation digital computer
Trigonometric, and arithmetic functions implemented using
additions/subtractions and shifts.
http://eprints.soton.ac.uk/267873/1/tcas1_cordic_review.pdf
http://www.andraka.com/files/crdcsrvy.pdf
http://zatto.free.fr/manual/Volder_CORDIC.pdf
The way the CORDIC is executed is controlled by `eval_mode`.
If `"iterative"` the stages are iteratively evaluated, one per clock
cycle. This mode uses the least amount of registers, but has the
lowest throughput and highest latency. If `"pipelined"` all stages
are executed in every clock cycle but separated by registers. This
mode has full throughput but uses many registers and has large
latency. If `"combinatorial"`, there are no registers, throughput is
maximal and latency is zero. `"pipelined"` and `"combinatorial"` use
the same number of shifters and adders.
The type of trigonometric/arithmetic function is determined by
`cordic_mode` and `func_mode`. :math:`g` is the gain of the CORDIC.
* rotate-circular: rotate the vector `(xi, yi)` by an angle `zi`.
Used to calculate trigonometric functions, `sin(), cos(),
tan() = sin()/cos()`, or to perform polar-to-cartesian coordinate
transformation:
.. math::
x_o = g \\cos(z_i) x_i - g \\sin(z_i) y_i
y_o = g \\sin(z_i) x_i + g \\cos(z_i) y_i
* vector-circular: determine length and angle of the vector
`(xi, yi)`. Used to calculate `arctan(), sqrt()` or
to perform cartesian-to-polar transformation:
.. math::
x_o = g\\sqrt{x_i^2 + y_i^2}
z_o = z_i + \\tan^{-1}(y_i/x_i)
* rotate-hyperbolic: hyperbolic functions of `zi`. Used to
calculate hyperbolic functions, `sinh, cosh, tanh = cosh/sinh,
exp = cosh + sinh`:
.. math::
x_o = g \\cosh(z_i) x_i + g \\sinh(z_i) y_i
y_o = g \\sinh(z_i) x_i + g \\cosh(z_i) y_i
* vector-hyperbolic: natural logarithm `ln(), arctanh()`, and
`sqrt()`. Use `x_i = a + b` and `y_i = a - b` to obtain `2*
sqrt(a*b)` and `ln(a/b)/2`:
.. math::
x_o = g\\sqrt{x_i^2 - y_i^2}
z_o = z_i + \\tanh^{-1}(y_i/x_i)
* rotate-linear: multiply and accumulate (not a very good
multiplier implementation):
.. math::
y_o = g(y_i + x_i z_i)
* vector-linear: divide and accumulate:
.. math::
z_o = g(z_i + y_i/x_i)
Parameters
----------
width : int
Bit width of the input and output signals. Defaults to 16. Input
and output signals are signed.
widthz : int
Bit width of `zi` and `zo`. Defaults to `width`.
stages : int or None
Number of CORDIC incremental rotation stages. Defaults to
`width + min(1, guard)`.
guard : int or None
Add guard bits to the intermediate signals. If `None`,
defaults to `guard = log2(width)` which guarantees accuracy
to `width` bits.
eval_mode : str, {"iterative", "pipelined", "combinatorial"}
cordic_mode : str, {"rotate", "vector"}
func_mode : str, {"circular", "linear", "hyperbolic"}
Evaluation and arithmetic mode. See above.
Attributes
----------
xi, yi, zi : Signal(width), in
Input values, signed.
xo, yo, zo : Signal(width), out
Output values, signed.
new_out : Signal(1), out
Asserted if output values are freshly updated in the current
cycle.
new_in : Signal(1), out
Asserted if new input values are being read in the next cycle.
zmax : float
`zi` and `zo` normalization factor. Floating point `zmax`
corresponds to `1<<(widthz - 1)`. `x` and `y` are scaled such
that floating point `1` corresponds to `1<<(width - 1)`.
gain : float
Cumulative, intrinsic gain and scaling factor. In circular mode
`sqrt(xi**2 + yi**2)` should be no larger than `2**(width - 1)/gain`
to prevent overflow. Additionally, in hyperbolic and linear mode,
the operation itself can cause overflow.
interval : int
Output interval in clock cycles. Inverse throughput.
latency : int
Input-to-output latency. The result corresponding to the inputs
appears at the outputs `latency` cycles later.
Notes
-----
Each stage `i` in the CORDIC performs the following operation:
.. math::
x_{i+1} = x_i - m d_i y_i r^{-s_{m,i}},
y_{i+1} = y_i + d_i x_i r^{-s_{m,i}},
z_{i+1} = z_i - d_i a_{m,i},
where:
* :math:`d_i`: clockwise or counterclockwise, determined by
`sign(z_i)` in rotate mode or `sign(-y_i)` in vector mode.
* :math:`r`: radix of the number system (2)
* :math:`m`: 1: circular, 0: linear, -1: hyperbolic
* :math:`s_{m,i}`: non decreasing integer shift sequence
* :math:`a_{m,i}`: elementary rotation angle: :math:`a_{m,i} =
\\tan^{-1}(\\sqrt{m} r^{-s_{m,i}})/\\sqrt{m}`.
"""
def __init__(self, width=16, widthz=None, stages=None, guard=0,
             eval_mode="iterative", cordic_mode="rotate",
             func_mode="circular"):
    """Build the CORDIC datapath for the requested modes.

    The parameters are those documented in the class docstring. Besides
    the signals, this sets the attributes ``cordic_mode``, ``func_mode``,
    ``zmax``, ``gain``, ``interval`` and ``latency``.
    """
    # validate parameters
    assert eval_mode in ("combinatorial", "pipelined", "iterative")
    assert cordic_mode in ("rotate", "vector")
    assert func_mode in ("circular", "linear", "hyperbolic")
    self.cordic_mode = cordic_mode
    self.func_mode = func_mode

    # fill in the parameters left to their automatic defaults
    if guard is None:
        # guard bits to guarantee "width" accuracy
        guard = int(log(width)/log(2))
    if widthz is None:
        widthz = width
    if stages is None:
        stages = width + min(1, guard)  # cuts error below LSB

    # input output interface
    self.xi = Signal((width, True))
    self.yi = Signal((width, True))
    self.zi = Signal((widthz, True))
    self.xo = Signal((width, True))
    self.yo = Signal((width, True))
    self.zo = Signal((widthz, True))
    self.new_in = Signal()
    self.new_out = Signal()

    ###

    a, s, self.zmax, self.gain = self._constants(stages, widthz + guard)
    stages = len(a)  # may have increased due to repetitions

    # number of inter-stage signal sets and the resulting timing figures
    if eval_mode == "iterative":
        # input, working and output set only
        num_sig = 3
        self.interval = stages + 1
        self.latency = stages + 2
    elif eval_mode == "pipelined":
        num_sig = stages + 1
        self.interval = 1
        self.latency = stages
    else:  # combinatorial
        num_sig = stages + 1
        self.interval = 1
        self.latency = 0

    # inter-stage signals, widened by the guard bits
    x = [Signal((width + guard, True)) for i in range(num_sig)]
    y = [Signal((width + guard, True)) for i in range(num_sig)]
    z = [Signal((widthz + guard, True)) for i in range(num_sig)]

    # hook up inputs and outputs to the first and last inter-stage
    # signals
    self.comb += [
        x[0].eq(self.xi << guard),
        y[0].eq(self.yi << guard),
        z[0].eq(self.zi << guard),
        self.xo.eq(x[-1] >> guard),
        self.yo.eq(y[-1] >> guard),
        self.zo.eq(z[-1] >> guard),
    ]

    if eval_mode != "iterative":
        # one hardware stage per entry of the shift sequence; data is
        # accepted and produced on every cycle
        self.comb += [
            self.new_out.eq(1),
            self.new_in.eq(1),
        ]
        for i, si in enumerate(s):
            stmt = self._stage(x[i], y[i], z[i],
                               x[i + 1], y[i + 1], z[i + 1],
                               si, a[i])
            if eval_mode == "pipelined":
                self.sync += stmt
            else:  # combinatorial
                self.comb += stmt
        return

    # iterative: a single stage is reused, sequenced by the counter i
    # We afford one additional iteration for in/out.
    i = Signal(max=stages + 1)
    self.comb += [
        self.new_in.eq(i == stages),
        self.new_out.eq(i == 1),
    ]
    # rotation angle for the stage, looked up one cycle ahead
    ai = Signal((widthz + guard, True))
    self.sync += ai.eq(Array(a)[i])
    if range(stages) == s:
        si = i - 1  # shortcut if no stage repetitions
    else:
        si = Signal(max=stages + 1)
        self.sync += si.eq(Array(s)[i])
    xi, yi, zi = x[1], y[1], z[1]
    self.sync += [
        self._stage(xi, yi, zi, xi, yi, zi, si, ai),
        i.eq(i + 1),
        If(i == stages,
            i.eq(0),
        ),
        # listed last so that it overrides the stage update above:
        # publish the finished result and load the next operands
        If(i == 0,
            x[2].eq(xi), y[2].eq(yi), z[2].eq(zi),
            xi.eq(x[0]), yi.eq(y[0]), zi.eq(z[0]),
        ),
    ]

def _constants(self, stages, bits):
if self.func_mode == "circular":
s = range(stages)
a = [atan(2**-i) for i in s]
g = [sqrt(1 + 2**(-2*i)) for i in s]
#zmax = sum(a)
# use pi anyway as the input z can cause overflow
# and we need the range for quadrant mapping
zmax = pi
elif self.func_mode == "linear":
s = range(stages)
a = [2**-i for i in s]
g = [1 for i in s]
#zmax = sum(a)
# use 2 anyway as this simplifies a and scaling
zmax = 2.
else: # hyperbolic
s = []
# need to repeat some stages:
j = 4
for i in range(stages):
if i == j:
s.append(j)
j = 3*j + 1
s.append(i + 1)
a = [atanh(2**-i) for i in s]
g = [sqrt(1 - 2**(-2*i)) for i in s]
zmax = sum(a)*2
# round here helps the width=2**i - 1 case but hurts the
# important width=2**i case
cast = int
if log(bits)/log(2) % 1:
cast = round
a = [cast(ai*2**(bits - 1)/zmax) for ai in a]
gain = 1.
for gi in g:
gain *= gi
return a, s, zmax, gain

def _stage(self, xi, yi, zi, xo, yo, zo, i, ai):
    """Return the assignments implementing one CORDIC micro-rotation.

    ``(xi, yi, zi)`` are the stage inputs, ``(xo, yo, zo)`` the targets,
    ``i`` the shift amount and ``ai`` the rotation angle of the stage.
    A direction signal is created and driven combinatorially as a side
    effect; the returned statements are left for the caller to place in
    a sync or comb block.
    """
    # rotation direction: sign of z in rotate mode, sign of y in vector
    # mode (the local keeps the name `dir`, which Migen uses to name the
    # signal)
    dir = Signal()
    if self.cordic_mode == "rotate":
        decision = zi < 0
    else:  # vector
        decision = yi >= 0
    self.comb += dir.eq(decision)

    # x increment: none in linear mode, sign-flipped in hyperbolic mode
    if self.func_mode == "linear":
        dx = 0
    elif self.func_mode == "hyperbolic":
        dx = -(yi >> i)
    else:  # circular
        dx = yi >> i
    dy = xi >> i
    dz = ai

    return [
        xo.eq(xi + Mux(dir, dx, -dx)),
        yo.eq(yi + Mux(dir, -dy, dy)),
        zo.eq(zi + Mux(dir, dz, -dz)),
    ]


class Cordic(TwoQuadrantCordic):
    """Four-quadrant CORDIC

    Same as :class:`TwoQuadrantCordic` but with support and convergence
    for `abs(zi) > pi/2` in circular rotate mode or `xi < 0` in circular
    vector mode.

    In circular mode the public inputs `xi`, `yi` and `zi` are replaced
    by new signals which are mapped onto the inputs of the underlying
    two-quadrant core. The other function modes are left unchanged.
    """
    def __init__(self, **kwargs):
        TwoQuadrantCordic.__init__(self, **kwargs)
        if self.func_mode != "circular":
            return  # no need to remap quadrants

        # keep the core inputs and expose fresh signals under their names
        cxi, cyi, czi = self.xi, self.yi, self.zi
        self.xi = xi = Signal.like(cxi)
        self.yi = yi = Signal.like(cyi)
        self.zi = zi = Signal.like(czi)

        ###

        # q: the input has to be remapped before it enters the core
        q = Signal()
        if self.cordic_mode == "rotate":
            # the two most significant bits of zi differ
            needs_remap = zi[-2] ^ zi[-1]
        else:  # vector
            needs_remap = xi < 0
        self.comb += q.eq(needs_remap)

        # remapping negates x and y and adds half of the z range
        half_range = 1 << len(zi) - 1
        self.comb += [
            If(q,
                Cat(cxi, cyi, czi).eq(
                    Cat(-xi, -yi, zi + half_range))
            ).Else(
                Cat(cxi, cyi, czi).eq(Cat(xi, yi, zi))
            )
        ]
66 changes: 66 additions & 0 deletions artiq/gateware/dsp/sawg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from migen import *
from misoc.interconnect.stream import Endpoint

from .cordic import Cordic
from .accu import PhasedAccu
from .tools import eqh


class DDSFast(Module):
    """Parallel DDS: a :class:`PhasedAccu` feeding one CORDIC per sample.

    Amplitude, frequency and phase are written through the endpoints
    ``a``, ``f`` and ``p`` (also collected in ``i``), which are always
    acknowledged. ``o`` is a list of ``parallelism`` signed output
    signals, one per CORDIC.

    Parameters
    ----------
    width : int
        Bit width of the outputs, of the amplitude and of the phase. The
        frequency endpoint is three times as wide.
    parallelism : int
        Number of outputs and CORDIC instances.
    """
    def __init__(self, width, parallelism=4):
        a_width = width
        p_width = width
        f_width = 3*width

        self.o = [Signal((width, True)) for i in range(parallelism)]

        self.parallelism = parallelism
        self.latency = 0  # will be accumulated

        q = PhasedAccu(width, parallelism)
        self.submodules += q
        self.latency += q.latency

        self.a = Endpoint([("a", a_width)])
        self.f = Endpoint([("f", f_width)])
        self.p = Endpoint([("p", p_width)])
        self.i = [self.a, self.f, self.p]

        ###

        # amplitude register, shared by all CORDICs
        a = Signal.like(self.a.a)
        # NOTE(review): q.i.f and q.i.p are registered here while q.i.stb
        # below is combinatorial, so the accumulator appears to be strobed
        # one cycle before the new word reaches its inputs -- confirm this
        # is intended.
        self.sync += [
            If(self.a.stb,
                a.eq(self.a.a)
            ),
            If(self.f.stb,
                eqh(q.i.f, self.f.f)
            ),
            If(self.p.stb,
                eqh(q.i.p, self.p.p)
            )
        ]
        self.comb += [
            self.a.ack.eq(1),
            self.f.ack.eq(1),
            self.p.ack.eq(1),
            q.o.ack.eq(1),
            q.i.clr.eq(0),
            q.i.stb.eq(self.f.stb | self.p.stb),
        ]

        # one pipelined CORDIC per output, rotating (a, 0) by the
        # matching accumulator output
        c = []
        for i, oi in enumerate(self.o):
            ci = Cordic(width=width, widthz=p_width,
                        guard=None, eval_mode="pipelined")
            self.submodules += ci
            c.append(ci)
            qoi = getattr(q.o, "z{}".format(i))
            self.comb += [
                ci.xi.eq(a),
                ci.yi.eq(0),
                eqh(ci.zi, qoi),
                eqh(oi, ci.xo),
            ]
        # all CORDICs are built alike, so the first one is representative
        self.latency += c[0].latency
        self.gain = c[0].gain
47 changes: 47 additions & 0 deletions artiq/gateware/dsp/test_accu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import numpy as np

from migen import *
from migen.fhdl.verilog import convert

from accu import Accu, PhasedAccu

from tools import xfer


def read(o, n):
    """Simulation generator sampling an endpoint payload ``n`` times.

    For each of the ``n`` samples every signal of ``o.payload.flatten()``
    is yielded, in order, and the value sent back by the simulator is
    recorded. A bare ``yield`` follows each sample.

    Parameters
    ----------
    o : endpoint
        Object whose ``payload.flatten()`` returns the signals to read.
    n : int
        Number of samples to take.

    Returns
    -------
    list of list
        One inner list of values per sample (available to the caller
        through ``yield from``).
    """
    p = []
    for _ in range(n):
        # An explicit loop: ``yield`` inside a list comprehension is a
        # SyntaxError from Python 3.8 on.
        sample = []
        for pi in o.payload.flatten():
            sample.append((yield pi))
        p.append(sample)
        yield
    return p


def _test_gen_accu(dut, o):
    """Stimulus generator for the accumulator test.

    Keeps the output acknowledged, writes a series of input words to
    ``dut.i`` and appends the samples read back from ``dut.o`` to ``o``.
    """
    yield dut.o.ack.eq(1)
    # (input word, whether to read 8 output samples afterwards)
    steps = [
        (dict(p=0, f=1, clr=1), True),
        (dict(p=0, f=2, clr=0), True),
        (dict(p=0, f=2, clr=1), True),
        (dict(p=8, f=-1, clr=1), True),
        (dict(p=0, f=0, clr=1), False),
        (dict(p=1, f=0, clr=0), True),
    ]
    for word, capture in steps:
        yield from xfer(dut, i=word)
        if capture:
            o.extend((yield from read(dut.o, 8)))


def _test_accu():
    """Simulate an 8-way PhasedAccu and print the captured outputs."""
    dut = PhasedAccu(8, parallelism=8)

    # flip by hand to print the generated Verilog instead of simulating
    emit_verilog = False
    if emit_verilog:
        print(convert(dut))
        return

    samples = []
    run_simulation(dut, _test_gen_accu(dut, samples), vcd_name="accu.vcd")
    print(np.array(samples))


if __name__ == "__main__":
    _test_accu()
39 changes: 39 additions & 0 deletions artiq/gateware/dsp/test_sawg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import numpy as np

from migen import *
from migen.fhdl.verilog import convert

from sawg import DDS

from tools import xfer


def _test_gen_dds(dut, o):
    """Stimulus generator for the DDS test.

    Sets ``ce`` and ``clr``, writes one word to each of the endpoints
    ``a1``, ``p1``, ``f1``, ``f`` and ``p`` and then, for 256 cycles,
    appends one list of ``(oi[0], oi[1])`` value pairs per cycle to ``o``.
    """
    yield dut.ce.eq(1)
    yield dut.clr.eq(1)
    yield from xfer(dut,
                    a1=dict(a0=10),
                    p1=dict(a0=0),
                    f1=dict(a0=0 << 16, a1=0),
                    f=dict(a0=10 << 24),
                    p=dict(a0=0),
                    )
    for _ in range(256):
        yield
        # An explicit loop: ``yield`` inside a list comprehension is a
        # SyntaxError from Python 3.8 on. The two elements are read in
        # the same order as before.
        sample = []
        for oi in dut.o:
            first = yield oi[0]
            second = yield oi[1]
            sample.append((first, second))
        o.append(sample)


def _test_channel():
    """Simulate a two-way DDS and print the first element of each output."""
    # NOTE(review): this uses `DDS` imported from `sawg`; the sawg module
    # added in the same change appears to define only `DDSFast` -- confirm
    # that this test is still current.
    dut = DDS(width=8, parallelism=2)

    # flip by hand to print the generated Verilog instead of simulating
    emit_verilog = False
    if emit_verilog:
        print(convert(dut))
        return

    samples = []
    run_simulation(dut, _test_gen_dds(dut, samples), vcd_name="dds.vcd")
    samples = np.array(samples)
    print(samples[:, :, 0])


if __name__ == "__main__":
    _test_channel()
31 changes: 31 additions & 0 deletions artiq/gateware/dsp/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from migen import *


def set_dict(e, **k):
    """Yield assignments setting attributes of ``e`` from keyword values.

    Each keyword names an attribute of ``e``. A ``dict`` value is applied
    recursively to that attribute; any other value ``v`` produces
    ``getattr(e, name).eq(v)``.
    """
    for name, value in k.items():
        target = getattr(e, name)
        if not isinstance(value, dict):
            yield target.eq(value)
            continue
        # nested record: descend with the inner field values
        yield from set_dict(target, **value)


def xfer(dut, **kw):
    """Simulation generator writing one word to each named endpoint.

    Every keyword names an endpoint attribute of ``dut`` and maps to the
    field values to apply with :func:`set_dict`. All strobes are raised,
    then each endpoint's strobe is dropped once its ``ack`` reads true
    (immediately after one cycle if it has no ``ack`` attribute).

    Raises
    ------
    ValueError
        If an endpoint has a ``busy`` attribute that reads true.
    """
    pending = []
    for name, fields in kw.items():
        endpoint = getattr(dut, name)
        yield from set_dict(endpoint, **fields)
        pending.append(endpoint)
    for endpoint in pending:
        yield endpoint.stb.eq(1)
    while pending:
        yield
        # iterate over a snapshot since finished endpoints are removed
        for endpoint in list(pending):
            if hasattr(endpoint, "busy") and (yield endpoint.busy):
                raise ValueError(endpoint, "busy")
            if not hasattr(endpoint, "ack") or (yield endpoint.ack):
                yield endpoint.stb.eq(0)
                pending.remove(endpoint)


def eqh(a, b):
    """Assign ``b`` to ``a``, aligned at the most significant end.

    Both operands are cut down to their top ``min(len(a), len(b))`` items
    before ``eq`` is applied, so the low end of the longer one is left
    out.
    """
    target_high = a[-len(b):]
    source_high = b[-len(a):]
    return target_high.eq(source_high)
26 changes: 26 additions & 0 deletions artiq/gateware/rtio/phy/sawg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from collections import namedtuple

from migen import *
from artiq.gateware.rtio import rtlink

from artiq.gateware.dsp.sawg import DDSFast


# minimal stand-in for an RTIO PHY: an rtlink plus probe/override lists
_Phy = namedtuple("Phy", "rtlink probes overrides")


class Channel(Module):
    """RTIO front end for one :class:`DDSFast` instance.

    The DDS is instantiated in the ``rio_phy`` clock domain as
    ``self._ll``. For each of its input endpoints an output-only rtlink
    is created and collected, wrapped in a ``_Phy`` tuple, in
    ``self.phys``. All arguments are passed on to :class:`DDSFast`.
    """
    def __init__(self, *args, **kwargs):
        self.submodules._ll = ClockDomainsRenamer("rio_phy")(
            DDSFast(*args, **kwargs))
        self.phys = []
        for endpoint in self._ll.i:
            # the rtlink data width is capped at 64 bits
            data_width = min(64, len(endpoint.payload))
            rl = rtlink.Interface(rtlink.OInterface(data_width))
            self.comb += [
                endpoint.stb.eq(rl.o.stb),
                rl.o.busy.eq(~endpoint.ack),
                Cat(endpoint.payload.flatten()).eq(rl.o.data),
            ]
            # no probes, overrides
            self.phys.append(_Phy(rl, [], []))
54 changes: 53 additions & 1 deletion artiq/gateware/targets/kc705.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,8 @@

from artiq.gateware.soc import AMPSoC, build_artiq_soc
from artiq.gateware import rtio, nist_qc1, nist_clock, nist_qc2
from artiq.gateware.rtio.phy import ttl_simple, ttl_serdes_7series, dds, spi
from artiq.gateware.rtio.phy import (ttl_simple, ttl_serdes_7series,
dds, spi, sawg)
from artiq import __version__ as artiq_version


@@ -388,6 +389,55 @@ def __init__(self, cpu_type="or1k", **kwargs):
self.config["DDS_RTIO_CLK_RATIO"] = 24 >> self.rtio.fine_ts_width


class Phaser(_NIST_Ions):
    """KC705 target variant with four SAWG channels.

    RTIO channels are added in this order: one in/out TTL on the SMA
    connector, one LED output, one SPI master, the PHYs of the four SAWG
    channels and finally the log channel. The index at which each group
    starts is recorded in ``self.config``.
    """
    def __init__(self, cpu_type="or1k", **kwargs):
        _NIST_Ions.__init__(self, cpu_type, **kwargs)

        platform = self.platform
        # TODO: dummy
        platform.add_extension(nist_clock.fmc_adapter_io)

        rtio_channels = []

        # --- TTL channels ---
        phy = ttl_serdes_7series.Inout_8X(
            platform.request("user_sma_gpio_n_33"))
        self.submodules += phy
        rtio_channels.append(rtio.Channel.from_phy(phy, ififo_depth=128))

        phy = ttl_simple.Output(platform.request("user_led", 2))
        self.submodules += phy
        rtio_channels.append(rtio.Channel.from_phy(phy))

        self.config["RTIO_REGULAR_TTL_COUNT"] = len(rtio_channels)

        # --- SPI channel ---
        self.config["RTIO_FIRST_SPI_CHANNEL"] = len(rtio_channels)
        # TODO: dummy, hookup ad9154 spi here
        phy = spi.SPIMaster(self.platform.request("spi", 0))
        self.submodules += phy
        rtio_channels.append(rtio.Channel.from_phy(
            phy, ofifo_depth=128, ififo_depth=128))

        # --- SAWG channels ---
        self.config["RTIO_FIRST_PHASER_CHANNEL"] = len(rtio_channels)
        sawgs = [sawg.Channel(width=16, parallelism=4) for i in range(4)]
        self.submodules += sawgs

        # TODO: dummy, hookup jesd204b phy here
        # NOTE(review): the adder chain below is clocked by the default
        # sync domain while the SAWG cores and the final register use
        # rio_phy -- confirm this is acceptable for the placeholder.
        o = Signal((16, True))
        for ch in sawgs:  # gather up dangling outputs
            for oi in ch._ll.o:
                o0, o = o, Signal.like(o)
                self.sync += o.eq(o0 + oi)
        self.sync.rio_phy += platform.request("dds").d.eq(o)

        rtio_channels.extend(rtio.Channel.from_phy(phy)
                             for ch in sawgs
                             for phy in ch.phys)

        # --- log channel ---
        self.config["RTIO_LOG_CHANNEL"] = len(rtio_channels)
        rtio_channels.append(rtio.LogChannel())
        self.add_rtio(rtio_channels)


def main():
parser = argparse.ArgumentParser(
description="ARTIQ core device builder / KC705 "
@@ -407,6 +457,8 @@ def main():
cls = NIST_CLOCK
elif hw_adapter == "nist_qc2":
cls = NIST_QC2
elif hw_adapter == "phaser":
cls = Phaser
else:
raise SystemExit("Invalid hardware adapter string (-H/--hw-adapter)")

0 comments on commit ee75220

Please sign in to comment.