whitequark · Dec 3, 2018
Showing with 391 additions and 26 deletions.

+16 −3 software/glasgow/arch/boneless/__init__.py

+7 −7 software/glasgow/arch/boneless/instr.py

+37 −16 software/glasgow/arch/boneless/opcode.py

+331 −0 software/glasgow/gateware/boneless.py
diff --git a/software/glasgow/arch/boneless/__init__.py b/software/glasgow/arch/boneless/__init__.py
@@ -21,10 +21,10 @@
 #   * Five instruction classes:
 #     - A-class, for ALU operations.
 #     - S-class, for shift operations.
-#     - M-class, for load-store operations. 5-bit zero-extended offset.
+#     - M-class, for load-store operations. 5-bit sign-extended offset.
 #     - I-class, for operations with immediates. 8-bit sign-extended immediate.
 #     - C-class, for control transfers. 11-bit sign-extended offset.
-#   * Four flags: Z (zero), S (sign), C (unsigned carry), O (signed carry).
+#   * Four flags: Z (zero), S (sign), C (carry), O (overflow).
 #   * Secondary address space for special-purpose registers.
 #
 # As a result, Boneless can be efficiently implemented with a single 16-bit wide single-port
@@ -46,7 +46,7 @@
 #             +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 #     I-class | 0 | 1 |  opcode   | R-src/dst |           immediate           |
 #             +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
-#     C-class | 1 |  opcode   | F |                 offset                    |
+#     C-class | 1 | condition | F |                 offset                    |
 #             +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 #
 # Instruction decoding
@@ -68,6 +68,19 @@
 #
 # As a result, Boneless instruction decoding can be implemented with approximately 10 4-LUTs.
 #
+# Instruction set omissions
+# -------------------------
+#
+# The following instructions were deliberately omitted because of the limited opcode space and
+# less importance than other instructions:
+#   * Add/subtract with carry; shift with carry; rotate through carry.
+#     Can be emulated in software with JC/JNC.
+#   * Move with immediate that preserves register contents.
+#     Loads of 16-bit immediates can be expanded into MOVH and ADDI, with the immediate in MOVH
+#     being adjusted for sign extension performed in ADDI.
+#   * Return from interrupt.
+#     Interrupts are not currently supported.
+#
 # Instruction set summary
 # -----------------------
 #

diff --git a/software/glasgow/arch/boneless/instr.py b/software/glasgow/arch/boneless/instr.py
@@ -15,15 +15,15 @@
 
 def A_FORMAT(opcode, optype, rd, ra, rb):
     assert rd in range(8) and ra in range(8) and rb in range(8)
-    return (((opcode & 0b11111) << 10) |
+    return (((opcode & 0b11111) << 11) |
             ((    rd &   0b111) <<  8) |
             ((    ra &   0b111) <<  5) |
             ((    rb &   0b111) <<  2) |
             ((optype &    0b11) <<  0))
 
 def S_FORMAT(opcode, optype, rd, ra, amt):
     assert rd in range(8) and ra in range(8) and amt in range(16)
-    return (((opcode & 0b11111) << 10) |
+    return (((opcode & 0b11111) << 11) |
             ((    rd &   0b111) <<  8) |
             ((    ra &   0b111) <<  5) |
             ((   amt &  0b1111) <<  1) |
@@ -34,7 +34,7 @@ def M_FORMAT(opcode, rsd, ra, off):
     if isinstance(off, str):
         return lambda resolve: M_FORMAT(opcode, rsd, ra, resolve(off))
     assert -16 <= off <= 15
-    return (((opcode & 0b11111) << 10) |
+    return (((opcode & 0b11111) << 11) |
             ((   rsd &   0b111) <<  8) |
             ((    ra &   0b111) <<  5) |
             ((   off & 0b11111) <<  0))
@@ -45,15 +45,15 @@ def I_FORMAT(opcode, rsd, imm, u=False):
         return lambda resolve: I_FORMAT(opcode, rst, resolve(imm), u)
     assert ((not u and -128 <= imm <= 127) or
             (u and imm in range(256)))
-    return (((opcode & 0b11111) << 10) |
+    return (((opcode & 0b11111) << 11) |
             ((   rsd &   0b111) <<  8) |
             ((   imm &    0xff) <<  0))
 
 def C_FORMAT(opcode, off):
     if isinstance(off, str):
         return lambda resolve: C_FORMAT(opcode, resolve(off))
     assert -1024 <= off <= 1023
-    return (((opcode & 0b11111) << 10) |
+    return (((opcode & 0b11111) << 11) |
             ((   off &   0x7ff) <<  0))
 
 
@@ -68,13 +68,13 @@ def XOR (rd, ra, rb):  return [A_FORMAT(OPCODE_LOGIC,   OPTYPE_XOR, rd, ra, rb)]
 
 def ADD (rd, ra, rb):  return [A_FORMAT(OPCODE_ARITH,   OPTYPE_ADD, rd, ra, rb)]
 def SUB (rd, ra, rb):  return [A_FORMAT(OPCODE_ARITH,   OPTYPE_SUB, rd, ra, rb)]
-def CMP (rd, ra, rb):  return [A_FORMAT(OPCODE_ARITH,   OPTYPE_CMP, rd, ra, rb)]
+def CMP (    ra, rb):  return [A_FORMAT(OPCODE_ARITH,   OPTYPE_CMP,  0, ra, rb)]  # rd field is 0
 
 def SLL (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_SLL, rd, ra, amt)]
 def ROT (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_ROT, rd, ra, amt)]
 def SRL (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_R, OPTYPE_SRL, rd, ra, amt)]
 def SRA (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_R, OPTYPE_SRA, rd, ra, amt)]
-def MOV (rd, rs):      return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_SLL, rd, ra,   0)]
+def MOV (rd, rs):      return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_SLL, rd, rs,   0)]
 
 def LD  (rd, ra, off): return [M_FORMAT(OPCODE_LD,   rd, ra, off)]
 def ST  (rs, ra, off): return [M_FORMAT(OPCODE_ST,   rs, ra, off)]

diff --git a/software/glasgow/arch/boneless/opcode.py b/software/glasgow/arch/boneless/opcode.py
@@ -1,3 +1,9 @@
+OPCLASS_A      = 0b0000
+OPCLASS_S      = 0b0001
+OPCLASS_M      = 0b001
+OPCLASS_I      = 0b01
+OPCLASS_C      = 0b1
+
 OPCODE_LOGIC   = 0b0000_0
 OPTYPE_AND     = 0b00
 OPTYPE_OR      = 0b01
@@ -14,7 +20,7 @@
 
 OPCODE_SHIFT_R = 0b0001_1
 OPTYPE_SRL     = 0b0
-OPTYPE_SRR     = 0b1
+OPTYPE_SRA     = 0b1
 
 OPCODE_LD      = 0b001_00
 OPCODE_ST      = 0b001_01
@@ -30,22 +36,37 @@
 OPCODE_JAL     = 0b01_110
 OPCODE_JR      = 0b01_111
 
-OPCODE_J       = 0b1_000_0
-OPCODE_JNZ     = 0b1_001_0
+OPCODE_F_0     = 0b1_000
+OPCODE_J       = (OPCODE_F_0<<1)|0
+
+OPCODE_F_Z     = 0b1_001
+OPCODE_JNZ     = (OPCODE_F_Z<<1)|0
+OPCODE_JZ      = (OPCODE_F_Z<<1)|1
 OPCODE_JNE     = OPCODE_JNZ
-OPCODE_JZ      = 0b1_001_1
 OPCODE_JE      = OPCODE_JZ
-OPCODE_JNS     = 0b1_010_0
-OPCODE_JS      = 0b1_010_1
-OPCODE_JNO     = 0b1_011_0
-OPCODE_JO      = 0b1_011_1
-OPCODE_JNC     = 0b1_100_0
+
+OPCODE_F_S     = 0b1_010
+OPCODE_JNS     = (OPCODE_F_S<<1)|0
+OPCODE_JS      = (OPCODE_F_S<<1)|1
+
+OPCODE_F_O     = 0b1_011
+OPCODE_JNO     = (OPCODE_F_O<<1)|0
+OPCODE_JO      = (OPCODE_F_O<<1)|1
+
+OPCODE_F_C     = 0b1_100  # carry condition; must differ from OPCODE_F_O (0b1_011)
+OPCODE_JNC     = (OPCODE_F_C<<1)|0
+OPCODE_JC      = (OPCODE_F_C<<1)|1
 OPCODE_JUGE    = OPCODE_JNC
-OPCODE_JC      = 0b1_100_1
 OPCODE_JULT    = OPCODE_JC
-OPCODE_JUGT    = 0b1_101_0
-OPCODE_JULE    = 0b1_101_1
-OPCODE_JSGE    = 0b1_110_0
-OPCODE_JSLT    = 0b1_110_1
-OPCODE_JSGT    = 0b1_111_0
-OPCODE_JSLE    = 0b1_111_1
+
+OPCODE_F_CoZ   = 0b1_101
+OPCODE_JUGT    = (OPCODE_F_CoZ<<1)|0
+OPCODE_JULE    = (OPCODE_F_CoZ<<1)|1
+
+OPCODE_F_SxO   = 0b1_110
+OPCODE_JSGE    = (OPCODE_F_SxO<<1)|0
+OPCODE_JSLT    = (OPCODE_F_SxO<<1)|1
+
+OPCODE_F_SxOoZ = 0b1_111
+OPCODE_JSGT    = (OPCODE_F_SxOoZ<<1)|0
+OPCODE_JSLE    = (OPCODE_F_SxOoZ<<1)|1
diff --git a/software/glasgow/gateware/boneless.py b/software/glasgow/gateware/boneless.py
@@ -0,0 +1,331 @@
+from migen import *
+from migen.fhdl.bitcontainer import value_bits_sign
+from migen.fhdl.specials import _MemoryPort
+from migen.genlib.fsm import *
+
+from ..arch.boneless.opcode import *
+
+
+__all__ = ["BonelessCore"]
+
+
+def SignExtend(v, w):  # widen v to the bit width of w by repeating v's top bit
+    v_nbits, v_sign = value_bits_sign(v)
+    if v_nbits > w.nbits:
+        return v  # v is already wider than w: returned unchanged (no truncation here)
+    else:
+        return Cat(v, Replicate(v[v_nbits - 1], w.nbits - v_nbits))  # v stays in the low bits
+
+
+class BonelessCore(Module):
+    def __init__(self, reset_addr, mem_port, ext_port=None, simulation=False):
+        if ext_port is None:
+            ext_port = _MemoryPort(adr=Signal(16),
+                dat_r=Signal(16), re=Signal(),
+                dat_w=Signal(16), we=Signal())
+
+        r_insn  = Signal(16)
+        r_pc    = Signal(mem_port.adr.nbits, reset=reset_addr)
+        r_win   = Signal(max(mem_port.adr.nbits - 3, 1))
+        r_z     = Signal()
+        r_s     = Signal()
+        r_c     = Signal()
+        r_o     = Signal()
+
+        r_opA   = Signal(16)
+        s_opB   = Signal(16)
+
+        r_opS   = Signal(16)
+        r_shift = Signal(5)
+
+        s_res   = Signal(17)
+
+        s_insn  = Signal(16)
+        i_type1 = s_insn[0:1]
+        i_type2 = s_insn[0:2]
+        i_shift = s_insn[1:5]
+        i_imm5  = s_insn[0:5]
+        i_imm7  = s_insn[0:8]
+        i_imm11 = s_insn[0:11]
+        i_regX  = s_insn[2:5]
+        i_regY  = s_insn[5:8]
+        i_regZ  = s_insn[8:11]
+        i_code1 = s_insn[11:12]
+        i_code2 = s_insn[11:13]
+        i_code3 = s_insn[11:14]
+        i_code5 = s_insn[11:16]
+        i_flag  = s_insn[11]
+        i_cond  = s_insn[12:15]
+
+        i_clsA  = i_code5[1:5] == OPCLASS_A
+        i_clsS  = i_code5[1:5] == OPCLASS_S
+        i_clsM  = i_code5[2:5] == OPCLASS_M
+        i_clsI  = i_code5[3:5] == OPCLASS_I
+        i_clsC  = i_code5[4:5] == OPCLASS_C
+
+        s_cond  = Signal()  # value of the selected condition; compared against i_flag on jumps
+        self.comb += [
+            Case(Cat(i_cond, C(1, 1)), {  # i_cond in the low bits, C-class bit on top
+                OPCODE_F_0:     s_cond.eq(0),
+                OPCODE_F_Z:     s_cond.eq(r_z),
+                OPCODE_F_S:     s_cond.eq(r_s),
+                OPCODE_F_O:     s_cond.eq(r_o),
+                OPCODE_F_C:     s_cond.eq(r_c),
+                OPCODE_F_CoZ:   s_cond.eq(r_c | r_z),  # "C or Z": JUGT/JULE
+                OPCODE_F_SxO:   s_cond.eq(r_s ^ r_o),  # "S xor O": JSGE/JSLT
+                OPCODE_F_SxOoZ: s_cond.eq((r_s ^ r_o) | r_z),  # "(S xor O) or Z": JSGT/JSLE
+            })
+        ]
+
+        s_sub   = Signal()  # asserted by EXECUTE-A for SUB
+        s_cmp   = Signal()  # asserted by EXECUTE-A for CMP (also suppresses the register write)
+        c_flags = Signal()  # asserted by a state to latch Z/S/C/O from s_res on this cycle
+        self.sync += [
+            If(c_flags,
+                r_z.eq(s_res[:16] == 0),  # only the 16 result bits; bit 16 is the carry-out
+                r_s.eq(s_res[15]),
+                r_c.eq(s_res[16]),
+                # http://teaching.idallen.com/cst8214/08w/notes/overflow.txt
+                Case(Cat(s_res[15], s_opB[15], r_opA[15], s_sub | s_cmp), {
+                    0b0001: r_o.eq(1),  # key reads MSB-first as sub,A,B,res: (+) + (+) = (-)
+                    0b0110: r_o.eq(1),  # (-) + (-) = (+)
+                    0b1011: r_o.eq(1),  # (+) - (-) = (-)
+                    0b1100: r_o.eq(1),  # (-) - (+) = (+)
+                    "default": r_o.eq(0),
+                })
+            )
+        ]
+
+        self.submodules.fsm = FSM(reset_state="FETCH")
+        self.comb += [
+            s_insn.eq(Mux(self.fsm.ongoing("LOAD/JUMP"), mem_port.dat_r, r_insn))
+        ]
+        self.fsm.act("FETCH",
+            mem_port.adr.eq(r_pc),
+            mem_port.re.eq(1),
+            NextValue(r_pc, r_pc + 1),
+            NextState("LOAD/JUMP")
+        )
+        self.fsm.act("LOAD/JUMP",
+            NextValue(r_insn, mem_port.dat_r),
+            If(i_clsA,
+                mem_port.adr.eq(Cat(i_regX, r_win)),
+                mem_port.re.eq(1),
+                NextState("LOAD-A")
+            ).Elif(i_clsS,
+                mem_port.adr.eq(Cat(i_regY, r_win)),
+                mem_port.re.eq(1),
+                NextState("LOAD-S")
+            # ).Elif(i_clsM,
+            #     mem_port.adr.eq(Cat(i_regY, r_win)),
+            #     mem_port.re.eq(1),
+            #     NextState("?-M")
+            # ).Elif(i_clsI,
+            #     mem_port.adr.eq(Cat(i_regZ, r_win)),
+            #     mem_port.re.eq(1),
+            #     NextState("?-I")
+            ).Elif(i_clsC,
+                If(s_cond == i_flag,
+                    NextValue(r_pc, r_pc + SignExtend(i_imm11, r_pc))
+                ),
+                NextState("FETCH"),
+                If(simulation & (i_imm11 == 0x400),
+                    NextState("HALT")
+                )
+            )
+        )
+        self.fsm.act("LOAD-A",
+            mem_port.adr.eq(Cat(i_regY, r_win)),
+            mem_port.re.eq(1),
+            NextValue(r_opA, mem_port.dat_r),
+            NextState("EXECUTE-A")
+        )
+        self.fsm.act("EXECUTE-A",
+            s_opB.eq(mem_port.dat_r),
+            Case(Cat(i_code1, C(OPCLASS_A, 4)), {
+                OPCODE_LOGIC: Case(i_type2, {
+                    OPTYPE_AND:  s_res.eq(r_opA & s_opB),
+                    OPTYPE_OR:   s_res.eq(r_opA | s_opB),
+                    OPTYPE_XOR:  s_res.eq(r_opA ^ s_opB),
+                }),
+                OPCODE_ARITH: Case(i_type2, {
+                    OPTYPE_ADD:  s_res.eq(r_opA + s_opB),
+                    OPTYPE_SUB: [s_res.eq(r_opA - s_opB), s_sub.eq(1)],
+                    OPTYPE_CMP: [s_res.eq(r_opA - s_opB), s_cmp.eq(1)],
+                })
+            }),
+            mem_port.adr.eq(Cat(i_regZ, r_win)),
+            mem_port.dat_w.eq(s_res),
+            mem_port.we.eq(~s_cmp),
+            c_flags.eq(1),
+            NextState("FETCH")
+        )
+        self.fsm.act("LOAD-S",
+            NextValue(r_opS, mem_port.dat_r),
+            NextValue(r_shift, i_shift),
+            NextState("EXECUTE-S")
+        )
+        self.fsm.act("EXECUTE-S",
+            s_res.eq(r_opS),
+            mem_port.adr.eq(Cat(i_regZ, r_win)),
+            mem_port.dat_w.eq(s_res),
+            mem_port.we.eq(1),
+            c_flags.eq(1),
+            Case(Cat(i_code1, C(OPCLASS_S, 4)), {
+                OPCODE_SHIFT_L: Case(i_type1, {
+                    OPTYPE_SLL: NextValue(r_opS, Cat(C(0, 1),   r_opS[:-1])),
+                    OPTYPE_ROT: NextValue(r_opS, Cat(r_opS[-1], r_opS[:-1])),
+                }),
+                OPCODE_SHIFT_R: Case(i_type1, {
+                    OPTYPE_SRL: NextValue(r_opS, Cat(r_opS[1:], C(0, 1))),
+                    OPTYPE_SRA: NextValue(r_opS, Cat(r_opS[1:], r_opS[-1])),
+                })
+            }),
+            NextValue(r_shift, r_shift - 1),
+            If(r_shift == 0,
+                NextState("FETCH")
+            )
+        )
+        self.fsm.act("HALT",
+            NextState("HALT")
+        )
+
+# -------------------------------------------------------------------------------------------------
+
+import unittest
+
+from . import simulation_test
+from ..arch.boneless.instr import *
+
+
+class BonelessTestbench(Module):
+    def __init__(self):
+        self.mem_init = []
+
+    def do_finalize(self):
+        self.mem = Memory(width=16, depth=len(self.mem_init), init=self.mem_init)
+        self.specials += self.mem
+
+        mem_port = self.mem.get_port(has_re=True, write_capable=True)
+        self.specials += mem_port
+
+        self.submodules.dut = BonelessCore(reset_addr=8, mem_port=mem_port, simulation=True)
+
+
+class BonelessTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = BonelessTestbench()
+
+    def configure(self, tb, regs, code):
+        tb.mem_init = [*regs, *[0] * (8 - len(regs))] + assemble(code + [J(-1024)])
+
+    def dut_state(self, tb):
+        return tb.dut.fsm.decoding[(yield tb.dut.fsm.state)]
+
+    def run_core(self, tb):
+        while (yield from self.dut_state(tb)) != "HALT":
+            yield
+
+    def assertMemory(self, tb, addr, value):
+        self.assertEqual((yield tb.mem[addr]), value)
+
+    @simulation_test(regs=[0xA5A5, 0xAA55],
+                     code=[AND (R2, R1, R0)])
+    def test_AND(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0xA5A5)
+        yield from self.assertMemory(tb, 1, 0xAA55)
+        yield from self.assertMemory(tb, 2, 0xA005)
+
+    @simulation_test(regs=[0xA5A5, 0xAA55],
+                     code=[OR  (R2, R1, R0)])
+    def test_OR(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0xA5A5)
+        yield from self.assertMemory(tb, 1, 0xAA55)
+        yield from self.assertMemory(tb, 2, 0xAFF5)
+
+    @simulation_test(regs=[0xA5A5, 0xAA55],
+                     code=[XOR (R2, R1, R0)])
+    def test_XOR(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0xA5A5)
+        yield from self.assertMemory(tb, 1, 0xAA55)
+        yield from self.assertMemory(tb, 2, 0x0FF0)
+
+    @simulation_test(regs=[0x1234, 0x5678],
+                     code=[ADD (R2, R1, R0)])
+    def test_ADD(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1234)
+        yield from self.assertMemory(tb, 1, 0x5678)
+        yield from self.assertMemory(tb, 2, 0x68AC)
+
+    @simulation_test(regs=[0x1234, 0x5678],
+                     code=[SUB (R2, R1, R0)])
+    def test_SUB(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1234)
+        yield from self.assertMemory(tb, 1, 0x5678)
+        yield from self.assertMemory(tb, 2, 0xBBBC)
+
+    @simulation_test(regs=[0x1234, 0x5678],
+                     code=[CMP (R0, R1)])
+    def test_CMP(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1234)
+        yield from self.assertMemory(tb, 1, 0x5678)
+        yield from self.assertMemory(tb, 2, 0)
+
+    @simulation_test(regs=[0x1012],
+                     code=[SLL (R1, R0, 1),
+                           SLL (R2, R0, 8)])
+    def test_SLL(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1012)
+        yield from self.assertMemory(tb, 1, 0x2024)
+        yield from self.assertMemory(tb, 2, 0x1200)
+
+    @simulation_test(regs=[0x1012],
+                     code=[ROT (R1, R0, 1),
+                           ROT (R2, R0, 8)])
+    def test_ROT(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1012)
+        yield from self.assertMemory(tb, 1, 0x2024)
+        yield from self.assertMemory(tb, 2, 0x1210)
+
+    @simulation_test(regs=[0x1234],
+                     code=[MOV (R1, R0)])
+    def test_MOV(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1234)
+        yield from self.assertMemory(tb, 1, 0x1234)
+
+    @simulation_test(regs=[0x1210, 0x9210],
+                     code=[SRL (R2, R0, 1),
+                           SRL (R3, R0, 8),
+                           SRL (R4, R1, 1),
+                           SRL (R5, R1, 8)])
+    def test_SRL(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1210)
+        yield from self.assertMemory(tb, 2, 0x0908)
+        yield from self.assertMemory(tb, 3, 0x0012)
+        yield from self.assertMemory(tb, 1, 0x9210)
+        yield from self.assertMemory(tb, 4, 0x4908)
+        yield from self.assertMemory(tb, 5, 0x0092)
+
+    @simulation_test(regs=[0x1210, 0x9210],
+                     code=[SRA (R2, R0, 1),
+                           SRA (R3, R0, 8),
+                           SRA (R4, R1, 1),
+                           SRA (R5, R1, 8)])
+    def test_SRA(self, tb):
+        yield from self.run_core(tb)
+        yield from self.assertMemory(tb, 0, 0x1210)
+        yield from self.assertMemory(tb, 2, 0x0908)
+        yield from self.assertMemory(tb, 3, 0x0012)
+        yield from self.assertMemory(tb, 1, 0x9210)
+        yield from self.assertMemory(tb, 4, 0xC908)
+        yield from self.assertMemory(tb, 5, 0xFF92)