Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: whitequark/glasgow
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 636121dd70e2
Choose a base ref
...
head repository: whitequark/glasgow
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 2077725521e3
Choose a head ref
  • 1 commit
  • 4 files changed
  • 1 contributor

Commits on Dec 3, 2018

  1. gateware.boneless: implement A-class, S-class and C-class insns.

    No tests for C-class yet.
    whitequark committed Dec 3, 2018
    Copy the full SHA
    2077725 View commit details
Showing with 391 additions and 26 deletions.
  1. +16 −3 software/glasgow/arch/boneless/__init__.py
  2. +7 −7 software/glasgow/arch/boneless/instr.py
  3. +37 −16 software/glasgow/arch/boneless/opcode.py
  4. +331 −0 software/glasgow/gateware/boneless.py
19 changes: 16 additions & 3 deletions software/glasgow/arch/boneless/__init__.py
Original file line number Diff line number Diff line change
@@ -21,10 +21,10 @@
# * Five instruction classes:
# - A-class, for ALU operations.
# - S-class, for shift operations.
# - M-class, for load-store operations. 5-bit zero-extended offset.
# - M-class, for load-store operations. 5-bit sign-extended offset.
# - I-class, for operations with immediates. 8-bit sign-extended immediate.
# - C-class, for control transfers. 11-bit sign-extended offset.
# * Four flags: Z (zero), S (sign), C (unsigned carry), O (signed carry).
# * Four flags: Z (zero), S (sign), C (carry), O (overflow).
# * Secondary address space for special-purpose registers.
#
# As a result, Boneless can be efficiently implemented with a single 16-bit wide single-port
@@ -46,7 +46,7 @@
# +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
# I-class | 0 | 1 | opcode | R-src/dst | immediate |
# +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
# C-class | 1 | opcode | F | offset |
# C-class | 1 | condition | F | offset |
# +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
#
# Instruction decoding
@@ -68,6 +68,19 @@
#
# As a result, Boneless instruction decoding can be implemented with approximately 10 4-LUTs.
#
# Instruction set omissions
# -------------------------
#
# The following instructions were deliberately omitted because of the limited opcode space and
# less importance than other instructions:
# * Add/subtract with carry; shift with carry; rotate through carry.
# Can be emulated in software with JC/JNC.
# * Move with immediate that preserves register contents.
# Loads of 16-bit immediates can be expanded into MOVH and ADDI, with the immediate in MOVH
# being adjusted for sign extension performed in ADDI.
# * Return from interrupt.
# Interrupts are not currently supported.
#
# Instruction set summary
# -----------------------
#
14 changes: 7 additions & 7 deletions software/glasgow/arch/boneless/instr.py
Original file line number Diff line number Diff line change
@@ -15,15 +15,15 @@

def A_FORMAT(opcode, optype, rd, ra, rb):
assert rd in range(8) and ra in range(8) and rb in range(8)
return (((opcode & 0b11111) << 10) |
return (((opcode & 0b11111) << 11) |
(( rd & 0b111) << 8) |
(( ra & 0b111) << 5) |
(( rb & 0b111) << 2) |
((optype & 0b11) << 0))

def S_FORMAT(opcode, optype, rd, ra, amt):
assert rd in range(8) and ra in range(8) and amt in range(16)
return (((opcode & 0b11111) << 10) |
return (((opcode & 0b11111) << 11) |
(( rd & 0b111) << 8) |
(( ra & 0b111) << 5) |
(( amt & 0b1111) << 1) |
@@ -34,7 +34,7 @@ def M_FORMAT(opcode, rsd, ra, off):
if isinstance(off, str):
return lambda resolve: M_FORMAT(opcode, rsd, ra, resolve(off))
assert -16 <= off <= 15
return (((opcode & 0b11111) << 10) |
return (((opcode & 0b11111) << 11) |
(( rsd & 0b111) << 8) |
(( ra & 0b111) << 5) |
(( off & 0b11111) << 0))
@@ -45,15 +45,15 @@ def I_FORMAT(opcode, rsd, imm, u=False):
return lambda resolve: I_FORMAT(opcode, rsd, resolve(imm), u)
assert ((not u and -128 <= imm <= 127) or
(u and imm in range(256)))
return (((opcode & 0b11111) << 10) |
return (((opcode & 0b11111) << 11) |
(( rsd & 0b111) << 8) |
(( imm & 0xff) << 0))

def C_FORMAT(opcode, off):
if isinstance(off, str):
return lambda resolve: C_FORMAT(opcode, resolve(off))
assert -1024 <= off <= 1023
return (((opcode & 0b11111) << 10) |
return (((opcode & 0b11111) << 11) |
(( off & 0x7ff) << 0))


@@ -68,13 +68,13 @@ def XOR (rd, ra, rb): return [A_FORMAT(OPCODE_LOGIC, OPTYPE_XOR, rd, ra, rb)]

def ADD (rd, ra, rb): return [A_FORMAT(OPCODE_ARITH, OPTYPE_ADD, rd, ra, rb)]
def SUB (rd, ra, rb): return [A_FORMAT(OPCODE_ARITH, OPTYPE_SUB, rd, ra, rb)]
def CMP (rd, ra, rb): return [A_FORMAT(OPCODE_ARITH, OPTYPE_CMP, rd, ra, rb)]
def CMP ( rb, ra): return [A_FORMAT(OPCODE_ARITH, OPTYPE_CMP, 0, ra, rb)]

def SLL (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_SLL, rd, ra, amt)]
def ROT (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_ROT, rd, ra, amt)]
def SRL (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_R, OPTYPE_SRL, rd, ra, amt)]
def SRA (rd, ra, amt): return [S_FORMAT(OPCODE_SHIFT_R, OPTYPE_SRA, rd, ra, amt)]
def MOV (rd, rs): return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_SLL, rd, ra, 0)]
def MOV (rd, rs): return [S_FORMAT(OPCODE_SHIFT_L, OPTYPE_SLL, rd, rs, 0)]

def LD (rd, ra, off): return [M_FORMAT(OPCODE_LD, rd, ra, off)]
def ST (rs, ra, off): return [M_FORMAT(OPCODE_ST, rs, ra, off)]
53 changes: 37 additions & 16 deletions software/glasgow/arch/boneless/opcode.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
OPCLASS_A = 0b0000
OPCLASS_S = 0b0001
OPCLASS_M = 0b001
OPCLASS_I = 0b01
OPCLASS_C = 0b1

OPCODE_LOGIC = 0b0000_0
OPTYPE_AND = 0b00
OPTYPE_OR = 0b01
@@ -14,7 +20,7 @@

OPCODE_SHIFT_R = 0b0001_1
OPTYPE_SRL = 0b0
OPTYPE_SRR = 0b1
OPTYPE_SRA = 0b1

OPCODE_LD = 0b001_00
OPCODE_ST = 0b001_01
@@ -30,22 +36,37 @@
OPCODE_JAL = 0b01_110
OPCODE_JR = 0b01_111

OPCODE_J = 0b1_000_0
OPCODE_JNZ = 0b1_001_0
OPCODE_F_0 = 0b1_000
OPCODE_J = (OPCODE_F_0<<1)|0

OPCODE_F_Z = 0b1_001
OPCODE_JNZ = (OPCODE_F_Z<<1)|0
OPCODE_JZ = (OPCODE_F_Z<<1)|1
OPCODE_JNE = OPCODE_JNZ
OPCODE_JZ = 0b1_001_1
OPCODE_JE = OPCODE_JZ
OPCODE_JNS = 0b1_010_0
OPCODE_JS = 0b1_010_1
OPCODE_JNO = 0b1_011_0
OPCODE_JO = 0b1_011_1
OPCODE_JNC = 0b1_100_0

OPCODE_F_S = 0b1_010
OPCODE_JNS = (OPCODE_F_S<<1)|0
OPCODE_JS = (OPCODE_F_S<<1)|1

OPCODE_F_O = 0b1_011
OPCODE_JNO = (OPCODE_F_O<<1)|0
OPCODE_JO = (OPCODE_F_O<<1)|1

# Condition selector for the C (carry) flag. It must not share an encoding with OPCODE_F_O
# (0b1_011): JNC and JC encode as 0b1_100_0 and 0b1_100_1 respectively.
OPCODE_F_C = 0b1_100
OPCODE_JNC = (OPCODE_F_C<<1)|0
OPCODE_JC = (OPCODE_F_C<<1)|1
OPCODE_JUGE = OPCODE_JNC
OPCODE_JC = 0b1_100_1
OPCODE_JULT = OPCODE_JC
OPCODE_JUGT = 0b1_101_0
OPCODE_JULE = 0b1_101_1
OPCODE_JSGE = 0b1_110_0
OPCODE_JSLT = 0b1_110_1
OPCODE_JSGT = 0b1_111_0
OPCODE_JSLE = 0b1_111_1

OPCODE_F_CoZ = 0b1_101
OPCODE_JUGT = (OPCODE_F_CoZ<<1)|0
OPCODE_JULE = (OPCODE_F_CoZ<<1)|1

OPCODE_F_SxO = 0b1_110
OPCODE_JSGE = (OPCODE_F_SxO<<1)|0
OPCODE_JSLT = (OPCODE_F_SxO<<1)|1

OPCODE_F_SxOoZ = 0b1_111
OPCODE_JSGT = (OPCODE_F_SxOoZ<<1)|0
OPCODE_JSLE = (OPCODE_F_SxOoZ<<1)|1
331 changes: 331 additions & 0 deletions software/glasgow/gateware/boneless.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
from migen import *
from migen.fhdl.bitcontainer import value_bits_sign
from migen.fhdl.specials import _MemoryPort
from migen.genlib.fsm import *

from ..arch.boneless.opcode import *


__all__ = ["BonelessCore"]


def SignExtend(v, w):
    """Sign-extend the value ``v`` to the bit width of the signal ``w``.

    The top bit of ``v`` is replicated into the extra high-order bits. If ``v`` is already wider
    than ``w``, it is returned unchanged.
    """
    v_width, _ = value_bits_sign(v)
    if v_width <= w.nbits:
        sign_bit = v[v_width - 1]
        padding = Replicate(sign_bit, w.nbits - v_width)
        return Cat(v, padding)
    return v


class BonelessCore(Module):
    """Multi-cycle Boneless CPU core sequenced by a single FSM.

    Only A-class (ALU), S-class (shift) and C-class (conditional jump) instructions are decoded;
    the M-class and I-class branches of the ``LOAD/JUMP`` state are still stubbed out.

    Instructions and registers share ``mem_port``. Register ``n`` is read and written at address
    ``Cat(n, r_win)``, i.e. the register index occupies the low three address bits and the window
    register the remaining ones. ``r_win`` is never assigned here, so it stays at its reset value.

    Parameters
    ----------
    reset_addr : int
        Reset value of the program counter.
    mem_port : memory port
        Port providing ``adr``, ``dat_r``, ``re``, ``dat_w`` and ``we``.
    ext_port : memory port or None
        Port for the secondary address space. A placeholder port is created when omitted; the
        port is not referenced by any logic in this class yet.
    simulation : bool
        When true, a C-class instruction whose 11-bit offset field is ``0x400`` (what
        ``J(-1024)`` encodes to) moves the FSM to the terminal ``HALT`` state.
    """

    def __init__(self, reset_addr, mem_port, ext_port=None, simulation=False):
        if ext_port is None:
            ext_port = _MemoryPort(adr=Signal(16),
                                   dat_r=Signal(16), re=Signal(),
                                   dat_w=Signal(16), we=Signal())

        # Registered state: current instruction, program counter, register window and flags.
        r_insn  = Signal(16)
        r_pc    = Signal(mem_port.adr.nbits, reset=reset_addr)
        r_win   = Signal(max(mem_port.adr.nbits - 3, 1))
        r_z     = Signal()
        r_s     = Signal()
        r_c     = Signal()
        r_o     = Signal()

        # ALU operands: A is latched, B comes straight from the memory read data.
        r_opA   = Signal(16)
        s_opB   = Signal(16)

        # Shifter state: value being shifted and remaining shift count.
        r_opS   = Signal(16)
        r_shift = Signal(5)

        # Result is one bit wider than the data path so that bit 16 carries the carry/borrow.
        s_res   = Signal(17)

        # Instruction field slices.
        s_insn  = Signal(16)
        i_type1 = s_insn[0:1]
        i_type2 = s_insn[0:2]
        i_shift = s_insn[1:5]
        i_imm5  = s_insn[0:5]
        i_imm8  = s_insn[0:8]
        i_imm11 = s_insn[0:11]
        i_regX  = s_insn[2:5]
        i_regY  = s_insn[5:8]
        i_regZ  = s_insn[8:11]
        i_code1 = s_insn[11:12]
        i_code2 = s_insn[11:13]
        i_code3 = s_insn[11:14]
        i_code5 = s_insn[11:16]
        i_flag  = s_insn[11]
        i_cond  = s_insn[12:15]

        # Instruction class is a variable-length prefix at the top of the opcode field.
        i_clsA  = i_code5[1:5] == OPCLASS_A
        i_clsS  = i_code5[1:5] == OPCLASS_S
        i_clsM  = i_code5[2:5] == OPCLASS_M
        i_clsI  = i_code5[3:5] == OPCLASS_I
        i_clsC  = i_code5[4:5] == OPCLASS_C

        # Condition evaluation for C-class instructions. The constant 1 appended as the top bit
        # makes the selector comparable with the OPCODE_F_* constants (0b1_xxx).
        s_cond  = Signal()
        self.comb += [
            Case(Cat(i_cond, C(1, 1)), {
                OPCODE_F_0:     s_cond.eq(0),
                OPCODE_F_Z:     s_cond.eq(r_z),
                OPCODE_F_S:     s_cond.eq(r_s),
                OPCODE_F_O:     s_cond.eq(r_o),
                OPCODE_F_C:     s_cond.eq(r_c),
                # "C or Z" (JUGT/JULE); this previously tested the overflow flag by mistake.
                OPCODE_F_CoZ:   s_cond.eq(r_c | r_z),
                OPCODE_F_SxO:   s_cond.eq(r_s ^ r_o),
                OPCODE_F_SxOoZ: s_cond.eq((r_s ^ r_o) | r_z),
            })
        ]

        # Flag update, performed whenever a state asserts c_flags.
        s_sub   = Signal()
        s_cmp   = Signal()
        c_flags = Signal()
        self.sync += [
            If(c_flags,
                r_z.eq(s_res == 0),
                r_s.eq(s_res[15]),
                r_c.eq(s_res[16]),
                # http://teaching.idallen.com/cst8214/08w/notes/overflow.txt
                # Cat() places its first argument in the least significant bit, so the operands
                # are listed in reverse; each key then reads, from MSB to LSB, as
                # (subtract, sign of A, sign of B, sign of result).
                Case(Cat(s_res[15], s_opB[15], r_opA[15], s_sub | s_cmp), {
                    0b0001: r_o.eq(1),  # (+A) + (+B) gave a negative result
                    0b0110: r_o.eq(1),  # (-A) + (-B) gave a positive result
                    0b1011: r_o.eq(1),  # (+A) - (-B) gave a negative result
                    0b1100: r_o.eq(1),  # (-A) - (+B) gave a positive result
                    "default": r_o.eq(0),
                })
            )
        ]

        self.submodules.fsm = FSM(reset_state="FETCH")
        # While in LOAD/JUMP the fetched word is only available on the memory read data; it is
        # latched into r_insn in that state and decoded from there afterwards.
        self.comb += [
            s_insn.eq(Mux(self.fsm.ongoing("LOAD/JUMP"), mem_port.dat_r, r_insn))
        ]
        self.fsm.act("FETCH",
            mem_port.adr.eq(r_pc),
            mem_port.re.eq(1),
            NextValue(r_pc, r_pc + 1),
            NextState("LOAD/JUMP")
        )
        self.fsm.act("LOAD/JUMP",
            NextValue(r_insn, mem_port.dat_r),
            If(i_clsA,
                mem_port.adr.eq(Cat(i_regX, r_win)),
                mem_port.re.eq(1),
                NextState("LOAD-A")
            ).Elif(i_clsS,
                mem_port.adr.eq(Cat(i_regY, r_win)),
                mem_port.re.eq(1),
                NextState("LOAD-S")
            # TODO: M-class and I-class instructions are not implemented yet.
            # ).Elif(i_clsM,
            #     mem_port.adr.eq(Cat(i_regY, r_win)),
            #     mem_port.re.eq(1),
            #     NextState("?-M")
            # ).Elif(i_clsI,
            #     mem_port.adr.eq(Cat(i_regZ, r_win)),
            #     mem_port.re.eq(1),
            #     NextState("?-I")
            ).Elif(i_clsC,
                # The jump is taken when the evaluated condition matches the F bit.
                If(s_cond == i_flag,
                    NextValue(r_pc, r_pc + SignExtend(i_imm11, r_pc))
                ),
                NextState("FETCH"),
                # Simulation-only halt marker; overrides the NextState above.
                If(simulation & (i_imm11 == 0x400),
                    NextState("HALT")
                )
            )
        )
        self.fsm.act("LOAD-A",
            mem_port.adr.eq(Cat(i_regY, r_win)),
            mem_port.re.eq(1),
            NextValue(r_opA, mem_port.dat_r),
            NextState("EXECUTE-A")
        )
        self.fsm.act("EXECUTE-A",
            s_opB.eq(mem_port.dat_r),
            Case(Cat(i_code1, C(OPCLASS_A, 4)), {
                OPCODE_LOGIC: Case(i_type2, {
                    OPTYPE_AND: s_res.eq(r_opA & s_opB),
                    OPTYPE_OR:  s_res.eq(r_opA | s_opB),
                    OPTYPE_XOR: s_res.eq(r_opA ^ s_opB),
                }),
                OPCODE_ARITH: Case(i_type2, {
                    OPTYPE_ADD: s_res.eq(r_opA + s_opB),
                    OPTYPE_SUB: [s_res.eq(r_opA - s_opB), s_sub.eq(1)],
                    OPTYPE_CMP: [s_res.eq(r_opA - s_opB), s_cmp.eq(1)],
                })
            }),
            mem_port.adr.eq(Cat(i_regZ, r_win)),
            mem_port.dat_w.eq(s_res),
            # CMP only updates the flags; the destination register is left untouched.
            mem_port.we.eq(~s_cmp),
            c_flags.eq(1),
            NextState("FETCH")
        )
        self.fsm.act("LOAD-S",
            NextValue(r_opS, mem_port.dat_r),
            NextValue(r_shift, i_shift),
            NextState("EXECUTE-S")
        )
        # The shifter moves one bit per cycle. The current value is written back on every cycle,
        # so the write performed in the cycle where r_shift reaches zero is the final result.
        self.fsm.act("EXECUTE-S",
            s_res.eq(r_opS),
            mem_port.adr.eq(Cat(i_regZ, r_win)),
            mem_port.dat_w.eq(s_res),
            mem_port.we.eq(1),
            c_flags.eq(1),
            Case(Cat(i_code1, C(OPCLASS_S, 4)), {
                OPCODE_SHIFT_L: Case(i_type1, {
                    OPTYPE_SLL: NextValue(r_opS, Cat(C(0, 1),   r_opS[:-1])),
                    OPTYPE_ROT: NextValue(r_opS, Cat(r_opS[-1], r_opS[:-1])),
                }),
                OPCODE_SHIFT_R: Case(i_type1, {
                    OPTYPE_SRL: NextValue(r_opS, Cat(r_opS[1:], C(0, 1))),
                    OPTYPE_SRA: NextValue(r_opS, Cat(r_opS[1:], r_opS[-1])),
                })
            }),
            NextValue(r_shift, r_shift - 1),
            If(r_shift == 0,
                NextState("FETCH")
            )
        )
        self.fsm.act("HALT",
            NextState("HALT")
        )

# -------------------------------------------------------------------------------------------------

import unittest

from . import simulation_test
from ..arch.boneless.instr import *


class BonelessTestbench(Module):
    """Simulation harness: a 16-bit wide memory preloaded from ``mem_init`` plus the core."""

    def __init__(self):
        # Memory image; expected to be filled in before the module is finalized.
        self.mem_init = []

    def do_finalize(self):
        image = self.mem_init
        self.mem = Memory(width=16, depth=len(image), init=image)
        self.specials += self.mem

        port = self.mem.get_port(has_re=True, write_capable=True)
        self.specials += port

        # The core starts executing at address 8, right after the eight register slots.
        self.submodules.dut = BonelessCore(reset_addr=8, mem_port=port, simulation=True)


class BonelessTestCase(unittest.TestCase):
    """Runs short programs on the simulated core and checks the resulting register contents."""

    def setUp(self):
        self.tb = BonelessTestbench()

    def configure(self, tb, regs, code):
        # Memory layout: eight register slots (zero padded), then the program, then a
        # J(-1024) that the core treats as a halt marker in simulation.
        padding = [0] * (8 - len(regs))
        program = assemble(code + [J(-1024)])
        tb.mem_init = [*regs, *padding] + program

    def dut_state(self, tb):
        # Translate the encoded FSM state back into its name.
        encoded = yield tb.dut.fsm.state
        return tb.dut.fsm.decoding[encoded]

    def run_core(self, tb):
        # Advance the simulation one cycle at a time until the core halts.
        while True:
            if (yield from self.dut_state(tb)) == "HALT":
                break
            yield

    def assertMemory(self, tb, addr, value):
        actual = yield tb.mem[addr]
        self.assertEqual(actual, value)

    @simulation_test(regs=[0xA5A5, 0xAA55],
                     code=[AND(R2, R1, R0)])
    def test_AND(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0xA5A5), (1, 0xAA55), (2, 0xA005)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0xA5A5, 0xAA55],
                     code=[OR(R2, R1, R0)])
    def test_OR(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0xA5A5), (1, 0xAA55), (2, 0xAFF5)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0xA5A5, 0xAA55],
                     code=[XOR(R2, R1, R0)])
    def test_XOR(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0xA5A5), (1, 0xAA55), (2, 0x0FF0)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1234, 0x5678],
                     code=[ADD(R2, R1, R0)])
    def test_ADD(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0x1234), (1, 0x5678), (2, 0x68AC)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1234, 0x5678],
                     code=[SUB(R2, R1, R0)])
    def test_SUB(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0x1234), (1, 0x5678), (2, 0xBBBC)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1234, 0x5678],
                     code=[CMP(R0, R1)])
    def test_CMP(self, tb):
        yield from self.run_core(tb)
        # R2 must still be zero: CMP does not write a destination register.
        for addr, value in ((0, 0x1234), (1, 0x5678), (2, 0)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1012],
                     code=[SLL(R1, R0, 1),
                           SLL(R2, R0, 8)])
    def test_SLL(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0x1012), (1, 0x2024), (2, 0x1200)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1012],
                     code=[ROT(R1, R0, 1),
                           ROT(R2, R0, 8)])
    def test_ROT(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0x1012), (1, 0x2024), (2, 0x1210)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1234],
                     code=[MOV(R1, R0)])
    def test_MOV(self, tb):
        yield from self.run_core(tb)
        for addr, value in ((0, 0x1234), (1, 0x1234)):
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1210, 0x9210],
                     code=[SRL(R2, R0, 1),
                           SRL(R3, R0, 8),
                           SRL(R4, R1, 1),
                           SRL(R5, R1, 8)])
    def test_SRL(self, tb):
        yield from self.run_core(tb)
        expected = (
            (0, 0x1210), (2, 0x0908), (3, 0x0012),
            (1, 0x9210), (4, 0x4908), (5, 0x0092),
        )
        for addr, value in expected:
            yield from self.assertMemory(tb, addr, value)

    @simulation_test(regs=[0x1210, 0x9210],
                     code=[SRA(R2, R0, 1),
                           SRA(R3, R0, 8),
                           SRA(R4, R1, 1),
                           SRA(R5, R1, 8)])
    def test_SRA(self, tb):
        yield from self.run_core(tb)
        expected = (
            (0, 0x1210), (2, 0x0908), (3, 0x0012),
            (1, 0x9210), (4, 0xC908), (5, 0xFF92),
        )
        for addr, value in expected:
            yield from self.assertMemory(tb, addr, value)