Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: whitequark/libfx2
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: f452c537ad7c
Choose a base ref
...
head repository: whitequark/libfx2
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 8d4970502803
Choose a head ref
  • 4 commits
  • 14 files changed
  • 1 contributor

Commits on May 19, 2019

  1. Copy the full SHA
    15746b8 View commit details
  2. Reimplement delay routines in cycle-accurate assembly.

    Also, allow factoring caller overhead into delay cycle count.
    whitequark committed May 19, 2019
    Copy the full SHA
    2f17ed4 View commit details
  3. Copy the full SHA
    608f785 View commit details
  4. Copy the full SHA
    8d49705 View commit details
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ def __getattr__(cls, name):
'../firmware/library/include', [
'usb.h', 'usbmicrosoft.h', 'usbdfu.h', 'usbcdc.h', 'usbmassstor.h',
'fx2regs.h', 'fx2ints.h', 'fx2lib.h',
'fx2delay.h', 'fx2i2c.h', 'fx2eeprom.h', 'fx2spi.h', 'fx2spiflash.h',
'fx2delay.h', 'fx2i2c.h', 'fx2eeprom.h', 'fx2spi.h', 'fx2spiflash.h', 'fx2debug.h',
'fx2usb.h', 'fx2usbdfu.h', 'fx2usbmassstor.h', 'fx2uf2.h',
]
)
@@ -60,6 +60,6 @@ def __getattr__(cls, name):
master_doc = 'index'
project = 'libfx2 Reference'
author = 'whitequark'
copyright = '2018, whitequark'
copyright = '2018-2019, whitequark'
pygments_style = 'sphinx'
html_theme = 'sphinx_rtd_theme'
1 change: 1 addition & 0 deletions docs/device_library.rst
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ Device-side library reference
fx2ints_h
fx2lib_h
fx2delay_h
fx2debug_h
fx2i2c_h
fx2eeprom_h
fx2spi_h
9 changes: 9 additions & 0 deletions docs/fx2debug_h.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
fx2debug.h
==========

The ``fx2debug.h`` header contains templated debug serial port bitbang routines for the Cypress FX2 series implemented in assembly. This header is the complete implementation of the debug serial port and does not have a corresponding library.

Reference
---------

.. autodoxygenfile:: fx2debug.h
2 changes: 1 addition & 1 deletion examples/boot-dfu-spiflash/main.c
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@ void flash_bus_deinit() {
OEA &= ~0b0111;
}

DEFINE_SPIFLASH_FNS(flash, /*cs=*/_PA0, /*sck=*/_PA1, /*si=*/_PA2, /*so=*/_PA3)
DEFINE_SPIFLASH_FNS(flash, /*cs=*/PA0, /*sck=*/PA1, /*si=*/PA2, /*so=*/PA3)

// Application mode descriptors.

6 changes: 6 additions & 0 deletions examples/printf/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TARGET = printf
LIBRARIES = fx2
MODEL = small

LIBFX2 = ../../firmware/library
include $(LIBFX2)/fx2rules.mk
18 changes: 18 additions & 0 deletions examples/printf/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include <fx2regs.h>
#include <fx2debug.h>
#include <stdio.h>

// Instantiate the debug serial output routine from fx2debug.h for pin PA0 at 57600 baud.
// NOTE(review): presumably this expands to the putchar() implementation that printf() below
// relies on; confirm against the macro definition in fx2debug.h.
DEFINE_DEBUG_PUTCHAR_FN(PA0, 57600)

int main() {
  // Pick a CPU clock setting. The debug output runs at 57600 baud with any one of the
  // three alternatives below; exactly one of them should be active.
  CPUCS = 0;
  // CPUCS = _CLKSPD0;
  // CPUCS = _CLKSPD1;

  // Enable the output driver for port A bit 0 only, then set that pin high.
  OEA = 1U << 0;
  PA0 = 1;

  // Emit the greeting once, then spin forever.
  printf("Hello, world!\n");
  for(;;)
    ;
}
246 changes: 195 additions & 51 deletions firmware/library/delay.c
Original file line number Diff line number Diff line change
@@ -1,40 +1,42 @@
#include <fx2delay.h>
#include <fx2regs.h>

// Spin for exactly min(28, DP*4) cycles (i.e. min(112, DP*16) clocks).
// The implementation is a bit complicated, but the calculations
// in the callee are simpler.
void delay_4c(uint16_t delay) __naked {
delay;
// Spin for exactly max(32, DP*4) cycles (i.e. max(128, DP*16) clocks), including the call of
// this function. The implementation is a bit complicated, but the calculations in the callee
// are simpler.
void delay_4c(uint16_t count) __naked {
count;
__asm
// subtract fixed delay; 13c prolog, 11c loop conditions, 4c epilog
; (ljmp delay_4c) ; 4c
// subtract fixed delay; 4c ljmp, 13c prolog, 11c loop conditions, 4c epilog
clr c ; 1c
mov a, dpl ; 2c
subb a, #7 ; 2c
subb a, #8 ; 2c
mov dpl, a ; 2c
mov a, dph ; 2c
subb a, #0 ; 2c
mov dph, a ; 2c

// only run for minimum cycle count on underflow
jnc 00000$ ; 3c
// we've ran for 16 cycles, but need 28, fill in
mov dpl, dpl ; 3c nop
mov dpl, dpl ; 3c nop
mov a, dpl ; 2c nop
ret ; 4c
// we've run for 20 cycles, but need 32, fill in
nop ; 1c
nop ; 1c
nop ; 1c
nop ; 1c
nop ; 1c
sjmp 00005$ ; 3c

00000$:
// don't run the DPH loop if DPH is zero
jz 00003$ ; 3c
// loop for DPH*256*4 cycles, DPH*512 instructions
00001$:
mov a, #0xfe ; 2c
nop ; 1c
nop ; 1c
mov a, #0xfe ; 2c
00002$:
dec a ; 1c
jnz 00002$ ; 3c
djnz acc, 00002$ ; 3c
djnz dph, 00001$ ; 4c

00003$:
@@ -46,45 +48,187 @@ void delay_4c(uint16_t delay) __naked {
djnz dpl, 00004$ ; 4c

00005$:
#if !defined(__SDCC_MODEL_HUGE)
ret ; 4c
#else
ljmp __sdcc_banked_ret
#endif
__endasm;
}

void delay_us(uint16_t count) {
// Empirically correct for our CLKSPD detection code.
#define OVERHEAD 20
// 1 loop iteration is 16 clocks.
// At 48 MHz, 16 clocks is 1/3 µs.
// Thus, iteration-count = µs-count * 3.
// At 24 and 12 MHz we divide that by 2 and 4.
uint16_t iters = count + count + count;
uint8_t cpucs = CPUCS;
if(cpucs & _CLKSPD1)
;
else if(cpucs & _CLKSPD0)
iters >>= 1;
else
iters >>= 2;
if(iters <= OVERHEAD)
return;
iters = iters - OVERHEAD;
delay_4c(iters);
// Spin for `count` microseconds, compensating for the cycles spent in this routine and for
// `caller_overh` additional cycles spent by the caller.
//
// Arguments, as read by the code below:
//   count         arrives in DPH:DPL.
//   caller_overh  one byte on the stack, pushed by the caller before the call. After the four
//                 register pushes of the prolog it is read from SP-6. It is added to this
//                 routine's own cycle tally before the sum is divided by 4.
//
// Operation:
//   iters  = count * 3 (16-bit; any carry out of the high byte is discarded)
//   iters >>= 1 unless CPUCS bit 4 (_CLKSPD1) is set
//   iters >>= 1 again unless CPUCS bit 3 (_CLKSPD0) is set
//   overh  = (cycle tally for the path taken + caller_overh) >> 2   (8-bit sum)
//   iters -= overh
//   if the subtraction borrows, return immediately; otherwise tail-jump to delay_4c with
//   iters in DPH:DPL, so that delay_4c's return goes straight back to our caller.
//
// r0, r1, r6 and r7 are saved in the prolog and restored on every exit path.
//
// NOTE(review): the annotations in delay_4c in this file give `jnc` as 3c and `mov dpl, a` as
// 2c, whereas the tally below counts them as 4c and 1c. The constants are left untouched;
// confirm the cycle counts against the FX2 instruction timing table.
void delay_us_overhead(uint16_t count, uint8_t caller_overh) __naked __reentrant {
  count;
  caller_overh;
  __asm;
    ; (ljmp   delay_us_overhead)  ; 4c

    // prolog
    ar7 = 0x07
    ar6 = 0x06
    ar1 = 0x01
    ar0 = 0x00
    // count  dph:dpl
    // iters  r7:r6
    // overh  r1
    // cpucs  r0
    push  ar7                     ; 2c
    push  ar6                     ; 2c
    push  ar1                     ; 2c
    push  ar0                     ; 2c
    mov   r6, dpl                 ; 2c
    mov   r7, dph                 ; 2c

    // iters = count * 3 (computed as count*2 in r1:r0, then added to count)
    mov   a, r6                   ; 1c
    add   a, r6                   ; 1c
    mov   r0, a                   ; 1c
    mov   a, r7                   ; 1c
    rlc   a                       ; 1c
    mov   r1, a                   ; 1c
    mov   a, r6                   ; 1c
    add   a, r0                   ; 1c
    mov   r6, a                   ; 1c
    mov   a, r7                   ; 1c
    addc  a, r1                   ; 1c
    mov   r7, a                   ; 1c

    // cpucs = CPUCS
    mov   dptr, #_CPUCS           ; 3c
    movx  a, @dptr                ; 2c
    mov   r0, a                   ; 1c

    // overh = (48 MHz cycle tally)
    mov   r1, #(40+36)            ; 2c

    // if(cpucs & _CLKSPD1) skip
    jb    acc.4, 00000$           ; 4c => 40c

    // iters >>= 1
    clr   c                       ; 1c
    mov   a, r7                   ; 1c
    rrc   a                       ; 1c
    xch   a, r6                   ; 1c
    rrc   a                       ; 1c
    xch   a, r6                   ; 1c
    mov   r7, a                   ; 1c

    // overh = (24 MHz cycle tally)
    mov   r1, #(40+14+36)         ; 2c

    // if(cpucs & _CLKSPD0) skip
    mov   a, r0                   ; 1c
    jb    acc.3, 00000$           ; 4c => 14c

    // iters >>= 1
    clr   c                       ; 1c
    mov   a, r7                   ; 1c
    rrc   a                       ; 1c
    xch   a, r6                   ; 1c
    rrc   a                       ; 1c
    xch   a, r6                   ; 1c
    mov   r7, a                   ; 1c

    // overh = (12 MHz cycle tally)
    mov   r1, #(40+14+9+36)       ; 2c => 9c

  00000$:
    // overh = (overh + caller_overh) >> 2
    // caller_overh sits below the 2-byte return address and the 4 saved registers: SP-6.
    mov   a, sp                   ; 2c
    add   a, #-6                  ; 2c
    mov   r0, a                   ; 1c
    mov   a, r1                   ; 1c
    add   a, @r0                  ; 1c
    clr   c                       ; 1c
    rrc   a                       ; 1c
    clr   c                       ; 1c
    rrc   a                       ; 1c
    mov   r1, a                   ; 1c

    // iters -= overh
    clr   c                       ; 1c
    mov   a, r6                   ; 1c
    subb  a, r1                   ; 1c
    mov   dpl, a                  ; 1c
    mov   a, r7                   ; 1c
    subb  a, #0                   ; 2c
    mov   dph, a                  ; 1c

    // if(underflow) return
    jnc   00001$                  ; 4c
    // The requested delay is shorter than the overhead already spent. The saved registers
    // must be popped before returning: otherwise the return would take two of the saved
    // register bytes as its return address and r0/r1/r6/r7 would stay clobbered.
    pop   ar0
    pop   ar1
    pop   ar6
    pop   ar7
  #if !defined(__SDCC_MODEL_HUGE)
    ret
  #else
    ljmp  __sdcc_banked_ret
  #endif

  00001$:
    // delay_4c(iters), as a tail call with iters already in dph:dpl
    pop   ar0                     ; 2c
    pop   ar1                     ; 2c
    pop   ar6                     ; 2c
    pop   ar7                     ; 2c => 36c
    ljmp  _delay_4c
  __endasm;
}

// Spin for `count` microseconds.
//
// Thin wrapper around delay_us_overhead: `count` is left untouched in DPTR, and the constant
// 17 is pushed on the stack as the caller_overh argument. 17 is the sum of the cycle
// annotations in this wrapper: the caller's `mov dptr` (3c) and jump here (4c), `mov a` (2c),
// `push acc` (2c), `dec sp` (2c) and `ret` (4c). The lcall itself carries no annotation here.
void delay_us(uint16_t count) __naked {
count;
__asm;
; (mov dptr, #?) ; 3c
; (ljmp delay_us) ; 4c
// caller_overh = 17, passed on the stack
mov a, #17 ; 2c
push acc ; 2c
lcall _delay_us_overhead
// drop the stack-passed argument
dec sp ; 2c
#if !defined(__SDCC_MODEL_HUGE)
ret ; 4c => 17c
#else
ljmp __sdcc_banked_ret
#endif
__endasm;
}
#undef OVERHEAD

void delay_ms(uint16_t count) {
// Empirically correct for our CLKSPD detection code.
#define OVERHEAD 5
// Inlined version of delay_us above.
uint16_t iters;
uint8_t cpucs = CPUCS;
if(cpucs & _CLKSPD1)
iters = 1000 * 3 - OVERHEAD;
else if(cpucs & _CLKSPD0)
iters = 1000 * 3 / 2 - OVERHEAD;
else
iters = 1000 * 3 / 4 - OVERHEAD;
while(count--)
delay_4c(iters);

// Spin for `count` milliseconds by calling delay_us_overhead with a count of 1000 once per
// millisecond.
//
// `count` arrives in DPH:DPL and is moved to r7:r6, which serve as the 16-bit loop counter
// (r6 and r7 are saved on entry and restored on exit). The overhead argument for
// delay_us_overhead is pushed once before the loop and stays on the stack for every call;
// it is dropped with `dec sp` after the loop.
void delay_ms(uint16_t count) __naked {
count;
__asm;
// prolog
ar7 = 0x07
ar6 = 0x06
push ar7
push ar6
mov r6, dpl
mov r7, dph

// overhead = (cycle tally)
// 18 equals the sum of the per-instruction cycle annotations of one loop iteration below,
// counting the `dec r7` that only executes when the low byte wraps.
mov a, #18
push acc

00000$:
// while(r7:r6 != 0)
mov a, r6 ; 1c
orl a, r7 ; 1c
jz 00002$ ; 3c

// 16-bit decrement: borrow into r7 only when r6 wrapped from 0x00 to 0xff
dec r6 ; 1c
cjne r6, #0xff, 00001$ ; 4c
dec r7 ; 1c

00001$:
// delay_us_overhead(1000, <overhead already on the stack>)
mov dpl, #(1000&0xff) ; 2c
mov dph, #(1000>>8) ; 2c
lcall _delay_us_overhead

sjmp 00000$ ; 3c

00002$:
// drop the stack-passed overhead argument
dec sp

// epilog
pop ar6
pop ar7
// NOTE(review): the "=> 17c" annotation below matches the tally in delay_us rather than the
// 18 pushed above; possibly a leftover from copying that routine. Confirm.
#if !defined(__SDCC_MODEL_HUGE)
ret ; 4c => 17c
#else
ljmp __sdcc_banked_ret
#endif
__endasm;
}
#undef OVERHEAD
Loading