Skip to content

Commit

Permalink
Report watchdog expiration and RTIO clock failure as exceptions.
Browse files Browse the repository at this point in the history
Fixes #316.
whitequark committed Mar 18, 2016
1 parent 80b13b1 commit 501de30
Showing 6 changed files with 44 additions and 20 deletions.
7 changes: 7 additions & 0 deletions artiq/coredevice/comm_generic.py
Original file line number Diff line number Diff line change
@@ -50,6 +50,9 @@ class _D2HMsgType(Enum):
FLASH_OK_REPLY = 12
FLASH_ERROR_REPLY = 13

WATCHDOG_EXPIRED = 14
CLOCK_FAILURE = 15

This comment has been minimized.

Copy link
@sbourdeauducq

sbourdeauducq Mar 19, 2016

Member

This is redundant with CLOCK_SWITCH_FAILED.

This comment has been minimized.

Copy link
@whitequark

whitequark Mar 19, 2016

Author Contributor

Not really since when we lose the lock, we weren't switching the clock.

This comment has been minimized.

Copy link
@sbourdeauducq

sbourdeauducq Mar 19, 2016

Member

Ok, what I meant is that level of detail is not warranted.

This comment has been minimized.

Copy link
@whitequark

whitequark Mar 19, 2016

Author Contributor

I don't really see a problem with it... it will help to debug this error when it crops out, and naturally rare errors are harder to debug in the first place, and it doesn't cause any issues otherwise.

This comment has been minimized.

Copy link
@sbourdeauducq

sbourdeauducq Mar 19, 2016

Member

Okay, then just change the exception documentation to point out the lost lock.



class UnsupportedDevice(Exception):
pass
@@ -522,6 +525,10 @@ def serve(self, object_map, symbolizer):
self._serve_rpc(object_map)
elif self._read_type == _D2HMsgType.KERNEL_EXCEPTION:
self._serve_exception(object_map, symbolizer)
elif self._read_type == _D2HMsgType.WATCHDOG_EXPIRED:
raise exceptions.WatchdogExpired
elif self._read_type == _D2HMsgType.CLOCK_FAILURE:
raise exceptions.ClockFailure
else:
self._read_expect(_D2HMsgType.KERNEL_FINISHED)
return
6 changes: 6 additions & 0 deletions artiq/coredevice/exceptions.py
Original file line number Diff line number Diff line change
@@ -129,3 +129,9 @@ class DDSBatchError(Exception):
class I2CError(Exception):
"""Raised with a I2C transaction fails."""
artiq_builtin = True

class WatchdogExpired(Exception):
"""Raised when a watchdog expires."""

class ClockFailure(Exception):
"""Raised when RTIO PLL is unable to lock."""

This comment has been minimized.

Copy link
@sbourdeauducq

sbourdeauducq Mar 19, 2016

Member

In this case, lost lock. It it didn't lock at all, you'd get clock switch failed.

8 changes: 5 additions & 3 deletions artiq/runtime/net_server.c
Original file line number Diff line number Diff line change
@@ -158,18 +158,20 @@ static void tcp_pcb_service(void *arg, struct tcp_pcb *pcb)
/* Writer interface */
if(cs == instance->open_session_cs) {
void *data;
int len, sndbuf;
int len, sndbuf, close_flag;

cs->instance->poll(&data, &len);
cs->instance->poll(&data, &len, &close_flag);
if(len > 0) {
sndbuf = tcp_sndbuf(pcb);
if(len > sndbuf)
len = sndbuf;
tcp_write(pcb, data, len, 0);
instance->ack_consumed(len);
}
if(len < 0)

This comment has been minimized.

Copy link
@sbourdeauducq

sbourdeauducq Mar 19, 2016

Member

Nope.

if(close_flag) {
tcp_output(pcb);
net_server_close(cs, pcb);
}
}
}

2 changes: 1 addition & 1 deletion artiq/runtime/net_server.h
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ struct net_server_instance {
void (*start)(void);
void (*end)(void);
int (*input)(void *data, int length);
void (*poll)(void **data, int *length);
void (*poll)(void **data, int *length, int *close_flag);
void (*ack_consumed)(int length);
void (*ack_sent)(int length);

39 changes: 24 additions & 15 deletions artiq/runtime/session.c
Original file line number Diff line number Diff line change
@@ -392,7 +392,10 @@ enum {

REMOTEMSG_TYPE_FLASH_READ_REPLY,
REMOTEMSG_TYPE_FLASH_OK_REPLY,
REMOTEMSG_TYPE_FLASH_ERROR_REPLY
REMOTEMSG_TYPE_FLASH_ERROR_REPLY,

REMOTEMSG_TYPE_WATCHDOG_EXPIRED,
REMOTEMSG_TYPE_CLOCK_FAILURE,
};

static int receive_rpc_value(const char **tag, void **slot);
@@ -1083,30 +1086,36 @@ int session_input(void *data, int length)
/* *length is set to -1 in case of irrecoverable error
* (the session must be dropped and session_end called)
*/
void session_poll(void **data, int *length)
void session_poll(void **data, int *length, int *close_flag)
{
*close_flag = 0;

if(user_kernel_state == USER_KERNEL_RUNNING) {
if(watchdog_expired()) {
core_log("Watchdog expired\n");
*length = -1;
return;

*close_flag = 1;
out_packet_empty(REMOTEMSG_TYPE_WATCHDOG_EXPIRED);
}
if(!rtiocrg_check()) {
core_log("RTIO clock failure\n");
*length = -1;
return;

*close_flag = 1;
out_packet_empty(REMOTEMSG_TYPE_CLOCK_FAILURE);

This comment has been minimized.

Copy link
@jordens

jordens Mar 18, 2016

Member

InternalError would be sufficient here FWIW. It seems unrealistic that users will ever try to specifically catch errors like clock failures.

This comment has been minimized.

Copy link
@whitequark

whitequark Mar 18, 2016

Author Contributor

Feel free to change as you see fit--I took the most generic option.

}
}

/* If the output buffer is available,
* check if the kernel CPU has something to transmit.
*/
if(out_packet_available()) {
struct msg_base *umsg = mailbox_receive();
if(umsg) {
if(!process_kmsg(umsg)) {
*length = -1;
return;
if(!*close_flag) {
/* If the output buffer is available,
* check if the kernel CPU has something to transmit.
*/
if(out_packet_available()) {
struct msg_base *umsg = mailbox_receive();
if(umsg) {
if(!process_kmsg(umsg)) {
*length = -1;
return;
}
}
}
}
2 changes: 1 addition & 1 deletion artiq/runtime/session.h
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@ void session_start(void);
void session_end(void);

int session_input(void *data, int length);
void session_poll(void **data, int *length);
void session_poll(void **data, int *length, int *close_flag);
void session_ack_consumed(int length);
void session_ack_sent(int length);

0 comments on commit 501de30

Please sign in to comment.