Skip to content
This repository has been archived by the owner on May 4, 2018. It is now read-only.

Commit

Permalink
unix: reimplement accept() EMFILE trick
Browse files Browse the repository at this point in the history
Implement a best effort approach to mitigating accept() EMFILE errors.

We have a spare file descriptor stashed away that we close to get below
the EMFILE limit. Next, we accept all pending connections and close them
immediately to signal the clients that we're overloaded - and we are, but
we still keep on trucking.

There is one caveat: it's not reliable in a multi-threaded environment.
The file descriptor limit is per process. Our party trick fails if another
thread opens a file or creates a socket in the time window between us
calling close() and accept().

Fixes #315.
  • Loading branch information
bnoordhuis committed Sep 10, 2012
1 parent 86cb520 commit 4f5c8da
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 20 deletions.
1 change: 1 addition & 0 deletions include/uv-private/uv-unix.h
Expand Up @@ -137,6 +137,7 @@ typedef struct {
uint64_t time; \
void* signal_ctx; \
uv_signal_t child_watcher; \
int emfile_fd; \
UV_PLATFORM_LOOP_FIELDS \

#define UV_REQ_TYPE_PRIVATE /* empty */
Expand Down
6 changes: 6 additions & 0 deletions src/unix/loop.c
Expand Up @@ -51,6 +51,7 @@ int uv__loop_init(uv_loop_t* loop, int default_loop) {
loop->time = uv_hrtime() / 1000000;
loop->async_pipefd[0] = -1;
loop->async_pipefd[1] = -1;
loop->emfile_fd = -1;
loop->ev = (default_loop ? ev_default_loop : ev_loop_new)(flags);
ev_set_userdata(loop->ev, loop);
eio_channel_init(&loop->uv_eio_channel, loop);
Expand All @@ -73,4 +74,9 @@ void uv__loop_delete(uv_loop_t* loop) {
uv__platform_loop_delete(loop);
uv__signal_unregister(loop);
ev_loop_destroy(loop->ev);

if (loop->emfile_fd != -1) {
close(loop->emfile_fd);
loop->emfile_fd = -1;
}
}
132 changes: 112 additions & 20 deletions src/unix/stream.c
Expand Up @@ -62,6 +62,29 @@ static void uv__read(uv_stream_t* stream);
static void uv__stream_io(uv_loop_t* loop, uv__io_t* w, int events);


/* Helper for the accept() EMFILE party trick.
 *
 * Opens `path` with `flags` and hands back a descriptor that has been put
 * through the close-on-exec machinery. Returns the descriptor on success and
 * -1 when open() fails.
 */
static int uv__open_cloexec(const char* path, int flags) {
  int handle;

#if defined(__linux__)
  /* First attempt: request close-on-exec as part of the open() call itself. */
  handle = open(path, flags | UV__O_CLOEXEC);

  /* Either we have a descriptor, or the failure is something other than
   * EINVAL and retrying would not help. In both cases `handle` already holds
   * the value to return.
   */
  if (handle != -1 || errno != EINVAL)
    return handle;

  /* EINVAL is taken to mean O_CLOEXEC is not supported; retry without it. */
#endif

  handle = open(path, flags);
  if (handle == -1)
    return -1;

  /* Set the flag in a separate step. The return value is not checked. */
  uv__cloexec(handle, 1);

  return handle;
}


static size_t uv__buf_count(uv_buf_t bufs[], int bufcnt) {
size_t total = 0;
int i;
Expand Down Expand Up @@ -90,6 +113,9 @@ void uv__stream_init(uv_loop_t* loop,
ngx_queue_init(&stream->write_completed_queue);
stream->write_queue_size = 0;

if (loop->emfile_fd == -1)
loop->emfile_fd = uv__open_cloexec("/", O_RDONLY);

#if defined(__APPLE__)
stream->select = NULL;
#endif /* defined(__APPLE__) */
Expand Down Expand Up @@ -370,10 +396,56 @@ static void uv__next_accept(uv_idle_t* idle, int status) {
}


/* Best-effort mitigation for accept() failing because the process has run
 * out of file descriptors.
 *
 * The loop keeps one spare descriptor in reserve. Closing it frees a slot,
 * which lets us accept every pending connection and close it right away.
 * That tells the clients we are overloaded - which we are - while the
 * server itself keeps on running.
 *
 * Caveat: this is not reliable in a multi-threaded environment. The file
 * descriptor limit is per process, so the trick fails if another thread
 * opens a file or creates a socket in the window between our close() and
 * accept() calls.
 *
 * Returns 0 when accepting stopped with EAGAIN/EWOULDBLOCK, and -1 when
 * there is no spare descriptor or accepting stopped with any other error.
 * The spare descriptor is reopened before returning, which may overwrite
 * errno.
 */
static int uv__emfile_trick(uv_loop_t* loop, int accept_fd) {
  int peer;
  int status;

  /* Nothing stashed away, nothing we can do. */
  if (loop->emfile_fd == -1)
    return -1;

  /* Give up the spare descriptor to get below the limit. */
  close(loop->emfile_fd);

  /* Accept and immediately drop connections. Keep going after every
   * successful accept and after an interrupted one; stop on any other error.
   */
  do {
    peer = uv__accept(accept_fd);
    if (peer != -1)
      close(peer);
  } while (peer != -1 || errno == EINTR);

  /* Inspect errno before the reopen below gets a chance to change it. */
  status = (errno == EAGAIN || errno == EWOULDBLOCK) ? 0 : -1;

  /* Re-arm the trick for the next time we hit the limit. */
  loop->emfile_fd = uv__open_cloexec("/", O_RDONLY);

  return status;
}


void uv__server_io(uv_loop_t* loop, uv__io_t* w, int events) {
static __read_mostly int use_emfile_trick = -1;
uv_stream_t* stream;
int fd;
uv_stream_t* stream = container_of(w, uv_stream_t, read_watcher);
int r;

stream = container_of(w, uv_stream_t, read_watcher);
assert(events == UV__IO_READ);
assert(!(stream->flags & UV_CLOSING));

Expand All @@ -389,28 +461,48 @@ void uv__server_io(uv_loop_t* loop, uv__io_t* w, int events) {
assert(stream->accepted_fd < 0);
fd = uv__accept(stream->fd);

if (fd < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
/* No problem. */
return;
} else if (errno == ECONNABORTED) {
/* ignore */
if (fd == -1) {
switch (errno) {
#if EWOULDBLOCK != EAGAIN
case EWOULDBLOCK:
#endif
case EAGAIN:
return; /* Not an error. */

case ECONNABORTED:
continue; /* Ignore. */

case EMFILE:
case ENFILE:
if (use_emfile_trick == -1) {
const char* val = getenv("UV_ACCEPT_EMFILE_TRICK");
use_emfile_trick = (val == NULL || atoi(val) != 0);
}

if (use_emfile_trick) {
SAVE_ERRNO(r = uv__emfile_trick(loop, stream->fd));
if (r == 0)
continue;
}

/* Fall through. */

default:
uv__set_sys_error(loop, errno);
stream->connection_cb(stream, -1);
continue;
} else {
uv__set_sys_error(stream->loop, errno);
stream->connection_cb((uv_stream_t*)stream, -1);
}
} else {
stream->accepted_fd = fd;
stream->connection_cb(stream, 0);

if (stream->accepted_fd != -1 ||
(stream->type == UV_TCP && stream->flags == UV_TCP_SINGLE_ACCEPT)) {
/* The user hasn't yet called uv_accept() */
uv__io_stop(stream->loop, &stream->read_watcher);
break;
}
}

stream->accepted_fd = fd;
stream->connection_cb(stream, 0);

if (stream->accepted_fd != -1 ||
(stream->type == UV_TCP && stream->flags == UV_TCP_SINGLE_ACCEPT)) {
/* The user hasn't yet called uv_accept() */
uv__io_stop(loop, &stream->read_watcher);
break;
}
}

if (stream->fd != -1 &&
Expand Down

0 comments on commit 4f5c8da

Please sign in to comment.