Skip to content

Commit 0874a5b

Browse files
committedJun 29, 2018
std.atomic.queue - document limitation and add MPSC queue
·
0.15.20.3.0
1 parent 4a35d7e commit 0874a5b

File tree

4 files changed

+255
-34
lines changed

4 files changed

+255
-34
lines changed
 

‎CMakeLists.txt‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,8 @@ set(ZIG_CPP_SOURCES
431431
set(ZIG_STD_FILES
432432
"array_list.zig"
433433
"atomic/index.zig"
434-
"atomic/queue.zig"
434+
"atomic/queue_mpmc.zig"
435+
"atomic/queue_mpsc.zig"
435436
"atomic/stack.zig"
436437
"base64.zig"
437438
"buf_map.zig"

‎std/atomic/index.zig‎

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
pub const Stack = @import("stack.zig").Stack;
2-
pub const Queue = @import("queue.zig").Queue;
2+
pub const QueueMpsc = @import("queue_mpsc.zig").QueueMpsc;
3+
pub const QueueMpmc = @import("queue_mpmc.zig").QueueMpmc;
34

45
test "std.atomic" {
5-
_ = @import("stack.zig").Stack;
6-
_ = @import("queue.zig").Queue;
6+
_ = @import("stack.zig");
7+
_ = @import("queue_mpsc.zig");
8+
_ = @import("queue_mpmc.zig");
79
}

‎std/atomic/queue_mpmc.zig‎

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
const builtin = @import("builtin");
2+
const AtomicOrder = builtin.AtomicOrder;
3+
const AtomicRmwOp = builtin.AtomicRmwOp;
4+
5+
/// Many producer, many consumer, non-allocating, thread-safe, lock-free queue.
///
/// The queue is an intrusive singly linked list: callers own the memory of
/// every `Node`, the queue only links them together. `root` is a placeholder
/// node embedded in the queue itself; `init` points both `head` and `tail`
/// at it, so a queue must not be copied or moved once `init` has run.
///
/// This implementation has a crippling limitation - it hangs onto node
/// memory for 1 extra get() and 1 extra put() operation - when get() returns a node, that
/// node must not be freed until both the next get() and the next put() completes.
pub fn QueueMpmc(comptime T: type) type {
    return struct {
        /// Placeholder in front of the oldest queued node; `head.next` is
        /// what the next get() hands out.
        head: *Node,
        /// Most recently put node, or `root` while nothing has been put.
        tail: *Node,
        /// Initial placeholder that `head` and `tail` start out pointing at.
        root: Node,

        pub const Self = this;

        pub const Node = struct {
            next: ?*Node,
            data: T,
        };

        /// Resets the queue to empty, in place.
        /// TODO: well defined copy elision: https://github.com/ziglang/zig/issues/287
        pub fn init(self: *Self) void {
            self.root.next = null;
            self.head = &self.root;
            self.tail = &self.root;
        }

        /// Appends `node` to the queue. `node.next` is overwritten.
        ///
        /// The node is published in two steps: `tail` is swapped to `node`,
        /// then the previous tail is linked to it. Until the second step
        /// finishes the node is not reachable from `head`, so a concurrent
        /// get() may not see it yet.
        pub fn put(self: *Self, node: *Node) void {
            node.next = null;

            const tail = @atomicRmw(*Node, &self.tail, AtomicRmwOp.Xchg, node, AtomicOrder.SeqCst);
            _ = @atomicRmw(?*Node, &tail.next, AtomicRmwOp.Xchg, node, AtomicOrder.SeqCst);
        }

        /// Returns the oldest reachable node, or null when none is linked
        /// after `head`. The returned node becomes the new `head` placeholder,
        /// which is why the queue keeps referring to it afterwards.
        ///
        /// node must not be freed until both the next get() and the next put() complete
        pub fn get(self: *Self) ?*Node {
            var head = @atomicLoad(*Node, &self.head, AtomicOrder.SeqCst);
            while (true) {
                // put() stores `next` from other threads with an atomic
                // exchange, so it has to be read atomically here as well; a
                // plain load would race with that store.
                const node = @atomicLoad(?*Node, &head.next, AtomicOrder.SeqCst) orelse return null;
                // On failure the cmpxchg yields the current value of
                // `self.head`; retry from there.
                head = @cmpxchgWeak(*Node, &self.head, head, node, AtomicOrder.SeqCst, AtomicOrder.SeqCst) orelse return node;
            }
        }

        /// This is a debug function that is not thread-safe.
        /// Prints the chain of nodes starting at `head` and the chain
        /// starting at `tail`.
        pub fn dump(self: *Self) void {
            std.debug.warn("head: ");
            dumpRecursive(self.head, 0);
            std.debug.warn("tail: ");
            dumpRecursive(self.tail, 0);
        }

        /// Prints one node per line (address and data), indenting each
        /// successor by one more space, and "(null)" at the end of the chain.
        /// Gives up silently if stderr cannot be obtained or written to.
        fn dumpRecursive(optional_node: ?*Node, indent: usize) void {
            var stderr_file = std.io.getStdErr() catch return;
            const stderr = &std.io.FileOutStream.init(&stderr_file).stream;
            stderr.writeByteNTimes(' ', indent) catch return;
            if (optional_node) |node| {
                std.debug.warn("0x{x}={}\n", @ptrToInt(node), node.data);
                dumpRecursive(node.next, indent + 1);
            } else {
                std.debug.warn("(null)\n");
            }
        }
    };
}
66+
67+
const std = @import("std");
68+
const assert = std.debug.assert;
69+
70+
/// State shared between the test body and the threads it spawns.
const Context = struct {
    /// Used by startPuts to create queue nodes.
    allocator: *std.mem.Allocator,

    /// The queue under test.
    queue: *QueueMpmc(i32),

    /// Sum of every value put; updated atomically by startPuts.
    put_sum: isize,

    /// Sum of every value received; updated atomically by startGets.
    get_sum: isize,

    /// Number of nodes received; updated atomically by startGets.
    get_count: usize,

    /// Set to 1 once the putter threads have been waited on.
    puts_done: u8, // TODO make this a bool
};
78+
79+
// TODO add lazy evaluated build options and then put puts_per_thread behind
80+
// some option such as: "AggressiveMultithreadedFuzzTest". In the AppVeyor
81+
// CI we would use a less aggressive setting since at 1 core, while we still
82+
// want this test to pass, we need a smaller value since there is so much thrashing
83+
// we would also use a less aggressive setting when running in valgrind
84+
const puts_per_thread = 500;
85+
const put_thread_count = 3;
86+
87+
// Multi-threaded fuzz test: `put_thread_count` threads run startPuts and the
// same number run startGets against one queue. Afterwards the sum and the
// count of the values received must match what was put.
test "std.atomic.queue_mpmc" {
    var backing = std.heap.DirectAllocator.init();
    defer backing.deinit();

    // Node memory comes out of one fixed buffer and is never handed back
    // node by node; the whole buffer is released at the end of the test.
    var node_bytes = try backing.allocator.alloc(u8, 300 * 1024);
    defer backing.allocator.free(node_bytes);

    var node_pool = std.heap.ThreadSafeFixedBufferAllocator.init(node_bytes);

    var queue: QueueMpmc(i32) = undefined;
    queue.init();

    var context = Context{
        .allocator = &node_pool.allocator,
        .queue = &queue,
        .put_sum = 0,
        .get_sum = 0,
        .get_count = 0,
        .puts_done = 0,
    };

    var putters: [put_thread_count]*std.os.Thread = undefined;
    var put_index: usize = 0;
    while (put_index < putters.len) : (put_index += 1) {
        putters[put_index] = try std.os.spawnThread(&context, startPuts);
    }

    var getters: [put_thread_count]*std.os.Thread = undefined;
    var get_index: usize = 0;
    while (get_index < getters.len) : (get_index += 1) {
        getters[get_index] = try std.os.spawnThread(&context, startGets);
    }

    for (putters) |putter| {
        putter.wait();
    }

    // Every put has completed; tell the getters that the next time they find
    // the queue empty they may stop.
    _ = @atomicRmw(u8, &context.puts_done, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);

    for (getters) |getter| {
        getter.wait();
    }

    if (context.put_sum != context.get_sum) {
        std.debug.panic("failure\nput_sum:{} != get_sum:{}", context.put_sum, context.get_sum);
    }

    const expected_get_count = puts_per_thread * put_thread_count;
    if (context.get_count != expected_get_count) {
        std.debug.panic(
            "failure\nget_count:{} != puts_per_thread:{} * put_thread_count:{}",
            context.get_count,
            u32(puts_per_thread),
            u32(put_thread_count),
        );
    }
}
136+
137+
/// Thread entry point for a producer: puts `puts_per_thread` nodes holding
/// pseudo-random values onto `ctx.queue` and adds each value to `ctx.put_sum`.
/// Always returns 0.
fn startPuts(ctx: *Context) u8 {
    // Fixed seed, so every producer thread generates the same value sequence.
    var prng = std.rand.DefaultPrng.init(0xdeadbeef);

    var puts_made: usize = 0;
    while (puts_made < puts_per_thread) : (puts_made += 1) {
        std.os.time.sleep(0, 1); // let the os scheduler be our fuzz

        const value = @bitCast(i32, prng.random.scalar(u32));
        const node = ctx.allocator.create(QueueMpmc(i32).Node{
            .next = undefined, // put() overwrites this
            .data = value,
        }) catch unreachable;

        ctx.queue.put(node);
        _ = @atomicRmw(isize, &ctx.put_sum, AtomicRmwOp.Add, value, AtomicOrder.SeqCst);
    }

    return 0;
}
152+
153+
/// Thread entry point for a consumer: repeatedly drains `ctx.queue`, adding
/// each received value to `ctx.get_sum` and counting it in `ctx.get_count`.
/// Stops after the first drain that began with `ctx.puts_done` already set.
/// Always returns 0.
fn startGets(ctx: *Context) u8 {
    var puts_finished = false;
    while (!puts_finished) {
        // Sample the flag before draining, so that one full drain still
        // happens after the producers were seen to be done.
        puts_finished = @atomicLoad(u8, &ctx.puts_done, AtomicOrder.SeqCst) == 1;

        while (ctx.queue.get()) |node| {
            std.os.time.sleep(0, 1); // let the os scheduler be our fuzz
            _ = @atomicRmw(isize, &ctx.get_sum, AtomicRmwOp.Add, node.data, AtomicOrder.SeqCst);
            _ = @atomicRmw(usize, &ctx.get_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
        }
    }
    return 0;
}
166+
167+
// Interleaves put() and get() on a single thread and checks that nodes come
// back in the order they were put.
test "std.atomic.queue_mpmc single-threaded" {
    const Node = QueueMpmc(i32).Node;

    var queue: QueueMpmc(i32) = undefined;
    queue.init();

    // `next` is left undefined on purpose: put() sets it.
    var node_0 = Node{ .next = undefined, .data = 0 };
    var node_1 = Node{ .next = undefined, .data = 1 };
    var node_2 = Node{ .next = undefined, .data = 2 };
    var node_3 = Node{ .next = undefined, .data = 3 };
    var node_4 = Node{ .next = undefined, .data = 4 };

    queue.put(&node_0);
    queue.put(&node_1);
    assert(queue.get().?.data == 0);

    queue.put(&node_2);
    queue.put(&node_3);
    assert(queue.get().?.data == 1);
    assert(queue.get().?.data == 2);

    queue.put(&node_4);
    assert(queue.get().?.data == 3);
    // if we were to set node_3.next to null here, it would cause this test
    // to fail. this demonstrates the limitation of hanging on to extra memory.

    assert(queue.get().?.data == 4);
    assert(queue.get() == null);
}

‎std/atomic/queue.zig‎ renamed to ‎std/atomic/queue_mpsc.zig‎

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,54 @@
1+
const std = @import("std");
2+
const assert = std.debug.assert;
13
const builtin = @import("builtin");
24
const AtomicOrder = builtin.AtomicOrder;
35
const AtomicRmwOp = builtin.AtomicRmwOp;
46

5-
/// Many producer, single consumer, non-allocating, thread-safe, lock-free
///
/// Built from three `std.atomic.Stack(T)` instances: producers push onto the
/// inbox selected by `inbox_index`, and the consumer serves nodes from
/// `outbox`, refilling it from the inboxes when it runs dry.
/// `get()` must only ever be called from one thread at a time.
pub fn QueueMpsc(comptime T: type) type {
    return struct {
        inboxes: [2]std.atomic.Stack(T),
        outbox: std.atomic.Stack(T),
        /// Index (0 or 1) of the inbox that put() currently pushes onto.
        inbox_index: usize,

        pub const Self = this;

        pub const Node = std.atomic.Stack(T).Node;

        /// Returns an empty queue with inbox 0 selected for producers.
        pub fn init() Self {
            return Self{
                .inboxes = []std.atomic.Stack(T){
                    std.atomic.Stack(T).init(),
                    std.atomic.Stack(T).init(),
                },
                .outbox = std.atomic.Stack(T).init(),
                .inbox_index = 0,
            };
        }

        /// Adds `node` to the queue. May be called from any number of threads.
        ///
        /// The index is read first and the push happens afterwards, so the
        /// consumer may switch inboxes in between; the node then lands in the
        /// inbox that is no longer selected. get() accounts for that.
        pub fn put(self: *Self, node: *Node) void {
            const inbox_index = @atomicLoad(usize, &self.inbox_index, AtomicOrder.SeqCst);
            const inbox = &self.inboxes[inbox_index];
            inbox.push(node);
        }

        /// Removes and returns a node, or null if the outbox and both inboxes
        /// are empty. Single consumer only.
        pub fn get(self: *Self) ?*Node {
            if (self.outbox.pop()) |node| {
                return node;
            }

            // Each pass redirects producers to the other inbox and moves the
            // contents of the one they were using into the outbox. A single
            // pass is not enough: a producer that read `inbox_index` just
            // before an earlier switch may have pushed into the inbox that is
            // currently NOT selected, and that node would otherwise stay
            // invisible until some later get(). Visiting both inboxes ensures
            // null is only returned when no completed put() is left behind.
            var inboxes_left: usize = self.inboxes.len;
            while (inboxes_left != 0) : (inboxes_left -= 1) {
                const prev_inbox_index = @atomicRmw(usize, &self.inbox_index, AtomicRmwOp.Xor, 0x1, AtomicOrder.SeqCst);
                const prev_inbox = &self.inboxes[prev_inbox_index];
                // NOTE(review): presumably Stack pops newest-first, so moving
                // nodes across one at a time leaves the oldest on top of the
                // outbox - confirm against std.atomic.Stack.
                while (prev_inbox.pop()) |node| {
                    self.outbox.push(node);
                }
                if (self.outbox.pop()) |node| {
                    return node;
                }
            }
            return null;
        }
    };
}
4248

43-
const std = @import("std");
4449
const Context = struct {
4550
allocator: *std.mem.Allocator,
46-
queue: *Queue(i32),
51+
queue: *QueueMpsc(i32),
4752
put_sum: isize,
4853
get_sum: isize,
4954
get_count: usize,
@@ -58,7 +63,7 @@ const Context = struct {
5863
const puts_per_thread = 500;
5964
const put_thread_count = 3;
6065

61-
test "std.atomic.queue" {
66+
test "std.atomic.queue_mpsc" {
6267
var direct_allocator = std.heap.DirectAllocator.init();
6368
defer direct_allocator.deinit();
6469

@@ -68,8 +73,7 @@ test "std.atomic.queue" {
6873
var fixed_buffer_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(plenty_of_memory);
6974
var a = &fixed_buffer_allocator.allocator;
7075

71-
var queue: Queue(i32) = undefined;
72-
queue.init();
76+
var queue = QueueMpsc(i32).init();
7377
var context = Context{
7478
.allocator = a,
7579
.queue = &queue,
@@ -83,7 +87,7 @@ test "std.atomic.queue" {
8387
for (putters) |*t| {
8488
t.* = try std.os.spawnThread(&context, startPuts);
8589
}
86-
var getters: [put_thread_count]*std.os.Thread = undefined;
90+
var getters: [1]*std.os.Thread = undefined;
8791
for (getters) |*t| {
8892
t.* = try std.os.spawnThread(&context, startGets);
8993
}
@@ -114,7 +118,7 @@ fn startPuts(ctx: *Context) u8 {
114118
while (put_count != 0) : (put_count -= 1) {
115119
std.os.time.sleep(0, 1); // let the os scheduler be our fuzz
116120
const x = @bitCast(i32, r.random.scalar(u32));
117-
const node = ctx.allocator.create(Queue(i32).Node{
121+
const node = ctx.allocator.create(QueueMpsc(i32).Node{
118122
.next = undefined,
119123
.data = x,
120124
}) catch unreachable;

0 commit comments

Comments
 (0)
Please sign in to comment.