Skip to content

Commit

Permalink
Improve HTTP::ChunkedContent implementation (#5928)
Browse files Browse the repository at this point in the history
* Fix HTTP::ChunkedContent read incomplete chunks

* fixup! Fix HTTP::ChunkedContent read incomplete chunks

* Improve error messages

* Improve ChunkedContent implementation

* Remove enforce CRLF

* Add const HTTP::MAX_HEADER_SIZE

* Adjust error messages

* Lazy initialize headers

* Add spec for \n

* Fix issue with delayed chunks

* Refactor chunk_bytes_read and @expect_chunk_start into @received_final_chunk
straight-shoota authored and RX14 committed Apr 11, 2018
1 parent f91af5f commit c541aae
Showing 3 changed files with 268 additions and 53 deletions.
189 changes: 189 additions & 0 deletions spec/std/http/chunked_content_spec.cr
Original file line number Diff line number Diff line change
@@ -55,4 +55,193 @@ describe HTTP::ChunkedContent do
content.skip(4)
content.gets_to_end.should eq("456")
end

it "#gets reads multiple chunks" do
mem = IO::Memory.new("1\r\nA\r\n1\r\nB\r\n0\r\n\r\n")
content = HTTP::ChunkedContent.new(mem)

content.gets.should eq "AB"
mem.pos.should eq mem.bytesize
end

it "#gets reads multiple chunks with \n" do
# The RFC format requires CRLF, but several standard implementations also
# accept LF, so we should too.
mem = IO::Memory.new("1\nA\n1\nB\n0\n\n")
content = HTTP::ChunkedContent.new(mem)

content.gets.should eq "AB"
mem.pos.should eq mem.bytesize
end

it "#read reads empty content" do
mem = IO::Memory.new("0\r\n\r\n")
content = HTTP::ChunkedContent.new(mem)

content.read(Bytes.new(5)).should eq 0
mem.pos.should eq mem.bytesize
end

it "#read_byte reads empty content" do
mem = IO::Memory.new("0\r\n\r\n")
content = HTTP::ChunkedContent.new(mem)

content.read_byte.should be_nil
mem.pos.should eq mem.bytesize
end

it "#peek reads empty content" do
mem = IO::Memory.new("0\r\n\r\n")
content = HTTP::ChunkedContent.new(mem)

content.peek.should be_nil
mem.pos.should eq mem.bytesize
end

it "#read handles interrupted io" do
mem = IO::Memory.new("3\r\n123\r\n")
content = HTTP::ChunkedContent.new(mem)

content.skip(3)
expect_raises IO::EOFError do
content.read Bytes.new(5)
end
end

it "#read_byte handles interrupted io" do
mem = IO::Memory.new("3\r\n123\r\n")
content = HTTP::ChunkedContent.new(mem)

content.skip(3)
expect_raises IO::EOFError do
content.read_byte
end
end

it "#peek handles interrupted io" do
mem = IO::Memory.new("3\r\n123\r\n")
content = HTTP::ChunkedContent.new(mem)

content.skip(3)
expect_raises IO::EOFError do
content.peek
end
end

it "#read handles empty data" do
mem = IO::Memory.new("3\r\n")
content = HTTP::ChunkedContent.new(mem)

expect_raises IO::EOFError do
content.read Bytes.new(1)
end
end

it "#read_byte handles empty data" do
mem = IO::Memory.new("3\r\n")
content = HTTP::ChunkedContent.new(mem)

expect_raises IO::Error do
content.read_byte
end
end

it "#peek handles empty data" do
mem = IO::Memory.new("3\r\n")
content = HTTP::ChunkedContent.new(mem)

expect_raises IO::EOFError do
content.peek
end
end

it "handles empty io" do
mem = IO::Memory.new("")
chunked = HTTP::ChunkedContent.new(mem)
expect_raises IO::EOFError do
chunked.gets
end
end

it "reads chunk extensions" do
mem = IO::Memory.new(%(1;progress=50;foo="bar"\r\nA\r\n1;progress=100\r\nB\r\n0\r\n\r\n"))

chunked = HTTP::ChunkedContent.new(mem)
chunked.gets(2).should eq "AB"
end

it "reads chunked trailer part" do
mem = IO::Memory.new("0\r\nAdditional-Header: Foo\r\n\r\n")

chunked = HTTP::ChunkedContent.new(mem)
chunked.gets.should be_nil
mem.gets.should be_nil
chunked.headers.should eq HTTP::Headers{"Additional-Header" => "Foo"}
end

it "fails if unterminated chunked trailer part" do
mem = IO::Memory.new("0\r\nAdditional-Header: Foo")

chunked = HTTP::ChunkedContent.new(mem)
expect_raises IO::EOFError do
chunked.gets
end
end

it "fails if not properly delimited" do
mem = IO::Memory.new("0\r\n")

chunked = HTTP::ChunkedContent.new(mem)
expect_raises IO::EOFError do
chunked.gets
end
end

it "fails if not properly delimited" do
mem = IO::Memory.new("1\r\nA\r\n0\r\n")

chunked = HTTP::ChunkedContent.new(mem)
expect_raises IO::EOFError do
chunked.gets
end
end

it "fails if invalid chunk size" do
mem = IO::Memory.new("G\r\n")

chunked = HTTP::ChunkedContent.new(mem)
expect_raises IO::Error, "Invalid HTTP chunked content: invalid chunk size" do
chunked.gets
end
end

it "#read stops reading after final chunk" do
mem = IO::Memory.new("0\r\n\r\n1\r\nA\r\n0\r\n\r\n")

chunked = HTTP::ChunkedContent.new(mem)
chunked.read(Bytes.new(8)).should eq 0
mem.pos.should eq 5
chunked.read(Bytes.new(8)).should eq 0
mem.pos.should eq 5
end

it "#read_byte stops reading after final chunk" do
mem = IO::Memory.new("0\r\n\r\n1\r\nA\r\n0\r\n\r\n")

chunked = HTTP::ChunkedContent.new(mem)
chunked.read_byte.should be_nil
mem.pos.should eq 5
chunked.read_byte.should be_nil
mem.pos.should eq 5
end

it "#peek stops reading after final chunk" do
mem = IO::Memory.new("0\r\n\r\n1\r\nA\r\n0\r\n\r\n")

chunked = HTTP::ChunkedContent.new(mem)
chunked.peek.should be_nil
mem.pos.should eq 5
chunked.peek.should be_nil
mem.pos.should eq 5
end
end
7 changes: 5 additions & 2 deletions src/http/common.cr
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
module HTTP
private DATE_PATTERNS = {"%a, %d %b %Y %H:%M:%S %z", "%d %b %Y %H:%M:%S %z", "%A, %d-%b-%y %H:%M:%S %z", "%a %b %e %H:%M:%S %Y"}

# :nodoc:
MAX_HEADER_SIZE = 16_384

# :nodoc:
enum BodyType
OnDemand
@@ -18,9 +21,9 @@ module HTTP
headers = Headers.new

headers_size = 0
while line = io.gets(16_384, chomp: true)
while line = io.gets(MAX_HEADER_SIZE, chomp: true)
headers_size += line.bytesize
break if headers_size > 16_384
break if headers_size > MAX_HEADER_SIZE

if line.empty?
body = nil
125 changes: 74 additions & 51 deletions src/http/content.cr
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require "http/common"

module HTTP
# :nodoc:
module Content
@@ -47,106 +49,127 @@ module HTTP
# :nodoc:
class ChunkedContent < IO
include Content
@chunk_remaining : Int32

# Returns trailing headers read by this chunked content.
#
# The value will only be populated once the entire content has been read,
# i.e. this IO is at EOF.
#
# All headers in the trailing headers section will be returned. Applications
# need to make sure to ignore them or fail if headers are not allowed
# in the chunked trailer part (see [RFC 7230 section 4.1.2](https://tools.ietf.org/html/rfc7230#section-4.1.2)).
getter headers : HTTP::Headers { HTTP::Headers.new }

def initialize(@io : IO)
@chunk_remaining = io.gets.not_nil!.to_i(16)
@read_chunk_start = false
check_last_chunk
@chunk_remaining = -1
@received_final_chunk = false
end

def read(slice : Bytes)
count = slice.size
return 0 if count == 0

# Check if the last read consumed a chunk and we
# need to start consuming the next one.
if @read_chunk_start
read_chunk_start
end
next_chunk

return 0 if @chunk_remaining == 0

to_read = Math.min(slice.size, @chunk_remaining)
to_read = Math.min(count, @chunk_remaining)

bytes_read = @io.read slice[0, to_read]
@chunk_remaining -= bytes_read

check_chunk_remaining_is_zero
if bytes_read == 0
raise IO::EOFError.new("Invalid HTTP chunked content")
end

@chunk_remaining -= bytes_read

bytes_read
end

def read_byte
if @chunk_remaining > 0
byte = @io.read_byte
if byte
@chunk_remaining -= 1
check_chunk_remaining_is_zero
end
next_chunk
return super if @chunk_remaining == 0

byte = @io.read_byte
if byte
@chunk_remaining -= 1
byte
else
super
raise IO::EOFError.new("Invalid HTTP chunked content")
end
end

def peek
while true
if @chunk_remaining > 0
peek = @io.peek
return nil unless peek

if @chunk_remaining < peek.size
peek = peek[0, @chunk_remaining]
end
next_chunk
return if @chunk_remaining == 0

return peek
elsif @read_chunk_start
read_chunk_start
next
end
peek = @io.peek || return

break
if @chunk_remaining < peek.size
peek = peek[0, @chunk_remaining]
elsif peek.size == 0
raise IO::EOFError.new("Invalid HTTP chunked content")
end

nil
peek
end

def skip(bytes_count)
if bytes_count <= @chunk_remaining
@io.skip(bytes_count)
@chunk_remaining -= bytes_count
check_chunk_remaining_is_zero
else
super
end
end

private def check_chunk_remaining_is_zero
# Check if the last read consumed a chunk and we
# need to start consuming the next one.
private def next_chunk
return if @chunk_remaining > 0 || @received_final_chunk

# As soon as we finish reading a chunk we return,
# in case the next content is delayed (see #3270).
# We set @read_chunk_start to true so we read the next
# chunk start on the next call to `read`.
# in case the following content is delayed (see #3270) and read the chunk
# delimiter and next chunk start on the next call to `read`.
read_crlf unless @chunk_remaining == -1 # -1 is the initial value

@chunk_remaining = read_chunk_size
if @chunk_remaining == 0
@read_chunk_start = true
read_trailer
@received_final_chunk = true
end
end

private def read_chunk_start
read_chunk_end
@chunk_remaining = @io.gets.not_nil!.to_i(16)
check_last_chunk
@read_chunk_start = false
private def read_crlf
char = @io.read_char
if char == '\r'
char = @io.read_char
end
if char != '\n'
raise IO::Error.new("Invalid HTTP chunked content: expected CRLF")
end
end

private def read_chunk_end
# Read "\r\n"
@io.skip(2)
private def read_chunk_size
line = @io.read_line(HTTP::MAX_HEADER_SIZE, chomp: true)

if index = line.byte_index(';'.ord)
chunk_size = line.byte_slice(0, index)
else
chunk_size = line
end

chunk_size.to_i?(16) || raise IO::Error.new("Invalid HTTP chunked content: invalid chunk size")
end

private def check_last_chunk
# If we read "0\r\n", we need to read another "\r\n"
read_chunk_end if @chunk_remaining == 0
private def read_trailer
while true
line = @io.read_line(HTTP::MAX_HEADER_SIZE, chomp: true)
break if line.empty?

key, value = HTTP.parse_header(line)
break unless headers.add?(key, value)
end
end

def write(slice : Bytes)

0 comments on commit c541aae

Please sign in to comment.