@@ -1851,7 +1851,7 @@ def each(&block)
1851
1851
if @separator
1852
1852
if @separator . empty?
1853
1853
@separator = "\n \n "
1854
- @skip = 10
1854
+ @skip = " \n "
1855
1855
end
1856
1856
1857
1857
if @limit
@@ -1868,21 +1868,43 @@ def each(&block)
1868
1868
end
1869
1869
end
1870
1870
1871
- def do_skip ( buffer )
1872
- return 0 unless @skip
1871
# Moves the first +byte_count+ bytes of +buffer+ onto +str+, finishes the
# record and yields it to the block.
#
# When <tt>@limit</tt> is set, up to PEEK_AHEAD_LIMIT extra bytes are pulled from
# <tt>@io</tt> and the record is completed by +read_to_char_boundary+; otherwise
# +str+ is passed straight to +IO.read_encode+. The record is then tainted,
# <tt>$.</tt> is assigned from <tt>@io.increment_lineno</tt>, leading skip
# characters are dropped from +buffer+, and whatever is still in +buffer+ is
# pushed back onto <tt>@io</tt>.
#
# +buffer+ is modified strictly in place (never rebound), so the caller's
# object is already empty by the time the block is called. This matters
# because the leftover bytes have been handed back to <tt>@io</tt>; if the
# caller still held them they would be consumed twice.
def read_and_yield_count_chars(str, buffer, byte_count, &block)
  str << buffer.slice!(0, byte_count)

  if @limit
    # Always read to char boundary because the +limit+ may have cut a multi-byte
    # character in the middle. Returning such a string would have an invalid encoding.
    # Append with << (not +=) so the caller's buffer object is the one that gets
    # topped up, pushed back and cleared below.
    buffer << (@io.read(PEEK_AHEAD_LIMIT) || '') if buffer.size < PEEK_AHEAD_LIMIT
    str, _peeked = read_to_char_boundary(@io, str, buffer)
  else
    # We are confident that our +str+ ends on a char boundary
    str = IO.read_encode(@io, str)
  end

  str.taint
  $. = @io.increment_lineno
  skip_contiguous_chars(buffer)

  # Unused bytes/chars should be saved for the next read. Since the block that we yield to
  # may +return+ we don't want to drop the bytes that are stored in +buffer+. To save,
  # unget them so the next read will fetch them again. This might be expensive and could
  # potentially use a little tuning. Maybe use an +unread(bytes)+ method which just moves
  # a pointer around. Think about this for the mmap stuff.
  @io.ungetc(buffer)
  buffer.clear

  yield str
end
1898
+
1899
# Finishes a record that needs no boundary handling: passes +str+ through
# +IO.read_encode+, taints the result, assigns <tt>$.</tt> from
# <tt>@io.increment_lineno</tt> and yields the result to the block.
def read_and_yield_entire_string(str, &block)
  record = IO.read_encode(@io, str)
  record.taint

  $. = @io.increment_lineno

  yield record
end
1883
1905
1884
1906
# method A, D
1885
- def read_to_separator
1907
+ def read_to_separator ( & block )
1886
1908
str = "" . force_encoding ( Encoding ::ASCII_8BIT )
1887
1909
buffer = "" . force_encoding ( Encoding ::ASCII_8BIT )
1888
1910
separator_size = @separator . bytesize
@@ -1899,25 +1921,7 @@ def read_to_separator
1899
1921
# the pattern/separator which may be >1. therefore, add the separator size.
1900
1922
count += separator_size
1901
1923
1902
- substring = buffer . slice! ( 0 , count )
1903
- str << substring
1904
-
1905
- str = IO . read_encode ( @io , str )
1906
- str . taint
1907
-
1908
- $. = @io . increment_lineno
1909
-
1910
- do_skip ( buffer )
1911
-
1912
- # Unused bytes/chars should be saved for the next read. Since the block that we yield to
1913
- # may +return+ we don't want to drop the bytes that are stored in +buffer+. To save,
1914
- # unget them so the next read will fetch them again. This might be expensive and could
1915
- # potentially use a little tuning. Maybe use an +unread(bytes)+ method which just moves
1916
- # a pointer around. Think about this for the mmap stuff.
1917
- @io . ungetc ( buffer )
1918
- buffer . clear
1919
- yield str
1920
-
1924
+ read_and_yield_count_chars ( str , buffer , count , &block )
1921
1925
str = "" . force_encoding ( Encoding ::ASCII_8BIT )
1922
1926
else
1923
1927
str << buffer
@@ -1928,44 +1932,13 @@ def read_to_separator
1928
1932
str << buffer
1929
1933
1930
1934
unless str . empty?
1931
- str = IO . read_encode ( @io , str )
1932
- str . taint
1933
- $. = @io . increment_lineno
1934
- yield str
1935
+ read_and_yield_entire_string ( str , &block )
1935
1936
end
1936
1937
end
1937
1938
1938
1939
# method B, E
1939
1940
1940
- def try_to_force_encoding ( io , str )
1941
- str . force_encoding ( io . external_encoding || Encoding . default_external )
1942
-
1943
- IO . read_encode io , str
1944
- end
1945
-
1946
- PEEK_AHEAD_LIMIT = 16
1947
-
1948
- def read_to_char_boundary ( io , str , buffer )
1949
- str . force_encoding ( io . external_encoding || Encoding . default_external )
1950
- return [ IO . read_encode ( io , str ) , 0 ] if str . valid_encoding?
1951
-
1952
- peek_ahead = 0
1953
- while buffer . size > 0 and peek_ahead < PEEK_AHEAD_LIMIT
1954
- str . force_encoding Encoding ::ASCII_8BIT
1955
- substring = buffer . slice! ( 0 , 1 )
1956
- str << substring
1957
- peek_ahead += 1
1958
-
1959
- str . force_encoding ( io . external_encoding || Encoding . default_external )
1960
- if str . valid_encoding?
1961
- return [ IO . read_encode ( io , str ) , peek_ahead ]
1962
- end
1963
- end
1964
-
1965
- [ IO . read_encode ( io , str ) , peek_ahead ]
1966
- end
1967
-
1968
- def read_to_separator_with_limit
1941
+ def read_to_separator_with_limit ( &block )
1969
1942
str = "" . force_encoding ( Encoding ::ASCII_8BIT )
1970
1943
buffer = "" . force_encoding ( Encoding ::ASCII_8BIT )
1971
1944
separator_size = @separator . bytesize
@@ -1984,39 +1957,13 @@ def read_to_separator_with_limit
1984
1957
# #index returns a 0-based location but we want a length (so +1) and it should include
1985
1958
# the pattern/separator which may be >1. therefore, add the separator size.
1986
1959
count += separator_size
1987
- bytes = count < wanted ? count : wanted
1988
- str << buffer . slice! ( 0 , bytes )
1989
-
1990
- # Always read to char boundary because the +limit+ may have cut a multi-byte
1991
- # character in the middle. Returning such a string would have an invalid encoding.
1992
- buffer += ( @io . read ( PEEK_AHEAD_LIMIT ) || '' ) if buffer . size < PEEK_AHEAD_LIMIT
1993
- str , bytes_read = read_to_char_boundary ( @io , str , buffer )
1994
- str . taint
1995
-
1996
- $. = @io . increment_lineno
1997
- do_skip ( buffer )
1998
- @io . ungetc ( buffer )
1999
- buffer . clear
2000
-
2001
- yield str
1960
+ count = count < wanted ? count : wanted
1961
+ read_and_yield_count_chars ( str , buffer , count , &block )
2002
1962
2003
1963
str = "" . force_encoding ( Encoding ::ASCII_8BIT )
2004
1964
else
2005
1965
if wanted < buffer . size
2006
- str << buffer . slice! ( 0 , wanted )
2007
-
2008
- # replenish the buffer if we don't have enough bytes to satisfy the peek ahead
2009
- buffer += ( @io . read ( PEEK_AHEAD_LIMIT ) || '' ) if buffer . size < PEEK_AHEAD_LIMIT
2010
- str , bytes_read = read_to_char_boundary ( @io , str , buffer )
2011
- str . taint
2012
-
2013
- $. = @io . increment_lineno
2014
- do_skip ( buffer )
2015
- @io . ungetc ( buffer )
2016
- buffer . clear
2017
-
2018
- yield str
2019
-
1966
+ read_and_yield_count_chars ( str , buffer , wanted , &block )
2020
1967
str = "" . force_encoding ( Encoding ::ASCII_8BIT )
2021
1968
else
2022
1969
str << buffer
@@ -2027,53 +1974,84 @@ def read_to_separator_with_limit
2027
1974
end until buffer . size == 0 && @io . eof?
2028
1975
2029
1976
unless str . empty?
2030
- str = IO . read_encode ( @io , str )
2031
- str . taint
2032
- $. = @io . increment_lineno
2033
- yield str
1977
+ read_and_yield_entire_string ( str , &block )
2034
1978
end
2035
1979
end
2036
1980
2037
1981
# Method G
2038
- def read_all
2039
- str = ""
1982
# Method G
#
# Accumulates everything <tt>@io.read</tt> returns into one binary string,
# reading at least once and again until <tt>@io.eof?</tt> is true, then hands
# the result to +read_and_yield_entire_string+. Nothing is yielded when no
# data was read.
def read_all(&block)
  contents = "".force_encoding(Encoding::ASCII_8BIT)

  # Read once unconditionally, then keep going only while the io reports
  # that it has not reached EOF.
  contents << @io.read
  contents << @io.read until @io.eof?

  return if contents.empty?

  read_and_yield_entire_string(contents, &block)
end
2052
1993
2053
1994
# Method H
2054
- def read_to_limit
2055
- str = ""
1995
# Method H
#
# Reads <tt>@io</tt> in chunks of <tt>@limit.abs</tt> bytes and passes each
# chunk to +read_and_yield_count_chars+ together with an empty buffer and the
# chunk's byte size. Stops once <tt>@io.eof?</tt> is true, or as soon as
# <tt>@io.read</tt> returns +nil+, so a stream with nothing left to read
# yields nothing instead of failing on <tt>str << nil</tt>.
def read_to_limit(&block)
  wanted = @limit.abs

  while (chunk = @io.read(wanted))
    # Every record starts from a fresh binary string; nothing is carried over
    # between iterations, so there is no trailing remainder to flush afterwards.
    str = "".force_encoding(Encoding::ASCII_8BIT)
    str << chunk

    read_and_yield_count_chars(str, '', str.bytesize, &block)

    break if @io.eof?
  end
end
2065
2009
2066
- $. = @io . increment_lineno
2067
- yield str
2010
+ # Utility methods
2068
2011
2069
- str = ""
2070
- end until @io . eof?
2012
# Tags +str+ in place with +io+'s external encoding (falling back to
# <tt>Encoding.default_external</tt> when the io reports none) and returns
# whatever +IO.read_encode+ produces for it.
def try_to_force_encoding(io, str)
  target_encoding = io.external_encoding || Encoding.default_external
  str.force_encoding(target_encoding)

  IO.read_encode(io, str)
end
2017
+
2018
+ PEEK_AHEAD_LIMIT = 16
2019
+
2020
# Extends +str+ one byte at a time from the front of +buffer+ until it is
# valid in +io+'s external encoding (or <tt>Encoding.default_external</tt>
# when the io reports none).
#
# Gives up when +buffer+ runs out or after PEEK_AHEAD_LIMIT bytes have been
# moved, in which case +str+ may still be invalid. Both +str+ and +buffer+
# are modified in place.
#
# Returns a two-element array: the result of <tt>IO.read_encode(io, str)</tt>
# and the number of bytes that were taken from +buffer+.
def read_to_char_boundary(io, str, buffer)
  # The target encoding cannot change while we peek, so look it up once.
  encoding = io.external_encoding || Encoding.default_external
  str.force_encoding(encoding)

  peek_ahead = 0
  until str.valid_encoding? || buffer.empty? || peek_ahead >= PEEK_AHEAD_LIMIT
    # Append as raw bytes, then re-tag so validity is judged in the target encoding.
    str.force_encoding(Encoding::ASCII_8BIT)
    str << buffer.slice!(0, 1)
    peek_ahead += 1
    str.force_encoding(encoding)
  end

  [IO.read_encode(io, str), peek_ahead]
end
2039
+
2040
# Removes, in place, the run of characters at the front of +buffer+ that are
# equal to <tt>@skip</tt>. For example, if <tt>@skip</tt> is "\n" and the
# buffer contents are "\n\n\nAbc...", everything up to 'A' is discarded.
#
# Returns the number of bytes removed; 0 when <tt>@skip</tt> is not set or the
# buffer does not start with it.
def skip_contiguous_chars(buffer)
  return 0 unless @skip

  # take_while stops at the first character that differs, so only the
  # leading run is inspected.
  run_length = buffer.each_char.take_while { |char| char == @skip }.size
  buffer.slice!(0, run_length).bytesize
end
2079
2057
end
0 commit comments