Rework the Random module (#3402)

* Rework random number generation to work for any integer type * Upgrade random float generation to 53 bit precision * Prevent overflows so it's possible to get random numbers in any Range * Add more specs for random number generation * Improve docs and messages in random.cr
crystal-lang · Oct 15, 2016 · dc2e6d7 · dc2e6d7
1 parent 3b26981
commit dc2e6d7
Showing 6 changed files with 305 additions and 59 deletions.
diff --git a/spec/std/random/isaac_spec.cr b/spec/std/random/isaac_spec.cr
@@ -342,7 +342,7 @@ describe "Random::ISAAC" do
 
     m = Random::ISAAC.new seed
     numbers.each do |n|
-      m.next_u32.should eq(n)
+      m.next_u.should eq(n)
     end
   end
 end
diff --git a/spec/std/random/mt19937_spec.cr b/spec/std/random/mt19937_spec.cr
@@ -211,7 +211,7 @@ describe "Random::MT19937" do
 
     m = Random::MT19937.new [0x123, 0x234, 0x345, 0x456]
     numbers.each do |n|
-      m.next_u32.should eq(n)
+      m.next_u.should eq(n)
     end
   end
 end
diff --git a/spec/std/random_spec.cr b/spec/std/random_spec.cr
@@ -1,18 +1,47 @@
 require "spec"
 
+class TestRNG(T)
+  include Random
+
+  def initialize(@data : Array(T))
+    @i = 0
+  end
+
+  def next_u : T
+    i = @i
+    @i = (i + 1) % @data.size
+    @data[i]
+  end
+
+  def reset
+    @i = 0
+  end
+end
+
+RNG_DATA_8  = [234u8, 153u8, 0u8, 0u8, 127u8, 128u8, 255u8, 255u8]
+RNG_DATA_32 = [31541451u32, 0u32, 1u32, 234u32, 342475672u32, 863u32, 0xffffffffu32, 50967465u32]
+RNG_DATA_64 = [148763248732657823u64, 18446744073709551615u64, 0u64,
+  32456325635673576u64, 2456245614625u64, 32452456246u64, 3956529762u64,
+  9823674982364u64, 234253464546456u64, 14345435645646u64]
+
 describe "Random" do
   it "limited number" do
     rand(1).should eq(0)
 
     x = rand(2)
     x.should be >= 0
     x.should be < 2
+
+    # issue 3350
+    5.times do
+      rand(Int64::MAX).should be >= 0
+    end
   end
 
   it "float number" do
     x = rand
-    x.should be > 0
-    x.should be < 1
+    x.should be >= 0
+    x.should be <= 1
   end
 
   it "limited float number" do
@@ -22,23 +51,27 @@ describe "Random" do
   end
 
   it "raises on invalid number" do
-    expect_raises ArgumentError, "incorrect rand value: 0" do
+    expect_raises ArgumentError, "invalid bound for rand: 0" do
       rand(0)
     end
   end
 
   it "does with inclusive range" do
-    rand(1..1).should eq(1)
-    x = rand(1..3)
-    x.should be >= 1
-    x.should be <= 3
+    [1..1, 1..3, 0u8..255u8, -1..1, Int64::MIN..7i64,
+      -7i64..Int64::MAX, 0u64..0u64].each do |range|
+      x = rand(range)
+      x.should be >= range.begin
+      x.should be <= range.end
+    end
   end
 
   it "does with exclusive range" do
-    rand(1...2).should eq(1)
-    x = rand(1...4)
-    x.should be >= 1
-    x.should be < 4
+    [1...2, 1...4, 0u8...255u8, -1...1, Int64::MIN...7i64,
+      -7i64...Int64::MAX, -1i8...0i8].each do |range|
+      x = rand(range)
+      x.should be >= range.begin
+      x.should be < range.end
+    end
   end
 
   it "does with inclusive range of floats" do
@@ -55,9 +88,18 @@ describe "Random" do
   end
 
   it "raises on invalid range" do
-    expect_raises ArgumentError, "incorrect rand value: 1...1" do
+    expect_raises ArgumentError, "invalid range for rand: 1...1" do
       rand(1...1)
     end
+    expect_raises ArgumentError, "invalid range for rand: 1..0" do
+      rand(1..0)
+    end
+    expect_raises ArgumentError, "invalid range for rand: 1.0...1.0" do
+      rand(1.0...1.0)
+    end
+    expect_raises ArgumentError, "invalid range for rand: 1.0..0.0" do
+      rand(1.0..0.0)
+    end
   end
 
   it "allows creating a new default random" do
@@ -67,16 +109,71 @@ describe "Random" do
   end
 
   it "allows creating a new default random with a seed" do
-    rand = Random.new(1234)
-    value1 = rand.rand
-
-    rand = Random.new(1234)
-    value2 = rand.rand
+    values = Array.new(2) do
+      rand = Random.new(1234)
+      {rand.rand, rand.rand(0xffffffffffffffff), rand.rand(2), rand.rand(-5i8..5i8)}
+    end
 
-    value1.should eq(value2)
+    values[0].should eq values[1]
   end
 
   it "gets a random bool" do
     Random::DEFAULT.next_bool.should be_a(Bool)
   end
+
+  it "generates by accumulation" do
+    rng = TestRNG.new([234u8, 153u8, 0u8, 0u8, 127u8, 128u8, 255u8, 255u8])
+    rng.rand(65536).should eq 60057    # 234*0x100 + 153
+    rng.rand(60000).should eq 0        # 0*0x100 + 0
+    rng.rand(30000).should eq 2640     # (127*0x100 + 128) % 30000
+    rng.rand(65535u16).should eq 60057 # 255*0x100 + 255 [skip]-> 234*0x100 + 153
+    rng.reset
+    rng.rand(65537).should eq 38934 # (234*0x10000 + 153*0x100 + 0) % 65537
+    rng.reset
+    rng.rand(32768u16).should eq 27289 # (234*0x100 + 153) % 32768
+  end
+
+  it "generates by truncation" do
+    rng = TestRNG.new([31541451u32, 0u32, 1u32, 234u32, 342475672u32])
+    rng.rand(1).should eq 0
+    rng.rand(10).should eq 0
+    rng.rand(2).should eq 1
+    rng.rand(256u64).should eq 234
+    rng.rand(255u8).should eq 217   # 342475672 % 255
+    rng.rand(65536).should eq 18635 # 31541451 % 65536
+    rng = TestRNG.new([0xffffffffu32, 0u32])
+    rng.rand(0x7fffffff).should eq 0
+  end
+
+  it "generates full-range" do
+    rng = TestRNG.new(RNG_DATA_64)
+    RNG_DATA_64.each do |a|
+      rng.rand(UInt64::MIN..UInt64::MAX).should eq a
+    end
+  end
+
+  it "generates full-range by accumulation" do
+    rng = TestRNG.new(RNG_DATA_8)
+    RNG_DATA_8.each_slice(2) do |(a, b)|
+      expected = a.to_u16 * 0x100u16 + b.to_u16
+      rng.rand(UInt16::MIN..UInt16::MAX).should eq expected
+    end
+  end
+
+  it "generates full-range by truncation" do
+    rng = TestRNG.new(RNG_DATA_32)
+    RNG_DATA_32.each do |a|
+      expected = a % 0x10000
+      rng.rand(UInt16::MIN..UInt16::MAX).should eq expected
+    end
+  end
+
+  it "generates full-range by negation" do
+    rng = TestRNG.new(RNG_DATA_8)
+    RNG_DATA_8.each do |a|
+      expected = a.to_i
+      expected -= 0x100 if a >= 0x80
+      rng.rand(Int8::MIN..Int8::MAX).should eq expected
+    end
+  end
 end
diff --git a/src/random.cr b/src/random.cr
@@ -23,6 +23,20 @@ require "random/mt19937"
 # rand     # => 0.293829
 # rand(10) # => 8
 # ```
+#
+# An instance of each class that includes `Random` is a random number generator with its own state.
+# Usually such RNGs can be initialized with a seed, which defines their initial state. It is
+# guaranteed that after initializing two different instances with the same seed, and then executing
+# the same calls on both of them, you will get the same results. This allows exactly reproducing the
+# same seemingly random events by just keeping the seed.
+#
+# It is possible to make a custom RNG by including `Random` and implementing `next_u` to return an
+# unsigned number of a pre-determined type (usually `UInt32`). The numbers generated by your RNG
+# must be uniformly distributed in the whole range of possible values for that type (e.g.
+# `0u32..UInt32::MAX`). This allows all other methods of `Random` to be based on this and still
+# produce uniformly distributed results. Your `Random` class should also have a way to initialize
+# it. For pseudo-random number generators that will usually be an integer seed, but there are no
+# rigid requirements for the initialization.
 module Random
   DEFAULT = MT19937.new
 
@@ -32,37 +46,34 @@ module Random
     Intrinsics.read_cycle_counter.to_u32
   end
 
-  # Initiates an instance with the given *seed*. (Default: `#new_seed`)
+  # Initializes an instance with the given *seed*. (Default: `#new_seed`)
   def self.new(seed = new_seed)
     MT19937.new(seed)
   end
 
-  # Generates a random `UInt32`.
-  abstract def next_u32
+  # Generates a random unsigned integer.
+  #
+  # The integers must be uniformly distributed between 0 and the maximal value for the chosen type.
+  abstract def next_u : UInt
 
   # Generates a random `Bool`.
   #
   # ```
   # Random.new.next_bool # => true
   # ```
   def next_bool : Bool
-    next_u32.even?
+    next_u.odd?
   end
 
-  # Generates a random `Int32`.
-  #
-  # ```
-  # Random.new.next_int # => 440038499
-  # Random.new.next_int # => -1848788484
-  # ```
+  # Same as `rand(Int32::MIN..Int32::MAX)`
   def next_int : Int32
-    next_u32.to_i32
+    rand_type(Int32)
   end
 
   # see `#rand`
   def next_float : Float64
-    # Divided by 2^32-1
-    next_u32 * (1.0/4294967295.0)
+    max_prec = 1u64 << 53 # Float64, excluding mantissa, has 2^53 values
+    rand(max_prec) / (max_prec - 1).to_f64
   end
 
   # Generates a random `Float64` between 0 and 1.
@@ -76,47 +87,179 @@ module Random
     next_float
   end
 
-  # Returns a random `Int32` which is greater than or equal to 0 and less than *max*.
+  # Generates a random integer which is greater than or equal to 0 and less than *max*.
+  #
+  # The return type always matches the supplied argument.
   #
   # ```
   # Random.new.rand(10)   # => 5
   # Random.new.rand(5000) # => 2243
   # ```
-  def rand(max : Int) : Int32
-    if max > 0
-      (next_u32 % max).to_i32
-    else
-      raise ArgumentError.new "incorrect rand value: #{max}"
-    end
+  def rand(max : Int) : Int
+    rand_int(max)
   end
 
+  {% for size in [8, 16, 32, 64] %}
+    {% utype = "UInt#{size}".id %}
+    {% for type in ["Int#{size}".id, utype] %}
+      private def rand_int(max : {{type}}) : {{type}}
+        unless max > 0
+          raise ArgumentError.new "invalid bound for rand: #{max}"
+        end
+
+        # The basic ideas of the algorithm are best illustrated with examples.
+        #
+        # Let's say we have a random number generator that gives uniformly distributed random
+        # numbers between 0 and 15. We need to get a uniformly distributed random number between
+        # 0 and 5 (`max` = 6). The typical mistake made in this case is to just use `rand() % 6`,
+        # but it is clear that some results will appear more often than others. So, the surefire
+        # approach is to make the RNG spit out numbers until it gives one inside our desired range.
+        # That is really wasteful though. So the approach taken here is to discard only a small
+        # range of the possible generated numbers, and use the modulo operation on the "valid" ones,
+        # like this (where X means "discard and try again"):
+        #
+        # Generated number:  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
+        #           Result:  0  1  2  3  4  5  0  1  2  3  4  5  X  X  X  X
+        #
+        # 12 is the `limit` here - the highest number divisible by `max` while still being within
+        # bounds of what the RNG can produce.
+        #
+        # On the other side of the spectrum is the problem of generating a random number in a higher
+        # range than what the RNG can produce. Let's say we have the same mentioned RNG, but we need
+        # a uniformly distributed random number between 0 and 255. All that needs to be done is to
+        # generate two random numbers between 0 and 15, and combine their bits
+        # (i.e. `rand()*16 + rand()`).
+        #
+        # Using a combination of these tricks, any RNG can be turned into any RNG, however, there
+        # are several difficult parts about this. The code below uses as few calls to the underlying
+        # RNG as possible, meaning that (with the above example) with `max` being 257, it would call
+        # the RNG 3 times. (Of course, it doesn't actually deal with RNGs that produce numbers
+        # 0 to 15, only with the `UInt8`, `UInt16`, `UInt32` and `UInt64` ranges.
+        #
+        # Another problem is how to actually compute the `limit`. The obvious way to do it, which is
+        # `(RAND_MAX + 1) / max * max`, fails because `RAND_MAX` is usually already the highest
+        # number that an integer type can hold. And even the `limit` itself will often be
+        # `RAND_MAX + 1`, meaning that we don't have to discard anything. The ways to deal with this
+        # are described below.
+
+        # if max - 1 <= typeof(next_u)::MAX
+        if typeof(next_u).new(max - 1) == max - 1
+          # One number from the RNG will be enough.
+          # All the computations will (almost) fit into `typeof(next_u)`.
+
+          # Relies on integer overflow + wraparound to find the highest number divisible by `max`.
+          limit = typeof(next_u).new(0) - (typeof(next_u).new(0) - max) % max
+          # `limit` might be 0, which means it would've been ``typeof(next_u)::MAX + 1``, but didn't
+          # fit into the integer type.
+
+          loop do
+            result = next_u
+
+            # For a uniform distribution we may need to throw away some numbers
+            if result < limit || limit == 0
+              return {{type}}.new(result % max)
+            end
+          end
+        else
+          # We need to find out how many random numbers need to be combined to be able to generate a
+          # random number of this magnitude.
+          # All the computations will be based on `{{utype}}` as the larger type.
+
+          # `rand_max - 1` is the maximal number we can get from combining `needed_parts` random
+          # numbers.
+          # Compute `rand_max` as `(typeof(next_u)::MAX + 1) ** needed_parts)`.
+          # If `rand_max` overflows, that means it has reached ``high({{utype}}) + 1``.
+          rand_max = {{utype}}.new(1) << (sizeof(typeof(next_u))*8)
+          needed_parts = 1
+          while rand_max < max && rand_max > 0
+            rand_max <<= sizeof(typeof(next_u))*8
+            needed_parts += 1
+          end
+
+          limit =
+            if rand_max > 0
+              # `rand_max` didn't overflow, so we can calculate the `limit` the straightforward way.
+              rand_max / max * max
+            else
+              # `rand_max` is ``{{utype}}::MAX + 1``, need the same wraparound trick. `limit` might
+              # overflow, which means it would've been ``{{utype}}::MAX + 1``, but didn't fit into
+              # the integer type.
+              {{utype}}.new(0) - ({{utype}}.new(0) - max) % max
+            end
+
+          loop do
+            # Build up the number combining multiple outputs from the RNG.
+            result = {{utype}}.new(next_u)
+            (needed_parts - 1).times do
+              result <<= sizeof(typeof(next_u))*8
+              result |= {{utype}}.new(next_u)
+            end
+
+            # For a uniform distribution we may need to throw away some numbers.
+            if result < limit || limit == 0
+              return {{type}}.new(result % max)
+            end
+          end
+        end
+      end
+
+      private def rand_range(range : Range({{type}}, {{type}})) : {{type}}
+        span = {{utype}}.new(range.end - range.begin)
+        if range.excludes_end?
+          unless range.begin < range.end
+            raise ArgumentError.new "invalid range for rand: #{range}"
+          end
+        else
+          unless range.begin <= range.end
+            raise ArgumentError.new "invalid range for rand: #{range}"
+          end
+          if range.begin == {{type}}::MIN && range.end == {{type}}::MAX
+            return rand_type({{type}})
+          end
+          span += 1
+        end
+        range.begin + {{type}}.new(rand_int(span))
+      end
+
+      # Generates a random integer in range `{{type}}::MIN..{{type}}::MAX`.
+      private def rand_type(type : {{type}}.class) : {{type}}
+        needed_parts = {{size/8}} / sizeof(typeof(next_u))
+
+        # Build up the number combining multiple outputs from the RNG.
+        result = {{utype}}.new(next_u)
+        (needed_parts - 1).times do
+          result <<= sizeof(typeof(next_u))*8
+          result |= {{utype}}.new(next_u)
+        end
+        {{type}}.new(result)
+      end
+    {% end %}
+  {% end %}
+
   # Returns a random `Float64` which is greater than or equal to 0 and less than *max*.
   #
   # ```
   # Random.new.rand(3.5)    # => 2.88938
   # Random.new.rand(10.725) # => 7.70147
   # ```
   def rand(max : Float) : Float64
-    if max > 0
-      next_u32 * (1 / (UInt32::MAX.to_f64 + 1)) * max
-    else
-      raise ArgumentError.new "incorrect rand value: #{max}"
+    unless max > 0
+      raise ArgumentError.new "invalid bound for rand: #{max}"
     end
+    max_prec = 1u64 << 53 # Float64, excluding mantissa, has 2^53 values
+    rand(max_prec) / max_prec.to_f64 * max
   end
 
-  # Returns a random `Int32` in the given *range*.
+  # Returns a random integer in the given *range*.
+  #
+  # The return type always matches the supplied argument.
   #
   # ```
-  # Random.new.rand(10..20) # => 14
+  # Random.new.rand(10..20)                 # => 14
+  # Random.new.rand(Int64::MIN..Int64::MAX) # => -5297435808626736140
   # ```
-  def rand(range : Range(Int, Int)) : Int32
-    span = range.end - range.begin
-    span += 1 unless range.excludes_end?
-    if span > 0
-      range.begin + rand(span)
-    else
-      raise ArgumentError.new "incorrect rand value: #{range}"
-    end
+  def rand(range : Range(Int, Int)) : Int
+    rand_range(range)
   end
 
   # Returns a random `Float64` in the given *range*.
@@ -126,10 +269,16 @@ module Random
   # ```
   def rand(range : Range(Float, Float)) : Float64
     span = range.end - range.begin
-    if span > 0 || !range.excludes_end?
-      range.begin + (range.excludes_end? ? rand(span) : rand * span)
+    if range.excludes_end?
+      unless range.begin < range.end
+        raise ArgumentError.new "invalid range for rand: #{range}"
+      end
+      range.begin + rand(span)
     else
-      raise ArgumentError.new "incorrect rand value: #{range}"
+      unless range.begin <= range.end
+        raise ArgumentError.new "invalid range for rand: #{range}"
+      end
+      range.begin + rand * span
     end
   end
 

diff --git a/src/random/isaac.cr b/src/random/isaac.cr
@@ -23,7 +23,7 @@ class Random::ISAAC
     init_by_array(seeds)
   end
 
-  def next_u32
+  def next_u
     if (@counter -= 1) == -1
       isaac
       @counter = 255

diff --git a/src/random/mt19937.cr b/src/random/mt19937.cr
@@ -129,7 +129,7 @@ class Random::MT19937
     @mt[0] = 0x80000000u32
   end
 
-  def next_u32
+  def next_u
     if @mti >= N
       if @mti == N + 1
         init_genrand(5489u32)