jruby · Feb 9, 2017 · Feb 9, 2017
Showing with 41 additions and 19 deletions.

+15 −19 core/src/main/java/org/jruby/util/Pack.java

+12 −0 spec/ruby/core/string/unpack/a_spec.rb

+6 −0 spec/ruby/core/string/unpack/b_spec.rb

+8 −0 spec/ruby/core/string/unpack/u_spec.rb
diff --git a/core/src/main/java/org/jruby/util/Pack.java b/core/src/main/java/org/jruby/util/Pack.java
@@ -786,16 +786,15 @@ private static ByteList encodes(Ruby runtime, ByteList io2Append,byte[]charsToEn
      * @see RubyArray#pack
      **/
     public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList formatString) {
-        Encoding encoding = encodedString.getEncoding();
-        RubyArray result = runtime.newArray();
+        // Encoding encoding = encodedString.getEncoding();
+        final RubyArray result = runtime.newArray();
         // FIXME: potentially could just use ByteList here?
         ByteBuffer format = ByteBuffer.wrap(formatString.getUnsafeBytes(), formatString.begin(), formatString.length());
         ByteBuffer encode = ByteBuffer.wrap(encodedString.getUnsafeBytes(), encodedString.begin(), encodedString.length());
-        int type = 0;
         int next = safeGet(format);
 
         mainLoop: while (next != 0) {
-            type = next;
+            int type = next;
             next = safeGet(format);
             if (UNPACK_IGNORE_NULL_CODES.indexOf(type) != -1 && next == 0) {
                 next = safeGetIgnoreNull(format);
@@ -813,8 +812,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
             if (next == '_' || next == '!') {
                 int index = NATIVE_CODES.indexOf(type);
                 if (index == -1) {
-                    throw runtime.newArgumentError("'" + next +
-                            "' allowed only after types " + NATIVE_CODES);
+                    throw runtime.newArgumentError("'" + next + "' allowed only after types " + NATIVE_CODES);
                 }
                 type = MAPPED_CODES.charAt(index);
 
@@ -825,8 +823,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                 next = next == '>' ? BE : LE;
                 int index = ENDIANESS_CODES.indexOf(type + next);
                 if (index == -1) {
-                    throw runtime.newArgumentError("'" + (char)next +
-                            "' allowed only after types sSiIlLqQ");
+                    throw runtime.newArgumentError("'" + (char)next + "' allowed only after types sSiIlLqQ");
                 }
                 type = ENDIANESS_CODES.charAt(index);
                 next = safeGet(format);
@@ -835,7 +832,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
             }
 
             // How many occurrences of 'type' we want
-            int occurrences = 0;
+            int occurrences;
             if (next == 0) {
                 occurrences = 1;
             } else {
@@ -893,7 +890,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                            }
                     }
 
-                    result.append(RubyString.newString(runtime, new ByteList(potential, 0, occurrences, encoding, false)));
+                    result.append(RubyString.newString(runtime, new ByteList(potential, 0, occurrences, ASCIIEncoding.INSTANCE, false)));
                     }
                     break;
                 case 'Z' :
@@ -916,7 +913,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                             t++;
                         }
 
-                        result.append(RubyString.newString(runtime, new ByteList(potential, 0, t, encoding, false)));
+                        result.append(RubyString.newString(runtime, new ByteList(potential, 0, t, ASCIIEncoding.INSTANCE, false)));
 
                         // In case when the number of occurences is
                         // explicitly specified, we have to read up
@@ -943,7 +940,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                     }
                     byte[] potential = new byte[occurrences];
                     encode.get(potential);
-                    result.append(RubyString.newString(runtime, new ByteList(potential, encoding, false)));
+                    result.append(RubyString.newString(runtime, new ByteList(potential, ASCIIEncoding.INSTANCE, false)));
                     break;
                 case 'b' :
                     {
@@ -960,7 +957,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                             }
                             lElem[lCurByte] = (bits & 1) != 0 ? (byte)'1' : (byte)'0';
                         }
-                        result.append(RubyString.newString(runtime, new ByteList(lElem, encoding, false)));
+                        result.append(RubyString.newString(runtime, new ByteList(lElem, ASCIIEncoding.INSTANCE, false)));
                     }
                     break;
                 case 'B' :
@@ -979,7 +976,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                             lElem[lCurByte] = (bits & 128) != 0 ? (byte)'1' : (byte)'0';
                         }
 
-                        result.append(RubyString.newString(runtime, new ByteList(lElem, encoding, false)));
+                        result.append(RubyString.newString(runtime, new ByteList(lElem, ASCIIEncoding.INSTANCE, false)));
                     }
                     break;
                 case 'h' :
@@ -997,7 +994,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                             }
                             lElem[lCurByte] = sHexDigits[bits & 15];
                         }
-                        result.append(RubyString.newString(runtime, new ByteList(lElem, encoding, false)));
+                        result.append(RubyString.newString(runtime, new ByteList(lElem, ASCIIEncoding.INSTANCE, false)));
                     }
                     break;
                 case 'H' :
@@ -1015,7 +1012,7 @@ public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList fo
                             }
                             lElem[lCurByte] = sHexDigits[(bits >>> 4) & 15];
                         }
-                        result.append(RubyString.newString(runtime, new ByteList(lElem, encoding, false)));
+                        result.append(RubyString.newString(runtime, new ByteList(lElem, ASCIIEncoding.INSTANCE, false)));
                     }
                     break;
 
@@ -1083,7 +1080,7 @@ else if (encode.hasRemaining()) {
                             }
                         }
                     }
-                    result.append(RubyString.newString(runtime, new ByteList(lElem, 0, index, encoding, false)));
+                    result.append(RubyString.newString(runtime, new ByteList(lElem, 0, index, ASCIIEncoding.INSTANCE, false)));
                 }
                 break;
 
@@ -1261,8 +1258,7 @@ else if (encode.hasRemaining()) {
                                 // but should use UTF8Encoding facilities
                                 // from Joni, once it starts prefroming
                                 // UTF-8 content validation. 
-                                result.append(
-                                        runtime.newFixnum(utf8Decode(encode)));
+                                result.append(runtime.newFixnum(utf8Decode(encode)));
                             } catch (IllegalArgumentException e) {
                                 throw runtime.newArgumentError(e.getMessage());
                             }

diff --git a/spec/ruby/core/string/unpack/a_spec.rb b/spec/ruby/core/string/unpack/a_spec.rb
@@ -27,6 +27,12 @@
       ["a\x00 b\x00\v", ["a\x00 b\x00\v"]],
     ].should be_computed_by(:unpack, "A*")
   end
+
+  it "decodes into raw (ascii) string values" do
+    str = "str".force_encoding('UTF-8').unpack("A*")[0]
+    str.encoding.name.should == 'ASCII-8BIT'
+  end
+
 end
 
 describe "String#unpack with format 'a'" do
@@ -48,4 +54,10 @@
       ["a\x00 b\v",     ["a\x00 b\v"]]
     ].should be_computed_by(:unpack, "a*")
   end
+
+  it "decodes into raw (ascii) string values" do
+    str = "".unpack("a*")[0]
+    str.encoding.name.should == 'ASCII-8BIT'
+  end
+
 end
diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb
@@ -181,4 +181,10 @@
   it "ignores spaces between directives" do
     "\x01\x00".unpack("b b").should == ["1", "0"]
   end
+
+  it "decodes into raw (ascii) string values" do
+    str = "s".force_encoding('UTF-8').unpack("b*")[0]
+    str.encoding.name.should == 'ASCII-8BIT'
+  end
+
 end
diff --git a/spec/ruby/core/string/unpack/u_spec.rb b/spec/ruby/core/string/unpack/u_spec.rb
@@ -26,6 +26,14 @@
     "".unpack("u").should == [""]
   end
 
+  it "decodes into raw (ascii) string values" do
+    str = "".unpack("u")[0]
+    str.encoding.name.should == 'ASCII-8BIT'
+
+    str = "1".force_encoding('UTF-8').unpack("u")[0]
+    str.encoding.name.should == 'ASCII-8BIT'
+  end
+
   it "decodes the complete string ignoring newlines when given a single directive" do
     "#86)C\n#1$5&\n".unpack("u").should == ["abcDEF"]
   end