Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: opal/opal
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 2dce252fa67e
Choose a base ref
...
head repository: opal/opal
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 0e56d9630b81
Choose a head ref
  • 2 commits
  • 6 files changed
  • 2 contributors

Commits on May 28, 2015

  1. * Add implementation+tests for union method

    * Change Regexp initializer to support Ruby constants (not just character flags), check for false/nil/other object in options, and deal with options properly when an existing expression is passed in
    * Implement Regexp#options
    * Add comment explaining why match method constructs a new RE
    * Filter out encoding and extended flag related specs
    * Mark hex and other character escaping as bugs (filtered)
    * Update to latest RubySpecs that wrap extended in non_supported and exclude escape sequences
    wied03 committed May 28, 2015

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    0c9b220 View commit details

Commits on May 29, 2015

  1. Merge pull request #890 from wied03/bugfix/regexp_union

    Implement Regexp#union, Regexp#options, and proper handling of options arg in Regexp#new
    vais committed May 29, 2015

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    0e56d96 View commit details
Showing with 246 additions and 6 deletions.
  1. +110 −4 opal/corelib/regexp.rb
  2. +1 −1 spec/corelib
  3. +41 −0 spec/filters/bugs/regular_expressions.rb
  4. +90 −0 spec/filters/unsupported/regular_expressions.rb
  5. +1 −1 spec/opal/core/language/regexp_spec.rb
  6. +3 −0 spec/rubyspecs
114 changes: 110 additions & 4 deletions opal/corelib/regexp.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
class RegexpError < StandardError; end
class Regexp
`def.$$is_regexp = true`
IGNORECASE = 1
MULTILINE = 4

`def.$$is_regexp = true`

class << self
def escape(string)
@@ -23,11 +27,77 @@ def last_match(n=nil)
alias quote escape

# Builds a single Regexp that matches any of the given parts.
#
# Behavior visible in the body below:
# * With no parts, returns the never-matching pattern /(?!)/.
# * If the first argument is an Array and more arguments follow, raises
#   TypeError ('no implicit conversion of Array into String').
# * If the only argument is an Array, its elements are used as the parts.
# * String parts are escaped with +escape+; Regexp parts contribute their
#   source wrapped in parentheses; any other object is converted with
#   +to_str+ and then escaped.
# * Every Regexp part must report the same +options+ value, otherwise
#   TypeError ('All expressions must use the same options') is raised.
# * The pieces are joined with '|' and handed to +new+ together with the
#   options collected from the Regexp parts (undefined if there were none).
#
# NOTE(review): the first backtick line inside the method looks like the
# pre-change body retained by the diff view -- confirm before relying on it.
def union(*parts)
`new RegExp(parts.join(''))`
%x{
var is_first_part_array, quoted_validated, part, options, each_part_options;
if (parts.length == 0) {
return /(?!)/;
}
// cover the 2 arrays passed as arguments case
is_first_part_array = parts[0].$$is_array;
if (parts.length > 1 && is_first_part_array) {
#{raise TypeError, 'no implicit conversion of Array into String'}
}
// deal with splat issues (related to https://github.com/opal/opal/issues/858)
if (is_first_part_array) {
parts = parts[0];
}
options = undefined;
quoted_validated = [];
for (var i=0; i < parts.length; i++) {
part = parts[i];
if (part.$$is_string) {
quoted_validated.push(#{escape(`part`)});
}
else if (part.$$is_regexp) {
each_part_options = #{`part`.options};
if (options != undefined && options != each_part_options) {
#{raise TypeError, 'All expressions must use the same options'}
}
options = each_part_options;
quoted_validated.push('('+part.source+')');
}
else {
quoted_validated.push(#{escape(`part`.to_str)});
}
}
}
# Take advantage of logic that can parse options from JS Regex
new(`quoted_validated`.join('|'), `options`)
end

def new(regexp, options = undefined)
`new RegExp(regexp, options)`
# Creates a native JavaScript RegExp from +regexp+ and optional +options+.
#
# Behavior visible in the body below:
# * Raises RegexpError ("too short escape sequence: /.../") when +regexp+
#   is a String whose last character is a backslash.
# * +options+ that is undefined or falsy in Ruby terms (nil/false) means
#   "no flags".
# * When +regexp+ is already a Regexp, the +options+ argument is dropped
#   and the existing expression is passed to RegExp on its own.
# * Numeric +options+ are treated as a bitmask: the IGNORECASE bit adds the
#   'i' flag and the MULTILINE bit adds the 'm' flag.
# * Any other truthy +options+ value results in the 'i' flag.
def new(regexp, options = undefined)
%x{
// Play nice with IE8
if (regexp.$$is_string && regexp.substr(regexp.length-1, 1) == "\\") {
#{raise RegexpError, "too short escape sequence: /#{regexp}/"}
}
if (options == undefined || #{!options}) {
options = undefined;
}
if (options != undefined) {
if (regexp.$$is_regexp) {
// options are already in regex
options = undefined;
}
else if (options.$$is_number) {
var result = '';
if (#{IGNORECASE} & options) {
result += 'i';
}
if (#{MULTILINE} & options) {
result += 'm';
}
options = result;
}
else {
options = 'i';
}
}
return new RegExp(regexp, options);
}
end
end

@@ -70,6 +140,7 @@ def match(string, pos = undefined, &block)
}
}
// global RegExp maintains state, so not using self/this
var md, re = new RegExp(self.source, 'gm' + (self.ignoreCase ? 'i' : ''));
while (true) {
@@ -93,6 +164,41 @@ def ~
# Returns the +source+ property of the underlying native RegExp object.
def source
`self.source`
end

# Returns the receiver's flags as an integer bitmask built from the
# IGNORECASE and MULTILINE constants.
#
# The flags are recovered from the textual form of the native RegExp:
# the source is removed from +toString()+ and the first run of word
# characters that remains is read as the flag letters.
#
# * Returns 0 when no flag letters are found.
# * Raises TypeError ('uninitialized Regexp') when the textual form is
#   "/(?:)/".
# * Raises (with a message naming the letter) for any flag letter other
#   than 'i' or 'm'.
def options
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags is still experimental
# we need the flags and source does not give us that
%x{
var as_string, text_flags, result, text_flag;
as_string = self.toString();
if (as_string == "/(?:)/") {
#{raise TypeError, 'uninitialized Regexp'}
}
text_flags = as_string.replace(self.source, '').match(/\w+/);
result = 0;
// may have no flags
if (text_flags == null) {
return result;
}
// first match contains all of our flags
text_flags = text_flags[0];
for (var i=0; i < text_flags.length; i++) {
text_flag = text_flags[i];
switch(text_flag) {
case 'i':
result |= #{IGNORECASE};
break;
case 'm':
result |= #{MULTILINE};
break;
default:
#{raise "RegExp flag #{`text_flag`} does not have a match in Ruby"}
}
}
return result;
}
end

alias to_s source
end
2 changes: 1 addition & 1 deletion spec/corelib
41 changes: 41 additions & 0 deletions spec/filters/bugs/regular_expressions.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Spec filter for the "regular_expressions" group (bugs list): each +fails+
# entry names a Regexp.new spec, by its full description string, that is
# marked as failing. Most entries concern escaped control/meta character
# sequences (\C-, \c, \M-) and RegexpError cases for malformed patterns.
opal_filter "regular_expressions" do
fails "Regexp.new works by default for subclasses with overridden #initialize"
fails "Regexp.new given a String raises a RegexpError when passed an incorrect regexp"
fails "Regexp.new given a String with escaped characters raises a RegexpError if \\x is not followed by any hexadecimal digits"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\n'"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\t'"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\r'"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\f'"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\v'"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\a'"
fails "Regexp.new given a String with escaped characters accepts '\\C-\\e'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\n'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\t'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\r'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\f'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\v'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\a'"
fails "Regexp.new given a String with escaped characters accepts '\\c\\e'"
fails "Regexp.new given a String with escaped characters raises a RegexpError if more than six hexadecimal digits are given"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\n'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\t'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\r'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\f'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\v'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\a'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\e'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\n'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\t'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\r'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\f'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\v'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\a'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\C-\\e'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\n'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\t'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\r'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\f'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\v'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\a'"
fails "Regexp.new given a String with escaped characters accepts '\\M-\\c\\e'"
end
90 changes: 90 additions & 0 deletions spec/filters/unsupported/regular_expressions.rb
Original file line number Diff line number Diff line change
@@ -44,4 +44,94 @@
fails "String#scan supports \\G which matches the end of the previous match / string start for first match"

fails "String#sub with pattern, replacement supports \\G which matches at the beginning of the string"

fails "Regexp#options includes Regexp::FIXEDENCODING for a Regexp literal with the 'u' option"
fails "Regexp#options includes Regexp::FIXEDENCODING for a Regexp literal with the 'e' option"
fails "Regexp#options includes Regexp::FIXEDENCODING for a Regexp literal with the 's' option"
fails "Regexp#options does not include Regexp::FIXEDENCODING for a Regexp literal with the 'n' option"
fails "Regexp#options includes Regexp::NOENCODING for a Regexp literal with the 'n' option"

fails "Regexp.union returns a Regexp with the encoding of an ASCII-incompatible String argument"
fails "Regexp.union returns a Regexp with the encoding of a String containing non-ASCII-compatible characters"
fails "Regexp.union returns a Regexp with US-ASCII encoding if all arguments are ASCII-only"
fails "Regexp.union returns a Regexp with the encoding of multiple non-conflicting ASCII-incompatible String arguments"
fails "Regexp.union returns a Regexp with the encoding of multiple non-conflicting Strings containing non-ASCII-compatible characters"
fails "Regexp.union returns a Regexp with the encoding of a String containing non-ASCII-compatible characters and another ASCII-only String"
fails "Regexp.union returns a Regexp with UTF-8 if one part is UTF-8"
fails "Regexp.union raises ArgumentError if the arguments include conflicting ASCII-incompatible Regexps"
fails "Regexp.union raises ArgumentError if the arguments include conflicting fixed encoding Regexps"
fails "Regexp.union raises ArgumentError if the arguments include a fixed encoding Regexp and a String containing non-ASCII-compatible characters in a different encoding"
fails "Regexp.union raises ArgumentError if the arguments include a String containing non-ASCII-compatible characters and a fixed encoding Regexp in a different encoding"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only String"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only String"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only Regexp"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only Regexp"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible String and a String containing non-ASCII-compatible characters in a different encoding"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a String containing non-ASCII-compatible characters in a different encoding"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible String and a Regexp containing non-ASCII-compatible characters in a different encoding"
fails "Regexp.union raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a Regexp containing non-ASCII-compatible characters in a different encoding"

fails "Regexp.new given a Regexp uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters"
fails "Regexp.new given a Regexp uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters"
fails "Regexp.new given a Regexp returns a Regexp with US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding"
fails "Regexp.new given a Regexp returns a Regexp with source String having US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding"
fails "Regexp.new given a Regexp returns a Regexp with US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present"
fails "Regexp.new given a Regexp returns a Regexp with source String having US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present"
fails "Regexp.new given a Regexp returns a Regexp with UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present"
fails "Regexp.new given a Regexp returns a Regexp with source String having UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present"
fails "Regexp.new given a Regexp returns a Regexp with the input String's encoding"
fails "Regexp.new given a Regexp returns a Regexp with source String having the input String's encoding"
fails "Regexp.new given a Regexp sets the encoding to UTF-8 if the Regexp literal has the 'u' option"
fails "Regexp.new given a Regexp sets the encoding to EUC-JP if the Regexp literal has the 'e' option"
fails "Regexp.new given a Regexp sets the encoding to Windows-31J if the Regexp literal has the 's' option"
fails "Regexp.new given a Regexp sets the encoding to US-ASCII if the Regexp literal has the 'n' option and the source String is ASCII only"
fails "Regexp.new given a Regexp sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only"
fails "Regexp.new given a String uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters"
fails "Regexp.new given a String uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters"
fails "Regexp.new given a String returns a Regexp with US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding"
fails "Regexp.new given a String returns a Regexp with source String having US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding"
fails "Regexp.new given a String returns a Regexp with US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present"
fails "Regexp.new given a String returns a Regexp with source String having US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present"
fails "Regexp.new given a String returns a Regexp with UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present"
fails "Regexp.new given a String returns a Regexp with source String having UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present"
fails "Regexp.new given a String returns a Regexp with the input String's encoding"
fails "Regexp.new given a String returns a Regexp with source String having the input String's encoding"
fails "Regexp.new given a String sets the encoding to UTF-8 if the Regexp literal has the 'u' option"
fails "Regexp.new given a String sets the encoding to EUC-JP if the Regexp literal has the 'e' option"
fails "Regexp.new given a String sets the encoding to Windows-31J if the Regexp literal has the 's' option"
fails "Regexp.new given a String sets the encoding to US-ASCII if the Regexp literal has the 'n' option and the source String is ASCII only"
fails "Regexp.new given a String sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only"
fails "Regexp.new given a String with escaped characters uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters"
fails "Regexp.new given a String with escaped characters uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters"
fails "Regexp.new given a String with escaped characters returns a Regexp with US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding"
fails "Regexp.new given a String with escaped characters returns a Regexp with source String having US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding"
fails "Regexp.new given a String with escaped characters returns a Regexp with US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present"
fails "Regexp.new given a String with escaped characters returns a Regexp with source String having US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present"
fails "Regexp.new given a String with escaped characters returns a Regexp with UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present"
fails "Regexp.new given a String with escaped characters returns a Regexp with source String having UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present"
fails "Regexp.new given a String with escaped characters returns a Regexp with the input String's encoding"
fails "Regexp.new given a String with escaped characters returns a Regexp with source String having the input String's encoding"
fails "Regexp.new given a String with escaped characters sets the encoding to UTF-8 if the Regexp literal has the 'u' option"
fails "Regexp.new given a String with escaped characters sets the encoding to EUC-JP if the Regexp literal has the 'e' option"
fails "Regexp.new given a String with escaped characters sets the encoding to Windows-31J if the Regexp literal has the 's' option"
fails "Regexp.new given a String with escaped characters sets the encoding to US-ASCII if the Regexp literal has the 'n' option and the source String is ASCII only"
fails "Regexp.new given a String with escaped characters sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only"
fails "Regexp.new given a String ignores the third argument if it is 'e' or 'euc' (case-insensitive)"
fails "Regexp.new given a String ignores the third argument if it is 's' or 'sjis' (case-insensitive)"
fails "Regexp.new given a String ignores the third argument if it is 'u' or 'utf8' (case-insensitive)"
fails "Regexp.new given a String with escaped characters accepts \\u{H} for a single Unicode codepoint"
fails "Regexp.new given a String with escaped characters accepts \\u{HH} for a single Unicode codepoint"
fails "Regexp.new given a String with escaped characters accepts \\u{HHH} for a single Unicode codepoint"
fails "Regexp.new given a String with escaped characters accepts \\u{HHHH} for a single Unicode codepoint"
fails "Regexp.new given a String with escaped characters accepts \\u{HHHHH} for a single Unicode codepoint"
fails "Regexp.new given a String with escaped characters accepts \\u{HHHHHH} for a single Unicode codepoint"
fails "Regexp.new given a String with escaped characters accepts characters followed by \\u{HHHH}"
fails "Regexp.new given a String with escaped characters accepts \\u{HHHH} followed by characters"
fails "Regexp.new given a String with escaped characters accepts escaped hexadecimal digits followed by \\u{HHHH}"
fails "Regexp.new given a String with escaped characters accepts escaped octal digits followed by \\u{HHHH}"
fails "Regexp.new given a String with escaped characters accepts a combination of escaped octal and hexadecimal digits and \\u{HHHH}"
fails "Regexp.new given a String with escaped characters raises a RegexpError if less than four digits are given for \\uHHHH"
fails "Regexp.new given a String with escaped characters raises a RegexpError if the \\u{} escape is empty"
# regexp extended related
fails "Regexp.new given a String accepts a Fixnum of two or more options ORed together as the second argument"
end
2 changes: 1 addition & 1 deletion spec/opal/core/language/regexp_spec.rb
Original file line number Diff line number Diff line change
@@ -17,4 +17,4 @@
str = "a"
/[#{str}-z]/.should == /[a-z]/
end
end
end
3 changes: 3 additions & 0 deletions spec/rubyspecs
Original file line number Diff line number Diff line change
@@ -130,6 +130,9 @@ corelib/core/regexp/escape_spec
corelib/core/regexp/last_match_spec
corelib/core/regexp/match_spec
corelib/core/regexp/quote_spec
corelib/core/regexp/options_spec
corelib/core/regexp/union_spec
corelib/core/regexp/new_spec

corelib/language/BEGIN_spec
corelib/language/alias_spec