Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #126 from MoeOrganization/prakashk/trans-op-2
Perl5 string transliteration (method and operator)
  • Loading branch information
Stevan Little committed Jun 15, 2013
2 parents 20c4653 + 609a1f5 commit 52885b6
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/main/scala/org/moe/ast/AST.scala
Expand Up @@ -149,6 +149,9 @@ case class SubstExpressionNode(pattern: AST, replacement: AST, flags: AST) exten
case class RegexMatchNode(target: AST, pattern: AST, flags: AST) extends AST
case class RegexSubstNode(target: AST, pattern: AST, replacement: AST, flags: AST) extends AST

/**
 * A standalone transliteration expression, as produced by the parser for
 * `tr/search/replacement/flags`. The parser wraps all three parts as string
 * literals. When this node is the right-hand operand handled by Operators it
 * is rewritten into a [[TransOpNode]] whose target is the left-hand operand.
 */
case class TransExpressionNode(search: AST, replacement: AST, flags: AST) extends AST
/**
 * Transliteration applied to a target expression. The interpreter evaluates
 * the target, search, replacement and flags (in that order) and then calls
 * the `trans` method on the evaluated target with the other three as arguments.
 */
case class TransOpNode(target: AST, search: AST, replacement: AST, flags: AST) extends AST

case class StringSequenceNode(exprs: List[AST]) extends AST
case class EvalExpressionNode(expr: String) extends AST

Expand Down
24 changes: 24 additions & 0 deletions src/main/scala/org/moe/ast/Serializer.scala
Expand Up @@ -531,6 +531,30 @@ object Serializer {
)
)

case TransExpressionNode(search, replacement, flags) => {
  // Serialize the children first, then wrap them under the node's type name.
  val fields = Map(
    "search"      -> toJSON(search),
    "replacement" -> toJSON(replacement),
    "flags"       -> toJSON(flags)
  )
  JSONObject(Map("TransExpressionNode" -> JSONObject(fields)))
}
case TransOpNode(target, search, replacement, flags) => {
  // Serialize the children first, then wrap them under the node's type name.
  val fields = Map(
    "target"      -> toJSON(target),
    "search"      -> toJSON(search),
    "replacement" -> toJSON(replacement),
    "flags"       -> toJSON(flags)
  )
  JSONObject(Map("TransOpNode" -> JSONObject(fields)))
}

case StringSequenceNode(strParts) => JSONObject(
Map(
"StringSequenceNode" -> JSONObject(
Expand Down
9 changes: 9 additions & 0 deletions src/main/scala/org/moe/interpreter/guts/Operators.scala
Expand Up @@ -40,6 +40,7 @@ object Operators extends Utils {
// TODO: interpolation of the regex variable value
case VariableAccessNode (pattern) => i.evaluate(env, RegexMatchNode(lhs, rhs, StringLiteralNode("")))
case SubstExpressionNode(pattern, replacement, flags) => i.evaluate(env, RegexSubstNode(lhs, pattern, replacement, flags))
case TransExpressionNode(search, replacement, flags) => {
  // Bind the left-hand operand as the transliteration target and evaluate that.
  val boundTrans = TransOpNode(lhs, search, replacement, flags)
  i.evaluate(env, boundTrans)
}
}
}

Expand Down Expand Up @@ -86,6 +87,14 @@ object Operators extends Utils {
callMethod(receiver, "subst", List(argPattern, argReplacement, argFlags))
}

case (env, TransOpNode(target: AST, search: AST, replacement: AST, flags: AST)) => {
  // The invocant is evaluated first, then the arguments strictly left to right.
  val invocant = i.evaluate(env, target)
  val transArgs = List(search, replacement, flags).map(node => i.evaluate(env, node))
  callMethod(invocant, "trans", transArgs)
}

case (env, ExecuteCommandNode(cmd: AST)) => {
val cmd_str = i.evaluate(env, cmd).asInstanceOf[MoeStrObject].getNativeValue
import sys.process._
Expand Down
20 changes: 19 additions & 1 deletion src/main/scala/org/moe/parser/MoeProductions.scala
Expand Up @@ -333,8 +333,26 @@ trait MoeProductions extends MoeLiterals with JavaTokenParsers with PackratParse
}

// tr (transliteration) operator
// Modifier letters that may follow a tr expression. The pattern also matches
// the empty string, so the resulting literal may be "".
lazy val transModifiers: Parser[AST] =
  """[cdsr]*""".r ^^ { modifiers => StringLiteralNode(modifiers) }

// NOTE(review): this single-line quoteExpression has no tr alternatives, and a
// multi-line quoteExpression is defined again further down. Presumably this is
// the pre-change line of the diff; confirm that only one definition remains.
def quoteExpression = (substExpression_2 | substExpression_1 | matchExpression | quoteOp | quoteRegexOp)
// tr/search/replacement/ followed by optional modifiers; both lists come from
// a single '/'-delimited pair. Missing modifiers become an empty string literal.
def transExpression_1 = ("tr" ~> quotedPair('/')) ~ opt(transModifiers) ^^ {
  case (search, replacement) ~ modifiers =>
    TransExpressionNode(
      StringLiteralNode(search),
      StringLiteralNode(replacement),
      modifiers.getOrElse(StringLiteralNode(""))
    )
}
// tr form where search and replacement are two separately bracketed strings,
// followed by optional modifiers. Missing modifiers become an empty string literal.
def transExpression_2 = ("tr" ~> bracketedString) ~ bracketedString ~ opt(transModifiers) ^^ {
  case search ~ replacement ~ modifiers =>
    TransExpressionNode(
      StringLiteralNode(search),
      StringLiteralNode(replacement),
      modifiers.getOrElse(StringLiteralNode(""))
    )
}

// Any quote-like expression. The alternatives are tried in the order listed
// and the first one that parses wins, so the substitution, match and
// transliteration forms are attempted before the plain quote operators.
def quoteExpression = (
substExpression_1
| substExpression_2
| matchExpression
| transExpression_1
| transExpression_2
| quoteOp
| quoteRegexOp
)

def matchOp = simpleExpression ~ "=~" ~ expression ^^ {
case left ~ op ~ right => BinaryOpNode(left, op, right)
Expand Down
19 changes: 19 additions & 0 deletions src/main/scala/org/moe/runtime/builtins/StrClass.scala
Expand Up @@ -304,6 +304,25 @@ object StrClass {
)
)

// Str.trans($search, $replace, $flags?) -- delegates to MoeStrObject.trans
strClass.addMethod(
  new MoeMethod(
    "trans",
    new MoeSignature(
      List(
        new MoePositionalParameter("$search"),
        new MoePositionalParameter("$replace"),
        new MoeOptionalParameter("$flags")
      )
    ),
    env,
    (e) => {
      val invocant    = self(e)
      val searchArg   = e.getAs[MoeStrObject]("$search").get
      val replaceArg  = e.getAs[MoeStrObject]("$replace").get
      // $flags is optional: anything other than a string means "no flags"
      val flagsArg = e.get("$flags") match {
        case Some(str: MoeStrObject) => str.copy
        case _                       => getStr("")
      }
      invocant.trans(r, searchArg, replaceArg, flagsArg)
    }
  )
)

/**
* List of Operators to support:
* - infix:<.>
Expand Down
64 changes: 64 additions & 0 deletions src/main/scala/org/moe/runtime/nativeobjects/MoeStrObject.scala
Expand Up @@ -134,6 +134,70 @@ class MoeStrObject(
): MoeStrObject =
pattern.replace(r, this, replacement, Some(flags))

// transliteration -- like in Perl5, except /r flag is the default
// behavior; i.e. the original string is not modified and the
// transliterated string is returned

import scala.util.matching.Regex._
/**
 * Returns a transliterated copy of this string, following Perl 5 `tr///`
 * rules with the `/r` behaviour always on: the receiver is never modified.
 *
 * @param r       runtime used to box the resulting string
 * @param search  characters to look for; `x-y` denotes a range unless the
 *                dash is the first or last character of the list
 * @param replace replacement characters, matched positionally with `search`.
 *                Without the `d` flag an empty list means "same as search",
 *                a short list is padded with its last character and a long
 *                list has its excess trailing characters ignored
 * @param flags   any combination of `c` (complement the search list),
 *                `d` (delete found characters that have no replacement) and
 *                `s` (squash runs of characters that were transliterated to
 *                the same character)
 * @return a new string object holding the transliterated text
 */
def trans(
  r: MoeRuntime,
  search: MoeStrObject,
  replace: MoeStrObject,
  flags: MoeStrObject
): MoeStrObject = {
  // Expands "a-e" style ranges. A dash only forms a range when it has a
  // character on both sides, so a leading or trailing dash stays literal.
  def expandCharSequence(s: String): List[Char] =
    s.foldLeft(List[Char]()) { (acc, c) =>
      if (acc.length > 1 && acc.last == '-') acc.dropRight(2) ++ (acc.init.last to c).toList
      else acc ++ List(c)
    }

  val flagChars  = flags.unboxToString.get
  val complement = flagChars.contains('c')
  val squash     = flagChars.contains('s')
  val delete     = flagChars.contains('d')

  val searchList  = expandCharSequence(search.unboxToString.get)
  val givenReplace = expandCharSequence(replace.unboxToString.get)

  val replaceList =
    if (delete) givenReplace                    // used as is: unmatched search chars get deleted
    else if (givenReplace.isEmpty) searchList   // empty replacement replicates the search list
    else if (givenReplace.length > searchList.length)
      givenReplace.take(searchList.length)      // ignore the excess at the END of the list
    else
      givenReplace.padTo(searchList.length, givenReplace.last)

  // Reversed before toMap so that, for a character listed more than once in
  // the search list, the FIRST occurrence decides its replacement.
  val transMap  = searchList.zip(replaceList).reverse.toMap
  val searchSet = searchList.toSet

  // State: output so far, plus the character most recently written by a
  // transliteration while nothing untouched has been written after it.
  // Squashing only ever collapses against that character.
  val (transliterated, _) =
    getNativeValue.foldLeft((new StringBuilder, Option.empty[Char])) {
      case ((out, lastReplaced), c) =>
        if (searchSet.contains(c) == complement) {
          // not selected: copy through and break any run of replacements
          (out.append(c), None)
        }
        else {
          val mapped: Option[Char] =
            if (complement) {
              if (delete) None
              // every complemented character maps to the last replacement
              // character; with no replacement given it maps to itself
              else Some(givenReplace.lastOption.getOrElse(c))
            }
            else transMap.get(c) // None only happens with the d flag

          mapped match {
            case Some(x) if squash && lastReplaced == Some(x) => (out, lastReplaced)
            case Some(x)                                      => (out.append(x), Some(x))
            case None                                         => (out, lastReplaced) // deleted
          }
        }
    }

  r.NativeObjects.getStr(transliterated.toString)
}

// MoeNativeObject overrides

override def copy = new MoeStrObject(getNativeValue, getAssociatedType)
Expand Down
91 changes: 91 additions & 0 deletions t/002-operators/002-transliteration.t
@@ -0,0 +1,91 @@
# Tests for string transliteration: the Str.trans method and the tr/// operator
# bound with =~. Transliteration returns a new string; the input is not modified.
use Test::More;

# literal invocants
is("ABC".trans("ABC", "abc"), "abc", "... literal transliteration using Str.trans method");
is("ABC" =~ tr/ABC/abc/, "abc", "... literal transliteration using tr operator");

# variable invocants; the variable must keep its original value
{
my $var = "ABC";
is($var.trans("ABC", "abc"), "abc", "... variable transliteration using Str.trans method");
is($var =~ tr/ABC/abc/, "abc", "... variable transliteration using tr operator");
is($var, "ABC", "... input variable is unchanged after transliteration");
}

# character ranges and literal dashes
is("ABC" =~ tr/A-C/a-c/,
"abc",
"... transliteration with character ranges");

is("ABC-DEF".trans("- AB-Z", "_ a-z"),
"abc_def",
"... If the first character is a dash it isn't part of a range");

is("ABC-DEF".trans("A-YZ-", "a-z_"),
"abc_def",
"... If the last character is a dash it isn't part of a range");

is("ABCDEF".trans( 'AB-E', 'ab-e' ),
"abcdeF",
"... The two sides can consists of both chars and ranges");

is("ABCDEFGH".trans( 'A-CE-G', 'a-ce-g' ),
"abcDefgH",
"... The two sides can consist of multiple ranges");

# rot13 built from several ranges on each side
is("Whfg nabgure Zbr unpxre".trans('a-zA-Z', 'n-za-mN-ZA-M'),
"Just another Moe hacker",
"... Multiple ranges interpreted in string");

# full-alphabet fixtures shared by the range tests below
my $a = "abcdefghijklmnopqrstuvwxyz";
my $b = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

is($a.trans('a-z', 'A-Z'), $b);
is($b.trans('A-Z', 'a-z'), $a);
is($a.trans('b-y', 'B-Y'), 'aBCDEFGHIJKLMNOPQRSTUVWXYz');

# chained ranges: the end of one range doubles as the start of the next
is($b.trans('A-H-Z', 'a-h-z'), $a,
'... ambiguous ranges combined');

is($b.trans('-H-Z', '_h-z'),
'ABCDEFGhijklmnopqrstuvwxyz',
'... leading ranges interpreted as string');

is($b.trans('A-H-', 'a-h_'), 'abcdefghIJKLMNOPQRSTUVWXYZ',
'... trailing ranges interpreted as string');

is($b.trans('-A-H-', '_a-h_'), 'abcdefghIJKLMNOPQRSTUVWXYZ',
'... leading, trailing ranges interpreted as string');

is("hello".trans("l", ""), "hello", "... empty replacement list uses search list");

# complement, squeeze/squash, delete

is('bookkeeper'.trans('a-z', 'a-z', 's'), 'bokeper',
'... s flag (squash)');

is('bookkeeper'.trans('ok', '', 'd'), 'beeper',
'... d flag (delete)');

# a replacement list shorter than the search list is padded with its last character
is('ABC123DEF456GHI'.trans('A-Z', 'x'), 'xxx123xxx456xxx',
'... no flags');

is('ABC123DEF456GHI'.trans('A-Z', 'x', 'c'),'ABCxxxDEFxxxGHI',
'... with c (complement) flag');

is('ABC111DEF222GHI'.trans('0-9', 'x', 's'),'ABCxDEFxGHI',
'... with s (squash) flag');

is('ABC111DEF222GHI'.trans('A-Z', 'x', 'cs'),'ABCxDEFxGHI',
'... with s and c');

is('ABC111DEF222GHI'.trans('A-Z', '', 'cd'),'ABCDEFGHI',
'... with d and c');

is('Good&Plenty'.trans('len', 'x'), 'Good&Pxxxty',
'... no flags');

is('Good&Plenty'.trans('len', 'x', 's'), 'Good&Pxty',
'... squashing depends on replacement repeat, not searchlist repeat');

# the untouched 't' after the run is not squashed into the replaced 't'
is('Good&Plenty'.trans('len', 't', 's'), 'Good&Ptty',
'... squashing depends on replacement repeat, not searchlist repeat');

done_testing();

0 comments on commit 52885b6

Please sign in to comment.