Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Basic regex support: just a wrapper around the Scala (Java) regex classes.
  • Loading branch information
prakashk committed May 27, 2013
1 parent bc0a748 commit 0a1b2cc
Show file tree
Hide file tree
Showing 13 changed files with 410 additions and 10 deletions.
6 changes: 6 additions & 0 deletions src/main/scala/org/moe/ast/AST.scala
Expand Up @@ -142,3 +142,9 @@ case class DoWhileNode(condition: AST, body: StatementsNode) extends AST

case class ForeachNode(topic: AST, list: AST, body: StatementsNode) extends AST
case class ForNode(init: AST, condition: AST, update: AST, body: StatementsNode) extends AST

// A bare regex literal; `rx` is the pattern text found between the delimiters.
case class RegexLiteralNode(rx: String) extends AST
// A match expression: a regex pattern plus its modifier flags.
case class MatchExpressionNode(pattern: AST, flags: AST) extends AST
// A substitution expression: a regex pattern, the replacement text and the modifier flags.
case class SubstExpressionNode(pattern: AST, replacement: AST, flags: AST) extends AST
// A match expression bound to the `target` it is applied to via `=~`.
case class RegexMatchNode(target: AST, pattern: AST, flags: AST) extends AST
// A substitution expression bound to the `target` it is applied to via `=~`.
case class RegexSubstNode(target: AST, pattern: AST, replacement: AST, flags: AST) extends AST
50 changes: 50 additions & 0 deletions src/main/scala/org/moe/ast/Serializer.scala
Expand Up @@ -18,6 +18,8 @@ object Serializer {
case ClassLiteralNode() => "ClassLiteralNode"
case SuperCallNode() => "SuperCallNode"

// A regex literal serializes to a single-key object whose value is the pattern text.
case RegexLiteralNode(value) => JSONObject(Map("RegexLiteralNode" -> value.toString))

case PairLiteralNode(key, value) => JSONObject(
Map(
"PairLiteralNode" -> JSONObject(
Expand Down Expand Up @@ -481,6 +483,54 @@ object Serializer {
)
)

// Each regex node serializes to a single-key object named after the node,
// whose value maps every field name to the serialized child node.

// `target =~ /pattern/flags`
case RegexMatchNode(target, pattern, flags) => JSONObject(
Map(
"RegexMatchNode" -> JSONObject(
Map(
"target" -> toJSON(target),
"pattern" -> toJSON(pattern),
"flags" -> toJSON(flags)
)
)
)
)

// `target =~ s/pattern/replacement/flags`
case RegexSubstNode(target, pattern, replacement, flags) => JSONObject(
Map(
"RegexSubstNode" -> JSONObject(
Map(
"target" -> toJSON(target),
"pattern" -> toJSON(pattern),
"replacement" -> toJSON(replacement),
"flags" -> toJSON(flags)
)
)
)
)

// a match expression that is not (yet) bound to a target
case MatchExpressionNode(pattern, flags) => JSONObject(
Map(
"MatchExpressionNode" -> JSONObject(
Map(
"pattern" -> toJSON(pattern),
"flags" -> toJSON(flags)
)
)
)
)

// a substitution expression that is not (yet) bound to a target
case SubstExpressionNode(pattern, replacement, flags) => JSONObject(
Map(
"SubstExpressionNode" -> JSONObject(
Map(
"pattern" -> toJSON(pattern),
"replacement" -> toJSON(replacement),
"flags" -> toJSON(flags)
)
)
)
)

case x => "stub: " + x
}

Expand Down
14 changes: 13 additions & 1 deletion src/main/scala/org/moe/interpreter/guts/Literals.scala
Expand Up @@ -81,6 +81,18 @@ object Literals extends Utils {
r.NativeObjects.getArray(result:_*)
}

// A bare regex literal evaluates to a Regex object created from the pattern text alone.
case (env, RegexLiteralNode(value)) => r.NativeObjects.getRegex(value)

// A match expression evaluates to a Regex object created from the pattern text and the flag string.
// NOTE(review): the inner match only covers a RegexLiteralNode pattern with
// StringLiteralNode flags; any other combination raises scala.MatchError.
case (env, MatchExpressionNode(pattern: AST, flags: AST)) =>
(pattern, flags) match {
case (RegexLiteralNode(p), StringLiteralNode(f)) => r.NativeObjects.getRegex(p, f)
}

// A substitution expression evaluates to a two-element array:
// the Regex object followed by the replacement string.
// NOTE(review): the inner match only covers a RegexLiteralNode pattern with
// StringLiteralNode replacement and flags; any other combination raises scala.MatchError.
case (env, SubstExpressionNode(pattern: AST, replacement: AST, flags: AST)) =>
(pattern, replacement, flags) match {
case (RegexLiteralNode(p), StringLiteralNode(r_), StringLiteralNode(f)) =>
r.NativeObjects.getArray(List(r.NativeObjects.getRegex(p, f), r.NativeObjects.getStr(r_)) : _*)
}
}

}
}
27 changes: 27 additions & 0 deletions src/main/scala/org/moe/interpreter/guts/Operators.scala
Expand Up @@ -31,6 +31,15 @@ object Operators extends Utils {
}
}

// regexes

// `lhs =~ rhs`: a literal match/substitution on the right-hand side is rewritten
// into the dedicated regex node and evaluated through it.
case (env, BinaryOpNode(lhs: AST, "=~", rhs: AST)) => {
  rhs match {
    case MatchExpressionNode(pattern, flags) => i.evaluate(env, RegexMatchNode(lhs, pattern, flags))
    case SubstExpressionNode(pattern, replacement, flags) => i.evaluate(env, RegexSubstNode(lhs, pattern, replacement, flags))
    // The right-hand side may be any expression (a variable, a string, ...).
    // Evaluate it and let the target's `match` method decide whether the value
    // is usable, instead of failing with scala.MatchError.
    case _ => callMethod(i.evaluate(env, lhs), "match", List(i.evaluate(env, rhs)))
  }
}

// other binary operators

case (env, BinaryOpNode(lhs: AST, operator: String, rhs: AST)) => {
Expand All @@ -56,5 +65,23 @@ object Operators extends Utils {
val argFalse = new MoeLazyEval(i, env, falseExpr)
callMethod(receiver, "infix:<?:>", List(argTrue, argFalse))
}

// regex operations

// Evaluate the target, then the pattern and the flags (in that order), and
// dispatch to the target's `match` method.
case (env, RegexMatchNode(target: AST, pattern: AST, flags: AST)) => {
  val invocant = i.evaluate(env, target)
  val matchArgs = List(pattern, flags).map(node => i.evaluate(env, node))
  callMethod(invocant, "match", matchArgs)
}

// Evaluate the target, then the pattern, the replacement and the flags (in that
// order), and dispatch to the target's `subst` method.
case (env, RegexSubstNode(target: AST, pattern: AST, replacement: AST, flags: AST)) => {
  val invocant = i.evaluate(env, target)
  val substArgs = List(pattern, replacement, flags).map(node => i.evaluate(env, node))
  callMethod(invocant, "subst", substArgs)
}

}
}
8 changes: 8 additions & 0 deletions src/main/scala/org/moe/parser/MoeLiterals.scala
Expand Up @@ -66,6 +66,13 @@ trait MoeLiterals extends JavaTokenParsers {
def selfLiteral : Parser[SelfLiteralNode] = "self".r ^^^ SelfLiteralNode()
def superLiteral : Parser[SuperCallNode] = "super".r ^^^ SuperCallNode()

// Regex Literal

// Body of a regex literal: any run of escape sequences (a backslash followed by
// one character, so `\/` does not terminate the literal) or characters other
// than `/`. The pattern also matches the empty string.
// NOTE(review): the token parsers appear to skip leading whitespace before a
// regex token by default, which would drop whitespace at the start of the body - confirm.
def regexString = """(\\.|[^/])*""".r

// TODO: support for other delimiters
// A regex literal is a regex body enclosed in `/` delimiters; the delimiters are discarded.
def regexLiteral: Parser[RegexLiteralNode] =
  ("/" ~> regexString <~ "/") ^^ (body => RegexLiteralNode(body))

def literalValue: Parser[AST] = (
floatNumber
| intNumber
Expand All @@ -79,5 +86,6 @@ trait MoeLiterals extends JavaTokenParsers {
| string
| selfLiteral
| superLiteral
| regexLiteral
)
}
38 changes: 29 additions & 9 deletions src/main/scala/org/moe/parser/MoeProductions.scala
Expand Up @@ -13,7 +13,7 @@ trait MoeProductions extends MoeLiterals with JavaTokenParsers with PackratParse
*********************************************************************
*/

lazy val expression: PackratParser[AST] = assignOp
lazy val expression: PackratParser[AST] = matchOp | assignOp

// TODO: left or xor
// TODO: left and
Expand Down Expand Up @@ -77,14 +77,8 @@ trait MoeProductions extends MoeLiterals with JavaTokenParsers with PackratParse
// left * / % x
lazy val mulOp: PackratParser[AST] = mulOp ~ "[*/%x]".r ~ expOp ^^ {
case left ~ op ~ right => BinaryOpNode(left, op, right)
} | matchOp

// left =~ TODO: !~
lazy val matchOp: PackratParser[AST] = matchOp ~ "=~" ~ expOp ^^ {
case left ~ op ~ right => BinaryOpNode(left, op, right)
} | expOp

// TODO: left =~ !~
// TODO: right ! ~ \ and unary + and -

// This one is right-recursive (associative) instead of left
Expand Down Expand Up @@ -118,7 +112,7 @@ trait MoeProductions extends MoeLiterals with JavaTokenParsers with PackratParse
lazy val applyOp: PackratParser[AST] = (applyOp <~ ".") ~ identifier ~ ("(" ~> repsep(expression, ",") <~ ")").? ^^ {
case invocant ~ method ~ Some(args) => MethodCallNode(invocant, method, args)
case invocant ~ method ~ None => MethodCallNode(invocant, method, List())
} | subroutineCall
} | regexExpression | subroutineCall

lazy val subroutineCall: PackratParser[AST] = namespacedIdentifier ~ ("(" ~> repsep(expression, ",") <~ ")") ^^ {
case sub ~ args => SubroutineCallNode(sub, args)
Expand Down Expand Up @@ -149,7 +143,8 @@ trait MoeProductions extends MoeLiterals with JavaTokenParsers with PackratParse
*/

lazy val simpleExpression: PackratParser[AST] = (
arrayIndex
regexExpression
| arrayIndex
| hashIndex
| hash
| array
Expand Down Expand Up @@ -302,6 +297,31 @@ trait MoeProductions extends MoeLiterals with JavaTokenParsers with PackratParse
case vars ~ _ ~ exprs => MultiAttributeAssignmentNode(vars.map({case AttributeNameNode(aname) => aname}), exprs)
}

/**
* regex match/substitution etc
*/

// Zero or more modifier letters (i, g, s, m, x), wrapped in a string literal node.
lazy val regexModifiers: Parser[AST] =
  """[igsmx]*""".r ^^ (letters => StringLiteralNode(letters))

// Match expression: a regex literal with an optional leading `m`, followed by
// optional modifiers. Missing modifiers become an empty string literal.
def matchExpression: Parser[AST] = {
  val modifiers: Parser[AST] =
    opt(regexModifiers) ^^ (found => found.getOrElse(StringLiteralNode("")))

  ("m".? ~> regexLiteral) ~ modifiers ^^ {
    case rx ~ mods => MatchExpressionNode(rx, mods)
  }
}

// Substitution expression: `s`, a regex literal, the replacement text up to the
// next unescaped `/`, then optional modifiers. Missing modifiers become an empty
// string literal.
def substExpression: Parser[AST] = {
  val replacementText: Parser[AST] =
    ("""(\\.|[^/])*""".r <~ "/") ^^ (text => StringLiteralNode(text))
  val modifiers: Parser[AST] =
    opt(regexModifiers) ^^ (found => found.getOrElse(StringLiteralNode("")))

  ("s" ~> regexLiteral) ~ replacementText ~ modifiers ^^ {
    case rx ~ subst ~ mods => SubstExpressionNode(rx, subst, mods)
  }
}

// TODO: tr (transliteration) operator

// A regex expression is either a substitution or a match; substitution is tried first.
def regexExpression = substExpression | matchExpression

// Binding operator: `<simple expression> =~ <expression>` becomes a "=~" binary operation.
def matchOp = simpleExpression ~ "=~" ~ expression ^^ { parsed =>
  parsed match {
    case target ~ operator ~ operand => BinaryOpNode(target, operator, operand)
  }
}

/**
*********************************************************************
* Now we are getting into statements,
Expand Down
6 changes: 6 additions & 0 deletions src/main/scala/org/moe/runtime/MoeRuntime.scala
Expand Up @@ -102,6 +102,7 @@ class MoeRuntime (
val intClass = new MoeClass("Int", Some(VERSION), Some(AUTHORITY), Some(scalarClass))
val numClass = new MoeClass("Num", Some(VERSION), Some(AUTHORITY), Some(scalarClass))
val exceptionClass = new MoeClass("Exception", Some(VERSION), Some(AUTHORITY), Some(scalarClass))
val regexClass = new MoeClass("Regex", Some(VERSION), Some(AUTHORITY), Some(scalarClass))

// set the associated class for all classes
// this must be classClass because these are
Expand All @@ -120,6 +121,7 @@ class MoeRuntime (
intClass.setAssociatedType(Some(MoeClassType(Some(coreClassClass))))
numClass.setAssociatedType(Some(MoeClassType(Some(coreClassClass))))
exceptionClass.setAssociatedType(Some(MoeClassType(Some(coreClassClass))))
regexClass.setAssociatedType(Some(MoeClassType(Some(coreClassClass))))

// add all these classes to the corePackage
corePackage.addClass(objectClass)
Expand All @@ -140,6 +142,7 @@ class MoeRuntime (
corePackage.addClass(intClass)
corePackage.addClass(numClass)
corePackage.addClass(exceptionClass)
corePackage.addClass(regexClass)

setupBuiltins

Expand Down Expand Up @@ -281,6 +284,9 @@ class MoeRuntime (
e
}

// Regex objects are created as scalars associated with the core "Regex" class.

// Builds a Regex object from a pattern without flags.
def getRegex (rx: String) =
  new MoeRegexObject(rx, flags = None, t = Some(MoeScalarType(getCoreClassFor("Regex"))))

// Builds a Regex object from a pattern and its flag string.
def getRegex (rx: String, flags: String) =
  new MoeRegexObject(rx, flags = Some(flags), t = Some(MoeScalarType(getCoreClassFor("Regex"))))

def fixupSubroutine (c: MoeSubroutine): MoeSubroutine = {
c.setAssociatedType(Some(MoeCodeType(getCoreClassFor("Code"))))
c
Expand Down
39 changes: 39 additions & 0 deletions src/main/scala/org/moe/runtime/builtins/StrClass.scala
Expand Up @@ -265,6 +265,45 @@ object StrClass {
)
)

// regex matching
// Str.match($rx, $flags?): dispatches on the runtime type of $rx and delegates
// to the invocant's `matches`; any value other than a Regex or a Str raises
// IncompatibleType.
strClass.addMethod(
new MoeMethod(
"match",
new MoeSignature(List(new MoePositionalParameter("$rx"),
new MoeOptionalParameter("$flags"))),
env,
(e) => {
// NOTE(review): $flags is declared in the signature but is never read here.
e.get("$rx") match {
case Some(rx: MoeRegexObject) => self(e).matches(r, rx)
case Some(s: MoeStrObject) => self(e).matches(r, s)
case _ => throw new MoeErrors.IncompatibleType("Str or Regex expected")
}
}
)
)

// Str.subst($rx, $replacement, $flags?): dispatches on the runtime type of $rx
// and delegates to the invocant's `subst`; any value other than a Regex or a
// Str raises IncompatibleType.
strClass.addMethod(
  new MoeMethod(
    "subst",
    new MoeSignature(
      List(
        new MoePositionalParameter("$rx"),
        new MoePositionalParameter("$replacement"),
        new MoeOptionalParameter("$flags")
      )
    ),
    env,
    (e) => {
      val substitute = e.getAs[MoeStrObject]("$replacement").get
      // an absent or non-Str $flags falls back to the empty flag string
      val modifiers = e.get("$flags") match {
        case Some(supplied: MoeStrObject) => supplied.copy
        case _ => getStr("")
      }
      e.get("$rx") match {
        case Some(regex: MoeRegexObject) => self(e).subst(r, regex, substitute, modifiers)
        case Some(text: MoeStrObject) => self(e).subst(r, text, substitute, modifiers)
        case _ => throw new MoeErrors.IncompatibleType("Str or Regex expected")
      }
    }
  )
)

/**
* List of Operators to support:
* - infix:<.>
Expand Down
75 changes: 75 additions & 0 deletions src/main/scala/org/moe/runtime/nativeobjects/MoeRegexObject.scala
@@ -0,0 +1,75 @@
package org.moe.runtime.nativeobjects

import org.moe.runtime._
import org.moe.runtime.nativeobjects._

import scala.util.matching.Regex

/**
 * Native object wrapping a Scala (Java) regular expression.
 *
 * The native value is the pair of pattern source and optional flag string.
 * Of the recognised flags only `g` (global) currently changes behaviour, and
 * only in [[find]] and [[replace]].
 *
 * @param rx    the pattern source
 * @param flags modifier letters used when a call does not supply its own
 * @param t     the associated Moe type, if any
 */
class MoeRegexObject(
    rx: String,
    flags: Option[String] = None,
    t: Option[MoeType] = None
  ) extends MoeNativeObject[(String, Option[String])]((rx, flags), t) {

  private val regex = new Regex(rx).unanchored

  def getRegex = regex
  def getFlags = flags.getOrElse("")

  /**
   * Tests whether the pattern occurs anywhere in `text`.
   * The flags are not consulted.
   */
  def matches (r: MoeRuntime, text: MoeStrObject): MoeBoolObject = r.NativeObjects.getBool(
    regex.findFirstIn(text.unboxToString.get).nonEmpty
  )

  /**
   * Translates a string of flag letters into a map of enabled option names.
   *
   * @throws NoSuchElementException if `flags` contains an unknown letter
   */
  private def regexFlagsToMap(flags: String): Map[String, Boolean] = {
    val validFlags = Map(
      'i' -> "ignore_case",
      'g' -> "global",
      'm' -> "match_multiple_lines",
      's' -> "match_single_line",
      'x' -> "extended"
    )
    flags.map { letter =>
      // same exception type as a failed map lookup, but with a message that names the problem
      val option = validFlags.getOrElse(
        letter,
        throw new NoSuchElementException("unknown regex flag: " + letter)
      )
      option -> true
    }.toMap
  }

  /**
   * Decides whether an operation applies to every match.
   * Flags supplied with the call take precedence over the flags of this object.
   */
  private def isGlobal(callFlags: Option[MoeStrObject]): Boolean = {
    val effectiveFlags = callFlags.map(_.unboxToString.get).getOrElse(getFlags)
    regexFlagsToMap(effectiveFlags).getOrElse("global", false)
  }

  /**
   * Finds the matches of the pattern in `target`.
   *
   * Previously the matches were computed and then discarded, because the body
   * ended on a value definition; they are now returned.
   *
   * @param flags per-call flags; when absent the flags of this object apply
   * @return every match when the global flag is set, otherwise at most the first match
   */
  def find (r: MoeRuntime,
            target: MoeStrObject,
            flags: Option[MoeStrObject] = None
  ): List[String] = {
    // TODO: other flags
    val findAll = isGlobal(flags)
    val text = target.unboxToString.get
    if (findAll)
      regex.findAllIn(text).toList
    else
      regex.findFirstIn(text).toList
  }

  /**
   * Replaces matches of the pattern in `target` with `replacement`.
   *
   * @param flags per-call flags; when absent the flags of this object apply
   * @return a new Str with every match replaced when the global flag is set,
   *         otherwise with only the first match replaced
   */
  def replace (
    r: MoeRuntime,
    target: MoeStrObject,
    replacement: MoeStrObject,
    flags: Option[MoeStrObject]
  ): MoeStrObject = {
    // TODO: other flags
    val replaceAll = isGlobal(flags)
    r.NativeObjects.getStr(
      if (replaceAll)
        regex.replaceAllIn(target.unboxToString.get, replacement.unboxToString.get)
      else
        regex.replaceFirstIn(target.unboxToString.get, replacement.unboxToString.get)
    )
  }

  // MoeNativeObject overrides

  override def copy = new MoeRegexObject(getNativeValue._1, getNativeValue._2, getAssociatedType)

}

0 comments on commit 0a1b2cc

Please sign in to comment.