I’ve been playing with Scala parser combinators for some time and have learned some ways to make them behave well and do most of the things I want using the built-in functions.
But how do you create an embedded language (e.g. PHP or Ruby's ERB)? It requires that whitespace is not ignored outside the embedded code.
I managed to create a simple parser that matches all text up to a given regular expression match, but I'm looking for a better, more elegant way to do this. Perhaps there is an already-defined function that does what is needed?
The test language parses text such as the following:
now: [[ millis; ]]
and now: [[; millis; ]]
and is implemented by the following code:
package test
import scala.util.parsing.combinator.RegexParsers
import scala.util.matching.Regex
/** A single instruction of the embedded test language, as produced by the parser. */
sealed abstract class Statement

/** Writes the literal text `s` to standard output when interpreted. */
final case class Print(s: String) extends Statement

/** Writes the current time in milliseconds to standard output when interpreted. */
final case class Millis() extends Statement
/** Parser and interpreter for a tiny embedded template language.
  *
  * Text outside the `[[` ... `]]` delimiters is kept verbatim and turned into
  * `Print` statements. Between the delimiters, `millis` becomes a `Millis`
  * statement and statements are separated by `;`.
  */
object SimpleLang extends RegexParsers {

  /** Builds a parser that consumes everything up to, but not including, the
    * first match of `r` in the remaining input.
    *
    * This parser reads from the current offset as-is and performs no whitespace
    * handling of its own, so the returned text keeps its whitespace.
    *
    * @param r pattern marking where the consumed text ends
    * @return a parser yielding the text before the match; it fails without
    *         consuming input when `r` does not match anywhere in the rest
    */
  def until(r: Regex): Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] = {
      val source = in.source
      val offset = in.offset
      r.findFirstMatchIn(source.subSequence(offset, source.length)) match {
        case Some(matched) =>
          // matched.start is relative to the sub-sequence, i.e. to `offset`.
          Success(
            source.subSequence(offset, offset + matched.start).toString,
            in.drop(matched.start)
          )
        case None =>
          // At end of input `in.first` is an EOF marker character, so describe
          // that case in words instead of embedding it in the message.
          val found = if (in.atEnd) "end of source" else "`" + in.first + "'"
          Failure("string matching regex `" + r + "' expected but " + found + " found", in)
      }
    }
  }

  /** Same as the regex variant of `until`, but `s` is matched literally.
    *
    * @param s literal delimiter; it is quoted so regex metacharacters lose their meaning
    */
  def until(s: String): Parser[String] = until(java.util.regex.Pattern.quote(s).r)

  /** Executes the statements in order, writing their output to standard output.
    *
    * @param stats statements to run; an empty list does nothing
    */
  def interpret(stats: List[Statement]): Unit = stats.foreach {
    case Print(s) => print(s)
    case Millis() => print(System.currentTimeMillis())
  }

  /** Parses a complete template into its statement list.
    *
    * @param input the full template text
    * @return the parsed statements
    * @throws RuntimeException if `input` is not a valid template
    */
  def apply(input: String): List[Statement] = parseAll(beginning, input) match {
    case Success(tree, _) => tree
    case e: NoSuccess     => throw new RuntimeException("Syntax error: " + e)
  }

  /** Entry rule: either the template opens directly with `[[`, or the text
    * before the first `[[` becomes a leading `Print`.
    *
    * Both alternatives require a `[[`, so a template without any embedded code
    * is rejected.
    */
  // NOTE(review): the "[[" literal in the first alternative presumably skips
  // leading whitespace (RegexParsers default) - confirm this is intended.
  def beginning: Parser[List[Statement]] = (
    "[[" ~> stats |
      until("[[") ~ "[[" ~ stats ^^ {
        case s ~ _ ~ ss => Print(s) :: ss
      }
  )

  /** One or more statements separated by `;`. */
  def stats: Parser[List[Statement]] = rep1sep(stat, ";")

  /** A single statement: `millis`, or a `]]` that closes the code section and
    * captures the raw text up to the next `[[` (which is consumed too) or,
    * failing that, up to the end of the input.
    */
  def stat: Parser[Statement] = (
    "millis" ^^^ { Millis() } |
      "]]" ~> ((until("[[") <~ "[[") | until("\\z".r)) ^^ { text => Print(text) }
  )

  /** Demo entry point: parses a sample template, prints the tree, then runs it. */
  def main(args: Array[String]): Unit = {
    val tree = SimpleLang("now: [[ millis; ]]\nand now: [[; millis; ]]")
    println(tree)
    interpret(tree)
  }
}