SlideShare una empresa de Scribd logo
1 de 66
Descargar para leer sin conexión
Parser Combinators in Scala
     Илья Ключников
       @lambdamix


                              1
Комбинаторные библиотеки
●   Actors
●   Parsers
●   ScalaCheck, Specs
●   Scalaz
●   SBT
●   EDSLs
●   ...

                                     2
33/35   11/14   8/9   4/13




                             3
Intro: combinators, parsers




        Scala Parser Combinators from the Ground Up




                 How to write typical parser




33/35             11/14 Pros, cons             8/9    4/13



                   Advanced techniques
                                                             4
Parser?
●   Трансформирует текст в структуру


                                          +


       2*3 + 4                    *           3



                              2       3




                                                  5
Hello, parser
import scala.util.parsing.combinator._
import syntactical.StandardTokenParsers

/** Abstract syntax tree for the arithmetic expressions built by `ArithParsers`. */
sealed trait Expr
/** Integer literal (built from a numeric token). */
case class Num(i: Int) extends Expr
/** Variable reference (built from an identifier token). */
case class Var(n: String) extends Expr
/** Node built for `e1 + e2`. */
case class Plus(e1: Expr, e2: Expr) extends Expr
/** Node built for `e1 * e2`. */
case class Mult(e1: Expr, e2: Expr) extends Expr

/**
 * Token-level parser for arithmetic expressions over numbers, identifiers,
 * "+", "*" and parentheses. Each rule lists its longer alternative first and
 * the bare operand second:
 *
 *   expr   ::= term "+" expr | term
 *   term   ::= factor "*" term | factor
 *   factor ::= numericLit | ident | "(" expr ")"
 *
 * `ImplicitConversions` is mixed in so that two-argument constructors such as
 * `Plus` and `Mult` can be passed directly to `^^` after a `~` sequence.
 */
object ArithParsers extends StandardTokenParsers with ImplicitConversions {
  // Punctuation the lexer must recognise as delimiter tokens.
  lexical.delimiters += ("(", ")", "+", "*")
  // Sum: the right operand is again an `expr`, so the rule is right-recursive.
  def expr: Parser[Expr] =
    term ~ ("+" ~> expr) ^^ Plus | term
  // Product: nested below `expr`, so "*" groups tighter than "+".
  def term: Parser[Expr] =
    factor ~ ("*" ~> term) ^^ Mult | factor
  // Atom: number, variable, or parenthesised expression (parentheses dropped by ~> and <~).
  // NOTE(review): `s.toInt` presumably throws for literals outside the Int range — confirm.
  def factor: Parser[Expr] =
    numericLit ^^ { s => Num(s.toInt) } | ident ^^ Var | "(" ~> expr <~ ")"

    // Entry point: tokenises `s` with the lexical scanner and runs `expr` wrapped in `phrase`.
    def parseExpr(s: String) = phrase(expr)(new lexical.Scanner(s))
}


scala> ArithParsers.parseExpr("1")
res1: ArithParsers.ParseResult[parsers2.Expr] = [1.2] parsed: Num(1)

scala> ArithParsers.parseExpr("1 + 1 * 2")
res2: ArithParsers.ParseResult[parsers2.Expr] = [1.10] parsed: Plus(Num(1),Mult(Num(1),Num(2)))

scala> ArithParsers.parseExpr("a * (a * a)")
res3: ArithParsers.ParseResult[parsers2.Expr] = [1.12] parsed: Mult(Var(a),Mult(Var(a),Var(a)))
                                                                                                  6
Example 2: Lambda calculus
t ::=                           terms:
 x                             variable
 λx.t                    abstraction
 tt                          application


         x y z = ((x y) z)
        λx.λy.y = λx.(λy.y)
                                           7
Example 2
/** Abstract syntax tree for the lambda-calculus terms built by `LamParsers`. */
sealed trait Term
/** Variable occurrence, identified by its name. */
case class Var(n: String) extends Term
/** Abstraction: binds variable `v` in `body`. */
case class Lam(v: Var, body: Term) extends Term
/** Application of `t1` to `t2`. */
case class App(t1: Term, t2: Term) extends Term

/**
 * Packrat parser for untyped lambda-calculus terms. A backslash is the ASCII
 * stand-in for the lambda symbol.
 *
 *   term    ::= appTerm | lam
 *   lam     ::= "\" vrb "." term
 *   appTerm ::= appTerm aTerm | aTerm      (left-recursive)
 *   aTerm   ::= vrb | "(" term ")"
 *
 * The left-recursive `appTerm` rule relies on `PackratParsers`; every rule is
 * a `lazy val` typed as `PackratParser`.
 */
object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers {
  // The last delimiter is a single backslash (written "\\" in source). It had
  // been lost, leaving an empty-string delimiter that can never start a lambda.
  lexical.delimiters += ("(", ")", ".", "\\")
  lazy val term: PackratParser[Term] =
    appTerm | lam
  lazy val vrb: PackratParser[Var] =
    ident ^^ Var
  // Abstraction: backslash, bound variable, dot, body.
  lazy val lam: PackratParser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam
  // Application: the rule refers to itself in leftmost position.
  lazy val appTerm: PackratParser[Term] =
    appTerm ~ aTerm ^^ App | aTerm
  // Atomic term: a variable or a parenthesised term.
  lazy val aTerm: PackratParser[Term] =
    vrb | "(" ~> term <~ ")"
  // Entry point: tokenises `s` and runs `term` wrapped in `phrase`.
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}

scala> LamParsers.parseTerm("x y z")
res1: LamParsers.ParseResult[parsers.Term] = [1.6] parsed: App(App(Var(x),Var(y)),Var(z))

scala> LamParsers.parseTerm("""\x.\y.x y""")
res2: LamParsers.ParseResult[parsers.Term] = [1.10] parsed:
Lam(Var(x),Lam(Var(y),App(Var(x),Var(y))))

scala> LamParsers.parseTerm("""(\x.x x) (\x. x x)""")
res3: LamParsers.ParseResult[parsers.Term] = [1.19] parsed:
App(Lam(Var(x),App(Var(x),Var(x))),Lam(Var(x),App(Var(x),Var(x))))                              8
Combinators



              9
Комбинаторные библиотеки
●   Actors
●   Parsers
●   ScalaCheck, Specs
●   Scalaz
●   SBT
●   EDSLs
●   ...

                                     10
Принципы комбинаторных библиотек
●   Соответствие терминологии библиотеки и
    терминологии предметной области.
●   Состав
    ●   типы,
    ●   примитивы,
    ●   комбинаторы первого порядка,
    ●   комбинаторы высшего порядка.
●   Свойство замыкания (композиционность).
●   Возможность эффективной реализации.
E. Кирпичев. Элементы функциональных языков. Практика функционального   11
программирования №3.
Парсеры



          12
Предметная область
●   Грамматика              ●   Парсеры
    ●   Регулярная              ●   LL-парсеры
    ●   Бесконтекстная          ●   LR-парсеры
    ●   Леворекурсивная         ●   Нисходящие
    ●   Праворекурсивная        ●   Восходящие
    ●   Атрибутная              ●   GLL
    ●   Boolean                 ●   Packrat-парсеры
    ●   PEG                     ●   Parsing with
    ●   ...                         derivatives
                                                      13
Предметная область




                     14
Подходы к созданию парсеров
●   Parser-generator         ●   Hand-written
    ●   Yacc                     ●   Low-level
    ●   Lex                      ●   High-level
    ●   JavaCC
    ●   AntLR
    ●   Rat!




                                                  15
Parsers in Scala



C9 Lectures: Dr. Erik Meijer - Functional Programming Fundamentals Chapter 8 of 13
A. Moors, F. Piessens, M. Odersky. Parser Combinators in Scala. Report CW 49 // Feb 2008
                                                                                     16
Scala parser combinators are
a form of recursive descent parsing
     with infinite backtracking.


                                      17
Parsers in Scala are functional
Background:
  ●   W. Burge. Recursive Programming Techniques.
      Addison-Wesley, 1975.
  ●   Ph. Wadler. How to Replace Failure by a List of
      Successes. A method for exception handling,
      backtracking, and pattern matching in lazy
      functional languages // 1985
  ●   G. Hutton. Higher-order functions for parsing //
      Journal of functional programming. 1992/2
  ●   J. Fokker. Functional Parsers // 1995

                                                         18
Parser?
●   Трансформирует текст в структуру


                                          +


       2*3 + 4                    *           3



                              2       3




                                                  19
Парсер – это функция
             type Parser[A] = String => A



Нет композиции функций, не обязательно парсить всю строку



       type Parser[A] = String => (A, String)




              Может закончиться неудачей


   type Parser[A] = String => Option[(A, String)]


                                                             20
Attempt #1



             21
Results
/** Result types shared by the hand-rolled parsers; the input type is left abstract. */
trait SimpleResults {
  // Kind of input the parsers consume; fixed by the concrete parser object/trait.
  type Input
  /** Outcome of running a parser: either a `Success` or a `Failure`. */
  trait Result[+T] {
    // Input associated with this outcome (see the two case classes below).
    def next: Input
  }
  /** Successful parse: the produced value and the input to continue from. */
  case class Success[+T](result: T, next: Input) extends Result[T]
  /** Failed parse: an explanatory message and the input it was reported for. */
  case class Failure(msg: String, next: Input) extends Result[Nothing]
}

/** First hand-written parser: accepts exactly one leading 'x' from a `String`. */
object XParser extends SimpleResults {
  type Input = String
  /**
   * Succeeds with 'x' and the remaining input when `in` starts with 'x';
   * otherwise fails with "expected an x" and the input left untouched.
   */
  val acceptX: Input => Result[Char] = { (in: String) =>
    // `startsWith` also covers the empty string, where `charAt(0)` would
    // throw StringIndexOutOfBoundsException instead of reporting a Failure.
    if (in.startsWith("x"))
      Success('x', in.substring(1))
    else
      Failure("expected an x", in)
  }
}




scala> XParser.acceptX("xyz")
res0: parsers.XParser.Result[Char] = Success(x,yz)

scala> XParser.acceptX("yz")
res1: parsers.XParser.Result[Char] = Failure(expected an x,yz)



                                                                         22
The basis: Parser, |, ~, accept
// Minimal combinator core: a parser type plus alternation (|) and sequencing (~),
// both written with explicit matching on Success/Failure.
trait SimpleParsers extends SimpleResults {
  // A parser is a function from Input to Result, extended with combinators.
  trait Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]
    // Alternation: run this parser; on Failure run `p` on the SAME input.
    // `p` is by-name, so it is only evaluated when actually needed.
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) match {
        case Failure(_, _) => p(in)
        case Success(x, n) => Success(x, n)}}

        // Sequencing: run this parser, then `p` on the remaining input; the two
        // results are paired. The first Failure encountered is passed through.
        def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {
          def apply(in: Input) = Parser.this(in) match {
            case Success(x, next) => p(next) match {
              case Success(x2, next2) => Success((x, x2), next2)
              case Failure(m, n)      => Failure(m, n) }
            case Failure(m, n) => Failure(m, n)}}
    }
}

/** Character-level parsers over plain `String` input. */
trait StringParsers extends SimpleParsers {
  type Input = String
  // Sentinel character used to express "end of input" as an expected char.
  private val EOI = 0.toChar

  /**
   * Parser that consumes one character equal to `expected`.
   * On empty input it succeeds only when `expected` is the EOI sentinel;
   * otherwise it reports "no more input". On a mismatching first character
   * it reports which character was expected.
   */
  def accept(expected: Char) = new Parser[Char] {
    def apply(in: String) = in.headOption match {
      case None if expected == EOI       => Success(expected, "")
      case None                          => Failure("no more input", in)
      case Some(ch) if ch == expected    => Success(expected, in.drop(1))
      case Some(_)                       => Failure("expected '" + expected + "'", in)
    }
  }

  /** Parser for the end of input, expressed as `accept` of the EOI sentinel. */
  def eoi = accept(EOI)
}
                                                                           23
The simplest parser
/** Example grammar: one or more "oxo" words separated by single spaces. */
object OXOParser extends StringParsers {
  // The three characters 'o', 'x', 'o' in sequence.
  def oxo = accept('o') ~ accept('x') ~ accept('o')
  // Recursive rule: "oxo", a space and more oxos — or, failing that, a single "oxo".
  // The self-reference is only safe because `~` and `|` take their argument by name.
  def oxos: Parser[Any] =
    (oxo ~ accept(' ') ~ oxos | oxo)
}




scala> OXOParser.oxos("123")
res2: parsers.OXOParser.Result[Any] = Failure(expected 'o',123)

scala> OXOParser.oxos("oxo")
res3: parsers.OXOParser.Result[Any] = Success(((o,x),o),)

scala> OXOParser.oxos("oxo oxo")
res4: parsers.OXOParser.Result[Any] = Success(((((o,x),o), ),((o,x),o)),)

scala> OXOParser.oxos("oxo oxo 1")
res5: parsers.OXOParser.Result[Any] = Success(((((o,x),o), ),((o,x),o)), 1)

scala> (OXOParser.oxos ~ OXOParser.eoi)("oxo oxo 1")
res6: parsers.OXOParser.Result[(Any, Char)] = Failure(expected '?', 1)




                                                                              24
Be careful!
trait SimpleParsers extends SimpleResults {
  trait Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {            call-by-name param
      def apply(in: Input) = Parser.this(in) match {
        case Failure(_, _) => p(in)
        case Success(x, n) => Success(x, n)}}

        def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {   call-by-name param
          def apply(in: Input) = Parser.this(in) match {
            case Success(x, next) => p(next) match {
              case Success(x2, next2) => Success((x, x2), next2)
              case Failure(m, n)      => Failure(m, n) }
            case Failure(m, n) => Failure(m, n)}}
    }
}

object OXOParser extends StringParsers {
  def oxo = accept('o') ~ accept('x') ~ accept('o')
  def oxos: Parser[Any] =
    (oxo ~ accept(' ') ~ oxos | oxo)
}




                                                                                                25
Be careful!
trait SimpleParsers extends SimpleResults {
  trait Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]
    def |[U >: T](p: Parser[U]): Parser[U] = new Parser[U] {            call-by-value param
      def apply(in: Input) = Parser.this(in) match {
        case Failure(_, _) => p(in)
        case Success(x, n) => Success(x, n)}}

        def ~[U](p: Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {   call-by-value param
          def apply(in: Input) = Parser.this(in) match {
            case Success(x, next) => p(next) match {
              case Success(x2, next2) => Success((x, x2), next2)
              case Failure(m, n)      => Failure(m, n) }
            case Failure(m, n) => Failure(m, n)}}
    }
}

object OXOParser extends StringParsers {
  def oxo = accept('o') ~ accept('x') ~ accept('o')
  def oxos: Parser[Any] =
    (oxo ~ accept(' ') ~ oxos | oxo)
}


scala> OXOParser.oxos("123")
java.lang.StackOverflowError
     at parsers.OXOParser$.oxo(stepbystep.scala:67)
     at parsers.OXOParser$.oxos(stepbystep.scala:69)
     at parsers.OXOParser$.oxos(stepbystep.scala:69)
     at parsers.OXOParser$.oxos(stepbystep.scala:69)
     at parsers.OXOParser$.oxos(stepbystep.scala:69)
     at parsers.OXOParser$.oxos(stepbystep.scala:69)
     at parsers.OXOParser$.oxos(stepbystep.scala:69)                                          26
     ...
Attempt #2
(Factoring out Plumbing)



                           27
Where is a problem?
trait SimpleParsers extends SimpleResults {
  trait Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) match {
        case Failure(_, _) => p(in)
        case Success(x, n) => Success(x, n)}}

        def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {
          def apply(in: Input) = Parser.this(in) match {
            case Success(x, next) => p(next) match {
              case Success(x2, next2) => Success((x, x2), next2)
              case Failure(m, n)      => Failure(m, n) }
            case Failure(m, n) => Failure(m, n)}}
    }
}

object OXOParser extends StringParsers {
  def oxo = accept('o') ~ accept('x') ~ accept('o')
  def oxos: Parser[Any] =
    (oxo ~ accept(' ') ~ oxos | oxo)
}




                                                                           28
Too much “threading”
trait SimpleParsers extends SimpleResults {
  trait Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) match {
        case Failure(_, _) => p(in)
        case Success(x, n) => Success(x, n)}}

        def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {
          def apply(in: Input) = Parser.this(in) match {
            case Success(x, next) => p(next) match {
              case Success(x2, next2) => Success((x, x2), next2)
              case Failure(m, n)      => Failure(m, n) }
            case Failure(m, n) => Failure(m, n)}}
    }
}

object OXOParser extends StringParsers {
  def oxo = accept('o') ~ accept('x') ~ accept('o')
  def oxos: Parser[Any] =
    (oxo ~ accept(' ') ~ oxos | oxo)
}




                                                                           29
Improved Results
/** Result types with the success/failure plumbing factored into methods. */
trait SimpleResults {
  type Input
  trait Result[+T] {
    def next: Input
    // Transform the parsed value, if there is one.
    def map[U](f: T => U): Result[U]
    // Continue with a computation that receives both the value and the remaining input.
    def flatMapWithNext[U](f: T => Input => Result[U]): Result[U]
    // Fall back to `alt` (by-name, so evaluated only when needed).
    def append[U >: T](alt: => Result[U]): Result[U]
  }
  case class Success[+T](result: T, next: Input) extends Result[T] {
    // Apply `f` to the value; the remaining input is unchanged.
    def map[U](f: T => U) = Success(f(result), next)
    // Feed the value and then the remaining input to `f`.
    def flatMapWithNext[U](f: T => Input => Result[U]) = f(result)(next)
    // Already successful: `alt` is ignored and never evaluated.
    def append[U >: T](alt: => Result[U]) = this
  }
  case class Failure(msg: String, next: Input) extends Result[Nothing] {
    // Nothing to transform or continue with: the failure is returned as is.
    def map[U](f: Nothing => U) = this
    def flatMapWithNext[U](f: Nothing => Input => Result[U]) = this
    // A failure yields to the alternative.
    def append[U](alt: => Result[U]) = alt
  }
}



●map -...
●flatMapWithNext - ...

●append – for multiple results (we do not consider it here)




                                                                           30
Parser is a function with many results
             type Parser[A] = String => A




        type Parser[A] = String => (A, String)




     type Parser[A] = String => Option[(A, String)]




      type Parser[A] = String => List[(A, String)]



                                                      31
After improving
trait SimpleResults {
  type Input
  trait Result[+T] {
    def next: Input
    def map[U](f: T => U): Result[U]
    def flatMapWithNext[U](f: T => Input => Result[U]): Result[U]
    def append[U >: T](alt: => Result[U]): Result[U]
  }
  case class Success[+T](result: T, next: Input) extends Result[T] {
    def map[U](f: T => U) = Success(f(result), next)
    def flatMapWithNext[U](f: T => Input => Result[U]) = f(result)(next)
    def append[U >: T](alt: => Result[U]) = this
  }
  case class Failure(msg: String, next: Input) extends Result[Nothing] {
    def map[U](f: Nothing => U) = this
    def flatMapWithNext[U](f: Nothing => Input => Result[U]) = this
    def append[U](alt: => Result[U]) = alt
  }
}

// Combinators re-expressed through the Result methods (map / flatMapWithNext / append).
trait SimpleParsers extends SimpleResults {
  abstract class Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]
    // Run this parser, then the parser chosen by `f` on the remaining input.
    // `f` can be passed to flatMapWithNext because a Parser[U] is an Input => Result[U].
    def flatMap[U](f: T => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) flatMapWithNext (f)
    }
    // Transform the parsed value with `f`.
    def map[U](f: T => U): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) map (f)
    }
    // Alternation: `p` is applied to the same input as the fallback result.
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) append p(in)
    }
    // Sequencing as a for-comprehension, i.e. flatMap followed by map.
    def ~[U](p: => Parser[U]): Parser[(T, U)] =
      for (a <- this; b <- p) yield (a, b)
                                                                           Hey!   32
  }
}
So, Parser is a Monad!!



                          33
Where is my “withFilter”?
●   In Scala 2.10
●   It was not easy...




                                        34
Removing noise...
// Same combinators as before, with the anonymous-class boilerplate moved into a factory.
trait SimpleParsers extends SimpleResults {

    // Factory: wraps a plain function Input => Result[T] as a Parser[T].
    def Parser[T](f: Input => Result[T]) =
      new Parser[T] { def apply(in: Input) = f(in) }
                                                                    Removing
                                                                    Boilerplate
    abstract class Parser[+T] extends (Input => Result[T]) {
      def apply(in: Input): Result[T]
                                                                 New Parser{apply}
        // Run this parser, then the parser chosen by `f` on the remaining input.
        def flatMap[U](f: T => Parser[U]): Parser[U] =
          Parser { in => Parser.this(in) flatMapWithNext (f) }

        // Transform the parsed value with `f`.
        def map[U](f: T => U): Parser[U] =
          Parser { in => Parser.this(in) map (f) }

        // Alternation: `p` applied to the same input is the fallback result.
        def |[U >: T](p: => Parser[U]): Parser[U] =
          Parser { in => Parser.this(in) append p(in) }

        // Sequencing as a for-comprehension, i.e. flatMap followed by map.
        def ~[U](p: => Parser[U]): Parser[(T, U)] =
          for (a <- this; b <- p) yield (a, b)
    }

}




                                                                                     35
Real Parsers



               36
Real Parsers
package scala.util.parsing.combinator

trait Parsers {                                               Stream annotated with
  type Elem
  type Input = Reader[Elem]                                        coordinates
    sealed abstract class ParseResult[+T]
    case class Success[+T](result: T, override val next: Input) extends ParseResult[T]
    sealed abstract class NoSuccess(val msg: String, override val next: Input)
       extends ParseResult[Nothing]
    case class Failure(override val msg: String, override val next: Input)
       extends NoSuccess(msg, next)
    case class Error(override val msg: String, override val next: Input)
       extends NoSuccess(msg, next)

    ...
    abstract class Parser[+T] extends (Input => ParseResult[T]) {            Controlling
    ...                                                                     backtracking
    }
    case class ~[+a, +b](_1: a, _2: b) {
      override def toString = "("+ _1 +"~"+ _2 +")"
    }
                                                                Deconstructing sequencing
}


package scala.util.parsing.input

abstract class Reader[+T] {
  def first: T
  def rest: Reader[T]                                                                       37
}
Simplified picture
package scala.util.parsing.combinator

trait Parsers {
  type Elem
  type Input = Reader[Elem]

    sealed abstract class ParseResult[+T]

    abstract class Parser[+T] extends (Input => ParseResult[T]) {
      combinators
    }

    combinators
}




                                                                    38
Combinators



              39
Basic Combinators
package scala.util.parsing.combinator

trait Parsers {

    def elem(kind: String, p: Elem => Boolean): Parser[Elem]
    def elem(e: Elem): Parser[Elem]
    implicit def accept(e: Elem): Parser[Elem]

    abstract class Parser[+T] extends (Input => ParseResult[T]) {
      def ~ [U](q: => Parser[U]): Parser[~[T, U]]
      def <~ [U](q: => Parser[U]): Parser[T]
      def ~! [U](p: => Parser[U]): Parser[~[T, U]]
      def | [U >: T](q: => Parser[U]): Parser[U]
      def ||| [U >: T](q0: => Parser[U]): Parser[U]
      def ^^ [U](f: T => U): Parser[U]
      def ^^^ [U](v: => U): Parser[U]
      def ^? [U](f: PartialFunction[T, U], error: T => String): Parser[U]
      def ^? [U](f: PartialFunction[T, U]): Parser[U]
      def >>[U](fq: T => Parser[U])
      def *: Parser[List[T]]
      def +: Parser[List[T]]
      def ?: Parser[Option[T]]
    }

}




                                                                            40
Swiss army knife Combinators
package scala.util.parsing.combinator

trait Parsers {

    def   commit[T](p: => Parser[T]): Parser[T]
    def   accept[ES <% List[Elem]](es: ES): Parser[List[Elem]]
    def   accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U]
    def   failure(msg: String): Parser[Nothing]
    def   err(msg: String): Parser[Nothing]
    def   success[T](v: T): Parser[T]
    def   rep[T](p: => Parser[T]): Parser[List[T]]
    def   repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]]
    def   rep1[T](p: => Parser[T]): Parser[List[T]]
    def   rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]]
    def   repN[T](num: Int, p: => Parser[T]): Parser[List[T]]
    def   rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]]
    def   chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T]
    def   chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T]
    def   chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U],
             combine: (T, U) => U, first: U): Parser[U]
    def   opt[T](p: => Parser[T]): Parser[Option[T]]
    def   not[T](p: => Parser[T]): Parser[Unit]
    def   guard[T](p: => Parser[T]): Parser[T]
    def   positioned[T <: Positional](p: => Parser[T]): Parser[T]
    def   phrase[T](p: Parser[T]): Parser[T]

}

            Inspired by G. Hutton and E. Meijer. Monadic Parser Combinators.
                                                                                                      41
Lexing



         42
Простейший (low-level) парсер
/** Low-level (character-by-character) parsers built directly on the library `Parsers` trait. */
trait SimplestParsers extends Parsers {
  // Input elements are single characters.
  type Elem = Char
  // Any character up to and including ' ', except the end-of-file marker.
  // NOTE(review): `EofCh` is not defined in this snippet — presumably imported from the library; confirm.
  def whitespaceChar: Parser[Char] =
    elem("space char", ch => ch <= ' ' && ch != EofCh)
  // A single letter, as decided by `isLetter`.
  def letter: Parser[Char] = elem("letter", _.isLetter)

    // Zero or more whitespace characters.
    def whitespace: Parser[List[Char]] = rep(whitespaceChar)
    // One or more letters, returned as a list of characters.
    def ident: Parser[List[Char]] = rep1(letter)

    // Runs `p` over the characters of `in`; `p` is not required to consume all of it.
    def parse[T](p: Parser[T], in: String): ParseResult[T] =
      p(new CharSequenceReader(in))
}



scala> val p1 = new SimplestParsers{}
p1: java.lang.Object with parsers.SimplestParsers = $anon$1@17d59ff0

scala> import p1._
import p1._

scala> parse(letter, "foo bar")
res0: p1.ParseResult[Char] = [1.2] parsed: f

scala> parse(ident, "foo bar")
res1: p1.ParseResult[List[Char]] = [1.4] parsed: List(f, o, o)

scala> parse(ident, "123")
res2: p1.ParseResult[List[Char]] =
[1.1] failure: letter expected
                                                                       43
123
^
Towards AST
/** Lexical tokens produced by `TokenParsers`. */
trait Token
/** Identifier token carrying its text. */
case class Id(n: String) extends Token
/** Numeric token carrying its digits as text. */
case class Num(n: String) extends Token
/** Token used by the scanner when no token can be recognised. */
case object ErrorToken extends Token

/** Character-level parsers that recognise identifier and number tokens. */
trait TokenParsers extends Parsers {

    // Input elements are single characters.
    type Elem = Char

    // Any character up to and including ' ', except the end-of-file marker.
    private def whitespaceChar: Parser[Char] =
      elem("space char", ch => ch <= ' ' && ch != EofCh)
    def letter: Parser[Char] = elem("letter", _.isLetter)
    def digit: Parser[Char] = elem("digit", _.isDigit)

    // Zero or more whitespace characters.
    def whitespace: Parser[List[Char]] = rep(whitespaceChar)
    // One or more letters / digits, joined into a String.
    def idLit: Parser[String] = rep1(letter) ^^ { _.mkString("") }
    def numLit: Parser[String] = rep1(digit) ^^ { _.mkString("") }

    // Wrap the recognised text in the corresponding Token.
    def id: Parser[Token] = idLit ^^ Id
    def num: Parser[Token] = numLit ^^ Num

    // Any single token: an identifier is tried first, then a number.
    def token = id | num

    // Runs `p` over the characters of `in`.
    def parse[T](p: Parser[T], in: String): ParseResult[T] =
      p(new CharSequenceReader(in))

}



                                                                     44
Lexer/Scanner
/** Turns a stream of characters into a stream of tokens. */
trait Scanners extends TokenParsers {
  /** A `Reader[Token]` view over a `Reader[Char]`: each Scanner holds one token. */
  class Scanner(in: Reader[Char]) extends Reader[Token] {
    // Convenience constructor for scanning a String.
    def this(in: String) = this(new CharArrayReader(in.toCharArray()))
    // tok   – the token at this position (ErrorToken if none could be recognised)
    // rest1 – input after leading whitespace, i.e. where the token starts (unused in this excerpt)
    // rest2 – input after the token; on failure, one character past the failure point
    private val (tok, rest1, rest2) = whitespace(in) match {
      case Success(_, in1) => token(in1) match {
        case Success(tok, in2) => (tok, in1, in2)
        case ns: NoSuccess     => (ErrorToken, ns.next, ns.next.rest)
      }
      case ns: NoSuccess => (ErrorToken, ns.next, ns.next.rest)
    }
    def first = tok
    // The next Scanner is created on each call and starts right after the current token.
    def rest = new Scanner(rest2)
  }
}


scala> val scs = new Scanners {}
scs: java.lang.Object with Scanners = $anon$1@68a750a

scala> val reader = new scs.Scanner("foo bar")
reader: scs.Scanner = Scanners$Scanner@6a75863f

scala> reader.first
res0: Token = Id(foo)

scala> reader.rest.first
res1: Token = Id(bar)

scala> reader.rest.rest.first
res2: Token = ErrorToken
                                                                         45
Lexing
Reader[Char]   Low-level Parsing   Reader[Token]




                                                   46
Typical Parser



                 47
RAM++




        48
AST




      49
Parser


         Implicit magic
           “~” magic




                          50
Итак, ...
●   Parser Combinators in Scala позволяют описывать
    исполняемые грамматики в виде, близком к BNF.
●   Внутреннее устройство Parser Combinators - самый
    настоящий Programming Pearl.
●   Internal DSL for External DSLs.




                                                       51
Discussion
(Parser Combinators vs Parser Generator)



                                           52
PROS
●   Toт же язык (Scala) – не нужно учить новый
    инструмент.
●   Исполняемая грамматика - всегда
    актуальный код.
●   Краткость + богатая выразительность: LL(*) и
    больше (в том числе, контекстные
    грамматики).
●   Можно делать fusion синтаксического
    разбора и чего-нибудь еще.
●   Модульность                                   53
CONS
●   Некоторые простые вещи могут
    кодироваться очень непросто.
●   Performance.




                                   54
Performance




Hand-written Lift-json is 350 times faster than version based on parser
combinators (proof link)


                                                                          55
Packrat Parsers



                  56
Parsing “9”: Too much backtracking
import scala.util.parsing.combinator._
import syntactical.StandardTokenParsers

sealed trait Expr
case class Num(i: Int) extends Expr
case class Var(n: String) extends Expr
case class Plus(e1: Expr, e2: Expr) extends Expr
case class Mult(e1: Expr, e2: Expr) extends Expr

object ArithParsers extends StandardTokenParsers with ImplicitConversions {
  lexical.delimiters += ("(", ")", "+", "*")
  def expr: Parser[Expr] =
    term ~ ("+" ~> expr) ^^ Plus | term
  def term: Parser[Expr] =
    factor ~ ("*" ~> term) ^^ Mult | factor
  def factor: Parser[Expr] =
    numericLit ^^ { s => Num(s.toInt) } | ident ^^ Var | "(" ~> expr <~ ")"

    def parseExpr(s: String) = phrase(expr)(new lexical.Scanner(s))
}




                                                                              57
Idea: Memoization (Really, Laziness)




                                       58
+ Left Recursion
sealed trait Term
case class Var(n: String) extends Term
case class Lam(v: Var, body: Term) extends Term
case class App(t1: Term, t2: Term) extends Term

/**
 * Packrat parser for untyped lambda-calculus terms. A backslash is the ASCII
 * stand-in for the lambda symbol.
 *
 *   term    ::= appTerm | lam
 *   lam     ::= "\" vrb "." term
 *   appTerm ::= appTerm aTerm | aTerm      (left-recursive)
 *   aTerm   ::= vrb | "(" term ")"
 */
object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers {
  // The last delimiter is a single backslash (written "\\" in source). It had
  // been lost, leaving an empty-string delimiter that can never start a lambda.
  lexical.delimiters += ("(", ")", ".", "\\")
  lazy val term: PackratParser[Term] =
    appTerm | lam
  lazy val vrb: PackratParser[Var] =
    ident ^^ Var
  // Abstraction: backslash, bound variable, dot, body.
  lazy val lam: PackratParser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam
  // Application: the rule refers to itself in leftmost position.
  lazy val appTerm: PackratParser[Term] =
    appTerm ~ aTerm ^^ App | aTerm
  // Atomic term: a variable or a parenthesised term.
  lazy val aTerm: PackratParser[Term] =
    vrb | "(" ~> term <~ ")"
  // Entry point: tokenises `s` and runs `term` wrapped in `phrase`.
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}




                                                                                                59
+ Left Recursion
sealed trait Term
case class Var(n: String) extends Term
case class Lam(v: Var, body: Term) extends Term
case class App(t1: Term, t2: Term) extends Term

/**
 * Packrat parser for untyped lambda-calculus terms. A backslash is the ASCII
 * stand-in for the lambda symbol.
 *
 *   term    ::= appTerm | lam
 *   lam     ::= "\" vrb "." term
 *   appTerm ::= appTerm aTerm | aTerm      (left-recursive)
 *   aTerm   ::= vrb | "(" term ")"
 */
object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers {
  // The last delimiter is a single backslash (written "\\" in source). It had
  // been lost, leaving an empty-string delimiter that can never start a lambda.
  lexical.delimiters += ("(", ")", ".", "\\")
  lazy val term: PackratParser[Term] =
    appTerm | lam
  lazy val vrb: PackratParser[Var] =
    ident ^^ Var
  // Abstraction: backslash, bound variable, dot, body.
  lazy val lam: PackratParser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam
  // Slide callout: "lazy val" — the left-recursive rule is declared as a
  // `lazy val` of type PackratParser, not as a `def`.
  lazy val appTerm: PackratParser[Term] =
    appTerm ~ aTerm ^^ App | aTerm
  // Atomic term: a variable or a parenthesised term.
  lazy val aTerm: PackratParser[Term] =
    vrb | "(" ~> term <~ ")"
  // Entry point: tokenises `s` and runs `term` wrapped in `phrase`.
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}




                                                                                                60
Without Left Recursion
sealed trait Term
case class Var(n: String) extends Term
case class Lam(v: Var, body: Term) extends Term
case class App(t1: Term, t2: Term) extends Term

/**
 * Lambda-calculus parser written without left recursion, so plain `Parser`s
 * suffice. A backslash is the ASCII stand-in for the lambda symbol.
 *
 *   term    ::= appTerm | lam
 *   lam     ::= "\" vrb "." term
 *   appTerm ::= aTerm+                     (folded to the left into App nodes)
 *   aTerm   ::= vrb | "(" term ")"
 */
object LamParsers extends StandardTokenParsers with ImplicitConversions {
  // The last delimiter is a single backslash (written "\\" in source). It had
  // been lost, leaving an empty-string delimiter that can never start a lambda.
  lexical.delimiters += ("(", ")", ".", "\\")
  lazy val term: Parser[Term] =
    appTerm | lam
  lazy val vrb: Parser[Var] =
    ident ^^ Var
  // Abstraction: backslash, bound variable, dot, body.
  lazy val lam: Parser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam
  // One or more atomic terms; `rep1` is what the postfix `+` delegates to and
  // never yields an empty list, so `reduceLeft` is safe. Folding from the left
  // makes application left-associative: x y z = App(App(x, y), z).
  lazy val appTerm: Parser[Term] =
    rep1(aTerm) ^^ { _.reduceLeft(App) }
  // Atomic term: a variable or a parenthesised term.
  lazy val aTerm: Parser[Term] =
    vrb | "(" ~> term <~ ")"
  // Entry point: tokenises `s` and runs `term` wrapped in `phrase`.
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}




                                                                            61
Packrat Performance




                      62
Other Parsers
●     Parboiled Parser (PEG parser)
●     GLL parser
●     Derivative combinators




    http://stackoverflow.com/questions/4423514/scala-parsers-availability-differences-and-combining
                                                                                                      63
Trends
●   Merging two worlds
    ●   Compositionality (Functional programming)
    ●   Performance




                                                    64
Спасибо!



           65
https://github.com/ilya-klyuchnikov/tapl-scala




                                                 66

Más contenido relacionado

La actualidad más candente

Operator Overloading In Scala
Operator Overloading In ScalaOperator Overloading In Scala
Operator Overloading In ScalaJoey Gibson
 
TI1220 Lecture 9: Parsing & interpretation
TI1220 Lecture 9: Parsing & interpretationTI1220 Lecture 9: Parsing & interpretation
TI1220 Lecture 9: Parsing & interpretationEelco Visser
 
Object Equality in Scala
Object Equality in ScalaObject Equality in Scala
Object Equality in ScalaKnoldus Inc.
 
Advance LISP (Artificial Intelligence)
Advance LISP (Artificial Intelligence) Advance LISP (Artificial Intelligence)
Advance LISP (Artificial Intelligence) wahab khan
 
The... Wonderful? World of Lambdas
The... Wonderful? World of LambdasThe... Wonderful? World of Lambdas
The... Wonderful? World of LambdasEsther Lozano
 
BayFP: Concurrent and Multicore Haskell
BayFP: Concurrent and Multicore HaskellBayFP: Concurrent and Multicore Haskell
BayFP: Concurrent and Multicore HaskellBryan O'Sullivan
 
DEFUN 2008 - Real World Haskell
DEFUN 2008 - Real World HaskellDEFUN 2008 - Real World Haskell
DEFUN 2008 - Real World HaskellBryan O'Sullivan
 
INTRODUCTION TO LISP
INTRODUCTION TO LISPINTRODUCTION TO LISP
INTRODUCTION TO LISPNilt1234
 
Practical Functional Programming Presentation by Bogdan Hodorog
Practical Functional Programming Presentation by Bogdan HodorogPractical Functional Programming Presentation by Bogdan Hodorog
Practical Functional Programming Presentation by Bogdan Hodorog3Pillar Global
 
Javaz. Functional design in Java 8.
Javaz. Functional design in Java 8.Javaz. Functional design in Java 8.
Javaz. Functional design in Java 8.Vadim Dubs
 
Regular expressions in Python
Regular expressions in PythonRegular expressions in Python
Regular expressions in PythonSujith Kumar
 
Haskell retrospective
Haskell retrospectiveHaskell retrospective
Haskell retrospectivechenge2k
 
Logic programming a ruby perspective
Logic programming a ruby perspectiveLogic programming a ruby perspective
Logic programming a ruby perspectiveNorman Richards
 
Introduction to Programming in LISP
Introduction to Programming in LISPIntroduction to Programming in LISP
Introduction to Programming in LISPKnoldus Inc.
 

La actualidad más candente (20)

Operator Overloading In Scala
Operator Overloading In ScalaOperator Overloading In Scala
Operator Overloading In Scala
 
TI1220 Lecture 9: Parsing & interpretation
TI1220 Lecture 9: Parsing & interpretationTI1220 Lecture 9: Parsing & interpretation
TI1220 Lecture 9: Parsing & interpretation
 
02. haskell motivation
02. haskell motivation02. haskell motivation
02. haskell motivation
 
Object Equality in Scala
Object Equality in ScalaObject Equality in Scala
Object Equality in Scala
 
Pythonppt28 11-18
Pythonppt28 11-18Pythonppt28 11-18
Pythonppt28 11-18
 
Lisp
LispLisp
Lisp
 
Advance LISP (Artificial Intelligence)
Advance LISP (Artificial Intelligence) Advance LISP (Artificial Intelligence)
Advance LISP (Artificial Intelligence)
 
(Ai lisp)
(Ai lisp)(Ai lisp)
(Ai lisp)
 
The... Wonderful? World of Lambdas
The... Wonderful? World of LambdasThe... Wonderful? World of Lambdas
The... Wonderful? World of Lambdas
 
Java gets a closure
Java gets a closureJava gets a closure
Java gets a closure
 
Lisp
LispLisp
Lisp
 
BayFP: Concurrent and Multicore Haskell
BayFP: Concurrent and Multicore HaskellBayFP: Concurrent and Multicore Haskell
BayFP: Concurrent and Multicore Haskell
 
DEFUN 2008 - Real World Haskell
DEFUN 2008 - Real World HaskellDEFUN 2008 - Real World Haskell
DEFUN 2008 - Real World Haskell
 
INTRODUCTION TO LISP
INTRODUCTION TO LISPINTRODUCTION TO LISP
INTRODUCTION TO LISP
 
Practical Functional Programming Presentation by Bogdan Hodorog
Practical Functional Programming Presentation by Bogdan HodorogPractical Functional Programming Presentation by Bogdan Hodorog
Practical Functional Programming Presentation by Bogdan Hodorog
 
Javaz. Functional design in Java 8.
Javaz. Functional design in Java 8.Javaz. Functional design in Java 8.
Javaz. Functional design in Java 8.
 
Regular expressions in Python
Regular expressions in PythonRegular expressions in Python
Regular expressions in Python
 
Haskell retrospective
Haskell retrospectiveHaskell retrospective
Haskell retrospective
 
Logic programming a ruby perspective
Logic programming a ruby perspectiveLogic programming a ruby perspective
Logic programming a ruby perspective
 
Introduction to Programming in LISP
Introduction to Programming in LISPIntroduction to Programming in LISP
Introduction to Programming in LISP
 

Similar a Parsers Combinators in Scala, Ilya @lambdamix Kliuchnikov

Functional programming ii
Functional programming iiFunctional programming ii
Functional programming iiPrashant Kalkar
 
Transform your State \/ Err
Transform your State \/ ErrTransform your State \/ Err
Transform your State \/ ErrGermán Ferrari
 
lisps - A Lisp Interpreter written in Postscript
lisps - A Lisp Interpreter written in Postscriptlisps - A Lisp Interpreter written in Postscript
lisps - A Lisp Interpreter written in PostscriptGabriel Grill
 
Real Time Big Data Management
Real Time Big Data ManagementReal Time Big Data Management
Real Time Big Data ManagementAlbert Bifet
 
Algorithm and Programming (Introduction of dev pascal, data type, value, and ...
Algorithm and Programming (Introduction of dev pascal, data type, value, and ...Algorithm and Programming (Introduction of dev pascal, data type, value, and ...
Algorithm and Programming (Introduction of dev pascal, data type, value, and ...Adam Mukharil Bachtiar
 
Mastering Grammars with PetitParser
Mastering Grammars with PetitParserMastering Grammars with PetitParser
Mastering Grammars with PetitParserLukas Renggli
 
TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...
TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...
TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...Iosif Itkin
 
Use PEG to Write a Programming Language Parser
Use PEG to Write a Programming Language ParserUse PEG to Write a Programming Language Parser
Use PEG to Write a Programming Language ParserYodalee
 
Introducing Pattern Matching in Scala
 Introducing Pattern Matching  in Scala Introducing Pattern Matching  in Scala
Introducing Pattern Matching in ScalaAyush Mishra
 
Scala Parser Combinators - Scalapeno Lightning Talk
Scala Parser Combinators - Scalapeno Lightning TalkScala Parser Combinators - Scalapeno Lightning Talk
Scala Parser Combinators - Scalapeno Lightning TalkLior Schejter
 
Testing for share
Testing for share Testing for share
Testing for share Rajeev Mehta
 
CS101- Introduction to Computing- Lecture 29
CS101- Introduction to Computing- Lecture 29CS101- Introduction to Computing- Lecture 29
CS101- Introduction to Computing- Lecture 29Bilal Ahmed
 
Advanced REXX Programming Techniques
Advanced REXX Programming TechniquesAdvanced REXX Programming Techniques
Advanced REXX Programming TechniquesDan O'Dea
 

Similar a Parsers Combinators in Scala, Ilya @lambdamix Kliuchnikov (20)

Functional programming ii
Functional programming iiFunctional programming ii
Functional programming ii
 
Transform your State \/ Err
Transform your State \/ ErrTransform your State \/ Err
Transform your State \/ Err
 
lisps - A Lisp Interpreter written in Postscript
lisps - A Lisp Interpreter written in Postscriptlisps - A Lisp Interpreter written in Postscript
lisps - A Lisp Interpreter written in Postscript
 
Real Time Big Data Management
Real Time Big Data ManagementReal Time Big Data Management
Real Time Big Data Management
 
C# programming
C# programming C# programming
C# programming
 
Algorithm and Programming (Introduction of dev pascal, data type, value, and ...
Algorithm and Programming (Introduction of dev pascal, data type, value, and ...Algorithm and Programming (Introduction of dev pascal, data type, value, and ...
Algorithm and Programming (Introduction of dev pascal, data type, value, and ...
 
Mastering Grammars with PetitParser
Mastering Grammars with PetitParserMastering Grammars with PetitParser
Mastering Grammars with PetitParser
 
TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...
TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...
TMPA-2017: Functional Parser of Markdown Language Based on Monad Combining an...
 
Ch2 (1).ppt
Ch2 (1).pptCh2 (1).ppt
Ch2 (1).ppt
 
Use PEG to Write a Programming Language Parser
Use PEG to Write a Programming Language ParserUse PEG to Write a Programming Language Parser
Use PEG to Write a Programming Language Parser
 
Java 8
Java 8Java 8
Java 8
 
Introducing Pattern Matching in Scala
 Introducing Pattern Matching  in Scala Introducing Pattern Matching  in Scala
Introducing Pattern Matching in Scala
 
cp05.pptx
cp05.pptxcp05.pptx
cp05.pptx
 
Scala Parser Combinators - Scalapeno Lightning Talk
Scala Parser Combinators - Scalapeno Lightning TalkScala Parser Combinators - Scalapeno Lightning Talk
Scala Parser Combinators - Scalapeno Lightning Talk
 
Clojure intro
Clojure introClojure intro
Clojure intro
 
Testing for share
Testing for share Testing for share
Testing for share
 
Special topics in finance lecture 2
Special topics in finance   lecture 2Special topics in finance   lecture 2
Special topics in finance lecture 2
 
An Intoduction to R
An Intoduction to RAn Intoduction to R
An Intoduction to R
 
CS101- Introduction to Computing- Lecture 29
CS101- Introduction to Computing- Lecture 29CS101- Introduction to Computing- Lecture 29
CS101- Introduction to Computing- Lecture 29
 
Advanced REXX Programming Techniques
Advanced REXX Programming TechniquesAdvanced REXX Programming Techniques
Advanced REXX Programming Techniques
 

Más de Vasil Remeniuk

Product Minsk - РТБ и Программатик
Product Minsk - РТБ и ПрограмматикProduct Minsk - РТБ и Программатик
Product Minsk - РТБ и ПрограмматикVasil Remeniuk
 
Работа с Akka Сluster, @afiskon, scalaby#14
Работа с Akka Сluster, @afiskon, scalaby#14Работа с Akka Сluster, @afiskon, scalaby#14
Работа с Akka Сluster, @afiskon, scalaby#14Vasil Remeniuk
 
Cake pattern. Presentation by Alex Famin at scalaby#14
Cake pattern. Presentation by Alex Famin at scalaby#14Cake pattern. Presentation by Alex Famin at scalaby#14
Cake pattern. Presentation by Alex Famin at scalaby#14Vasil Remeniuk
 
Scala laboratory: Globus. iteration #3
Scala laboratory: Globus. iteration #3Scala laboratory: Globus. iteration #3
Scala laboratory: Globus. iteration #3Vasil Remeniuk
 
Testing in Scala by Adform research
Testing in Scala by Adform researchTesting in Scala by Adform research
Testing in Scala by Adform researchVasil Remeniuk
 
Spark Intro by Adform Research
Spark Intro by Adform ResearchSpark Intro by Adform Research
Spark Intro by Adform ResearchVasil Remeniuk
 
Types by Adform Research, Saulius Valatka
Types by Adform Research, Saulius ValatkaTypes by Adform Research, Saulius Valatka
Types by Adform Research, Saulius ValatkaVasil Remeniuk
 
Types by Adform Research
Types by Adform ResearchTypes by Adform Research
Types by Adform ResearchVasil Remeniuk
 
Scalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex GryzlovScalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex GryzlovVasil Remeniuk
 
Scalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex GryzlovScalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex GryzlovVasil Remeniuk
 
Spark by Adform Research, Paulius
Spark by Adform Research, PauliusSpark by Adform Research, Paulius
Spark by Adform Research, PauliusVasil Remeniuk
 
Scala Style by Adform Research (Saulius Valatka)
Scala Style by Adform Research (Saulius Valatka)Scala Style by Adform Research (Saulius Valatka)
Scala Style by Adform Research (Saulius Valatka)Vasil Remeniuk
 
Spark intro by Adform Research
Spark intro by Adform ResearchSpark intro by Adform Research
Spark intro by Adform ResearchVasil Remeniuk
 
SBT by Aform Research, Saulius Valatka
SBT by Aform Research, Saulius ValatkaSBT by Aform Research, Saulius Valatka
SBT by Aform Research, Saulius ValatkaVasil Remeniuk
 
Scala laboratory: Globus. iteration #2
Scala laboratory: Globus. iteration #2Scala laboratory: Globus. iteration #2
Scala laboratory: Globus. iteration #2Vasil Remeniuk
 
Testing in Scala. Adform Research
Testing in Scala. Adform ResearchTesting in Scala. Adform Research
Testing in Scala. Adform ResearchVasil Remeniuk
 
Scala laboratory. Globus. iteration #1
Scala laboratory. Globus. iteration #1Scala laboratory. Globus. iteration #1
Scala laboratory. Globus. iteration #1Vasil Remeniuk
 
Cassandra + Spark + Elk
Cassandra + Spark + ElkCassandra + Spark + Elk
Cassandra + Spark + ElkVasil Remeniuk
 
Опыт использования Spark, Основано на реальных событиях
Опыт использования Spark, Основано на реальных событияхОпыт использования Spark, Основано на реальных событиях
Опыт использования Spark, Основано на реальных событияхVasil Remeniuk
 

Más de Vasil Remeniuk (20)

Product Minsk - РТБ и Программатик
Product Minsk - РТБ и ПрограмматикProduct Minsk - РТБ и Программатик
Product Minsk - РТБ и Программатик
 
Работа с Akka Сluster, @afiskon, scalaby#14
Работа с Akka Сluster, @afiskon, scalaby#14Работа с Akka Сluster, @afiskon, scalaby#14
Работа с Akka Сluster, @afiskon, scalaby#14
 
Cake pattern. Presentation by Alex Famin at scalaby#14
Cake pattern. Presentation by Alex Famin at scalaby#14Cake pattern. Presentation by Alex Famin at scalaby#14
Cake pattern. Presentation by Alex Famin at scalaby#14
 
Scala laboratory: Globus. iteration #3
Scala laboratory: Globus. iteration #3Scala laboratory: Globus. iteration #3
Scala laboratory: Globus. iteration #3
 
Testing in Scala by Adform research
Testing in Scala by Adform researchTesting in Scala by Adform research
Testing in Scala by Adform research
 
Spark Intro by Adform Research
Spark Intro by Adform ResearchSpark Intro by Adform Research
Spark Intro by Adform Research
 
Types by Adform Research, Saulius Valatka
Types by Adform Research, Saulius ValatkaTypes by Adform Research, Saulius Valatka
Types by Adform Research, Saulius Valatka
 
Types by Adform Research
Types by Adform ResearchTypes by Adform Research
Types by Adform Research
 
Scalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex GryzlovScalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex Gryzlov
 
Scalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex GryzlovScalding by Adform Research, Alex Gryzlov
Scalding by Adform Research, Alex Gryzlov
 
Spark by Adform Research, Paulius
Spark by Adform Research, PauliusSpark by Adform Research, Paulius
Spark by Adform Research, Paulius
 
Scala Style by Adform Research (Saulius Valatka)
Scala Style by Adform Research (Saulius Valatka)Scala Style by Adform Research (Saulius Valatka)
Scala Style by Adform Research (Saulius Valatka)
 
Spark intro by Adform Research
Spark intro by Adform ResearchSpark intro by Adform Research
Spark intro by Adform Research
 
SBT by Aform Research, Saulius Valatka
SBT by Aform Research, Saulius ValatkaSBT by Aform Research, Saulius Valatka
SBT by Aform Research, Saulius Valatka
 
Scala laboratory: Globus. iteration #2
Scala laboratory: Globus. iteration #2Scala laboratory: Globus. iteration #2
Scala laboratory: Globus. iteration #2
 
Testing in Scala. Adform Research
Testing in Scala. Adform ResearchTesting in Scala. Adform Research
Testing in Scala. Adform Research
 
Scala laboratory. Globus. iteration #1
Scala laboratory. Globus. iteration #1Scala laboratory. Globus. iteration #1
Scala laboratory. Globus. iteration #1
 
Cassandra + Spark + Elk
Cassandra + Spark + ElkCassandra + Spark + Elk
Cassandra + Spark + Elk
 
Опыт использования Spark, Основано на реальных событиях
Опыт использования Spark, Основано на реальных событияхОпыт использования Spark, Основано на реальных событиях
Опыт использования Spark, Основано на реальных событиях
 
ETL со Spark
ETL со SparkETL со Spark
ETL со Spark
 

Último

AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsMemoori
 
SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024Lorenzo Miniero
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Patryk Bandurski
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxNavinnSomaal
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfRankYa
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLScyllaDB
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024Stephanie Beckett
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Scott Keck-Warren
 
Commit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easyCommit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easyAlfredo García Lavilla
 
Training state-of-the-art general text embedding
Training state-of-the-art general text embeddingTraining state-of-the-art general text embedding
Training state-of-the-art general text embeddingZilliz
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfAlex Barbosa Coqueiro
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationSafe Software
 
Powerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time ClashPowerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time Clashcharlottematthew16
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
Vector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector DatabasesVector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector DatabasesZilliz
 
"ML in Production",Oleksandr Bagan
"ML in Production",Oleksandr Bagan"ML in Production",Oleksandr Bagan
"ML in Production",Oleksandr BaganFwdays
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 

Último (20)

AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial Buildings
 
SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptx
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdf
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQL
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024
 
Commit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easyCommit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easy
 
Training state-of-the-art general text embedding
Training state-of-the-art general text embeddingTraining state-of-the-art general text embedding
Training state-of-the-art general text embedding
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdf
 
DMCC Future of Trade Web3 - Special Edition
DMCC Future of Trade Web3 - Special EditionDMCC Future of Trade Web3 - Special Edition
DMCC Future of Trade Web3 - Special Edition
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
 
Powerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time ClashPowerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time Clash
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
Vector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector DatabasesVector Databases 101 - An introduction to the world of Vector Databases
Vector Databases 101 - An introduction to the world of Vector Databases
 
"ML in Production",Oleksandr Bagan
"ML in Production",Oleksandr Bagan"ML in Production",Oleksandr Bagan
"ML in Production",Oleksandr Bagan
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 

Parsers Combinators in Scala, Ilya @lambdamix Kliuchnikov

  • 1. Parser Combinators in Scala Илья Ключников @lambdamix 1
  • 2. Комбинаторные библиотеки ● Actors ● Parsers ● ScalaCheck, Specs ● Scalaz ● SBT ● EDSLs ● ... 2
  • 3. 33/35 11/14 8/9 4/13 3
  • 4. Intro: combinators, parsers Scala Parser Combinators from the Ground Up How to write typical parser 33/35 11/14 Pros, cons 8/9 4/13 Advanced techniques 4
  • 5. Parser? ● Трансформирует текст в структуру + 2*3 + 4 * 3 2 3 5
  • 6. Hello, parser import scala.util.parsing.combinator._ import syntactical.StandardTokenParsers sealed trait Expr case class Num(i: Int) extends Expr case class Var(n: String) extends Expr case class Plus(e1: Expr, e2: Expr) extends Expr case class Mult(e1: Expr, e2: Expr) extends Expr object ArithParsers extends StandardTokenParsers with ImplicitConversions { lexical.delimiters += ("(", ")", "+", "*") def expr: Parser[Expr] = term ~ ("+" ~> expr) ^^ Plus | term def term: Parser[Expr] = factor ~ ("*" ~> term) ^^ Mult | factor def factor: Parser[Expr] = numericLit ^^ { s => Num(s.toInt) } | ident ^^ Var | "(" ~> expr <~ ")" def parseExpr(s: String) = phrase(expr)(new lexical.Scanner(s)) } scala> ArithParsers.parseExpr("1") res1: ArithParsers.ParseResult[parsers2.Expr] = [1.2] parsed: Num(1) scala> ArithParsers.parseExpr("1 + 1 * 2") res2: ArithParsers.ParseResult[parsers2.Expr] = [1.10] parsed: Plus(Num(1),Mult(Num(1),Num(2))) scala> ArithParsers.parseExpr("a * (a * a)") res3: ArithParsers.ParseResult[parsers2.Expr] = [1.12] parsed: Mult(Var(a),Mult(Var(a),Var(a))) 6
  • 7. Example 2: Lambda calculus t ::= terms: x variable λx.t abstraction tt application x y z = ((x y) z) λx.λy.y = λx.(λy.y) 7
  • 8. Example 2 sealed trait Term case class Var(n: String) extends Term case class Lam(v: Var, body: Term) extends Term case class App(t1: Term, t2: Term) extends Term object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers { lexical.delimiters += ("(", ")", ".", "\\") lazy val term: PackratParser[Term] = appTerm | lam lazy val vrb: PackratParser[Var] = ident ^^ Var lazy val lam: PackratParser[Term] = ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam lazy val appTerm: PackratParser[Term] = appTerm ~ aTerm ^^ App | aTerm lazy val aTerm: PackratParser[Term] = vrb | "(" ~> term <~ ")" def parseTerm(s: String) = phrase(term)(new lexical.Scanner(s)) } scala> LamParsers.parseTerm("x y z") res1: LamParsers.ParseResult[parsers.Term] = [1.6] parsed: App(App(Var(x),Var(y)),Var(z)) scala> LamParsers.parseTerm("""\x.\y.x y""") res2: LamParsers.ParseResult[parsers.Term] = [1.10] parsed: Lam(Var(x),Lam(Var(y),App(Var(x),Var(y)))) scala> LamParsers.parseTerm("""(\x.x x) (\x. x x)""") res3: LamParsers.ParseResult[parsers.Term] = [1.19] parsed: App(Lam(Var(x),App(Var(x),Var(x))),Lam(Var(x),App(Var(x),Var(x)))) 8
  • 10. Комбинаторные библиотеки ● Actors ● Parsers ● ScalaCheck, Specs ● Scalaz ● SBT ● EDSLs ● ... 10
  • 11. Принципы комбинаторных библиотек ● Соответствие терминологии библиотеки и терминологии предметной области. ● Состав ● типы, ● примитивы, ● комбинаторы первого порядка, ● комбинаторы высшего порядка. ● Свойство замыкания (композиционность). ● Возможность эффективной реализации. E. Кирпичев. Элементы функциональных языков. Практика функционального программирования №3. 11
  • 13. Предметная область ● Грамматика ● Парсеры ● Регулярная ● LL-парсеры ● Бесконтекстная ● LR-парсеры ● Леворекурсивная ● Нисходящие ● Праворекурсивная ● Восходящие ● Атрибутная ● GLL ● Boolean ● Packrat-парсеры ● PEG ● Parsing with ● ... derivatives 13
  • 15. Подходы к созданию парсеров ● Parser-generator ● Hand-written ● Yacc ● Low-level ● Lex ● High-level ● JavaCC ● ANTLR ● Rats! 15
  • 16. Parsers in Scala C9 Lectures: Dr. Erik Meijer - Functional Programming Fundamentals Chapter 8 of 13 A. Moors, F. Piessens, M. Odersky. Parser Combinators in Scala. Report CW 49 // Feb 2008 16
  • 17. Scala parser combinators are a form of recursive descent parsing with infinite backtracking. 17
  • 18. Parsers in Scala are functional Background: ● W. Burge. Recursive Programming Techniques. Addison-Wesley, 1975. ● Ph. Wadler. How to Replace Failure by a List of Successes. A method for exception handling, backtracking, and pattern matching in lazy functional languages // 1985 ● G. Hutton. Higher-order functions for parsing // Journal of functional programming. 1992/2 ● J. Fokker. Functional Parsers // 1995 18
  • 19. Parser? ● Трансформирует текст в структуру + 2*3 + 4 * 3 2 3 19
  • 20. Парсер – это функция type Parser[A] = String => A Нет композиции функций, не обязательно парсить всю строку type Parser[A] = String => (A, String) Может закончиться неудачей type Parser[A] = String => Option[(A, String)] 20
  • 22. Results trait SimpleResults { type Input trait Result[+T] { def next: Input } case class Success[+T](result: T, next: Input) extends Result[T] case class Failure(msg: String, next: Input) extends Result[Nothing] } object XParser extends SimpleResults { type Input = String val acceptX: Input => Result[Char] = { (in: String) => if (in.charAt(0) == 'x') Success('x', in.substring(1)) else Failure("expected an x", in) } } scala> XParser.acceptX("xyz") res0: parsers.XParser.Result[Char] = Success(x,yz) scala> XParser.acceptX("yz") res1: parsers.XParser.Result[Char] = Failure(expected an x,yz) 22
  • 23. The basis: Parser, |, ~, accept trait SimpleParsers extends SimpleResults { trait Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] { def apply(in: Input) = Parser.this(in) match { case Failure(_, _) => p(in) case Success(x, n) => Success(x, n)}} def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] { def apply(in: Input) = Parser.this(in) match { case Success(x, next) => p(next) match { case Success(x2, next2) => Success((x, x2), next2) case Failure(m, n) => Failure(m, n) } case Failure(m, n) => Failure(m, n)}} } } trait StringParsers extends SimpleParsers { type Input = String private val EOI = 0.toChar def accept(expected: Char) = new Parser[Char] { def apply(in: String) = if (in == "") { if (expected == EOI) Success(expected, "") else Failure("no more input", in) } else if (in.charAt(0) == expected) Success(expected, in.substring(1)) else Failure("expected '" + expected + "'", in) } def eoi = accept(EOI) } 23
  • 24. The simplest parser object OXOParser extends StringParsers { def oxo = accept('o') ~ accept('x') ~ accept('o') def oxos: Parser[Any] = (oxo ~ accept(' ') ~ oxos | oxo) } scala> OXOParser.oxos("123") res2: parsers.OXOParser.Result[Any] = Failure(expected 'o',123) scala> OXOParser.oxos("oxo") res3: parsers.OXOParser.Result[Any] = Success(((o,x),o),) scala> OXOParser.oxos("oxo oxo") res4: parsers.OXOParser.Result[Any] = Success(((((o,x),o), ),((o,x),o)),) scala> OXOParser.oxos("oxo oxo 1") res5: parsers.OXOParser.Result[Any] = Success(((((o,x),o), ),((o,x),o)), 1) scala> (OXOParser.oxos ~ OXOParser.eoi)("oxo oxo 1") res6: parsers.OXOParser.Result[(Any, Char)] = Failure(expected '?', 1) 24
  • 25. Be careful! trait SimpleParsers extends SimpleResults { trait Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] { call-by-name param def apply(in: Input) = Parser.this(in) match { case Failure(_, _) => p(in) case Success(x, n) => Success(x, n)}} def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] { call-by-name param def apply(in: Input) = Parser.this(in) match { case Success(x, next) => p(next) match { case Success(x2, next2) => Success((x, x2), next2) case Failure(m, n) => Failure(m, n) } case Failure(m, n) => Failure(m, n)}} } } object OXOParser extends StringParsers { def oxo = accept('o') ~ accept('x') ~ accept('o') def oxos: Parser[Any] = (oxo ~ accept(' ') ~ oxos | oxo) } 25
  • 26. Be careful! trait SimpleParsers extends SimpleResults { trait Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] def |[U >: T](p: Parser[U]): Parser[U] = new Parser[U] { call-by-value param def apply(in: Input) = Parser.this(in) match { case Failure(_, _) => p(in) case Success(x, n) => Success(x, n)}} def ~[U](p: Parser[U]): Parser[(T, U)] = new Parser[(T, U)] { call-by-value param def apply(in: Input) = Parser.this(in) match { case Success(x, next) => p(next) match { case Success(x2, next2) => Success((x, x2), next2) case Failure(m, n) => Failure(m, n) } case Failure(m, n) => Failure(m, n)}} } } object OXOParser extends StringParsers { def oxo = accept('o') ~ accept('x') ~ accept('o') def oxos: Parser[Any] = (oxo ~ accept(' ') ~ oxos | oxo) } scala> OXOParser.oxos("123") java.lang.StackOverflowError at parsers.OXOParser$.oxo(stepbystep.scala:67) at parsers.OXOParser$.oxos(stepbystep.scala:69) at parsers.OXOParser$.oxos(stepbystep.scala:69) at parsers.OXOParser$.oxos(stepbystep.scala:69) at parsers.OXOParser$.oxos(stepbystep.scala:69) at parsers.OXOParser$.oxos(stepbystep.scala:69) at parsers.OXOParser$.oxos(stepbystep.scala:69) 26 ...
  • 28. Where is a problem? trait SimpleParsers extends SimpleResults { trait Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] { def apply(in: Input) = Parser.this(in) match { case Failure(_, _) => p(in) case Success(x, n) => Success(x, n)}} def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] { def apply(in: Input) = Parser.this(in) match { case Success(x, next) => p(next) match { case Success(x2, next2) => Success((x, x2), next2) case Failure(m, n) => Failure(m, n) } case Failure(m, n) => Failure(m, n)}} } } object OXOParser extends StringParsers { def oxo = accept('o') ~ accept('x') ~ accept('o') def oxos: Parser[Any] = (oxo ~ accept(' ') ~ oxos | oxo) } 28
  • 29. Too much “threading” trait SimpleParsers extends SimpleResults { trait Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] { def apply(in: Input) = Parser.this(in) match { case Failure(_, _) => p(in) case Success(x, n) => Success(x, n)}} def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] { def apply(in: Input) = Parser.this(in) match { case Success(x, next) => p(next) match { case Success(x2, next2) => Success((x, x2), next2) case Failure(m, n) => Failure(m, n) } case Failure(m, n) => Failure(m, n)}} } } object OXOParser extends StringParsers { def oxo = accept('o') ~ accept('x') ~ accept('o') def oxos: Parser[Any] = (oxo ~ accept(' ') ~ oxos | oxo) } 29
  • 30. Improved Results trait SimpleResults { type Input trait Result[+T] { def next: Input def map[U](f: T => U): Result[U] def flatMapWithNext[U](f: T => Input => Result[U]): Result[U] def append[U >: T](alt: => Result[U]): Result[U] } case class Success[+T](result: T, next: Input) extends Result[T] { def map[U](f: T => U) = Success(f(result), next) def flatMapWithNext[U](f: T => Input => Result[U]) = f(result)(next) def append[U >: T](alt: => Result[U]) = this } case class Failure(msg: String, next: Input) extends Result[Nothing] { def map[U](f: Nothing => U) = this def flatMapWithNext[U](f: Nothing => Input => Result[U]) = this def append[U](alt: => Result[U]) = alt } } ●map -... ●flatMapWithNext - ... ●append – for multiple results (we do not consider it here) 30
  • 31. Parser is a function with many results type Parser[A] = String => A type Parser[A] = String => (A, String) type Parser[A] = String => Option[(A, String)] type Parser[A] = String => List[(A, String)] 31
  • 32. After improving trait SimpleResults { type Input trait Result[+T] { def next: Input def map[U](f: T => U): Result[U] def flatMapWithNext[U](f: T => Input => Result[U]): Result[U] def append[U >: T](alt: => Result[U]): Result[U] } case class Success[+T](result: T, next: Input) extends Result[T] { def map[U](f: T => U) = Success(f(result), next) def flatMapWithNext[U](f: T => Input => Result[U]) = f(result)(next) def append[U >: T](alt: => Result[U]) = this } case class Failure(msg: String, next: Input) extends Result[Nothing] { def map[U](f: Nothing => U) = this def flatMapWithNext[U](f: Nothing => Input => Result[U]) = this def append[U](alt: => Result[U]) = alt } } trait SimpleParsers extends SimpleResults { abstract class Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] def flatMap[U](f: T => Parser[U]): Parser[U] = new Parser[U] { def apply(in: Input) = Parser.this(in) flatMapWithNext (f) } def map[U](f: T => U): Parser[U] = new Parser[U] { def apply(in: Input) = Parser.this(in) map (f) } def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] { def apply(in: Input) = Parser.this(in) append p(in) } def ~[U](p: => Parser[U]): Parser[(T, U)] = for (a <- this; b <- p) yield (a, b) Hey! 32 } }
  • 33. So, Parser is a Monad!! 33
  • 34. Where is my “withFilter”? ● In Scala 2.10 ● It was not easy... 34
  • 35. Removing noise... trait SimpleParsers extends SimpleResults { def Parser[T](f: Input => Result[T]) = new Parser[T] { def apply(in: Input) = f(in) } Removing Boilerplate abstract class Parser[+T] extends (Input => Result[T]) { def apply(in: Input): Result[T] New Parser{apply} def flatMap[U](f: T => Parser[U]): Parser[U] = Parser { in => Parser.this(in) flatMapWithNext (f) } def map[U](f: T => U): Parser[U] = Parser { in => Parser.this(in) map (f) } def |[U >: T](p: => Parser[U]): Parser[U] = Parser { in => Parser.this(in) append p(in) } def ~[U](p: => Parser[U]): Parser[(T, U)] = for (a <- this; b <- p) yield (a, b) } } 35
  • 37. Real Parsers package scala.util.parsing.combinator trait Parsers { Stream annotated with type Elem type Input = Reader[Elem] coordinates sealed abstract class ParseResult[+T] case class Success[+T](result: T, override val next: Input) extends ParseResult[T] sealed abstract class NoSuccess(val msg: String, override val next: Input) extends ParseResult[Nothing] case class Failure(override val msg: String, override val next: Input) extends NoSuccess(msg, next) case class Error(override val msg: String, override val next: Input) extends NoSuccess(msg, next) ... abstract class Parser[+T] extends (Input => ParseResult[T]) { Controlling ... backtracking } case class ~[+a, +b](_1: a, _2: b) { override def toString = "("+ _1 +"~"+ _2 +")" } Deconstructing sequencing } package scala.util.parsing.input abstract class Reader[+T] { def first: T def rest: Reader[T] 37 }
  • 38. Simplified picture package scala.util.parsing.combinator trait Parsers { type Elem type Input = Reader[Elem] sealed abstract class ParseResult[+T] abstract class Parser[+T] extends (Input => ParseResult[T]) { combinators } combinators } 38
  • 40. Basic Combinators package scala.util.parsing.combinator trait Parsers { def elem(kind: String, p: Elem => Boolean): Parser[Elem] def elem(e: Elem): Parser[Elem] implicit def accept(e: Elem): Parser[Elem] abstract class Parser[+T] extends (Input => ParseResult[T]) { def ~ [U](q: => Parser[U]): Parser[~[T, U]] def <~ [U](q: => Parser[U]): Parser[T] def ~! [U](p: => Parser[U]): Parser[~[T, U]] def | [U >: T](q: => Parser[U]): Parser[U] def ||| [U >: T](q0: => Parser[U]): Parser[U] def ^^ [U](f: T => U): Parser[U] def ^^^ [U](v: => U): Parser[U] def ^? [U](f: PartialFunction[T, U], error: T => String): Parser[U] def ^? [U](f: PartialFunction[T, U]): Parser[U] def >>[U](fq: T => Parser[U]) def *: Parser[List[T]] def +: Parser[List[T]] def ?: Parser[Option[T]] } } 40
  • 41. Swiss army knife Combinators package scala.util.parsing.combinator trait Parsers { def commit[T](p: => Parser[T]): Parser[T] def accept[ES <% List[Elem]](es: ES): Parser[List[Elem]] def accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] def failure(msg: String): Parser[Nothing] def err(msg: String): Parser[Nothing] def success[T](v: T): Parser[T] def rep[T](p: => Parser[T]): Parser[List[T]] def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] def rep1[T](p: => Parser[T]): Parser[List[T]] def rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]] def repN[T](num: Int, p: => Parser[T]): Parser[List[T]] def rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] def chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] def opt[T](p: => Parser[T]): Parser[Option[T]] def not[T](p: => Parser[T]): Parser[Unit] def guard[T](p: => Parser[T]): Parser[T] def positioned[T <: Positional](p: => Parser[T]): Parser[T] def phrase[T](p: Parser[T]): Parser[T] } Inspired by G. Hutton and E. Meijer. Monadic Parser Combinators. 41
  • 42. Lexing 42
  • 43. Простейший (low-level) парсер trait SimplestParsers extends Parsers { type Elem = Char def whitespaceChar: Parser[Char] = elem("space char", ch => ch <= ' ' && ch != EofCh) def letter: Parser[Char] = elem("letter", _.isLetter) def whitespace: Parser[List[Char]] = rep(whitespaceChar) def ident: Parser[List[Char]] = rep1(letter) def parse[T](p: Parser[T], in: String): ParseResult[T] = p(new CharSequenceReader(in)) } scala> val p1 = new SimplestParsers{} p1: java.lang.Object with parsers.SimplestParsers = $anon$1@17d59ff0 scala> import p1._ import p1._ scala> parse(letter, "foo bar") res0: p1.ParseResult[Char] = [1.2] parsed: f scala> parse(ident, "foo bar") res1: p1.ParseResult[List[Char]] = [1.4] parsed: List(f, o, o) scala> parse(ident, "123") res2: p1.ParseResult[List[Char]] = [1.1] failure: letter expected 43 123 ^
  • 44. Towards AST trait Token case class Id(n: String) extends Token case class Num(n: String) extends Token case object ErrorToken extends Token trait TokenParsers extends Parsers { type Elem = Char private def whitespaceChar: Parser[Char] = elem("space char", ch => ch <= ' ' && ch != EofCh) def letter: Parser[Char] = elem("letter", _.isLetter) def digit: Parser[Char] = elem("digit", _.isDigit) def whitespace: Parser[List[Char]] = rep(whitespaceChar) def idLit: Parser[String] = rep1(letter) ^^ { _.mkString("") } def numLit: Parser[String] = rep1(digit) ^^ { _.mkString("") } def id: Parser[Token] = idLit ^^ Id def num: Parser[Token] = numLit ^^ Num def token = id | num def parse[T](p: Parser[T], in: String): ParseResult[T] = p(new CharSequenceReader(in)) } 44
  • 45. Lexer/Scanner trait Scanners extends TokenParsers { class Scanner(in: Reader[Char]) extends Reader[Token] { def this(in: String) = this(new CharArrayReader(in.toCharArray())) private val (tok, rest1, rest2) = whitespace(in) match { case Success(_, in1) => token(in1) match { case Success(tok, in2) => (tok, in1, in2) case ns: NoSuccess => (ErrorToken, ns.next, ns.next.rest) } case ns: NoSuccess => (ErrorToken, ns.next, ns.next.rest) } def first = tok def rest = new Scanner(rest2) } } scala> val scs = new Scanners {} scs: java.lang.Object with Scanners = $anon$1@68a750a scala> val reader = new scs.Scanner("foo 123") reader: scs.Scanner = Scanners$Scanner@6a75863f scala> reader.first res0: Token = Id(foo) scala> reader.rest.first res1: Token = Num(123) scala> reader.rest.rest.first res2: Token = ErrorToken 45
  • 46. Lexing Reader[Char] Low-level Parsing Reader[Token] 46
  • 48. RAM++ 48
  • 49. AST 49
  • 50. Parser Implicit magic “~” magic 50
  • 51. Итак, ... ● Parser Combinators in Scala позволяют описывать исполняемые грамматики в виде, близком к BNF. ● Внутреннее устройство Parser Combinators - самый настоящий Programming Pearl. ● Internal DSL for External DSLs. 51
  • 52. Discussion (Parser Combinators vs Parser Generator) 52
  • 53. PROS ● Тот же язык (Scala) – не нужно учить новый инструмент. ● Исполняемая грамматика - всегда актуальный код. ● Краткость + богатая выразительность: LL(*) и больше (в том числе, контекстные грамматики). ● Можно делать fusion синтаксического разбора и чего-нибудь еще. ● Модульность 53
  • 54. CONS ● Некоторые простые вещи могут кодироваться очень непросто. ● Performance. 54
  • 55. Performance Hand-written Lift-json is 350 times faster than the version based on parser combinators (proof link) 55
  • 57. Parsing “9”: Too much backtracking import scala.util.parsing.combinator._ import syntactical.StandardTokenParsers sealed trait Expr case class Num(i: Int) extends Expr case class Var(n: String) extends Expr case class Plus(e1: Expr, e2: Expr) extends Expr case class Mult(e1: Expr, e2: Expr) extends Expr object ArithParsers extends StandardTokenParsers with ImplicitConversions { lexical.delimiters += ("(", ")", "+", "*") def expr: Parser[Expr] = term ~ ("+" ~> expr) ^^ Plus | term def term: Parser[Expr] = factor ~ ("*" ~> term) ^^ Mult | factor def factor: Parser[Expr] = numericLit ^^ { s => Num(s.toInt) } | ident ^^ Var | "(" ~> expr <~ ")" def parseExpr(s: String) = phrase(expr)(new lexical.Scanner(s)) } 57
  • 59. + Left Recursion sealed trait Term case class Var(n: String) extends Term case class Lam(v: Var, body: Term) extends Term case class App(t1: Term, t2: Term) extends Term object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers { lexical.delimiters += ("(", ")", ".", "\\") lazy val term: PackratParser[Term] = appTerm | lam lazy val vrb: PackratParser[Var] = ident ^^ Var lazy val lam: PackratParser[Term] = ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam lazy val appTerm: PackratParser[Term] = appTerm ~ aTerm ^^ App | aTerm lazy val aTerm: PackratParser[Term] = vrb | "(" ~> term <~ ")" def parseTerm(s: String) = phrase(term)(new lexical.Scanner(s)) } 59
  • 60. + Left Recursion sealed trait Term case class Var(n: String) extends Term case class Lam(v: Var, body: Term) extends Term case class App(t1: Term, t2: Term) extends Term object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers { lexical.delimiters += ("(", ")", ".", "\\") lazy val term: PackratParser[Term] = appTerm | lam lazy val vrb: PackratParser[Var] = ident ^^ Var lazy val lam: PackratParser[Term] = ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam lazy val appTerm: PackratParser[Term] = appTerm ~ aTerm ^^ App | aTerm lazy val aTerm: PackratParser[Term] = vrb | "(" ~> term <~ ")" def parseTerm(s: String) = phrase(term)(new lexical.Scanner(s)) } 60
  • 61. Without Left Recursion sealed trait Term case class Var(n: String) extends Term case class Lam(v: Var, body: Term) extends Term case class App(t1: Term, t2: Term) extends Term object LamParsers extends StandardTokenParsers with ImplicitConversions { lexical.delimiters += ("(", ")", ".", "\\") lazy val term: Parser[Term] = appTerm | lam lazy val vrb: Parser[Var] = ident ^^ Var lazy val lam: Parser[Term] = ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam lazy val appTerm: Parser[Term] = (aTerm +) ^^ { _.reduceLeft(App) } lazy val aTerm: Parser[Term] = vrb | "(" ~> term <~ ")" def parseTerm(s: String) = phrase(term)(new lexical.Scanner(s)) } 61
  • 63. Other Parsers ● Parboiled Parser (PEG parser) ● GLL parser ● Derivative combinators http://stackoverflow.com/questions/4423514/scala-parsers-availability-differences-and-combining 63
  • 64. Trends ● Merging two worlds ● Compositionality (Functional programming) ● Performance 64