RegexParsers や StandardTokenParsers を使用すべきでしょうか。それとも、これらはそもそもこの種の構文の解析には適していないのでしょうか? 構文の例はこちらで確認できます。
4157 次
4 に答える
5
私なら正規表現を使います。いくつかの処理が簡単になり、残りの部分は標準的な書き方で済みます。
/** Reads a unified diff line by line and prints a one-line description of
  * every recognised line to standard output.
  *
  * File headers are expected in the form `--- path ''timestamp''` /
  * `+++ path ''timestamp''` (timestamp wrapped in doubled single quotes).
  * Header lines that do not follow that form fall through to the
  * added/removed-line cases, because they also start with `+` / `-`.
  *
  * @param src the diff text; it is consumed via `getLines()` and not closed here
  */
def process(src: scala.io.Source): Unit = {
  import scala.util.matching.Regex

  val FilePattern = """(.*) ''(.*)''"""
  val OriginalFile = new Regex("--- " + FilePattern, "path", "timestamp")
  // '+' is a regex quantifier, so the literal plus signs must be escaped;
  // an unescaped "+++" is rejected by the regex compiler.
  val NewFile = new Regex("""\+\+\+ """ + FilePattern, "path", "timestamp")
  // The '+' in front of the new-file range is a literal character as well.
  val Chunk = new Regex(
    """@@ -(\d+),(\d+) \+(\d+),(\d+) @@""",
    "orgStarting", "orgSize", "newStarting", "newSize")
  val AddedLine = """\+(.*)""".r
  val RemovedLine = """-(.*)""".r
  val UnchangedLine = """ (.*)""".r

  // Case order matters: the header patterns must be tried before the generic
  // added/removed patterns, which would match the same lines.
  src.getLines() foreach {
    case OriginalFile(path, _) => println("Original file: " + path)
    case NewFile(path, _) => println("New file: " + path)
    case Chunk(l1, s1, l2, s2) =>
      // The captures are Strings, so they are interpolated as text rather
      // than passed to a %d conversion (which only accepts numbers).
      println(s"Modifying $s1 lines at line $l1, to $s2 lines at $l2")
    case AddedLine(line) => println("Adding line " + line)
    case RemovedLine(line) => println("Removing line " + line)
    case UnchangedLine(line) => println("Keeping line " + line)
    // Anything else (empty lines, "\ No newline at end of file", ...) is
    // skipped instead of aborting the whole run with a MatchError.
    case _ => ()
  }
}
2010-08-24T19:56:02.687 に回答
4
この形式は解析しやすいように設計されており、正規表現を使ったり入力をトークン化したりしなくても解析できます。1 行ずつ読み進めて、先頭の数文字を見るだけです。ファイル ヘッダーとチャンク ヘッダーにはもう少し注意が必要ですが、split で処理できないものは何もありません。
もちろん、いくつかの解析ライブラリの使用方法を学びたい場合は、それを選択してください。
2010-08-24T19:13:08.430 に回答
3
RegexParsers を使った解決方法はこちらです。
import scala.util.parsing.combinator.RegexParsers
/** Parser for a single-file unified diff, built on `RegexParsers`.
  *
  * The grammar is line oriented, so automatic whitespace skipping is turned
  * off and every line terminator is consumed explicitly by `newline`.
  * Every line, including the last one, must end with `\n`.
  */
object UnifiedDiffParser extends RegexParsers {
  // case classes representing the data of the diff
  case class UnifiedDiff(oldFile: File, newFile: File, changeChunks: List[ChangeChunk])
  case class File(name: String, timeStamp: String)
  case class ChangeChunk(rangeInformation: RangeInformation, changeLines: List[String])
  case class RangeInformation(oldOffset: Int, oldLength: Int, newOffset: Int, newLength: Int)

  // Leading blanks are significant (a context line starts with a space).
  override def skipWhitespace = false

  /** Whole diff: old-file header, new-file header, then one or more hunks. */
  def unifiedDiff: Parser[UnifiedDiff] = oldFile ~ newFile ~ rep1(changeChunk) ^^ {
    case of ~ nf ~ l => UnifiedDiff(of, nf, l)
  }

  /** Header line `--- <name> <timestamp>`. */
  def oldFile: Parser[File] = fileHeader("--- ")

  /** Header line `+++ <name> <timestamp>`. */
  def newFile: Parser[File] = fileHeader("+++ ")

  // Shared shape of both header lines: marker, name, whitespace, timestamp.
  private def fileHeader(marker: String): Parser[File] =
    (marker ~> filename) ~ ("""\s+""".r ~> timestamp <~ newline) ^^ {
      case f ~ t => File(f, t)
    }

  /** File name: a run of non-whitespace characters. */
  def filename: Parser[String] = """[\S]+""".r

  /** Timestamp: the rest of the header line. */
  def timestamp: Parser[String] = """.*""".r

  /** One hunk: the `@@ ... @@` range line followed by at least one change line. */
  def changeChunk: Parser[ChangeChunk] = rangeInformation ~ (newline ~> rep1(lineChange)) ^^ {
    case ri ~ l => ChangeChunk(ri, l)
  }

  /** Range line `@@ -start[,count] +start[,count] @@`.
    *
    * The unified format leaves `,count` out for one-line hunks, so a missing
    * count is read as 1.
    */
  def rangeInformation: Parser[RangeInformation] =
    ("@@ " ~> "-" ~> number) ~ lineCount ~ (" +" ~> number) ~ lineCount <~ " @@" ^^ {
      case a ~ b ~ c ~ d => RangeInformation(a, b, c, d)
    }

  // Optional ",<count>" part of a range; an omitted count means one line.
  private def lineCount: Parser[Int] = opt("," ~> number) ^^ { _.getOrElse(1) }

  /** Any hunk body line; the result keeps the leading ' ', '+' or '-' marker. */
  def lineChange: Parser[String] = contextLine | addedLine | deletedLine
  def contextLine: Parser[String] = """ .*""".r <~ newline
  def addedLine: Parser[String] = """\+.*""".r <~ newline
  def deletedLine: Parser[String] = """-.*""".r <~ newline

  def newline: Parser[String] = """\n""".r
  def number: Parser[Int] = """\d+""".r ^^ {_.toInt}

  /** Parses the file named by the first argument, or standard input when no
    * argument is given, and prints the parse result.
    */
  def main(args: Array[String]): Unit = {
    args.headOption match {
      case None =>
        // read from stdin
        println(parseAll(unifiedDiff, Console.in))
      case Some(path) =>
        val reader = new java.io.FileReader(path)
        // Print inside the try block so the result is rendered before the
        // reader is closed.
        try println(parseAll(unifiedDiff, reader))
        finally reader.close()
    }
  }
}
2010-08-25T09:32:58.133 に回答
1
git diff-tree の実行によって生成される git diff 用の Scala パーサーを構築しようとしているときに、この質問に行き当たりました。これは unified diff と非常によく似ていますが、いくつかの興味深い違いがあります。
私は上記の回答を大いに参考にして、最終的にここに掲載するパーサーを作成しました。もちろん、元の質問者が求めていたものと厳密に一致するわけではありませんが、他の人の役に立つかもしれないと考えました。
import util.parsing.combinator._
object GitDiff {
  /** Builds a [[GitDiff]] from the raw file-name pair of a diff header.
    *
    * File names carry an "a/" or "b/" prefix; everything up to and including
    * the first '/' is removed from both names so they can be compared.
    */
  def apply(files: (String, String), op: FileOperation, chunks: List[ChangeChunk]): GitDiff = {
    val (rawOld, rawNew) = files
    new GitDiff(afterFirstSlash(rawOld), afterFirstSlash(rawNew), op, chunks)
  }

  // Text following the first '/'; empty when the name contains no '/'.
  private def afterFirstSlash(name: String): String = {
    val slashAt = name.indexOf('/')
    if (slashAt < 0) "" else name.substring(slashAt + 1)
  }
}
/** One file entry of a git diff.
  *
  * @param oldFile name of the file before the change
  * @param newFile name of the file after the change
  * @param op      operation recorded for the file
  * @param chunks  the changed hunks
  */
case class GitDiff(
    oldFile: String,
    newFile: String,
    op: FileOperation,
    chunks: List[ChangeChunk]) {

  /** True when the old and the new file name differ. */
  val isRename: Boolean = !(oldFile == newFile)
}
/** Operation a diff entry performs on its file. */
sealed trait FileOperation

/** The file is created; `mode` is the number read from the "new file mode" line. */
case class NewFile(mode: Int) extends FileOperation

/** The file is removed; `mode` is the number read from the "deleted file mode" line. */
case class DeletedFile(mode: Int) extends FileOperation

/** The file is neither created nor deleted. */
case object UpdatedFile extends FileOperation
/** A single line inside a hunk. */
sealed trait LineChange {
  /** Text of the line. */
  def line: String
}

/** Line that is the same in both versions. */
case class ContextLine(line: String) extends LineChange

/** Line that exists only in the old version. */
case class LineRemoved(line: String) extends LineChange

/** Line that exists only in the new version. */
case class LineAdded(line: String) extends LineChange
/** Numbers taken from a hunk's `@@ -old +new @@` header line.
  *
  * @param oldOffset first number of the old-file range
  * @param oldLength second number of the old-file range
  * @param newOffset first number of the new-file range
  * @param newLength second number of the new-file range
  */
case class RangeInformation(
    oldOffset: Int,
    oldLength: Int,
    newOffset: Int,
    newLength: Int)
/** One hunk of a diff: its range header plus the lines it contains. */
case class ChangeChunk(
    rangeInformation: RangeInformation,
    changeLines: List[LineChange])
// Code taken from http://stackoverflow.com/questions/3560073/how-to-write-parser-for-unified-diff-syntax
/** Parser for `git diff` style output containing one or more file entries.
  *
  * The grammar is line oriented, so automatic whitespace skipping is turned
  * off and every line terminator is consumed explicitly by `newline`.
  */
object GitDiffParser extends RegexParsers {

  // Leading blanks are significant (a context line starts with a space).
  override def skipWhitespace = false

  /** One or more file entries. */
  def allDiffs: Parser[List[GitDiff]] = rep1(gitDiff)

  /** A single entry: `diff --git` line, operation/index lines, then hunks. */
  def gitDiff: Parser[GitDiff] = filesChanged ~ fileOperation ~ diffChunks ^^ {
    case files ~ op ~ chunks => GitDiff(files, op, chunks)
  }

  /** Line `diff --git <old> <new>`, yielding both names as written. */
  def filesChanged: Parser[(String, String)] =
    "diff --git " ~> filename ~ (" " ~> filename) <~ newline ^^ { case f1 ~ f2 => (f1,f2) }

  /** Optional "deleted file mode" / "new file mode" line followed by the
    * mandatory index line; without a mode line the entry is an update.
    */
  def fileOperation: Parser[FileOperation] =
    opt(deletedFileMode | newFileMode) <~ index ^^ { _ getOrElse UpdatedFile }

  /** Line `index <hash>..<hash>[ <mode>]`; its content is discarded by the caller. */
  def index: Parser[Any] = ( "index " ~ hash ~ ".." ~ hash ) ~> opt(" " ~> mode) <~ newline

  def deletedFileMode: Parser[DeletedFile] = "deleted file mode " ~> mode <~ newline ^^ { m => DeletedFile(m) }
  def newFileMode: Parser[NewFile] = "new file mode " ~> mode <~ newline ^^ { m => NewFile(m) }

  /** Abbreviated or full object name: at least 7 hex digits, so longer
    * abbreviations and unabbreviated hashes are accepted too.
    */
  def hash: Parser[String] = """[0-9a-f]{7,}""".r

  /** Six-digit file mode, converted with `toInt` (i.e. read as a decimal number). */
  def mode: Parser[Int] = """\d{6}""".r ^^ { _.toInt }

  /** The `---` / `+++` header lines (names discarded) followed by the hunks. */
  def diffChunks: Parser[List[ChangeChunk]] = (oldFile ~ newFile) ~> rep1(changeChunk)

  def oldFile: Parser[String] = "--- " ~> filename <~ newline
  def newFile: Parser[String] = "+++ " ~> filename <~ newline

  /** File name: a run of non-whitespace characters. */
  def filename: Parser[String] = """[\S]+""".r

  /** One hunk.
    *
    * Text that follows `@@` on the range line starts with a space, so it is
    * picked up by `contextLine` and prepended to the hunk's lines.
    * NOTE(review): that makes such trailing text appear as an ordinary
    * ContextLine in the result; confirm this is what consumers expect.
    */
  def changeChunk: Parser[ChangeChunk] = rangeInformation ~ opt(contextLine) ~ (opt(newline) ~> rep1(lineChange)) ^^ {
    case ri ~ opCtx ~ lines => ChangeChunk(ri, opCtx map (_ :: lines) getOrElse (lines))
  }

  /** Range line `@@ -start[,count] +start[,count] @@`.
    *
    * The unified format leaves `,count` out for one-line ranges, so a missing
    * count is read as 1.
    */
  def rangeInformation: Parser[RangeInformation] =
    ("@@ " ~> "-" ~> number) ~ opt("," ~> number) ~ (" +" ~> number) ~ opt("," ~> number) <~ " @@" ^^ {
      case a ~ b ~ c ~ d => RangeInformation(a, b getOrElse 1, c, d getOrElse 1)
    }

  /** Any hunk body line; the leading ' ', '+' or '-' marker is dropped. */
  def lineChange: Parser[LineChange] = contextLine | addedLine | deletedLine
  def contextLine: Parser[ContextLine] = " " ~> """.*""".r <~ newline ^^ { l => ContextLine(l) }
  def addedLine: Parser[LineAdded] = "+" ~> """.*""".r <~ newline ^^ { l => LineAdded(l) }
  def deletedLine: Parser[LineRemoved] = "-" ~> """.*""".r <~ newline ^^ { l => LineRemoved(l) }

  def newline: Parser[String] = """\n""".r
  def number: Parser[Int] = """\d+""".r ^^ { _.toInt }

  /** Parses a complete diff held in a string. */
  def parse(str: String) = parseAll(allDiffs, str)

  /** Parses the file named by the first argument, or standard input when no
    * argument is given. Prints the parsed diffs; a parse failure is raised
    * through `sys.error`.
    */
  def main(args: Array[String]): Unit = {
    def report(result: ParseResult[List[GitDiff]]): Unit = result match {
      case Success(s,_) => println( s )
      case NoSuccess(msg,_) => sys.error("ERROR: " + msg)
    }

    args.headOption match {
      case None =>
        // read from stdin
        report(parseAll(allDiffs, Console.in))
      case Some(path) =>
        val reader = new java.io.FileReader(path)
        // Report inside the try block so the result is handled before the
        // reader is closed.
        try report(parseAll(allDiffs, reader))
        finally reader.close()
    }
  }
}
2011-12-01T03:16:05.197 に回答