1

ファイルの内容を行ごとに解析するための多くのアプローチを試しましたが、現時点では機能しておらず、実行すると大量のメモリ (16GB 以上) を使用します。

これは、解析したいファイルのサブセットですhttp://lpaste.net/144719

次の 3 種類のエラーが必要です。

1) バックトレースのエラー (複数行、最初の行は 3))
2) もう 1 行の単一エラー
3) 単一行エラー

これが私の現在のコードです:

import qualified Data.ByteString as B
import Data.ByteString.Char8 as B8 hiding (lines, filter, unlines, head, readFile, take, length,
                                           putStrLn, tail, map, concat, or, writeFile, intersperse,
                                           groupBy, hGetContents)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Attoparsec.Text hiding (take)
import Control.Applicative
import Control.Monad (replicateM, mapM)
import Data.Either (either)
import Data.List (intersperse, groupBy)
import System.Environment
import qualified System.IO as SIO

data TimeStamp = MkTimeStamp T.Text
               deriving Show

data LogFileInfo = BackTraceLineInfo T.Text
                 | BackTraceInfo TimeStamp T.Text T.Text [LogFileInfo]
                 | Error TimeStamp T.Text
                 | LargeError TimeStamp T.Text T.Text
                 deriving Show

data LineType = SingleLineError TimeStamp T.Text
              | DirectoryInfo T.Text
              | ErrorInfo T.Text
              | LineBackTraceInfo T.Text
              | BackTraceString T.Text
              | BackTraceLine T.Text
              deriving Show

parseTimeStamp :: Parser TimeStamp
parseTimeStamp = do
  year <- many digit
  char '-'
  month <- many digit
  char '-'
  day <- many digit
  char ' '
  hour <- many digit
  char ':'
  minute <- many digit
  char ':'
  second <- many digit
  char ' '
  (return . MkTimeStamp) $ T.pack $ year ++ "-" ++ month ++ "-" ++ day ++ " " ++ hour ++ ":" ++ minute ++ ":" ++ second

parseError :: Parser LineType
parseError = do
  string $ T.pack "ERROR - "
  timeStamp <- parseTimeStamp
  errorInfo <- parseAnyLine
  return $ SingleLineError timeStamp errorInfo

parseDirectoryInfo :: Parser LineType
parseDirectoryInfo = do
  char '/'
  directoryInfo <- parseAnyLine
  (return . DirectoryInfo) $ T.append (T.pack "/") directoryInfo

parseErrorInfo :: Parser LineType
parseErrorInfo = do
  errorInfo <- parseAnyLine
  (return . ErrorInfo) errorInfo

parseBackTraceString :: Parser LineType
parseBackTraceString = do
  let backTraceStr = T.pack " Backtrace: "
  string backTraceStr
  return $ BackTraceString backTraceStr

parseBacktraceLine :: Parser LineType
parseBacktraceLine = do
  char '#'
  number <- many1 digit
  backTraceInfo <- parseAnyLine
  let numberPart = T.pack $ '#' : number
  return $ LineBackTraceInfo $ T.append numberPart backTraceInfo

parseAnyLine :: Parser T.Text
parseAnyLine = fmap T.pack $ many anyChar

-- Skips n lines for allowing other parsers to succeed
skipNLines n = replicateM n $ manyTill anyChar endOfLine

-- performParser :: Parser a -> T.Text -> BackTraceInfo
performParser = parseOnly

getEitherRight :: Either a b -> b
getEitherRight (Right b) = b

parseLogFile :: [T.Text] -> [LineType]
parseLogFile textxs = 
  let listaEithers = mapM (parseOnly $
                           try parseError
                       <|> try parseDirectoryInfo
                       <|> try parseBacktraceLine
                       <|> try parseBackTraceString
                       <|> parseErrorInfo) textxs
  in getEitherRight listaEithers

customUnlines :: [String] -> String
customUnlines []     = []
customUnlines (x:xs) = if x == "\n"
                         then '\n':customUnlines xs
                         else x ++ "\n" ++ customUnlines xs

main = do
  (fileName : _) <- getArgs
  h <- SIO.openFile fileName SIO.ReadMode
  SIO.hSetEncoding h SIO.latin1
  fileContents <- SIO.hGetContents h
  let titleLength           = length fileName
      titleWithoutExtension = take (titleLength - 4) fileName
      allNonEmptyLines      = map T.pack $ intersperse "\n" $ tail $ filter (/= "") $ lines fileContents -- [T.Text]
      listParseResults      = parseLogFile allNonEmptyLines -- [LineType]
      -- onlyModelErrors       = filter isModelError parseResult -- [LogFileInfo]
      -- onlyOneRepresentative = map head $ groupBy equalErrors onlyModelErrors
      listOfStrings         = map show listParseResults
  writeFile (titleWithoutExtension ++ ".logsummary") $ customUnlines listOfStrings

最初の問題は、パーサーが何も解析していないことです。2 つ目の問題は、16GB の RAM を使用していることです。どうすれば私のアプローチを改善できますか?

4

1 に答える 1