From a967a8e53235f79ad7cc888771927ed1a57eb97d Mon Sep 17 00:00:00 2001 From: Patrick Aldis Date: Tue, 3 Mar 2026 14:26:31 +0000 Subject: [PATCH] Datalog Parser annotates SrcLoc --- datalog/src/Datalog/Parser.hs | 67 +++++++++++++++-------------------- 1 file changed, 28 insertions(+), 39 deletions(-) diff --git a/datalog/src/Datalog/Parser.hs b/datalog/src/Datalog/Parser.hs index dac730f..7a80615 100644 --- a/datalog/src/Datalog/Parser.hs +++ b/datalog/src/Datalog/Parser.hs @@ -1,11 +1,9 @@ -{-# LANGUAGE MultilineStrings #-} {-# LANGUAGE BlockArguments #-} +{-# LANGUAGE MultilineStrings #-} module Datalog.Parser ( parseTerm, - parseAtom, parseRule, - parseQuery, parseProgram, ) where @@ -13,7 +11,8 @@ where import Data.Text (Text) import Data.Text qualified as T import Data.Void (Void) -import Datalog.Syntax +import Datalog.Syntax hiding (Atom, Program, Rule, Term) +import Datalog.Syntax (Atom' (..), Program' (..), Rule' (..), Term' (..)) import Text.Megaparsec import Text.Megaparsec.Char import Text.Megaparsec.Char.Lexer qualified as L @@ -21,6 +20,11 @@ import Text.Pretty.Simple type Parser = Parsec Void Text +type Atom = Atom' SrcLoc +type Term = Term' SrcLoc +type Rule = Rule' SrcLoc +type Program = Program' SrcLoc + lexeme :: (MonadParsec e Text m) => m a -> m a lexeme = L.lexeme whitespace @@ -41,61 +45,46 @@ comma, period :: (MonadParsec e Text m) => m () comma = () <$ symbol "," period = () <$ symbol "." -parseCon :: (MonadParsec e Text m) => m Term -parseCon = Con () . ConId . T.pack <$> lexeme (liftA2 (:) (numberChar <|> lowerChar) (many alphaNumChar)) +parseCon :: Parser Term +parseCon = parseThingWithSub Con $ ConId . T.pack <$> lexeme (liftA2 (:) (numberChar <|> lowerChar) (many alphaNumChar)) -parseVar :: (MonadParsec e Text m) => m Term -parseVar = Var () . VarId . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar)) +parseVar :: Parser Term +parseVar = parseThingWithSub Var $ VarId . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar)) parseTerm :: Parser Term parseTerm = parseVar <|> parseCon parseAtom :: Parser Atom -parseAtom = do +parseAtom = parseThingWithSub (\loc (rel, args) -> Atom loc rel args) do rel <- RelId . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar)) args <- parens (parseTerm `sepBy` comma) - return (Atom () rel args) + return (rel, args) parseQuery :: Parser [Atom] parseQuery = parseAtom `sepBy` comma -parseFact :: Parser Rule -parseFact = do - headAtom <- parseAtom - period - return (Rule () headAtom []) - parseRule :: Parser Rule -parseRule = - try parseFact <|> do +parseRule = parseThingWithSub (\loc (a, as) -> Rule loc a as) $ + try rule1 <|> rule2 + where + rule1 = do + headAtom <- parseAtom + period + return (headAtom, []) + rule2 = do headAtom <- parseAtom <* symbol ":-" bodyAtoms <- parseQuery period - return (Rule () headAtom bodyAtoms) -parseRule' :: Parser (Rule' SrcLoc) -parseRule' = _ + return (headAtom, bodyAtoms) --- parseProgram :: Parser Program -parseProgram :: Parser (Program' SrcLoc) -parseProgram = do - -- annotateSrcLoc $ +parseProgram :: Parser Program +parseProgram = parseThingWithSub Program (many parseRule) + +parseThingWithSub :: (SrcLoc -> c -> f SrcLoc) -> Parser c -> Parser (f SrcLoc) +parseThingWithSub f parseSub = do s <- getSourcePos - c <- many parseRule' - e <- getSourcePos - -- Program _ <$> many parseRule - pure $ Program (SrcLoc s e) c - -parseProgram' :: Parser (Program' SrcLoc) -parseProgram' = parseThingWithSub (many parseRule') Program - -parseThingWithSub :: (Parser c) -> (SrcLoc -> c -> f SrcLoc) -> Parser (f SrcLoc) -parseThingWithSub parseSub f = do - -- annotateSrcLoc $ - s <- getSourcePos - -- c <- many parseRule' c <- parseSub e <- getSourcePos - -- Program _ <$> many parseRule pure $ f (SrcLoc s e) c annotateSrcLoc :: (Functor f) => Parser (f a) -> Parser (f SrcLoc)