99 lines
2.6 KiB
Haskell
Raw Normal View History

2026-03-03 11:26:13 +00:00
{-# LANGUAGE BlockArguments #-}
2026-03-03 14:26:31 +00:00
{-# LANGUAGE MultilineStrings #-}
2026-03-06 18:11:43 +00:00
{- HLINT ignore "Use void" -}
2026-03-03 10:29:22 +00:00
2026-03-03 11:14:25 +00:00
module Datalog.Parser (
parseTerm,
parseRule,
parseProgram,
2026-03-03 16:00:36 +00:00
SrcLoc (..),
2026-03-03 11:14:25 +00:00
)
where
2026-03-02 16:19:40 +00:00
import Data.Text (Text)
import Data.Text qualified as T
import Data.Void (Void)
2026-03-06 18:11:43 +00:00
import Datalog.Syntax (Atom' (..), Program' (..), Rule' (..))
import Datalog.Syntax hiding (Atom, Program, Rule, Term)
2026-03-02 16:19:40 +00:00
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer qualified as L
type Parser = Parsec Void Text
data SrcLoc = SrcLoc
{ startLine :: Int
, startCol :: Int
, endLine :: Int
, endCol :: Int
}
deriving (Show)
2026-03-03 14:26:31 +00:00
type Atom = Atom' SrcLoc
type Term = Term' SrcLoc
type Rule = Rule' SrcLoc
type Program = Program' SrcLoc
2026-03-02 16:19:40 +00:00
lexeme :: (MonadParsec e Text m) => m a -> m a
lexeme = L.lexeme whitespace
symbol :: (MonadParsec e Text m) => Text -> m Text
symbol = L.symbol whitespace
whitespace :: (MonadParsec e Text m) => m ()
2026-03-03 11:14:25 +00:00
whitespace =
L.space
space1
(L.skipLineComment "--")
(L.skipBlockComment "{-" "-}")
2026-03-02 16:19:40 +00:00
parens :: (MonadParsec e Text m) => m a -> m a
parens = between (symbol "(") (symbol ")")
comma, period :: (MonadParsec e Text m) => m ()
comma = () <$ symbol ","
period = () <$ symbol "."
2026-03-03 14:26:31 +00:00
parseCon :: Parser Term
parseCon = parseThingWithSub Con $ ConId . T.pack <$> lexeme (liftA2 (:) (numberChar <|> lowerChar) (many alphaNumChar))
2026-03-02 16:19:40 +00:00
2026-03-03 14:26:31 +00:00
parseVar :: Parser Term
parseVar = parseThingWithSub Var $ VarId . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar))
2026-03-02 16:19:40 +00:00
parseTerm :: Parser Term
parseTerm = parseVar <|> parseCon
parseAtom :: Parser Atom
2026-03-03 14:26:31 +00:00
parseAtom = parseThingWithSub (\loc (rel, args) -> Atom loc rel args) do
2026-03-03 11:14:25 +00:00
rel <- RelId . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar))
args <- parens (parseTerm `sepBy` comma)
2026-03-03 14:26:31 +00:00
return (rel, args)
2026-03-02 16:19:40 +00:00
parseQuery :: Parser [Atom]
parseQuery = parseAtom `sepBy` comma
parseRule :: Parser Rule
parseRule =
parseThingWithSub (\loc (a, as) -> Rule loc a as) $
2026-03-03 14:26:31 +00:00
try rule1 <|> rule2
where
rule1 = do
headAtom <- parseAtom
period
return (headAtom, [])
rule2 = do
2026-03-03 11:14:25 +00:00
headAtom <- parseAtom <* symbol ":-"
bodyAtoms <- parseQuery
period
2026-03-03 14:26:31 +00:00
return (headAtom, bodyAtoms)
2026-03-03 11:26:13 +00:00
2026-03-03 14:26:31 +00:00
parseProgram :: Parser Program
parseProgram = parseThingWithSub Program (many parseRule)
2026-03-03 11:26:13 +00:00
2026-03-03 14:26:31 +00:00
parseThingWithSub :: (SrcLoc -> c -> f SrcLoc) -> Parser c -> Parser (f SrcLoc)
parseThingWithSub f parseSub = do
SourcePos _ sl sc <- getSourcePos
2026-03-03 11:26:13 +00:00
c <- parseSub
SourcePos _ el ec <- getSourcePos
pure $ f (SrcLoc (unPos sl) (unPos sc) (unPos el) (unPos ec)) c