diff --git a/src/Datalog.hs b/src/Datalog.hs index ca155dc..416efa9 100644 --- a/src/Datalog.hs +++ b/src/Datalog.hs @@ -1,8 +1,12 @@ module Datalog ( module Datalog.Syntax , module Datalog.Parser + , module Datalog.Eval + , module Datalog.IO ) where import Datalog.Syntax import Datalog.Parser +import Datalog.Eval +import Datalog.IO diff --git a/src/Datalog/Eval.hs b/src/Datalog/Eval.hs new file mode 100644 index 0000000..9b26770 --- /dev/null +++ b/src/Datalog/Eval.hs @@ -0,0 +1,173 @@ +module Datalog.Eval where + +import Data.Bifunctor (Bifunctor(..)) +import Data.Foldable +import Data.Map (Map) +import Data.Map qualified as Map +import Data.Map.Merge.Lazy qualified as Map +import Data.Maybe +import Data.Set (Set) +import Data.Set qualified as Set + +import Datalog.Syntax + +data Relation = Relation + { _relation_arity :: Int + , _relation_members :: Set [ConId] + -- all the tuples in the relation + , _relation_indices :: Map (Int, ConId) (Set [ConId]) + -- map from (position, value) pairs to the subset of the member tuples of the relation that have that value at the given position + } + deriving (Eq, Ord, Show) + +emptyRelation :: Int -> Relation +emptyRelation arity = Relation arity Set.empty Map.empty + +insertInRelation :: [ConId] -> Relation -> Relation +insertInRelation tuple rel = rel + { _relation_members = Set.insert tuple (_relation_members rel) + , _relation_indices = foldl (\ixs p -> Map.insertWith Set.union p (Set.singleton tuple) ixs) (_relation_indices rel) (zip [0..] tuple) + } + +insertManyInRelation :: Set [ConId] -> Relation -> Relation +insertManyInRelation tuples rel = foldl (\r x -> insertInRelation x r) rel tuples + +data Database = Database + { _database_universe :: Set ConId -- Herbrand universe, set of constants + , _database_relations :: Map RelId Relation -- relations indexed by name + , _database_rules :: Map RelId [Rule] -- rules indexed by head relation name + } + deriving (Eq, Ord, Show) + +emptyDatabase :: Database +emptyDatabase = Database Set.empty Map.empty Map.empty + +insertManyInRelationDb :: RelId -> Set [ConId] -> Database -> Database +insertManyInRelationDb relName tuples db = db + { _database_relations = + Map.insert + relName + (insertManyInRelation tuples + (Map.findWithDefault (error "insertManyInRelationDb: shouldn't happen") + relName + rels + )) + rels + } + where + rels = _database_relations db + +data EvalError = + EvalError_RuleWrongArity Rule WrongArity + | EvalError_AtomWrongArity Atom WrongArity + deriving (Eq, Ord, Show) + +data WrongArity = WrongArity + { _wrongArity_relation :: RelId + , _wrongArity_expected :: Int + , _wrongArity_got :: Int + } + deriving (Eq, Ord, Show) + +addRelation :: RelId -> Relation -> Database -> Database +addRelation relName rel db = db + { _database_relations = Map.insert relName rel (_database_relations db) + } + +arityCheck :: Database -> Atom -> Either WrongArity (Database, Relation) +arityCheck db (Atom relName args) = case Map.lookup relName (_database_relations db) of + Nothing -> + let n = length args + rel = emptyRelation n + in Right (addRelation relName rel db, rel) + Just rel -> + let n = length args + in if n == _relation_arity rel + then Right (db, rel) + else Left $ WrongArity + { _wrongArity_relation = relName + , _wrongArity_expected = _relation_arity rel + , _wrongArity_got = n + } + +addRule :: Rule -> Database -> Either EvalError Database +addRule r@(ruleHead@(Atom relName _) :- ruleBody) db = do + let ruleConstants = constants r + db' <- first (EvalError_RuleWrongArity r) $ foldlM (\d a -> fmap fst $ arityCheck d a) db (ruleHead : ruleBody) + + return $ db' + { _database_universe = Set.union ruleConstants (_database_universe db) + , _database_rules = Map.insertWith (<>) relName [r] (_database_rules db) + } + +addProgram :: Program -> Database -> Either EvalError Database +addProgram (Program rs) db = foldlM (\d r -> addRule r d) db rs + +evalAtomDb :: Database -> Atom -> Either EvalError (Set (Map VarId ConId)) +evalAtomDb db a = + case arityCheck db a of + Left w -> Left (EvalError_AtomWrongArity a w) + Right (_, rel) -> Right (unsafeEvalAtomRel rel a) + +-- NB: Assumes the atom has appropriate arity and that provided relation is the right one for the atom +unsafeEvalAtomRel :: Relation -> Atom -> Set (Map VarId ConId) +unsafeEvalAtomRel rel (Atom _ args) = Set.fromList [b | m <- Set.toList matches, b <- maybeToList (mkBinding args m Map.empty)] + where + keys = [(k, v) | (k, Con v) <- zip [0..] args] + sets = map (\k -> Map.findWithDefault Set.empty k (_relation_indices rel)) keys + matches = case sets of + [] -> _relation_members rel + (s:ss) -> foldl Set.intersection s ss + + mkBinding :: [Term] -> [ConId] -> Map VarId ConId -> Maybe (Map VarId ConId) + mkBinding [] [] m = Just m + mkBinding (Con _ : ts) (_ : cs) m = mkBinding ts cs m + mkBinding (Var v : ts) (c : cs) m = + case Map.lookup v m of + Nothing -> mkBinding ts cs (Map.insert v c m) + Just c' + | c == c' -> mkBinding ts cs m + | otherwise -> Nothing + mkBinding _ _ _ = error "mkBinding: arity mismatch" + +joinBindings :: Map VarId ConId -> Map VarId ConId -> Maybe (Map VarId ConId) +joinBindings = Map.mergeA + (Map.traverseMissing (\_ v -> Just v)) + (Map.traverseMissing (\_ v -> Just v)) + (Map.zipWithAMatched (\_ v1 v2 -> if v1 == v2 then Just v1 else Nothing)) + +(><) :: Set (Map VarId ConId) -> Set (Map VarId ConId) -> Set (Map VarId ConId) +s1 >< s2 = Set.fromList (catMaybes [joinBindings b1 b2 | b1 <- Set.toList s1, b2 <- Set.toList s2]) + +evalConjunction :: Database -> [Atom] -> Either EvalError (Set (Map VarId ConId)) +evalConjunction db atoms = do + rs <- mapM (evalAtomDb db) atoms + return (foldr (><) (Set.singleton Map.empty) rs) + +projectTuple :: Set ConId -> Map VarId ConId -> [Term] -> Set [ConId] +projectTuple univ m = Set.fromList . mapM proj + where + proj (Con c) = [c] + proj (Var v) = Map.findWithDefault (Set.toList univ) v (fmap (:[]) m) + +immediateConsequences :: Database -> Rule -> Either EvalError (Set [ConId]) +immediateConsequences db (Atom _ ts :- bodyAtoms) = do + rs <- evalConjunction db bodyAtoms + return $ Set.unions [projectTuple (_database_universe db) binding ts | binding <- Set.toList rs] + +allImmediateConsequences :: Database -> RelId -> Either EvalError (Set [ConId]) +allImmediateConsequences db relName = do + cs <- sequence [immediateConsequences db r | r <- Map.findWithDefault [] relName (_database_rules db)] + return (Set.unions cs) + +extendDb :: Database -> Either EvalError Database +extendDb db = do + consequences <- mapM (\relName -> fmap ((,) relName) (allImmediateConsequences db relName)) (Map.keys (_database_relations db)) + return $ foldl (\d (relName, tuples) -> insertManyInRelationDb relName tuples d) db consequences + +extendFixedpointDb :: Database -> Either EvalError Database +extendFixedpointDb db = do + db' <- extendDb db + if fmap _relation_members (_database_relations db) == fmap _relation_members (_database_relations db') + then return db' + else extendFixedpointDb db' diff --git a/src/Datalog/IO.hs b/src/Datalog/IO.hs new file mode 100644 index 0000000..d7f7dd4 --- /dev/null +++ b/src/Datalog/IO.hs @@ -0,0 +1,26 @@ +module Datalog.IO where + +import Data.Bifunctor (first) +import Data.Text (Text) +import Data.Void (Void) +import Data.Text.IO qualified as T +import Text.Megaparsec qualified as P + +import Datalog.Syntax +import Datalog.Parser +import Datalog.Eval + +data ReadFailure = ParseFailure (P.ParseErrorBundle Text Void) | EvalFailure EvalError + deriving (Eq, Show) + +readProgram :: FilePath -> IO (Either ReadFailure Program) +readProgram fp = do + inp <- T.readFile fp + return $ first ParseFailure (P.parse parseProgram fp inp) + +readDatabase :: FilePath -> IO (Either ReadFailure Database) +readDatabase fp = do + progE <- readProgram fp + return $ case progE of + Left e -> Left e + Right prog -> first EvalFailure (addProgram prog emptyDatabase) diff --git a/src/Datalog/Parser.hs b/src/Datalog/Parser.hs index 4215ae7..571ab39 100644 --- a/src/Datalog/Parser.hs +++ b/src/Datalog/Parser.hs @@ -35,36 +35,21 @@ comma, period :: (MonadParsec e Text m) => m () comma = () <$ symbol "," period = () <$ symbol "." -{- -data Term = Con Ident | Var Ident - deriving (Eq, Ord, Show) --} - parseCon :: (MonadParsec e Text m) => m Term -parseCon = Con . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar)) +parseCon = Con . ConId . T.pack <$> lexeme (many alphaNumChar) parseVar :: (MonadParsec e Text m) => m Term -parseVar = Var . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar)) +parseVar = Var . VarId . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar)) parseTerm :: Parser Term -parseTerm = parseCon <|> parseVar - -{- -data Atom = Atom Ident [Term] - deriving (Eq, Ord, Show) --} +parseTerm = parseVar <|> parseCon parseAtom :: Parser Atom parseAtom = do - rel <- T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar)) + rel <- RelId . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar)) args <- parens (parseTerm `sepBy` comma) return (Atom rel args) -{- -data Rule = Atom :- [Atom] - deriving (Eq, Ord, Show) --} - parseFact :: Parser Rule parseFact = do headAtom <- parseAtom @@ -78,9 +63,5 @@ parseRule = try parseFact <|> do period return (headAtom :- bodyAtoms) -{- -data Program = Program [Rule] --} - parseProgram :: Parser Program parseProgram = Program <$> many parseRule diff --git a/src/Datalog/Syntax.hs b/src/Datalog/Syntax.hs index 956bbfc..5982e7a 100644 --- a/src/Datalog/Syntax.hs +++ b/src/Datalog/Syntax.hs @@ -1,17 +1,60 @@ module Datalog.Syntax where +import Data.Char (isUpper) import Data.Text (Text) +import Data.Text qualified as T +import Data.Set qualified as Set +import Data.Set (Set) -type Ident = Text - -data Term = Con Ident | Var Ident +newtype ConId = ConId Text deriving (Eq, Ord, Show) -data Atom = Atom Ident [Term] +newtype VarId = VarId Text deriving (Eq, Ord, Show) +newtype RelId = RelId Text + deriving (Eq, Ord, Show) + +data Term = Con ConId | Var VarId + deriving (Eq, Ord, Show) + +con :: Text -> Term +con = Con . ConId + +var :: Text -> Term +var = Var . VarId + +term :: Text -> Term +term t = if not (T.null t) && isUpper (T.head t) then var t else con t + +data Atom = Atom RelId [Term] + deriving (Eq, Ord, Show) + +atom :: Text -> [Text] -> Atom +atom relName args = Atom (RelId relName) (map term args) + data Rule = Atom :- [Atom] deriving (Eq, Ord, Show) data Program = Program [Rule] deriving (Eq, Ord, Show) + +class HasConstants a where + constants :: a -> Set ConId + +instance HasConstants Term where + constants t = case t of + Con x -> Set.singleton x + Var _ -> Set.empty + +instance HasConstants a => HasConstants [a] where + constants xs = Set.unions (map constants xs) + +instance HasConstants Atom where + constants (Atom _ ts) = constants ts + +instance HasConstants Rule where + constants (h :- b) = Set.union (constants h) (constants b) + +instance HasConstants Program where + constants (Program rs) = constants rs diff --git a/test/Main.hs b/test/Main.hs index e8f19aa..da6f7a0 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,27 +1,40 @@ module Main (main) where import Datalog + import Data.Text qualified as T import Data.Text.IO qualified as T -import Text.Megaparsec qualified as M -parserGoldenTest :: String -> IO Bool -parserGoldenTest prefix = do +import Test.Tasty +import Test.Tasty.HUnit + +golden :: (Show err, Show val) => String -> String -> (FilePath -> IO (Either err val)) -> IO () +golden outExt prefix run = do let inFile = "test/golden/" <> prefix <> ".dl" - outFile = "test/golden/" <> prefix <> ".show" - inp <- T.readFile inFile + outFile = "test/golden/" <> prefix <> outExt + res <- run inFile out <- T.readFile outFile - result <- case M.parse parseProgram inFile inp of - Left e -> do - print e - return False - Right out' -> do - return (T.strip out == T.pack (show out')) - let resultS = if result then "passed" else "failed" - putStrLn $ unwords ["Parser test", prefix, resultS] - return result + case res of + Left e -> assertFailure (show e) + Right out' -> assertEqual "" (T.strip out) (T.pack (show out')) + +parserGoldenTest :: String -> TestTree +parserGoldenTest prefix = testCase ("Parser test: " <> prefix) $ golden ".show" prefix readProgram + +{- +databaseGoldenTest :: String -> TestTree +databaseGoldenTest prefix = testCase ("Database test: " <> prefix) $ golden ".db0" prefix readDatabase + +fixedpointGoldenTest :: String -> TestTree +fixedpointGoldenTest prefix = testCase ("Fixedpoint test: " <> prefix) $ golden ".dbF" prefix $ \fp -> do + fmap (fmap extendFixedpointDb) (readDatabase fp) +-} main :: IO () -main = do - True <- parserGoldenTest "ancestor" - return () +main = defaultMain $ + testGroup "all" + [ testGroup "parser" + [ parserGoldenTest "ancestor" + , parserGoldenTest "graph" + ] + ] diff --git a/test/golden/ancestor.show b/test/golden/ancestor.show index f2673d7..c2405e2 100644 --- a/test/golden/ancestor.show +++ b/test/golden/ancestor.show @@ -1 +1 @@ -Program [Atom "parent" [Con "xerces",Con "brooke"] :- [],Atom "parent" [Con "brooke",Con "damocles"] :- [],Atom "ancestor" [Var "X",Var "Y"] :- [Atom "parent" [Var "X",Var "Y"]],Atom "ancestor" [Var "X",Var "Y"] :- [Atom "parent" [Var "X",Var "Z"],Atom "ancestor" [Var "Z",Var "Y"]]] +Program [Atom (RelId "parent") [Con (ConId "xerces"),Con (ConId "brooke")] :- [],Atom (RelId "parent") [Con (ConId "brooke"),Con (ConId "damocles")] :- [],Atom (RelId "ancestor") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "parent") [Var (VarId "X"),Var (VarId "Y")]],Atom (RelId "ancestor") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "parent") [Var (VarId "X"),Var (VarId "Z")],Atom (RelId "ancestor") [Var (VarId "Z"),Var (VarId "Y")]]] diff --git a/test/golden/graph.dl b/test/golden/graph.dl new file mode 100644 index 0000000..01879ed --- /dev/null +++ b/test/golden/graph.dl @@ -0,0 +1,11 @@ +odd(X,Y) :- r(X,Y). +odd(X,Y) :- even(X,Z), r(Z,Y). +even(X,Y) :- odd(X,Z), r(Z,Y). + +r(0,1). +r(1,2). +r(2,3). +r(3,4). +r(4,5). + +r(X,Y) :- r(Y,X). diff --git a/test/golden/graph.show b/test/golden/graph.show new file mode 100644 index 0000000..0668ac0 --- /dev/null +++ b/test/golden/graph.show @@ -0,0 +1 @@ +Program [Atom (RelId "odd") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "r") [Var (VarId "X"),Var (VarId "Y")]],Atom (RelId "odd") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "even") [Var (VarId "X"),Var (VarId "Z")],Atom (RelId "r") [Var (VarId "Z"),Var (VarId "Y")]],Atom (RelId "even") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "odd") [Var (VarId "X"),Var (VarId "Z")],Atom (RelId "r") [Var (VarId "Z"),Var (VarId "Y")]],Atom (RelId "r") [Con (ConId "0"),Con (ConId "1")] :- [],Atom (RelId "r") [Con (ConId "1"),Con (ConId "2")] :- [],Atom (RelId "r") [Con (ConId "2"),Con (ConId "3")] :- [],Atom (RelId "r") [Con (ConId "3"),Con (ConId "4")] :- [],Atom (RelId "r") [Con (ConId "4"),Con (ConId "5")] :- [],Atom (RelId "r") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "r") [Var (VarId "Y"),Var (VarId "X")]]] diff --git a/toy-datalog.cabal b/toy-datalog.cabal index d4e7fa3..9151344 100644 --- a/toy-datalog.cabal +++ b/toy-datalog.cabal @@ -24,10 +24,13 @@ library Datalog Datalog.Parser Datalog.Syntax + Datalog.Eval + Datalog.IO build-depends: base, text, - megaparsec + megaparsec, + containers hs-source-dirs: src default-language: Haskell2010 @@ -51,4 +54,6 @@ test-suite toy-datalog-test base, toy-datalog, megaparsec, - text + text, + tasty, + tasty-hunit