Add basic evaluation, improved testing, some facilities to help testing by hand at ghci prompt

This commit is contained in:
Cale Gibbard 2026-01-16 01:05:19 -05:00
parent 9dd48b46af
commit 3ed2a84cce
10 changed files with 304 additions and 47 deletions

View File

@ -1,8 +1,12 @@
module Datalog
( module Datalog.Syntax
, module Datalog.Parser
, module Datalog.Eval
, module Datalog.IO
)
where
import Datalog.Syntax
import Datalog.Parser
import Datalog.Eval
import Datalog.IO

173
src/Datalog/Eval.hs Normal file
View File

@ -0,0 +1,173 @@
module Datalog.Eval where
import Data.Bifunctor (Bifunctor(..))
import Data.Foldable
import Data.Map (Map)
import Data.Map qualified as Map
import Data.Map.Merge.Lazy qualified as Map
import Data.Maybe
import Data.Set (Set)
import Data.Set qualified as Set
import Datalog.Syntax
data Relation = Relation
{ _relation_arity :: Int
, _relation_members :: Set [ConId]
-- all the tuples in the relation
, _relation_indices :: Map (Int, ConId) (Set [ConId])
-- map from (position, value) pairs to the subset of the member tuples of the relation that have that value at the given position
}
deriving (Eq, Ord, Show)
emptyRelation :: Int -> Relation
emptyRelation arity = Relation arity Set.empty Map.empty
insertInRelation :: [ConId] -> Relation -> Relation
insertInRelation tuple rel = rel
{ _relation_members = Set.insert tuple (_relation_members rel)
, _relation_indices = foldl (\ixs p -> Map.insertWith Set.union p (Set.singleton tuple) ixs) (_relation_indices rel) (zip [0..] tuple)
}
insertManyInRelation :: Set [ConId] -> Relation -> Relation
insertManyInRelation tuples rel = foldl (\r x -> insertInRelation x r) rel tuples
data Database = Database
{ _database_universe :: Set ConId -- Herbrand universe, set of constants
, _database_relations :: Map RelId Relation -- relations indexed by name
, _database_rules :: Map RelId [Rule] -- rules indexed by head relation name
}
deriving (Eq, Ord, Show)
emptyDatabase :: Database
emptyDatabase = Database Set.empty Map.empty Map.empty
insertManyInRelationDb :: RelId -> Set [ConId] -> Database -> Database
insertManyInRelationDb relName tuples db = db
{ _database_relations =
Map.insert
relName
(insertManyInRelation tuples
(Map.findWithDefault (error "insertManyInRelationDb: shouldn't happen")
relName
rels
))
rels
}
where
rels = _database_relations db
data EvalError =
EvalError_RuleWrongArity Rule WrongArity
| EvalError_AtomWrongArity Atom WrongArity
deriving (Eq, Ord, Show)
data WrongArity = WrongArity
{ _wrongArity_relation :: RelId
, _wrongArity_expected :: Int
, _wrongArity_got :: Int
}
deriving (Eq, Ord, Show)
addRelation :: RelId -> Relation -> Database -> Database
addRelation relName rel db = db
{ _database_relations = Map.insert relName rel (_database_relations db)
}
arityCheck :: Database -> Atom -> Either WrongArity (Database, Relation)
arityCheck db (Atom relName args) = case Map.lookup relName (_database_relations db) of
Nothing ->
let n = length args
rel = emptyRelation n
in Right (addRelation relName rel db, rel)
Just rel ->
let n = length args
in if n == _relation_arity rel
then Right (db, rel)
else Left $ WrongArity
{ _wrongArity_relation = relName
, _wrongArity_expected = _relation_arity rel
, _wrongArity_got = n
}
addRule :: Rule -> Database -> Either EvalError Database
addRule r@(ruleHead@(Atom relName _) :- ruleBody) db = do
let ruleConstants = constants r
db' <- first (EvalError_RuleWrongArity r) $ foldlM (\d a -> fmap fst $ arityCheck d a) db (ruleHead : ruleBody)
return $ db'
{ _database_universe = Set.union ruleConstants (_database_universe db)
, _database_rules = Map.insertWith (<>) relName [r] (_database_rules db)
}
addProgram :: Program -> Database -> Either EvalError Database
addProgram (Program rs) db = foldlM (\d r -> addRule r d) db rs
evalAtomDb :: Database -> Atom -> Either EvalError (Set (Map VarId ConId))
evalAtomDb db a =
case arityCheck db a of
Left w -> Left (EvalError_AtomWrongArity a w)
Right (_, rel) -> Right (unsafeEvalAtomRel rel a)
-- NB: Assumes the atom has appropriate arity and that provided relation is the right one for the atom
unsafeEvalAtomRel :: Relation -> Atom -> Set (Map VarId ConId)
unsafeEvalAtomRel rel (Atom _ args) = Set.fromList [b | m <- Set.toList matches, b <- maybeToList (mkBinding args m Map.empty)]
where
keys = [(k, v) | (k, Con v) <- zip [0..] args]
sets = map (\k -> Map.findWithDefault Set.empty k (_relation_indices rel)) keys
matches = case sets of
[] -> _relation_members rel
(s:ss) -> foldl Set.intersection s ss
mkBinding :: [Term] -> [ConId] -> Map VarId ConId -> Maybe (Map VarId ConId)
mkBinding [] [] m = Just m
mkBinding (Con _ : ts) (_ : cs) m = mkBinding ts cs m
mkBinding (Var v : ts) (c : cs) m =
case Map.lookup v m of
Nothing -> mkBinding ts cs (Map.insert v c m)
Just c'
| c == c' -> mkBinding ts cs m
| otherwise -> Nothing
mkBinding _ _ _ = error "mkBinding: arity mismatch"
joinBindings :: Map VarId ConId -> Map VarId ConId -> Maybe (Map VarId ConId)
joinBindings = Map.mergeA
(Map.traverseMissing (\_ v -> Just v))
(Map.traverseMissing (\_ v -> Just v))
(Map.zipWithAMatched (\_ v1 v2 -> if v1 == v2 then Just v1 else Nothing))
(><) :: Set (Map VarId ConId) -> Set (Map VarId ConId) -> Set (Map VarId ConId)
s1 >< s2 = Set.fromList (catMaybes [joinBindings b1 b2 | b1 <- Set.toList s1, b2 <- Set.toList s2])
evalConjunction :: Database -> [Atom] -> Either EvalError (Set (Map VarId ConId))
evalConjunction db atoms = do
rs <- mapM (evalAtomDb db) atoms
return (foldr (><) (Set.singleton Map.empty) rs)
projectTuple :: Set ConId -> Map VarId ConId -> [Term] -> Set [ConId]
projectTuple univ m = Set.fromList . mapM proj
where
proj (Con c) = [c]
proj (Var v) = Map.findWithDefault (Set.toList univ) v (fmap (:[]) m)
immediateConsequences :: Database -> Rule -> Either EvalError (Set [ConId])
immediateConsequences db (Atom _ ts :- bodyAtoms) = do
rs <- evalConjunction db bodyAtoms
return $ Set.unions [projectTuple (_database_universe db) binding ts | binding <- Set.toList rs]
allImmediateConsequences :: Database -> RelId -> Either EvalError (Set [ConId])
allImmediateConsequences db relName = do
cs <- sequence [immediateConsequences db r | r <- Map.findWithDefault [] relName (_database_rules db)]
return (Set.unions cs)
extendDb :: Database -> Either EvalError Database
extendDb db = do
consequences <- mapM (\relName -> fmap ((,) relName) (allImmediateConsequences db relName)) (Map.keys (_database_relations db))
return $ foldl (\d (relName, tuples) -> insertManyInRelationDb relName tuples d) db consequences
extendFixedpointDb :: Database -> Either EvalError Database
extendFixedpointDb db = do
db' <- extendDb db
if fmap _relation_members (_database_relations db) == fmap _relation_members (_database_relations db')
then return db'
else extendFixedpointDb db'

26
src/Datalog/IO.hs Normal file
View File

@ -0,0 +1,26 @@
module Datalog.IO where
import Data.Bifunctor (first)
import Data.Text (Text)
import Data.Void (Void)
import Data.Text.IO qualified as T
import Text.Megaparsec qualified as P
import Datalog.Syntax
import Datalog.Parser
import Datalog.Eval
data ReadFailure = ParseFailure (P.ParseErrorBundle Text Void) | EvalFailure EvalError
deriving (Eq, Show)
readProgram :: FilePath -> IO (Either ReadFailure Program)
readProgram fp = do
inp <- T.readFile fp
return $ first ParseFailure (P.parse parseProgram fp inp)
readDatabase :: FilePath -> IO (Either ReadFailure Database)
readDatabase fp = do
progE <- readProgram fp
return $ case progE of
Left e -> Left e
Right prog -> first EvalFailure (addProgram prog emptyDatabase)

View File

@ -35,36 +35,21 @@ comma, period :: (MonadParsec e Text m) => m ()
comma = () <$ symbol ","
period = () <$ symbol "."
{-
data Term = Con Ident | Var Ident
deriving (Eq, Ord, Show)
-}
parseCon :: (MonadParsec e Text m) => m Term
parseCon = Con . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar))
parseCon = Con . ConId . T.pack <$> lexeme (many alphaNumChar)
parseVar :: (MonadParsec e Text m) => m Term
parseVar = Var . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar))
parseVar = Var . VarId . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar))
parseTerm :: Parser Term
parseTerm = parseCon <|> parseVar
{-
data Atom = Atom Ident [Term]
deriving (Eq, Ord, Show)
-}
parseTerm = parseVar <|> parseCon
parseAtom :: Parser Atom
parseAtom = do
rel <- T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar))
rel <- RelId . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar))
args <- parens (parseTerm `sepBy` comma)
return (Atom rel args)
{-
data Rule = Atom :- [Atom]
deriving (Eq, Ord, Show)
-}
parseFact :: Parser Rule
parseFact = do
headAtom <- parseAtom
@ -78,9 +63,5 @@ parseRule = try parseFact <|> do
period
return (headAtom :- bodyAtoms)
{-
data Program = Program [Rule]
-}
parseProgram :: Parser Program
parseProgram = Program <$> many parseRule

View File

@ -1,17 +1,60 @@
module Datalog.Syntax where
import Data.Char (isUpper)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Set qualified as Set
import Data.Set (Set)
type Ident = Text
data Term = Con Ident | Var Ident
newtype ConId = ConId Text
deriving (Eq, Ord, Show)
data Atom = Atom Ident [Term]
newtype VarId = VarId Text
deriving (Eq, Ord, Show)
newtype RelId = RelId Text
deriving (Eq, Ord, Show)
data Term = Con ConId | Var VarId
deriving (Eq, Ord, Show)
con :: Text -> Term
con = Con . ConId
var :: Text -> Term
var = Var . VarId
term :: Text -> Term
term t = if not (T.null t) && isUpper (T.head t) then var t else con t
data Atom = Atom RelId [Term]
deriving (Eq, Ord, Show)
atom :: Text -> [Text] -> Atom
atom relName args = Atom (RelId relName) (map term args)
data Rule = Atom :- [Atom]
deriving (Eq, Ord, Show)
data Program = Program [Rule]
deriving (Eq, Ord, Show)
class HasConstants a where
constants :: a -> Set ConId
instance HasConstants Term where
constants t = case t of
Con x -> Set.singleton x
Var _ -> Set.empty
instance HasConstants a => HasConstants [a] where
constants xs = Set.unions (map constants xs)
instance HasConstants Atom where
constants (Atom _ ts) = constants ts
instance HasConstants Rule where
constants (h :- b) = Set.union (constants h) (constants b)
instance HasConstants Program where
constants (Program rs) = constants rs

View File

@ -1,27 +1,40 @@
module Main (main) where
import Datalog
import Data.Text qualified as T
import Data.Text.IO qualified as T
import Text.Megaparsec qualified as M
parserGoldenTest :: String -> IO Bool
parserGoldenTest prefix = do
import Test.Tasty
import Test.Tasty.HUnit
golden :: (Show err, Show val) => String -> String -> (FilePath -> IO (Either err val)) -> IO ()
golden outExt prefix run = do
let inFile = "test/golden/" <> prefix <> ".dl"
outFile = "test/golden/" <> prefix <> ".show"
inp <- T.readFile inFile
outFile = "test/golden/" <> prefix <> outExt
res <- run inFile
out <- T.readFile outFile
result <- case M.parse parseProgram inFile inp of
Left e -> do
print e
return False
Right out' -> do
return (T.strip out == T.pack (show out'))
let resultS = if result then "passed" else "failed"
putStrLn $ unwords ["Parser test", prefix, resultS]
return result
case res of
Left e -> assertFailure (show e)
Right out' -> assertEqual "" (T.strip out) (T.pack (show out'))
parserGoldenTest :: String -> TestTree
parserGoldenTest prefix = testCase ("Parser test: " <> prefix) $ golden ".show" prefix readProgram
{-
databaseGoldenTest :: String -> TestTree
databaseGoldenTest prefix = testCase ("Database test: " <> prefix) $ golden ".db0" prefix readDatabase
fixedpointGoldenTest :: String -> TestTree
fixedpointGoldenTest prefix = testCase ("Fixedpoint test: " <> prefix) $ golden ".dbF" prefix $ \fp -> do
fmap (fmap extendFixedpointDb) (readDatabase fp)
-}
main :: IO ()
main = do
True <- parserGoldenTest "ancestor"
return ()
main = defaultMain $
testGroup "all"
[ testGroup "parser"
[ parserGoldenTest "ancestor"
, parserGoldenTest "graph"
]
]

View File

@ -1 +1 @@
Program [Atom "parent" [Con "xerces",Con "brooke"] :- [],Atom "parent" [Con "brooke",Con "damocles"] :- [],Atom "ancestor" [Var "X",Var "Y"] :- [Atom "parent" [Var "X",Var "Y"]],Atom "ancestor" [Var "X",Var "Y"] :- [Atom "parent" [Var "X",Var "Z"],Atom "ancestor" [Var "Z",Var "Y"]]]
Program [Atom (RelId "parent") [Con (ConId "xerces"),Con (ConId "brooke")] :- [],Atom (RelId "parent") [Con (ConId "brooke"),Con (ConId "damocles")] :- [],Atom (RelId "ancestor") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "parent") [Var (VarId "X"),Var (VarId "Y")]],Atom (RelId "ancestor") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "parent") [Var (VarId "X"),Var (VarId "Z")],Atom (RelId "ancestor") [Var (VarId "Z"),Var (VarId "Y")]]]

11
test/golden/graph.dl Normal file
View File

@ -0,0 +1,11 @@
odd(X,Y) :- r(X,Y).
odd(X,Y) :- even(X,Z), r(Z,Y).
even(X,Y) :- odd(X,Z), r(Z,Y).
r(0,1).
r(1,2).
r(2,3).
r(3,4).
r(4,5).
r(X,Y) :- r(Y,X).

1
test/golden/graph.show Normal file
View File

@ -0,0 +1 @@
Program [Atom (RelId "odd") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "r") [Var (VarId "X"),Var (VarId "Y")]],Atom (RelId "odd") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "even") [Var (VarId "X"),Var (VarId "Z")],Atom (RelId "r") [Var (VarId "Z"),Var (VarId "Y")]],Atom (RelId "even") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "odd") [Var (VarId "X"),Var (VarId "Z")],Atom (RelId "r") [Var (VarId "Z"),Var (VarId "Y")]],Atom (RelId "r") [Con (ConId "0"),Con (ConId "1")] :- [],Atom (RelId "r") [Con (ConId "1"),Con (ConId "2")] :- [],Atom (RelId "r") [Con (ConId "2"),Con (ConId "3")] :- [],Atom (RelId "r") [Con (ConId "3"),Con (ConId "4")] :- [],Atom (RelId "r") [Con (ConId "4"),Con (ConId "5")] :- [],Atom (RelId "r") [Var (VarId "X"),Var (VarId "Y")] :- [Atom (RelId "r") [Var (VarId "Y"),Var (VarId "X")]]]

View File

@ -24,10 +24,13 @@ library
Datalog
Datalog.Parser
Datalog.Syntax
Datalog.Eval
Datalog.IO
build-depends:
base,
text,
megaparsec
megaparsec,
containers
hs-source-dirs: src
default-language: Haskell2010
@ -51,4 +54,6 @@ test-suite toy-datalog-test
base,
toy-datalog,
megaparsec,
text
text,
tasty,
tasty-hunit