commit 9dd48b46af36b6ef092716a722297de37995e023 Author: Cale Gibbard Date: Mon Jan 12 16:10:51 2026 -0500 Initial commit with basic datalog parser and abstract syntax diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7448ac2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +tags +dist/ +dist-newstyle/ +result +result-* +/.direnv/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..67a24c8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Revision history for toy-datalog + +## 0.1.0.0 -- YYYY-mm-dd + +* First version. Released on an unsuspecting world. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..20f4b8b --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2026, Cale Gibbard +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/app/Main.hs b/app/Main.hs new file mode 100644 index 0000000..60d904e --- /dev/null +++ b/app/Main.hs @@ -0,0 +1,8 @@ +module Main where + +import qualified MyLib (someFunc) + +main :: IO () +main = do + putStrLn "Hello, Haskell!" + MyLib.someFunc diff --git a/src/Datalog.hs b/src/Datalog.hs new file mode 100644 index 0000000..ca155dc --- /dev/null +++ b/src/Datalog.hs @@ -0,0 +1,8 @@ +module Datalog + ( module Datalog.Syntax + , module Datalog.Parser + ) + where + +import Datalog.Syntax +import Datalog.Parser diff --git a/src/Datalog/Parser.hs b/src/Datalog/Parser.hs new file mode 100644 index 0000000..4215ae7 --- /dev/null +++ b/src/Datalog/Parser.hs @@ -0,0 +1,86 @@ +module Datalog.Parser + ( parseTerm + , parseAtom + , parseRule + , parseProgram + ) + where + +import Datalog.Syntax +import Data.Text (Text) +import Data.Text qualified as T +import Data.Void (Void) +import Text.Megaparsec +import Text.Megaparsec.Char +import Text.Megaparsec.Char.Lexer qualified as L + +type Parser = Parsec Void Text + +lexeme :: (MonadParsec e Text m) => m a -> m a +lexeme = L.lexeme whitespace + +symbol :: (MonadParsec e Text m) => Text -> m Text +symbol = L.symbol whitespace + +whitespace :: (MonadParsec e Text m) => m () +whitespace = L.space + space1 + (L.skipLineComment "--") + (L.skipBlockComment "{-" "-}") + +parens :: (MonadParsec e Text m) => m a -> m a +parens = between (symbol "(") (symbol ")") + +comma, period :: (MonadParsec e Text m) => m () +comma = () <$ symbol "," +period = () <$ symbol "." + +{- +data Term = Con Ident | Var Ident + deriving (Eq, Ord, Show) +-} + +parseCon :: (MonadParsec e Text m) => m Term +parseCon = Con . T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar)) + +parseVar :: (MonadParsec e Text m) => m Term +parseVar = Var . T.pack <$> lexeme (liftA2 (:) upperChar (many alphaNumChar)) + +parseTerm :: Parser Term +parseTerm = parseCon <|> parseVar + +{- +data Atom = Atom Ident [Term] + deriving (Eq, Ord, Show) +-} + +parseAtom :: Parser Atom +parseAtom = do + rel <- T.pack <$> lexeme (liftA2 (:) lowerChar (many alphaNumChar)) + args <- parens (parseTerm `sepBy` comma) + return (Atom rel args) + +{- +data Rule = Atom :- [Atom] + deriving (Eq, Ord, Show) +-} + +parseFact :: Parser Rule +parseFact = do + headAtom <- parseAtom + period + return (headAtom :- []) + +parseRule :: Parser Rule +parseRule = try parseFact <|> do + headAtom <- parseAtom <* symbol ":-" + bodyAtoms <- parseAtom `sepBy` comma + period + return (headAtom :- bodyAtoms) + +{- +data Program = Program [Rule] +-} + +parseProgram :: Parser Program +parseProgram = Program <$> many parseRule diff --git a/src/Datalog/Syntax.hs b/src/Datalog/Syntax.hs new file mode 100644 index 0000000..956bbfc --- /dev/null +++ b/src/Datalog/Syntax.hs @@ -0,0 +1,17 @@ +module Datalog.Syntax where + +import Data.Text (Text) + +type Ident = Text + +data Term = Con Ident | Var Ident + deriving (Eq, Ord, Show) + +data Atom = Atom Ident [Term] + deriving (Eq, Ord, Show) + +data Rule = Atom :- [Atom] + deriving (Eq, Ord, Show) + +data Program = Program [Rule] + deriving (Eq, Ord, Show) diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..e8f19aa --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,27 @@ +module Main (main) where + +import Datalog +import Data.Text qualified as T +import Data.Text.IO qualified as T +import Text.Megaparsec qualified as M + +parserGoldenTest :: String -> IO Bool +parserGoldenTest prefix = do + let inFile = "test/golden/" <> prefix <> ".dl" + outFile = "test/golden/" <> prefix <> ".show" + inp <- T.readFile inFile + out <- T.readFile outFile + result <- case M.parse parseProgram inFile inp of + Left e -> do + print e + return False + Right out' -> do + return (T.strip out == T.pack (show out')) + let resultS = if result then "passed" else "failed" + putStrLn $ unwords ["Parser test", prefix, resultS] + return result + +main :: IO () +main = do + True <- parserGoldenTest "ancestor" + return () diff --git a/test/golden/ancestor.dl b/test/golden/ancestor.dl new file mode 100644 index 0000000..21ce022 --- /dev/null +++ b/test/golden/ancestor.dl @@ -0,0 +1,5 @@ +parent(xerces, brooke). +parent(brooke, damocles). + +ancestor(X, Y) :- parent(X, Y). +ancestor(X, Y) :- parent(X, Z), ancestor(Z, Y). diff --git a/test/golden/ancestor.show b/test/golden/ancestor.show new file mode 100644 index 0000000..f2673d7 --- /dev/null +++ b/test/golden/ancestor.show @@ -0,0 +1 @@ +Program [Atom "parent" [Con "xerces",Con "brooke"] :- [],Atom "parent" [Con "brooke",Con "damocles"] :- [],Atom "ancestor" [Var "X",Var "Y"] :- [Atom "parent" [Var "X",Var "Y"]],Atom "ancestor" [Var "X",Var "Y"] :- [Atom "parent" [Var "X",Var "Z"],Atom "ancestor" [Var "Z",Var "Y"]]] diff --git a/toy-datalog.cabal b/toy-datalog.cabal new file mode 100644 index 0000000..d4e7fa3 --- /dev/null +++ b/toy-datalog.cabal @@ -0,0 +1,54 @@ +cabal-version: 3.0 +name: toy-datalog +version: 0.1.0.0 +-- synopsis: +-- description: +license: BSD-2-Clause +license-file: LICENSE +author: Cale Gibbard +maintainer: cale.gibbard@obsidian.systems +category: Database +build-type: Simple +extra-doc-files: CHANGELOG.md + +common common + ghc-options: -Wall + default-extensions: + ImportQualifiedPost, + OverloadedStrings, + FlexibleContexts + +library + import: common + exposed-modules: + Datalog + Datalog.Parser + Datalog.Syntax + build-depends: + base, + text, + megaparsec + hs-source-dirs: src + default-language: Haskell2010 + +executable toy-datalog + import: common + main-is: Main.hs + build-depends: + base, + toy-datalog + hs-source-dirs: app + default-language: Haskell2010 + +test-suite toy-datalog-test + import: common + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Main.hs + + build-depends: + base, + toy-datalog, + megaparsec, + text