diff --git a/README.md b/README.md index e6b3a8f..3f59fd2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ -# Really Bad Compiler in Haskell +# HEAR Compiler in Haskell -A compiler written in Haskell which can currently perform basic arithmetic. Currently using the megaparsec and llvm-hs-\* libraries, but I may reimplement certain libraries myself. Built for the Introduction to Compiler Design class at The University of Texas at Dallas. +A compiler for HEAR, a language for when you cannot C. + +Written in Haskell, and currently using the megaparsec and llvm-hs-\* libraries, but I plan to eventually rewrite the lexar/parser from scratch. Built for the Introduction to Compiler Design class at The University of Texas at Dallas. Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell @@ -8,13 +10,19 @@ Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell - Install `ghcup` (for managing Haskell tools) and `nix` (for managing external dependencies). - Clone the repo. -- Use `ghcup` to install `stack 2.9.3`, `HLS 2.2.0.0`, `GHC 9.2.8`, and `cabal 3.6.2.0`. +- Use `ghcup` to install `stack 2.11.1`, `HLS 2.3.0.0`, and `cabal 3.8.1.0`. ## Run Instructions -- Use `stack run ` to run the program (for example, `stack run example/1`). +- Use `stack run ` to run the program (for example, `stack run example/1.hear`). - The LLVM will be generated as `a.out.ll`, and the executable will be generated as `a.out`. +## Currently Supported functionality + +- Arithmetic +- Parenthesis +- print() statement + ## To edit I recommend using VSCodium, which is preconfigured to have syntax highlighting and (currently broken) debugging features and will automatically suggest the Haskell extensions to install. @@ -44,12 +52,13 @@ I recommend using VSCodium, which is preconfigured to have syntax highlighting a - https://danieljharvey.github.io/posts/2023-02-08-llvm-compiler-part-1.html (for help using llvm-hs-pure) - https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs (source code for above resource) - https://9to5tutorial.com/homebrew-compiler-made-with-haskell-llvm-configuration (for help using llvm-hs-pure) +- https://blog.ocharles.org.uk/blog/posts/2012-12-17-24-days-of-hackage-optparse-applicative.html (for help parsing command line arguments with optparse-applicative) ### Tools - Language: Haskell -- Haskell tools: GHCup, Stack, Cabal, GHC 9.2 -- Libraries: megaparsec, parser-combinators, text, process, llvm-hs 15, llvm-hs-pure 15, +- Haskell/management tools: GHCup, Stack, Cabal, GHC 9.2, Nix +- Libraries: See `package.yaml` - Dependencies: llvm 15, clang 15 - IDE: VSCodium - Git platform: Forgejo diff --git a/example/1 b/example/1 deleted file mode 100644 index 0f9d1ef..0000000 --- a/example/1 +++ /dev/null @@ -1 +0,0 @@ -5*3+2 \ No newline at end of file diff --git a/example/1.hear b/example/1.hear new file mode 100644 index 0000000..62b1f81 --- /dev/null +++ b/example/1.hear @@ -0,0 +1 @@ +print(5*(3-2)+-4-4); \ No newline at end of file diff --git a/example/2 b/example/2 deleted file mode 100644 index 50f4893..0000000 --- a/example/2 +++ /dev/null @@ -1 +0,0 @@ -6+8/3 \ No newline at end of file diff --git a/example/2.hear b/example/2.hear new file mode 100644 index 0000000..977a5ab --- /dev/null +++ b/example/2.hear @@ -0,0 +1,2 @@ + print(6+8/3); + print(5000); \ No newline at end of file diff --git a/main/Compiler/LLVMGen.hs b/main/Compiler/LLVMGen.hs deleted file mode 100644 index d36277d..0000000 --- a/main/Compiler/LLVMGen.hs +++ /dev/null @@ -1,67 +0,0 @@ -{-# LANGUAGE ImportQualifiedPost #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecursiveDo #-} - --- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs - -module Compiler.LLVMGen (llvmGen) where - --- import LLVM.Pretty - -import Data.ByteString (ByteString) -import Debug.Trace -import LLVM (moduleLLVMAssembly, withModuleFromAST, writeLLVMAssemblyToFile) -import LLVM.AST hiding (function) -import LLVM.AST.Type -import LLVM.Context -import LLVM.IRBuilder.Constant -import LLVM.IRBuilder.Instruction -import LLVM.IRBuilder.Module -import LLVM.IRBuilder.Monad -import Types.Expr as Expr - -getLLVM :: Expr -> Module -getLLVM expr = - buildModule "program" $ mdo - -- TODO: better module name - printf <- externVarArgs "printf" [ptr] i32 - function "main" [] i32 $ \_ -> mdo - numFormatStr <- globalStringPtr "%d\n" (mkName "str") - ourExpression <- exprToLLVM expr - _ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])] - ret (int32 0) - -exprToLLVM :: - ( MonadIRBuilder m, - MonadModuleBuilder m - ) => - Expr -> - m Operand -exprToLLVM (Lit prim) = pure $ primToLLVM prim -exprToLLVM (Expr.Add a b) = mdo - lhs <- exprToLLVM a - rhs <- exprToLLVM b - add lhs rhs -exprToLLVM (Expr.Sub a b) = mdo - lhs <- exprToLLVM a - rhs <- exprToLLVM b - sub lhs rhs -exprToLLVM (Expr.Mul a b) = mdo - lhs <- exprToLLVM a - rhs <- exprToLLVM b - mul lhs rhs -exprToLLVM (Expr.Div a b) = mdo - lhs <- exprToLLVM a - rhs <- exprToLLVM b - sdiv lhs rhs - -primToLLVM :: Int -> Operand -primToLLVM i = int32 (fromIntegral i) - -llvmGen :: Expr -> IO ByteString -llvmGen expr = do - let l = getLLVM expr - withContext - ( \c -> - withModuleFromAST c l moduleLLVMAssembly - ) \ No newline at end of file diff --git a/main/Main.hs b/main/Main.hs deleted file mode 100644 index dda57c6..0000000 --- a/main/Main.hs +++ /dev/null @@ -1,28 +0,0 @@ -{-# LANGUAGE ImportQualifiedPost #-} -{-# LANGUAGE OverloadedStrings #-} - -module Main (main) where - --- import Compiler.ExeGen -import Compiler.LLVMGen -import Data.ByteString.Char8 qualified as B -import Data.Text.Lazy.IO qualified as T -import Parser.Expr -import System.Environment -import System.Process -import Types.Expr - -getRight :: ParseResult -> Expr -getRight (Right r) = r - -main :: IO () -main = do - fileName <- fmap head getArgs - contents <- readFile fileName - T.putStrLn "- Parsing file..." - let parsed = getRight (parseExpr contents) - T.putStrLn "- Generating LLVM to './a.out.ll'..." - llvmGen parsed >>= B.writeFile "a.out.ll" - T.putStrLn "- Compiling to executable './a.out'..." - callCommand "clang a.out.ll" - T.putStrLn "- Done." diff --git a/main/Parser/Expr.hs b/main/Parser/Expr.hs deleted file mode 100644 index a2f3f4c..0000000 --- a/main/Parser/Expr.hs +++ /dev/null @@ -1,49 +0,0 @@ --- see https://markkarpov.com/tutorial/megaparsec.html -{-# LANGUAGE ImportQualifiedPost #-} - -module Parser.Expr (parseExpr, ParseResult) where - -import Control.Monad.Combinators.Expr -import Data.Functor.Identity qualified -import Data.Void (Void) -import Text.Megaparsec as MP -import Text.Megaparsec.Char as C -import Text.Megaparsec.Char.Lexer as L -import Types.Expr - -type Parser = Parsec Void String - -lexemeParser :: Parser a -> Parser a -lexemeParser = L.lexeme C.space - -symbolParser :: String -> Parser String -symbolParser = L.symbol C.space - -intParser :: Parser Int -intParser = lexemeParser L.decimal - -term :: Parser Expr -term = Lit <$> intParser - -table :: [[Operator Parser Expr]] -table = - [ [ binaryOp "*" Mul, - binaryOp "/" Div - ], - [ binaryOp "+" Add, - binaryOp "-" Sub - ] - ] - -binaryOp :: String -> (a -> a -> a) -> Operator (ParsecT Void String Data.Functor.Identity.Identity) a -binaryOp name f = InfixL (f <$ symbolParser name) - -expr :: Parser Expr -expr = makeExprParser term table - -type ParseResult = Either (ParseErrorBundle String Void) Expr - -parseExpr :: String -> ParseResult -parseExpr = MP.parse (C.space *> expr <* eof) "" - --- parseE = parseExpr \ No newline at end of file diff --git a/main/Types/Expr.hs b/main/Types/Expr.hs deleted file mode 100644 index 51c8925..0000000 --- a/main/Types/Expr.hs +++ /dev/null @@ -1,11 +0,0 @@ -module Types.Expr (Expr (..)) where - -data Expr - = Lit Int - | Add Expr Expr - | Sub Expr Expr - | Mul Expr Expr - | Div Expr Expr - deriving - ( Show - ) \ No newline at end of file diff --git a/package.yaml b/package.yaml index 99c82b0..5e889c5 100644 --- a/package.yaml +++ b/package.yaml @@ -7,9 +7,14 @@ dependencies: - parser-combinators - text - process + - mtl + - containers - llvm-hs >= 15 && < 16 - llvm-hs-pure >= 15 && < 16 - bytestring + - string-conversions + - transformers + - optparse-applicative >= 0.17 && < 1 tested-with: GHC == 9.2.8 category: Compilers/Interpreters @@ -19,7 +24,7 @@ ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm # source-dirs: src executable: - source-dirs: main + source-dirs: src main: Main.hs # tests: # testall: diff --git a/really-bad-compiler-in-haskell.cabal b/really-bad-compiler-in-haskell.cabal index d82f626..0b1bace 100644 --- a/really-bad-compiler-in-haskell.cabal +++ b/really-bad-compiler-in-haskell.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.35.1. +-- This file has been generated from package.yaml by hpack version 0.35.2. -- -- see: https://github.com/sol/hpack @@ -16,22 +16,27 @@ tested-with: executable really-bad-compiler-in-haskell main-is: Main.hs other-modules: - Compiler.LLVMGen - Parser.Expr - Types.Expr + Main.LLVMGen + Main.Parser.Megaparsec + Main.Type Paths_really_bad_compiler_in_haskell hs-source-dirs: - main + src default-extensions: OverloadedStrings, LambdaCase ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm build-depends: base >=4.14.3 && <5 , bytestring + , containers , llvm-hs ==15.* , llvm-hs-pure ==15.* , megaparsec >=9.0.1 && <10 + , mtl + , optparse-applicative >=0.17 && <1 , parser-combinators , process + , string-conversions , text + , transformers default-language: Haskell2010 diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 0000000..dee11e3 --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,47 @@ +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where + +import Data.ByteString.Char8 qualified as B +import Data.Text.IO qualified as T +import Main.LLVMGen +import Main.Parser.Megaparsec +import Options.Applicative +import System.Environment +import System.Process (callCommand) + +data Opt = Opt + { filePath :: String, + showLLVM :: Bool, + showDebug :: Bool + } + +run :: Opt -> IO () +run opts = do + let fileName = filePath opts + contents <- T.readFile fileName + T.putStrLn "- Generating LLVM to './a.out.ll'..." + result <- (llvmGen . parse) contents + B.writeFile "a.out.ll" result + T.putStrLn "- Compiling to executable './a.out'..." + callCommand "clang a.out.ll" + T.putStrLn "- Done." + +main :: IO () +main = execParser opts >>= run + where + parser = + Opt + <$> argument str (metavar "FILE_PATH") + <*> switch + ( short 'l' + <> long "showLLVM" + <> help "Create .ll with LLVM used to compile the binary" + ) + <*> switch + ( short 'd' + <> long "showDebug" + <> help "Show debug output" + ) + opts = info parser mempty diff --git a/src/Main/LLVMGen.hs b/src/Main/LLVMGen.hs new file mode 100644 index 0000000..620e168 --- /dev/null +++ b/src/Main/LLVMGen.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecursiveDo #-} + +-- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs +-- see https://blog.josephmorag.com/posts/mcc3/ + +module Main.LLVMGen (llvmGen) where + +import Control.Monad.State +import Control.Monad.Trans.Class (lift) +import Data.ByteString (ByteString) +import Data.Map qualified as M +import Data.String.Conversions +import Data.Text +import LLVM (moduleLLVMAssembly, withModuleFromAST) +import LLVM.AST hiding (function) +import LLVM.AST.Type +import LLVM.Context +import LLVM.IRBuilder.Constant +import LLVM.IRBuilder.Instruction +import LLVM.IRBuilder.Module +import LLVM.IRBuilder.Monad +import Main.Type as Expr + +data Env = Env + { operands :: M.Map Text Operand, + strings :: M.Map Text Operand + } + deriving (Eq, Show) + +registerOperand :: (MonadState Env m) => Text -> Operand -> m () +registerOperand name op = + modify $ \env -> env {operands = M.insert name op (operands env)} + +registerString :: (MonadState Env m) => Text -> Operand -> m () +registerString name op = + modify $ \env -> env {strings = M.insert name op (operands env)} + +getOperand :: (MonadState Env m, MonadModuleBuilder m, MonadIRBuilder m) => Text -> m Operand +getOperand name = do + env <- get + case M.lookup name (operands env) of + Just op -> return op + Nothing -> error $ "Unknown operand: " ++ show name + +getString :: (MonadState Env m, MonadModuleBuilder m, MonadIRBuilder m) => Text -> m Operand +getString str = do + env <- get + case M.lookup str (strings env) of + Just s -> return s + Nothing -> do + s <- globalStringPtr (unpack str) (mkName "str") + let operand = ConstantOperand s + modify $ \env -> env {strings = M.insert str operand (strings env)} + return operand + +getLLVM :: [Expr] -> Module +getLLVM expr = + flip evalState (Env {operands = M.empty, strings = M.empty}) $ + buildModuleT "program" $ mdo + -- TODO: better module name + printf <- externVarArgs "printf" [ptr] i32 + lift $ registerOperand "printf" printf + function "main" [] i32 $ \_ -> mdo + printNumStr <- globalStringPtr "%d\n" (mkName "str") + lift $ registerString "%d\n" $ ConstantOperand printNumStr + _ <- forM_ expr exprToLLVM + ret $ int32 0 + +-- +-- ourExpression <- exprToLLVM expr +-- _ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])] +-- ret $ int32 0 + +exprToLLVM :: + ( MonadIRBuilder m, + MonadModuleBuilder m, + MonadState Env m + ) => + Expr -> + m Operand +exprToLLVM (Lit prim) = pure $ primToLLVM prim +exprToLLVM (Paren e) = exprToLLVM e +exprToLLVM (Print e) = mdo + val <- exprToLLVM e + printf <- getOperand "printf" + formatStr <- getString "%d\n" + _ <- call (FunctionType i32 [ptr] True) printf [(formatStr, []), (val, [])] + pure val +exprToLLVM (Expr.Add a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + add lhs rhs +exprToLLVM (Expr.Sub a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + sub lhs rhs +exprToLLVM (Expr.Mul a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + mul lhs rhs +exprToLLVM (Expr.Div a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + sdiv lhs rhs + +primToLLVM :: Int -> Operand +primToLLVM i = int32 $ fromIntegral i + +llvmGen :: [Expr] -> IO ByteString +llvmGen expr = do + let l = getLLVM expr + withContext $ \c -> withModuleFromAST c l moduleLLVMAssembly \ No newline at end of file diff --git a/src/Main/Parser/Megaparsec.hs b/src/Main/Parser/Megaparsec.hs new file mode 100644 index 0000000..aa1ea72 --- /dev/null +++ b/src/Main/Parser/Megaparsec.hs @@ -0,0 +1,78 @@ +-- see https://markkarpov.com/tutorial/megaparsec.html +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main.Parser.Megaparsec (parse) where + +import Control.Monad.Combinators.Expr +import Data.Functor.Identity qualified +import Data.Text +import Data.Void (Void) +import Main.Type +import Text.Megaparsec as MP hiding (parse) +import Text.Megaparsec qualified as MP +import Text.Megaparsec.Char qualified as C +import Text.Megaparsec.Char.Lexer qualified as L + +type Parser = Parsec Void Text + +lexeme :: Parser a -> Parser a +lexeme = L.lexeme C.space + +symbol :: Text -> Parser Text +symbol = L.symbol C.space + +int :: Parser Int +int = lexeme $ L.signed (return ()) L.decimal + +string :: Text -> Parser Text +string = C.string + +container :: Text -> Text -> Parser a -> Parser a +container b e = between (symbol b) (symbol e) + +term :: Parser Expr +term = + choice + [ Lit <$> int, + container "(" ")" expr + ] + +table :: [[Operator Parser Expr]] +table = + [ [methodOp "print" Print], + [ binaryOp "*" Mul, + binaryOp "/" Div + ], + [ binaryOp "+" Add, + binaryOp "-" Sub + ] + ] + +binaryOp :: Text -> (a -> a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a +binaryOp name f = InfixL $ f <$ symbol name + +-- prefixOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a +-- prefixOp name f = Prefix (f <$ symbol name) + +methodOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a +methodOp name f = Prefix $ f <$ (string name <* C.space) + +-- postfixOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a +-- postfixOp name f = Postfix (f <$ symbol name) + +expr :: Parser Expr +expr = makeExprParser term table + +statement :: Parser Expr +statement = expr <* symbol ";" + +parseExpr :: Text -> Either (ParseErrorBundle Text Void) [Expr] +parseExpr = MP.parse (C.space *> many statement <* eof) "" + +parse :: Text -> [Expr] +parse t = + case parseExpr t of + Right r -> r + +-- TODO: add error handling \ No newline at end of file diff --git a/src/Main/Type.hs b/src/Main/Type.hs new file mode 100644 index 0000000..23178b8 --- /dev/null +++ b/src/Main/Type.hs @@ -0,0 +1,30 @@ +module Main.Type + ( Expr (..), + -- AST (..) + ) +where + +import Data.Graph (Tree (Node)) + +data Expr + = Lit Int + | Paren Expr + | Add Expr Expr + | Sub Expr Expr + | Mul Expr Expr + | Div Expr Expr + | Print Expr + deriving + ( Show + ) + +-- data AST = AST Node + +-- data Node +-- = Reg +-- { cur :: Expr, +-- next :: Node +-- } +-- | End +-- { cur :: Expr +-- } \ No newline at end of file diff --git a/stack.yaml b/stack.yaml index 45dc800..c54066b 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,11 @@ resolver: lts-20.26 +compiler: ghc-9.2.8 +# setup-info: +# ghc: +# aarch64: +# 9.4.6: +# url: "https://downloads.haskell.org/~ghc/9.4.6/ghc-9.4.6-aarch64-deb10-linux.tar.xz" +# sha256: "05896fc4bc52c117d281eac9c621c6c3a0b14f9f9eed5e42cce5e1c4485c7623" packages: - . @@ -12,4 +19,4 @@ nix: enable: true packages: [llvm_15, clang_15, libxml2] system-ghc: true -install-ghc: true +install-ghc: false