diff --git a/.gitignore b/.gitignore index 4c9e245..1078bf3 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ cabal.project.local cabal.project.local~ .HTF/ .ghc.environment.* +a.out +a.out.ll \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..2201e47 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "gattytto.phoityne-vscode", + "haskell.haskell", + "justusadam.language-haskell" + ] +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..ad9207c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,46 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "ghc", + "request": "launch", + "name": "haskell(stack)", + "internalConsoleOptions": "openOnSessionStart", + "workspace": "${workspaceFolder}", + "startup": "${workspaceFolder}/test/Spec.hs", + "startupFunc": "", + "startupArgs": "", + "stopOnEntry": false, + "mainArgs": "", + "ghciPrompt": "H>>= ", + "ghciInitialPrompt": "Prelude>", + "ghciCmd": "stack ghci --test --no-load --no-build --main-is TARGET", + "ghciEnv": {}, + "logFile": "${workspaceFolder}/.vscode/phoityne.log", + "logLevel": "WARNING", + "forceInspect": false + }, + { + "type": "ghc", + "request": "launch", + "name": "haskell(cabal)", + "internalConsoleOptions": "openOnSessionStart", + "workspace": "${workspaceFolder}", + "startup": "${workspaceFolder}/Main.hs", + "startupFunc": "", + "startupArgs": "", + "stopOnEntry": false, + "mainArgs": "", + "ghciPrompt": "H>>= ", + "ghciInitialPrompt": "Prelude>", + "ghciCmd": "cabal exec -- ghci-dap --interactive -i -i${workspaceFolder}", + "ghciEnv": {}, + "logFile": "${workspaceFolder}/.vscode/phoityne.log", + "logLevel": "WARNING", + "forceInspect": false + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..c7efda6 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,50 @@ + +{ + // Automatically created by phoityne-vscode extension. + + "version": "2.0.0", + "presentation": { + "reveal": "always", + "panel": "new" + }, + "tasks": [ + { + // F7 + "group": { + "kind": "build", + "isDefault": true + }, + "label": "haskell build", + "type": "shell", + //"command": "cabal configure && cabal build" + "command": "stack build" + }, + { + // F6 + "group": "build", + "type": "shell", + "label": "haskell clean & build", + //"command": "cabal clean && cabal configure && cabal build" + "command": "stack clean && stack build" + //"command": "stack clean ; stack build" // for powershell + }, + { + // F8 + "group": { + "kind": "test", + "isDefault": true + }, + "type": "shell", + "label": "haskell test", + //"command": "cabal test" + "command": "stack test" + }, + { + // F6 + "isBackground": true, + "type": "shell", + "label": "haskell watch", + "command": "stack build --test --no-run-tests --file-watch" + } + ] +} diff --git a/README.md b/README.md index f26cbc7..e6b3a8f 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,23 @@ # Really Bad Compiler in Haskell -A compiler written in Haskell which can currently perform basic arithmetic using the megaparsec and llvm-hs libraries (I do not know what language I am going to compile yet). Built for the Introduction to Compiler Design class at The University of Texas at Dallas. +A compiler written in Haskell which can currently perform basic arithmetic. Currently using the megaparsec and llvm-hs-\* libraries, but I may reimplement certain libraries myself. Built for the Introduction to Compiler Design class at The University of Texas at Dallas. Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell -## Build Instructions +## Setup Instructions - Install `ghcup` (for managing Haskell tools) and `nix` (for managing external dependencies). - Clone the repo. - Use `ghcup` to install `stack 2.9.3`, `HLS 2.2.0.0`, `GHC 9.2.8`, and `cabal 3.6.2.0`. + +## Run Instructions + - Use `stack run ` to run the program (for example, `stack run example/1`). +- The LLVM will be generated as `a.out.ll`, and the executable will be generated as `a.out`. + +## To edit + +I recommend using VSCodium, which is preconfigured to have syntax highlighting and (currently broken) debugging features and will automatically suggest the Haskell extensions to install. ## Known bugs @@ -27,20 +35,23 @@ Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell ### Learning Resources Used - Introduction to Compiler Design class at The University of Texas at Dallas, taught by Charles Averill -- learnyouahaskell.com (for learning Haskell basics) +- https://learnyouahaskell.com (for learning Haskell basics) - https://akashagrawal.me/2017/01/19/beginners-guide-to-megaparsec.html -- https://markkarpov.com/tutorial/megaparsec.html (for help writing a Haskell equation parser) +- https://markkarpov.com/tutorial/megaparsec.html (for help writing a Haskell equation parser using megaparsec) - https://www.forth.com/starting-forth/1-forth-stacks-dictionary/ (for learning Forth) - https://blog.josephmorag.com/posts/mcc0/ (Haskell compiler tutorial with megaparsec, llvm-hs, and nix) -- https://gh.sudoer.ch/llvm-hs/llvm-hs-examples (for help writing an llvm code generator) +- https://gh.sudoer.ch/llvm-hs/llvm-hs-examples (for help writing an llvm code generator using llvm-hs) +- https://danieljharvey.github.io/posts/2023-02-08-llvm-compiler-part-1.html (for help using llvm-hs-pure) +- https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs (source code for above resource) +- https://9to5tutorial.com/homebrew-compiler-made-with-haskell-llvm-configuration (for help using llvm-hs-pure) ### Tools - Language: Haskell -- Haskell tools: GHCup, Stack, Cabal -- Libraries: megaparsec, parser-combinators, text, llvm-hs-pure -- Dependencies: llvm, clang +- Haskell tools: GHCup, Stack, Cabal, GHC 9.2 +- Libraries: megaparsec, parser-combinators, text, process, llvm-hs 15, llvm-hs-pure 15, +- Dependencies: llvm 15, clang 15 - IDE: VSCodium - Git platform: Forgejo -- AI: Phind +- AI: Phind (GPT-4), ollama (codellama) - Search: Kagi, Stack Overflow diff --git a/app/Eval/Expression.hs b/app/Eval/Expression.hs deleted file mode 100644 index 325afdf..0000000 --- a/app/Eval/Expression.hs +++ /dev/null @@ -1,13 +0,0 @@ -module Eval.Expression (evalExpr) where - -import Objects.Expression - -eval :: Expr -> Int -eval (Lit x) = x -eval (Add x y) = eval x + eval y -eval (Sub x y) = eval x - eval y -eval (Mul x y) = eval x * eval y -eval (Div x y) = eval x `div` eval y - -evalExpr :: Expr -> Int -evalExpr = eval \ No newline at end of file diff --git a/app/LLVMGen/Expression.hs b/app/LLVMGen/Expression.hs deleted file mode 100644 index 0b2c4d2..0000000 --- a/app/LLVMGen/Expression.hs +++ /dev/null @@ -1,51 +0,0 @@ -module LLVMGen.Expression () where - -import qualified Objects.Expression as Expr - -import LLVM.AST -import qualified LLVM.AST as AST -import LLVM.AST.Global -import LLVM.Context -import LLVM.Module - -import Data.ByteString.Char8 as BS - -int :: Type -int = IntegerType 32 - -defAdd :: Definition -defAdd = GlobalDefinition functionDefaults - { name = Name "add" - , parameters = - ( [ Parameter int (Name "a") [] - , Parameter int (Name "b") [] ] - , False ) - , returnType = int - , basicBlocks = [body] - } - where - body = BasicBlock - (Name "entry") - [ Name "result" := - Add False -- no signed wrap - False -- no unsigned wrap - (LocalReference int (Name "a")) - (LocalReference int (Name "b")) - []] - (Do $ Ret (Just (LocalReference int (Name "result"))) []) - - -module_ :: AST.Module -module_ = defaultModule - { moduleName = "basic" - , moduleDefinitions = [defAdd] - } - - -toLLVM :: AST.Module -> IO () -toLLVM modul = withContext $ \ctx -> do - llvm <- withModuleFromAST ctx modul moduleLLVMAssembly - BS.putStrLn llvm - -main :: IO () -main = toLLVM module_ \ No newline at end of file diff --git a/app/Main.hs b/app/Main.hs deleted file mode 100644 index d9407df..0000000 --- a/app/Main.hs +++ /dev/null @@ -1,22 +0,0 @@ -module Main (main) where - -import Control.Monad -import Data.Either -import Eval.Expression -import Objects.Expression -import Parser.Expression -import System.Environment -import System.IO -import Text.Megaparsec - -getRight :: ParseResult -> Expr -getRight (Right r) = r - -getResult :: String -> Int -getResult str = evalExpr (getRight (parseExpr str)) - -main = do - fileName <- fmap head getArgs - contents <- readFile fileName - let result = getResult contents - print result diff --git a/app/Objects/Expression.hs b/app/Objects/Expression.hs deleted file mode 100644 index 932df82..0000000 --- a/app/Objects/Expression.hs +++ /dev/null @@ -1,19 +0,0 @@ -module Objects.Expression - ( Expr - ( Lit, - Add, - Sub, - Mul, - Div - ), - -- solve, - ) -where - -data Expr - = Lit Int - | Add Expr Expr - | Sub Expr Expr - | Mul Expr Expr - | Div Expr Expr - deriving (Show) \ No newline at end of file diff --git a/main/Compiler/LLVMGen.hs b/main/Compiler/LLVMGen.hs new file mode 100644 index 0000000..d36277d --- /dev/null +++ b/main/Compiler/LLVMGen.hs @@ -0,0 +1,67 @@ +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecursiveDo #-} + +-- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs + +module Compiler.LLVMGen (llvmGen) where + +-- import LLVM.Pretty + +import Data.ByteString (ByteString) +import Debug.Trace +import LLVM (moduleLLVMAssembly, withModuleFromAST, writeLLVMAssemblyToFile) +import LLVM.AST hiding (function) +import LLVM.AST.Type +import LLVM.Context +import LLVM.IRBuilder.Constant +import LLVM.IRBuilder.Instruction +import LLVM.IRBuilder.Module +import LLVM.IRBuilder.Monad +import Types.Expr as Expr + +getLLVM :: Expr -> Module +getLLVM expr = + buildModule "program" $ mdo + -- TODO: better module name + printf <- externVarArgs "printf" [ptr] i32 + function "main" [] i32 $ \_ -> mdo + numFormatStr <- globalStringPtr "%d\n" (mkName "str") + ourExpression <- exprToLLVM expr + _ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])] + ret (int32 0) + +exprToLLVM :: + ( MonadIRBuilder m, + MonadModuleBuilder m + ) => + Expr -> + m Operand +exprToLLVM (Lit prim) = pure $ primToLLVM prim +exprToLLVM (Expr.Add a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + add lhs rhs +exprToLLVM (Expr.Sub a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + sub lhs rhs +exprToLLVM (Expr.Mul a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + mul lhs rhs +exprToLLVM (Expr.Div a b) = mdo + lhs <- exprToLLVM a + rhs <- exprToLLVM b + sdiv lhs rhs + +primToLLVM :: Int -> Operand +primToLLVM i = int32 (fromIntegral i) + +llvmGen :: Expr -> IO ByteString +llvmGen expr = do + let l = getLLVM expr + withContext + ( \c -> + withModuleFromAST c l moduleLLVMAssembly + ) \ No newline at end of file diff --git a/main/Main.hs b/main/Main.hs new file mode 100644 index 0000000..dda57c6 --- /dev/null +++ b/main/Main.hs @@ -0,0 +1,28 @@ +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where + +-- import Compiler.ExeGen +import Compiler.LLVMGen +import Data.ByteString.Char8 qualified as B +import Data.Text.Lazy.IO qualified as T +import Parser.Expr +import System.Environment +import System.Process +import Types.Expr + +getRight :: ParseResult -> Expr +getRight (Right r) = r + +main :: IO () +main = do + fileName <- fmap head getArgs + contents <- readFile fileName + T.putStrLn "- Parsing file..." + let parsed = getRight (parseExpr contents) + T.putStrLn "- Generating LLVM to './a.out.ll'..." + llvmGen parsed >>= B.writeFile "a.out.ll" + T.putStrLn "- Compiling to executable './a.out'..." + callCommand "clang a.out.ll" + T.putStrLn "- Done." diff --git a/app/Parser/Expression.hs b/main/Parser/Expr.hs similarity index 72% rename from app/Parser/Expression.hs rename to main/Parser/Expr.hs index 0d323ff..a2f3f4c 100644 --- a/app/Parser/Expression.hs +++ b/main/Parser/Expr.hs @@ -1,11 +1,15 @@ -module Parser.Expression (parseExpr, ParseResult) where +-- see https://markkarpov.com/tutorial/megaparsec.html +{-# LANGUAGE ImportQualifiedPost #-} + +module Parser.Expr (parseExpr, ParseResult) where import Control.Monad.Combinators.Expr +import Data.Functor.Identity qualified import Data.Void (Void) -import Objects.Expression import Text.Megaparsec as MP import Text.Megaparsec.Char as C import Text.Megaparsec.Char.Lexer as L +import Types.Expr type Parser = Parsec Void String @@ -31,6 +35,7 @@ table = ] ] +binaryOp :: String -> (a -> a -> a) -> Operator (ParsecT Void String Data.Functor.Identity.Identity) a binaryOp name f = InfixL (f <$ symbolParser name) expr :: Parser Expr diff --git a/main/Types/Expr.hs b/main/Types/Expr.hs new file mode 100644 index 0000000..51c8925 --- /dev/null +++ b/main/Types/Expr.hs @@ -0,0 +1,11 @@ +module Types.Expr (Expr (..)) where + +data Expr + = Lit Int + | Add Expr Expr + | Sub Expr Expr + | Mul Expr Expr + | Div Expr Expr + deriving + ( Show + ) \ No newline at end of file diff --git a/package.yaml b/package.yaml index af19751..99c82b0 100644 --- a/package.yaml +++ b/package.yaml @@ -6,9 +6,9 @@ dependencies: - megaparsec >= 9.0.1 && < 10 - parser-combinators - text + - process - llvm-hs >= 15 && < 16 - llvm-hs-pure >= 15 && < 16 - # - llvm-hs-pretty >= 15 && < 16 - bytestring tested-with: GHC == 9.2.8 category: Compilers/Interpreters @@ -19,7 +19,7 @@ ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm # source-dirs: src executable: - source-dirs: app + source-dirs: main main: Main.hs # tests: # testall: diff --git a/really-bad-compiler-in-haskell.cabal b/really-bad-compiler-in-haskell.cabal index 25ecb34..d82f626 100644 --- a/really-bad-compiler-in-haskell.cabal +++ b/really-bad-compiler-in-haskell.cabal @@ -16,13 +16,12 @@ tested-with: executable really-bad-compiler-in-haskell main-is: Main.hs other-modules: - Eval.Expression - LLVMGen.Expression - Objects.Expression - Parser.Expression + Compiler.LLVMGen + Parser.Expr + Types.Expr Paths_really_bad_compiler_in_haskell hs-source-dirs: - app + main default-extensions: OverloadedStrings, LambdaCase ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm @@ -33,5 +32,6 @@ executable really-bad-compiler-in-haskell , llvm-hs-pure ==15.* , megaparsec >=9.0.1 && <10 , parser-combinators + , process , text default-language: Haskell2010 diff --git a/stack.yaml b/stack.yaml index f5a1103..45dc800 100644 --- a/stack.yaml +++ b/stack.yaml @@ -3,8 +3,6 @@ resolver: lts-20.26 packages: - . extra-deps: - # - github: hyunsooda/llvm-hs-pretty-15 - # commit: 79283942d1667168ecd65237667aff7fed730303 - github: llvm-hs/llvm-hs commit: 5bca2c1a2a3aa98ecfb19181e7a5ebbf3e212b76 subdirs: