Compare commits

...

14 Commits

16 changed files with 312 additions and 170 deletions

View File

@ -1,6 +1,8 @@
# Really Bad Compiler in Haskell
# HEAR Compiler in Haskell
A compiler written in Haskell which can currently perform basic arithmetic. Currently using the megaparsec and llvm-hs-\* libraries, but I may reimplement certain libraries myself. Built for the Introduction to Compiler Design class at The University of Texas at Dallas.
A compiler for HEAR, a language for when you cannot C.
Written in Haskell, and currently using the megaparsec and llvm-hs-\* libraries, but I plan to eventually rewrite the lexar/parser from scratch. Built for the Introduction to Compiler Design class at The University of Texas at Dallas.
Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell
@ -8,13 +10,19 @@ Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell
- Install `ghcup` (for managing Haskell tools) and `nix` (for managing external dependencies).
- Clone the repo.
- Use `ghcup` to install `stack 2.9.3`, `HLS 2.2.0.0`, `GHC 9.2.8`, and `cabal 3.6.2.0`.
- Use `ghcup` to install `stack 2.11.1`, `HLS 2.3.0.0`, and `cabal 3.8.1.0`.
## Run Instructions
- Use `stack run <file>` to run the program (for example, `stack run example/1`).
- Use `stack run <file>` to run the program (for example, `stack run example/1.hear`).
- The LLVM will be generated as `a.out.ll`, and the executable will be generated as `a.out`.
## Currently Supported functionality
- Arithmetic
- Parenthesis
- print() statement
## To edit
I recommend using VSCodium, which is preconfigured to have syntax highlighting and (currently broken) debugging features and will automatically suggest the Haskell extensions to install.
@ -44,12 +52,13 @@ I recommend using VSCodium, which is preconfigured to have syntax highlighting a
- https://danieljharvey.github.io/posts/2023-02-08-llvm-compiler-part-1.html (for help using llvm-hs-pure)
- https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs (source code for above resource)
- https://9to5tutorial.com/homebrew-compiler-made-with-haskell-llvm-configuration (for help using llvm-hs-pure)
- https://blog.ocharles.org.uk/blog/posts/2012-12-17-24-days-of-hackage-optparse-applicative.html (for help parsing command line arguments with optparse-applicative)
### Tools
- Language: Haskell
- Haskell tools: GHCup, Stack, Cabal, GHC 9.2
- Libraries: megaparsec, parser-combinators, text, process, llvm-hs 15, llvm-hs-pure 15,
- Haskell/management tools: GHCup, Stack, Cabal, GHC 9.2, Nix
- Libraries: See `package.yaml`
- Dependencies: llvm 15, clang 15
- IDE: VSCodium
- Git platform: Forgejo

View File

@ -1 +0,0 @@
5*3+2

1
example/1.hear 100644
View File

@ -0,0 +1 @@
print(5*(3-2)+-4-4);

View File

@ -1 +0,0 @@
6+8/3

2
example/2.hear 100644
View File

@ -0,0 +1,2 @@
print(6+8/3);
print(5000);

View File

@ -1,67 +0,0 @@
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecursiveDo #-}
-- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs
module Compiler.LLVMGen (llvmGen) where
-- import LLVM.Pretty
import Data.ByteString (ByteString)
import Debug.Trace
import LLVM (moduleLLVMAssembly, withModuleFromAST, writeLLVMAssemblyToFile)
import LLVM.AST hiding (function)
import LLVM.AST.Type
import LLVM.Context
import LLVM.IRBuilder.Constant
import LLVM.IRBuilder.Instruction
import LLVM.IRBuilder.Module
import LLVM.IRBuilder.Monad
import Types.Expr as Expr
getLLVM :: Expr -> Module
getLLVM expr =
buildModule "program" $ mdo
-- TODO: better module name
printf <- externVarArgs "printf" [ptr] i32
function "main" [] i32 $ \_ -> mdo
numFormatStr <- globalStringPtr "%d\n" (mkName "str")
ourExpression <- exprToLLVM expr
_ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])]
ret (int32 0)
exprToLLVM ::
( MonadIRBuilder m,
MonadModuleBuilder m
) =>
Expr ->
m Operand
exprToLLVM (Lit prim) = pure $ primToLLVM prim
exprToLLVM (Expr.Add a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
add lhs rhs
exprToLLVM (Expr.Sub a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sub lhs rhs
exprToLLVM (Expr.Mul a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
mul lhs rhs
exprToLLVM (Expr.Div a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sdiv lhs rhs
primToLLVM :: Int -> Operand
primToLLVM i = int32 (fromIntegral i)
llvmGen :: Expr -> IO ByteString
llvmGen expr = do
let l = getLLVM expr
withContext
( \c ->
withModuleFromAST c l moduleLLVMAssembly
)

View File

@ -1,28 +0,0 @@
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
-- import Compiler.ExeGen
import Compiler.LLVMGen
import Data.ByteString.Char8 qualified as B
import Data.Text.Lazy.IO qualified as T
import Parser.Expr
import System.Environment
import System.Process
import Types.Expr
getRight :: ParseResult -> Expr
getRight (Right r) = r
main :: IO ()
main = do
fileName <- fmap head getArgs
contents <- readFile fileName
T.putStrLn "- Parsing file..."
let parsed = getRight (parseExpr contents)
T.putStrLn "- Generating LLVM to './a.out.ll'..."
llvmGen parsed >>= B.writeFile "a.out.ll"
T.putStrLn "- Compiling to executable './a.out'..."
callCommand "clang a.out.ll"
T.putStrLn "- Done."

View File

@ -1,49 +0,0 @@
-- see https://markkarpov.com/tutorial/megaparsec.html
{-# LANGUAGE ImportQualifiedPost #-}
module Parser.Expr (parseExpr, ParseResult) where
import Control.Monad.Combinators.Expr
import Data.Functor.Identity qualified
import Data.Void (Void)
import Text.Megaparsec as MP
import Text.Megaparsec.Char as C
import Text.Megaparsec.Char.Lexer as L
import Types.Expr
type Parser = Parsec Void String
lexemeParser :: Parser a -> Parser a
lexemeParser = L.lexeme C.space
symbolParser :: String -> Parser String
symbolParser = L.symbol C.space
intParser :: Parser Int
intParser = lexemeParser L.decimal
term :: Parser Expr
term = Lit <$> intParser
table :: [[Operator Parser Expr]]
table =
[ [ binaryOp "*" Mul,
binaryOp "/" Div
],
[ binaryOp "+" Add,
binaryOp "-" Sub
]
]
binaryOp :: String -> (a -> a -> a) -> Operator (ParsecT Void String Data.Functor.Identity.Identity) a
binaryOp name f = InfixL (f <$ symbolParser name)
expr :: Parser Expr
expr = makeExprParser term table
type ParseResult = Either (ParseErrorBundle String Void) Expr
parseExpr :: String -> ParseResult
parseExpr = MP.parse (C.space *> expr <* eof) ""
-- parseE = parseExpr

View File

@ -1,11 +0,0 @@
module Types.Expr (Expr (..)) where
data Expr
= Lit Int
| Add Expr Expr
| Sub Expr Expr
| Mul Expr Expr
| Div Expr Expr
deriving
( Show
)

View File

@ -7,9 +7,14 @@ dependencies:
- parser-combinators
- text
- process
- mtl
- containers
- llvm-hs >= 15 && < 16
- llvm-hs-pure >= 15 && < 16
- bytestring
- string-conversions
- transformers
- optparse-applicative >= 0.17 && < 1
tested-with: GHC == 9.2.8
category: Compilers/Interpreters
@ -19,7 +24,7 @@ ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm
# source-dirs: src
executable:
source-dirs: main
source-dirs: src
main: Main.hs
# tests:
# testall:

View File

@ -1,6 +1,6 @@
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.35.1.
-- This file has been generated from package.yaml by hpack version 0.35.2.
--
-- see: https://github.com/sol/hpack
@ -16,22 +16,27 @@ tested-with:
executable really-bad-compiler-in-haskell
main-is: Main.hs
other-modules:
Compiler.LLVMGen
Parser.Expr
Types.Expr
Main.LLVMGen
Main.Parser.Megaparsec
Main.Type
Paths_really_bad_compiler_in_haskell
hs-source-dirs:
main
src
default-extensions:
OverloadedStrings, LambdaCase
ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm
build-depends:
base >=4.14.3 && <5
, bytestring
, containers
, llvm-hs ==15.*
, llvm-hs-pure ==15.*
, megaparsec >=9.0.1 && <10
, mtl
, optparse-applicative >=0.17 && <1
, parser-combinators
, process
, string-conversions
, text
, transformers
default-language: Haskell2010

47
src/Main.hs 100644
View File

@ -0,0 +1,47 @@
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Data.ByteString.Char8 qualified as B
import Data.Text.IO qualified as T
import Main.LLVMGen
import Main.Parser.Megaparsec
import Options.Applicative
import System.Environment
import System.Process (callCommand)
data Opt = Opt
{ filePath :: String,
showLLVM :: Bool,
showDebug :: Bool
}
run :: Opt -> IO ()
run opts = do
let fileName = filePath opts
contents <- T.readFile fileName
T.putStrLn "- Generating LLVM to './a.out.ll'..."
result <- (llvmGen . parse) contents
B.writeFile "a.out.ll" result
T.putStrLn "- Compiling to executable './a.out'..."
callCommand "clang a.out.ll"
T.putStrLn "- Done."
main :: IO ()
main = execParser opts >>= run
where
parser =
Opt
<$> argument str (metavar "FILE_PATH")
<*> switch
( short 'l'
<> long "showLLVM"
<> help "Create <file>.ll with LLVM used to compile the binary"
)
<*> switch
( short 'd'
<> long "showDebug"
<> help "Show debug output"
)
opts = info parser mempty

115
src/Main/LLVMGen.hs 100644
View File

@ -0,0 +1,115 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecursiveDo #-}
-- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs
-- see https://blog.josephmorag.com/posts/mcc3/
module Main.LLVMGen (llvmGen) where
import Control.Monad.State
import Control.Monad.Trans.Class (lift)
import Data.ByteString (ByteString)
import Data.Map qualified as M
import Data.String.Conversions
import Data.Text
import LLVM (moduleLLVMAssembly, withModuleFromAST)
import LLVM.AST hiding (function)
import LLVM.AST.Type
import LLVM.Context
import LLVM.IRBuilder.Constant
import LLVM.IRBuilder.Instruction
import LLVM.IRBuilder.Module
import LLVM.IRBuilder.Monad
import Main.Type as Expr
data Env = Env
{ operands :: M.Map Text Operand,
strings :: M.Map Text Operand
}
deriving (Eq, Show)
registerOperand :: (MonadState Env m) => Text -> Operand -> m ()
registerOperand name op =
modify $ \env -> env {operands = M.insert name op (operands env)}
registerString :: (MonadState Env m) => Text -> Operand -> m ()
registerString name op =
modify $ \env -> env {strings = M.insert name op (operands env)}
getOperand :: (MonadState Env m, MonadModuleBuilder m, MonadIRBuilder m) => Text -> m Operand
getOperand name = do
env <- get
case M.lookup name (operands env) of
Just op -> return op
Nothing -> error $ "Unknown operand: " ++ show name
getString :: (MonadState Env m, MonadModuleBuilder m, MonadIRBuilder m) => Text -> m Operand
getString str = do
env <- get
case M.lookup str (strings env) of
Just s -> return s
Nothing -> do
s <- globalStringPtr (unpack str) (mkName "str")
let operand = ConstantOperand s
modify $ \env -> env {strings = M.insert str operand (strings env)}
return operand
getLLVM :: [Expr] -> Module
getLLVM expr =
flip evalState (Env {operands = M.empty, strings = M.empty}) $
buildModuleT "program" $ mdo
-- TODO: better module name
printf <- externVarArgs "printf" [ptr] i32
lift $ registerOperand "printf" printf
function "main" [] i32 $ \_ -> mdo
printNumStr <- globalStringPtr "%d\n" (mkName "str")
lift $ registerString "%d\n" $ ConstantOperand printNumStr
_ <- forM_ expr exprToLLVM
ret $ int32 0
--
-- ourExpression <- exprToLLVM expr
-- _ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])]
-- ret $ int32 0
exprToLLVM ::
( MonadIRBuilder m,
MonadModuleBuilder m,
MonadState Env m
) =>
Expr ->
m Operand
exprToLLVM (Lit prim) = pure $ primToLLVM prim
exprToLLVM (Paren e) = exprToLLVM e
exprToLLVM (Print e) = mdo
val <- exprToLLVM e
printf <- getOperand "printf"
formatStr <- getString "%d\n"
_ <- call (FunctionType i32 [ptr] True) printf [(formatStr, []), (val, [])]
pure val
exprToLLVM (Expr.Add a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
add lhs rhs
exprToLLVM (Expr.Sub a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sub lhs rhs
exprToLLVM (Expr.Mul a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
mul lhs rhs
exprToLLVM (Expr.Div a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sdiv lhs rhs
primToLLVM :: Int -> Operand
primToLLVM i = int32 $ fromIntegral i
llvmGen :: [Expr] -> IO ByteString
llvmGen expr = do
let l = getLLVM expr
withContext $ \c -> withModuleFromAST c l moduleLLVMAssembly

View File

@ -0,0 +1,78 @@
-- see https://markkarpov.com/tutorial/megaparsec.html
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main.Parser.Megaparsec (parse) where
import Control.Monad.Combinators.Expr
import Data.Functor.Identity qualified
import Data.Text
import Data.Void (Void)
import Main.Type
import Text.Megaparsec as MP hiding (parse)
import Text.Megaparsec qualified as MP
import Text.Megaparsec.Char qualified as C
import Text.Megaparsec.Char.Lexer qualified as L
type Parser = Parsec Void Text
lexeme :: Parser a -> Parser a
lexeme = L.lexeme C.space
symbol :: Text -> Parser Text
symbol = L.symbol C.space
int :: Parser Int
int = lexeme $ L.signed (return ()) L.decimal
string :: Text -> Parser Text
string = C.string
container :: Text -> Text -> Parser a -> Parser a
container b e = between (symbol b) (symbol e)
term :: Parser Expr
term =
choice
[ Lit <$> int,
container "(" ")" expr
]
table :: [[Operator Parser Expr]]
table =
[ [methodOp "print" Print],
[ binaryOp "*" Mul,
binaryOp "/" Div
],
[ binaryOp "+" Add,
binaryOp "-" Sub
]
]
binaryOp :: Text -> (a -> a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
binaryOp name f = InfixL $ f <$ symbol name
-- prefixOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
-- prefixOp name f = Prefix (f <$ symbol name)
methodOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
methodOp name f = Prefix $ f <$ (string name <* C.space)
-- postfixOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
-- postfixOp name f = Postfix (f <$ symbol name)
expr :: Parser Expr
expr = makeExprParser term table
statement :: Parser Expr
statement = expr <* symbol ";"
parseExpr :: Text -> Either (ParseErrorBundle Text Void) [Expr]
parseExpr = MP.parse (C.space *> many statement <* eof) ""
parse :: Text -> [Expr]
parse t =
case parseExpr t of
Right r -> r
-- TODO: add error handling

30
src/Main/Type.hs 100644
View File

@ -0,0 +1,30 @@
module Main.Type
( Expr (..),
-- AST (..)
)
where
import Data.Graph (Tree (Node))
data Expr
= Lit Int
| Paren Expr
| Add Expr Expr
| Sub Expr Expr
| Mul Expr Expr
| Div Expr Expr
| Print Expr
deriving
( Show
)
-- data AST = AST Node
-- data Node
-- = Reg
-- { cur :: Expr,
-- next :: Node
-- }
-- | End
-- { cur :: Expr
-- }

View File

@ -1,4 +1,11 @@
resolver: lts-20.26
compiler: ghc-9.2.8
# setup-info:
# ghc:
# aarch64:
# 9.4.6:
# url: "https://downloads.haskell.org/~ghc/9.4.6/ghc-9.4.6-aarch64-deb10-linux.tar.xz"
# sha256: "05896fc4bc52c117d281eac9c621c6c3a0b14f9f9eed5e42cce5e1c4485c7623"
packages:
- .
@ -12,4 +19,4 @@ nix:
enable: true
packages: [llvm_15, clang_15, libxml2]
system-ghc: true
install-ghc: true
install-ghc: false