Compare commits

..

No commits in common. "2f790d6e554c93b2ab12f3acce3f2bada5aef347" and "5f1b5ce65cb1860d26b7367c20ba935c66513094" have entirely different histories.

16 changed files with 170 additions and 312 deletions

View File

@ -1,8 +1,6 @@
# HEAR Compiler in Haskell # Really Bad Compiler in Haskell
A compiler for HEAR, a language for when you cannot C. A compiler written in Haskell which can currently perform basic arithmetic. Currently using the megaparsec and llvm-hs-\* libraries, but I may reimplement certain libraries myself. Built for the Introduction to Compiler Design class at The University of Texas at Dallas.
Written in Haskell, and currently using the megaparsec and llvm-hs-\* libraries, but I plan to eventually rewrite the lexar/parser from scratch. Built for the Introduction to Compiler Design class at The University of Texas at Dallas.
Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell
@ -10,19 +8,13 @@ Main repo: https://git.sudoer.ch/me/really-bad-compiler-in-haskell
- Install `ghcup` (for managing Haskell tools) and `nix` (for managing external dependencies). - Install `ghcup` (for managing Haskell tools) and `nix` (for managing external dependencies).
- Clone the repo. - Clone the repo.
- Use `ghcup` to install `stack 2.11.1`, `HLS 2.3.0.0`, and `cabal 3.8.1.0`. - Use `ghcup` to install `stack 2.9.3`, `HLS 2.2.0.0`, `GHC 9.2.8`, and `cabal 3.6.2.0`.
## Run Instructions ## Run Instructions
- Use `stack run <file>` to run the program (for example, `stack run example/1.hear`). - Use `stack run <file>` to run the program (for example, `stack run example/1`).
- The LLVM will be generated as `a.out.ll`, and the executable will be generated as `a.out`. - The LLVM will be generated as `a.out.ll`, and the executable will be generated as `a.out`.
## Currently Supported functionality
- Arithmetic
- Parenthesis
- print() statement
## To edit ## To edit
I recommend using VSCodium, which is preconfigured to have syntax highlighting and (currently broken) debugging features and will automatically suggest the Haskell extensions to install. I recommend using VSCodium, which is preconfigured to have syntax highlighting and (currently broken) debugging features and will automatically suggest the Haskell extensions to install.
@ -52,13 +44,12 @@ I recommend using VSCodium, which is preconfigured to have syntax highlighting a
- https://danieljharvey.github.io/posts/2023-02-08-llvm-compiler-part-1.html (for help using llvm-hs-pure) - https://danieljharvey.github.io/posts/2023-02-08-llvm-compiler-part-1.html (for help using llvm-hs-pure)
- https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs (source code for above resource) - https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs (source code for above resource)
- https://9to5tutorial.com/homebrew-compiler-made-with-haskell-llvm-configuration (for help using llvm-hs-pure) - https://9to5tutorial.com/homebrew-compiler-made-with-haskell-llvm-configuration (for help using llvm-hs-pure)
- https://blog.ocharles.org.uk/blog/posts/2012-12-17-24-days-of-hackage-optparse-applicative.html (for help parsing command line arguments with optparse-applicative)
### Tools ### Tools
- Language: Haskell - Language: Haskell
- Haskell/management tools: GHCup, Stack, Cabal, GHC 9.2, Nix - Haskell tools: GHCup, Stack, Cabal, GHC 9.2
- Libraries: See `package.yaml` - Libraries: megaparsec, parser-combinators, text, process, llvm-hs 15, llvm-hs-pure 15,
- Dependencies: llvm 15, clang 15 - Dependencies: llvm 15, clang 15
- IDE: VSCodium - IDE: VSCodium
- Git platform: Forgejo - Git platform: Forgejo

1
example/1 100644
View File

@ -0,0 +1 @@
5*3+2

View File

@ -1 +0,0 @@
print(5*(3-2)+-4-4);

1
example/2 100644
View File

@ -0,0 +1 @@
6+8/3

View File

@ -1,2 +0,0 @@
print(6+8/3);
print(5000);

View File

@ -0,0 +1,67 @@
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecursiveDo #-}
-- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs
module Compiler.LLVMGen (llvmGen) where
-- import LLVM.Pretty
import Data.ByteString (ByteString)
import Debug.Trace
import LLVM (moduleLLVMAssembly, withModuleFromAST, writeLLVMAssemblyToFile)
import LLVM.AST hiding (function)
import LLVM.AST.Type
import LLVM.Context
import LLVM.IRBuilder.Constant
import LLVM.IRBuilder.Instruction
import LLVM.IRBuilder.Module
import LLVM.IRBuilder.Monad
import Types.Expr as Expr
getLLVM :: Expr -> Module
getLLVM expr =
buildModule "program" $ mdo
-- TODO: better module name
printf <- externVarArgs "printf" [ptr] i32
function "main" [] i32 $ \_ -> mdo
numFormatStr <- globalStringPtr "%d\n" (mkName "str")
ourExpression <- exprToLLVM expr
_ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])]
ret (int32 0)
exprToLLVM ::
( MonadIRBuilder m,
MonadModuleBuilder m
) =>
Expr ->
m Operand
exprToLLVM (Lit prim) = pure $ primToLLVM prim
exprToLLVM (Expr.Add a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
add lhs rhs
exprToLLVM (Expr.Sub a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sub lhs rhs
exprToLLVM (Expr.Mul a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
mul lhs rhs
exprToLLVM (Expr.Div a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sdiv lhs rhs
primToLLVM :: Int -> Operand
primToLLVM i = int32 (fromIntegral i)
llvmGen :: Expr -> IO ByteString
llvmGen expr = do
let l = getLLVM expr
withContext
( \c ->
withModuleFromAST c l moduleLLVMAssembly
)

28
main/Main.hs 100644
View File

@ -0,0 +1,28 @@
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
-- import Compiler.ExeGen
import Compiler.LLVMGen
import Data.ByteString.Char8 qualified as B
import Data.Text.Lazy.IO qualified as T
import Parser.Expr
import System.Environment
import System.Process
import Types.Expr
getRight :: ParseResult -> Expr
getRight (Right r) = r
main :: IO ()
main = do
fileName <- fmap head getArgs
contents <- readFile fileName
T.putStrLn "- Parsing file..."
let parsed = getRight (parseExpr contents)
T.putStrLn "- Generating LLVM to './a.out.ll'..."
llvmGen parsed >>= B.writeFile "a.out.ll"
T.putStrLn "- Compiling to executable './a.out'..."
callCommand "clang a.out.ll"
T.putStrLn "- Done."

View File

@ -0,0 +1,49 @@
-- see https://markkarpov.com/tutorial/megaparsec.html
{-# LANGUAGE ImportQualifiedPost #-}
module Parser.Expr (parseExpr, ParseResult) where
import Control.Monad.Combinators.Expr
import Data.Functor.Identity qualified
import Data.Void (Void)
import Text.Megaparsec as MP
import Text.Megaparsec.Char as C
import Text.Megaparsec.Char.Lexer as L
import Types.Expr
type Parser = Parsec Void String
lexemeParser :: Parser a -> Parser a
lexemeParser = L.lexeme C.space
symbolParser :: String -> Parser String
symbolParser = L.symbol C.space
intParser :: Parser Int
intParser = lexemeParser L.decimal
term :: Parser Expr
term = Lit <$> intParser
table :: [[Operator Parser Expr]]
table =
[ [ binaryOp "*" Mul,
binaryOp "/" Div
],
[ binaryOp "+" Add,
binaryOp "-" Sub
]
]
binaryOp :: String -> (a -> a -> a) -> Operator (ParsecT Void String Data.Functor.Identity.Identity) a
binaryOp name f = InfixL (f <$ symbolParser name)
expr :: Parser Expr
expr = makeExprParser term table
type ParseResult = Either (ParseErrorBundle String Void) Expr
parseExpr :: String -> ParseResult
parseExpr = MP.parse (C.space *> expr <* eof) ""
-- parseE = parseExpr

11
main/Types/Expr.hs 100644
View File

@ -0,0 +1,11 @@
module Types.Expr (Expr (..)) where
data Expr
= Lit Int
| Add Expr Expr
| Sub Expr Expr
| Mul Expr Expr
| Div Expr Expr
deriving
( Show
)

View File

@ -7,14 +7,9 @@ dependencies:
- parser-combinators - parser-combinators
- text - text
- process - process
- mtl
- containers
- llvm-hs >= 15 && < 16 - llvm-hs >= 15 && < 16
- llvm-hs-pure >= 15 && < 16 - llvm-hs-pure >= 15 && < 16
- bytestring - bytestring
- string-conversions
- transformers
- optparse-applicative >= 0.17 && < 1
tested-with: GHC == 9.2.8 tested-with: GHC == 9.2.8
category: Compilers/Interpreters category: Compilers/Interpreters
@ -24,7 +19,7 @@ ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm
# source-dirs: src # source-dirs: src
executable: executable:
source-dirs: src source-dirs: main
main: Main.hs main: Main.hs
# tests: # tests:
# testall: # testall:

View File

@ -1,6 +1,6 @@
cabal-version: 1.12 cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.35.2. -- This file has been generated from package.yaml by hpack version 0.35.1.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
@ -16,27 +16,22 @@ tested-with:
executable really-bad-compiler-in-haskell executable really-bad-compiler-in-haskell
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Main.LLVMGen Compiler.LLVMGen
Main.Parser.Megaparsec Parser.Expr
Main.Type Types.Expr
Paths_really_bad_compiler_in_haskell Paths_really_bad_compiler_in_haskell
hs-source-dirs: hs-source-dirs:
src main
default-extensions: default-extensions:
OverloadedStrings, LambdaCase OverloadedStrings, LambdaCase
ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm ghc-options: -threaded -Wall -j8 +RTS -A64M -RTS -fllvm
build-depends: build-depends:
base >=4.14.3 && <5 base >=4.14.3 && <5
, bytestring , bytestring
, containers
, llvm-hs ==15.* , llvm-hs ==15.*
, llvm-hs-pure ==15.* , llvm-hs-pure ==15.*
, megaparsec >=9.0.1 && <10 , megaparsec >=9.0.1 && <10
, mtl
, optparse-applicative >=0.17 && <1
, parser-combinators , parser-combinators
, process , process
, string-conversions
, text , text
, transformers
default-language: Haskell2010 default-language: Haskell2010

View File

@ -1,47 +0,0 @@
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Data.ByteString.Char8 qualified as B
import Data.Text.IO qualified as T
import Main.LLVMGen
import Main.Parser.Megaparsec
import Options.Applicative
import System.Environment
import System.Process (callCommand)
data Opt = Opt
{ filePath :: String,
showLLVM :: Bool,
showDebug :: Bool
}
run :: Opt -> IO ()
run opts = do
let fileName = filePath opts
contents <- T.readFile fileName
T.putStrLn "- Generating LLVM to './a.out.ll'..."
result <- (llvmGen . parse) contents
B.writeFile "a.out.ll" result
T.putStrLn "- Compiling to executable './a.out'..."
callCommand "clang a.out.ll"
T.putStrLn "- Done."
main :: IO ()
main = execParser opts >>= run
where
parser =
Opt
<$> argument str (metavar "FILE_PATH")
<*> switch
( short 'l'
<> long "showLLVM"
<> help "Create <file>.ll with LLVM used to compile the binary"
)
<*> switch
( short 'd'
<> long "showDebug"
<> help "Show debug output"
)
opts = info parser mempty

View File

@ -1,115 +0,0 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecursiveDo #-}
-- see https://gh.sudoer.ch/danieljharvey/mimsa/blob/trunk/llvm-calc/src/Calc/Compile/ToLLVM.hs
-- see https://blog.josephmorag.com/posts/mcc3/
module Main.LLVMGen (llvmGen) where
import Control.Monad.State
import Control.Monad.Trans.Class (lift)
import Data.ByteString (ByteString)
import Data.Map qualified as M
import Data.String.Conversions
import Data.Text
import LLVM (moduleLLVMAssembly, withModuleFromAST)
import LLVM.AST hiding (function)
import LLVM.AST.Type
import LLVM.Context
import LLVM.IRBuilder.Constant
import LLVM.IRBuilder.Instruction
import LLVM.IRBuilder.Module
import LLVM.IRBuilder.Monad
import Main.Type as Expr
data Env = Env
{ operands :: M.Map Text Operand,
strings :: M.Map Text Operand
}
deriving (Eq, Show)
registerOperand :: (MonadState Env m) => Text -> Operand -> m ()
registerOperand name op =
modify $ \env -> env {operands = M.insert name op (operands env)}
registerString :: (MonadState Env m) => Text -> Operand -> m ()
registerString name op =
modify $ \env -> env {strings = M.insert name op (operands env)}
getOperand :: (MonadState Env m, MonadModuleBuilder m, MonadIRBuilder m) => Text -> m Operand
getOperand name = do
env <- get
case M.lookup name (operands env) of
Just op -> return op
Nothing -> error $ "Unknown operand: " ++ show name
getString :: (MonadState Env m, MonadModuleBuilder m, MonadIRBuilder m) => Text -> m Operand
getString str = do
env <- get
case M.lookup str (strings env) of
Just s -> return s
Nothing -> do
s <- globalStringPtr (unpack str) (mkName "str")
let operand = ConstantOperand s
modify $ \env -> env {strings = M.insert str operand (strings env)}
return operand
getLLVM :: [Expr] -> Module
getLLVM expr =
flip evalState (Env {operands = M.empty, strings = M.empty}) $
buildModuleT "program" $ mdo
-- TODO: better module name
printf <- externVarArgs "printf" [ptr] i32
lift $ registerOperand "printf" printf
function "main" [] i32 $ \_ -> mdo
printNumStr <- globalStringPtr "%d\n" (mkName "str")
lift $ registerString "%d\n" $ ConstantOperand printNumStr
_ <- forM_ expr exprToLLVM
ret $ int32 0
--
-- ourExpression <- exprToLLVM expr
-- _ <- call (FunctionType i32 [ptr] True) printf [(ConstantOperand numFormatStr, []), (ourExpression, [])]
-- ret $ int32 0
exprToLLVM ::
( MonadIRBuilder m,
MonadModuleBuilder m,
MonadState Env m
) =>
Expr ->
m Operand
exprToLLVM (Lit prim) = pure $ primToLLVM prim
exprToLLVM (Paren e) = exprToLLVM e
exprToLLVM (Print e) = mdo
val <- exprToLLVM e
printf <- getOperand "printf"
formatStr <- getString "%d\n"
_ <- call (FunctionType i32 [ptr] True) printf [(formatStr, []), (val, [])]
pure val
exprToLLVM (Expr.Add a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
add lhs rhs
exprToLLVM (Expr.Sub a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sub lhs rhs
exprToLLVM (Expr.Mul a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
mul lhs rhs
exprToLLVM (Expr.Div a b) = mdo
lhs <- exprToLLVM a
rhs <- exprToLLVM b
sdiv lhs rhs
primToLLVM :: Int -> Operand
primToLLVM i = int32 $ fromIntegral i
llvmGen :: [Expr] -> IO ByteString
llvmGen expr = do
let l = getLLVM expr
withContext $ \c -> withModuleFromAST c l moduleLLVMAssembly

View File

@ -1,78 +0,0 @@
-- see https://markkarpov.com/tutorial/megaparsec.html
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main.Parser.Megaparsec (parse) where
import Control.Monad.Combinators.Expr
import Data.Functor.Identity qualified
import Data.Text
import Data.Void (Void)
import Main.Type
import Text.Megaparsec as MP hiding (parse)
import Text.Megaparsec qualified as MP
import Text.Megaparsec.Char qualified as C
import Text.Megaparsec.Char.Lexer qualified as L
type Parser = Parsec Void Text
lexeme :: Parser a -> Parser a
lexeme = L.lexeme C.space
symbol :: Text -> Parser Text
symbol = L.symbol C.space
int :: Parser Int
int = lexeme $ L.signed (return ()) L.decimal
string :: Text -> Parser Text
string = C.string
container :: Text -> Text -> Parser a -> Parser a
container b e = between (symbol b) (symbol e)
term :: Parser Expr
term =
choice
[ Lit <$> int,
container "(" ")" expr
]
table :: [[Operator Parser Expr]]
table =
[ [methodOp "print" Print],
[ binaryOp "*" Mul,
binaryOp "/" Div
],
[ binaryOp "+" Add,
binaryOp "-" Sub
]
]
binaryOp :: Text -> (a -> a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
binaryOp name f = InfixL $ f <$ symbol name
-- prefixOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
-- prefixOp name f = Prefix (f <$ symbol name)
methodOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
methodOp name f = Prefix $ f <$ (string name <* C.space)
-- postfixOp :: Text -> (a -> a) -> Operator (ParsecT Void Text Data.Functor.Identity.Identity) a
-- postfixOp name f = Postfix (f <$ symbol name)
expr :: Parser Expr
expr = makeExprParser term table
statement :: Parser Expr
statement = expr <* symbol ";"
parseExpr :: Text -> Either (ParseErrorBundle Text Void) [Expr]
parseExpr = MP.parse (C.space *> many statement <* eof) ""
parse :: Text -> [Expr]
parse t =
case parseExpr t of
Right r -> r
-- TODO: add error handling

View File

@ -1,30 +0,0 @@
module Main.Type
( Expr (..),
-- AST (..)
)
where
import Data.Graph (Tree (Node))
data Expr
= Lit Int
| Paren Expr
| Add Expr Expr
| Sub Expr Expr
| Mul Expr Expr
| Div Expr Expr
| Print Expr
deriving
( Show
)
-- data AST = AST Node
-- data Node
-- = Reg
-- { cur :: Expr,
-- next :: Node
-- }
-- | End
-- { cur :: Expr
-- }

View File

@ -1,11 +1,4 @@
resolver: lts-20.26 resolver: lts-20.26
compiler: ghc-9.2.8
# setup-info:
# ghc:
# aarch64:
# 9.4.6:
# url: "https://downloads.haskell.org/~ghc/9.4.6/ghc-9.4.6-aarch64-deb10-linux.tar.xz"
# sha256: "05896fc4bc52c117d281eac9c621c6c3a0b14f9f9eed5e42cce5e1c4485c7623"
packages: packages:
- . - .
@ -19,4 +12,4 @@ nix:
enable: true enable: true
packages: [llvm_15, clang_15, libxml2] packages: [llvm_15, clang_15, libxml2]
system-ghc: true system-ghc: true
install-ghc: false install-ghc: true