Add custom LLVM code generation package

This commit is contained in:
Sasha Koshka 2023-11-22 20:37:16 -05:00
parent 54b693620e
commit 8ce12613e2
12 changed files with 3246 additions and 0 deletions

20
llvm/README.md Normal file
View File

@ -0,0 +1,20 @@
# llvm
This package was created for the express purpose of generating LLVM IR. A good
portion of this code was taken from the [llir project](https://github.com/llir/llvm).
This package supports:
- Instructions and terminators
- Constants
- Modules
- Functions
- Types
- Type defs
- Blocks
- Opaque pointers
This package does not support:
- Constant expressions
- Sanity checking
- Typed pointers
- Metadata and attributes

508
llvm/constant.go Normal file
View File

@ -0,0 +1,508 @@
package llvm
import "fmt"
import "log"
import "math"
import "strings"
import "math/big"
import "github.com/mewmew/float"
import "github.com/mewmew/float/binary128"
import "github.com/mewmew/float/binary16"
import "github.com/mewmew/float/float128ppc"
import "github.com/mewmew/float/float80x86"
// Const is a Value that is a constant. IsConstant has an empty body in every
// implementation in this file; it only marks a type as being a constant.
type Const interface {
Value
IsConstant ()
}
// ConstArray is an array constant: an array type together with the constant
// elements stored in it.
type ConstArray struct {
	Ty       *TypeArray
	Elements []Const
}

// IsConstant marks ConstArray as a constant.
func (*ConstArray) IsConstant () { }

// Type returns the array type of the constant.
func (this *ConstArray) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstArray) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the elements, each rendered with its own String method,
// separated by commas and enclosed in square brackets.
func (this *ConstArray) Name () string {
	rendered := make([]string, len(this.Elements))
	for index, element := range this.Elements {
		rendered[index] = element.String()
	}
	return "[" + strings.Join(rendered, ", ") + "]"
}
// ConstBlockAddress is a blockaddress constant referring to a block within a
// function.
type ConstBlockAddress struct {
	Function Const
	Block    Value
}

// IsConstant marks ConstBlockAddress as a constant.
func (*ConstBlockAddress) IsConstant () { }

// Type returns a new pointer type in the default address space.
func (this *ConstBlockAddress) Type () Type { return &TypePointer { } }

// String returns the pointer type followed by the Name of the constant.
func (this *ConstBlockAddress) String () string {
	return fmt.Sprint(&TypePointer { }) + " " + this.Name()
}

// Name returns a blockaddress expression built from the names of the
// function and the block.
func (this *ConstBlockAddress) Name () string {
	function := this.Function.Name()
	block := this.Block.Name()
	return "blockaddress(" + function + ", " + block + ")"
}
// ConstCharArray is an array constant whose elements are given as raw bytes
// and which is written as a character array literal.
type ConstCharArray struct {
	Ty       *TypeArray
	Elements []byte
}

// IsConstant marks ConstCharArray as a constant.
func (*ConstCharArray) IsConstant () { }

// Type returns the array type of the constant.
func (this *ConstCharArray) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstCharArray) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the bytes as an escaped, double-quoted string prefixed with
// the letter c.
func (this *ConstCharArray) Name () string {
	quoted := EscapeQuoteString(this.Elements)
	return "c" + quoted
}
// ConstDSOLocalEquivalent is a dso_local_equivalent constant wrapping a
// function.
type ConstDSOLocalEquivalent struct {
	Function Const
}

// IsConstant marks ConstDSOLocalEquivalent as a constant.
func (*ConstDSOLocalEquivalent) IsConstant () { }

// Type returns the type of the wrapped function.
func (this *ConstDSOLocalEquivalent) Type () Type {
	return this.Function.Type()
}

// String returns the type of the constant followed by its Name.
func (this *ConstDSOLocalEquivalent) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the name of the wrapped function prefixed with the
// dso_local_equivalent keyword.
func (this *ConstDSOLocalEquivalent) Name () string {
	return "dso_local_equivalent " + this.Function.Name()
}
// ConstFloat is a floating-point constant. When NaN is set the constant is a
// NaN and only the sign of Value is meaningful: a negative Value denotes a
// negative NaN.
type ConstFloat struct {
	Ty    *TypeFloat
	Value *big.Float
	NaN   bool
}

// NewConstFloat creates a floating-point constant of type ty from value.
func NewConstFloat (ty *TypeFloat, value float64) *ConstFloat {
	if !math.IsNaN(value) {
		return &ConstFloat { Ty: ty, Value: big.NewFloat(value) }
	}

	// big.NewFloat panics on NaN, so NaN is tracked with a flag and the sign
	// of the NaN is stored as the sign of an otherwise meaningless Value.
	constant := &ConstFloat { Ty: ty, Value: new(big.Float), NaN: true }
	if math.Signbit(value) {
		constant.Value.SetFloat64(-1)
	}
	return constant
}

// IsConstant marks ConstFloat as a constant.
func (*ConstFloat) IsConstant () { }

// Type returns the floating-point type of the constant.
func (this *ConstFloat) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstFloat) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}
// Name returns the literal for the floating-point constant, without its
// type.
//
// half, float and double values that can be written without loss are
// returned in decimal notation, with a decimal point inserted if it is
// missing (42 -> 42.0, 3e4 -> 3.0e4). NaN, infinite and inexact values of
// those kinds, and all x86_fp80, fp128 and ppc_fp128 values, are returned in
// hexadecimal notation. For NaN constants the sign is taken from the sign of
// Value. Name panics if the kind of the type is not one of the six kinds
// handled here.
func (this *ConstFloat) Name () string {
	switch this.Ty.Kind {
	// half (IEEE 754 half precision)
	case FloatKindHalf:
		const hexPrefix = 'H'
		if this.NaN {
			bits := binary16.NaN.Bits()
			if this.Value != nil && this.Value.Signbit() {
				bits = binary16.NegNaN.Bits()
			}
			return fmt.Sprintf("0x%c%04X", hexPrefix, bits)
		}
		if this.Value.IsInf() || !float.IsExact16(this.Value) {
			f, acc := binary16.NewFromBig(this.Value)
			if acc != big.Exact {
				log.Printf("unable to represent floating-point constant %v of type %v exactly; please submit a bug report to fspl with this error message", this.Value, this.Type())
			}
			bits := f.Bits()
			return fmt.Sprintf("0x%c%04X", hexPrefix, bits)
		}
		// The value is representable without loss as a decimal literal; this
		// case is handled for half, float and double below the switch.

	// float (IEEE 754 single precision)
	case FloatKindFloat:
		// ref: https://groups.google.com/d/msg/llvm-dev/IlqV3TbSk6M/27dAggZOMb0J
		//
		// The exact bit representation of the float is laid out with the
		// corresponding bitwise representation of a double: the sign bit is
		// copied over, the exponent is encoded in the larger width, and the
		// 23 bits of significand fill in the top 23 bits of significand in
		// the double. A double has 52 bits of significand, so this means
		// that the last 29 bits of significand will always be ignored. As an
		// error detection measure, the IR parser requires them to be zero.
		if this.NaN {
			f := math.NaN()
			if this.Value != nil && this.Value.Signbit() {
				f = math.Copysign(f, -1)
			}
			bits := math.Float64bits(f)
			// zero out last 29 bits.
			bits &^= 0x1FFFFFFF
			return fmt.Sprintf("0x%X", bits)
		}
		if this.Value.IsInf() || !float.IsExact32(this.Value) {
			// Round to the nearest float32 before widening to a double.
			// Masking the low bits of the float64 value would truncate
			// instead of round, and would keep exponents that do not fit in
			// a float. The accuracy result is deliberately ignored: this
			// branch exists precisely because the value is inexact.
			f32, _ := this.Value.Float32()
			// Widening a float32 is exact, so the last 29 bits of the
			// significand are already zero. To match Clang output the
			// hexadecimal output is not zero-padded.
			bits := math.Float64bits(float64(f32))
			return fmt.Sprintf("0x%X", bits)
		}
		// The value is representable without loss as a decimal literal; this
		// case is handled for half, float and double below the switch.

	// double (IEEE 754 double precision)
	case FloatKindDouble:
		if this.NaN {
			// To use the same canonical representation as LLVM IR for NaN
			// values, we explicitly use a qNaN value (quiet NaN) with the
			// leading bit in the mantissa set, rather than the trailing bit
			// as used for the canonical representation in Go (see math.NaN).
			//
			// For further background, see https://github.com/llir/llvm/issues/133
			//
			//      exponent    mantissa
			//    s 11111111111 1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx = quiet     (qNaN)
			//    s 11111111111 0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx = signaling (sNaN) **
			//                  ^ quiet bit
			//
			// Where ** denotes that at least one of the 'x' bits has to be
			// set, since the mantissa must be non-zero to denote NaN.
			//
			// quiet NaN:
			//    0x7FF8000000000000 = 0b0_11111111111_100000000000000000000000000000000000000000000000000
			f := math.Float64frombits(0x7FF8000000000000) // quiet NaN
			if this.Value != nil && this.Value.Signbit() {
				f = math.Copysign(f, -1)
			}
			bits := math.Float64bits(f)
			return fmt.Sprintf("0x%X", bits)
		}
		if this.Value.IsInf() || !float.IsExact64(this.Value) {
			f, _ := this.Value.Float64()
			bits := math.Float64bits(f)
			// Note, to match Clang output we do not zero-pad the hexadecimal
			// output.
			return fmt.Sprintf("0x%X", bits)
		}
		// The value is representable without loss as a decimal literal; this
		// case is handled for half, float and double below the switch.

	// x86_fp80 (x86 extended precision)
	case FloatKindX86_FP80:
		// always represent x86_fp80 in hexadecimal floating-point notation.
		const hexPrefix = 'K'
		if this.NaN {
			se, m := float80x86.NaN.Bits()
			if this.Value != nil && this.Value.Signbit() {
				se, m = float80x86.NegNaN.Bits()
			}
			return fmt.Sprintf("0x%c%04X%016X", hexPrefix, se, m)
		}
		f, acc := float80x86.NewFromBig(this.Value)
		if acc != big.Exact {
			log.Printf("unable to represent floating-point constant %v of type %v exactly; please submit a bug report to fspl with this error message", this.Value, this.Type())
		}
		se, m := f.Bits()
		return fmt.Sprintf("0x%c%04X%016X", hexPrefix, se, m)

	// fp128 (IEEE 754 quadruple precision)
	case FloatKindFP128:
		// always represent fp128 in hexadecimal floating-point notation.
		const hexPrefix = 'L'
		if this.NaN {
			a, b := binary128.NaN.Bits()
			if this.Value != nil && this.Value.Signbit() {
				a, b = binary128.NegNaN.Bits()
			}
			return fmt.Sprintf("0x%c%016X%016X", hexPrefix, a, b)
		}
		f, acc := binary128.NewFromBig(this.Value)
		if acc != big.Exact {
			log.Printf("unable to represent floating-point constant %v of type %v exactly; please submit a bug report to fspl with this error message", this.Value, this.Type())
		}
		a, b := f.Bits()
		return fmt.Sprintf("0x%c%016X%016X", hexPrefix, a, b)

	// ppc_fp128 (PowerPC double-double arithmetic)
	case FloatKindPPC_FP128:
		// always represent ppc_fp128 in hexadecimal floating-point notation.
		const hexPrefix = 'M'
		if this.NaN {
			a, b := float128ppc.NaN.Bits()
			if this.Value != nil && this.Value.Signbit() {
				a, b = float128ppc.NegNaN.Bits()
			}
			return fmt.Sprintf("0x%c%016X%016X", hexPrefix, a, b)
		}
		f, acc := float128ppc.NewFromBig(this.Value)
		if acc != big.Exact {
			log.Printf("unable to represent floating-point constant %v of type %v exactly; please submit a bug report to fspl with this error message", this.Value, this.Type())
		}
		a, b := f.Bits()
		return fmt.Sprintf("0x%c%016X%016X", hexPrefix, a, b)

	default:
		panic(fmt.Errorf("support for floating-point kind %v not yet implemented", this.Ty.Kind))
	}

	// Insert decimal point if not present.
	//    3e4 -> 3.0e4
	//    42  -> 42.0
	s := this.Value.Text('g', -1)
	if !strings.ContainsRune(s, '.') {
		if pos := strings.IndexByte(s, 'e'); pos != -1 {
			s = s[:pos] + ".0" + s[pos:]
		} else {
			s += ".0"
		}
	}
	return s
}
// ConstIndex wraps a constant used as an index and records whether the index
// carries the inrange marker.
type ConstIndex struct {
	Const
	InRange bool
}

// String returns the wrapped constant, preceded by the inrange keyword when
// InRange is set.
func (this *ConstIndex) String () string {
	if !this.InRange {
		return this.Const.String()
	}
	return fmt.Sprintf("inrange %s", this.Const)
}
// ConstInt is an integer constant of arbitrary size.
type ConstInt struct {
	Ty    *TypeInt
	Value *big.Int
}

// NewConstInt creates an integer constant of type ty holding value.
func NewConstInt (ty *TypeInt, value int64) *ConstInt {
	constant := new(ConstInt)
	constant.Ty = ty
	constant.Value = big.NewInt(value)
	return constant
}

// NewConstBool creates a constant of type I1 holding 1 for true and 0 for
// false.
func NewConstBool (value bool) *ConstInt {
	if value {
		return NewConstInt(I1, 1)
	}
	return NewConstInt(I1, 0)
}

// IsConstant marks ConstInt as a constant.
func (*ConstInt) IsConstant () { }

// Type returns the integer type of the constant.
func (this *ConstInt) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstInt) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}
// Name returns the literal for the integer constant, without its type.
//
// Constants of a one bit wide type are written as "true" or "false"; any
// value other than 0 or 1 causes a panic. Other constants are written in
// decimal, except that values of at least 0x1000 are written in hexadecimal
// (with a u0x prefix) when that notation has significantly lower entropy
// than the decimal one.
func (this *ConstInt) Name () string {
	if this.Ty.BitSize == 1 {
		x := this.Value.Int64()
		if x == 0 {
			return "false"
		}
		if x == 1 {
			return "true"
		}
		panic(fmt.Errorf("invalid integer value of boolean type; expected 0 or 1, got %d", x))
	}

	// Values below 0x1000 (4096) are always written in decimal.
	if this.Value.Cmp(big.NewInt(0x1000)) < 0 {
		return this.Value.String()
	}

	// Minimum difference between the entropy of the decimal and hexadecimal
	// notations for the hexadecimal notation to be used.
	const minEntropyDiff = 0.2

	// Some hexadecimal values have lower entropy than their decimal
	// counterpart and still do not read any better. For instance, the
	// decimal entropy of 7240739780546808700 is 9/10 = 0.9 and the
	// hexadecimal entropy of 0x647C4677A2884B7C is 8/16 = 0.5; the
	// difference is 0.4, but the hexadecimal notation is no more readable.
	// The hexadecimal entropy is therefore also required to stay below an
	// upper bound that depends on the number of hexadecimal digits.
	hexText := this.Value.Text(16)
	maxHexEntropy := calcMaxHexEntropy(len(hexText))

	hexentropy := hexEntropy(this.Value)
	decentropy := decimalEntropy(this.Value)
	if hexentropy <= maxHexEntropy+0.01 && decentropy >= hexentropy+minEntropyDiff {
		return "u0x" + strings.ToUpper(hexText)
	}
	return this.Value.String()
}
// calcMaxHexEntropy returns the highest hexadecimal entropy that is accepted
// for a number written with length hexadecimal digits. Lengths above 16 are
// treated as 16. Fewer than 4 digits give 0; otherwise the result is the
// number of allowed unique digits (2 for up to 6 digits, 3 for up to 10
// digits, 4 beyond that) divided by the length.
func calcMaxHexEntropy (length int) float64 {
	digits := length
	if digits > 16 {
		digits = 16
	}

	var allowedUnique float64
	switch {
	case digits < 4:
		return 0
	case digits <= 6:
		allowedUnique = 2.0
	case digits <= 10:
		allowedUnique = 3.0
	default:
		allowedUnique = 4.0
	}
	return allowedUnique / float64(digits)
}
// hexEntropy returns the entropy of x written in hexadecimal notation.
func hexEntropy(x *big.Int) float64 {
	const base = 16
	return intEntropy(x, base)
}

// decimalEntropy returns the entropy of x written in decimal notation.
func decimalEntropy(x *big.Int) float64 {
	const base = 10
	return intEntropy(x, base)
}

// intEntropy returns the number of unique digits of x written in the given
// base, divided by the number of digits (capped at base, since no more than
// base unique digits can occur). The sign of x is not a digit and does not
// take part in the calculation, so x and -x have the same entropy.
//
// intEntropy panics if base is outside of the range 2 to 62.
func intEntropy(x *big.Int, base int) float64 {
	if base < 2 || base > 62 {
		panic(fmt.Errorf("invalid base; expected 2 <= base <= 62, got %d", base))
	}
	const maxBase = 62
	var seen [maxBase]bool

	// Drop the sign so that it is counted neither as a digit nor as part of
	// the length.
	digits := x.Text(base)
	if len(digits) > 0 && digits[0] == '-' {
		digits = digits[1:]
	}

	// Count unique digits.
	uniqueDigits := 0
	for i := 0; i < len(digits); i++ {
		d := digitValue(digits[i])
		if !seen[d] {
			seen[d] = true
			uniqueDigits++
		}
	}

	length := len(digits)
	if length > base {
		length = base
	}
	return float64(uniqueDigits) / float64(length)
}

// digitValue returns the value of the digit b: 0 to 9 for '0' to '9', 10 to
// 35 for 'a' to 'z' and 36 to 61 for 'A' to 'Z'. It panics on any other
// byte.
func digitValue(b byte) int {
	switch {
	case '0' <= b && b <= '9':
		return 0 + int(b-'0')
	case 'a' <= b && b <= 'z':
		return 10 + int(b-'a')
	case 'A' <= b && b <= 'Z':
		return 36 + int(b-'A')
	default:
		panic(fmt.Errorf("invalid digit byte; expected [0-9a-zA-Z], got %#U", b))
	}
}
// ConstNoCFI is a no_cfi constant wrapping a function.
type ConstNoCFI struct {
	Function Const
}

// IsConstant marks ConstNoCFI as a constant.
func (*ConstNoCFI) IsConstant () { }

// Type returns the type of the wrapped function.
func (this *ConstNoCFI) Type () Type {
	return this.Function.Type()
}

// String returns the type of the constant followed by its Name.
func (this *ConstNoCFI) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the name of the wrapped function prefixed with the no_cfi
// keyword.
func (this *ConstNoCFI) Name () string {
	return "no_cfi " + this.Function.Name()
}
// ConstNull is the null pointer constant.
type ConstNull struct { }

// IsConstant marks ConstNull as a constant.
func (*ConstNull) IsConstant () { }

// Type returns a new pointer type in the default address space.
func (*ConstNull) Type () Type { return &TypePointer { } }

// String returns the type of the constant followed by its Name.
func (this *ConstNull) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the null keyword.
func (this *ConstNull) Name () string { return "null" }
// ConstPoison is a poison value of a given type.
type ConstPoison struct {
	Ty Type
}

// IsConstant marks ConstPoison as a constant.
func (*ConstPoison) IsConstant () { }

// Type returns the type the poison value was given.
func (this *ConstPoison) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstPoison) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the poison keyword.
func (this *ConstPoison) Name () string { return "poison" }
// ConstStruct is a struct constant: a struct type together with one constant
// per field.
type ConstStruct struct {
	Ty     *TypeStruct
	Fields []Const
}

// IsConstant marks ConstStruct as a constant.
func (*ConstStruct) IsConstant () { }

// Type returns the struct type of the constant.
func (this *ConstStruct) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstStruct) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the fields, each rendered with its own String method,
// separated by commas and enclosed in braces. The braces are additionally
// enclosed in angle brackets when the struct type is packed.
func (this *ConstStruct) Name () string {
	packed := this.Ty.Packed

	rendered := make([]string, len(this.Fields))
	for index, field := range this.Fields {
		rendered[index] = field.String()
	}

	body := "{ " + strings.Join(rendered, ", ") + " }"
	if packed {
		return "<" + body + ">"
	}
	return body
}
// ConstUndef is an undefined value of a given type.
type ConstUndef struct {
	Ty Type
}

// IsConstant marks ConstUndef as a constant.
func (*ConstUndef) IsConstant () { }

// Type returns the type the undefined value was given.
func (this *ConstUndef) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstUndef) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the undef keyword.
func (this *ConstUndef) Name () string { return "undef" }
// ConstVector is a vector constant: a vector type together with the constant
// elements stored in it.
type ConstVector struct {
	Ty       *TypeVector
	Elements []Const
}

// IsConstant marks ConstVector as a constant.
func (*ConstVector) IsConstant () { }

// Type returns the vector type of the constant.
func (this *ConstVector) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstVector) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the elements, each rendered with its own String method,
// separated by commas and enclosed in angle brackets.
func (this *ConstVector) Name () string {
	rendered := make([]string, len(this.Elements))
	for index, element := range this.Elements {
		rendered[index] = element.String()
	}
	return "<" + strings.Join(rendered, ", ") + ">"
}
// ConstZeroInitializer is the zeroinitializer constant of a given type.
type ConstZeroInitializer struct {
	Ty Type
}

// IsConstant marks ConstZeroInitializer as a constant.
func (*ConstZeroInitializer) IsConstant () { }

// Type returns the type the constant was given.
func (this *ConstZeroInitializer) Type () Type { return this.Ty }

// String returns the type of the constant followed by its Name.
func (this *ConstZeroInitializer) String () string {
	return fmt.Sprint(this.Type()) + " " + this.Name()
}

// Name returns the zeroinitializer keyword.
func (this *ConstZeroInitializer) Name () string {
	return "zeroinitializer"
}

View File

@ -0,0 +1,6 @@
package llvm
// ConstExpr is a Const that additionally has the IsExpression marker method.
// No implementation of it is visible in this part of the package.
type ConstExpr interface {
Const
IsExpression()
}

35
llvm/convenience.go Normal file
View File

@ -0,0 +1,35 @@
package llvm
// Ready-made instances of commonly used types. The trailing comment on each
// line shows how the type is written. These are package-level pointers, so
// every user of one of these variables shares the same instance.
var (
// Basic types.
Pointer = &TypePointer { } // ptr
Void = &TypeVoid { } // void
MMX = &TypeMMX { } // x86_mmx
Label = &TypeLabel { } // label
Token = &TypeToken { } // token
Metadata = &TypeMetadata { } // metadata
// Integer types.
I1 = &TypeInt { BitSize: 1 } // i1
I2 = &TypeInt { BitSize: 2 } // i2
I3 = &TypeInt { BitSize: 3 } // i3
I4 = &TypeInt { BitSize: 4 } // i4
I5 = &TypeInt { BitSize: 5 } // i5
I6 = &TypeInt { BitSize: 6 } // i6
I7 = &TypeInt { BitSize: 7 } // i7
I8 = &TypeInt { BitSize: 8 } // i8
I16 = &TypeInt { BitSize: 16 } // i16
I32 = &TypeInt { BitSize: 32 } // i32
I64 = &TypeInt { BitSize: 64 } // i64
I128 = &TypeInt { BitSize: 128 } // i128
I256 = &TypeInt { BitSize: 256 } // i256
I512 = &TypeInt { BitSize: 512 } // i512
I1024 = &TypeInt { BitSize: 1024 } // i1024
// Floating-point types.
Half = &TypeFloat { Kind: FloatKindHalf } // half
Float = &TypeFloat { Kind: FloatKindFloat } // float
Double = &TypeFloat { Kind: FloatKindDouble } // double
X86_FP80 = &TypeFloat { Kind: FloatKindX86_FP80 } // x86_fp80
FP128 = &TypeFloat { Kind: FloatKindFP128 } // fp128
PPC_FP128 = &TypeFloat { Kind: FloatKindPPC_FP128 } // ppc_fp128
)

130
llvm/encode.go Normal file
View File

@ -0,0 +1,130 @@
package llvm
import "strings"
// Character sets used to decide how an identifier has to be written.
const (
	// decimal holds the decimal digits.
	decimal = "0123456789"
	// upper holds the uppercase letters.
	upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	// lower holds the lowercase letters.
	lower = "abcdefghijklmnopqrstuvwxyz"
	// alpha holds all letters.
	alpha = upper + lower
	// head holds the characters that may start an unquoted identifier.
	head = alpha + "$-._"
	// tail holds the characters that may follow the first character of an
	// unquoted identifier. Every character of a label may come from tail,
	// including the first one.
	tail = head + decimal
	// quotedIdent holds the characters that may appear unescaped in a quoted
	// identifier: printable ASCII without the double quote and the
	// backslash.
	quotedIdent = " !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"
)

// EscapeIdent returns s in a form usable as an identifier.
//
// If every byte of s is in tail, s is returned unchanged. Otherwise s is
// returned in double quotes, with every byte that is not valid in a quoted
// identifier replaced by a backslash followed by the two uppercase
// hexadecimal digits of the byte; e.g.
//
//	"\t" -> `\09`
//	"世" -> `\E4\B8\96`
func EscapeIdent (s string) string {
	// Some characters (e.g. '#') are not valid in a plain identifier but
	// are valid in a quoted one; they force quoting without being escaped.
	needsQuotes := false
	for i := 0; i < len(s) && !needsQuotes; i++ {
		needsQuotes = strings.IndexByte(tail, s[i]) == -1
	}
	if !needsQuotes {
		return s
	}

	const hexDigits = "0123456789ABCDEF"
	quoted := &strings.Builder { }
	quoted.Grow(len(s) + 2)
	quoted.WriteByte('"')
	for i := 0; i < len(s); i++ {
		c := s[i]
		if strings.IndexByte(quotedIdent, c) == -1 {
			quoted.WriteByte('\\')
			quoted.WriteByte(hexDigits[c>>4])
			quoted.WriteByte(hexDigits[c&0x0F])
		} else {
			quoted.WriteByte(c)
		}
	}
	quoted.WriteByte('"')
	return quoted.String()
}
// Escape returns s as a string in which every byte rejected by valid is
// replaced by a backslash followed by the two uppercase hexadecimal digits
// of the byte; e.g.
//
//	"#" -> `\23`
//	"世" -> `\E4\B8\96`
//
// valid is consulted once per byte to size the result and, if anything has
// to be escaped, once more per byte to build it.
func Escape (s []byte, valid func(b byte) bool) string {
	invalid := 0
	for _, b := range s {
		if !valid(b) {
			invalid++
		}
	}
	if invalid == 0 {
		return string(s)
	}

	// Each escaped byte takes three bytes instead of one.
	const hexDigits = "0123456789ABCDEF"
	escaped := make([]byte, 0, len(s)+2*invalid)
	for _, b := range s {
		if valid(b) {
			escaped = append(escaped, b)
		} else {
			escaped = append(escaped, '\\', hexDigits[b>>4], hexDigits[b&0x0F])
		}
	}
	return string(escaped)
}

// EscapeString escapes s with Escape, keeping printable ASCII bytes except
// the double quote and the backslash.
func EscapeString (s []byte) string {
	printable := func(b byte) bool {
		if b < ' ' || b > '~' {
			return false
		}
		return b != '"' && b != '\\'
	}
	return Escape(s, printable)
}
// EncodeTypeName returns name escaped with EscapeIdent and prefixed with "%".
func EncodeTypeName (name string) string {
return "%" + EscapeIdent(name)
}
// EncodeRegisterName returns name escaped with EscapeIdent and prefixed with
// "%".
func EncodeRegisterName (name string) string {
return "%" + EscapeIdent(name)
}
// EncodeFunctionName returns name escaped with EscapeIdent and prefixed with
// "@".
func EncodeFunctionName (name string) string {
return "@" + EscapeIdent(name)
}
// EncodeLabelName returns name escaped with EscapeIdent and followed by ":".
func EncodeLabelName (name string) string {
return EscapeIdent(name) + ":"
}
// EscapeQuoteString returns s escaped with EscapeString and enclosed in
// double quotes.
func EscapeQuoteString (s []byte) string {
	escaped := EscapeString(s)
	return "\"" + escaped + "\""
}

138
llvm/function.go Normal file
View File

@ -0,0 +1,138 @@
package llvm
import "fmt"
import "strings"
// Function is a function declaration or definition. A function without any
// blocks is written as a declaration, and one with blocks as a definition
// (see LLString).
type Function struct {
// FunctionName is the unencoded name of the function.
FunctionName string
// Signature holds the return type, the parameter types and whether the
// function is variadic.
Signature *TypeFunction
// Parameters holds the parameters written in the function header.
Parameters []*Parameter
// Blocks holds the blocks that make up the body, in output order.
Blocks []*Block
// AddressSpace is written after the parameter list when it is not zero.
AddressSpace AddressSpace
// TODO complete this
// nextIdent is the next number handed out by newIdent.
nextIdent int
}
// Parameter is a function parameter. It is a Register and has all of its
// fields and methods.
type Parameter struct {
	Register
}

// LLString returns the parameter as it is written in a function header,
// which is the same text as String returns.
func (this *Parameter) LLString () string {
	return this.String()
}

// NewParameter creates a parameter with the given name and type.
func NewParameter (name string, ty Type) *Parameter {
	parameter := new(Parameter)
	parameter.Ty = ty
	parameter.RegisterName = name
	return parameter
}
// newIdent returns the next unused numeric identifier of the function as a
// decimal string and advances the counter.
func (this *Function) newIdent () string {
	ident := this.nextIdent
	this.nextIdent = ident + 1
	return fmt.Sprint(ident)
}

// NewBlock creates a block with the given name, appends it to the function
// and returns it. An empty name is replaced by the next numeric identifier
// of the function.
func (this *Function) NewBlock (name string) *Block {
	if name == "" {
		name = this.newIdent()
	}

	block := new(Block)
	block.Name = name
	block.Parent = this
	this.Blocks = append(this.Blocks, block)
	return block
}
// LLString returns the function as it is written in a module. A function
// without blocks is written as a declaration (the header followed by a line
// break); any other function is written as a definition (the header followed
// by the body).
func (this *Function) LLString () string {
	if len(this.Blocks) == 0 {
		return "declare" + this.headerString() + "\n"
	}
	header := this.headerString()
	return "define" + header + " " + this.bodyString()
}
// Type returns the shared Pointer type.
func (this *Function) Type () Type { return Pointer }

// Name returns the unencoded name of the function.
func (this *Function) Name () string { return this.FunctionName }

// SetName replaces the name of the function.
func (this *Function) SetName (name string) { this.FunctionName = name }

// String returns the function as an operand: the ptr type followed by the
// encoded function name.
func (this *Function) String () string {
	encoded := EncodeFunctionName(this.Name())
	return fmt.Sprintf("ptr %s", encoded)
}
// headerString returns the part of a declaration or definition that follows
// the declare or define keyword: a space, the return type, the encoded
// function name, the parameter list (ending in "..." for a variadic
// signature) and, if it is not zero, the address space.
func (this *Function) headerString () string {
	header := fmt.Sprintf(" %v", this.Signature.Return)
	header += fmt.Sprintf(" %v(", EncodeFunctionName(this.Name()))

	entries := make([]string, 0, len(this.Parameters)+1)
	for _, parameter := range this.Parameters {
		entries = append(entries, parameter.LLString())
	}
	if this.Signature.Variadic {
		entries = append(entries, "...")
	}
	header += strings.Join(entries, ", ") + ")"

	if this.AddressSpace != 0 {
		header += fmt.Sprintf(" %v", this.AddressSpace)
	}
	return header
}

// bodyString returns the blocks of the function enclosed in braces, with an
// empty line between consecutive blocks.
func (this *Function) bodyString () string {
	blocks := make([]string, len(this.Blocks))
	for index, block := range this.Blocks {
		blocks[index] = block.LLString()
	}
	return "{\n" + strings.Join(blocks, "\n") + "}\n"
}
// Block is a labelled sequence of instructions inside a function.
type Block struct {
	Name         string
	Parent       *Function
	Instructions []Instruction
}

// LLString returns the block as it is written in a function body: the
// encoded label on its own line, followed by one tab-indented line per
// instruction.
func (this *Block) LLString () string {
	text := &strings.Builder { }
	text.WriteString(EncodeLabelName(this.Name))
	text.WriteString("\n")
	for _, instruction := range this.Instructions {
		text.WriteString("\t" + instruction.LLString() + "\n")
	}
	return text.String()
}

// AddInstruction appends instruction to the block. An instruction that is a
// ValueInstruction is first given the next numeric identifier of the parent
// function as its name, replacing whatever name it had.
func (this *Block) AddInstruction (instruction Instruction) {
	if named, ok := instruction.(ValueInstruction); ok {
		named.SetName(this.Parent.newIdent())
	}
	this.Instructions = append(this.Instructions, instruction)
}

189
llvm/gep.go Normal file
View File

@ -0,0 +1,189 @@
package llvm
import "fmt"
// gepIndex is what is known about one getelementptr index while the result
// type of the instruction is determined.
type gepIndex struct {
// HasVal reports whether Val holds a known value for the index.
HasVal bool
// Val is the value of the index; only meaningful when HasVal is set.
Val int64
// VectorLen is the number of elements of a vector index, or 0 when the
// index is not known to be a vector.
VectorLen uint64
}
// gepInstType returns the result type of a getelementptr instruction with
// the given element type, source operand type and indices. Constant indices
// are analyzed with getIndex; for any other index only its vector length, if
// it has a vector type, is recorded.
func gepInstType (elemType, src Type, indices []Value) Type {
	analyzed := make([]gepIndex, 0, len(indices))
	for _, index := range indices {
		if constant, ok := index.(Const); ok {
			analyzed = append(analyzed, getIndex(constant))
			continue
		}

		// A non-constant index has no known value, but it may still be a
		// vector.
		var unknown gepIndex
		if vector, ok := index.Type().(*TypeVector); ok {
			unknown.VectorLen = vector.Length
		}
		analyzed = append(analyzed, unknown)
	}
	return resultType(elemType, src, analyzed)
}
// getIndex analyzes a constant getelementptr index.
//
// An integer constant yields its value and a zeroinitializer yields 0. A
// vector of integer constants yields the shared value if all elements are
// equal, and no value otherwise; in both cases the vector length is
// recorded. An empty vector, undef, poison and constant expressions yield no
// value. getIndex panics on vector elements that are not integer constants
// and on any other kind of constant.
func getIndex (index Const) gepIndex {
	// Look through the inrange wrapper.
	if wrapped, ok := index.(*ConstIndex); ok {
		index = wrapped.Const
	}

	// TODO: figure out how to simplify expressions for GEP instructions
	// without creating an import cycle on irutil, so that a constant
	// expression can be reduced to a concrete integer constant or vector of
	// integer constants before it is inspected here.

	switch index := index.(type) {
	case *ConstInt:
		return gepIndex { HasVal: true, Val: index.Value.Int64() }

	case *ConstZeroInitializer:
		return gepIndex { HasVal: true, Val: 0 }

	case *ConstVector:
		// ref: https://llvm.org/docs/LangRef.html#getelementptr-instruction
		//
		// > The getelementptr returns a vector of pointers, instead of a
		// > single address, when one or more of its arguments is a vector.
		// > In such cases, all vector arguments should have the same number
		// > of elements, and every scalar argument will be effectively
		// > broadcast into a vector during address calculation.
		count := len(index.Elements)
		if count == 0 {
			return gepIndex { HasVal: false }
		}

		// All elements must be integers; the vector only has a value if
		// they are all equal.
		var shared int64
		for position, elem := range index.Elements {
			integer, ok := elem.(*ConstInt)
			if !ok {
				// TODO: remove debug output.
				panic(fmt.Errorf("support for gep index vector element type %T not yet implemented", elem))
			}
			value := integer.Value.Int64()
			if position == 0 {
				shared = value
				continue
			}
			if value != shared {
				// The elements differ, so the index vector does not have a
				// single concrete value.
				return gepIndex {
					HasVal:    false,
					VectorLen: uint64(count),
				}
			}
		}
		return gepIndex {
			HasVal:    true,
			Val:       shared,
			VectorLen: uint64(count),
		}

	case *ConstUndef, *ConstPoison:
		return gepIndex { HasVal: false }

	case ConstExpr:
		// should already have been simplified to a form we can handle.
		return gepIndex { HasVal: false }

	default:
		// TODO: add support for more constant expressions.
		// TODO: remove debug output.
		panic(fmt.Errorf("support for gep index type %T not yet implemented", index))
	}
}
// resultType returns the result type of a getelementptr instruction: a
// pointer in the address space of the source operand, or a vector of such
// pointers when the source operand or one of the indices is a vector.
//
// elemType is the type used as the basis for the calculation, src is the
// type of the source operand (a pointer or a vector of pointers) and indices
// describes the indices. The indices are walked through elemType only to
// validate them; the pointee type does not appear in the result.
//
// resultType panics if src is neither a pointer nor a vector of pointers, if
// vector operands disagree on their length, if an index steps into a pointer
// or into a type that cannot be indexed, or if a struct is indexed with an
// index that has no known value or that is out of range.
func resultType (elemType, src Type, indices []gepIndex) Type {
	// ref: http://llvm.org/docs/GetElementPtr.html#what-effect-do-address-spaces-have-on-geps
	//
	// > the address space qualifier on the second operand pointer type always
	// > matches the address space qualifier on the result type.
	var (
		// Address space of src pointer type or src vector element pointer
		// type.
		addrSpace AddressSpace
		// Length of vector of pointers result type; or 0 if pointer result
		// type.
		resultVectorLength uint64
	)

	// ref: https://llvm.org/docs/LangRef.html#getelementptr-instruction
	//
	// > The second argument is always a pointer or a vector of pointers.
	switch src := src.(type) {
	case *TypePointer:
		addrSpace = src.AddressSpace
	case *TypeVector:
		vectorElemType, ok := src.Element.(*TypePointer)
		if !ok {
			panic(fmt.Errorf("invalid gep source vector element type %T", src.Element))
		}
		addrSpace = vectorElemType.AddressSpace
		resultVectorLength = src.Length
	default:
		panic(fmt.Errorf("invalid gep source type %T", src))
	}

	// ref: https://llvm.org/docs/LangRef.html#getelementptr-instruction
	//
	// > The first argument is always a type used as the basis for the
	// > calculations.
	e := elemType
	for i, index := range indices {
		// ref: https://llvm.org/docs/LangRef.html#getelementptr-instruction
		//
		// > The getelementptr returns a vector of pointers, instead of a
		// > single address, when one or more of its arguments is a vector.
		// > In such cases, all vector arguments should have the same number
		// > of elements, and every scalar argument will be effectively
		// > broadcast into a vector during address calculation.
		if index.VectorLen != 0 && resultVectorLength != 0 && index.VectorLen != resultVectorLength {
			panic(fmt.Errorf("vector length mismatch of index vector (%d) and result type vector (%d)", index.VectorLen, resultVectorLength))
		}
		if resultVectorLength == 0 && index.VectorLen != 0 {
			resultVectorLength = index.VectorLen
		}

		// ref: https://llvm.org/docs/GetElementPtr.html#why-is-the-extra-0-index-required
		//
		// > Since the second argument to the GEP instruction must always be
		// > a value of pointer type, the first index steps through that
		// > pointer.
		if i == 0 {
			continue
		}

		switch elm := e.(type) {
		case *TypePointer:
			panic(fmt.Errorf("cannot index into pointer type at %d:th gep index, only valid at 0:th gep index; see https://llvm.org/docs/GetElementPtr.html#what-is-dereferenced-by-gep", i))
		case *TypeVector:
			// Vectors may be indexed like arrays.
			e = elm.Element
		case *TypeArray:
			e = elm.Element
		case *TypeStruct:
			// ref: https://llvm.org/docs/LangRef.html#getelementptr-instruction
			//
			// > When indexing into a (optionally packed) structure, only
			// > i32 integer constants are allowed (when using a vector of
			// > indices they must all be the same i32 integer constant).
			if !index.HasVal {
				panic(fmt.Errorf("unable to index into struct type `%v` using gep with non-constant index", e))
			}
			// Report a bad field index explicitly instead of failing with a
			// bare slice bounds panic.
			if index.Val < 0 || index.Val >= int64(len(elm.Fields)) {
				panic(fmt.Errorf("gep index %d is out of range for struct type `%v` with %d fields", index.Val, e, len(elm.Fields)))
			}
			e = elm.Fields[index.Val]
		default:
			panic(fmt.Errorf("cannot index into type %T using gep", e))
		}
	}

	ptr := &TypePointer { AddressSpace: addrSpace }
	if resultVectorLength != 0 {
		return &TypeVector {
			Element: ptr,
			Length:  resultVectorLength,
		}
	}
	return ptr
}

1498
llvm/instruction.go Normal file

File diff suppressed because it is too large Load Diff

56
llvm/module.go Normal file
View File

@ -0,0 +1,56 @@
package llvm
import "io"
import "fmt"
// Module is a collection of named types and functions that can be written
// out as text.
type Module struct {
	Types     []Type
	Functions []*Function
}

// WriteTo writes the module to writer: first one type definition line per
// type, then every function. It returns the number of bytes written and the
// first error encountered; writing stops at that error.
func (this *Module) WriteTo (writer io.Writer) (wrote int64, err error) {
	for _, ty := range this.Types {
		count, failure := fmt.Fprintf(writer, "%v = type %v\n", ty, ty.LLString())
		wrote += int64(count)
		if failure != nil {
			return wrote, failure
		}
	}

	for _, function := range this.Functions {
		count, failure := fmt.Fprint(writer, function.LLString())
		wrote += int64(count)
		if failure != nil {
			return wrote, failure
		}
	}

	return wrote, nil
}
// NewFunction creates a function with the given name, return type and
// parameters, appends it to the module and returns it. The parameter types
// of the signature are taken from the parameters.
func (this *Module) NewFunction (name string, retur Type, parameters ...*Parameter) *Function {
	signature := &TypeFunction {
		Return:     retur,
		Parameters: make([]Type, len(parameters)),
	}
	for position := range parameters {
		signature.Parameters[position] = parameters[position].Type()
	}

	function := new(Function)
	function.FunctionName = name
	function.Signature = signature
	function.Parameters = parameters

	this.Functions = append(this.Functions, function)
	return function
}
// NewType names ty with name, appends it to the types of the module and
// returns it. The type passed in is itself renamed; no copy is made here.
func (this *Module) NewType (name string, ty Type) Type {
ty.SetName(name)
this.Types = append(this.Types, ty)
return ty
}
// Align is an alignment given as a number.
type Align uint64

// String returns the align keyword followed by the alignment in decimal.
func (align Align) String () string {
	return fmt.Sprint("align ", uint64(align))
}

357
llvm/terminator.go Normal file
View File

@ -0,0 +1,357 @@
package llvm
import "fmt"
import "strings"
// Terminator is an Instruction. The interface adds no methods of its own; it
// is implemented by the Terminator* types in this file.
type Terminator interface {
Instruction
}
// TerminatorBr is an unconditional branch to a target.
type TerminatorBr struct {
	Target Value
}

// LLString returns the br keyword followed by the target.
func (this *TerminatorBr) LLString () string {
	return fmt.Sprintf("br %s", this.Target)
}

// NewBr creates an unconditional branch to target, appends it to the block
// and returns it.
func (this *Block) NewBr (target Value) *TerminatorBr {
	terminator := new(TerminatorBr)
	terminator.Target = target
	this.AddInstruction(terminator)
	return terminator
}
// TerminatorCallBr is a callbr terminator: a call that continues at
// NormalTarget or at one of OtherTargets.
type TerminatorCallBr struct {
	InstructionCall
	NormalTarget Value
	OtherTargets []Value
}

// LLString returns the terminator as it is written in a block. The result is
// assigned to the name of the terminator unless its type is void, and the
// whole function signature is written in place of the return type when the
// signature is variadic. The targets are written on a second line.
func (this *TerminatorCallBr) LLString () string {
	text := &strings.Builder { }
	if _, isVoid := this.Type().(*TypeVoid); !isVoid {
		fmt.Fprintf(text, "%s = ", this.Name())
	}
	text.WriteString("callbr")

	// (optional) Address space.
	if this.AddressSpace != 0 {
		fmt.Fprintf(text, " %s", this.AddressSpace)
	}

	// Use function signature instead of return type for variadic functions.
	calleeType := this.Type()
	if this.Signature.Variadic {
		calleeType = this.Signature
	}

	arguments := make([]string, len(this.Arguments))
	for position, argument := range this.Arguments {
		arguments[position] = argument.String()
	}
	fmt.Fprintf(text, " %s %s(%s)", calleeType, this.Callee, strings.Join(arguments, ", "))

	targets := make([]string, len(this.OtherTargets))
	for position, target := range this.OtherTargets {
		targets[position] = target.String()
	}
	fmt.Fprintf(text, "\n\t\tto %s [%s]", this.NormalTarget, strings.Join(targets, ", "))
	return text.String()
}

// NewCallBr creates a callbr terminator that calls callee with args,
// appends it to the block and returns it. The type of the terminator is the
// return type of signature.
func (this *Block) NewCallBr (
	callee Value,
	signature *TypeFunction,
	args []Value,
	normalTarget Value,
	otherTargets ...Value,
) *TerminatorCallBr {
	terminator := &TerminatorCallBr {
		NormalTarget: normalTarget,
		OtherTargets: otherTargets,
	}
	terminator.Callee = callee
	terminator.Arguments = args
	terminator.Signature = signature
	terminator.Ty = signature.Return
	this.AddInstruction(terminator)
	return terminator
}
// TerminatorCatchRet is a catchret terminator.
type TerminatorCatchRet struct {
	// CatchPad is printed by name after "from".
	CatchPad Value
	// Target is printed with %s after "to".
	Target Value
}

// LLString returns "catchret from <pad name> to <target>".
func (this *TerminatorCatchRet) LLString () string {
	return fmt.Sprintf (
		"catchret from %s to %s",
		this.CatchPad.Name(), this.Target)
}

// NewCatchRet creates a TerminatorCatchRet from catchPad and target, passes it
// to AddInstruction on this block, and returns it.
func (this *Block) NewCatchRet (catchPad Value, target Value) *TerminatorCatchRet {
	catchRet := new(TerminatorCatchRet)
	catchRet.CatchPad = catchPad
	catchRet.Target   = target
	this.AddInstruction(catchRet)
	return catchRet
}
// TerminatorCatchSwitch is a catchswitch terminator. Its result is held in
// the embedded Register.
type TerminatorCatchSwitch struct {
	Register
	// ParentPad is printed by name after "within".
	ParentPad Value
	// Handlers are printed, comma separated, inside the bracketed list.
	Handlers []Value
	// DefaultUnwindTarget is printed after "unwind". When it is nil,
	// "to caller" is printed instead.
	DefaultUnwindTarget Value
}

// LLString returns
// "<name> = catchswitch within <pad name> [<handlers>] unwind <target>",
// where <target> is "to caller" if DefaultUnwindTarget is nil.
func (this *TerminatorCatchSwitch) LLString () string {
	result := this.Name()
	pad    := this.ParentPad.Name()

	handlers := make([]string, 0, len(this.Handlers))
	for _, handler := range this.Handlers {
		handlers = append(handlers, handler.String())
	}

	unwind := "to caller"
	if this.DefaultUnwindTarget != nil {
		unwind = this.DefaultUnwindTarget.String()
	}

	return result + " = catchswitch within " + pad +
		" [" + strings.Join(handlers, ", ") + "] unwind " + unwind
}

// NewCatchSwitch creates a TerminatorCatchSwitch whose result type is a new
// *TypeToken, passes it to AddInstruction on this block, and returns it.
func (this *Block) NewCatchSwitch (parentPad Value, handlers []Value, defaultUnwindTarget Value) *TerminatorCatchSwitch {
	catchSwitch := &TerminatorCatchSwitch {
		Register:            Register { Ty: &TypeToken { } },
		ParentPad:           parentPad,
		Handlers:            handlers,
		DefaultUnwindTarget: defaultUnwindTarget,
	}
	this.AddInstruction(catchSwitch)
	return catchSwitch
}
// TerminatorCleanupRet is a cleanupret terminator.
type TerminatorCleanupRet struct {
	// CleanupPad is printed by name after "from".
	CleanupPad Value
	// UnwindTarget is printed after "unwind". When it is nil, "to caller"
	// is printed instead.
	UnwindTarget Value
}

// LLString returns "cleanupret from <pad name> unwind <target>", where
// <target> is "to caller" if UnwindTarget is nil.
func (this *TerminatorCleanupRet) LLString () string {
	pad := this.CleanupPad.Name()
	unwind := "to caller"
	if this.UnwindTarget != nil {
		unwind = this.UnwindTarget.String()
	}
	return "cleanupret from " + pad + " unwind " + unwind
}

// NewTerminatorCleanupRet creates a TerminatorCleanupRet from cleanupPad and
// unwindTarget, passes it to AddInstruction on this block, and returns it.
func (this *Block) NewTerminatorCleanupRet (cleanupPad, unwindTarget Value) *TerminatorCleanupRet {
	cleanupRet := new(TerminatorCleanupRet)
	cleanupRet.CleanupPad   = cleanupPad
	cleanupRet.UnwindTarget = unwindTarget
	this.AddInstruction(cleanupRet)
	return cleanupRet
}
// TerminatorCondBr is a br terminator with a condition and two targets.
type TerminatorCondBr struct {
	// Condition is printed first.
	Condition Value
	// True is printed second.
	True Value
	// False is printed third.
	False Value
}

// LLString returns "br <condition>, <true>, <false>", each operand formatted
// with %s.
func (this *TerminatorCondBr) LLString () string {
	return fmt.Sprintf (
		"br %s, %s, %s",
		this.Condition, this.True, this.False)
}

// NewTerminatorCondBr creates a TerminatorCondBr from condition and the two
// targets, passes it to AddInstruction on this block, and returns it.
func (this *Block) NewTerminatorCondBr (condition Value, tru, fals Value) *TerminatorCondBr {
	condBr := new(TerminatorCondBr)
	condBr.Condition = condition
	condBr.True      = tru
	condBr.False     = fals
	this.AddInstruction(condBr)
	return condBr
}
// TerminatorIndirectBr is an indirectbr terminator.
type TerminatorIndirectBr struct {
	// Address is printed with %s right after "indirectbr".
	Address Value
	// ValidTargets are printed, comma separated, inside the bracketed
	// list.
	ValidTargets []Value
}

// LLString returns "indirectbr <address>, [<targets>]".
func (this *TerminatorIndirectBr) LLString () string {
	head := fmt.Sprintf("indirectbr %s, [", this.Address)
	targets := make([]string, 0, len(this.ValidTargets))
	for _, target := range this.ValidTargets {
		targets = append(targets, target.String())
	}
	return head + strings.Join(targets, ", ") + "]"
}

// NewIndirectBr creates a TerminatorIndirectBr from address and validTargets,
// passes it to AddInstruction on this block, and returns it.
func (this *Block) NewIndirectBr (address Value, validTargets ...Value) *TerminatorIndirectBr {
	indirectBr := new(TerminatorIndirectBr)
	indirectBr.Address      = address
	indirectBr.ValidTargets = validTargets
	this.AddInstruction(indirectBr)
	return indirectBr
}
// TerminatorInvoke is an invoke terminator. Its result is held in the
// embedded Register.
type TerminatorInvoke struct {
	Register
	// Invokee is the function value being invoked; it is printed by name.
	Invokee Value
	// Signature is consulted for the Variadic flag when printing, and must
	// not be nil.
	Signature *TypeFunction
	// Arguments are printed, comma separated, inside the parentheses.
	Arguments []Value
	// NormalTarget is printed after "to".
	NormalTarget Value
	// ExceptionTarget is printed after "unwind".
	ExceptionTarget Value
	// AddressSpace is printed after "invoke" when it is non-zero.
	AddressSpace AddressSpace
	// TODO complete this
}

// LLString renders the terminator as
//
//	[name = ]invoke [addrspace(N)] type invokee(args)
//			to normal unwind exception
//
// The "name = " prefix is omitted when the result type is *TypeVoid, and the
// address space is omitted when it is zero.
func (this *TerminatorInvoke) LLString () string {
	buf := &strings.Builder { }
	if _, ok := this.Type().(*TypeVoid); !ok {
		fmt.Fprintf(buf, "%s = ", this.Name())
	}
	buf.WriteString("invoke")
	// (optional) Address space.
	if this.AddressSpace != 0 {
		fmt.Fprintf(buf, " %s", this.AddressSpace)
	}
	// Use function signature instead of return type for variadic functions.
	calleeType := this.Type()
	if sig := this.Signature; sig.Variadic {
		calleeType = sig
	}
	// The invokee must be written as a bare identifier. String() on a
	// value yields "type name" (see Register.String), which would put a
	// stray type between the callee type and the invokee itself.
	fmt.Fprintf(buf, " %s %s(", calleeType, this.Invokee.Name())
	for index, arg := range this.Arguments {
		if index > 0 {
			buf.WriteString(", ")
		}
		buf.WriteString(arg.String())
	}
	buf.WriteString(")")
	fmt.Fprintf(buf, "\n\t\tto %s unwind %s", this.NormalTarget, this.ExceptionTarget)
	return buf.String()
}

// NewInvoke creates a TerminatorInvoke that invokes invokee with args, passes
// it to AddInstruction on this block, and returns it. The result type is
// taken from signature.Return, so signature must not be nil.
func (this *Block) NewInvoke (
	invokee Value,
	signature *TypeFunction,
	args []Value,
	normalTarget Value,
	exceptionTarget Value,
) *TerminatorInvoke {
	terminator := &TerminatorInvoke { }
	terminator.Invokee = invokee
	terminator.Arguments = args
	terminator.NormalTarget = normalTarget
	terminator.ExceptionTarget = exceptionTarget
	terminator.Signature = signature
	terminator.Ty = signature.Return
	this.AddInstruction(terminator)
	return terminator
}
// TerminatorResume is a resume terminator.
type TerminatorResume struct {
	// X is the operand printed after "resume".
	X Value
}

// LLString returns "resume " followed by X formatted with %s.
func (this *TerminatorResume) LLString () string {
	return fmt.Sprintf("resume %s", this.X)
}

// NewResume creates a TerminatorResume for x, passes it to AddInstruction on
// this block, and returns it.
func (this *Block) NewResume (x Value) *TerminatorResume {
	resume := new(TerminatorResume)
	resume.X = x
	this.AddInstruction(resume)
	return resume
}
// TerminatorRet is a ret terminator.
type TerminatorRet struct {
	// X is the returned value. A nil X means nothing is returned.
	X Value
}

// LLString returns "ret <X>" when X is set and "ret void" when X is nil.
// LLVM IR has no bare "ret" form, so the void keyword has to be written out
// for the result to parse.
func (this *TerminatorRet) LLString () string {
	if this.X == nil {
		return "ret void"
	}
	return fmt.Sprintf("ret %s", this.X)
}

// NewRet creates a TerminatorRet returning x (nil for a void return), passes
// it to AddInstruction on this block, and returns it.
func (this *Block) NewRet (x Value) *TerminatorRet {
	terminator := &TerminatorRet { X: x }
	this.AddInstruction(terminator)
	return terminator
}
// Case is a single entry in the case list of a TerminatorSwitch.
type Case struct {
	// X is printed first.
	X Value
	// Target is printed second.
	Target Value
}

// String returns "<X>, <Target>", both formatted with %s.
func (this *Case) String () string {
	const layout = "%s, %s"
	return fmt.Sprintf(layout, this.X, this.Target)
}
// TerminatorSwitch is a switch terminator.
type TerminatorSwitch struct {
	// X is printed right after "switch".
	X Value
	// Default is printed after X.
	Default Value
	// Cases are printed one per line inside the bracketed list.
	Cases []*Case
}

// LLString returns "switch <X>, <Default> [" followed by a newline, then one
// line per case prefixed with two tabs, then a tab and the closing bracket.
func (this *TerminatorSwitch) LLString () string {
	var out strings.Builder
	fmt.Fprintf(&out, "switch %s, %s [\n", this.X, this.Default)
	for _, branch := range this.Cases {
		fmt.Fprintf(&out, "\t\t%s\n", branch)
	}
	out.WriteString("\t]")
	return out.String()
}

// NewSwitch creates a TerminatorSwitch from x, defaul and cases, passes it to
// AddInstruction on this block, and returns it.
func (this *Block) NewSwitch (x, defaul Value, cases ...*Case) *TerminatorSwitch {
	sw := new(TerminatorSwitch)
	sw.X       = x
	sw.Default = defaul
	sw.Cases   = cases
	this.AddInstruction(sw)
	return sw
}
// TerminatorUnreachable is an unreachable terminator. It has no operands.
type TerminatorUnreachable struct { }

// LLString returns "unreachable".
func (this *TerminatorUnreachable) LLString () string {
	return "unreachable"
}

// NewUnreachable creates a TerminatorUnreachable, passes it to AddInstruction
// on this block, and returns it.
func (this *Block) NewUnreachable () *TerminatorUnreachable {
	unreachable := new(TerminatorUnreachable)
	this.AddInstruction(unreachable)
	return unreachable
}

275
llvm/type.go Normal file
View File

@ -0,0 +1,275 @@
package llvm
import "fmt"
import "strings"
// Type is the interface shared by the Type* structs in this file.
type Type interface {
	// String is how a type is written when it is referred to. The
	// implementations in this file return the encoded name for a named
	// type and the LLString form otherwise.
	fmt.Stringer

	// Name returns the name previously given with SetName.
	Name () string

	// SetName gives the type a name.
	SetName (name string)

	// LLString returns the textual form of the type itself, regardless of
	// whether it has a name.
	LLString () string
}
// AbstractType stores a type name and supplies the name-related methods to
// the structs that embed it.
type AbstractType struct {
	// TypeName is the stored name. An empty string means no name.
	TypeName string
}

// Name returns the stored name.
func (this *AbstractType) Name () string {
	name := this.TypeName
	return name
}

// SetName replaces the stored name with name.
func (this *AbstractType) SetName (name string) {
	this.TypeName = name
}

// Named reports whether the stored name is non-empty.
func (this *AbstractType) Named () bool {
	return len(this.TypeName) > 0
}
// TypeArray is an array type made of Length elements of type Element.
type TypeArray struct {
	AbstractType
	Element Type
	Length  uint64
}

// LLString returns "[<Length> x <Element>]".
func (this *TypeArray) LLString () string {
	const layout = "[%d x %v]"
	return fmt.Sprintf(layout, this.Length, this.Element)
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeArray) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// FloatKind selects the floating-point format of a TypeFloat.
type FloatKind uint8

const (
	// FloatKindHalf is the 16-bit floating-point type (IEEE 754 half
	// precision), written "half".
	FloatKindHalf FloatKind = iota
	// FloatKindFloat is the 32-bit floating-point type (IEEE 754 single
	// precision), written "float".
	FloatKindFloat
	// FloatKindDouble is the 64-bit floating-point type (IEEE 754 double
	// precision), written "double".
	FloatKindDouble
	// FloatKindFP128 is the 128-bit floating-point type (IEEE 754
	// quadruple precision), written "fp128".
	FloatKindFP128
	// FloatKindX86_FP80 is the 80-bit floating-point type (x86 extended
	// precision), written "x86_fp80".
	FloatKindX86_FP80
	// FloatKindPPC_FP128 is the 128-bit floating-point type (PowerPC
	// double-double arithmetic), written "ppc_fp128".
	FloatKindPPC_FP128
)

// String returns the keyword for the kind. A value that is not one of the
// constants above is rendered as "FloatKind(N)".
func (kind FloatKind) String () string {
	keywords := [...]string {
		FloatKindHalf:      "half",
		FloatKindFloat:     "float",
		FloatKindDouble:    "double",
		FloatKindFP128:     "fp128",
		FloatKindX86_FP80:  "x86_fp80",
		FloatKindPPC_FP128: "ppc_fp128",
	}
	if int(kind) < len(keywords) {
		return keywords[kind]
	}
	return fmt.Sprintf("FloatKind(%d)", uint8(kind))
}
// TypeFloat is a floating-point type whose format is chosen by Kind.
type TypeFloat struct {
	AbstractType
	Kind FloatKind
}

// LLString returns the keyword of the float kind.
func (this *TypeFloat) LLString () string {
	keyword := this.Kind.String()
	return keyword
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeFloat) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeFunction is a function signature.
type TypeFunction struct {
	AbstractType
	// Return is the return type.
	Return Type
	// Parameters are the parameter types, in order.
	Parameters []Type
	// Variadic adds a trailing "..." to the parameter list.
	Variadic bool
}

// LLString returns "<Return> (<parameters>)" with the parameters separated by
// ", ". A variadic signature gets "..." as its final entry.
func (this *TypeFunction) LLString () string {
	head := fmt.Sprintf("%s (", this.Return)

	entries := make([]string, 0, len(this.Parameters) + 1)
	for _, parameter := range this.Parameters {
		entries = append(entries, parameter.String())
	}
	if this.Variadic {
		entries = append(entries, "...")
	}

	return head + strings.Join(entries, ", ") + ")"
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeFunction) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeInt is an integer type that is BitSize bits wide.
type TypeInt struct {
	AbstractType
	BitSize uint64
}

// LLString returns "i" followed by the bit size in decimal.
func (this *TypeInt) LLString () string {
	bits := this.BitSize
	return fmt.Sprintf("i%d", bits)
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeInt) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeLabel is the label type.
type TypeLabel struct {
	AbstractType
}

// LLString returns "label".
func (this *TypeLabel) LLString () string {
	const keyword = "label"
	return keyword
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeLabel) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeMMX is the x86_mmx type.
type TypeMMX struct {
	AbstractType
}

// LLString returns "x86_mmx".
func (this *TypeMMX) LLString () string {
	const keyword = "x86_mmx"
	return keyword
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeMMX) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeMetadata is the metadata type.
type TypeMetadata struct {
	AbstractType
}

// LLString returns "metadata".
func (this *TypeMetadata) LLString () string {
	const keyword = "metadata"
	return keyword
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeMetadata) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// AddressSpace is an address space number. Its String form is the textual
// "addrspace(N)" clause.
type AddressSpace uint64

// String returns "addrspace(N)" with the number in decimal.
func (space AddressSpace) String () string {
	number := uint64(space)
	return fmt.Sprintf("addrspace(%d)", number)
}
// TypePointer is a pointer type, optionally in a non-zero address space.
type TypePointer struct {
	AbstractType
	AddressSpace AddressSpace
}

// LLString returns "ptr" for address space zero, and "ptr addrspace(N)" for
// any other address space.
func (this *TypePointer) LLString () string {
	if this.AddressSpace != 0 {
		return "ptr " + this.AddressSpace.String()
	}
	return "ptr"
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypePointer) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeStruct is a structure type.
type TypeStruct struct {
	AbstractType
	// Fields are the field types, in order.
	Fields []Type
	// Packed wraps the printed body in angle brackets.
	Packed bool
	// Opaque makes the type print as "opaque", ignoring Fields and Packed.
	Opaque bool
}

// LLString returns "opaque" for an opaque struct. Otherwise it returns the
// fields separated by ", " between "{ " and " }", additionally wrapped in
// "<" and ">" when the struct is packed.
func (this *TypeStruct) LLString () string {
	if this.Opaque {
		return "opaque"
	}

	fields := make([]string, len(this.Fields))
	for index := range this.Fields {
		fields[index] = this.Fields[index].String()
	}

	body := "{ " + strings.Join(fields, ", ") + " }"
	if this.Packed {
		return "<" + body + ">"
	}
	return body
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeStruct) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeToken is the token type.
type TypeToken struct {
	AbstractType
}

// LLString returns "token".
func (this *TypeToken) LLString () string {
	const keyword = "token"
	return keyword
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeToken) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeVector is a vector type made of Length elements of type Element.
type TypeVector struct {
	AbstractType
	Element Type
	Length  uint64
	// Scalable adds the "vscale x " prefix to the printed form.
	Scalable bool
}

// LLString returns "<<Length> x <Element>>", with "vscale x " placed before
// the length when the vector is scalable.
func (this *TypeVector) LLString () string {
	prefix := ""
	if this.Scalable {
		prefix = "vscale x "
	}
	return fmt.Sprintf("<%s%d x %s>", prefix, this.Length, this.Element)
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeVector) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// TypeVoid is the void type.
type TypeVoid struct {
	AbstractType
}

// LLString returns "void".
func (this *TypeVoid) LLString () string {
	const keyword = "void"
	return keyword
}

// String returns the encoded type name if the type is named, and the
// LLString form otherwise.
func (this *TypeVoid) String () string {
	if !this.Named() {
		return this.LLString()
	}
	return EncodeTypeName(this.Name())
}
// aggregateElemType walks into the aggregate type t along indices and returns
// the type it ends up at. Each index descends one level: into the element
// type of a *TypeArray (the index value itself is not consulted), or into the
// selected field of a *TypeStruct. With no indices, t is returned as is.
//
// It panics if a level is reached that is neither a *TypeArray nor a
// *TypeStruct while indices remain.
func aggregateElemType (t Type, indices []uint64) Type {
	current := t
	for _, index := range indices {
		switch aggregate := current.(type) {
		case *TypeArray:
			current = aggregate.Element
		case *TypeStruct:
			current = aggregate.Fields[index]
		default:
			panic(fmt.Errorf("support for aggregate type %T not yet implemented", aggregate))
		}
	}
	return current
}

34
llvm/value.go Normal file
View File

@ -0,0 +1,34 @@
package llvm
import "fmt"
type Value interface {
fmt.Stringer
Type () Type
Name () string
}
// Register is a typed, optionally named register.
type Register struct {
	// Ty is the type of the register.
	Ty Type
	// RegisterName is the unencoded name. An empty string means no name.
	RegisterName string
}

// Type returns the type of the register.
func (this *Register) Type () Type {
	ty := this.Ty
	return ty
}

// Named reports whether the register name is non-empty.
func (this *Register) Named () bool {
	return len(this.RegisterName) > 0
}

// Name returns the register name passed through EncodeRegisterName.
func (this *Register) Name () string {
	encoded := EncodeRegisterName(this.RegisterName)
	return encoded
}

// SetName replaces the unencoded register name with name.
func (this *Register) SetName (name string) {
	this.RegisterName = name
}

// String returns the type and the encoded name separated by a space.
func (this *Register) String () string {
	name := this.Name()
	return fmt.Sprintf("%v %v", this.Ty, name)
}