diff --git a/go.mod b/go.mod
index 9592327..2302c73 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,9 @@ module git.urbach.dev/cli/q
 
 go 1.24
 
-require git.urbach.dev/go/assert v0.0.0-20250606150337-559d3d3afcda
\ No newline at end of file
+require (
+	git.urbach.dev/go/assert v0.0.0-20250606150337-559d3d3afcda
+	git.urbach.dev/go/color v0.0.0-20250606151219-222306e0b534
+)
+
+require golang.org/x/sys v0.33.0 // indirect
\ No newline at end of file
diff --git a/go.sum b/go.sum
index 1b21880..1a58880 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,6 @@
 git.urbach.dev/go/assert v0.0.0-20250606150337-559d3d3afcda h1:VN6ZQwtwLOm2xTms+v8IIeeNjvs55qyEBNArv3dPq9g=
-git.urbach.dev/go/assert v0.0.0-20250606150337-559d3d3afcda/go.mod h1:PNI/NSBOqvoeU58/7eBsIR09Yoq2S/qtSRiTrctkiq0=
\ No newline at end of file
+git.urbach.dev/go/assert v0.0.0-20250606150337-559d3d3afcda/go.mod h1:PNI/NSBOqvoeU58/7eBsIR09Yoq2S/qtSRiTrctkiq0=
+git.urbach.dev/go/color v0.0.0-20250606151219-222306e0b534 h1:KE2HJgxUq0Zvpl2bsxYgYhLSMe4B4XDXW5KNiq0QaFU=
+git.urbach.dev/go/color v0.0.0-20250606151219-222306e0b534/go.mod h1:M7cugRvuAWsbWULgPIydVM+YRIenZYprVJHvNIka+kU=
+golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
+golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
\ No newline at end of file
diff --git a/lib/io/write.q b/lib/io/write.q
deleted file mode 100644
index 28dcda7..0000000
--- a/lib/io/write.q
+++ /dev/null
@@ -1,5 +0,0 @@
-import os
-
-write(buffer []byte) -> (written int) {
-	return os.write(0, buffer, len(buffer))
-}
\ No newline at end of file
diff --git a/lib/io/write_linux_arm.q b/lib/io/write_linux_arm.q
new file mode 100644
index 0000000..9743267
--- /dev/null
+++ b/lib/io/write_linux_arm.q
@@ -0,0 +1,3 @@
+write(buffer []byte) -> (written int) {
+	return syscall(64, 1, buffer, len(buffer))
+}
\ No newline at end of file
diff --git a/lib/io/write_linux_x86.q b/lib/io/write_linux_x86.q
new file mode 100644
index 0000000..633cfbe
--- /dev/null
+++ b/lib/io/write_linux_x86.q
@@ -0,0 +1,3 @@
+write(buffer []byte) -> (written int) {
+	return syscall(1, 1, buffer, len(buffer))
+}
\ No newline at end of file
diff --git a/lib/os/os_linux_x86.q b/lib/os/os_linux_x86.q
deleted file mode 100644
index 0b690fd..0000000
--- a/lib/os/os_linux_x86.q
+++ /dev/null
@@ -1,3 +0,0 @@
-write(fd int, buffer *byte, length int) -> (written int) {
-	return syscall(1, fd, buffer, length)
-}
\ No newline at end of file
diff --git a/src/build/Build.go b/src/build/Build.go
index cae03d4..002c738 100644
--- a/src/build/Build.go
+++ b/src/build/Build.go
@@ -2,8 +2,9 @@ package build
 
 // Build describes the parameters for the "build" command.
 type Build struct {
-	Files []string
-	Arch  Arch
-	OS    OS
-	Dry   bool
+	Files   []string
+	Arch    Arch
+	OS      OS
+	Dry     bool
+	ShowSSA bool
 }
\ No newline at end of file
diff --git a/src/cli/build.go b/src/cli/build.go
index a56ae36..cc0c829 100644
--- a/src/cli/build.go
+++ b/src/cli/build.go
@@ -68,6 +68,9 @@ func newBuildFromArgs(args []string) (*build.Build, error) {
 				return b, &invalidValueError{Value: args[i], Parameter: "os"}
 			}
 
+		case "-v", "--verbose":
+			b.ShowSSA = true
+
 		default:
 			if strings.HasPrefix(args[i], "-") {
 				return b, &unknownParameterError{Parameter: args[i]}
diff --git a/src/cli/help.txt b/src/cli/help.txt
index ebe1173..f1aed0d 100644
--- a/src/cli/help.txt
+++ b/src/cli/help.txt
@@ -5,5 +5,8 @@ Usage:
 
 Commands:
     build [directory | file]     build an executable
+
+        --verbose, -v            show everything
+
     run [directory | file]       build and run the executable
     help                         show this help
\ No newline at end of file
diff --git a/src/compiler/Compile.go b/src/compiler/Compile.go
index bb08cfc..749bdce 100644
--- a/src/compiler/Compile.go
+++ b/src/compiler/Compile.go
@@ -21,5 +21,16 @@ func Compile(b *build.Build) (*core.Environment, error) {
 	}
 
 	compileFunctions(maps.Values(all.Functions))
+
+	for _, f := range all.Functions {
+		if f.Err != nil {
+			return nil, f.Err
+		}
+	}
+
+	if b.ShowSSA {
+		showSSA(maps.Values(all.Functions))
+	}
+
 	return all, nil
 }
\ No newline at end of file
diff --git a/src/compiler/showSSA.go b/src/compiler/showSSA.go
new file mode 100644
index 0000000..ea4984e
--- /dev/null
+++ b/src/compiler/showSSA.go
@@ -0,0 +1,26 @@
+package compiler
+
+import (
+	"fmt"
+	"iter"
+
+	"git.urbach.dev/cli/q/src/core"
+	"git.urbach.dev/go/color/ansi"
+)
+
+// showSSA prints the SSA IR of every function, separating the blocks of a function with "---".
+func showSSA(functions iter.Seq[*core.Function]) {
+	for f := range functions {
+		ansi.Bold.Printf("%s:\n", f.UniqueName)
+
+		for blockIndex, block := range f.Blocks {
+			if blockIndex != 0 {
+				fmt.Println("---")
+			}
+
+			for index, instr := range block.Instructions {
+				fmt.Printf("t%d = %s\n", index, instr.String())
+			}
+		}
+	}
+}
\ No newline at end of file
diff --git a/src/core/Compile.go b/src/core/Compile.go
index 7a39f29..6a04058 100644
--- a/src/core/Compile.go
+++ b/src/core/Compile.go
@@ -1,4 +1,29 @@
 package core
 
+import (
+	"git.urbach.dev/cli/q/src/cpu"
+	"git.urbach.dev/cli/q/src/token"
+)
+
 // Compile turns a function into machine code.
-func (f *Function) Compile() {}
\ No newline at end of file
+func (f *Function) Compile() {
+	registerCount := 0
+
+	for _, input := range f.Input {
+		f.Identifiers[input.Name] = f.AppendRegister(cpu.Register(registerCount))
+		registerCount++
+
+		if input.TypeTokens[0].Kind == token.ArrayStart { // arrays get a second register for their length
+			f.Identifiers[input.Name+".length"] = f.AppendRegister(cpu.Register(registerCount))
+			registerCount++
+		}
+	}
+
+	for instr := range f.Body.Instructions {
+		f.Err = f.CompileInstruction(instr)
+
+		if f.Err != nil {
+			return
+		}
+	}
+}
\ No newline at end of file
diff --git a/src/core/CompileInstruction.go b/src/core/CompileInstruction.go
new file mode 100644
index 0000000..aaecc9e
--- /dev/null
+++ b/src/core/CompileInstruction.go
@@ -0,0 +1,43 @@
+package core
+
+import (
+	"git.urbach.dev/cli/q/src/expression"
+	"git.urbach.dev/cli/q/src/token"
+)
+
+// CompileInstruction compiles a single instruction of the function body.
+// Instructions starting with the `return` keyword are passed to CompileReturn,
+// definitions bind the evaluated value to their name in f.Identifiers
+// and everything else is evaluated as an expression.
+func (f *Function) CompileInstruction(instr token.List) error {
+	// An empty token list has nothing to compile and indexing it would panic.
+	if len(instr) == 0 {
+		return nil
+	}
+
+	if instr[0].IsKeyword() {
+		switch instr[0].Kind {
+		case token.Return:
+			return f.CompileReturn(instr)
+		}
+	}
+
+	expr := expression.Parse(instr)
+
+	if expr.Token.Kind == token.Define {
+		name := expr.Children[0].String(f.File.Bytes)
+		value, err := f.Evaluate(expr.Children[1])
+
+		// Only bind the name if the evaluation succeeded,
+		// otherwise the identifier would refer to a nil value.
+		if err != nil {
+			return err
+		}
+
+		f.Identifiers[name] = value
+		return nil
+	}
+
+	_, err := f.Evaluate(expr)
+	return err
+}
\ No newline at end of file
diff --git a/src/core/CompileReturn.go b/src/core/CompileReturn.go
new file mode 100644
index 0000000..9c04f58
--- /dev/null
+++ b/src/core/CompileReturn.go
@@ -0,0 +1,24 @@
+package core
+
+import (
+	"git.urbach.dev/cli/q/src/expression"
+	"git.urbach.dev/cli/q/src/ssa"
+	"git.urbach.dev/cli/q/src/token"
+)
+
+// CompileReturn compiles a return instruction.
+func (f *Function) CompileReturn(tokens token.List) error {
+	expr := expression.Parse(tokens[1:])
+	value, err := f.Evaluate(expr)
+
+	if err != nil {
+		return err
+	}
+
+	f.Append(ssa.Value{
+		Type: ssa.Return,
+		Args: []*ssa.Value{value},
+	})
+
+	return nil
+}
\ No newline at end of file
diff --git a/src/core/Evaluate.go b/src/core/Evaluate.go
new file mode 100644
index 0000000..9c07152
--- /dev/null
+++ b/src/core/Evaluate.go
@@ -0,0 +1,101 @@
+package core
+
+import (
+	"fmt"
+
+	"git.urbach.dev/cli/q/src/errors"
+	"git.urbach.dev/cli/q/src/expression"
+	"git.urbach.dev/cli/q/src/ssa"
+	"git.urbach.dev/cli/q/src/token"
+)
+
+// Evaluate converts an expression to an SSA value.
+// Identifiers are looked up in f.Identifiers, numbers and strings are appended as constants.
+// Calls evaluate their children in order and append a Call value, or a Syscall value
+// if the callee is the identifier `syscall`; `len(x)` resolves to the identifier "x.length".
+// A dot expression yields a function value named after both of its operands.
+// Any other non-leaf expression currently yields a nil value without an error.
+func (f *Function) Evaluate(expr *expression.Expression) (*ssa.Value, error) {
+	if expr.IsLeaf() {
+		switch expr.Token.Kind {
+		case token.Identifier:
+			name := expr.Token.String(f.File.Bytes)
+			value, exists := f.Identifiers[name]
+
+			if !exists {
+				return nil, errors.New(&UnknownIdentifier{Name: name}, f.File, expr.Token.Position)
+			}
+
+			return value, nil
+
+		case token.Number:
+			number, err := f.ToNumber(expr.Token)
+
+			if err != nil {
+				return nil, err
+			}
+
+			return f.AppendInt(number), nil
+
+		case token.String:
+			data := expr.Token.Bytes(f.File.Bytes)
+			data = Unescape(data)
+			return f.AppendBytes(data), nil
+		}
+
+		return nil, errors.New(InvalidExpression, f.File, expr.Token.Position)
+	}
+
+	switch expr.Token.Kind {
+	case token.Call:
+		children := expr.Children
+		typ := ssa.Call
+
+		if children[0].Token.Kind == token.Identifier {
+			funcName := children[0].String(f.File.Bytes)
+
+			if funcName == "len" {
+				// `len` needs an argument, indexing children[1] without one would panic.
+				if len(children) < 2 {
+					return nil, errors.New(InvalidExpression, f.File, expr.Token.Position)
+				}
+
+				identifier := children[1].String(f.File.Bytes)
+				length, exists := f.Identifiers[identifier+".length"]
+
+				// Report a missing length instead of silently returning a nil value.
+				if !exists {
+					return nil, errors.New(&UnknownIdentifier{Name: identifier + ".length"}, f.File, children[1].Token.Position)
+				}
+
+				return length, nil
+			}
+
+			if funcName == "syscall" {
+				children = children[1:]
+				typ = ssa.Syscall
+			}
+		}
+
+		args := make([]*ssa.Value, len(children))
+
+		for i, child := range children {
+			value, err := f.Evaluate(child)
+
+			if err != nil {
+				return nil, err
+			}
+
+			args[i] = value
+		}
+
+		call := f.Append(ssa.Value{Type: typ, Args: args})
+		return call, nil
+
+	case token.Dot:
+		name := fmt.Sprintf("%s.%s", expr.Children[0].String(f.File.Bytes), expr.Children[1].String(f.File.Bytes))
+		return f.AppendFunction(name), nil
+	}
+
+	return nil, nil
+}
\ No newline at end of file
diff --git a/src/core/Function.go b/src/core/Function.go
index 7b19c52..2e7695f 100644
--- a/src/core/Function.go
+++ b/src/core/Function.go
@@ -11,20 +11,26 @@ import (
 // Function is the smallest unit of code.
 type Function struct {
 	ssa.Function
-	Name       string
-	UniqueName string
-	File       *fs.File
-	Input      []*Parameter
-	Output     []*Parameter
-	Body       token.List
+	Name        string
+	UniqueName  string
+	File        *fs.File
+	Input       []*Parameter
+	Output      []*Parameter
+	Body        token.List
+	Identifiers map[string]*ssa.Value
+	Err         error
 }
 
 // NewFunction creates a new function.
 func NewFunction(name string, file *fs.File) *Function {
 	return &Function{
-		Name:       name,
-		File:       file,
-		UniqueName: fmt.Sprintf("%s.%s", file.Package, name),
+		Name:        name,
+		File:        file,
+		UniqueName:  fmt.Sprintf("%s.%s", file.Package, name),
+		Identifiers: make(map[string]*ssa.Value),
+		Function: ssa.Function{
+			Blocks: []*ssa.Block{{}},
+		},
 	}
 }
 
diff --git a/src/core/ToNumber.go b/src/core/ToNumber.go
new file mode 100644
index 0000000..4165c9e
--- /dev/null
+++ b/src/core/ToNumber.go
@@ -0,0 +1,63 @@
+package core
+
+import (
+	"strconv"
+	"strings"
+	"unicode/utf8"
+
+	"git.urbach.dev/cli/q/src/errors"
+	"git.urbach.dev/cli/q/src/fs"
+	"git.urbach.dev/cli/q/src/token"
+)
+
+// ToNumber tries to convert the token into a numeric value.
+func (f *Function) ToNumber(t token.Token) (int, error) {
+	return ToNumber(t, f.File)
+}
+
+// ToNumber tries to convert the token into a numeric value.
+func ToNumber(t token.Token, file *fs.File) (int, error) {
+	switch t.Kind {
+	case token.Number:
+		var (
+			digits = t.String(file.Bytes)
+			number int64
+			err    error
+		)
+
+		switch {
+		case strings.HasPrefix(digits, "0x"):
+			number, err = strconv.ParseInt(digits[2:], 16, 64)
+		case strings.HasPrefix(digits, "0o"):
+			number, err = strconv.ParseInt(digits[2:], 8, 64)
+		case strings.HasPrefix(digits, "0b"):
+			number, err = strconv.ParseInt(digits[2:], 2, 64)
+		default:
+			number, err = strconv.ParseInt(digits, 10, 64)
+		}
+
+		if err != nil {
+			return 0, errors.New(InvalidNumber, file, t.Position)
+		}
+
+		return int(number), nil
+
+	case token.Rune:
+		r := t.Bytes(file.Bytes)
+		r = Unescape(r)
+
+		if len(r) == 0 {
+			return 0, errors.New(InvalidRune, file, t.Position+1)
+		}
+
+		number, size := utf8.DecodeRune(r)
+
+		if len(r) > size {
+			return 0, errors.New(InvalidRune, file, t.Position+1)
+		}
+
+		return int(number), nil
+	}
+
+	return 0, errors.New(InvalidNumber, file, t.Position)
+}
\ No newline at end of file
diff --git a/src/core/Unescape.go b/src/core/Unescape.go
new file mode 100644
index 0000000..cf5016c
--- /dev/null
+++ b/src/core/Unescape.go
@@ -0,0 +1,58 @@
+package core
+
+import "bytes"
+
+// Unescape replaces the escape sequences in the contents of a string token with the respective characters.
+// The first and last byte of data are expected to be the quotes and are removed.
+// Unrecognized escape sequences and a backslash at the very end are dropped from the output.
+// If the contents have no escape sequences, the returned slice shares its memory with data.
+func Unescape(data []byte) []byte {
+	// Without both quotes there are no contents and the slicing below would panic.
+	if len(data) < 2 {
+		return nil
+	}
+
+	data = data[1 : len(data)-1]
+	escape := bytes.IndexByte(data, '\\')
+
+	if escape == -1 {
+		return data
+	}
+
+	tmp := make([]byte, 0, len(data))
+
+	for {
+		tmp = append(tmp, data[:escape]...)
+
+		// A backslash at the end has no character to escape,
+		// data[escape+1] would be out of range.
+		if escape+1 >= len(data) {
+			return tmp
+		}
+
+		switch data[escape+1] {
+		case '0':
+			tmp = append(tmp, '\000')
+		case 't':
+			tmp = append(tmp, '\t')
+		case 'n':
+			tmp = append(tmp, '\n')
+		case 'r':
+			tmp = append(tmp, '\r')
+		case '"':
+			tmp = append(tmp, '"')
+		case '\'':
+			tmp = append(tmp, '\'')
+		case '\\':
+			tmp = append(tmp, '\\')
+		}
+
+		data = data[escape+2:]
+		escape = bytes.IndexByte(data, '\\')
+
+		if escape == -1 {
+			// Keep the text that follows the last escape sequence.
+			return append(tmp, data...)
+		}
+	}
+}
\ No newline at end of file
diff --git a/src/core/errors.go b/src/core/errors.go
new file mode 100644
index 0000000..b8dbbdb
--- /dev/null
+++ b/src/core/errors.go
@@ -0,0 +1,27 @@
+package core
+
+import (
+	"fmt"
+
+	"git.urbach.dev/cli/q/src/errors"
+)
+
+var (
+	InvalidExpression = errors.String("Invalid expression")
+	InvalidNumber     = errors.String("Invalid number")
+	InvalidRune       = errors.String("Invalid rune")
+)
+
+// UnknownIdentifier represents unknown identifiers.
+type UnknownIdentifier struct {
+	Name        string
+	CorrectName string
+}
+
+func (err *UnknownIdentifier) Error() string {
+	if err.CorrectName != "" {
+		return fmt.Sprintf("Unknown identifier '%s', did you mean '%s'?", err.Name, err.CorrectName)
+	}
+
+	return fmt.Sprintf("Unknown identifier '%s'", err.Name)
+}
\ No newline at end of file
diff --git a/src/cpu/Register.go b/src/cpu/Register.go
new file mode 100644
index 0000000..a2acccc
--- /dev/null
+++ b/src/cpu/Register.go
@@ -0,0 +1,11 @@
+package cpu
+
+import "fmt"
+
+// Register represents the number of the register.
+type Register uint8
+
+// String returns the human readable name of the register.
+func (r Register) String() string {
+	return fmt.Sprintf("r%d", r)
+}
\ No newline at end of file
diff --git a/src/cpu/Register_test.go b/src/cpu/Register_test.go
new file mode 100644
index 0000000..a7dd3d8
--- /dev/null
+++ b/src/cpu/Register_test.go
@@ -0,0 +1,13 @@
+package cpu_test
+
+import (
+	"testing"
+
+	"git.urbach.dev/cli/q/src/cpu"
+	"git.urbach.dev/go/assert"
+)
+
+func TestRegisterString(t *testing.T) {
+	register := cpu.Register(1)
+	assert.Equal(t, "r1", register.String())
+}
\ No newline at end of file
diff --git a/src/ssa/Block.go b/src/ssa/Block.go
index 32ac6dd..fe44b97 100644
--- a/src/ssa/Block.go
+++ b/src/ssa/Block.go
@@ -2,11 +2,11 @@ package ssa
 
 // Block is a list of instructions that can be targeted in branches.
 type Block struct {
-	Instructions []Instruction
+	Instructions []Value
 }
 
 // Append adds a new instruction to the block.
-func (b *Block) Append(instr Instruction) *Instruction {
+func (b *Block) Append(instr Value) *Value {
 	b.Instructions = append(b.Instructions, instr)
 	return &b.Instructions[len(b.Instructions)-1]
 }
\ No newline at end of file
diff --git a/src/ssa/Function.go b/src/ssa/Function.go
index d5df02a..93639ab 100644
--- a/src/ssa/Function.go
+++ b/src/ssa/Function.go
@@ -1,5 +1,9 @@
 package ssa
 
+import (
+	"git.urbach.dev/cli/q/src/cpu"
+)
+
 // Function is a list of basic blocks.
 type Function struct {
 	Blocks []*Block
@@ -10,4 +14,50 @@ func (f *Function) AddBlock() *Block {
 	block := &Block{}
 	f.Blocks = append(f.Blocks, block)
 	return block
+}
+
+// Append adds a new value to the last block.
+// Constants are deduplicated: if an equal constant exists in any block,
+// a pointer to that stored value is returned instead of appending a copy.
+func (f *Function) Append(instr Value) *Value {
+	if len(f.Blocks) == 0 {
+		f.Blocks = append(f.Blocks, &Block{})
+	}
+
+	if instr.IsConst() {
+		for _, b := range f.Blocks {
+			for i := range b.Instructions {
+				if instr.Equals(b.Instructions[i]) {
+					return &b.Instructions[i]
+				}
+			}
+		}
+	}
+
+	return f.Blocks[len(f.Blocks)-1].Append(instr)
+}
+
+// AppendInt adds a new integer value to the last block.
+func (f *Function) AppendInt(x int) *Value {
+	return f.Append(Value{Type: Int, Int: x})
+}
+
+// AppendRegister passes a Register value for reg to Append and returns the resulting value.
+func (f *Function) AppendRegister(reg cpu.Register) *Value {
+	return f.Append(Value{Type: Register, Register: reg})
+}
+
+// AppendFunction passes a Func value with the given name to Append and returns the resulting value.
+func (f *Function) AppendFunction(name string) *Value {
+	return f.Append(Value{Type: Func, Text: name})
+}
+
+// AppendBytes converts s to a string, passes it to Append as a String value and returns the resulting value.
+func (f *Function) AppendBytes(s []byte) *Value {
+	return f.Append(Value{Type: String, Text: string(s)})
+}
+
+// AppendString passes a String value with the text s to Append and returns the resulting value.
+func (f *Function) AppendString(s string) *Value {
+	return f.Append(Value{Type: String, Text: s})
 }
\ No newline at end of file
diff --git a/src/ssa/Instruction.go b/src/ssa/Instruction.go
deleted file mode 100644
index 2ce2f80..0000000
--- a/src/ssa/Instruction.go
+++ /dev/null
@@ -1,26 +0,0 @@
-package ssa
-
-import (
-	"fmt"
-)
-
-// Instruction is a single instruction in a basic block.
-// It is implemented as a "fat struct" for performance reasons.
-// It contains all the fields necessary to represent all instruction types.
-type Instruction struct {
-	Args []*Instruction
-	Int  int64
-	Type Type
-}
-
-// String returns a human-readable representation of the instruction.
-func (i *Instruction) String() string {
-	switch i.Type {
-	case Int:
-		return fmt.Sprintf("%d", i.Int)
-	case Add:
-		return fmt.Sprintf("%s + %s", i.Args[0], i.Args[1])
-	default:
-		return ""
-	}
-}
\ No newline at end of file
diff --git a/src/ssa/Type.go b/src/ssa/Type.go
index 1ea82b1..347ae3d 100644
--- a/src/ssa/Type.go
+++ b/src/ssa/Type.go
@@ -9,6 +9,8 @@ const (
 	// Values
 	Int
 	Float
+	Func
+	Register
 	String
 
 	// Binary
@@ -25,11 +27,13 @@ const (
 	Shl
 	Shr
 
-	// Branch
+	// Control flow
 	If
 	Jump
+	Call
+	Return
+	Syscall
 
 	// Special
-	Call
 	Phi
 )
\ No newline at end of file
diff --git a/src/ssa/Value.go b/src/ssa/Value.go
new file mode 100644
index 0000000..a562949
--- /dev/null
+++ b/src/ssa/Value.go
@@ -0,0 +1,83 @@
+package ssa
+
+import (
+	"fmt"
+
+	"git.urbach.dev/cli/q/src/cpu"
+)
+
+// Value is a single instruction in a basic block.
+// It is implemented as a "fat struct" for performance reasons.
+// It contains all the fields necessary to represent all instruction types.
+type Value struct {
+	Args     []*Value     // operands, printed by String for Add, Return, Call and Syscall
+	Int      int          // number, printed by String for the type Int
+	Text     string       // text, printed by String for the types Func and String
+	Register cpu.Register // register, printed by String for the type Register
+	Type     Type         // selects which of the other fields String reads
+}
+
+// Equals returns true if Type, Int, Text and Register match and all Args are pairwise equal (compared recursively).
+func (a Value) Equals(b Value) bool {
+	if a.Type != b.Type {
+		return false
+	}
+
+	if a.Int != b.Int {
+		return false
+	}
+
+	if a.Text != b.Text {
+		return false
+	}
+
+	if a.Register != b.Register {
+		return false
+	}
+
+	if len(a.Args) != len(b.Args) {
+		return false
+	}
+
+	for i := range a.Args {
+		if !a.Args[i].Equals(*b.Args[i]) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// IsConst returns true if the value is constant, which is the case for the types Func, Int, Register and String.
+func (i *Value) IsConst() bool {
+	switch i.Type {
+	case Func, Int, Register, String:
+		return true
+	default:
+		return false
+	}
+}
+
+// String returns a human-readable representation of the instruction.
+func (i *Value) String() string {
+	switch i.Type {
+	case Func:
+		return i.Text
+	case Int:
+		return fmt.Sprintf("%d", i.Int)
+	case Register:
+		return i.Register.String()
+	case String:
+		return fmt.Sprintf("\"%s\"", i.Text) // the quotes are added verbatim, the text itself is not escaped
+	case Add:
+		return fmt.Sprintf("%s + %s", i.Args[0], i.Args[1]) // reads the first two Args without a length check
+	case Return:
+		return fmt.Sprintf("return %s", i.Args[0]) // reads the first of the Args without a length check
+	case Call:
+		return fmt.Sprintf("call%v", i.Args)
+	case Syscall:
+		return fmt.Sprintf("syscall%v", i.Args)
+	default:
+		return "" // every type not listed above has no textual form yet
+	}
+}
\ No newline at end of file
diff --git a/src/ssa/ssa_test.go b/src/ssa/ssa_test.go
index 7455424..d8ecb3b 100644
--- a/src/ssa/ssa_test.go
+++ b/src/ssa/ssa_test.go
@@ -7,16 +7,20 @@ import (
 	"git.urbach.dev/go/assert"
 )
 
-func TestBlock(t *testing.T) {
-	f := ssa.Function{}
-	block := f.AddBlock()
-	a := block.Append(ssa.Instruction{Type: ssa.Int, Int: 1})
-	b := block.Append(ssa.Instruction{Type: ssa.Int, Int: 2})
-	c := block.Append(ssa.Instruction{Type: ssa.Add, Args: []*ssa.Instruction{a, b}})
+func TestFunction(t *testing.T) {
+	fn := ssa.Function{}
+	a := fn.AppendInt(1)
+	b := fn.AppendInt(2)
+	c := fn.Append(ssa.Value{Type: ssa.Add, Args: []*ssa.Value{a, b}})
+	fn.AddBlock()
+	d := fn.AppendInt(3)
+	e := fn.AppendInt(4)
+	f := fn.Append(ssa.Value{Type: ssa.Add, Args: []*ssa.Value{d, e}})
 	assert.Equal(t, c.String(), "1 + 2")
+	assert.Equal(t, f.String(), "3 + 4")
 }
 
 func TestInvalidInstruction(t *testing.T) {
-	instr := ssa.Instruction{}
+	instr := ssa.Value{}
 	assert.Equal(t, instr.String(), "")
 }
\ No newline at end of file