diff --git a/src/build/asm/Assembler.go b/src/build/asm/Assembler.go index d539003..4a595cf 100644 --- a/src/build/asm/Assembler.go +++ b/src/build/asm/Assembler.go @@ -1,10 +1,14 @@ package asm -import "maps" +import ( + "maps" + + "git.akyoto.dev/cli/q/src/build/data" +) // Assembler contains a list of instructions. type Assembler struct { - Data map[string][]byte + Data data.Data Instructions []Instruction } @@ -15,10 +19,10 @@ func (a *Assembler) Merge(b Assembler) { } // SetData sets the data for the given label. -func (a *Assembler) SetData(label string, data []byte) { +func (a *Assembler) SetData(label string, bytes []byte) { if a.Data == nil { - a.Data = map[string][]byte{} + a.Data = data.Data{} } - a.Data[label] = data + a.Data[label] = bytes } diff --git a/src/build/asm/Finalize.go b/src/build/asm/Finalize.go index 97aa786..ff0c7b3 100644 --- a/src/build/asm/Finalize.go +++ b/src/build/asm/Finalize.go @@ -3,6 +3,7 @@ package asm import ( "encoding/binary" "fmt" + "slices" "strings" "git.akyoto.dev/cli/q/src/build/arch/x64" @@ -13,10 +14,14 @@ import ( // Finalize generates the final machine code. 
func (a Assembler) Finalize() ([]byte, []byte) { - code := make([]byte, 0, len(a.Instructions)*8) - data := make([]byte, 0, 16) - labels := map[string]Address{} - pointers := []*Pointer{} + var ( + code = make([]byte, 0, len(a.Instructions)*8) + data []byte + codeLabels = map[string]Address{} + dataLabels map[string]Address + codePointers []*Pointer + dataPointers []*Pointer + ) for _, x := range a.Instructions { switch x.Mnemonic { @@ -67,7 +72,7 @@ func (a Assembler) Finalize() ([]byte, []byte) { } pointer.Resolve = func() Address { - destination, exists := labels[label.Name] + destination, exists := codeLabels[label.Name] if !exists { panic("unknown jump label") @@ -77,7 +82,7 @@ func (a Assembler) Finalize() ([]byte, []byte) { return Address(distance) } - pointers = append(pointers, pointer) + codePointers = append(codePointers, pointer) case COMMENT: continue @@ -118,7 +123,7 @@ func (a Assembler) Finalize() ([]byte, []byte) { } pointer.Resolve = func() Address { - destination, exists := labels[label.Name] + destination, exists := codeLabels[label.Name] if !exists { panic("unknown jump label") @@ -128,10 +133,10 @@ func (a Assembler) Finalize() ([]byte, []byte) { return Address(distance) } - pointers = append(pointers, pointer) + codePointers = append(codePointers, pointer) case LABEL: - labels[x.Data.(*Label).Name] = Address(len(code)) + codeLabels[x.Data.(*Label).Name] = Address(len(code)) case LOAD: switch operands := x.Data.(type) { @@ -157,12 +162,16 @@ func (a Assembler) Finalize() ([]byte, []byte) { opSize := len(code) - size - start regLabel := x.Data.(*RegisterLabel) - pointers = append(pointers, &Pointer{ + if !strings.HasPrefix(regLabel.Label, "data_") { + panic("non-data moves not implemented yet") + } + + dataPointers = append(dataPointers, &Pointer{ Position: Address(len(code) - size), OpSize: uint8(opSize), Size: uint8(size), Resolve: func() Address { - destination, exists := labels[regLabel.Label] + destination, exists := 
dataLabels[regLabel.Label] if !exists { panic("unknown label") @@ -238,16 +247,8 @@ func (a Assembler) Finalize() ([]byte, []byte) { } } - dataStart := config.BaseAddress + config.CodeOffset + Address(len(code)) - dataStart += int32(elf.Padding(int64(dataStart), config.Align)) - - for label, slice := range a.Data { - labels[label] = dataStart + Address(len(data)) - data = append(data, slice...) - } - restart: - for i, pointer := range pointers { + for i, pointer := range codePointers { address := pointer.Resolve() if sizeof.Signed(int64(address)) > int(pointer.Size) { @@ -283,24 +284,17 @@ restart: jump = binary.LittleEndian.AppendUint32(jump, uint32(address)) offset := Address(len(jump)) - Address(size) - for _, following := range pointers[i+1:] { + for _, following := range codePointers[i+1:] { following.Position += offset } - for key, address := range labels { - if strings.HasPrefix(key, "data_") { - continue - } - + for key, address := range codeLabels { if address > pointer.Position { - labels[key] += offset + codeLabels[key] += offset } } - code = make([]byte, len(left)+len(jump)+len(right)) - copy(code, left) - copy(code[len(left):], jump) - copy(code[len(left)+len(jump):], right) + code = slices.Concat(left, jump, right) goto restart } @@ -309,17 +303,24 @@ restart: switch pointer.Size { case 1: slice[0] = uint8(address) - case 2: binary.LittleEndian.PutUint16(slice, uint16(address)) - case 4: binary.LittleEndian.PutUint32(slice, uint32(address)) - case 8: binary.LittleEndian.PutUint64(slice, uint64(address)) } } + data, dataLabels = a.Data.Finalize() + dataStart := config.BaseAddress + config.CodeOffset + Address(len(code)) + dataStart += int32(elf.Padding(int64(dataStart), config.Align)) + + for _, pointer := range dataPointers { + address := dataStart + pointer.Resolve() + slice := code[pointer.Position : pointer.Position+4] + binary.LittleEndian.PutUint32(slice, uint32(address)) + } + return code, data } diff --git a/src/build/data/Data.go 
// Data saves slices of bytes referenced by labels.
type Data map[string][]byte

// Finalize returns the final raw data slice and a map of labels with their
// respective indices into that slice.
//
// It will try to reuse existing data whenever possible: entries are laid out
// longest first, and an entry whose bytes already occur somewhere in the
// output is pointed at that occurrence instead of being appended again.
//
// The result is deterministic: entries of equal length are ordered by label,
// so the same set of entries always produces the same bytes and positions.
func (d Data) Finalize() ([]byte, map[string]int32) {
	var (
		final     []byte
		keys      = make([]string, 0, len(d))
		positions = make(map[string]int32, len(d))
	)

	for key := range d {
		keys = append(keys, key)
	}

	// Longest entries go first so that shorter ones can be found inside them.
	// Map iteration order is random, so ties on length are broken by label;
	// labels are unique, which makes this a total order and the layout
	// reproducible between runs.
	sort.Slice(keys, func(i, j int) bool {
		a, b := keys[i], keys[j]

		if len(d[a]) != len(d[b]) {
			return len(d[a]) > len(d[b])
		}

		return a < b
	})

	for _, key := range keys {
		raw := d[key]

		// Reuse an existing occurrence of the same bytes if there is one.
		if position := bytes.Index(final, raw); position != -1 {
			positions[key] = int32(position)
			continue
		}

		positions[key] = int32(len(final))
		final = append(final, raw...)
	}

	return final, positions
}

// Insert stores the given bytes under the label, replacing any previous entry
// with the same label. The slice is stored as-is, without copying.
// The receiver must be an initialized map; inserting into a nil Data panics.
func (d Data) Insert(label string, raw []byte) {
	d[label] = raw
}