Added token package
All checks were successful
/ test (push) Successful in 15s

This commit is contained in:
Eduard Urbach 2025-06-19 15:58:29 +02:00
parent 38bd387002
commit 1405d2b8b1
Signed by: akyoto
GPG key ID: 49226B848C78F6C8
18 changed files with 1500 additions and 0 deletions

14
src/token/Count.go Normal file
View file

@ -0,0 +1,14 @@
package token
// Count counts how often the given token appears in the token list.
// A token matches when both its kind and its source text equal the
// given kind and name.
//
// The count saturates at 255 instead of wrapping around: previously,
// 256 matching tokens would overflow the uint8 counter back to 0 and
// falsely report that the token never appears.
func Count(tokens []Token, buffer []byte, kind Kind, name string) uint8 {
	count := uint8(0)

	for _, t := range tokens {
		if t.Kind != kind || t.String(buffer) != name {
			continue
		}

		// Saturate: count+1 would wrap 255 -> 0.
		if count == 255 {
			return 255
		}

		count++
	}

	return count
}

17
src/token/Count_test.go Normal file
View file

@ -0,0 +1,17 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestCount(t *testing.T) {
buffer := []byte(`a b b c c c`)
tokens := token.Tokenize(buffer)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "a"), 1)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "b"), 2)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "c"), 3)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "d"), 0)
}

61
src/token/Instructions.go Normal file
View file

@ -0,0 +1,61 @@
package token
// Instructions yields each instruction in the token list: a run of tokens
// terminated by a top-level newline, or a block header together with its
// entire {...} body (including the closing brace). Returning false from
// yield stops the iteration early, so this method can be used as a
// range-over-func iterator.
func (list List) Instructions(yield func(List) bool) {
	start := 0
	groupLevel := 0 // current nesting depth of ( )
	blockLevel := 0 // current nesting depth of { }

	for i, t := range list {
		switch t.Kind {
		case NewLine:
			// Skip empty lines.
			if start == i {
				start = i + 1
				continue
			}

			// A newline inside a group or block does not end the instruction.
			if groupLevel > 0 || blockLevel > 0 {
				continue
			}

			if !yield(list[start:i]) {
				return
			}

			start = i + 1
		case GroupStart:
			groupLevel++
		case GroupEnd:
			groupLevel--
		case BlockStart:
			blockLevel++
		case BlockEnd:
			blockLevel--

			// An inner closing brace is part of the enclosing instruction.
			if groupLevel > 0 || blockLevel > 0 {
				continue
			}

			// Yield the block including the closing brace itself.
			if !yield(list[start : i+1]) {
				return
			}

			start = i + 1
		case EOF:
			// Flush any pending tokens before the EOF marker and stop.
			if start < i {
				yield(list[start:i])
			}

			return
		}
	}

	// Lists without an EOF token: flush the remaining tokens.
	if start < len(list) {
		yield(list[start:])
	}
}

View file

@ -0,0 +1,109 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestInstructionsBasic(t *testing.T) {
src := []byte("a := 1\nb := 2\n")
tokens := token.Tokenize(src)
nodes := []string{}
for param := range tokens.Instructions {
nodes = append(nodes, param.String(src))
}
assert.DeepEqual(t, nodes, []string{"a:=1", "b:=2"})
}
func TestInstructionsBlock(t *testing.T) {
src := []byte("a := 1\nif x > 0 {\nx = 0\n}\nb := 2\n")
tokens := token.Tokenize(src)
nodes := []string{}
for param := range tokens.Instructions {
nodes = append(nodes, param.String(src))
}
assert.DeepEqual(t, nodes, []string{"a:=1", "ifx>0{\nx=0\n}", "b:=2"})
}
func TestInstructionsGroup(t *testing.T) {
src := []byte("a := 1\ncall(\nx,\ny\n)\nb := 2\n")
tokens := token.Tokenize(src)
nodes := []string{}
for param := range tokens.Instructions {
nodes = append(nodes, param.String(src))
}
assert.DeepEqual(t, nodes, []string{"a:=1", "call(\nx,\ny\n)", "b:=2"})
}
func TestInstructionsBreak(t *testing.T) {
src := []byte("a := 1\nb := 2\n")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
if count == 1 {
break
}
count++
}
}
func TestInstructionsEOF(t *testing.T) {
src := []byte("a := 1")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
count++
}
assert.Equal(t, count, 1)
}
func TestInstructionsNoEOF(t *testing.T) {
tokens := token.List{
token.Token{Position: 0, Length: 1, Kind: token.Identifier},
}
count := 0
for range tokens.Instructions {
count++
}
assert.Equal(t, count, 1)
}
func TestInstructionsMultiBlock(t *testing.T) {
src := []byte("if x == 0 { if y == 0 {} }")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
count++
}
assert.Equal(t, count, 1)
}
func TestInstructionsMultiBlockBreak(t *testing.T) {
src := []byte("if x == 0 { if y == 0 {} }")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
count++
break
}
assert.Equal(t, count, 1)
}

79
src/token/Kind.go Normal file
View file

@ -0,0 +1,79 @@
package token
// Kind represents the type of token.
//
// The values are ordered so that related kinds form contiguous ranges.
// The ___X___ identifiers are sentinel markers, never produced by the
// tokenizer: a kind k belongs to group X exactly when
// ___X___ < k < ___END_X___ (see Token.IsOperator, Token.IsAssignment,
// Token.IsComparison, Token.IsUnaryOperator and Token.IsKeyword).
// Keep this ordering intact when adding new kinds.
type Kind uint8

const (
	Invalid    Kind = iota // Invalid is an invalid token.
	EOF                    // EOF is the end of file.
	NewLine                // NewLine is the newline character.
	Identifier             // Identifier is a series of characters used to identify a variable or function.
	Number                 // Number is a series of numerical characters.
	Rune                   // Rune is a single unicode code point.
	String                 // String is an uninterpreted series of characters in the source code.
	Comment                // Comment is a comment.
	GroupStart             // (
	GroupEnd               // )
	BlockStart             // {
	BlockEnd               // }
	ArrayStart             // [
	ArrayEnd               // ]
	ReturnType             // ->

	___OPERATORS___ // <operators>
	Add             // +
	Sub             // -
	Mul             // *
	Div             // /
	Mod             // %
	And             // &
	Or              // |
	Xor             // ^
	Shl             // <<
	Shr             // >>
	LogicalAnd      // &&
	LogicalOr       // ||
	Define          // :=
	Dot             // .
	Range           // ..
	Call            // x()
	Array           // [x]
	Separator       // ,

	___ASSIGNMENTS___     // <assignments>
	Assign                // =
	AddAssign             // +=
	SubAssign             // -=
	MulAssign             // *=
	DivAssign             // /=
	ModAssign             // %=
	AndAssign             // &=
	OrAssign              // |=
	XorAssign             // ^=
	ShlAssign             // <<=
	ShrAssign             // >>=
	___END_ASSIGNMENTS___ // </assignments>

	___COMPARISONS___     // <comparisons>
	Equal                 // ==
	NotEqual              // !=
	Less                  // <
	Greater               // >
	LessEqual             // <=
	GreaterEqual          // >=
	___END_COMPARISONS___ // </comparisons>

	___UNARY___       // <unary>
	Not               // ! (unary)
	Negate            // - (unary)
	___END_UNARY___   // </unary>
	___END_OPERATORS___ // </operators>

	___KEYWORDS___     // <keywords>
	Assert             // assert
	Const              // const
	Else               // else
	Extern             // extern
	For                // for
	If                 // if
	Import             // import
	Loop               // loop
	Return             // return
	Switch             // switch
	___END_KEYWORDS___ // </keywords>
)

76
src/token/List.go Normal file
View file

@ -0,0 +1,76 @@
package token
import (
"strings"
)
// List is a slice of tokens.
type List []Token
// IndexKind returns the position of a token kind within a token list.
// It returns -1 when no token of the given kind exists.
func (list List) IndexKind(kind Kind) int {
	for index := range list {
		if list[index].Kind == kind {
			return index
		}
	}

	return -1
}
// LastIndexKind returns the position of the last token kind within a token list.
// It returns -1 when no token of the given kind exists.
func (list List) LastIndexKind(kind Kind) int {
	index := len(list)

	for index > 0 {
		index--

		if list[index].Kind == kind {
			return index
		}
	}

	return -1
}
// Split calls the callback function on each set of tokens in a comma separated list.
// Separators nested inside (), [] or {} are ignored, so calls and literals
// stay in one piece. Returning false from yield stops the iteration early.
func (list List) Split(yield func(List) bool) {
	// An empty list yields nothing at all (not even an empty slice).
	if len(list) == 0 {
		return
	}

	start := 0
	groupLevel := 0 // combined nesting depth of (), [] and {}

	for i, t := range list {
		switch t.Kind {
		case GroupStart, ArrayStart, BlockStart:
			groupLevel++
		case GroupEnd, ArrayEnd, BlockEnd:
			groupLevel--
		case Separator:
			// A comma inside a nested group belongs to that group.
			if groupLevel > 0 {
				continue
			}

			parameter := list[start:i]

			if !yield(parameter) {
				return
			}

			start = i + 1
		}
	}

	// The tokens after the last top-level separator form the final element.
	yield(list[start:])
}
// String returns the concatenated token strings.
func (list List) String(source []byte) string {
	var builder strings.Builder

	for _, current := range list {
		builder.WriteString(current.String(source))
	}

	return builder.String()
}

71
src/token/List_test.go Normal file
View file

@ -0,0 +1,71 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestIndexKind(t *testing.T) {
tokens := token.Tokenize([]byte("a{{}}"))
assert.Equal(t, tokens.IndexKind(token.NewLine), -1)
assert.Equal(t, tokens.LastIndexKind(token.NewLine), -1)
assert.Equal(t, tokens.IndexKind(token.BlockStart), 1)
assert.Equal(t, tokens.LastIndexKind(token.BlockStart), 2)
assert.Equal(t, tokens.IndexKind(token.BlockEnd), 3)
assert.Equal(t, tokens.LastIndexKind(token.BlockEnd), 4)
}
func TestSplit(t *testing.T) {
src := []byte("1+2,3*4,5*6,7+8")
tokens := token.Tokenize(src)
parameters := []string{}
for param := range tokens.Split {
parameters = append(parameters, param.String(src))
}
assert.DeepEqual(t, parameters, []string{"1+2", "3*4", "5*6", "7+8"})
}
func TestSplitBreak(t *testing.T) {
src := []byte("1,2")
tokens := token.Tokenize(src)
for range tokens.Split {
break
}
}
func TestSplitEmpty(t *testing.T) {
tokens := token.List{}
for range tokens.Split {
t.Fail()
}
}
func TestSplitGroups(t *testing.T) {
src := []byte("f(1,2),g(3,4)")
tokens := token.Tokenize(src)
parameters := []string{}
for param := range tokens.Split {
parameters = append(parameters, param.String(src))
}
assert.DeepEqual(t, parameters, []string{"f(1,2)", "g(3,4)"})
}
func TestSplitSingle(t *testing.T) {
src := []byte("123")
tokens := token.Tokenize(src)
parameters := []string{}
for param := range tokens.Split {
parameters = append(parameters, param.String(src))
}
assert.DeepEqual(t, parameters, []string{"123"})
}

77
src/token/Token.go Normal file
View file

@ -0,0 +1,77 @@
package token
import (
"unsafe"
)
// Position is the data type for storing file offsets.
type Position = uint32

// Length is the data type for storing token lengths.
type Length = uint16

// Token represents a single element in a source file.
// The characters that make up an identifier are grouped into a single token.
// This makes parsing easier and allows us to do better syntax checks.
// A token stores no text itself; it only references a span of the source buffer.
type Token struct {
	Position Position // byte offset of the token within the source buffer
	Length   Length   // number of bytes the token spans
	Kind     Kind     // category of the token
}

// Bytes returns the byte slice.
// The slice aliases the source buffer rather than copying it.
func (t Token) Bytes(buffer []byte) []byte {
	return buffer[t.Position : t.Position+Position(t.Length)]
}

// End returns the position after the token.
func (t Token) End() Position {
	return t.Position + Position(t.Length)
}

// IsAssignment returns true if the token is an assignment operator.
// Group membership is an open range test between the sentinel kinds.
func (t Token) IsAssignment() bool {
	return t.Kind > ___ASSIGNMENTS___ && t.Kind < ___END_ASSIGNMENTS___
}

// IsComparison returns true if the token is a comparison operator.
func (t Token) IsComparison() bool {
	return t.Kind > ___COMPARISONS___ && t.Kind < ___END_COMPARISONS___
}

// IsExpressionStart returns true if the token starts an expression.
func (t Token) IsExpressionStart() bool {
	return t.Kind == GroupStart || t.Kind == ArrayStart || t.Kind == BlockStart
}

// IsKeyword returns true if the token is a keyword.
func (t Token) IsKeyword() bool {
	return t.Kind > ___KEYWORDS___ && t.Kind < ___END_KEYWORDS___
}

// IsNumeric returns true if the token is a number or rune.
func (t Token) IsNumeric() bool {
	return t.Kind == Number || t.Kind == Rune
}

// IsOperator returns true if the token is an operator.
func (t Token) IsOperator() bool {
	return t.Kind > ___OPERATORS___ && t.Kind < ___END_OPERATORS___
}

// IsUnaryOperator returns true if the token is a unary operator.
func (t Token) IsUnaryOperator() bool {
	return t.Kind > ___UNARY___ && t.Kind < ___END_UNARY___
}

// Reset resets the token to default values.
func (t *Token) Reset() {
	t.Position = 0
	t.Length = 0
	t.Kind = Invalid
}

// String returns the token string.
// The result aliases the source buffer via unsafe.String instead of
// copying, so it must not be used after the buffer is mutated or freed.
func (t Token) String(buffer []byte) string {
	return unsafe.String(unsafe.SliceData(t.Bytes(buffer)), t.Length)
}

58
src/token/Token_test.go Normal file
View file

@ -0,0 +1,58 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestTokenEnd(t *testing.T) {
hello := token.Token{
Kind: token.Identifier,
Position: 0,
Length: 5,
}
assert.Equal(t, hello.End(), 5)
}
func TestTokenReset(t *testing.T) {
hello := token.Token{
Kind: token.Identifier,
Position: 1,
Length: 5,
}
hello.Reset()
assert.Equal(t, hello.Position, 0)
assert.Equal(t, hello.Length, 0)
assert.Equal(t, hello.Kind, token.Invalid)
}
func TestTokenString(t *testing.T) {
buffer := []byte("hello, world")
hello := token.Token{Kind: token.Identifier, Position: 0, Length: 5}
comma := token.Token{Kind: token.Separator, Position: 5, Length: 1}
world := token.Token{Kind: token.Identifier, Position: 7, Length: 5}
assert.Equal(t, hello.String(buffer), "hello")
assert.Equal(t, comma.String(buffer), ",")
assert.Equal(t, world.String(buffer), "world")
}
func TestTokenGroups(t *testing.T) {
assignment := token.Token{Kind: token.Assign}
operator := token.Token{Kind: token.Add}
keyword := token.Token{Kind: token.If}
unary := token.Token{Kind: token.Not}
number := token.Token{Kind: token.Number}
comparison := token.Token{Kind: token.Equal}
assert.True(t, assignment.IsAssignment())
assert.True(t, operator.IsOperator())
assert.True(t, keyword.IsKeyword())
assert.True(t, unary.IsUnaryOperator())
assert.True(t, number.IsNumeric())
assert.True(t, comparison.IsComparison())
}

64
src/token/Tokenize.go Normal file
View file

@ -0,0 +1,64 @@
package token
// Tokenize turns the file contents into a list of tokens.
// The returned list always ends with a zero-length EOF token.
func Tokenize(buffer []byte) List {
	var (
		i Position
		// Heuristic pre-allocation: roughly one token per two bytes of input.
		tokens = make(List, 0, 8+len(buffer)/2)
	)

	for i < Position(len(buffer)) {
		switch buffer[i] {
		case ' ', '\t', '\r':
			// Whitespace produces no token.
		case ',':
			tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1})
		case '(':
			tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1})
		case ')':
			tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1})
		case '{':
			tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1})
		case '}':
			tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1})
		case '[':
			tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1})
		case ']':
			tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1})
		case '\n':
			tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1})
		case '-':
			// dash returns the index of the LAST consumed byte, so this
			// case intentionally falls through to the i++ below.
			tokens, i = dash(tokens, buffer, i)
		case '/':
			// slash, quote and zero return the index of the first
			// unconsumed byte, hence the continue.
			tokens, i = slash(tokens, buffer, i)
			continue
		case '"', '\'':
			tokens, i = quote(tokens, buffer, i)
			continue
		case '0':
			tokens, i = zero(tokens, buffer, i)
			continue
		default:
			if isIdentifierStart(buffer[i]) {
				tokens, i = identifier(tokens, buffer, i)
				continue
			}

			if isDigit(buffer[i]) {
				tokens, i = digit(tokens, buffer, i)
				continue
			}

			if isOperator(buffer[i]) {
				tokens, i = operator(tokens, buffer, i)
				continue
			}

			// Anything unrecognized becomes a single-byte Invalid token.
			tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1})
		}

		i++
	}

	// Terminate the list with a zero-length EOF token.
	tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0})
	return tokens
}

576
src/token/Tokenize_test.go Normal file
View file

@ -0,0 +1,576 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestFunction(t *testing.T) {
tokens := token.Tokenize([]byte("main(){}"))
expected := []token.Kind{
token.Identifier,
token.GroupStart,
token.GroupEnd,
token.BlockStart,
token.BlockEnd,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestKeyword(t *testing.T) {
tokens := token.Tokenize([]byte("assert const else extern if import for loop return switch"))
expected := []token.Kind{
token.Assert,
token.Const,
token.Else,
token.Extern,
token.If,
token.Import,
token.For,
token.Loop,
token.Return,
token.Switch,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestArray(t *testing.T) {
tokens := token.Tokenize([]byte("array[i]"))
expected := []token.Kind{
token.Identifier,
token.ArrayStart,
token.Identifier,
token.ArrayEnd,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNewline(t *testing.T) {
tokens := token.Tokenize([]byte("\n\n"))
expected := []token.Kind{
token.NewLine,
token.NewLine,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`123 456`))
expected := []token.Kind{
token.Number,
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperator(t *testing.T) {
tokens := token.Tokenize([]byte(`a + b - c * d / e % f << g >> h & i | j ^ k`))
expected := []token.Kind{
token.Identifier,
token.Add,
token.Identifier,
token.Sub,
token.Identifier,
token.Mul,
token.Identifier,
token.Div,
token.Identifier,
token.Mod,
token.Identifier,
token.Shl,
token.Identifier,
token.Shr,
token.Identifier,
token.And,
token.Identifier,
token.Or,
token.Identifier,
token.Xor,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperatorAssign(t *testing.T) {
tokens := token.Tokenize([]byte(`a = b += c -= d *= e /= f %= g &= h |= i ^= j <<= k >>= l`))
expected := []token.Kind{
token.Identifier,
token.Assign,
token.Identifier,
token.AddAssign,
token.Identifier,
token.SubAssign,
token.Identifier,
token.MulAssign,
token.Identifier,
token.DivAssign,
token.Identifier,
token.ModAssign,
token.Identifier,
token.AndAssign,
token.Identifier,
token.OrAssign,
token.Identifier,
token.XorAssign,
token.Identifier,
token.ShlAssign,
token.Identifier,
token.ShrAssign,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperatorEquality(t *testing.T) {
tokens := token.Tokenize([]byte(`a == b != c <= d >= e < f > g`))
expected := []token.Kind{
token.Identifier,
token.Equal,
token.Identifier,
token.NotEqual,
token.Identifier,
token.LessEqual,
token.Identifier,
token.GreaterEqual,
token.Identifier,
token.Less,
token.Identifier,
token.Greater,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperatorLogical(t *testing.T) {
tokens := token.Tokenize([]byte(`a && b || c`))
expected := []token.Kind{
token.Identifier,
token.LogicalAnd,
token.Identifier,
token.LogicalOr,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestDefine(t *testing.T) {
tokens := token.Tokenize([]byte(`a := b`))
expected := []token.Kind{
token.Identifier,
token.Define,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestDot(t *testing.T) {
tokens := token.Tokenize([]byte(`a.b.c`))
expected := []token.Kind{
token.Identifier,
token.Dot,
token.Identifier,
token.Dot,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNot(t *testing.T) {
tokens := token.Tokenize([]byte(`!a`))
expected := []token.Kind{
token.Not,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateFirstToken(t *testing.T) {
tokens := token.Tokenize([]byte(`-a`))
expected := []token.Kind{
token.Negate,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateAfterGroupStart(t *testing.T) {
tokens := token.Tokenize([]byte(`(-a)`))
expected := []token.Kind{
token.GroupStart,
token.Negate,
token.Identifier,
token.GroupEnd,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateSub(t *testing.T) {
tokens := token.Tokenize([]byte(`-a-b`))
expected := []token.Kind{
token.Negate,
token.Identifier,
token.Sub,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateAfterOperator(t *testing.T) {
tokens := token.Tokenize([]byte(`-a + -b`))
expected := []token.Kind{
token.Negate,
token.Identifier,
token.Add,
token.Negate,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`-1`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestBinaryNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`0b1010`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOctalNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`0o755`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestHexadecimalNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`0xCAFE`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestStandaloneZero(t *testing.T) {
tokens := token.Tokenize([]byte(`0`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestLeadingZero(t *testing.T) {
tokens := token.Tokenize([]byte(`0123`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestRange(t *testing.T) {
tokens := token.Tokenize([]byte("a..b"))
expected := []token.Kind{
token.Identifier,
token.Range,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestSeparator(t *testing.T) {
tokens := token.Tokenize([]byte("a,b,c"))
expected := []token.Kind{
token.Identifier,
token.Separator,
token.Identifier,
token.Separator,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestComment(t *testing.T) {
tokens := token.Tokenize([]byte("// Hello\n// World"))
expected := []token.Kind{
token.Comment,
token.NewLine,
token.Comment,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte("// Hello\n"))
expected = []token.Kind{
token.Comment,
token.NewLine,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte(`// Hello`))
expected = []token.Kind{
token.Comment,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte(`//`))
expected = []token.Kind{
token.Comment,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte(`/`))
expected = []token.Kind{
token.Div,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestInvalid(t *testing.T) {
tokens := token.Tokenize([]byte(`##`))
expected := []token.Kind{
token.Invalid,
token.Invalid,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestString(t *testing.T) {
tokens := token.Tokenize([]byte(`"Hello" "World"`))
expected := []token.Kind{
token.String,
token.String,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestStringMultiline(t *testing.T) {
tokens := token.Tokenize([]byte("\"Hello\nWorld\""))
expected := []token.Kind{
token.String,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestStringEOF(t *testing.T) {
tokens := token.Tokenize([]byte(`"EOF`))
expected := []token.Kind{
token.String,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestReturnType(t *testing.T) {
tokens := token.Tokenize([]byte("()->"))
expected := []token.Kind{
token.GroupStart,
token.GroupEnd,
token.ReturnType,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestMinusAtEOF(t *testing.T) {
tokens := token.Tokenize([]byte("1-"))
expected := []token.Kind{
token.Number,
token.Sub,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestRune(t *testing.T) {
tokens := token.Tokenize([]byte("'a'"))
expected := []token.Kind{
token.Rune,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}

25
src/token/dash.go Normal file
View file

@ -0,0 +1,25 @@
package token
// dash handles all tokens starting with '-'.
// It returns the index of the last consumed byte.
func dash(tokens List, buffer []byte, i Position) (List, Position) {
	// At the start of an expression the dash is a unary negation.
	if len(tokens) == 0 {
		return append(tokens, Token{Kind: Negate, Position: i, Length: 1}), i
	}

	previous := tokens[len(tokens)-1]

	if previous.IsOperator() || previous.IsExpressionStart() || previous.IsKeyword() {
		return append(tokens, Token{Kind: Negate, Position: i, Length: 1}), i
	}

	// Binary context: check the next byte for '-=' and '->'.
	kind, length := Sub, Length(1)

	if i+1 < Position(len(buffer)) {
		switch buffer[i+1] {
		case '=':
			kind, length = SubAssign, 2
		case '>':
			kind, length = ReturnType, 2
		}
	}

	tokens = append(tokens, Token{Kind: kind, Position: i, Length: length})
	return tokens, i + Position(length) - 1
}

38
src/token/digit.go Normal file
View file

@ -0,0 +1,38 @@
package token
// digit handles all tokens that qualify as a digit.
func digit(tokens List, buffer []byte, i Position) (List, Position) {
	position := i

	for i++; i < Position(len(buffer)) && isDigit(buffer[i]); i++ {
	}

	// A directly preceding unary minus is folded into the number literal.
	if last := len(tokens) - 1; last >= 0 && tokens[last].Kind == Negate {
		tokens[last].Kind = Number
		tokens[last].Length = Length(i-position) + 1
		return tokens, i
	}

	return append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}), i
}
// isDigit reports whether c is a decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}

// isHexDigit reports whether c is an uppercase hexadecimal digit.
// NOTE(review): lowercase a-f is rejected here, so `0xcafe` does not
// tokenize as a single number — confirm this is intended.
func isHexDigit(c byte) bool {
	return isDigit(c) || ('A' <= c && c <= 'F')
}

// isBinaryDigit reports whether c is a binary digit.
func isBinaryDigit(c byte) bool {
	return c == '0' || c == '1'
}

// isOctalDigit reports whether c is an octal digit.
func isOctalDigit(c byte) bool {
	return '0' <= c && c <= '7'
}

52
src/token/identifier.go Normal file
View file

@ -0,0 +1,52 @@
package token
// identifier handles all tokens that qualify as an identifier.
// Reserved words are mapped to their keyword kinds.
func identifier(tokens List, buffer []byte, i Position) (List, Position) {
	position := i

	for i++; i < Position(len(buffer)) && isIdentifier(buffer[i]); i++ {
	}

	word := buffer[position:i]
	tokens = append(tokens, Token{Kind: keywordKind(string(word)), Position: position, Length: Length(len(word))})
	return tokens, i
}

// keywordKind returns the keyword kind for the given word,
// or Identifier if the word is not a reserved word.
func keywordKind(word string) Kind {
	switch word {
	case "assert":
		return Assert
	case "const":
		return Const
	case "else":
		return Else
	case "extern":
		return Extern
	case "for":
		return For
	case "if":
		return If
	case "import":
		return Import
	case "loop":
		return Loop
	case "return":
		return Return
	case "switch":
		return Switch
	default:
		return Identifier
	}
}
// isIdentifier reports whether c may appear inside an identifier.
func isIdentifier(c byte) bool {
	return isLetter(c) || isDigit(c) || c == '_'
}

// isIdentifierStart reports whether c may start an identifier.
func isIdentifierStart(c byte) bool {
	return isLetter(c) || c == '_'
}

// isLetter reports whether c is an ASCII letter.
func isLetter(c byte) bool {
	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
}

86
src/token/operator.go Normal file
View file

@ -0,0 +1,86 @@
package token
// operator handles all tokens that qualify as an operator.
// It greedily consumes the longest run of operator characters and maps
// the combination to a kind; unknown combinations (e.g. `=!`) become a
// single Invalid token spanning the whole run.
func operator(tokens List, buffer []byte, i Position) (List, Position) {
	position := i
	i++

	for i < Position(len(buffer)) && isOperator(buffer[i]) {
		i++
	}

	kind := Invalid

	// string(...) in a switch is recognized by the compiler and does not allocate.
	switch string(buffer[position:i]) {
	case "!":
		kind = Not
	case "!=":
		kind = NotEqual
	case "%":
		kind = Mod
	case "%=":
		kind = ModAssign
	case "&":
		kind = And
	case "&&":
		kind = LogicalAnd
	case "&=":
		kind = AndAssign
	case "*":
		kind = Mul
	case "*=":
		kind = MulAssign
	case "+":
		kind = Add
	case "+=":
		kind = AddAssign
	case ".":
		kind = Dot
	case "..":
		kind = Range
	case ":=":
		kind = Define
	case "<":
		kind = Less
	case "<<":
		kind = Shl
	case "<<=":
		kind = ShlAssign
	case "<=":
		kind = LessEqual
	case "=":
		kind = Assign
	case "==":
		kind = Equal
	case ">":
		kind = Greater
	case ">=":
		kind = GreaterEqual
	case ">>":
		kind = Shr
	case ">>=":
		kind = ShrAssign
	case "^":
		kind = Xor
	case "^=":
		kind = XorAssign
	case "|":
		kind = Or
	case "|=":
		kind = OrAssign
	case "||":
		kind = LogicalOr
	}

	tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
	return tokens, i
}
// isOperator reports whether c can be part of an operator token.
func isOperator(c byte) bool {
	for _, operator := range []byte("=:.+-*/<>&|^%!") {
		if c == operator {
			return true
		}
	}

	return false
}

28
src/token/quote.go Normal file
View file

@ -0,0 +1,28 @@
package token
// quote handles all tokens starting with a single or double quote.
// Double quotes produce String tokens, single quotes produce Rune tokens.
// A limiter preceded by an odd number of backslashes is escaped and does
// not terminate the literal; unterminated literals extend to the end of
// the buffer.
func quote(tokens List, buffer []byte, i Position) (List, Position) {
	limiter := buffer[i]
	start := i
	end := Position(len(buffer))
	i++

	for i < Position(len(buffer)) {
		if buffer[i] == limiter && !isEscaped(buffer, start, i) {
			end = i + 1
			i++
			break
		}

		i++
	}

	kind := String

	if limiter == '\'' {
		kind = Rune
	}

	tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)})
	return tokens, i
}

// isEscaped reports whether the character at position i is escaped.
// It counts the backslashes immediately before i (never crossing the
// opening quote at start): an odd count means the character is escaped.
//
// The previous check only looked two characters back, which misread
// odd runs of three or more backslashes: in `"\\\"` the quote follows
// an escaped backslash plus an escaping backslash and must NOT end
// the literal, yet it was treated as a terminator.
func isEscaped(buffer []byte, start Position, i Position) bool {
	backslashes := Position(0)

	for i-1-backslashes > start && buffer[i-1-backslashes] == '\\' {
		backslashes++
	}

	return backslashes%2 == 1
}

34
src/token/slash.go Normal file
View file

@ -0,0 +1,34 @@
package token
// slash handles all tokens starting with '/'.
func slash(tokens List, buffer []byte, i Position) (List, Position) {
	position := i

	// Two slashes start a comment that runs until the end of the line.
	if i+1 < Position(len(buffer)) && buffer[i+1] == '/' {
		for i < Position(len(buffer)) && buffer[i] != '\n' {
			i++
		}

		return append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)}), i
	}

	// Otherwise consume the operator run: '/' or '/='.
	for i++; i < Position(len(buffer)) && isOperator(buffer[i]); i++ {
	}

	kind := Invalid

	switch string(buffer[position:i]) {
	case "/":
		kind = Div
	case "/=":
		kind = DivAssign
	}

	return append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}), i
}

35
src/token/zero.go Normal file
View file

@ -0,0 +1,35 @@
package token
// zero handles all tokens starting with a '0'.
func zero(tokens List, buffer []byte, i Position) (List, Position) {
position := i
i++
if i >= Position(len(buffer)) {
tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1})
return tokens, i
}
filter := isDigit
switch buffer[i] {
case 'x':
i++
filter = isHexDigit
case 'b':
i++
filter = isBinaryDigit
case 'o':
i++
filter = isOctalDigit
}
for i < Position(len(buffer)) && filter(buffer[i]) {
i++
}
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
return tokens, i
}