Added token package
All checks were successful
/ test (push) Successful in 15s

This commit is contained in:
Eduard Urbach 2025-06-19 15:58:29 +02:00
parent 38bd387002
commit 1405d2b8b1
Signed by: akyoto
GPG key ID: 49226B848C78F6C8
18 changed files with 1500 additions and 0 deletions

14
src/token/Count.go Normal file
View file

@ -0,0 +1,14 @@
package token
// Count counts how often the given token appears in the token list.
// A token matches when both its kind and its source text equal the
// given kind and name.
//
// The count saturates at 255 instead of wrapping around: previously,
// 256 matching tokens would overflow the uint8 counter back to 0 and
// falsely report that the token never appears.
func Count(tokens []Token, buffer []byte, kind Kind, name string) uint8 {
	count := uint8(0)

	for _, t := range tokens {
		if t.Kind != kind || t.String(buffer) != name {
			continue
		}

		// Saturate: count+1 would wrap 255 -> 0.
		if count == 255 {
			return 255
		}

		count++
	}

	return count
}

17
src/token/Count_test.go Normal file
View file

@ -0,0 +1,17 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestCount(t *testing.T) {
buffer := []byte(`a b b c c c`)
tokens := token.Tokenize(buffer)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "a"), 1)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "b"), 2)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "c"), 3)
assert.Equal(t, token.Count(tokens, buffer, token.Identifier, "d"), 0)
}

61
src/token/Instructions.go Normal file
View file

@ -0,0 +1,61 @@
package token
// Instructions yields each instruction in the token list: a run of tokens
// terminated by a top-level newline, or a block header together with its
// entire {...} body (including the closing brace). Returning false from
// yield stops the iteration early, so this method can be used as a
// range-over-func iterator.
func (list List) Instructions(yield func(List) bool) {
	start := 0
	groupLevel := 0 // current nesting depth of ( )
	blockLevel := 0 // current nesting depth of { }

	for i, t := range list {
		switch t.Kind {
		case NewLine:
			// Skip empty lines.
			if start == i {
				start = i + 1
				continue
			}

			// A newline inside a group or block does not end the instruction.
			if groupLevel > 0 || blockLevel > 0 {
				continue
			}

			if !yield(list[start:i]) {
				return
			}

			start = i + 1
		case GroupStart:
			groupLevel++
		case GroupEnd:
			groupLevel--
		case BlockStart:
			blockLevel++
		case BlockEnd:
			blockLevel--

			// An inner closing brace is part of the enclosing instruction.
			if groupLevel > 0 || blockLevel > 0 {
				continue
			}

			// Yield the block including the closing brace itself.
			if !yield(list[start : i+1]) {
				return
			}

			start = i + 1
		case EOF:
			// Flush any pending tokens before the EOF marker and stop.
			if start < i {
				yield(list[start:i])
			}

			return
		}
	}

	// Lists without an EOF token: flush the remaining tokens.
	if start < len(list) {
		yield(list[start:])
	}
}

View file

@ -0,0 +1,109 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestInstructionsBasic(t *testing.T) {
src := []byte("a := 1\nb := 2\n")
tokens := token.Tokenize(src)
nodes := []string{}
for param := range tokens.Instructions {
nodes = append(nodes, param.String(src))
}
assert.DeepEqual(t, nodes, []string{"a:=1", "b:=2"})
}
func TestInstructionsBlock(t *testing.T) {
src := []byte("a := 1\nif x > 0 {\nx = 0\n}\nb := 2\n")
tokens := token.Tokenize(src)
nodes := []string{}
for param := range tokens.Instructions {
nodes = append(nodes, param.String(src))
}
assert.DeepEqual(t, nodes, []string{"a:=1", "ifx>0{\nx=0\n}", "b:=2"})
}
func TestInstructionsGroup(t *testing.T) {
src := []byte("a := 1\ncall(\nx,\ny\n)\nb := 2\n")
tokens := token.Tokenize(src)
nodes := []string{}
for param := range tokens.Instructions {
nodes = append(nodes, param.String(src))
}
assert.DeepEqual(t, nodes, []string{"a:=1", "call(\nx,\ny\n)", "b:=2"})
}
func TestInstructionsBreak(t *testing.T) {
src := []byte("a := 1\nb := 2\n")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
if count == 1 {
break
}
count++
}
}
func TestInstructionsEOF(t *testing.T) {
src := []byte("a := 1")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
count++
}
assert.Equal(t, count, 1)
}
func TestInstructionsNoEOF(t *testing.T) {
tokens := token.List{
token.Token{Position: 0, Length: 1, Kind: token.Identifier},
}
count := 0
for range tokens.Instructions {
count++
}
assert.Equal(t, count, 1)
}
func TestInstructionsMultiBlock(t *testing.T) {
src := []byte("if x == 0 { if y == 0 {} }")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
count++
}
assert.Equal(t, count, 1)
}
func TestInstructionsMultiBlockBreak(t *testing.T) {
src := []byte("if x == 0 { if y == 0 {} }")
tokens := token.Tokenize(src)
count := 0
for range tokens.Instructions {
count++
break
}
assert.Equal(t, count, 1)
}

79
src/token/Kind.go Normal file
View file

@ -0,0 +1,79 @@
package token
// Kind represents the type of token.
//
// The values are ordered so that related kinds form contiguous ranges.
// The ___X___ identifiers are sentinel markers, never produced by the
// tokenizer: a kind k belongs to group X exactly when
// ___X___ < k < ___END_X___ (see Token.IsOperator, Token.IsAssignment,
// Token.IsComparison, Token.IsUnaryOperator and Token.IsKeyword).
// Keep this ordering intact when adding new kinds.
type Kind uint8

const (
	Invalid    Kind = iota // Invalid is an invalid token.
	EOF                    // EOF is the end of file.
	NewLine                // NewLine is the newline character.
	Identifier             // Identifier is a series of characters used to identify a variable or function.
	Number                 // Number is a series of numerical characters.
	Rune                   // Rune is a single unicode code point.
	String                 // String is an uninterpreted series of characters in the source code.
	Comment                // Comment is a comment.
	GroupStart             // (
	GroupEnd               // )
	BlockStart             // {
	BlockEnd               // }
	ArrayStart             // [
	ArrayEnd               // ]
	ReturnType             // ->

	___OPERATORS___ // <operators>
	Add             // +
	Sub             // -
	Mul             // *
	Div             // /
	Mod             // %
	And             // &
	Or              // |
	Xor             // ^
	Shl             // <<
	Shr             // >>
	LogicalAnd      // &&
	LogicalOr       // ||
	Define          // :=
	Dot             // .
	Range           // ..
	Call            // x()
	Array           // [x]
	Separator       // ,

	___ASSIGNMENTS___     // <assignments>
	Assign                // =
	AddAssign             // +=
	SubAssign             // -=
	MulAssign             // *=
	DivAssign             // /=
	ModAssign             // %=
	AndAssign             // &=
	OrAssign              // |=
	XorAssign             // ^=
	ShlAssign             // <<=
	ShrAssign             // >>=
	___END_ASSIGNMENTS___ // </assignments>

	___COMPARISONS___     // <comparisons>
	Equal                 // ==
	NotEqual              // !=
	Less                  // <
	Greater               // >
	LessEqual             // <=
	GreaterEqual          // >=
	___END_COMPARISONS___ // </comparisons>

	___UNARY___       // <unary>
	Not               // ! (unary)
	Negate            // - (unary)
	___END_UNARY___   // </unary>
	___END_OPERATORS___ // </operators>

	___KEYWORDS___     // <keywords>
	Assert             // assert
	Const              // const
	Else               // else
	Extern             // extern
	For                // for
	If                 // if
	Import             // import
	Loop               // loop
	Return             // return
	Switch             // switch
	___END_KEYWORDS___ // </keywords>
)

76
src/token/List.go Normal file
View file

@ -0,0 +1,76 @@
package token
import (
"strings"
)
// List is a slice of tokens.
type List []Token
// IndexKind returns the position of a token kind within a token list.
// It returns -1 when no token of the given kind exists.
func (list List) IndexKind(kind Kind) int {
	for index := range list {
		if list[index].Kind == kind {
			return index
		}
	}

	return -1
}
// LastIndexKind returns the position of the last token kind within a token list.
// It returns -1 when no token of the given kind exists.
func (list List) LastIndexKind(kind Kind) int {
	index := len(list)

	for index > 0 {
		index--

		if list[index].Kind == kind {
			return index
		}
	}

	return -1
}
// Split calls the callback function on each set of tokens in a comma separated list.
// Separators nested inside (), [] or {} are ignored, so calls and literals
// stay in one piece. Returning false from yield stops the iteration early.
func (list List) Split(yield func(List) bool) {
	// An empty list yields nothing at all (not even an empty slice).
	if len(list) == 0 {
		return
	}

	start := 0
	groupLevel := 0 // combined nesting depth of (), [] and {}

	for i, t := range list {
		switch t.Kind {
		case GroupStart, ArrayStart, BlockStart:
			groupLevel++
		case GroupEnd, ArrayEnd, BlockEnd:
			groupLevel--
		case Separator:
			// A comma inside a nested group belongs to that group.
			if groupLevel > 0 {
				continue
			}

			parameter := list[start:i]

			if !yield(parameter) {
				return
			}

			start = i + 1
		}
	}

	// The tokens after the last top-level separator form the final element.
	yield(list[start:])
}
// String returns the concatenated token strings.
func (list List) String(source []byte) string {
	var builder strings.Builder

	for _, current := range list {
		builder.WriteString(current.String(source))
	}

	return builder.String()
}

71
src/token/List_test.go Normal file
View file

@ -0,0 +1,71 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestIndexKind(t *testing.T) {
tokens := token.Tokenize([]byte("a{{}}"))
assert.Equal(t, tokens.IndexKind(token.NewLine), -1)
assert.Equal(t, tokens.LastIndexKind(token.NewLine), -1)
assert.Equal(t, tokens.IndexKind(token.BlockStart), 1)
assert.Equal(t, tokens.LastIndexKind(token.BlockStart), 2)
assert.Equal(t, tokens.IndexKind(token.BlockEnd), 3)
assert.Equal(t, tokens.LastIndexKind(token.BlockEnd), 4)
}
func TestSplit(t *testing.T) {
src := []byte("1+2,3*4,5*6,7+8")
tokens := token.Tokenize(src)
parameters := []string{}
for param := range tokens.Split {
parameters = append(parameters, param.String(src))
}
assert.DeepEqual(t, parameters, []string{"1+2", "3*4", "5*6", "7+8"})
}
func TestSplitBreak(t *testing.T) {
src := []byte("1,2")
tokens := token.Tokenize(src)
for range tokens.Split {
break
}
}
func TestSplitEmpty(t *testing.T) {
tokens := token.List{}
for range tokens.Split {
t.Fail()
}
}
func TestSplitGroups(t *testing.T) {
src := []byte("f(1,2),g(3,4)")
tokens := token.Tokenize(src)
parameters := []string{}
for param := range tokens.Split {
parameters = append(parameters, param.String(src))
}
assert.DeepEqual(t, parameters, []string{"f(1,2)", "g(3,4)"})
}
func TestSplitSingle(t *testing.T) {
src := []byte("123")
tokens := token.Tokenize(src)
parameters := []string{}
for param := range tokens.Split {
parameters = append(parameters, param.String(src))
}
assert.DeepEqual(t, parameters, []string{"123"})
}

77
src/token/Token.go Normal file
View file

@ -0,0 +1,77 @@
package token
import (
"unsafe"
)
// Position is the data type for storing file offsets.
type Position = uint32

// Length is the data type for storing token lengths.
type Length = uint16

// Token represents a single element in a source file.
// The characters that make up an identifier are grouped into a single token.
// This makes parsing easier and allows us to do better syntax checks.
// A token stores no text itself; it only references a span of the source buffer.
type Token struct {
	Position Position // byte offset of the token within the source buffer
	Length   Length   // number of bytes the token spans
	Kind     Kind     // category of the token
}

// Bytes returns the byte slice.
// The slice aliases the source buffer rather than copying it.
func (t Token) Bytes(buffer []byte) []byte {
	return buffer[t.Position : t.Position+Position(t.Length)]
}

// End returns the position after the token.
func (t Token) End() Position {
	return t.Position + Position(t.Length)
}

// IsAssignment returns true if the token is an assignment operator.
// Group membership is an open range test between the sentinel kinds.
func (t Token) IsAssignment() bool {
	return t.Kind > ___ASSIGNMENTS___ && t.Kind < ___END_ASSIGNMENTS___
}

// IsComparison returns true if the token is a comparison operator.
func (t Token) IsComparison() bool {
	return t.Kind > ___COMPARISONS___ && t.Kind < ___END_COMPARISONS___
}

// IsExpressionStart returns true if the token starts an expression.
func (t Token) IsExpressionStart() bool {
	return t.Kind == GroupStart || t.Kind == ArrayStart || t.Kind == BlockStart
}

// IsKeyword returns true if the token is a keyword.
func (t Token) IsKeyword() bool {
	return t.Kind > ___KEYWORDS___ && t.Kind < ___END_KEYWORDS___
}

// IsNumeric returns true if the token is a number or rune.
func (t Token) IsNumeric() bool {
	return t.Kind == Number || t.Kind == Rune
}

// IsOperator returns true if the token is an operator.
func (t Token) IsOperator() bool {
	return t.Kind > ___OPERATORS___ && t.Kind < ___END_OPERATORS___
}

// IsUnaryOperator returns true if the token is a unary operator.
func (t Token) IsUnaryOperator() bool {
	return t.Kind > ___UNARY___ && t.Kind < ___END_UNARY___
}

// Reset resets the token to default values.
func (t *Token) Reset() {
	t.Position = 0
	t.Length = 0
	t.Kind = Invalid
}

// String returns the token string.
// The result aliases the source buffer via unsafe.String instead of
// copying, so it must not be used after the buffer is mutated or freed.
func (t Token) String(buffer []byte) string {
	return unsafe.String(unsafe.SliceData(t.Bytes(buffer)), t.Length)
}

58
src/token/Token_test.go Normal file
View file

@ -0,0 +1,58 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestTokenEnd(t *testing.T) {
hello := token.Token{
Kind: token.Identifier,
Position: 0,
Length: 5,
}
assert.Equal(t, hello.End(), 5)
}
func TestTokenReset(t *testing.T) {
hello := token.Token{
Kind: token.Identifier,
Position: 1,
Length: 5,
}
hello.Reset()
assert.Equal(t, hello.Position, 0)
assert.Equal(t, hello.Length, 0)
assert.Equal(t, hello.Kind, token.Invalid)
}
func TestTokenString(t *testing.T) {
buffer := []byte("hello, world")
hello := token.Token{Kind: token.Identifier, Position: 0, Length: 5}
comma := token.Token{Kind: token.Separator, Position: 5, Length: 1}
world := token.Token{Kind: token.Identifier, Position: 7, Length: 5}
assert.Equal(t, hello.String(buffer), "hello")
assert.Equal(t, comma.String(buffer), ",")
assert.Equal(t, world.String(buffer), "world")
}
func TestTokenGroups(t *testing.T) {
assignment := token.Token{Kind: token.Assign}
operator := token.Token{Kind: token.Add}
keyword := token.Token{Kind: token.If}
unary := token.Token{Kind: token.Not}
number := token.Token{Kind: token.Number}
comparison := token.Token{Kind: token.Equal}
assert.True(t, assignment.IsAssignment())
assert.True(t, operator.IsOperator())
assert.True(t, keyword.IsKeyword())
assert.True(t, unary.IsUnaryOperator())
assert.True(t, number.IsNumeric())
assert.True(t, comparison.IsComparison())
}

64
src/token/Tokenize.go Normal file
View file

@ -0,0 +1,64 @@
package token
// Tokenize turns the file contents into a list of tokens.
// The returned list always ends with a zero-length EOF token.
func Tokenize(buffer []byte) List {
	var (
		i Position
		// Heuristic pre-allocation: roughly one token per two bytes of input.
		tokens = make(List, 0, 8+len(buffer)/2)
	)

	for i < Position(len(buffer)) {
		switch buffer[i] {
		case ' ', '\t', '\r':
			// Whitespace produces no token.
		case ',':
			tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1})
		case '(':
			tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1})
		case ')':
			tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1})
		case '{':
			tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1})
		case '}':
			tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1})
		case '[':
			tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1})
		case ']':
			tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1})
		case '\n':
			tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1})
		case '-':
			// dash returns the index of the LAST consumed byte, so this
			// case intentionally falls through to the i++ below.
			tokens, i = dash(tokens, buffer, i)
		case '/':
			// slash, quote and zero return the index of the first
			// unconsumed byte, hence the continue.
			tokens, i = slash(tokens, buffer, i)
			continue
		case '"', '\'':
			tokens, i = quote(tokens, buffer, i)
			continue
		case '0':
			tokens, i = zero(tokens, buffer, i)
			continue
		default:
			if isIdentifierStart(buffer[i]) {
				tokens, i = identifier(tokens, buffer, i)
				continue
			}

			if isDigit(buffer[i]) {
				tokens, i = digit(tokens, buffer, i)
				continue
			}

			if isOperator(buffer[i]) {
				tokens, i = operator(tokens, buffer, i)
				continue
			}

			// Anything unrecognized becomes a single-byte Invalid token.
			tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1})
		}

		i++
	}

	// Terminate the list with a zero-length EOF token.
	tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0})
	return tokens
}

576
src/token/Tokenize_test.go Normal file
View file

@ -0,0 +1,576 @@
package token_test
import (
"testing"
"git.urbach.dev/cli/q/src/token"
"git.urbach.dev/go/assert"
)
func TestFunction(t *testing.T) {
tokens := token.Tokenize([]byte("main(){}"))
expected := []token.Kind{
token.Identifier,
token.GroupStart,
token.GroupEnd,
token.BlockStart,
token.BlockEnd,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestKeyword(t *testing.T) {
tokens := token.Tokenize([]byte("assert const else extern if import for loop return switch"))
expected := []token.Kind{
token.Assert,
token.Const,
token.Else,
token.Extern,
token.If,
token.Import,
token.For,
token.Loop,
token.Return,
token.Switch,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestArray(t *testing.T) {
tokens := token.Tokenize([]byte("array[i]"))
expected := []token.Kind{
token.Identifier,
token.ArrayStart,
token.Identifier,
token.ArrayEnd,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNewline(t *testing.T) {
tokens := token.Tokenize([]byte("\n\n"))
expected := []token.Kind{
token.NewLine,
token.NewLine,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`123 456`))
expected := []token.Kind{
token.Number,
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperator(t *testing.T) {
tokens := token.Tokenize([]byte(`a + b - c * d / e % f << g >> h & i | j ^ k`))
expected := []token.Kind{
token.Identifier,
token.Add,
token.Identifier,
token.Sub,
token.Identifier,
token.Mul,
token.Identifier,
token.Div,
token.Identifier,
token.Mod,
token.Identifier,
token.Shl,
token.Identifier,
token.Shr,
token.Identifier,
token.And,
token.Identifier,
token.Or,
token.Identifier,
token.Xor,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperatorAssign(t *testing.T) {
tokens := token.Tokenize([]byte(`a = b += c -= d *= e /= f %= g &= h |= i ^= j <<= k >>= l`))
expected := []token.Kind{
token.Identifier,
token.Assign,
token.Identifier,
token.AddAssign,
token.Identifier,
token.SubAssign,
token.Identifier,
token.MulAssign,
token.Identifier,
token.DivAssign,
token.Identifier,
token.ModAssign,
token.Identifier,
token.AndAssign,
token.Identifier,
token.OrAssign,
token.Identifier,
token.XorAssign,
token.Identifier,
token.ShlAssign,
token.Identifier,
token.ShrAssign,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperatorEquality(t *testing.T) {
tokens := token.Tokenize([]byte(`a == b != c <= d >= e < f > g`))
expected := []token.Kind{
token.Identifier,
token.Equal,
token.Identifier,
token.NotEqual,
token.Identifier,
token.LessEqual,
token.Identifier,
token.GreaterEqual,
token.Identifier,
token.Less,
token.Identifier,
token.Greater,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOperatorLogical(t *testing.T) {
tokens := token.Tokenize([]byte(`a && b || c`))
expected := []token.Kind{
token.Identifier,
token.LogicalAnd,
token.Identifier,
token.LogicalOr,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestDefine(t *testing.T) {
tokens := token.Tokenize([]byte(`a := b`))
expected := []token.Kind{
token.Identifier,
token.Define,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestDot(t *testing.T) {
tokens := token.Tokenize([]byte(`a.b.c`))
expected := []token.Kind{
token.Identifier,
token.Dot,
token.Identifier,
token.Dot,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNot(t *testing.T) {
tokens := token.Tokenize([]byte(`!a`))
expected := []token.Kind{
token.Not,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateFirstToken(t *testing.T) {
tokens := token.Tokenize([]byte(`-a`))
expected := []token.Kind{
token.Negate,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateAfterGroupStart(t *testing.T) {
tokens := token.Tokenize([]byte(`(-a)`))
expected := []token.Kind{
token.GroupStart,
token.Negate,
token.Identifier,
token.GroupEnd,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateSub(t *testing.T) {
tokens := token.Tokenize([]byte(`-a-b`))
expected := []token.Kind{
token.Negate,
token.Identifier,
token.Sub,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateAfterOperator(t *testing.T) {
tokens := token.Tokenize([]byte(`-a + -b`))
expected := []token.Kind{
token.Negate,
token.Identifier,
token.Add,
token.Negate,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestNegateNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`-1`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestBinaryNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`0b1010`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestOctalNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`0o755`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestHexadecimalNumber(t *testing.T) {
tokens := token.Tokenize([]byte(`0xCAFE`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestStandaloneZero(t *testing.T) {
tokens := token.Tokenize([]byte(`0`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestLeadingZero(t *testing.T) {
tokens := token.Tokenize([]byte(`0123`))
expected := []token.Kind{
token.Number,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestRange(t *testing.T) {
tokens := token.Tokenize([]byte("a..b"))
expected := []token.Kind{
token.Identifier,
token.Range,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestSeparator(t *testing.T) {
tokens := token.Tokenize([]byte("a,b,c"))
expected := []token.Kind{
token.Identifier,
token.Separator,
token.Identifier,
token.Separator,
token.Identifier,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestComment(t *testing.T) {
tokens := token.Tokenize([]byte("// Hello\n// World"))
expected := []token.Kind{
token.Comment,
token.NewLine,
token.Comment,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte("// Hello\n"))
expected = []token.Kind{
token.Comment,
token.NewLine,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte(`// Hello`))
expected = []token.Kind{
token.Comment,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte(`//`))
expected = []token.Kind{
token.Comment,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
tokens = token.Tokenize([]byte(`/`))
expected = []token.Kind{
token.Div,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestInvalid(t *testing.T) {
tokens := token.Tokenize([]byte(`##`))
expected := []token.Kind{
token.Invalid,
token.Invalid,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestString(t *testing.T) {
tokens := token.Tokenize([]byte(`"Hello" "World"`))
expected := []token.Kind{
token.String,
token.String,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestStringMultiline(t *testing.T) {
tokens := token.Tokenize([]byte("\"Hello\nWorld\""))
expected := []token.Kind{
token.String,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestStringEOF(t *testing.T) {
tokens := token.Tokenize([]byte(`"EOF`))
expected := []token.Kind{
token.String,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestReturnType(t *testing.T) {
tokens := token.Tokenize([]byte("()->"))
expected := []token.Kind{
token.GroupStart,
token.GroupEnd,
token.ReturnType,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestMinusAtEOF(t *testing.T) {
tokens := token.Tokenize([]byte("1-"))
expected := []token.Kind{
token.Number,
token.Sub,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}
func TestRune(t *testing.T) {
tokens := token.Tokenize([]byte("'a'"))
expected := []token.Kind{
token.Rune,
token.EOF,
}
for i, kind := range expected {
assert.Equal(t, tokens[i].Kind, kind)
}
}

25
src/token/dash.go Normal file
View file

@ -0,0 +1,25 @@
package token
// dash handles all tokens starting with '-'.
// It returns the index of the last consumed byte.
func dash(tokens List, buffer []byte, i Position) (List, Position) {
	// At the start of an expression the dash is a unary negation.
	if len(tokens) == 0 {
		return append(tokens, Token{Kind: Negate, Position: i, Length: 1}), i
	}

	previous := tokens[len(tokens)-1]

	if previous.IsOperator() || previous.IsExpressionStart() || previous.IsKeyword() {
		return append(tokens, Token{Kind: Negate, Position: i, Length: 1}), i
	}

	// Binary context: check the next byte for '-=' and '->'.
	kind, length := Sub, Length(1)

	if i+1 < Position(len(buffer)) {
		switch buffer[i+1] {
		case '=':
			kind, length = SubAssign, 2
		case '>':
			kind, length = ReturnType, 2
		}
	}

	tokens = append(tokens, Token{Kind: kind, Position: i, Length: length})
	return tokens, i + Position(length) - 1
}

38
src/token/digit.go Normal file
View file

@ -0,0 +1,38 @@
package token
// digit handles all tokens that qualify as a digit.
func digit(tokens List, buffer []byte, i Position) (List, Position) {
	position := i

	for i++; i < Position(len(buffer)) && isDigit(buffer[i]); i++ {
	}

	// A directly preceding unary minus is folded into the number literal.
	if last := len(tokens) - 1; last >= 0 && tokens[last].Kind == Negate {
		tokens[last].Kind = Number
		tokens[last].Length = Length(i-position) + 1
		return tokens, i
	}

	return append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}), i
}
// isDigit reports whether c is a decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}

// isHexDigit reports whether c is an uppercase hexadecimal digit.
// NOTE(review): lowercase a-f is rejected here, so `0xcafe` does not
// tokenize as a single number — confirm this is intended.
func isHexDigit(c byte) bool {
	return isDigit(c) || ('A' <= c && c <= 'F')
}

// isBinaryDigit reports whether c is a binary digit.
func isBinaryDigit(c byte) bool {
	return c == '0' || c == '1'
}

// isOctalDigit reports whether c is an octal digit.
func isOctalDigit(c byte) bool {
	return '0' <= c && c <= '7'
}

52
src/token/identifier.go Normal file
View file

@ -0,0 +1,52 @@
package token
// identifier handles all tokens that qualify as an identifier.
// Reserved words are mapped to their keyword kinds.
func identifier(tokens List, buffer []byte, i Position) (List, Position) {
	position := i

	for i++; i < Position(len(buffer)) && isIdentifier(buffer[i]); i++ {
	}

	word := buffer[position:i]
	tokens = append(tokens, Token{Kind: keywordKind(string(word)), Position: position, Length: Length(len(word))})
	return tokens, i
}

// keywordKind returns the keyword kind for the given word,
// or Identifier if the word is not a reserved word.
func keywordKind(word string) Kind {
	switch word {
	case "assert":
		return Assert
	case "const":
		return Const
	case "else":
		return Else
	case "extern":
		return Extern
	case "for":
		return For
	case "if":
		return If
	case "import":
		return Import
	case "loop":
		return Loop
	case "return":
		return Return
	case "switch":
		return Switch
	default:
		return Identifier
	}
}
// isIdentifier reports whether c may appear inside an identifier.
func isIdentifier(c byte) bool {
	return isLetter(c) || isDigit(c) || c == '_'
}

// isIdentifierStart reports whether c may start an identifier.
func isIdentifierStart(c byte) bool {
	return isLetter(c) || c == '_'
}

// isLetter reports whether c is an ASCII letter.
func isLetter(c byte) bool {
	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
}

86
src/token/operator.go Normal file
View file

@ -0,0 +1,86 @@
package token
// operator handles all tokens that qualify as an operator.
// It greedily consumes the longest run of operator characters and maps
// the combination to a kind; unknown combinations (e.g. `=!`) become a
// single Invalid token spanning the whole run.
func operator(tokens List, buffer []byte, i Position) (List, Position) {
	position := i
	i++

	for i < Position(len(buffer)) && isOperator(buffer[i]) {
		i++
	}

	kind := Invalid

	// string(...) in a switch is recognized by the compiler and does not allocate.
	switch string(buffer[position:i]) {
	case "!":
		kind = Not
	case "!=":
		kind = NotEqual
	case "%":
		kind = Mod
	case "%=":
		kind = ModAssign
	case "&":
		kind = And
	case "&&":
		kind = LogicalAnd
	case "&=":
		kind = AndAssign
	case "*":
		kind = Mul
	case "*=":
		kind = MulAssign
	case "+":
		kind = Add
	case "+=":
		kind = AddAssign
	case ".":
		kind = Dot
	case "..":
		kind = Range
	case ":=":
		kind = Define
	case "<":
		kind = Less
	case "<<":
		kind = Shl
	case "<<=":
		kind = ShlAssign
	case "<=":
		kind = LessEqual
	case "=":
		kind = Assign
	case "==":
		kind = Equal
	case ">":
		kind = Greater
	case ">=":
		kind = GreaterEqual
	case ">>":
		kind = Shr
	case ">>=":
		kind = ShrAssign
	case "^":
		kind = Xor
	case "^=":
		kind = XorAssign
	case "|":
		kind = Or
	case "|=":
		kind = OrAssign
	case "||":
		kind = LogicalOr
	}

	tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
	return tokens, i
}
// isOperator reports whether c can be part of an operator token.
func isOperator(c byte) bool {
	for _, operator := range []byte("=:.+-*/<>&|^%!") {
		if c == operator {
			return true
		}
	}

	return false
}

28
src/token/quote.go Normal file
View file

@ -0,0 +1,28 @@
package token
// quote handles all tokens starting with a single or double quote.
// Double quotes produce String tokens, single quotes produce Rune tokens.
// A limiter preceded by an odd number of backslashes is escaped and does
// not terminate the literal; unterminated literals extend to the end of
// the buffer.
func quote(tokens List, buffer []byte, i Position) (List, Position) {
	limiter := buffer[i]
	start := i
	end := Position(len(buffer))
	i++

	for i < Position(len(buffer)) {
		if buffer[i] == limiter && !isEscaped(buffer, start, i) {
			end = i + 1
			i++
			break
		}

		i++
	}

	kind := String

	if limiter == '\'' {
		kind = Rune
	}

	tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)})
	return tokens, i
}

// isEscaped reports whether the character at position i is escaped.
// It counts the backslashes immediately before i (never crossing the
// opening quote at start): an odd count means the character is escaped.
//
// The previous check only looked two characters back, which misread
// odd runs of three or more backslashes: in `"\\\"` the quote follows
// an escaped backslash plus an escaping backslash and must NOT end
// the literal, yet it was treated as a terminator.
func isEscaped(buffer []byte, start Position, i Position) bool {
	backslashes := Position(0)

	for i-1-backslashes > start && buffer[i-1-backslashes] == '\\' {
		backslashes++
	}

	return backslashes%2 == 1
}

34
src/token/slash.go Normal file
View file

@ -0,0 +1,34 @@
package token
// slash handles all tokens starting with '/'.
func slash(tokens List, buffer []byte, i Position) (List, Position) {
	position := i

	// Two slashes start a comment that runs until the end of the line.
	if i+1 < Position(len(buffer)) && buffer[i+1] == '/' {
		for i < Position(len(buffer)) && buffer[i] != '\n' {
			i++
		}

		return append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)}), i
	}

	// Otherwise consume the operator run: '/' or '/='.
	for i++; i < Position(len(buffer)) && isOperator(buffer[i]); i++ {
	}

	kind := Invalid

	switch string(buffer[position:i]) {
	case "/":
		kind = Div
	case "/=":
		kind = DivAssign
	}

	return append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}), i
}

35
src/token/zero.go Normal file
View file

@ -0,0 +1,35 @@
package token
// zero handles all tokens starting with a '0'.
func zero(tokens List, buffer []byte, i Position) (List, Position) {
position := i
i++
if i >= Position(len(buffer)) {
tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1})
return tokens, i
}
filter := isDigit
switch buffer[i] {
case 'x':
i++
filter = isHexDigit
case 'b':
i++
filter = isBinaryDigit
case 'o':
i++
filter = isOctalDigit
}
for i < Position(len(buffer)) && filter(buffer[i]) {
i++
}
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
return tokens, i
}