How to Implement a String Type in LLVM¶
There are two ways to implement a string type in LLVM:
- To write the implementation in LLVM IR.
- To write the implementation in a higher-level language that generates IR.
I’d personally much prefer to use the second method, but for the sake of
the example, I’ll go ahead and illustrate a simple but useful string
type in LLVM IR. It assumes a 32-bit architecture, so please replace all
occurrences of i32
with i64
if you are targetting a 64-bit
architecture.
We’ll be making a dynamic, mutable string type that can be appended to and could also be inserted into, converted to lower case, and so on, depending on which support functions are defined to operate on the string type.
It all boils down to making a suitable type definition for the class and then define a rich set of functions to operate on the type definition:
; The actual type definition for our 'String' type.
%String = type {
i8*, ; 0: buffer; pointer to the character buffer
i32, ; 1: length; the number of chars in the buffer
i32, ; 2: maxlen; the maximum number of chars in the buffer
i32 ; 3: factor; the number of chars to preallocate when growing
}
define fastcc void @String_Create_Default(%String* %this) nounwind {
; Initialize 'buffer'.
%1 = getelementptr %String* %this, i32 0, i32 0
store i8* null, i8** %1
; Initialize 'length'.
%2 = getelementptr %String* %this, i32 0, i32 1
store i32 0, i32* %2
; Initialize 'maxlen'.
%3 = getelementptr %String* %this, i32 0, i32 2
store i32 0, i32* %3
; Initialize 'factor'.
%4 = getelementptr %String* %this, i32 0, i32 3
store i32 16, i32* %4
ret void
}
declare i8* @malloc(i32)
declare void @free(i8*)
declare i8* @memcpy(i8*, i8*, i32)
define fastcc void @String_Delete(%String* %this) nounwind {
; Check if we need to call 'free'.
%1 = getelementptr %String* %this, i32 0, i32 0
%2 = load i8** %1
%3 = icmp ne i8* %2, null
br i1 %3, label %free_begin, label %free_close
free_begin:
call void @free(i8* %2)
br label %free_close
free_close:
ret void
}
define fastcc void @String_Resize(%String* %this, i32 %value) {
; %output = malloc(%value)
%output = call i8* @malloc(i32 %value)
; todo: check return value
; %buffer = this->buffer
%1 = getelementptr %String* %this, i32 0, i32 0
%buffer = load i8** %1
; %length = this->length
%2 = getelementptr %String* %this, i32 0, i32 1
%length = load i32* %2
; memcpy(%output, %buffer, %length)
%3 = call i8* @memcpy(i8* %output, i8* %buffer, i32 %length)
; free(%buffer)
call void @free(i8* %buffer)
; this->buffer = %output
store i8* %output, i8** %1
;this->maxlen = %value (value that was passed into @malloc is the new maxlen)
%4 = getelementptr %String* this, i32 0, i32 2
store i32 %value, i32* %4
ret void
}
define fastcc void @String_Add_Char(%String* %this, i8 %value) {
; Check if we need to grow the string.
%1 = getelementptr %String* %this, i32 0, i32 1
%length = load i32* %1
%2 = getelementptr %String* %this, i32 0, i32 2
%maxlen = load i32* %2
; if length == maxlen:
%3 = icmp eq i32 %length, %maxlen
br i1 %3, label %grow_begin, label %grow_close
grow_begin:
%4 = getelementptr %String* %this, i32 0, i32 3
%factor = load i32* %4
%5 = add i32 %maxlen, %factor
call void @String_Resize(%String* %this, i32 %5)
br label %grow_close
grow_close:
%6 = getelementptr %String* %this, i32 0, i32 0
%buffer = load i8** %6
%7 = getelementptr i8* %buffer, i32 %length
store i8 %value, i8* %7
%8 = add i32 %length, 1
store i32 %8, i32* %1
ret void
}