-
-
Notifications
You must be signed in to change notification settings - Fork 687
feat: Add memory.dataUTF8() built-in function
#2974
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -749,6 +749,7 @@ export namespace BuiltinNames { | |
| export const memory_copy = "~lib/memory/memory.copy"; | ||
| export const memory_fill = "~lib/memory/memory.fill"; | ||
| export const memory_data = "~lib/memory/memory.data"; | ||
| export const memory_dataUTF8 = "~lib/memory/memory.dataUTF8"; | ||
|
|
||
| // std/typedarray.ts | ||
| export const Int8Array = "~lib/typedarray/Int8Array"; | ||
|
|
@@ -3491,6 +3492,78 @@ function builtin_memory_data(ctx: BuiltinFunctionContext): ExpressionRef { | |
| } | ||
| builtinFunctions.set(BuiltinNames.memory_data, builtin_memory_data); | ||
|
|
||
| function utf16ToUtf8(str: string) : Uint8Array { | ||
| let result = new Uint8Array(str.length*3); // max possible length is 1.5x the UTF16 encoding | ||
| let utf8Length: i32 = 0; // track actual encoded length | ||
| for (let i: i32 = 0; i < str.length; ++i) { | ||
| // UTF16 decode | ||
| let codePoint: u32 = str.charCodeAt(i); | ||
| if (codePoint >= 0xD800 && codePoint < 0xDC00) { | ||
| // could be the first half of a surrogate pair (if) | ||
| let codePoint2: u32 = str.charCodeAt(i + 1); | ||
| if (i + 1 < str.length && codePoint2 >= 0xDC00 && codePoint2 < 0xE000) { | ||
| // valid surrogate pair - combine to get the code-point | ||
| codePoint = ((codePoint&0x3FF)<<10) + (codePoint2&0x3FF) + 0x10000; | ||
| ++i; | ||
| } | ||
| } | ||
| // UTF8 encode | ||
| if (codePoint < 0x0080) { | ||
| result[utf8Length++] = codePoint; | ||
| } else if (codePoint < 0x0800) { | ||
| result[utf8Length++] = 0xC0 + (codePoint>>6); | ||
| result[utf8Length++] = 0x80 + (codePoint&0x3F); | ||
| } else if (codePoint < 0x10000) { | ||
| result[utf8Length++] = 0xE0 + ((codePoint>>12)&0x0F); | ||
| result[utf8Length++] = 0x80 + ((codePoint>>6)&0x3F); | ||
| result[utf8Length++] = 0x80 + (codePoint&0x3F); | ||
| } else { | ||
| result[utf8Length++] = 0xF0 + ((codePoint>>18)&0x07); | ||
| result[utf8Length++] = 0x80 + ((codePoint>>12)&0x3F); | ||
| result[utf8Length++] = 0x80 + ((codePoint>>6)&0x3F); | ||
| result[utf8Length++] = 0x80 + (codePoint&0x3F); | ||
| } | ||
| } | ||
| let trimmed = new Uint8Array(utf8Length); | ||
| for (let i: i32 = 0; i < utf8Length; ++i) trimmed[i] = result[i]; | ||
| return trimmed; | ||
| } | ||
|
|
||
| // memory.dataUTF8(value) -> usize | ||
| function builtin_memory_dataUTF8(ctx: BuiltinFunctionContext): ExpressionRef { | ||
| let compiler = ctx.compiler; | ||
| let module = compiler.module; | ||
| if ( | ||
| checkTypeAbsent(ctx) | | ||
| checkArgsRequired(ctx, 1) | ||
| ) return module.unreachable(); | ||
| let operands = ctx.operands; | ||
| let usizeType = compiler.options.usizeType; | ||
| let offset: i64; | ||
| let arg0 = operands[0]; | ||
| if (!arg0.isLiteralKind(LiteralKind.String)) { | ||
| compiler.error( | ||
| DiagnosticCode.String_literal_expected, | ||
| arg0.range | ||
| ); | ||
| return module.unreachable(); | ||
| } | ||
| let str = (<StringLiteralExpression>arg0).value; | ||
| let array : Uint8Array = utf16ToUtf8(str); | ||
| let arrayNullTerminated = new Uint8Array(array.length + 1); | ||
| arrayNullTerminated.set(array); | ||
| offset = compiler.addAlignedMemorySegment(arrayNullTerminated, 1).offset; | ||
| // FIXME: what if recompiles happen? recompiles are bad. | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||
| compiler.currentType = usizeType; | ||
| if (usizeType == Type.usize32) { | ||
| assert(!i64_high(offset)); | ||
| return module.i32(i64_low(offset)); | ||
| } else { | ||
| return module.i64(i64_low(offset), i64_high(offset)); | ||
| } | ||
| } | ||
| builtinFunctions.set(BuiltinNames.memory_dataUTF8, builtin_memory_dataUTF8); | ||
|
|
||
| // === GC ===================================================================================== | ||
|
|
||
| function builtin_i31_new(ctx: BuiltinFunctionContext): ExpressionRef { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1798,6 +1798,8 @@ declare namespace memory { | |
| export function data(size: i32, align?: i32): usize; | ||
| /** Gets a pointer to a pre-initialized static chunk of memory. Alignment defaults to the size of `T`. Arguments must be compile-time constants. */ | ||
| export function data<T>(values: T[], align?: i32): usize; | ||
| /** Gets a pointer to a pre-initialized static chunk of memory containing null-terminated UTF8. Value must be a compile-time constant. */ | ||
| export function dataUTF8(value: string): usize; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this API could be improved and instead enum TextEncoding { UTF8, UTF16 }
declare namespace memory {
...
export function text(value: string, encoding?: TextEncoding, nullTerminated?: bool): usize;
} |
||
|
|
||
| export namespace atomic { | ||
| /** Performs a wait operation on a 32-bit integer value in memory suspending this agent if the condition is met. */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.