From 8b93872631f683edfde061089acac95688f0e5bc Mon Sep 17 00:00:00 2001 From: Joshua Tenner Date: Wed, 24 Jul 2019 14:41:16 -0400 Subject: [PATCH 1/5] [Implement] HEX encoding --- assembly/buffer/index.ts | 95 +++++++++++++++++++++++++++++++++++++++- assembly/node.d.ts | 11 +++++ tests/buffer.spec.ts | 20 +++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/assembly/buffer/index.ts b/assembly/buffer/index.ts index 2d43385..986b039 100644 --- a/assembly/buffer/index.ts +++ b/assembly/buffer/index.ts @@ -1,4 +1,4 @@ -import { BLOCK_MAXSIZE } from "rt/common"; +import { BLOCK_MAXSIZE, BLOCK, BLOCK_OVERHEAD } from "rt/common"; import { E_INVALIDLENGTH, E_INDEXOUTOFRANGE } from "util/error"; import { Uint8Array } from "typedarray"; @@ -49,3 +49,96 @@ export class Buffer extends Uint8Array { return load(this.dataStart + usize(offset)); } } + +export namespace Buffer { + export namespace HEX { + /** Calculates the two char combination from the byte. */ + @inline export function charsFromByte(byte: u32): u32 { + let top = (byte >>> 4) & 0xF; + let bottom = (0xF & byte); + top += select(0x57, 0x30, top > 9); + bottom += select(0x57, 0x30, bottom > 9); + return (bottom << 16) | top; + } + + @inline export function byteFromChars(chars: u32): i32 { + let top = chars & 0xFFFF; + let bottom = chars >>> 16; + + // get the top byte + if (top >= 0x30 && top <= 0x39) top -= 0x30; // 0-9 + else if (top >= 0x61 && top <= 0x66) top -= 0x57; // a-f + else if (top >= 0x41 && top <= 0x46) top -= 0x37; // A-F + else return -1; + + // get the bottom byte + if (bottom >= 0x30 && bottom <= 0x39) bottom -= 0x30; // 0-9 + else if (bottom >= 0x61 && bottom <= 0x66) bottom -= 0x57; // a-f + else if (bottom >= 0x41 && bottom <= 0x46) bottom -= 0x37; // A-F + else return -1; + + return (top << 4) | bottom; + } + + /** Calculates the byte length of the specified string when encoded as HEX. */ + export function byteLength(str: string): i32 { + let ptr = changetype(str); + let byteCount = changetype(changetype(str) - BLOCK_OVERHEAD).rtSize; + // The string length must be even because the bytes come in pairs of characters two wide + if (byteCount & 0x3) return 0; // encoding fails and returns an empty ArrayBuffer + + // start length calculation loop + let length = 0; + byteCount += ptr; + + while (ptr < byteCount) { + let result = byteFromChars(load(ptr)); + if (result == -1) return 0; // invalid character + length += 1; + ptr += 4; + } + return length; + } + + /** Creates an ArrayBuffer from a given string that is encoded in the HEX format. */ + export function encode(str: string): ArrayBuffer { + let bufferLength = byteLength(str); + // short path: string is not a valid hex string, return a new empty ArrayBuffer + if (bufferLength == 0) return changetype(__retain(__alloc(0, idof()))); + let ptr = changetype(str); + + // long path: loop over each byte and perform the conversion + let byteEnd = changetype(changetype(str) - BLOCK_OVERHEAD).rtSize + ptr; + let result = __alloc(bufferLength, idof()); + let i: usize = 0; + while (ptr < byteEnd) { + store(result + i, byteFromChars(load(ptr))); + ptr += 4; + i += 1; + } + return changetype(result); + } + + /** Creates an String from a given ArrayBuffer that is decoded in the HEX format. */ + export function decode(buff: ArrayBuffer): string { + return decodeUnsafe(changetype(buff), buff.byteLength); + } + + /** Decodes a block of memory from the given pointer with the given length to a utf16le encoded string in HEX format. */ + @unsafe export function decodeUnsafe(ptr: usize, length: i32): string { + let stringByteLength = length << 2; // length * (2 bytes per char) * (2 chars per input byte) + let result = __alloc(stringByteLength, idof()); + let i = 0; + let inputByteLength = length + ptr; + + // loop over each byte and store a `u32` for each one + while (ptr < inputByteLength) { + store(result + i, charsFromByte(load(ptr))); + i += 4; + ptr++; + } + + return changetype(result); + } + } +} diff --git a/assembly/node.d.ts b/assembly/node.d.ts index d6da2cb..49a60fc 100644 --- a/assembly/node.d.ts +++ b/assembly/node.d.ts @@ -14,3 +14,14 @@ declare class Buffer extends Uint8Array { /** Reads a signed integer at the designated offset. */ readInt8(offset?: i32): i8; } + +declare namespace Buffer { + export namespace HEX { + /** Creates an ArrayBuffer from a given string that is encoded in the HEX format. */ + export function encode(str: string): ArrayBuffer; + /** Creates an String from a given ArrayBuffer that is decoded in the HEX format. */ + export function decode(buffer: ArrayBuffer): string; + /** Decodes a block of memory from the given pointer with the given length to a utf16le encoded string in HEX format. */ + export function decodeUnsafe(ptr: usize, byteLength: i32): string; + } +} diff --git a/tests/buffer.spec.ts b/tests/buffer.spec.ts index 8645074..fbd160d 100644 --- a/tests/buffer.spec.ts +++ b/tests/buffer.spec.ts @@ -11,6 +11,12 @@ */ import { BLOCK_MAXSIZE } from "rt/common"; +function createFrom(values: valueof[]): T { + let result = instantiate(values.length); + for (let i = 0; i < values.length; i++) result[i] = values[i]; + return result; +} + describe("buffer", () => { test("#constructor", () => { expect(new Buffer(0)).toBeTruthy(); @@ -104,4 +110,18 @@ describe("buffer", () => { // newBuff.readInt8(5); // }).toThrow(); }); + + test("#Hex.encode", () => { + let actual = "000102030405060708090a0b0c0d0e0f102030405060708090a0b0c0d0e0f0"; + let exampleBuffer = createFrom([0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0]); + let encoded = Buffer.HEX.encode(actual); + expect(encoded).toStrictEqual(exampleBuffer.buffer); + }); + + test("#Hex.decode", () => { + let expected = "000102030405060708090a0b0c0d0e0f102030405060708090a0b0c0d0e0f0"; + let exampleBuffer = createFrom([0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0]); + let decoded = Buffer.HEX.decode(exampleBuffer.buffer); + expect(decoded).toStrictEqual(expected); + }); }); From 1fa828728c8ed47871c46af9572ac6cf6ddffa46 Mon Sep 17 00:00:00 2001 From: Joshua Tenner Date: Thu, 25 Jul 2019 11:00:48 -0400 Subject: [PATCH 2/5] [Optimize] Hex encode --- assembly/buffer/index.ts | 77 +++++++++++++++++++++++----------------- tests/node.js | 4 +-- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/assembly/buffer/index.ts b/assembly/buffer/index.ts index 986b039..14f7b9a 100644 --- a/assembly/buffer/index.ts +++ b/assembly/buffer/index.ts @@ -61,41 +61,23 @@ export namespace Buffer { return (bottom << 16) | top; } - @inline export function byteFromChars(chars: u32): i32 { - let top = chars & 0xFFFF; - let bottom = chars >>> 16; - - // get the top byte - if (top >= 0x30 && top <= 0x39) top -= 0x30; // 0-9 - else if (top >= 0x61 && top <= 0x66) top -= 0x57; // a-f - else if (top >= 0x41 && top <= 0x46) top -= 0x37; // A-F - else return -1; - - // get the bottom byte - if (bottom >= 0x30 && bottom <= 0x39) bottom -= 0x30; // 0-9 - else if (bottom >= 0x61 && bottom <= 0x66) bottom -= 0x57; // a-f - else if (bottom >= 0x41 && bottom <= 0x46) bottom -= 0x37; // A-F - else return -1; - - return (top << 4) | bottom; - } - /** Calculates the byte length of the specified string when encoded as HEX. */ export function byteLength(str: string): i32 { let ptr = changetype(str); let byteCount = changetype(changetype(str) - BLOCK_OVERHEAD).rtSize; + let length = byteCount >> 2; // The string length must be even because the bytes come in pairs of characters two wide if (byteCount & 0x3) return 0; // encoding fails and returns an empty ArrayBuffer - // start length calculation loop - let length = 0; byteCount += ptr; - while (ptr < byteCount) { - let result = byteFromChars(load(ptr)); - if (result == -1) return 0; // invalid character - length += 1; - ptr += 4; + var char = load(ptr); + if ((char >= 0x30 && char <= 0x39) || (char >= 0x61 && char <= 0x66) || (char >= 0x41 && char <= 0x46)) { + ptr += 2; + continue; + } else { + return 0; + } } return length; } @@ -105,16 +87,45 @@ export namespace Buffer { let bufferLength = byteLength(str); // short path: string is not a valid hex string, return a new empty ArrayBuffer if (bufferLength == 0) return changetype(__retain(__alloc(0, idof()))); - let ptr = changetype(str); - // long path: loop over each byte and perform the conversion + // long path: loop over each enociding pair and perform the conversion + let ptr = changetype(str); let byteEnd = changetype(changetype(str) - BLOCK_OVERHEAD).rtSize + ptr; let result = __alloc(bufferLength, idof()); - let i: usize = 0; - while (ptr < byteEnd) { - store(result + i, byteFromChars(load(ptr))); - ptr += 4; - i += 1; + let b: u32 = 0; + let outChar = 0; + for (let i: usize = 0; ptr < byteEnd; i++) { + let odd = i & 1; + b = odd ? (b >>> 16) : load(ptr); + outChar <<= 4; + switch (b & 0xFF) { + case 0x30: outChar |= 0x0; break; + case 0x31: outChar |= 0x1; break; + case 0x32: outChar |= 0x2; break; + case 0x33: outChar |= 0x3; break; + case 0x34: outChar |= 0x4; break; + case 0x35: outChar |= 0x5; break; + case 0x36: outChar |= 0x6; break; + case 0x37: outChar |= 0x7; break; + case 0x38: outChar |= 0x8; break; + case 0x39: outChar |= 0x9; break; + case 0x61: outChar |= 0xa; break; + case 0x62: outChar |= 0xb; break; + case 0x63: outChar |= 0xc; break; + case 0x64: outChar |= 0xd; break; + case 0x65: outChar |= 0xe; break; + case 0x66: outChar |= 0xf; break; + case 0x41: outChar |= 0xa; break; + case 0x42: outChar |= 0xb; break; + case 0x43: outChar |= 0xc; break; + case 0x44: outChar |= 0xd; break; + case 0x45: outChar |= 0xe; break; + case 0x46: outChar |= 0xf; break; + } + if (odd) { + store(result + (i >> 1), (outChar & 0xFF)); + ptr += 4; + } } return changetype(result); } diff --git a/tests/node.js b/tests/node.js index c652c1e..0b5a3d4 100644 --- a/tests/node.js +++ b/tests/node.js @@ -109,9 +109,7 @@ function runTest(file, type, binary, wat) { + "." + type + ".wat"; // should not block testing - fs.writeFile(watPath, wat, (err) => { - if (err) console.warn(err); - }); + fs.writeFileSync(watPath, wat); const context = new TestContext({ fileName: file, From 392cefe1790f95af01664079185e29f84485231a Mon Sep 17 00:00:00 2001 From: Joshua Tenner Date: Thu, 25 Jul 2019 12:17:19 -0400 Subject: [PATCH 3/5] [Optimize] hex#encode --- assembly/buffer/index.ts | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/assembly/buffer/index.ts b/assembly/buffer/index.ts index 14f7b9a..c4869e5 100644 --- a/assembly/buffer/index.ts +++ b/assembly/buffer/index.ts @@ -72,7 +72,10 @@ export namespace Buffer { byteCount += ptr; while (ptr < byteCount) { var char = load(ptr); - if ((char >= 0x30 && char <= 0x39) || (char >= 0x61 && char <= 0x66) || (char >= 0x41 && char <= 0x46)) { + if ( + ((char - 0x30) <= 0x9) + || ((char - 0x61) <= 0x5) + || ((char - 0x41) <= 0x5)) { ptr += 2; continue; } else { @@ -98,29 +101,13 @@ export namespace Buffer { let odd = i & 1; b = odd ? (b >>> 16) : load(ptr); outChar <<= 4; - switch (b & 0xFF) { - case 0x30: outChar |= 0x0; break; - case 0x31: outChar |= 0x1; break; - case 0x32: outChar |= 0x2; break; - case 0x33: outChar |= 0x3; break; - case 0x34: outChar |= 0x4; break; - case 0x35: outChar |= 0x5; break; - case 0x36: outChar |= 0x6; break; - case 0x37: outChar |= 0x7; break; - case 0x38: outChar |= 0x8; break; - case 0x39: outChar |= 0x9; break; - case 0x61: outChar |= 0xa; break; - case 0x62: outChar |= 0xb; break; - case 0x63: outChar |= 0xc; break; - case 0x64: outChar |= 0xd; break; - case 0x65: outChar |= 0xe; break; - case 0x66: outChar |= 0xf; break; - case 0x41: outChar |= 0xa; break; - case 0x42: outChar |= 0xb; break; - case 0x43: outChar |= 0xc; break; - case 0x44: outChar |= 0xd; break; - case 0x45: outChar |= 0xe; break; - case 0x46: outChar |= 0xf; break; + let c = b & 0xFF; + if ((c - 0x30) <= 9) { + outChar |= c - 0x30; + } else if ((c - 0x61) <= 0x5) { + outChar |= c - 0x57; + } else if (c - 0x41 <= 0x5) { + outChar |= c - 0x37; } if (odd) { store(result + (i >> 1), (outChar & 0xFF)); From 1b5bc7ce75f6ed6ce021bd40f12df3ef621c9a1e Mon Sep 17 00:00:00 2001 From: Joshua Tenner Date: Thu, 25 Jul 2019 12:54:01 -0400 Subject: [PATCH 4/5] [Fix] docs, switch to single if --- assembly/buffer/index.ts | 43 ++++++++++++++++++++++++---------------- assembly/node.d.ts | 4 ++-- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/assembly/buffer/index.ts b/assembly/buffer/index.ts index c4869e5..ff97f4d 100644 --- a/assembly/buffer/index.ts +++ b/assembly/buffer/index.ts @@ -72,10 +72,9 @@ export namespace Buffer { byteCount += ptr; while (ptr < byteCount) { var char = load(ptr); - if ( - ((char - 0x30) <= 0x9) - || ((char - 0x61) <= 0x5) - || ((char - 0x41) <= 0x5)) { + if ( ((char - 0x30) <= 0x9) + || ((char - 0x61) <= 0x5) + || ((char - 0x41) <= 0x5)) { ptr += 2; continue; } else { @@ -89,7 +88,7 @@ export namespace Buffer { export function encode(str: string): ArrayBuffer { let bufferLength = byteLength(str); // short path: string is not a valid hex string, return a new empty ArrayBuffer - if (bufferLength == 0) return changetype(__retain(__alloc(0, idof()))); + if (bufferLength == 0) return changetype(__alloc(0, idof())); // long path: loop over each enociding pair and perform the conversion let ptr = changetype(str); @@ -99,30 +98,40 @@ export namespace Buffer { let outChar = 0; for (let i: usize = 0; ptr < byteEnd; i++) { let odd = i & 1; - b = odd ? (b >>> 16) : load(ptr); - outChar <<= 4; - let c = b & 0xFF; - if ((c - 0x30) <= 9) { - outChar |= c - 0x30; - } else if ((c - 0x61) <= 0x5) { - outChar |= c - 0x57; - } else if (c - 0x41 <= 0x5) { - outChar |= c - 0x37; - } if (odd) { + outChar <<= 4; + b >>>= 16; + if ((b - 0x30) <= 9) { + outChar |= b - 0x30; + } else if ((b - 0x61) <= 0x5) { + outChar |= b - 0x57; + } else if (b - 0x41 <= 0x5) { + outChar |= b - 0x37; + } store(result + (i >> 1), (outChar & 0xFF)); ptr += 4; + } else { + b = load(ptr); + outChar <<= 4; + let c = b & 0xFF; + if ((c - 0x30) <= 9) { + outChar |= c - 0x30; + } else if ((c - 0x61) <= 0x5) { + outChar |= c - 0x57; + } else if (c - 0x41 <= 0x5) { + outChar |= c - 0x37; + } } } return changetype(result); } - /** Creates an String from a given ArrayBuffer that is decoded in the HEX format. */ + /** Creates a string from a given ArrayBuffer that is decoded into hex format. */ export function decode(buff: ArrayBuffer): string { return decodeUnsafe(changetype(buff), buff.byteLength); } - /** Decodes a block of memory from the given pointer with the given length to a utf16le encoded string in HEX format. */ + /** Decodes a chunk of memory to a utf16le encoded string in hex format. */ @unsafe export function decodeUnsafe(ptr: usize, length: i32): string { let stringByteLength = length << 2; // length * (2 bytes per char) * (2 chars per input byte) let result = __alloc(stringByteLength, idof()); diff --git a/assembly/node.d.ts b/assembly/node.d.ts index 49a60fc..5d87f2d 100644 --- a/assembly/node.d.ts +++ b/assembly/node.d.ts @@ -19,9 +19,9 @@ declare namespace Buffer { export namespace HEX { /** Creates an ArrayBuffer from a given string that is encoded in the HEX format. */ export function encode(str: string): ArrayBuffer; - /** Creates an String from a given ArrayBuffer that is decoded in the HEX format. */ + /** Creates a string from a given ArrayBuffer that is decoded into hex format. */ export function decode(buffer: ArrayBuffer): string; - /** Decodes a block of memory from the given pointer with the given length to a utf16le encoded string in HEX format. */ + /** Decodes a chunk of memory to a utf16le encoded string in hex format. */ export function decodeUnsafe(ptr: usize, byteLength: i32): string; } } From 498c4356ef88d192db10b34ab42519915d346511 Mon Sep 17 00:00:00 2001 From: jtenner Date: Thu, 25 Jul 2019 12:59:13 -0400 Subject: [PATCH 5/5] Update node.d.ts --- assembly/node.d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembly/node.d.ts b/assembly/node.d.ts index 5d87f2d..ea1f898 100644 --- a/assembly/node.d.ts +++ b/assembly/node.d.ts @@ -17,7 +17,7 @@ declare class Buffer extends Uint8Array { declare namespace Buffer { export namespace HEX { - /** Creates an ArrayBuffer from a given string that is encoded in the HEX format. */ + /** Creates an ArrayBuffer from a given string that is encoded in the hex format. */ export function encode(str: string): ArrayBuffer; /** Creates a string from a given ArrayBuffer that is decoded into hex format. */ export function decode(buffer: ArrayBuffer): string;