const b4a = require('b4a') /** * https://encoding.spec.whatwg.org/#utf-8-decoder */ module.exports = class UTF8Decoder { constructor () { this.codePoint = 0 this.bytesSeen = 0 this.bytesNeeded = 0 this.lowerBoundary = 0x80 this.upperBoundary = 0xbf } get remaining () { return this.bytesSeen } decode (data) { // If we have a fast path, just sniff if the last part is a boundary if (this.bytesNeeded === 0) { let isBoundary = true for (let i = Math.max(0, data.byteLength - 4), n = data.byteLength; i < n && isBoundary; i++) { isBoundary = data[i] <= 0x7f } if (isBoundary) return b4a.toString(data, 'utf8') } let result = '' for (let i = 0, n = data.byteLength; i < n; i++) { const byte = data[i] if (this.bytesNeeded === 0) { if (byte <= 0x7f) { result += String.fromCharCode(byte) } else { this.bytesSeen = 1 if (byte >= 0xc2 && byte <= 0xdf) { this.bytesNeeded = 2 this.codePoint = byte & 0x1f } else if (byte >= 0xe0 && byte <= 0xef) { if (byte === 0xe0) this.lowerBoundary = 0xa0 else if (byte === 0xed) this.upperBoundary = 0x9f this.bytesNeeded = 3 this.codePoint = byte & 0xf } else if (byte >= 0xf0 && byte <= 0xf4) { if (byte === 0xf0) this.lowerBoundary = 0x90 if (byte === 0xf4) this.upperBoundary = 0x8f this.bytesNeeded = 4 this.codePoint = byte & 0x7 } else { result += '\ufffd' } } continue } if (byte < this.lowerBoundary || byte > this.upperBoundary) { this.codePoint = 0 this.bytesNeeded = 0 this.bytesSeen = 0 this.lowerBoundary = 0x80 this.upperBoundary = 0xbf result += '\ufffd' continue } this.lowerBoundary = 0x80 this.upperBoundary = 0xbf this.codePoint = (this.codePoint << 6) | (byte & 0x3f) this.bytesSeen++ if (this.bytesSeen !== this.bytesNeeded) continue result += String.fromCodePoint(this.codePoint) this.codePoint = 0 this.bytesNeeded = 0 this.bytesSeen = 0 } return result } flush () { const result = this.bytesNeeded > 0 ? '\ufffd' : '' this.codePoint = 0 this.bytesNeeded = 0 this.bytesSeen = 0 this.lowerBoundary = 0x80 this.upperBoundary = 0xbf return result } }