Newer
Older
vue-indexer / node_modules / text-decoder / lib / utf8-decoder.js
const b4a = require('b4a')

/**
 * https://encoding.spec.whatwg.org/#utf-8-decoder
 */
module.exports = class UTF8Decoder {
  constructor () {
    this.codePoint = 0
    this.bytesSeen = 0
    this.bytesNeeded = 0
    this.lowerBoundary = 0x80
    this.upperBoundary = 0xbf
  }

  get remaining () {
    return this.bytesSeen
  }

  decode (data) {
    // If we have a fast path, just sniff if the last part is a boundary
    if (this.bytesNeeded === 0) {
      let isBoundary = true

      for (let i = Math.max(0, data.byteLength - 4), n = data.byteLength; i < n && isBoundary; i++) {
        isBoundary = data[i] <= 0x7f
      }

      if (isBoundary) return b4a.toString(data, 'utf8')
    }

    let result = ''

    for (let i = 0, n = data.byteLength; i < n; i++) {
      const byte = data[i]

      if (this.bytesNeeded === 0) {
        if (byte <= 0x7f) {
          result += String.fromCharCode(byte)
        } else {
          this.bytesSeen = 1

          if (byte >= 0xc2 && byte <= 0xdf) {
            this.bytesNeeded = 2
            this.codePoint = byte & 0x1f
          } else if (byte >= 0xe0 && byte <= 0xef) {
            if (byte === 0xe0) this.lowerBoundary = 0xa0
            else if (byte === 0xed) this.upperBoundary = 0x9f
            this.bytesNeeded = 3
            this.codePoint = byte & 0xf
          } else if (byte >= 0xf0 && byte <= 0xf4) {
            if (byte === 0xf0) this.lowerBoundary = 0x90
            if (byte === 0xf4) this.upperBoundary = 0x8f
            this.bytesNeeded = 4
            this.codePoint = byte & 0x7
          } else {
            result += '\ufffd'
          }
        }

        continue
      }

      if (byte < this.lowerBoundary || byte > this.upperBoundary) {
        this.codePoint = 0
        this.bytesNeeded = 0
        this.bytesSeen = 0
        this.lowerBoundary = 0x80
        this.upperBoundary = 0xbf

        result += '\ufffd'

        continue
      }

      this.lowerBoundary = 0x80
      this.upperBoundary = 0xbf

      this.codePoint = (this.codePoint << 6) | (byte & 0x3f)
      this.bytesSeen++

      if (this.bytesSeen !== this.bytesNeeded) continue

      result += String.fromCodePoint(this.codePoint)

      this.codePoint = 0
      this.bytesNeeded = 0
      this.bytesSeen = 0
    }

    return result
  }

  flush () {
    const result = this.bytesNeeded > 0 ? '\ufffd' : ''

    this.codePoint = 0
    this.bytesNeeded = 0
    this.bytesSeen = 0
    this.lowerBoundary = 0x80
    this.upperBoundary = 0xbf

    return result
  }
}