1 jls.loader.provide('jls.io.cs.UTF_8');
  2 
  3 jls.loader.require('jls.io.cs.Charset');
  4 
  5 jls.io.cs.UTF_8 = jls.lang.Class.create(jls.io.cs.Charset,
  6 {
  7     initialize : function($super) {
  8     	$super('UTF-8');
  9     },
 10     newDecoder : function() {
 11         return new jls.io.cs.UTF_8.Decoder(this);
 12     },
 13     newEncoder : function() {
 14         return new jls.io.cs.UTF_8.Encoder(this);
 15     }
 16 });
 17 
 18 // static
 19 jls.io.cs.Charset.addCharset(new jls.io.cs.UTF_8());
 20 
 21 jls.io.cs.UTF_8.Decoder = jls.lang.Class.create(
 22 {
 23     initialize : function(charset) {
 24         this._charset = charset;
 25         this._averBytes = 2.0;
 26         this._replacement = '?'.charCodeAt(0);
 27     },
 28     decode : function(input, buffer) {
 29     	var length = Math.round(input.remaining());
 30         //jls.logger.info('UTF_8.Decoder.decode(), length: ' + length + ', input.remaining(): ' + input.remaining());
 31     	var output = buffer || jls.lang.CharBuffer.allocate(length + 1);
 32     	while (input.remaining() > 0) {
 33     		var b = input.getByte();
 34             //jls.logger.info('UTF_8.Decoder.decode(), b: ' + b.toPaddedString(3) + ' - 0x' + b.toPaddedString(2, 16) + ' - ' + b.toPaddedString(8, 2));
 35             if (b <= 0x7f) {
 36                 // nothing to do
 37             } else if (b <= 0xdf) {
 38                 // TODO Check that b2 starts with 10
 39                 b2 = input.getByte() & 0x3f;
 40                 b = ((b & 0x1f) << 6) | b2;
 41             } else if (b <= 0xef) {
 42                 b2 = input.getByte() & 0x3f;
 43                 b3 = input.getByte() & 0x3f;
 44                 b = ((b & 0x0f) << 12) | (b2 << 6) | b3;
 45             } else if (b <= 0xf7) {
 46                 b2 = input.getByte() & 0x3f;
 47                 b3 = input.getByte() & 0x3f;
 48                 b4 = input.getByte() & 0x3f;
 49                 b = ((b & 0x07) << 18) | (b2 << 12) | (b3 << 6) | b4;
 50             } else {
 51                 b = this._replacement;
 52             }
 53             //jls.logger.info('UTF_8.Decoder.decode() => ' + b.toPaddedString(5) + ' - 0x' + b.toPaddedString(4, 16) + ' - ' + b.toPaddedString(16, 2));
 54             output.putChar(b);
 55     	}
 56         return output;
 57     }
 58 });
 59 
 60 jls.io.cs.UTF_8.Encoder = jls.lang.Class.create(
 61 {
 62     initialize : function(charset) {
 63         this._charset = charset;
 64         this._averBytes = 2.0;
 65         this._replacement = '?'.charCodeAt(0);
 66     },
 67     encode : function(input, buffer) {
 68     	var length = Math.round(input.remaining() * this._averBytes);
 69     	var output = buffer || jls.lang.ByteBuffer.allocate(length + 1);
 70     	while (input.remaining() > 0) {
 71     		var c = input.getChar();
 72             //jls.logger.info('UTF_8.Encoder.encode(), c: ' + c.toPaddedString(5) + ' - 0x' + c.toPaddedString(4, 16) + ' - ' + c.toPaddedString(16, 2));
 73             if (c <= 0x007f) {
 74                 output.putByte(c);
 75             } else if (c <= 0x07ff) {
 76                 output.putByte(0xc0 | ((c >>> 6) & 0x1f));
 77                 output.putByte(0x80 | (c & 0x3f));
 78             } else if (c <= 0xffff) {
 79                 output.putByte(0xe0 | ((c >>> 12) & 0x0f));
 80                 output.putByte(0x80 | ((c >>> 6) & 0x3f));
 81                 output.putByte(0x80 | (c & 0x3f));
 82             } else if (c <= 0x1fffff) {
 83                 output.putByte(0xf0 | ((c >>> 18) & 0x07));
 84                 output.putByte(0x80 | ((c >>> 12) & 0x3f));
 85                 output.putByte(0x80 | ((c >>> 6) & 0x3f));
 86                 output.putByte(0x80 | (c & 0x3f));
 87     		} else {
 88                 output.putByte(this._replacement);
 89     		}
 90     	}
 91         return output;
 92     }
 93 });
 94 
 95