jls.loader.provide('jls.io.cs.UTF_8');

jls.loader.require('jls.io.cs.Charset');

/**
 * UTF-8 charset.
 * Extends jls.io.cs.Charset under the name 'UTF-8' and acts as the factory
 * for the decoder and encoder classes defined further down in this file.
 */
jls.io.cs.UTF_8 = jls.lang.Class.create(jls.io.cs.Charset,
{
    /**
     * Creates the charset, passing the name 'UTF-8' to the parent initializer.
     */
    initialize : function($super) {
        $super('UTF-8');
    },
    /**
     * @returns a new jls.io.cs.UTF_8.Decoder bound to this charset.
     */
    newDecoder : function() {
        return new jls.io.cs.UTF_8.Decoder(this);
    },
    /**
     * @returns a new jls.io.cs.UTF_8.Encoder bound to this charset.
     */
    newEncoder : function() {
        return new jls.io.cs.UTF_8.Encoder(this);
    }
});

// static
// Registers one UTF_8 instance with Charset as soon as this file is loaded.
jls.io.cs.Charset.addCharset(new jls.io.cs.UTF_8());

/**
 * Decoder turning UTF-8 bytes into character codes.
 */
jls.io.cs.UTF_8.Decoder = jls.lang.Class.create(
{
    /**
     * @param charset the charset that created this decoder.
     */
    initialize : function(charset) {
        this._charset = charset;
        this._averBytes = 2.0;
        // Code of '?', emitted for bytes that cannot start a sequence.
        this._replacement = '?'.charCodeAt(0);
    },
    /**
     * Decodes every remaining byte of the input into the output buffer.
     *
     * @param input the byte source; read with remaining() and getByte().
     * @param buffer optional destination; when omitted a CharBuffer of
     *        input.remaining() + 1 is allocated.
     * @returns the buffer the decoded values were written to with putChar().
     */
    decode : function(input, buffer) {
        var output = buffer || jls.lang.CharBuffer.allocate(input.remaining() + 1, true);
        // Declared locally: these were previously assigned without `var`
        // and therefore leaked into the global scope.
        var b, b2, b3, b4;
        //jls.logger.info('UTF_8.Decoder.decode(), input.remaining(): ' + input.remaining() + ', output.remaining(): ' + output.remaining());
        while (input.remaining() > 0) {
            // NOTE(review): the range checks below assume getByte() returns
            // an unsigned value (0..255) - confirm against ByteBuffer.
            b = input.getByte();
            //jls.logger.info('UTF_8.Decoder.decode(), b: ' + b.toPaddedString(3) + ' - 0x' + b.toPaddedString(2, 16) + ' - ' + b.toPaddedString(8, 2));
            if (b <= 0x7f) {
                // Single byte sequence (0xxxxxxx): nothing to do
            } else if (b <= 0xbf) {
                // Continuation byte (10xxxxxx) where a lead byte is expected:
                // it cannot start a sequence, so replace it instead of
                // treating it as a 2-byte lead and swallowing the next byte.
                b = this._replacement;
            } else if (b <= 0xdf) {
                // 2-byte sequence (110xxxxx 10xxxxxx)
                // TODO Check that b2 starts with 10
                b2 = input.getByte() & 0x3f;
                b = ((b & 0x1f) << 6) | b2;
            } else if (b <= 0xef) {
                // 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
                b2 = input.getByte() & 0x3f;
                b3 = input.getByte() & 0x3f;
                b = ((b & 0x0f) << 12) | (b2 << 6) | b3;
            } else if (b <= 0xf7) {
                // 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
                // NOTE(review): the result can exceed 0xFFFF; how putChar()
                // handles such a value is not visible here - confirm.
                b2 = input.getByte() & 0x3f;
                b3 = input.getByte() & 0x3f;
                b4 = input.getByte() & 0x3f;
                b = ((b & 0x07) << 18) | (b2 << 12) | (b3 << 6) | b4;
            } else {
                // 0xf8 and above never appear as a lead byte
                b = this._replacement;
            }
            //jls.logger.info('UTF_8.Decoder.decode() => ' + b.toPaddedString(5) + ' - 0x' + b.toPaddedString(4, 16) + ' - ' + b.toPaddedString(16, 2));
            output.putChar(b);
        }
        return output;
    }
});

/**
 * Encoder turning character codes into UTF-8 bytes.
 */
jls.io.cs.UTF_8.Encoder = jls.lang.Class.create(
{
    /**
     * @param charset the charset that created this encoder.
     */
    initialize : function(charset) {
        this._charset = charset;
        // Bytes-per-char estimate used to size the default output buffer.
        this._averBytes = 2.0;
        // Code of '?', emitted for values above 0x1fffff.
        this._replacement = '?'.charCodeAt(0);
    },
    /**
     * Encodes every remaining char of the input into the output buffer.
     *
     * @param input the char source; read with remaining() and getChar().
     * @param buffer optional destination; when omitted a ByteBuffer of
     *        round(input.remaining() * _averBytes) + 1 is allocated.
     * @returns the buffer the bytes were written to with putByte().
     */
    encode : function(input, buffer) {
        var length = Math.round(input.remaining() * this._averBytes);
        // NOTE(review): values in 0x0800..0xffff emit 3 bytes each, more than
        // the 2 bytes per char estimated here; confirm that ByteBuffer grows
        // on demand or that callers supply a large enough buffer.
        var output = buffer || jls.lang.ByteBuffer.allocate(length + 1);
        jls.logger.debug('UTF_8.Encoder.encode(), input.remaining(): ' + input.remaining() + ', length: ' + length + ', output.remaining(): ' + output.remaining());
        while (input.remaining() > 0) {
            var c = input.getChar();
            //jls.logger.info('UTF_8.Encoder.encode(), c: ' + c.toPaddedString(5) + ' - 0x' + c.toPaddedString(4, 16) + ' - ' + c.toPaddedString(16, 2));
            if (c <= 0x007f) {
                // 0xxxxxxx
                output.putByte(c);
            } else if (c <= 0x07ff) {
                // 110xxxxx 10xxxxxx
                output.putByte(0xc0 | ((c >>> 6) & 0x1f));
                output.putByte(0x80 | (c & 0x3f));
            } else if (c <= 0xffff) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                output.putByte(0xe0 | ((c >>> 12) & 0x0f));
                output.putByte(0x80 | ((c >>> 6) & 0x3f));
                output.putByte(0x80 | (c & 0x3f));
            } else if (c <= 0x1fffff) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                output.putByte(0xf0 | ((c >>> 18) & 0x07));
                output.putByte(0x80 | ((c >>> 12) & 0x3f));
                output.putByte(0x80 | ((c >>> 6) & 0x3f));
                output.putByte(0x80 | (c & 0x3f));
            } else {
                output.putByte(this._replacement);
            }
        }
        return output;
    }
});