jls.loader.provide('jls.io.cs.UTF_8');

jls.loader.require('jls.io.cs.Charset');

/**
 * UTF-8 charset. Extends jls.io.cs.Charset and hands out the UTF-8
 * decoder and encoder defined below.
 */
jls.io.cs.UTF_8 = jls.lang.Class.create(jls.io.cs.Charset,
{
    /**
     * Calls the parent constructor with the charset name 'UTF-8'.
     */
    initialize : function($super) {
        $super('UTF-8');
    },
    /**
     * @return a new jls.io.cs.UTF_8.Decoder bound to this charset.
     */
    newDecoder : function() {
        return new jls.io.cs.UTF_8.Decoder(this);
    },
    /**
     * @return a new jls.io.cs.UTF_8.Encoder bound to this charset.
     */
    newEncoder : function() {
        return new jls.io.cs.UTF_8.Encoder(this);
    }
});

// static
// An instance is handed to Charset.addCharset() as soon as this file is loaded.
// The Decoder/Encoder classes are only referenced when newDecoder()/newEncoder()
// is called, so defining them after this statement is fine.
jls.io.cs.Charset.addCharset(new jls.io.cs.UTF_8());

/**
 * Converts UTF-8 bytes into character values.
 */
jls.io.cs.UTF_8.Decoder = jls.lang.Class.create(
{
    /**
     * @param charset the charset this decoder was created by.
     */
    initialize : function(charset) {
        this._charset = charset;
        this._averBytes = 2.0;
        // Value emitted in place of a byte that cannot start a sequence.
        this._replacement = '?'.charCodeAt(0);
    },
    /**
     * Reads every remaining byte of input and writes one decoded value per
     * sequence into the output buffer with putChar().
     *
     * The comparisons below assume getByte() yields unsigned values (0..255)
     * - TODO confirm against the ByteBuffer implementation.
     *
     * @param input the byte buffer to read from; it is consumed until
     *        remaining() reaches 0.
     * @param buffer optional destination buffer; when omitted a CharBuffer of
     *        (remaining bytes + 1) is allocated, which is always enough since
     *        each byte yields at most one value.
     * @return the destination buffer.
     */
    decode : function(input, buffer) {
        var length = Math.round(input.remaining());
        //jls.logger.info('UTF_8.Decoder.decode(), length: ' + length + ', input.remaining(): ' + input.remaining());
        var output = buffer || jls.lang.CharBuffer.allocate(length + 1);
        // Declared locally: the continuation bytes must not leak as implicit globals.
        var b, b2, b3, b4;
        while (input.remaining() > 0) {
            b = input.getByte();
            //jls.logger.info('UTF_8.Decoder.decode(), b: ' + b.toPaddedString(3) + ' - 0x' + b.toPaddedString(2, 16) + ' - ' + b.toPaddedString(8, 2));
            if (b <= 0x7f) {
                // 0xxxxxxx: single byte, nothing to do
            } else if (b <= 0xbf) {
                // 10xxxxxx: a continuation byte can never start a sequence,
                // so it is replaced without consuming the following byte.
                b = this._replacement;
            } else if (b <= 0xdf) {
                // 110xxxxx 10xxxxxx
                // TODO Check that b2 starts with 10
                b2 = input.getByte() & 0x3f;
                b = ((b & 0x1f) << 6) | b2;
            } else if (b <= 0xef) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                b2 = input.getByte() & 0x3f;
                b3 = input.getByte() & 0x3f;
                b = ((b & 0x0f) << 12) | (b2 << 6) | b3;
            } else if (b <= 0xf7) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                b2 = input.getByte() & 0x3f;
                b3 = input.getByte() & 0x3f;
                b4 = input.getByte() & 0x3f;
                b = ((b & 0x07) << 18) | (b2 << 12) | (b3 << 6) | b4;
            } else {
                // 0xf8 and above is not a valid lead byte
                b = this._replacement;
            }
            //jls.logger.info('UTF_8.Decoder.decode() => ' + b.toPaddedString(5) + ' - 0x' + b.toPaddedString(4, 16) + ' - ' + b.toPaddedString(16, 2));
            output.putChar(b);
        }
        return output;
    }
});

/**
 * Converts character values into UTF-8 bytes.
 */
jls.io.cs.UTF_8.Encoder = jls.lang.Class.create(
{
    /**
     * @param charset the charset this encoder was created by.
     */
    initialize : function(charset) {
        this._charset = charset;
        // Bytes-per-char factor used to size the default output buffer.
        this._averBytes = 2.0;
        // Byte emitted in place of a value that is too large to encode.
        this._replacement = '?'.charCodeAt(0);
    },
    /**
     * Reads every remaining char of input and writes its UTF-8 byte sequence
     * (1 to 4 bytes) into the output buffer with putByte().
     *
     * @param input the char buffer to read from; it is consumed until
     *        remaining() reaches 0.
     * @param buffer optional destination buffer; when omitted a ByteBuffer of
     *        (remaining chars * _averBytes + 1) is allocated.
     * @return the destination buffer.
     */
    encode : function(input, buffer) {
        var length = Math.round(input.remaining() * this._averBytes);
        // NOTE(review): values above 0x07ff take 3 or 4 bytes, so this default
        // size can be too small unless ByteBuffer grows on putByte() - confirm.
        var output = buffer || jls.lang.ByteBuffer.allocate(length + 1);
        while (input.remaining() > 0) {
            var c = input.getChar();
            //jls.logger.info('UTF_8.Encoder.encode(), c: ' + c.toPaddedString(5) + ' - 0x' + c.toPaddedString(4, 16) + ' - ' + c.toPaddedString(16, 2));
            if (c <= 0x007f) {
                // 0xxxxxxx
                output.putByte(c);
            } else if (c <= 0x07ff) {
                // 110xxxxx 10xxxxxx
                output.putByte(0xc0 | ((c >>> 6) & 0x1f));
                output.putByte(0x80 | (c & 0x3f));
            } else if (c <= 0xffff) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                output.putByte(0xe0 | ((c >>> 12) & 0x0f));
                output.putByte(0x80 | ((c >>> 6) & 0x3f));
                output.putByte(0x80 | (c & 0x3f));
            } else if (c <= 0x1fffff) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                output.putByte(0xf0 | ((c >>> 18) & 0x07));
                output.putByte(0x80 | ((c >>> 12) & 0x3f));
                output.putByte(0x80 | ((c >>> 6) & 0x3f));
                output.putByte(0x80 | (c & 0x3f));
            } else {
                // does not fit in a 4-byte sequence
                output.putByte(this._replacement);
            }
        }
        return output;
    }
});