jls.loader.provide('jls.io.cs.UTF_8');

jls.loader.require('jls.io.cs.Charset');

/**
 * UTF-8 charset, created through jls.lang.Class.create with
 * jls.io.cs.Charset passed as the first argument.
 * Acts as a factory for the UTF-8 decoder and encoder declared in this file.
 */
jls.io.cs.UTF_8 = jls.lang.Class.create(jls.io.cs.Charset,
{
    /**
     * Invokes the $super initializer with the charset name 'UTF-8'.
     */
    initialize : function($super) {
        $super('UTF-8');
    },
    /**
     * @returns a new jls.io.cs.UTF_8.Encoder constructed with this charset.
     */
    newEncoder : function() {
        var encoder = new jls.io.cs.UTF_8.Encoder(this);
        return encoder;
    },
    /**
     * @returns a new jls.io.cs.UTF_8.Decoder constructed with this charset.
     */
    newDecoder : function() {
        var decoder = new jls.io.cs.UTF_8.Decoder(this);
        return decoder;
    }
});

// Static registration: runs once when this file is loaded. Creates a UTF_8
// instance and hands it to jls.io.cs.Charset.addCharset (presumably so the
// charset can later be looked up by its name -- confirm against Charset).
// Decoder and Encoder are attached to jls.io.cs.UTF_8 further down; that is
// fine because they are only dereferenced when newDecoder()/newEncoder() run.
jls.io.cs.Charset.addCharset(new jls.io.cs.UTF_8());

/**
 * Decoder turning UTF-8 encoded bytes into UTF-16 code units.
 */
jls.io.cs.UTF_8.Decoder = jls.lang.Class.create(
{
    /**
     * @param charset the charset instance this decoder belongs to.
     */
    initialize : function(charset) {
        this._charset = charset;
        this._averBytes = 2.0;
        // Code unit written in place of bytes that cannot be decoded.
        this._replacement = '?'.charCodeAt(0);
    },
    /**
     * Decodes every remaining byte of the input and writes the resulting
     * UTF-16 code units to the output with putChar().
     *
     * Code points above 0xFFFF are written as a surrogate pair (two putChar
     * calls). Bytes that cannot start a sequence (0x80-0xBF, 0xF8-0xFF) and
     * 4-byte sequences above 0x10FFFF are written as the replacement char.
     * Continuation bytes are not validated and a truncated trailing sequence
     * is not handled here: getByte() is simply called for the missing bytes.
     *
     * @param input the byte source; must provide remaining() and getByte(),
     *        getByte() being expected to return an unsigned value (0-255).
     * @param buffer optional char buffer to fill; when omitted a buffer of
     *        input.remaining() + 1 chars is allocated (never more chars than
     *        bytes are produced, so this is always large enough).
     * @returns the buffer the chars were written to.
     */
    decode : function(input, buffer) {
        var output = buffer || jls.lang.CharBuffer.allocate(input.remaining() + 1, true);
        // Declared locally: the continuation bytes must not leak as globals.
        var b, b2, b3, b4;
        while (input.remaining() > 0) {
            b = input.getByte();
            if (b <= 0x7f) {
                // 0xxxxxxx: single byte, the byte is the code point
            } else if (b <= 0xbf) {
                // 10xxxxxx: a continuation byte cannot start a sequence
                b = this._replacement;
            } else if (b <= 0xdf) {
                // 110xxxxx 10xxxxxx
                // TODO Check that b2 starts with 10
                b2 = input.getByte() & 0x3f;
                b = ((b & 0x1f) << 6) | b2;
            } else if (b <= 0xef) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                b2 = input.getByte() & 0x3f;
                b3 = input.getByte() & 0x3f;
                b = ((b & 0x0f) << 12) | (b2 << 6) | b3;
            } else if (b <= 0xf7) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                b2 = input.getByte() & 0x3f;
                b3 = input.getByte() & 0x3f;
                b4 = input.getByte() & 0x3f;
                b = ((b & 0x07) << 18) | (b2 << 12) | (b3 << 6) | b4;
                if (b > 0x10ffff) {
                    // beyond the Unicode range
                    b = this._replacement;
                } else if (b > 0xffff) {
                    // Does not fit in one UTF-16 code unit: write the high
                    // surrogate now, the low surrogate is written below.
                    b -= 0x10000;
                    output.putChar(0xd800 | (b >> 10));
                    b = 0xdc00 | (b & 0x3ff);
                }
            } else {
                // 11111xxx is never valid in UTF-8
                b = this._replacement;
            }
            output.putChar(b);
        }
        return output;
    }
});

/**
 * Encoder turning UTF-16 code units into UTF-8 encoded bytes.
 */
jls.io.cs.UTF_8.Encoder = jls.lang.Class.create(
{
    /**
     * @param charset the charset instance this encoder belongs to.
     */
    initialize : function(charset) {
        this._charset = charset;
        this._averBytes = 2.0;
        // Worst case per UTF-16 code unit: a BMP char >= 0x800 takes 3 bytes,
        // a surrogate pair takes 4 bytes for 2 code units.
        this._maxBytes = 3.0;
        // Byte written in place of values that cannot be encoded.
        this._replacement = '?'.charCodeAt(0);
    },
    /**
     * Encodes every remaining char of the input and writes the resulting
     * bytes to the output with putByte().
     *
     * A high surrogate immediately followed by a low surrogate is combined
     * into one code point and written as a single 4-byte sequence. A lone
     * surrogate is still written as a 3-byte sequence, as it was before
     * surrogate pairs were combined.
     *
     * @param input the char source; must provide remaining() and getChar(),
     *        getChar() returning the numeric value of the code unit.
     * @param buffer optional byte buffer to fill; when omitted a buffer sized
     *        for the worst case (3 bytes per char, plus 1) is allocated.
     * @returns the buffer the bytes were written to.
     */
    encode : function(input, buffer) {
        var length = Math.round(input.remaining() * this._maxBytes);
        var output = buffer || jls.lang.ByteBuffer.allocate(length + 1);
        jls.logger.debug('UTF_8.Encoder.encode(), input.remaining(): ' + input.remaining() + ', length: ' + length + ', output.remaining(): ' + output.remaining());
        // Char read while looking for a low surrogate that turned out not to
        // be one; it is encoded by the next iteration.
        var pending = null;
        var c, low;
        while (pending !== null || input.remaining() > 0) {
            if (pending !== null) {
                c = pending;
                pending = null;
            } else {
                c = input.getChar();
            }
            if (c >= 0xd800 && c <= 0xdbff && input.remaining() > 0) {
                low = input.getChar();
                if (low >= 0xdc00 && low <= 0xdfff) {
                    c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
                } else {
                    pending = low;
                }
            }
            if (c <= 0x007f) {
                output.putByte(c);
            } else if (c <= 0x07ff) {
                output.putByte(0xc0 | ((c >>> 6) & 0x1f));
                output.putByte(0x80 | (c & 0x3f));
            } else if (c <= 0xffff) {
                output.putByte(0xe0 | ((c >>> 12) & 0x0f));
                output.putByte(0x80 | ((c >>> 6) & 0x3f));
                output.putByte(0x80 | (c & 0x3f));
            } else if (c <= 0x10ffff) {
                output.putByte(0xf0 | ((c >>> 18) & 0x07));
                output.putByte(0x80 | ((c >>> 12) & 0x3f));
                output.putByte(0x80 | ((c >>> 6) & 0x3f));
                output.putByte(0x80 | (c & 0x3f));
            } else {
                // beyond the Unicode range
                output.putByte(this._replacement);
            }
        }
        return output;
    }
});

