I just looked at how hard it would be to encode UTF-8 to a preallocated/reusable byte buffer. It looks pretty simple and I have written the method below for StringUtil.
The issue I see with this is that it is hard to abstract into AbstractFrame, because the index is in terms of chars rather than bytes; perhaps that could be hidden inside an opaque iterator?
/* ------------------------------------------------------------ */
/** Encode a string as UTF-8 to a ByteBuffer.
 * <p>
 * Encoding stops early, without writing a partial character, as soon as the
 * next character does not fit in the space remaining between the limit and
 * the capacity of the out buffer. A surrogate pair is always encoded (or
 * deferred) as a unit, so a returned count never splits a pair.
 * <p>
 * A surrogate that is not part of a well formed pair lying entirely within
 * the range offset to offset+len is encoded as a single '?' byte, which is
 * what the JDK UTF-8 encoder produces for String.getBytes.
 * @param s The characters to encode
 * @param offset The offset of the first character to encode
 * @param len The number of characters to encode
 * @param out The ByteBuffer to encode to. It must be backed by an accessible array. Bytes will be written between the limit and the capacity, and the limit will be updated to reflect the bytes encoded.
 * @return The number of characters actually encoded, which may be less than len if space is not available in the out buffer.
 * @throws IllegalArgumentException if the out buffer is not backed by an accessible array
 */
public static int encodeUTF8to(CharSequence s,int offset, int len,ByteBuffer out)
{
    /*
      bits  sequence             Byte 1    Byte 2    Byte 3    Byte 4
       7    U+0000   U+007F      0xxxxxxx
      11    U+0080   U+07FF      110xxxxx  10xxxxxx
      16    U+0800   U+FFFF      1110xxxx  10xxxxxx  10xxxxxx
      21    U+10000  U+10FFFF    11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
    */
    if (!out.hasArray())
        throw new IllegalArgumentException("ByteBuffer must be backed by an accessible array");

    final byte[] e=out.array();
    final int base=out.arrayOffset();
    // Exclusive end of the writable region in array coordinates.
    final int eEnd=base+out.capacity();
    // Writing starts at the current limit, not at the position.
    int o=base+out.limit();
    final int sEnd=offset+len;
    int i=offset;

    while (i<sEnd)
    {
        final char c=s.charAt(i);
        if (c<0x80)
        {
            if (o+1>eEnd)
            {
                break;
            }
            e[o++] = (byte)c;
            i++;
        }
        else if (c<0x800)
        {
            if (o+2>eEnd)
            {
                break;
            }
            e[o++] = (byte)(0xc0 | (c >> 6));
            e[o++] = (byte)(0x80 | (c & 0x3f));
            i++;
        }
        else if (c<Character.MIN_SURROGATE || c>Character.MAX_SURROGATE)
        {
            if (o+3>eEnd)
            {
                break;
            }
            e[o++] = (byte)(0xe0 | (c >> 12));
            e[o++] = (byte)(0x80 | ((c >> 6) & 0x3f));
            e[o++] = (byte)(0x80 | (c & 0x3f));
            i++;
        }
        else if (Character.isHighSurrogate(c) && i+1<sEnd && Character.isLowSurrogate(s.charAt(i+1)))
        {
            // Well formed pair inside the requested range: consume both chars.
            if (o+4>eEnd)
            {
                break;
            }
            final int code=Character.toCodePoint(c,s.charAt(i+1));
            e[o++] = (byte)(0xf0 | (code >> 18));
            e[o++] = (byte)(0x80 | ((code >> 12) & 0x3f));
            e[o++] = (byte)(0x80 | ((code >> 6) & 0x3f));
            e[o++] = (byte)(0x80 | (code & 0x3f));
            i+=2;
        }
        else
        {
            // Unpaired surrogate (lone low, or high without a low inside the
            // range): it has no valid UTF-8 form, so substitute '?' rather
            // than read past the range or emit an encoded surrogate.
            if (o+1>eEnd)
            {
                break;
            }
            e[o++] = (byte)'?';
            i++;
        }
    }

    out.limit(o-base);
    return i-offset;
}