The following code should work; just specify the number of bytes to use per character.
/**
 * GWT entry point that demonstrates a fixed-width, big-endian conversion
 * between a {@link String} and a {@code byte[]}.
 *
 * <p>Every Java {@code char} is written as exactly {@code bytesPerChar} bytes,
 * most significant byte first. This is NOT UTF-8: it is a simple fixed-width
 * layout of the 16-bit char values.
 */
public class GwtPlayground implements EntryPoint {

    static final Logger logger = Logger.getLogger("");

    /** A Java {@code char} is 16 bits wide, so at most two bytes per group carry data. */
    private static final int CHAR_BYTES = 2;

    /**
     * Logs a sample string and the result of round-tripping it through
     * {@link #getBytes(String, int)} and {@link #getString(byte[], int)}
     * using two bytes per character.
     */
    @Override
    public void onModuleLoad() {
        VerticalPanel loggerArea = new VerticalPanel();
        logger.addHandler(new HasWidgetsLogHandler(loggerArea));
        RootPanel.get().add(loggerArea);

        String original = "A" + "\uffea" + "\u00f1" + "\u00fc" + "C";
        logger.info("original = " + original);

        byte[] utfBytes = getBytes(original, 2);
        String roundTrip = getString(utfBytes, 2);
        logger.info("roundTrip = " + roundTrip);
    }

    /**
     * Encodes each char of {@code string} as {@code bytesPerChar} big-endian bytes.
     *
     * <p>With {@code bytesPerChar == 1} only the low 8 bits of each char are kept
     * (lossy for chars above 0xFF). With {@code bytesPerChar > 2} the extra leading
     * bytes of each group are zero.
     *
     * @param string       the text to encode
     * @param bytesPerChar number of bytes written per char; must be at least 1
     * @return a new array of length {@code string.length() * bytesPerChar}
     * @throws IllegalArgumentException if {@code bytesPerChar} is less than 1
     */
    public static byte[] getBytes(String string, int bytesPerChar) {
        checkBytesPerChar(bytesPerChar);
        char[] chars = string.toCharArray();
        byte[] toReturn = new byte[chars.length * bytesPerChar];
        // Only the trailing CHAR_BYTES bytes of a group can be non-zero. Writing
        // just those avoids shifting by 32 or more, which Java would silently
        // reduce modulo 32 and thereby leak data into the padding bytes.
        int significant = Math.min(bytesPerChar, CHAR_BYTES);
        for (int i = 0; i < chars.length; i++) {
            int groupEnd = (i + 1) * bytesPerChar; // exclusive end of this char's group
            for (int k = 0; k < significant; k++) {
                toReturn[groupEnd - 1 - k] = (byte) (chars[i] >>> (8 * k));
            }
        }
        return toReturn;
    }

    /**
     * Decodes bytes produced by {@link #getBytes(String, int)} back into a string.
     *
     * <p>Each group of {@code bytesPerChar} bytes is read as a big-endian value and
     * its low 16 bits become one char. Trailing bytes that do not form a complete
     * group are ignored.
     *
     * @param bytes        the encoded bytes
     * @param bytesPerChar number of bytes per char; must be at least 1
     * @return the decoded string, of length {@code bytes.length / bytesPerChar}
     * @throws IllegalArgumentException if {@code bytesPerChar} is less than 1
     */
    public static String getString(byte[] bytes, int bytesPerChar) {
        checkBytesPerChar(bytesPerChar);
        char[] chars = new char[bytes.length / bytesPerChar];
        // Bytes above the low 16 bits cannot contribute to a char, so skip them.
        int significant = Math.min(bytesPerChar, CHAR_BYTES);
        for (int i = 0; i < chars.length; i++) {
            int groupEnd = (i + 1) * bytesPerChar; // exclusive end of this char's group
            int value = 0;
            for (int k = 0; k < significant; k++) {
                // "& 0xFF" undoes the sign extension of the byte-to-int promotion.
                value |= (bytes[groupEnd - 1 - k] & 0xFF) << (8 * k);
            }
            chars[i] = (char) value;
        }
        return new String(chars);
    }

    /** Rejects non-positive widths, which would otherwise cause division by zero or negative array sizes. */
    private static void checkBytesPerChar(int bytesPerChar) {
        if (bytesPerChar < 1) {
            throw new IllegalArgumentException(
                    "bytesPerChar must be at least 1, got " + bytesPerChar);
        }
    }
}
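As @Per Wiklander noted, the code above does not correctly support UTF-8: it writes a fixed number of bytes per character rather than UTF-8's variable-length encoding. Here is a real UTF-8 decoder, ported from C: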
private static class UTF8Decoder { final byte[] the_input; int the_index, the_length; protected UTF8Decoder( byte[] bytes ) { super(); this.the_input = bytes; this.the_index = 0; this.the_length = bytes.length; } int get() { int c; c = the_input[the_index] & 0xFF; the_index += 1; return c; } int cont() { int c = get(); if( (c & 0xC0) == 0x80 ) return (c & 0x3F); else throw new IllegalArgumentException( "Failed to pass strict UTF-8" ); } CharSequence getStringUTF8() { StringBuilder sb = new StringBuilder( the_input.length );
source share