UTF8Checker.java

/*
 * JBoss, Home of Professional Open Source.
 * Copyright 2014 Red Hat, Inc., and individual contributors
 * as indicated by the @author tags.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package io.undertow.websockets.core.protocol.version07;

import io.undertow.server.protocol.framed.FrameHeaderData;
import io.undertow.websockets.core.WebSocketMessages;
import io.undertow.websockets.core.function.ChannelFunction;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;

/**
 * An utility class which can be used to check if a sequence of bytes or ByteBuffers contain non UTF-8 data.
 * <p>
 * Please use a new instance per stream.
 *
 * @author <a href="mailto:nmaurer@redhat.com">Norman Maurer</a>
 */
public class UTF8Checker implements ChannelFunction {


    private static final int UTF8_ACCEPT = 0;
    private static final int UTF8_REJECT = 12;

    private static final byte[] TYPES = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8,
            8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8};

    private static final byte[] STATES = {0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12,
            12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12,
            12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
            12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36,
            12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12,
            12, 12, 12, 12, 12, 12};

    private int state = UTF8_ACCEPT;

    private void checkUTF8(int b) throws UnsupportedEncodingException {
        byte type = TYPES[b & 0xFF];

        state = STATES[state + type];
        if (state == UTF8_REJECT) {
            throw WebSocketMessages.MESSAGES.invalidTextFrameEncoding();
        }
    }

    /**
     * Check if the given ByteBuffer contains non UTF-8 data.
     *
     * @param buf           the ByteBuffer to check
     * @param position      the index in the {@link ByteBuffer} to start from
     * @param length        the number of bytes to operate on
     * @throws UnsupportedEncodingException     is thrown if non UTF-8 data is found
     */
    private void checkUTF8(ByteBuffer buf, int position, int length) throws UnsupportedEncodingException {
        int limit = position + length;
        for (int i = position; i < limit; i++) {
            checkUTF8(buf.get(i));
        }
    }

    @Override
    public void newFrame(FrameHeaderData headerData) {
    }

    @Override
    public void afterRead(ByteBuffer buf, int position, int length) throws IOException{
        checkUTF8(buf, position, length);
    }

    @Override
    public void beforeWrite(ByteBuffer buf, int position, int length) throws UnsupportedEncodingException{
        checkUTF8(buf, position, length);
    }

    @Override
    public void complete() throws UnsupportedEncodingException {
        if (state != UTF8_ACCEPT) {
            throw WebSocketMessages.MESSAGES.invalidTextFrameEncoding();
        }
    }
}