UnicodeBomHandlingTest.java

package tech.tablesaw.io.csv;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static tech.tablesaw.io.csv.UnicodeBomHandlingTest.BOM.UTF_8;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import tech.tablesaw.api.Table;

public class UnicodeBomHandlingTest {

  private static final byte[] CONTENT = "1, 2, 3, 4, 5/t6, 7, 8, 9, 10".getBytes();
  private static final byte[] UTF8_BOM_CONTENT;

  static {
    UTF8_BOM_CONTENT = new byte[UTF_8.getBytes().length + CONTENT.length];

    System.arraycopy(UTF_8.getBytes(), 0, UTF8_BOM_CONTENT, 0, UTF_8.getBytes().length);
    System.arraycopy(CONTENT, 0, UTF8_BOM_CONTENT, UTF_8.getBytes().length, CONTENT.length);
  }

  @Test
  @Disabled
  public void javaBehaviour() throws IOException {

    Table t =
        Table.read()
            .csv(
                CsvReadOptions.builder(new InputStreamReader(new ByteArrayInputStream(CONTENT)))
                    .tableName("R")
                    .header(false)
                    .build());
    assertEquals(1, t.get(0, 0));
    t =
        Table.read()
            .csv(
                CsvReadOptions.builder(
                        new InputStreamReader(new ByteArrayInputStream(UTF8_BOM_CONTENT)))
                    .tableName("R")
                    .header(false)
                    .build());
    assertEquals(1, t.get(0, 0));
  }

  protected static final class BOM {
    /** UTF-8 BOM (EF BB BF). */
    protected static final BOM UTF_8 =
        new BOM(new byte[] {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}, "UTF-8");

    private final byte bytes[];
    private final String description;

    private BOM(final byte bom[], final String description) {
      assert (bom != null) : "invalid BOM: null is not allowed";
      assert (description != null) : "invalid description: null is not allowed";
      assert (description.length() != 0) : "invalid description: empty string is not allowed";

      this.bytes = bom;
      this.description = description;
    }

    /** Returns a <code>String</code> representation of this <code>BOM</code> value. */
    public final String toString() {
      return description;
    }

    /** Returns the bytes corresponding to this <code>BOM</code> value. */
    private final byte[] getBytes() {
      final int length = bytes.length;
      final byte[] result = new byte[length];

      // make a defensive copy
      System.arraycopy(bytes, 0, result, 0, length);

      return result;
    }
  }
}