TestSequenceFileSync.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io;

import java.io.IOException;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

/** Tests sync based seek reads/write intervals inside SequenceFiles. */
public class TestSequenceFileSync {
  private static final int NUMRECORDS = 2000;
  private static final int RECORDSIZE = 80;
  private static final Random RAND = new Random();

  private final static String REC_FMT = "%d RECORDID %d : ";


  private static void forOffset(SequenceFile.Reader reader,
      IntWritable key, Text val, int iter, long off, int expectedRecord)
      throws IOException {
    val.clear();
    reader.sync(off);
    reader.next(key, val);
    assertThat(key.get()).isEqualTo(expectedRecord);
    final String test = String.format(REC_FMT, expectedRecord, expectedRecord);
    assertThat(val.find(test, 0)).withFailMessage(
        "Invalid value in iter " + iter + ": " + val).isZero();
  }

  @Test
  public void testDefaultSyncInterval() throws IOException {
    // Uses the default sync interval of 100 KB
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.getLocal(conf);
    final Path path = new Path(GenericTestUtils.getTempPath(
            "sequencefile.sync.test"));
    final IntWritable input = new IntWritable();
    final Text val = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(
        conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.compression(CompressionType.NONE),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class)
    );
    try {
      writeSequenceFile(writer, NUMRECORDS*4);
      for (int i = 0; i < 5; i++) {
        final SequenceFile.Reader reader;

        //try different SequenceFile.Reader constructors
        if (i % 2 == 0) {
          final int buffersize = conf.getInt("io.file.buffer.size", 4096);
          reader = new SequenceFile.Reader(conf,
              SequenceFile.Reader.file(path),
              SequenceFile.Reader.bufferSize(buffersize));
        } else {
          final FSDataInputStream in = fs.open(path);
          final long length = fs.getFileStatus(path).getLen();
          reader = new SequenceFile.Reader(conf,
              SequenceFile.Reader.stream(in),
              SequenceFile.Reader.start(0L),
              SequenceFile.Reader.length(length));
        }

        try {
          forOffset(reader, input, val, i, 0, 0);
          forOffset(reader, input, val, i, 65, 0);
          // There would be over 1000 records within
          // this sync interval
          forOffset(reader, input, val, i, 2000, 1101);
          forOffset(reader, input, val, i, 0, 0);
        } finally {
          reader.close();
        }
      }
    } finally {
      fs.delete(path, false);
    }
  }

  @Test
  public void testLowSyncpoint() throws IOException {
    // Uses a smaller sync interval of 2000 bytes
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.getLocal(conf);
    final Path path = new Path(GenericTestUtils.getTempPath(
        "sequencefile.sync.test"));
    final IntWritable input = new IntWritable();
    final Text val = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(
        conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.compression(CompressionType.NONE),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class),
        SequenceFile.Writer.syncInterval(20*100)
    );
    // Ensure the custom sync interval value is set
    assertThat(writer.syncInterval).isEqualTo(20*100);
    try {
      writeSequenceFile(writer, NUMRECORDS);
      for (int i = 0; i < 5; i++) {
        final SequenceFile.Reader reader;
       
        //try different SequenceFile.Reader constructors
        if (i % 2 == 0) {
          final int bufferSize = conf.getInt("io.file.buffer.size", 4096);
          reader = new SequenceFile.Reader(
              conf,
              SequenceFile.Reader.file(path),
              SequenceFile.Reader.bufferSize(bufferSize));
        } else {
          final FSDataInputStream in = fs.open(path);
          final long length = fs.getFileStatus(path).getLen();
          reader = new SequenceFile.Reader(
              conf,
              SequenceFile.Reader.stream(in),
              SequenceFile.Reader.start(0L),
              SequenceFile.Reader.length(length));
        }

        try {
          forOffset(reader, input, val, i, 0, 0);
          forOffset(reader, input, val, i, 65, 0);
          // There would be only a few records within
          // this sync interval
          forOffset(reader, input, val, i, 2000, 21);
          forOffset(reader, input, val, i, 0, 0);
        } finally {
          reader.close();
        }
      }
    } finally {
      fs.delete(path, false);
    }
  }

  private static void writeSequenceFile(SequenceFile.Writer writer,
      int numRecords) throws IOException {
    final IntWritable key = new IntWritable();
    final Text val = new Text();
    for (int numWritten = 0; numWritten < numRecords; ++numWritten) {
      key.set(numWritten);
      randomText(val, numWritten, RECORDSIZE);
      writer.append(key, val);
    }
    writer.close();
  }

  private static void randomText(Text val, int id, int recordSize) {
    val.clear();
    final StringBuilder ret = new StringBuilder(recordSize);
    ret.append(String.format(REC_FMT, id, id));
    recordSize -= ret.length();
    for (int i = 0; i < recordSize; ++i) {
      ret.append(RAND.nextInt(9));
    }
    val.set(ret.toString());
  }
}