DirectCompactCompressedSketch.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.theta;

import static org.apache.datasketches.theta.PreambleUtil.extractEntryBitsV4;
import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4;
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4;
import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.thetacommon.ThetaUtil;

/**
 * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item
 * and ordered.
 *
 * <p>This sketch can only be associated with a Serialization Version 4 format binary image.</p>
 *
 * <p>This implementation uses data in a given Memory that is owned and managed by the caller.
 * This Memory can be off-heap, which if managed properly will greatly reduce the need for
 * the JVM to perform garbage collection.</p>
 */
class DirectCompactCompressedSketch extends DirectCompactSketch {
  /** Byte offset of the variable-length entry count when the preamble is a single long. */
  private static final int START_PACKED_DATA_EXACT_MODE = 8;
  /** Byte offset of the variable-length entry count when the second preamble long holds theta. */
  private static final int START_PACKED_DATA_ESTIMATION_MODE = 16;

  /**
   * Construct this sketch with the given memory.
   * @param mem Read-only Memory object.
   */
  DirectCompactCompressedSketch(final Memory mem) {
    super(mem);
  }

  /**
   * Wraps the given Memory, which must be a SerVer 4 compressed CompactSketch image.
   * Must check the validity of the Memory before calling.
   * @param srcMem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
   * @param seedHash The update seedHash.
   * <a href="{@docRoot}/resources/dictionary.html#seedHash">See Seed Hash</a>.
   * @return a new sketch instance that reads directly from the given Memory
   */
  static DirectCompactCompressedSketch wrapInstance(final Memory srcMem, final short seedHash) {
    ThetaUtil.checkSeedHashes((short) extractSeedHash(srcMem), seedHash);
    return new DirectCompactCompressedSketch(srcMem);
  }

  //Sketch Overrides

  /**
   * Produces a compact form of this sketch.
   *
   * <p>If <i>dstMem</i> is not null, the {@link #getCurrentBytes()} bytes of this sketch's image
   * are copied into it unchanged and the copy is wrapped. Because the copied bytes are the same
   * compressed image this class reads, the result is wrapped by this class and not by the
   * base class. If <i>dstMem</i> is null, the result of {@code CompactSketch.heapify} on the
   * backing Memory is returned.</p>
   *
   * @param dstOrdered ignored by this implementation; this sketch always reports itself as
   * ordered.
   * @param dstMem optional destination for a copy of the image; may be null.
   * @return a CompactSketch backed by <i>dstMem</i> if it was given, otherwise the heapified sketch
   */
  @Override
  public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
    if (dstMem != null) {
      mem_.copyTo(0, dstMem, 0, getCurrentBytes());
      // The destination now holds the same compressed image, so it needs the compressed reader.
      return new DirectCompactCompressedSketch(dstMem);
    }
    return CompactSketch.heapify(mem_);
  }

  /**
   * Returns the size of the image in bytes: the preamble longs, plus the bytes that encode the
   * entry count, plus the whole bytes needed to hold all packed entries.
   */
  @Override
  public int getCurrentBytes() {
    final int preLongs = extractPreLongs(mem_);
    final int entryBits = extractEntryBitsV4(mem_);
    final int numEntriesBytes = extractNumEntriesBytesV4(mem_);
    return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits);
  }

  /**
   * Decodes the number of retained entries from the image.
   *
   * @param valid ignored; a compact sketch is always valid.
   * @return the number of retained entries
   */
  @Override
  public int getRetainedEntries(final boolean valid) { //compact is always valid
    // number of entries is stored using variable length encoding
    // most significant bytes with all zeros are not stored
    // one byte in the preamble has the number of non-zero bytes used
    final int numEntriesBytes = extractNumEntriesBytesV4(mem_);
    int offsetBytes = getNumEntriesOffset();
    int numEntries = 0;
    // bytes are assembled least-significant first
    for (int i = 0; i < numEntriesBytes; i++) {
      numEntries |= Byte.toUnsignedInt(mem_.getByte(offsetBytes++)) << (i << 3);
    }
    return numEntries;
  }

  /**
   * Returns theta as a long: the stored value when the preamble has more than one long,
   * otherwise {@code Long.MAX_VALUE}.
   */
  @Override
  public long getThetaLong() {
    final int preLongs = extractPreLongs(mem_);
    return (preLongs > 1) ? extractThetaLongV4(mem_) : Long.MAX_VALUE;
  }

  @Override
  public boolean isEmpty() {
    return false;
  }

  @Override
  public boolean isOrdered() {
    return true;
  }

  /**
   * Returns an iterator over the packed entries. The packed data starts immediately after the
   * bytes that encode the entry count.
   */
  @Override
  public HashIterator iterator() {
    return new MemoryCompactCompressedHashIterator(
      mem_,
      getNumEntriesOffset() + extractNumEntriesBytesV4(mem_),
      extractEntryBitsV4(mem_),
      getRetainedEntries()
    );
  }

  //restricted methods

  /**
   * Materializes the retained entries into a new on-heap array by running {@link #iterator()}
   * to completion.
   */
  @Override
  long[] getCache() {
    final int numEntries = getRetainedEntries();
    final long[] cache = new long[numEntries];
    int i = 0;
    final HashIterator it = iterator();
    while (it.next()) {
      cache[i++] = it.get();
    }
    return cache;
  }

  /**
   * Returns the byte offset at which the variable-length entry count begins. The offset moves
   * past the second preamble long when that long is present (preLongs &gt; 1, holding theta).
   */
  private int getNumEntriesOffset() {
    return extractPreLongs(mem_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE;
  }
}