BaseReqSketch.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.req;

import org.apache.datasketches.quantilescommon.FloatsSortedView;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesFloatsAPI;
import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator;

/**
 * This abstract class provides a single place to define and document the public API
 * for the Relative Error Quantiles Sketch.
 *
 * @see <a href="https://datasketches.apache.org/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">
 * Sketching Quantiles and Ranks Tutorial</a>
 *
 * @author Lee Rhodes
 */
abstract class BaseReqSketch implements QuantilesFloatsAPI {
  static final byte INIT_NUMBER_OF_SECTIONS = 3;
  //These two factors are used by upper and lower bounds
  private static final double relRseFactor = Math.sqrt(0.0512 / INIT_NUMBER_OF_SECTIONS);
  private static final double fixRseFactor = .084;

  @Override
  public abstract double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit);

  /**
   * If true, the high ranks are prioritized for better accuracy. Otherwise
   * the low ranks are prioritized for better accuracy.  This state is chosen during sketch
   * construction.
   * @return the high ranks accuracy state.
   */
  public abstract boolean getHighRankAccuracyMode();

  @Override
  public abstract int getK();

  @Override
  public abstract float getMaxItem();

  @Override
  public abstract float getMinItem();

  /**
   * Returns an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]).
   * Derived from Lemma 12 in https://arxiv.org/abs/2004.01668v2, but the constant factors were
   * adjusted based on empirical measurements.
   *
   * @param k the given size of k
   * @param rank the given normalized rank, a number in [0,1].
   * @param hra if true High Rank Accuracy mode is being selected, otherwise, Low Rank Accuracy.
   * @param totalN an estimate of the total number of items submitted to the sketch.
   * @return an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]).
   */
  public static double getRSE(final int k, final double rank, final boolean hra, final long totalN) {
    return getRankUB(k, 2, rank, 1, hra, totalN); //more conservative to assume > 1 level
  }

  @Override
  public abstract long getN();

  @Override
  public abstract double[] getPMF(float[] splitPoints, QuantileSearchCriteria searchCrit);

  @Override
  public abstract float getQuantile(double rank, QuantileSearchCriteria searchCrit);

  @Override
  public abstract float[] getQuantiles(double[] normRanks, QuantileSearchCriteria searchCrit);

  @Override
  public abstract float getQuantileLowerBound(double rank);

  public abstract float getQuantileLowerBound(double rank, int numStdDev);

  @Override
  public abstract float getQuantileUpperBound(double rank);

  public abstract float getQuantileUpperBound(double rank, int numStdDev);

  @Override
  public abstract double getRank(float quantile, QuantileSearchCriteria searchCrit);

  /**
   * Gets an approximate lower bound rank of the given normalized rank.
   * @param rank the given rank, a number between 0 and 1.0.
   * @param numStdDev the number of standard deviations. Must be 1, 2, or 3.
   * @return an approximate lower bound rank.
   */
  public abstract double getRankLowerBound(double rank, int numStdDev);

  @Override
  public abstract double[] getRanks(float[] quantiles, QuantileSearchCriteria searchCrit);

  /**
   * Gets an approximate upper bound rank of the given rank.
   * @param rank the given rank, a number between 0 and 1.0.
   * @param numStdDev the number of standard deviations. Must be 1, 2, or 3.
   * @return an approximate upper bound rank.
   */
  public abstract double getRankUpperBound(double rank, int numStdDev);

  @Override
  public abstract int getNumRetained();

  @Override
  public abstract int getSerializedSizeBytes();

  @Override
  public abstract FloatsSortedView getSortedView();

  @Override
  public boolean hasMemory() {
    return false;
  }

  @Override
  public boolean isDirect() {
    return false;
  }

  @Override
  public abstract boolean isEmpty();

  @Override
  public abstract boolean isEstimationMode();

  @Override
  public boolean isReadOnly() {
    return false;
  }

  @Override
  public abstract QuantilesFloatsSketchIterator iterator();

  /**
   * Merge other sketch into this one. The other sketch is not modified.
   * @param other sketch to be merged into this one.
   * @return this
   */
  public abstract ReqSketch merge(final ReqSketch other);

  /**
   * {@inheritDoc}
   * <p>The parameters k, highRankAccuracy, and reqDebug will not change.</p>
   */
  @Override
  public abstract void reset();

  @Override
  public abstract byte[] toByteArray();

  @Override
  public abstract String toString();

  @Override
  public abstract void update(final float item);

  /**
   * A detailed, human readable view of the sketch compactors and their data.
   * Each compactor string is prepended by the compactor lgWeight, the current number of retained
   * quantiles of the compactor and the current nominal capacity of the compactor.
   * @param fmt the format string for the quantiles; example: "%4.0f".
   * @param allData all the retained quantiles for the sketch will be output by
   * compactor level.  Otherwise, just a summary will be output.
   * @return a detailed view of the compactors and their data
   */
  public abstract String viewCompactorDetail(String fmt, boolean allData);

  static boolean exactRank(final int k, final int levels, final double rank,
      final boolean hra, final long totalN) {
    final int baseCap = k * INIT_NUMBER_OF_SECTIONS;
    if (levels == 1 || totalN <= baseCap) { return true; }
    final double exactRankThresh = (double)baseCap / totalN;
    return hra && rank >= 1.0 - exactRankThresh || !hra && rank <= exactRankThresh;
  }

  static double getRankLB(final int k, final int levels, final double rank,
      final int numStdDev, final boolean hra, final long totalN) {
    if (exactRank(k, levels, rank, hra, totalN)) { return rank; }
    final double relative = relRseFactor / k * (hra ? 1.0 - rank : rank);
    final double fixed = fixRseFactor / k;
    final double lbRel = rank - numStdDev * relative;
    final double lbFix = rank - numStdDev * fixed;
    return Math.max(lbRel, lbFix);
  }

  static double getRankUB(final int k, final int levels, final double rank,
      final int numStdDev, final boolean hra, final long totalN) {
    if (exactRank(k, levels, rank, hra, totalN)) { return rank; }
    final double relative = relRseFactor / k * (hra ? 1.0 - rank : rank);
    final double fixed = fixRseFactor / k;
    final double ubRel = rank + numStdDev * relative;
    final double ubFix = rank + numStdDev * fixed;
    return Math.min(ubRel, ubFix);
  }

}