Prediction.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.ml;
import java.util.Locale;
/**
* The result of a single-label classification from a {@link LinearModel}.
* <p>
* Two scores are exposed:
* <ul>
* <li>{@link #getProbability()} ��� softmax probability of this label
* relative to all other labels (0���1). Reflects the model's relative
* preference for this label over alternatives. Higher is better, but the
* magnitude is N-dependent: 0.60 in an 80-class model is very strong.</li>
* <li>{@link #getConfidence()} ��� a calibration-independent signal (0���1)
* computed as {@code sigmoid(logit) �� probability}. The sigmoid factor
* captures absolute model activation: a large negative logit (the model
* has no evidence for this class) suppresses confidence even when the
* label happens to win the softmax race by a slim margin.</li>
* </ul>
* <p>
* Use {@link #getProbability()} when comparing candidates from the same
* prediction run. Use {@link #getConfidence()} when deciding whether to trust
* a prediction at all (e.g. as a threshold gate).
*/
public final class Prediction {
private final String label;
private final double probability;
private final double confidence;
/**
* Construct a prediction from a raw logit and its softmax probability.
*
* @param label the class label (e.g. language tag or charset name)
* @param logit raw pre-softmax score for this class
* @param probability softmax probability for this class (0���1)
*/
public Prediction(String label, float logit, float probability) {
this.label = label;
this.probability = probability;
double sig = 1.0 / (1.0 + Math.exp(-logit));
this.confidence = sig * probability;
}
/**
* The predicted class label (e.g. {@code "eng"}, {@code "UTF-8"}).
*/
public String getLabel() {
return label;
}
/**
* Softmax probability of this label (0���1), relative to all other labels.
* Suitable for ranking candidates within a single prediction run.
*/
public double getProbability() {
return probability;
}
/**
* Calibration-independent confidence (0���1).
* Computed as {@code sigmoid(logit) �� probability}.
* <p>
* Accounts for absolute model activation: if the winning logit is very
* negative (the model has no strong evidence for any class), confidence
* is suppressed even when the softmax winner has a comfortable margin.
* Suitable as a threshold gate for deciding whether to trust the result.
*/
public double getConfidence() {
return confidence;
}
@Override
public String toString() {
return String.format(Locale.ROOT,
"%s(prob=%.3f, conf=%.3f)", label, probability, confidence);
}
}