VectorSerializerTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.inference;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.junit.jupiter.api.Test;
/**
 * Tests for {@link VectorSerializer}: encode/decode round trips for small,
 * empty and large float vectors, plus a pinned Base64 string for a known
 * input.
 */
public class VectorSerializerTest {

    /** Largest per-element difference accepted when comparing float arrays. */
    private static final float DELTA = 1e-6f;

    /** Encoding then decoding a mixed-sign vector must return the same values. */
    @Test
    void testRoundTrip() {
        float[] expected = {0.1f, -0.5f, 3.14f, 0.0f, -99.99f};
        float[] actual = VectorSerializer.decode(VectorSerializer.encode(expected));
        assertArrayEquals(expected, actual, DELTA);
    }

    /** A zero-length vector must survive the round trip as a zero-length array. */
    @Test
    void testEmptyVector() {
        float[] roundTripped = VectorSerializer.decode(VectorSerializer.encode(new float[0]));
        assertEquals(0, roundTripped.length);
    }

    /**
     * Pins the byte order to big-endian using the exact example from the
     * Elasticsearch dense_vector documentation: [0.5, 10, 6] encodes to
     * "PwAAAEEgAABAwAAA". If this test fails, the byte order has been changed
     * and ES indexing of vectors will silently produce wrong results.
     */
    @Test
    void testKnownElasticsearchBase64() {
        final String expectedBase64 = "PwAAAEEgAABAwAAA";
        float[] sample = {0.5f, 10.0f, 6.0f};

        // Check both directions against the same reference string.
        assertEquals(expectedBase64, VectorSerializer.encode(sample));
        assertArrayEquals(sample, VectorSerializer.decode(expectedBase64), DELTA);
    }

    /**
     * Round-trips a 768-element vector filled with sine values and checks
     * the length of the encoded text.
     */
    @Test
    void testLargeVector() {
        final int dimensions = 768;
        float[] values = new float[dimensions];
        int idx = 0;
        while (idx < values.length) {
            values[idx] = (float) Math.sin(idx * 0.01);
            idx++;
        }

        String base64 = VectorSerializer.encode(values);
        assertNotNull(base64);

        float[] restored = VectorSerializer.decode(base64);
        assertArrayEquals(values, restored, DELTA);

        // 768 floats * 4 bytes = 3072 bytes; Base64 emits 4 chars per 3 bytes,
        // and 3072 is divisible by 3, so the result is 4096 chars with no padding.
        assertEquals(4096, base64.length());
    }
}