// TikaHttpClient.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.http;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.tika.exception.TikaException;
/**
* Lightweight HTTP client for Tika parser modules that call external REST
* endpoints (embedding APIs, VLM services, etc.).
* <p>
* Built on {@link java.net.http.HttpClient} with a daemon thread executor
* so the JVM &mdash; including forked {@code PipesServer} processes &mdash; shuts down
* cleanly without waiting for idle HTTP threads.
* <p>
* This class has no runtime dependencies beyond the JDK and {@code tika-core}.
* Obtain an instance via {@link #build(int)} and close it when done to release
* the underlying executor.
*
* @since Apache Tika 4.0
*/
public class TikaHttpClient implements Closeable {

    /** Value of the {@code Content-Type} header sent with every JSON POST. */
    private static final String JSON_CONTENT_TYPE = "application/json; charset=utf-8";

    private final HttpClient httpClient;
    /** Executor handed to {@link #httpClient}; shut down in {@link #close()}. */
    private final ExecutorService executor;
    /** Per-request timeout (seconds) used when a caller passes a non-positive timeout. */
    private final int defaultTimeoutSeconds;

    private TikaHttpClient(HttpClient httpClient, ExecutorService executor,
                           int defaultTimeoutSeconds) {
        this.httpClient = httpClient;
        this.executor = executor;
        this.defaultTimeoutSeconds = defaultTimeoutSeconds;
    }

    /**
     * Create a new {@code TikaHttpClient} with a daemon-thread executor.
     * <p>
     * The client follows redirects with {@link HttpClient.Redirect#NORMAL} and is
     * configured for HTTP/1.1. The connect timeout is also used as the default
     * per-request timeout for {@link #postJson} and {@link #get}.
     *
     * @param connectTimeoutSeconds TCP connection timeout in seconds; must be positive
     * @return a new client; the caller is responsible for closing it
     * @throws IllegalArgumentException if {@code connectTimeoutSeconds} is not positive
     */
    public static TikaHttpClient build(int connectTimeoutSeconds) {
        // Fail fast with a clear message, before any executor is created. The JDK
        // builder would reject a non-positive duration with the same exception type.
        if (connectTimeoutSeconds <= 0) {
            throw new IllegalArgumentException(
                    "connectTimeoutSeconds must be positive: " + connectTimeoutSeconds);
        }
        // Daemon threads so idle HTTP worker threads never keep the JVM alive.
        ExecutorService executor = Executors.newCachedThreadPool(r -> {
            Thread t = new Thread(r, "tika-http-jdk");
            t.setDaemon(true);
            return t;
        });
        HttpClient client = HttpClient.newBuilder()
                .executor(executor)
                .connectTimeout(Duration.ofSeconds(connectTimeoutSeconds))
                .followRedirects(HttpClient.Redirect.NORMAL)
                .version(HttpClient.Version.HTTP_1_1)
                .build();
        return new TikaHttpClient(client, executor, connectTimeoutSeconds);
    }

    /**
     * POST a JSON body to {@code url} and return the response body as a string.
     * <p>
     * A {@code Content-Type: application/json; charset=utf-8} header is always sent.
     * Entries in {@code headers} are added to the request rather than replacing
     * headers that are already set.
     *
     * @param url            target URL
     * @param jsonBody       request body (UTF-8 JSON)
     * @param headers        additional HTTP headers (e.g. {@code Authorization});
     *                       {@code null} is treated as no additional headers
     * @param timeoutSeconds request timeout in seconds; a value {@code <= 0} uses the
     *                       default timeout (the connect timeout given to {@link #build(int)})
     * @return response body string
     * @throws IOException              on network error, or if the calling thread is interrupted
     * @throws TikaException            on non-2xx HTTP status
     * @throws IllegalArgumentException if {@code url} is not a valid URI
     */
    public String postJson(String url, String jsonBody, Map<String, String> headers,
                           int timeoutSeconds) throws IOException, TikaException {
        HttpRequest.Builder builder = HttpRequest.newBuilder()
                .uri(URI.create(url))
                .timeout(requestTimeout(timeoutSeconds))
                .header("Content-Type", JSON_CONTENT_TYPE)
                .POST(HttpRequest.BodyPublishers.ofString(jsonBody, StandardCharsets.UTF_8));
        addHeaders(builder, headers);
        return send(builder.build());
    }

    /**
     * GET {@code url} and return the response body as a string.
     * Useful for health-check probes at init time.
     *
     * @param url            target URL
     * @param headers        additional HTTP headers; {@code null} is treated as no
     *                       additional headers
     * @param timeoutSeconds request timeout in seconds; a value {@code <= 0} uses the
     *                       default timeout (the connect timeout given to {@link #build(int)})
     * @return response body string
     * @throws IOException              on network error, or if the calling thread is interrupted
     * @throws TikaException            on non-2xx HTTP status
     * @throws IllegalArgumentException if {@code url} is not a valid URI
     */
    public String get(String url, Map<String, String> headers,
                      int timeoutSeconds) throws IOException, TikaException {
        HttpRequest.Builder builder = HttpRequest.newBuilder()
                .uri(URI.create(url))
                .timeout(requestTimeout(timeoutSeconds))
                .GET();
        addHeaders(builder, headers);
        return send(builder.build());
    }

    /** Resolve the per-request timeout, falling back to the default for non-positive values. */
    private Duration requestTimeout(int timeoutSeconds) {
        return Duration.ofSeconds(timeoutSeconds > 0 ? timeoutSeconds : defaultTimeoutSeconds);
    }

    /** Add caller-supplied headers to the request; a {@code null} map adds nothing. */
    private static void addHeaders(HttpRequest.Builder builder, Map<String, String> headers) {
        if (headers != null) {
            headers.forEach(builder::header);
        }
    }

    /**
     * Send the request synchronously and return the UTF-8 decoded response body.
     *
     * @throws IOException   on network error, or if the calling thread is interrupted
     *                       (the interrupt flag is restored before throwing)
     * @throws TikaException if the status code is outside the 2xx range; the message
     *                       contains the status code, the request URI and the response body
     */
    private String send(HttpRequest request) throws IOException, TikaException {
        try {
            HttpResponse<String> response = httpClient.send(
                    request, HttpResponse.BodyHandlers.ofString(StandardCharsets.UTF_8));
            int status = response.statusCode();
            if (status < 200 || status >= 300) {
                throw new TikaException("HTTP " + status
                        + " from " + request.uri() + ": " + response.body());
            }
            return response.body();
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new IOException("HTTP request interrupted: " + request.uri(), e);
        }
    }

    /**
     * Shut down the executor backing this client. This starts an orderly shutdown
     * and does not wait for already-submitted tasks to finish.
     */
    @Override
    public void close() {
        executor.shutdown();
    }
}