// PageExtractor.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.multipdf;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
/**
 * Extracts one or more sequential pages from a source document and returns
 * them as a new document.
 * <p>
 * The range is described by a 1-based, inclusive start page and end page. The
 * values are stored exactly as supplied; they are only clamped to the source
 * document when {@link #extract()} is called.
 *
 * @author Adam Nichols (adam@apache.org)
 */
public class PageExtractor
{
    private final PDDocument sourceDocument;

    // first page to extract is page 1 (by default)
    private int startPage = 1;
    private int endPage;

    /**
     * Creates a new instance of PageExtractor that covers the whole document:
     * the start page is 1 and the end page is the number of pages the source
     * document has at construction time.
     *
     * @param sourceDocument The document to split.
     */
    public PageExtractor(PDDocument sourceDocument)
    {
        this.sourceDocument = sourceDocument;
        endPage = sourceDocument.getNumberOfPages();
    }

    /**
     * Creates a new instance of PageExtractor for an explicit page range. The
     * range is not validated here; see {@link #extract()} for how
     * out-of-range values are treated.
     *
     * @param sourceDocument The document to split.
     * @param startPage The first page you want extracted (1-based, inclusive)
     * @param endPage The last page you want extracted (1-based, inclusive)
     */
    public PageExtractor(PDDocument sourceDocument, int startPage, int endPage)
    {
        this.sourceDocument = sourceDocument;
        this.startPage = startPage;
        this.endPage = endPage;
    }

    /**
     * This will take a document and extract the desired pages into a new
     * document. Both startPage and endPage are included in the extracted
     * document. If the endPage is greater than the number of pages in the
     * source document, it will go to the end of the document. If startPage is
     * less than 1, it'll start with page 1. If the resulting range is empty
     * (startPage is greater than endPage or greater than the number of pages
     * in the source document, or endPage is less than 1), a blank document
     * will be returned.
     *
     * @return The extracted document
     * @throws IOException If splitting the source document fails
     */
    public PDDocument extract() throws IOException
    {
        // Clamp to the pages that really exist BEFORE testing for an empty
        // range. Testing the raw values lets ranges that lie entirely outside
        // the document through, and "endPage - startPage + 1" can overflow
        // for extreme inputs.
        int firstPage = Math.max(startPage, 1);
        int lastPage = Math.min(endPage, sourceDocument.getNumberOfPages());
        if (firstPage > lastPage)
        {
            return new PDDocument();
        }

        Splitter splitter = new Splitter();
        splitter.setStartPage(firstPage);
        splitter.setEndPage(lastPage);
        // Split length equals the size of the clamped range so the whole range
        // is expected to end up in a single document. Both bounds are within
        // 1..numberOfPages here, so this subtraction cannot overflow.
        splitter.setSplitAtPage(lastPage - firstPage + 1);
        List<PDDocument> splitted = splitter.split(sourceDocument);
        return splitted.get(0);
    }

    /**
     * Gets the first page number to be extracted.
     *
     * @return the first page number which should be extracted, exactly as it
     * was set (not clamped to the source document)
     */
    public int getStartPage()
    {
        return startPage;
    }

    /**
     * Sets the first page number to be extracted.
     *
     * @param startPage the first page number which should be extracted
     * (1-based, inclusive)
     */
    public void setStartPage(int startPage)
    {
        this.startPage = startPage;
    }

    /**
     * Gets the last page number (inclusive) to be extracted.
     *
     * @return the last page number which should be extracted, exactly as it
     * was set (not clamped to the source document)
     */
    public int getEndPage()
    {
        return endPage;
    }

    /**
     * Sets the last page number to be extracted.
     *
     * @param endPage the last page number which should be extracted
     * (1-based, inclusive)
     */
    public void setEndPage(int endPage)
    {
        this.endPage = endPage;
    }
}