DateConverter.java

/*****************************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 ****************************************************************************/

package org.apache.xmpbox;

import java.io.IOException;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.SimpleTimeZone;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * This class is used to convert dates to strings and back using the PDF date standards. Date are described in
 * PDFReference1.4 section 3.8.2
 * 
 * <p>
 * <strong>This is not (and will not be) a Java date parsing library and will likely still have limited
 * support for various strings as its main use case is to parse from PDF date strings.</strong>
 * </p>
 * 
 * @author Ben Litchfield
 * @author Christopher Oezbek
 * 
 */
public final class DateConverter
{

    /**
     * Fallback patterns tried, in order, when neither the ISO 8601 parser nor the compact PDF
     * parser accepts the input. Only the pattern strings are stored: {@link SimpleDateFormat}
     * is not thread-safe, so a fresh instance is created for every parse attempt.
     */
    private static final String[] POTENTIAL_FORMATS = {
            "EEEE, dd MMM yyyy hh:mm:ss a",
            "EEEE, MMM dd, yyyy hh:mm:ss a",
            "yyyy-MM-dd'T'HH:mm:ss'Z'",
            "yyyy-MM-dd'T'HH:mm:ssz",
            "yyyy-MM-dd'T'HH:mm:ss",
            "yyyy-MM-dd'T'HH:mm:ss.S"
        };

    /** Pattern handed to {@link DateTimeFormatter} for ISO 8601 date-times. */
    private static final String ISO8601_FORMAT = "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX][zzz]";

    /** Recognizes strings that start like an ISO 8601 date-time ({@code yyyy-MM-ddT...}). */
    private static final Pattern ISO8601_PREFIX = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}T.*");

    /** Separator characters removed before the positional (PDF style) parsing. */
    private static final Pattern SEPARATORS = Pattern.compile("[-:T]");

    /** Detects a trailing time zone name such as {@code Z}, {@code CEST} or {@code Europe/Berlin}. */
    private static final Pattern TIME_ZONE_PATTERN = Pattern.compile(
                "[\\d-]*T?[\\d-\\.]([A-Z]{1,4})$|(.*\\d*)([A-Z][a-z]+\\/[A-Z][a-z]+)$"
            );

    private static final int MILLIS_PER_MINUTE = 60 * 1000;

    private static final int MILLIS_PER_HOUR = 60 * MILLIS_PER_MINUTE;

    /**
     * According to check-style, Utility classes should not have a public or default constructor.
     */
    private DateConverter()
    {
    }

    /**
     * This will convert a string to a calendar.
     *
     * <p>
     * Strings starting with {@code yyyy-MM-ddT} are parsed as ISO 8601 date-times. Everything else
     * is parsed positionally as a (possibly truncated) {@code [D:]YYYYMMDDHHmmSS[Z|+HHmm|-HHmm]}
     * string, with {@code -}, {@code :} and {@code T} separators ignored. If that fails, the
     * untouched input is tried against a short list of legacy patterns. A value without any time
     * zone information is interpreted in the JVM default time zone.
     * </p>
     *
     * @param date
     *            The string representation of the calendar.
     *
     * @return The calendar that this string represents, or {@code null} if {@code date} is
     *         {@code null} or blank.
     *
     * @throws IOException
     *             If the date string is not in the correct format.
     */
    public static Calendar toCalendar(String date) throws IOException
    {
        if (date == null)
        {
            return null;
        }
        String text = date.trim();
        if (text.isEmpty())
        {
            return null;
        }

        try
        {
            if (ISO8601_PREFIX.matcher(text).matches())
            {
                // Assuming ISO 8601 date string
                return fromISO8601(text);
            }
            return parsePdfDate(text);
        }
        catch (NumberFormatException | DateTimeParseException e)
        {
            // Both parsers signal malformed input with unchecked exceptions; give the legacy
            // patterns a chance on the original text before reporting the documented IOException.
            Calendar fallback = parseWithPotentialFormats(text);
            if (fallback == null)
            {
                // we didn't find a valid date format so throw an exception
                throw new IOException("Error converting date:" + text, e);
            }
            return fallback;
        }
    }

    /**
     * Parses a compact PDF style date by position after removing the optional {@code D:} prefix
     * and all {@code -}, {@code :} and {@code T} separators. Missing trailing fields take their
     * defaults (January, 1st, 00:00:00).
     *
     * @param text the trimmed date string.
     * @return the parsed calendar.
     * @throws IOException if fewer than four characters remain for the year.
     * @throws NumberFormatException if a field does not consist of digits.
     */
    private static Calendar parsePdfDate(String text) throws IOException
    {
        String digits = text.startsWith("D:") ? text.substring(2) : text;
        digits = SEPARATORS.matcher(digits).replaceAll("");

        int length = digits.length();
        if (length < 4)
        {
            throw new IOException("Error: Invalid date format '" + digits + "'");
        }
        int year = Integer.parseInt(digits.substring(0, 4));
        int month = length >= 6 ? Integer.parseInt(digits.substring(4, 6)) : 1;
        int day = length >= 8 ? Integer.parseInt(digits.substring(6, 8)) : 1;
        int hour = length >= 10 ? Integer.parseInt(digits.substring(8, 10)) : 0;
        int minute = length >= 12 ? Integer.parseInt(digits.substring(10, 12)) : 0;
        int second = 0;

        // What follows the minutes is either seconds, a time zone, or both. Seconds are assumed
        // when enough characters remain for "SS" plus an offset, for "SSZ", or when exactly two
        // digits remain (plain "SS" without any zone).
        int remaining = length - 12;
        boolean secondsOnly = remaining == 2
                && Character.isDigit(digits.charAt(12))
                && Character.isDigit(digits.charAt(13));
        int timeZonePos = 12;
        if (remaining > 5 || (remaining == 3 && digits.endsWith("Z")) || secondsOnly)
        {
            second = Integer.parseInt(digits.substring(12, 14));
            timeZonePos = 14;
        }

        Calendar result;
        if (length > timeZonePos)
        {
            SimpleTimeZone zone = parseZone(digits, timeZonePos);
            updateZoneId(zone);
            result = new GregorianCalendar(zone);
        }
        else
        {
            // no zone information: fall back to the JVM default time zone
            result = new GregorianCalendar();
        }
        result.clear();
        result.set(year, month - 1, day, hour, minute, second);
        return result;
    }

    /**
     * Reads the time zone designator that starts at {@code pos}: {@code Z}, {@code +HH[mm]} or
     * {@code HH[mm]}. The last form denotes a negative offset, because the {@code -} sign has
     * already been stripped together with the date separators.
     *
     * @param digits the separator-free date string.
     * @param pos index of the first character of the zone designator.
     * @return a time zone with the parsed raw offset and the placeholder ID "Unknown".
     * @throws NumberFormatException if the hour or minute part is not numeric.
     */
    private static SimpleTimeZone parseZone(String digits, int pos)
    {
        char sign = digits.charAt(pos);
        if (sign == 'Z')
        {
            return new SimpleTimeZone(0, "Unknown");
        }

        boolean positive = sign == '+';
        // skip the explicit '+'; a negative offset starts directly with its digits
        int start = positive ? pos + 1 : pos;
        int hours = 0;
        int minutes = 0;
        if (digits.length() >= pos + 3)
        {
            hours = Integer.parseInt(digits.substring(start, start + 2));
        }
        if (digits.length() >= start + 4)
        {
            minutes = Integer.parseInt(digits.substring(start + 2, start + 4));
        }
        // The sign applies to hours AND minutes: -03:30 is 3.5 hours behind UTC, not 2.5.
        int offset = hours * MILLIS_PER_HOUR + minutes * MILLIS_PER_MINUTE;
        return new SimpleTimeZone(positive ? offset : -offset, "Unknown");
    }

    /**
     * Tries the legacy {@link #POTENTIAL_FORMATS} against the text. A pattern is accepted only if
     * it consumes the whole string, so trailing data such as an offset is never silently dropped.
     * Month and day names are matched in English.
     *
     * @param text the trimmed, otherwise unmodified date string.
     * @return the parsed calendar in the default time zone, or {@code null} if no pattern matched.
     */
    private static Calendar parseWithPotentialFormats(String text)
    {
        String candidate = text;
        int length = candidate.length();
        // remove the arbitrary : in the timezone. SimpleDateFormat can't handle it
        if (length >= 6 && candidate.charAt(length - 3) == ':'
                && (candidate.charAt(length - 6) == '+' || candidate.charAt(length - 6) == '-'))
        {
            // that's a timezone string, remove the :
            candidate = candidate.substring(0, length - 3) + candidate.substring(length - 2);
        }

        for (String pattern : POTENTIAL_FORMATS)
        {
            ParsePosition position = new ParsePosition(0);
            Date parsed = new SimpleDateFormat(pattern, Locale.ENGLISH).parse(candidate, position);
            if (parsed != null && position.getIndex() == candidate.length())
            {
                Calendar calendar = new GregorianCalendar();
                calendar.setTime(parsed);
                return calendar;
            }
        }
        return null;
    }

    /**
     * Update the zone ID based on the raw offset. The ID becomes "GMT" for a zero offset,
     * "GMT+hh:mm" for a positive offset whose hour part is at most 12 and "GMT-hh:mm" for a
     * negative offset whose hour part is at most 14. Zones that don't fit in this schema are set
     * to zone ID "unknown".
     *
     * @param tz the time zone to update.
     */
    private static void updateZoneId(TimeZone tz)
    {
        int offset = tz.getRawOffset();
        char pm = '+';
        if (offset < 0)
        {
            pm = '-';
            offset = -offset;
        }
        int hh = offset / MILLIS_PER_HOUR;
        int mm = offset % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
        if (offset == 0)
        {
            tz.setID("GMT");
        }
        else if (pm == '+' && hh <= 12)
        {
            tz.setID(String.format(Locale.US, "GMT+%02d:%02d", hh, mm));
        }
        else if (pm == '-' && hh <= 14)
        {
            tz.setID(String.format(Locale.US, "GMT-%02d:%02d", hh, mm));
        }
        else
        {
            tz.setID("unknown");
        }
    }

    /**
     * Convert the date to iso 8601 string format without milliseconds.
     *
     * @param cal
     *            The date to convert.
     * @return The date represented as an ISO 8601 string.
     */
    public static String toISO8601(Calendar cal)
    {
        return toISO8601(cal, false);
    }

    /**
     * Convert the date to iso 8601 string format, i.e. {@code yyyy-MM-ddTHH:mm:ss[.SSS]+hh:mm}.
     * The offset is the calendar's zone offset including daylight saving time.
     *
     * @param cal The date to convert.
     * @param printMillis Print Milliseconds.
     * @return The date represented as an ISO 8601 string.
     */
    public static String toISO8601(Calendar cal, boolean printMillis)
    {
        StringBuilder retval = new StringBuilder();

        // the year is padded to four digits so that years below 1000 stay valid ISO 8601
        retval.append(String.format(Locale.US, "%04d-%02d-%02dT%02d:%02d:%02d",
                cal.get(Calendar.YEAR),
                cal.get(Calendar.MONTH) + 1,
                cal.get(Calendar.DAY_OF_MONTH),
                cal.get(Calendar.HOUR_OF_DAY),
                cal.get(Calendar.MINUTE),
                cal.get(Calendar.SECOND)));

        if (printMillis)
        {
            retval.append(String.format(Locale.US, ".%03d", cal.get(Calendar.MILLISECOND)));
        }

        int timeZone = cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET);
        retval.append(timeZone < 0 ? '-' : '+');
        int absolute = Math.abs(timeZone);
        retval.append(String.format(Locale.US, "%02d:%02d",
                absolute / MILLIS_PER_HOUR,
                absolute % MILLIS_PER_HOUR / MILLIS_PER_MINUTE));
        return retval.toString();
    }

    /**
     * Get a Calendar from an ISO8601 date string. A time that lacks seconds ({@code HH:mm}
     * directly followed by the zone) gets {@code :00} inserted before parsing.
     *
     * @param dateString the ISO 8601 date-time, including a zone offset or zone name.
     * @return the Calendar instance.
     * @throws DateTimeParseException if the string cannot be parsed, e.g. because it has no zone.
     */
    private static Calendar fromISO8601(String dateString)
    {
        // Built per call: ofPattern binds the default locale that is current at creation time.
        DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(ISO8601_FORMAT);

        String timeZoneString = findTimeZoneName(dateString);
        int teeIndex = dateString.indexOf('T');

        // index at which the zone designator starts, or -1 if none was found
        int zoneIndex = -1;
        if (timeZoneString != null)
        {
            // the name was matched at the end of the string, so locate it from the end
            zoneIndex = dateString.length() - timeZoneString.length();
        }
        else if (teeIndex != -1)
        {
            zoneIndex = Math.max(dateString.indexOf('+', teeIndex + 1),
                                 dateString.indexOf('-', teeIndex + 1));
        }

        String toParse = dateString;
        if (zoneIndex != -1 && zoneIndex - teeIndex == 6)
        {
            // "THH:mm" is immediately followed by the zone: add the missing seconds
            toParse = dateString.substring(0, zoneIndex) + ":00" + dateString.substring(zoneIndex);
        }

        ZonedDateTime zonedDateTime = ZonedDateTime.parse(toParse, dateTimeFormatter);
        return GregorianCalendar.from(zonedDateTime);
    }

    /**
     * Looks for a trailing time zone name (as opposed to a numeric offset) in the date string.
     *
     * @param dateString the date string to inspect.
     * @return the zone name found at the end of the string, or {@code null} if there is none.
     */
    private static String findTimeZoneName(String dateString)
    {
        Matcher timeZoneMatcher = TIME_ZONE_PATTERN.matcher(dateString);
        String timeZoneString = null;
        while (timeZoneMatcher.find())
        {
            // the zone name is always the last capturing group that took part in the match
            for (int i = 1; i <= timeZoneMatcher.groupCount(); i++)
            {
                String group = timeZoneMatcher.group(i);
                if (group != null)
                {
                    timeZoneString = group;
                }
            }
        }
        return timeZoneString;
    }
}