CSVDuplicateHeaderTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.csv;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Tests parsing of duplicate column names in a CSV header.
 * The test verifies that headers are consistently handled by CSVFormat and CSVParser.
 */
class CSVDuplicateHeaderTest {

    /**
     * Return test cases for duplicate header data for use in CSVFormat.
     * <p>
     * This filters the parsing test data to all cases where the allow missing column
     * names flag is true and ignore header case is false: these flags are exclusively for parsing.
     * CSVFormat validation applies to both parsing and writing and thus validation
     * is less strict and behaves as if the allow missing column names constraint and
     * the ignore header case behavior are absent.
     * The filtered data is then returned with the parser flags set to both true and false
     * for each test case.
     * </p>
     *
     * @return the stream of arguments
     */
    static Stream<Arguments> duplicateHeaderAllowsMissingColumnsNamesData() {
        return duplicateHeaderData()
            .filter(arg -> Boolean.TRUE.equals(arg.get()[1]) && Boolean.FALSE.equals(arg.get()[2]))
            .flatMap(arg -> {
                // Return test case with flags as all true/false combinations
                final Object[][] data = new Object[4][];
                final Boolean[] flags = {Boolean.TRUE, Boolean.FALSE};
                int i = 0;
                for (final Boolean a : flags) {
                    for (final Boolean b : flags) {
                        data[i] = arg.get().clone();
                        data[i][1] = a;
                        data[i][2] = b;
                        i++;
                    }
                }
                return Arrays.stream(data).map(Arguments::of);
            });
    }

    /**
     * Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order:
     * <pre>
     * DuplicateHeaderMode duplicateHeaderMode
     * boolean allowMissingColumnNames
     * String[] headers
     * boolean valid
     * </pre>
     *
     * @return the stream of arguments
     */
    static Stream<Arguments> duplicateHeaderData() {
        return Stream.of(
            // Any combination with a valid header
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "B"}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B"}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "B"}, true),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "B"}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "B"}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "B"}, true),

            // Any combination with a valid header including empty
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", ""}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", ""}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", ""}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", ""}, true),

            // Any combination with a valid header including blank (1 space)
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", " "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", " "}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", " "}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", " "}, true),

            // Any combination with a valid header including null
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", null}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", null}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", null}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", null}, true),

            // Duplicate non-empty names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A"}, true),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A"}, true),

            // Duplicate empty names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"", ""}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"", ""}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"", ""}, true),

            // Duplicate blank names (1 space)
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", " "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", " "}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", " "}, true),

            // Duplicate blank names (3 spaces)
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"   ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"   ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"   ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"   ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"   ", "   "}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"   ", "   "}, true),

            // Duplicate null names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {null, null}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {null, null}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {null, null}, true),

            // Duplicate blank names (1+3 spaces)
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", "   "}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", "   "}, true),

            // Duplicate blank names and null names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", null}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", null}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", null}, true),

            // Duplicate non-empty and empty names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", "", ""}, true),

            // Non-duplicate non-empty and duplicate empty names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "B", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "B", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "B", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "B", "", ""}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "B", "", ""}, true),

            // Duplicate non-empty and blank names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", " ", " "}, true),

            // Duplicate non-empty and null names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", null, null}, true),

            // Duplicate blank names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "", ""}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "", ""}, true),

            // Duplicate null names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", null, null}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", null, null}, true),

            // Duplicate blank names (1+3 spaces)
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", " ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", " ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", " ", "   "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", " ", "   "}, true),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", " ", "   "}, true),

            // Duplicate names (case insensitive)
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true , new String[] {"A", "a"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true , new String[] {"A", "a"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true , new String[] {"A", "a"}, true),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true , new String[] {"A", "a"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true , new String[] {"A", "a"}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true , new String[] {"A", "a"}, true),

            // Duplicate non-empty (case insensitive) and empty names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", "", ""}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", "", ""}, true),

            // Duplicate non-empty (case insensitive) and blank names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", " ", " "}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", " ", " "}, true),

            // Duplicate non-empty (case insensitive) and null names
            Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", null, null}, false),
            Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", null, null}, false),
            Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", null, null}, true)
        );
    }

    /**
     * Tests duplicate headers with the CSVFormat.
     *
     * @param duplicateHeaderMode the duplicate header mode
     * @param allowMissingColumnNames the allow missing column names flag (only used for parsing)
     * @param ignoreHeaderCase the ignore header case flag (only used for parsing)
     * @param headers the headers
     * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
     */
    @ParameterizedTest
    @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"})
    void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode,
                              final boolean allowMissingColumnNames,
                              final boolean ignoreHeaderCase,
                              final String[] headers,
                              final boolean valid) {
        final CSVFormat.Builder builder =
            CSVFormat.DEFAULT.builder()
                             .setDuplicateHeaderMode(duplicateHeaderMode)
                             .setAllowMissingColumnNames(allowMissingColumnNames)
                             .setIgnoreHeaderCase(ignoreHeaderCase)
                             .setHeader(headers);
        if (valid) {
            final CSVFormat format = builder.get();
            Assertions.assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode");
            Assertions.assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames");
            Assertions.assertArrayEquals(headers, format.getHeader(), "Header");
        } else {
            Assertions.assertThrows(IllegalArgumentException.class, builder::get);
        }
    }

    /**
     * Tests duplicate headers with the CSVParser.
     *
     * @param duplicateHeaderMode the duplicate header mode
     * @param allowMissingColumnNames the allow missing column names flag (only used for parsing)
     * @param ignoreHeaderCase the ignore header case flag (only used for parsing)
     * @param headers the headers (joined with the CSVFormat delimiter to create a string input)
     * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @ParameterizedTest
    @MethodSource(value = {"duplicateHeaderData"})
    void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode,
                              final boolean allowMissingColumnNames,
                              final boolean ignoreHeaderCase,
                              final String[] headers,
            final boolean valid) throws IOException {
        // @formatter:off
        final CSVFormat format = CSVFormat.DEFAULT.builder()
                .setDuplicateHeaderMode(duplicateHeaderMode)
                .setAllowMissingColumnNames(allowMissingColumnNames)
                .setIgnoreHeaderCase(ignoreHeaderCase)
                .setNullString("NULL")
                .setHeader()
                .get();
        // @formatter:on
        final String input = Arrays.stream(headers)
                .map(s -> s == null ? format.getNullString() : s)
                .collect(Collectors.joining(format.getDelimiterString()));
        // @formatter:off
        if (valid) {
            try (CSVParser parser = CSVParser.parse(input, format)) {
                // Parser ignores null headers
                final List<String> expected = Arrays.stream(headers).filter(s -> s != null).collect(Collectors.toList());
                Assertions.assertEquals(expected, parser.getHeaderNames(), "HeaderNames");
            }
        } else {
            Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format));
        }
    }
}