Attributes.java

package org.jsoup.nodes;

import org.jsoup.SerializationException;
import org.jsoup.helper.Validate;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.SharedConstants.AttrRangeKey;
import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr;

/**
 * The attributes of an Element.
 * <p>
 * During parsing, attributes in with the same name in an element are deduplicated, according to the configured parser's
 * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if
 * {@link #add(String, String)} vs {@link #put(String, String)} is used.
 * </p>
 * <p>
 * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are
 * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
 * name.
 * </p>
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Attributes implements Iterable<Attribute>, Cloneable {
    // Indicates an internal key. Can't be set via HTML. (It could be set via accessor, but not too worried about
    // that. Suppressed from list, iter.)
    static final char InternalPrefix = '/';

    // The Attributes object is only created on the first use of an attribute; the Element will just have a null
    // Attribute slot otherwise
    protected static final String dataPrefix = "data-";
    private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs.

    // manages the key/val arrays
    private static final int GrowthFactor = 2;
    static final int NotFound = -1;
    private static final String EmptyString = "";

    // the number of instance fields is kept as low as possible giving an object size of 24 bytes
    private int size = 0; // number of slots used (not total capacity, which is keys.length)
    String[] keys = new String[InitialCapacity];
    Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access.
    // todo - make keys iterable without creating Attribute objects

    // check there's room for more
    private void checkCapacity(int minNewSize) {
        Validate.isTrue(minNewSize >= size);
        int curCap = keys.length;
        if (curCap >= minNewSize)
            return;
        int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity;
        if (minNewSize > newCap)
            newCap = minNewSize;

        keys = Arrays.copyOf(keys, newCap);
        vals = Arrays.copyOf(vals, newCap);
    }

    int indexOfKey(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equals(keys[i]))
                return i;
        }
        return NotFound;
    }

    private int indexOfKeyIgnoreCase(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equalsIgnoreCase(keys[i]))
                return i;
        }
        return NotFound;
    }

    // we track boolean attributes as null in values - they're just keys. so returns empty for consumers
    // casts to String, so only for non-internal attributes
    static String checkNotNull(@Nullable Object val) {
        return val == null ? EmptyString : (String) val;
    }

    /**
     Get an attribute value by key.
     @param key the (case-sensitive) attribute key
     @return the attribute value if set; or empty string if not set (or a boolean attribute).
     @see #hasKey(String)
     */
    public String get(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via
     {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and
     their owning Element.
     @param key the (case-sensitive) attribute key
     @return the Attribute for this key, or null if not present.
     @since 1.17.2
     */
    public Attribute attribute(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this);
    }

    /**
     * Get an attribute's value by case-insensitive key
     * @param key the attribute name
     * @return the first matching attribute value if set; or empty string if not set (ora boolean attribute).
     */
    public String getIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     * Adds a new attribute. Will produce duplicates if the key already exists.
     * @see Attributes#put(String, String)
     */
    public Attributes add(String key, @Nullable String value) {
        addObject(key, value);
        return this;
    }

    private void addObject(String key, @Nullable Object value) {
        checkCapacity(size + 1);
        keys[size] = key;
        vals[size] = value;
        size++;
    }

    /**
     * Set a new attribute, or replace an existing one by key.
     * @param key case sensitive attribute key (not null)
     * @param value attribute value (which can be null, to set a true boolean attribute)
     * @return these attributes, for chaining
     */
    public Attributes put(String key, @Nullable String value) {
        Validate.notNull(key);
        int i = indexOfKey(key);
        if (i != NotFound)
            vals[i] = value;
        else
            add(key, value);
        return this;
    }

    /**
     Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as
     an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold
     arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc.
     * @return the map holding user-data
     */
    Map<String, Object> userData() {
        final Map<String, Object> userData;
        int i = indexOfKey(SharedConstants.UserDataKey);
        if (i == NotFound) {
            userData = new HashMap<>();
            addObject(SharedConstants.UserDataKey, userData);
        } else {
            //noinspection unchecked
            userData = (Map<String, Object>) vals[i];
        }
        return userData;
    }

    /**
     Get an arbitrary user-data object by key.
     * @param key case-sensitive key to the object.
     * @return the object associated to this key, or {@code null} if not found.
     * @see #userData(String key, Object val)
     * @since 1.17.1
     */
    @Nullable
    public Object userData(String key) {
        Validate.notNull(key);
        if (!hasKey(SharedConstants.UserDataKey)) return null; // no user data exists
        Map<String, Object> userData = userData();
        return userData.get(key);
    }

    /**
     Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML.
     * @param key case-sensitive key
     * @param value object value
     * @return these attributes
     * @see #userData(String key)
     * @since 1.17.1
     */
    public Attributes userData(String key, Object value) {
        Validate.notNull(key);
        userData().put(key, value);
        return this;
    }

    void putIgnoreCase(String key, @Nullable String value) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound) {
            vals[i] = value;
            if (!keys[i].equals(key)) // case changed, update
                keys[i] = key;
        }
        else
            add(key, value);
    }

    /**
     * Set a new boolean attribute. Removes the attribute if the value is false.
     * @param key case <b>insensitive</b> attribute key
     * @param value attribute value
     * @return these attributes, for chaining
     */
    public Attributes put(String key, boolean value) {
        if (value)
            putIgnoreCase(key, null);
        else
            remove(key);
        return this;
    }

    /**
     Set a new attribute, or replace an existing one by key.
     @param attribute attribute with case-sensitive key
     @return these attributes, for chaining
     */
    public Attributes put(Attribute attribute) {
        Validate.notNull(attribute);
        put(attribute.getKey(), attribute.getValue());
        attribute.parent = this;
        return this;
    }

    // removes and shifts up
    @SuppressWarnings("AssignmentToNull")
    private void remove(int index) {
        Validate.isFalse(index >= size);
        int shifted = size - index - 1;
        if (shifted > 0) {
            System.arraycopy(keys, index + 1, keys, index, shifted);
            System.arraycopy(vals, index + 1, vals, index, shifted);
        }
        size--;
        keys[size] = null; // release hold
        vals[size] = null;
    }

    /**
     Remove an attribute by key. <b>Case sensitive.</b>
     @param key attribute key to remove
     */
    public void remove(String key) {
        int i = indexOfKey(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Remove an attribute by key. <b>Case insensitive.</b>
     @param key attribute key to remove
     */
    public void removeIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-sensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKey(String key) {
        return indexOfKey(key) != NotFound;
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKeyIgnoreCase(String key) {
        return indexOfKeyIgnoreCase(key) != NotFound;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKey(String key) {
        int i = indexOfKey(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-insensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKeyIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     Get the number of attributes in this set, including any jsoup internal-only attributes. Internal attributes are
     excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()} methods.
     @return size
     */
    public int size() {
        return size;
        // todo - exclude internal attributes from this count - maintain size, count of internals
    }

    /**
     * Test if this Attributes list is empty (size==0).
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     Add all the attributes from the incoming set to this set.
     @param incoming attributes to add to these attributes.
     */
    public void addAll(Attributes incoming) {
        if (incoming.size() == 0)
            return;
        checkCapacity(size + incoming.size);

        boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put()
        // (and save bashing on the indexOfKey()
        for (Attribute attr : incoming) {
            if (needsPut)
                put(attr);
            else
                add(attr.getKey(), attr.getValue());
        }
    }

    /**
     Get the source ranges (start to end position) in the original input source from which this attribute's <b>name</b>
     and <b>value</b> were parsed.
     <p>Position tracking must be enabled prior to parsing the content.</p>
     @param key the attribute name
     @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range
     was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Attribute#sourceRange()
     @see Node#sourceRange()
     @see Element#endSourceRange()
     @since 1.17.1
     */
    public Range.AttributeRange sourceRange(String key) {
        if (!hasKey(key)) return UntrackedAttr;
        Map<String, Range.AttributeRange> ranges = getRanges();
        if (ranges == null) return Range.AttributeRange.UntrackedAttr;
        Range.AttributeRange range = ranges.get(key);
        return range != null ? range : Range.AttributeRange.UntrackedAttr;
    }

    /** Get the Ranges, if tracking is enabled; null otherwise. */
    @Nullable Map<String, Range.AttributeRange> getRanges() {
        //noinspection unchecked
        return (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
    }


    @Override
    public Iterator<Attribute> iterator() {
        return new Iterator<Attribute>() {
            int expectedSize = size;
            int i = 0;

            @Override
            public boolean hasNext() {
                checkModified();
                while (i < size) {
                    if (isInternalKey(keys[i])) // skip over internal keys
                        i++;
                    else
                        break;
                }

                return i < size;
            }

            @Override
            public Attribute next() {
                checkModified();
                if (i >= size) throw new NoSuchElementException();
                final Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this);
                i++;
                return attr;
            }

            private void checkModified() {
                if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
            }

            @Override
            public void remove() {
                Attributes.this.remove(--i); // next() advanced, so rewind
                expectedSize--;
            }
        };
    }

    /**
     Get the attributes as a List, for iteration.
     @return a view of the attributes as an unmodifiable List.
     */
    public List<Attribute> asList() {
        ArrayList<Attribute> list = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            if (isInternalKey(keys[i]))
                continue; // skip internal keys
            Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this);
            list.add(attr);
        }
        return Collections.unmodifiableList(list);
    }

    /**
     * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
     * starting with {@code data-}.
     * @return map of custom data attributes.
     */
    public Map<String, String> dataset() {
        return new Dataset(this);
    }

    /**
     Get the HTML representation of these attributes.
     @return HTML
     */
    public String html() {
        StringBuilder sb = StringUtil.borrowBuilder();
        try {
            html(sb, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
        } catch (IOException e) { // ought never happen
            throw new SerializationException(e);
        }
        return StringUtil.releaseBuilder(sb);
    }

    final void html(final Appendable accum, final Document.OutputSettings out) throws IOException {
        final int sz = size;
        for (int i = 0; i < sz; i++) {
            if (isInternalKey(keys[i]))
                continue;
            final String key = Attribute.getValidKey(keys[i], out.syntax());
            if (key != null)
                Attribute.htmlNoValidate(key, (String) vals[i], accum.append(' '), out);
        }
    }

    @Override
    public String toString() {
        return html();
    }

    /**
     * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order
     * of the attributes does not impact this equality (as per the Map interface equals()).
     * @param o attributes to compare with
     * @return if both sets of attributes have the same content
     */
    @Override
    public boolean equals(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Attributes that = (Attributes) o;
        if (size != that.size) return false;
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            int thatI = that.indexOfKey(key);
            if (thatI == NotFound)
                return false;
            Object val = vals[i];
            Object thatVal = that.vals[thatI];
            if (val == null) {
                if (thatVal != null)
                    return false;
            } else if (!val.equals(thatVal))
                return false;
        }
        return true;
    }

    /**
     * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes.
     * @return calculated hashcode
     */
    @Override
    public int hashCode() {
        int result = size;
        result = 31 * result + Arrays.hashCode(keys);
        result = 31 * result + Arrays.hashCode(vals);
        return result;
    }

    @Override
    public Attributes clone() {
        Attributes clone;
        try {
            clone = (Attributes) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
        clone.size = size;
        clone.keys = Arrays.copyOf(keys, size);
        clone.vals = Arrays.copyOf(vals, size);
        return clone;
    }

    /**
     * Internal method. Lowercases all (non-internal) keys.
     */
    public void normalize() {
        for (int i = 0; i < size; i++) {
            if (!isInternalKey(keys[i]))
                keys[i] = lowerCase(keys[i]);
        }
    }

    /**
     * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names.
     * @param settings case sensitivity
     * @return number of removed dupes
     */
    public int deduplicate(ParseSettings settings) {
        if (isEmpty())
            return 0;
        boolean preserve = settings.preserveAttributeCase();
        int dupes = 0;
        OUTER: for (int i = 0; i < keys.length; i++) {
            for (int j = i + 1; j < keys.length; j++) {
                if (keys[j] == null)
                    continue OUTER; // keys.length doesn't shrink when removing, so re-test
                if ((preserve && keys[i].equals(keys[j])) || (!preserve && keys[i].equalsIgnoreCase(keys[j]))) {
                    dupes++;
                    remove(j);
                    j--;
                }
            }
        }
        return dupes;
    }

    private static class Dataset extends AbstractMap<String, String> {
        private final Attributes attributes;

        private Dataset(Attributes attributes) {
            this.attributes = attributes;
        }

        @Override
        public Set<Entry<String, String>> entrySet() {
            return new EntrySet();
        }

        @Override
        public String put(String key, String value) {
            String dataKey = dataKey(key);
            String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null;
            attributes.put(dataKey, value);
            return oldValue;
        }

        private class EntrySet extends AbstractSet<Map.Entry<String, String>> {

            @Override
            public Iterator<Map.Entry<String, String>> iterator() {
                return new DatasetIterator();
            }

            @Override
            public int size() {
                int count = 0;
                Iterator iter = new DatasetIterator();
                while (iter.hasNext())
                    count++;
                return count;
            }
        }

        private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
            private Iterator<Attribute> attrIter = attributes.iterator();
            private Attribute attr;
            public boolean hasNext() {
                while (attrIter.hasNext()) {
                    attr = attrIter.next();
                    if (attr.isDataAttribute()) return true;
                }
                return false;
            }

            public Entry<String, String> next() {
                return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
            }

            public void remove() {
                attributes.remove(attr.getKey());
            }
        }
    }

    private static String dataKey(String key) {
        return dataPrefix + key;
    }

    static String internalKey(String key) {
        return InternalPrefix + key;
    }

    static boolean isInternalKey(String key) {
        return key != null && key.length() > 1 && key.charAt(0) == InternalPrefix;
    }
}