// RSS091UserlandParser.java
/*
* Copyright 2004 Sun Microsystems, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.rometools.rome.io.impl;
import com.rometools.rome.feed.WireFeed;
import com.rometools.rome.feed.rss.*;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
/**
 * Parser for RSS 0.91 (Userland) documents, created by default with the feed type
 * {@code "rss_0.91U"}.
 * <p>
 * It extends {@link RSS090Parser}: every {@code parseXxx} method first delegates to the superclass
 * and then fills in the additional properties read by this class. Elements are looked up in the
 * namespace returned by {@link #getRSSNamespace()}, which subclasses may override.
 */
public class RSS091UserlandParser extends RSS090Parser {

    /**
     * Creates a parser for the {@code "rss_0.91U"} feed type.
     */
    public RSS091UserlandParser() {
        this("rss_0.91U");
    }

    /**
     * Creates a parser for the given feed type; intended for subclasses.
     *
     * @param type the feed type identifier handed to the superclass constructor.
     */
    protected RSS091UserlandParser(final String type) {
        super(type, null);
    }

    /**
     * Tells whether the document is handled by this parser: the root element must be named
     * {@code rss} and carry a {@code version} attribute equal to {@link #getRSSVersion()}.
     *
     * @param document the document to inspect.
     * @return {@code true} if the root element name and version attribute match.
     */
    @Override
    public boolean isMyType(final Document document) {
        final Element rssRoot = document.getRootElement();
        final Attribute version = rssRoot.getAttribute("version");
        if (version == null || !rssRoot.getName().equals("rss")) {
            return false;
        }
        return version.getValue().equals(getRSSVersion());
    }

    /**
     * Returns the value the root element's {@code version} attribute must have.
     *
     * @return {@code "0.91"}.
     */
    protected String getRSSVersion() {
        return "0.91";
    }

    /**
     * Returns the namespace used to look up RSS elements; here the empty (no) namespace.
     *
     * @return the namespace with an empty URI.
     */
    @Override
    protected Namespace getRSSNamespace() {
        return Namespace.getNamespace("");
    }

    /**
     * To be overridden by RSS 0.91 Netscape and RSS 0.94. This implementation ignores its
     * argument and always answers {@code true}.
     *
     * @param rssRoot the root element of the RSS document.
     * @return {@code true}.
     */
    protected boolean isHourFormat24(final Element rssRoot) {
        return true;
    }

    /**
     * Parses the root element of an RSS document into a Channel bean.
     * <p>
     * It first invokes super.parseChannel and then parses and injects the following properties if
     * present: language, rating, copyright, pubDate, lastBuildDate, docs, generator,
     * managingEditor, webMaster, skipHours and skipDays.
     * <p>
     * If the root has no {@code channel} child, the bean produced by the superclass is returned
     * unchanged.
     *
     * @param rssRoot the root element of the RSS document to parse.
     * @param locale the locale passed to the date parser.
     * @return the parsed Channel bean.
     */
    @Override
    protected WireFeed parseChannel(final Element rssRoot, final Locale locale) {
        final Channel channel = (Channel) super.parseChannel(rssRoot, locale);
        final Namespace ns = getRSSNamespace();

        final Element eChannel = rssRoot.getChild("channel", ns);
        if (eChannel == null) {
            // Same tolerance as getItems/getImage/getTextInput: nothing to add without a channel.
            return channel;
        }

        final Element language = eChannel.getChild("language", ns);
        if (language != null) {
            channel.setLanguage(language.getText());
        }

        final Element rating = eChannel.getChild("rating", ns);
        if (rating != null) {
            channel.setRating(rating.getText());
        }

        final Element copyright = eChannel.getChild("copyright", ns);
        if (copyright != null) {
            channel.setCopyright(copyright.getText());
        }

        final Element pubDate = eChannel.getChild("pubDate", ns);
        if (pubDate != null) {
            channel.setPubDate(DateParser.parseDate(pubDate.getText(), locale));
        }

        final Element lastBuildDate = eChannel.getChild("lastBuildDate", ns);
        if (lastBuildDate != null) {
            channel.setLastBuildDate(DateParser.parseDate(lastBuildDate.getText(), locale));
        }

        final Element docs = eChannel.getChild("docs", ns);
        if (docs != null) {
            channel.setDocs(docs.getText());
        }

        final Element generator = eChannel.getChild("generator", ns);
        if (generator != null) {
            channel.setGenerator(generator.getText());
        }

        final Element managingEditor = eChannel.getChild("managingEditor", ns);
        if (managingEditor != null) {
            channel.setManagingEditor(managingEditor.getText());
        }

        final Element webMaster = eChannel.getChild("webMaster", ns);
        if (webMaster != null) {
            channel.setWebMaster(webMaster.getText());
        }

        // skipHours/skipDays are looked up in the RSS namespace like every other channel child,
        // so subclasses that override getRSSNamespace() find them too.
        final Element eSkipHours = eChannel.getChild("skipHours", ns);
        if (eSkipHours != null) {
            channel.setSkipHours(parseSkipHours(eSkipHours, ns));
        }

        final Element eSkipDays = eChannel.getChild("skipDays", ns);
        if (eSkipDays != null) {
            channel.setSkipDays(parseSkipDays(eSkipDays, ns));
        }

        return channel;
    }

    /**
     * Collects the integer values of the 'hour' children of a 'skipHours' element.
     * Children whose trimmed text is not a valid integer are skipped.
     */
    private List<Integer> parseSkipHours(final Element eSkipHours, final Namespace ns) {
        final List<Integer> skipHours = new ArrayList<Integer>();
        for (final Element eHour : eSkipHours.getChildren("hour", ns)) {
            try {
                skipHours.add(Integer.valueOf(eHour.getText().trim()));
            } catch (final NumberFormatException ignored) {
                // A single malformed <hour> must not abort parsing of the whole feed;
                // the value is dropped, mirroring the lenient width/height handling in parseImage.
            }
        }
        return skipHours;
    }

    /**
     * Collects the trimmed text of the 'day' children of a 'skipDays' element.
     */
    private List<String> parseSkipDays(final Element eSkipDays, final Namespace ns) {
        final List<String> skipDays = new ArrayList<String>();
        for (final Element eDay : eSkipDays.getChildren("day", ns)) {
            skipDays.add(eDay.getText().trim());
        }
        return skipDays;
    }

    /**
     * Parses the root element of an RSS document looking for image information.
     * <p>
     * It first invokes super.parseImage and, if that yields an image, parses and injects the
     * following properties if present: width, height and description. Width and height are only
     * set when the number parser returns a non-null value.
     *
     * @param rssRoot the root element of the RSS document to parse for image information.
     * @return the parsed RSSImage bean, or {@code null} if the superclass produced none.
     */
    @Override
    protected Image parseImage(final Element rssRoot) {
        final Image image = super.parseImage(rssRoot);
        if (image == null) {
            return null;
        }

        final Namespace ns = getRSSNamespace();
        final Element eImage = getImage(rssRoot);

        final Element width = eImage.getChild("width", ns);
        if (width != null) {
            final Integer val = NumberParser.parseInt(width.getText());
            if (val != null) {
                image.setWidth(val);
            }
        }

        final Element height = eImage.getChild("height", ns);
        if (height != null) {
            final Integer val = NumberParser.parseInt(height.getText());
            if (val != null) {
                image.setHeight(val);
            }
        }

        final Element description = eImage.getChild("description", ns);
        if (description != null) {
            image.setDescription(description.getText());
        }

        return image;
    }

    /**
     * It looks for the 'item' elements under the 'channel' element.
     *
     * @param rssRoot the root element of the RSS document.
     * @return the 'item' children of the channel, or an empty list if there is no channel.
     */
    @Override
    protected List<Element> getItems(final Element rssRoot) {
        final Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
        if (eChannel == null) {
            return Collections.emptyList();
        }
        return eChannel.getChildren("item", getRSSNamespace());
    }

    /**
     * It looks for the 'image' element under the 'channel' element.
     *
     * @param rssRoot the root element of the RSS document.
     * @return the 'image' child of the channel, or {@code null} if either is missing.
     */
    @Override
    protected Element getImage(final Element rssRoot) {
        final Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
        if (eChannel == null) {
            return null;
        }
        return eChannel.getChild("image", getRSSNamespace());
    }

    /**
     * To be overridden by RSS 0.91 Netscape parser.
     *
     * @return the name of the text input element, {@code "textInput"} here.
     */
    protected String getTextInputLabel() {
        return "textInput";
    }

    /**
     * It looks for the text input element, named by {@link #getTextInputLabel()}, under the
     * 'channel' element.
     *
     * @param rssRoot the root element of the RSS document.
     * @return the text input child of the channel, or {@code null} if either is missing.
     */
    @Override
    protected Element getTextInput(final Element rssRoot) {
        final String elementName = getTextInputLabel();
        final Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
        if (eChannel == null) {
            return null;
        }
        return eChannel.getChild(elementName, getRSSNamespace());
    }

    /**
     * Parses an item element of an RSS document looking for item information.
     * <p>
     * It first invokes super.parseItem and then parses and injects the following properties if
     * present: description, pubDate and, from the 'encoded' element in the content namespace, an
     * HTML content.
     *
     * @param rssRoot the root element of the RSS document in case it's needed for context.
     * @param eItem the item element to parse.
     * @param locale the locale passed to the date parser.
     * @return the parsed RSSItem bean.
     */
    @Override
    protected Item parseItem(final Element rssRoot, final Element eItem, final Locale locale) {
        final Item item = super.parseItem(rssRoot, eItem, locale);
        final Namespace ns = getRSSNamespace();

        final Element description = eItem.getChild("description", ns);
        if (description != null) {
            item.setDescription(parseItemDescription(rssRoot, description));
        }

        final Element pubDate = eItem.getChild("pubDate", ns);
        if (pubDate != null) {
            item.setPubDate(DateParser.parseDate(pubDate.getText(), locale));
        }

        final Element encoded = eItem.getChild("encoded", getContentNamespace());
        if (encoded != null) {
            final Content content = new Content();
            content.setType(Content.HTML);
            content.setValue(encoded.getText());
            item.setContent(content);
        }

        return item;
    }

    /**
     * Builds the item description from a 'description' element: the type is set to
     * {@code "text/plain"} and the value to the element's text. The root element is not used by
     * this implementation.
     *
     * @param rssRoot the root element of the RSS document in case it's needed for context.
     * @param eDesc the description element.
     * @return the Description bean.
     */
    protected Description parseItemDescription(final Element rssRoot, final Element eDesc) {
        final Description desc = new Description();
        desc.setType("text/plain");
        desc.setValue(eDesc.getText());
        return desc;
    }
}