From 87f05e6894661f747bf1888e66fd89a8ef622cb5 Mon Sep 17 00:00:00 2001
From: Schroeder
+ * @param sDate string to parse for a date. + * @return the Date represented by the given W3C date-time string. + * It returns null if it was not possible to parse the given string into a Date. + * + * */ + public static Date parseDate(String sDate) { + Date d = parseW3CDateTime(sDate); + if (d == null) { + d = parseRFC822(sDate); + if (d == null && ADDITIONAL_MASKS.length > 0) { + d = parseUsingMask(ADDITIONAL_MASKS, sDate); + } + if (d == null) { // Schroeder 11-2012 + d = parseUsingMask(masks, sDate); + } + } + return d; + } + + /** + * create a RFC822 representation of a date. + *
+ * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. + * + * @param date Date to format + * @return the RFC822 representation of the given Date, + * rendered in the GMT time zone with US locale names. + * + */ + @SuppressWarnings("unused") + public static String formatRFC822(Date date) { + SimpleDateFormat dateFormater = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US); + dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); + return dateFormater.format(date); + } + + /** + * create a W3C Date Time representation of a date. + * + * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. + * + * @param date Date to format + * @return the W3C Date Time representation of the given Date, + * rendered in the GMT time zone with a literal 'Z' suffix. + * + */ + @SuppressWarnings("unused") + public static String formatW3CDateTime(Date date) { + SimpleDateFormat dateFormater = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); + dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); + return dateFormater.format(date); + } + + + public static void main(String[] args) { + System.out.println(parseDate("Mon, 19 Nov 2012 23:22:39 Z")); + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java new file mode 100644 index 0000000..fa14426 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java @@ -0,0 +1,345 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.Channel; +import com.rometools.rome.feed.rss.Image; +import com.rometools.rome.feed.rss.Item; +import com.rometools.rome.feed.rss.TextInput; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.impl.BaseWireFeedParser; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +import java.util.*; + +/** + */ +@SuppressWarnings({ "rawtypes", "unchecked" }) +public class RSS090Parser extends BaseWireFeedParser { + private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/"; + private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/"; + + private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI); + private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI); + private static final Namespace CONTENT_NS = Namespace.getNamespace(CONTENT_URI); + + public RSS090Parser() { + this("rss_0.9", RSS_NS); + } + + protected RSS090Parser(String type, Namespace ns) { + super(type, ns); + } + + public boolean isMyType(Document document) { + boolean ok = false; + + Element rssRoot = document.getRootElement(); + Namespace defaultNS = rssRoot.getNamespace(); + List additionalNSs = rssRoot.getAdditionalNamespaces(); + + ok = defaultNS != null && defaultNS.equals(getRDFNamespace()); + if (ok) { + if (additionalNSs == null) { + ok = 
false; + } else { + ok = false; + for (int i = 0; !ok && i < additionalNSs.size(); i++) { + ok = getRSSNamespace().equals(additionalNSs.get(i)); + } + } + } + return ok; + } + + @SuppressWarnings("unused") + public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException, FeedException { + if (validate) { + validateFeed(document); + } + Element rssRoot = document.getRootElement(); + return parseChannel(rssRoot); + } + + @SuppressWarnings("all") + protected void validateFeed(@SuppressWarnings("unused") Document document) throws FeedException { + // TBD + // here we have to validate the Feed against a schema or whatever + // not sure how to do it + // one posibility would be to inject our own schema for the feed (they don't exist out there) + // to the document, produce an ouput and attempt to parse it again with validation turned on. + // otherwise will have to check the document elements by hand. + } + + /** + * Returns the namespace used by RSS elements in document of the RSS version the parser supports. + *+ * This implementation returns the EMTPY namespace. + *
+ * + * @return the RSS 0.9 namespace held in RSS_NS. + */ + protected Namespace getRSSNamespace() { + return RSS_NS; + } + + /** + * Returns the namespace used by RDF elements in document of the RSS version the parser supports. + *
+ * This implementation returns the RDF namespace. + *
+ * + * @return the RDF namespace held in RDF_NS. + */ + protected Namespace getRDFNamespace() { + return RDF_NS; + } + + /** + * Returns the namespace used by Content Module elements in document. + *
+ * This implementation returns the Content Module namespace. + *
+ * + * @return the Content Module namespace held in CONTENT_NS. + */ + protected Namespace getContentNamespace() { + return CONTENT_NS; + } + + /** + * Parses the root element of an RSS document into a Channel bean. + *
+ * It reads title, link and description and delegates to parseImage, parseItems + * and parseTextInput. This delegation always passes the root element of the RSS + * document as different RSS version may have this information in different parts + * of the XML tree (no assumptions made thanks to the specs variaty) + * + * + * @param rssRoot the root element of the RSS document to parse. + * @return the parsed Channel bean. + */ + protected WireFeed parseChannel(Element rssRoot) { + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + + Channel channel = new Channel(getType()); + + Element e = eChannel.getChild("title", getRSSNamespace()); + if (e != null) { + channel.setTitle(e.getText()); + } + e = eChannel.getChild("link", getRSSNamespace()); + if (e != null) { + channel.setLink(e.getText()); + } + e = eChannel.getChild("description", getRSSNamespace()); + if (e != null) { + channel.setDescription(e.getText()); + } + + channel.setImage(parseImage(rssRoot)); + + channel.setTextInput(parseTextInput(rssRoot)); + + // Unfortunately Microsoft's SSE extension has a special case of + // effectively putting the sharing channel module inside the RSS tag + // and not inside the channel itself. So we also need to look for + // channel modules from the root RSS element. 
+ List allFeedModules = new ArrayList(); + List rootModules = parseFeedModules(rssRoot, Locale.getDefault()); + List channelModules = parseFeedModules(eChannel, Locale.getDefault()); + if (rootModules != null) { + allFeedModules.addAll(rootModules); + } + if (channelModules != null) { + allFeedModules.addAll(channelModules); + } + channel.setModules(allFeedModules); + channel.setItems(parseItems(rssRoot)); + + List foreignMarkup = extractForeignMarkup(eChannel, channel, getRSSNamespace()); + if (!foreignMarkup.isEmpty()) { + channel.setForeignMarkup(foreignMarkup); + } + return channel; + } + + /** + * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment. + * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element. + * + */ + protected List getItems(Element rssRoot) { + return rssRoot.getChildren("item", getRSSNamespace()); + } + + /** + * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment. + * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. + * + */ + protected Element getImage(Element rssRoot) { + return rssRoot.getChild("image", getRSSNamespace()); + } + + /** + * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment. + * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. + * + */ + protected Element getTextInput(Element rssRoot) { + return rssRoot.getChild("textinput", getRSSNamespace()); + } + + /** + * Parses the root element of an RSS document looking for image information. + * + * It reads title and url out of the 'image' element. + * + * + * @param rssRoot the root element of the RSS document to parse for image information. + * @return the parsed image bean. 
+ */ + protected Image parseImage(Element rssRoot) { + Image image = null; + Element eImage = getImage(rssRoot); + if (eImage != null) { + image = new Image(); + + Element e = eImage.getChild("title", getRSSNamespace()); + if (e != null) { + image.setTitle(e.getText()); + } + e = eImage.getChild("url", getRSSNamespace()); + if (e != null) { + image.setUrl(e.getText()); + } + e = eImage.getChild("link", getRSSNamespace()); + if (e != null) { + image.setLink(e.getText()); + } + } + return image; + } + + /** + * Parses the root element of an RSS document looking for all items information. + * + * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem() + * for each item element. The resulting RSSItem of each item element is stored in a list. + * + * + * @param rssRoot the root element of the RSS document to parse for all items information. + * @return a list with all the parsed RSSItem beans. + */ + protected List parseItems(Element rssRoot) { + Collection eItems = getItems(rssRoot); + + List items = new ArrayList(); + for (Object item : eItems) { + Element eItem = (Element) item; + items.add(parseItem(rssRoot, eItem)); + } + return items; + } + + /** + * Parses an item element of an RSS document looking for item information. + * + * It reads title and link out of the 'item' element. + * + * + * @param rssRoot the root element of the RSS document in case it's needed for context. + * @param eItem the item element to parse. + * @return the parsed RSSItem bean. 
+ */ + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = new Item(); + Element e = eItem.getChild("title", getRSSNamespace()); + if (e != null) { + item.setTitle(e.getText()); + } + e = eItem.getChild("link", getRSSNamespace()); + if (e != null) { + item.setLink(e.getText()); + item.setUri(e.getText()); + } + + item.setModules(parseItemModules(eItem, Locale.getDefault())); + + List foreignMarkup = extractForeignMarkup(eItem, item, getRSSNamespace()); + //content:encoded elements are treated special, without a module, they have to be removed from the foreign + //markup to avoid duplication in case of read/write. Note that this fix will break if a content module is + //used + Iterator iterator = foreignMarkup.iterator(); + while (iterator.hasNext()) { + Element ie = (Element) iterator.next(); + if (getContentNamespace().equals(ie.getNamespace()) && ie.getName().equals("encoded")) { + iterator.remove(); + } + } + if (!foreignMarkup.isEmpty()) { + item.setForeignMarkup(foreignMarkup); + } + return item; + } + + /** + * Parses the root element of an RSS document looking for text-input information. + * + * It reads title, description, name and link out of the 'textinput' or 'textInput' element. + * + * + * @param rssRoot the root element of the RSS document to parse for text-input information. + * @return the parsed RSSTextInput bean. 
+ */ + protected TextInput parseTextInput(Element rssRoot) { + TextInput textInput = null; + Element eTextInput = getTextInput(rssRoot); + if (eTextInput != null) { + textInput = new TextInput(); + Element e = eTextInput.getChild("title", getRSSNamespace()); + if (e != null) { + textInput.setTitle(e.getText()); + } + e = eTextInput.getChild("description", getRSSNamespace()); + if (e != null) { + textInput.setDescription(e.getText()); + } + e = eTextInput.getChild("name", getRSSNamespace()); + if (e != null) { + textInput.setName(e.getText()); + } + e = eTextInput.getChild("link", getRSSNamespace()); + if (e != null) { + textInput.setLink(e.getText()); + } + } + return textInput; + } + + /** + * Parses the given document into a WireFeed. + * + * Delegates to the two-argument parse method so that this overload yields a real feed + * instead of null. The locale argument is not consulted: channel and item modules + * are parsed with Locale.getDefault() by parseChannel and parseItem. + * + * @param document the JDOM document to parse. + * @param b whether validateFeed should be invoked before parsing. + * @param locale not used by this implementation. + * @return the feed produced by parseChannel for the document root. + * @throws FeedException as declared by the two-argument parse method. + */ + @Override + @SuppressWarnings("all") + public WireFeed parse(org.jdom2.Document document, boolean b, Locale locale) throws IllegalArgumentException, FeedException { + return parse(document, b); + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java new file mode 100644 index 0000000..958e127 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java @@ -0,0 +1,250 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.*; +import com.rometools.rome.io.impl.NumberParser; +import org.jdom2.Attribute; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + */ +@SuppressWarnings({ "rawtypes", "unchecked" }) +public class RSS091UserlandParser extends RSS090Parser { + + public RSS091UserlandParser() { + this("rss_0.91U"); + } + + protected RSS091UserlandParser(String type) { + super(type, null); + } + + public boolean isMyType(Document document) { + boolean ok; + Element rssRoot = document.getRootElement(); + ok = rssRoot.getName().equals("rss"); + if (ok) { + ok = false; + Attribute version = rssRoot.getAttribute("version"); + if (version != null) { + ok = version.getValue().equals(getRSSVersion()); + } + } + return ok; + } + + protected String getRSSVersion() { + return "0.91"; + } + + protected Namespace getRSSNamespace() { + return Namespace.getNamespace(""); + } + + /** + * To be overriden by RSS 0.91 Netscape and RSS 0.94 + */ + @SuppressWarnings("unused") + protected boolean isHourFormat24(Element rssRoot) { + return true; + } + + /** + * Parses the root element of an RSS document into a Channel bean. + * + * It first invokes super.parseChannel and then parses and injects the following + * properties if present: language, pubDate, rating and copyright. + * + * + * @param rssRoot the root element of the RSS document to parse. + * @return the parsed Channel bean. 
+ */ + protected WireFeed parseChannel(Element rssRoot) { + Channel channel = (Channel) super.parseChannel(rssRoot); + + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + + Element e = eChannel.getChild("language", getRSSNamespace()); + if (e != null) { + channel.setLanguage(e.getText()); + } + e = eChannel.getChild("rating", getRSSNamespace()); + if (e != null) { + channel.setRating(e.getText()); + } + e = eChannel.getChild("copyright", getRSSNamespace()); + if (e != null) { + channel.setCopyright(e.getText()); + } + e = eChannel.getChild("pubDate", getRSSNamespace()); + if (e != null) { + channel.setPubDate(DateParser.parseDate(e.getText())); + } + e = eChannel.getChild("lastBuildDate", getRSSNamespace()); + if (e != null) { + channel.setLastBuildDate(DateParser.parseDate(e.getText())); + } + e = eChannel.getChild("docs", getRSSNamespace()); + if (e != null) { + channel.setDocs(e.getText()); + } + e = eChannel.getChild("managingEditor", getRSSNamespace()); + if (e != null) { + channel.setManagingEditor(e.getText()); + } + e = eChannel.getChild("webMaster", getRSSNamespace()); + if (e != null) { + channel.setWebMaster(e.getText()); + } + e = eChannel.getChild("skipHours"); + if (e != null) { + // setSkipHours expects Integer elements (per the ROME Channel API), so each hour + // is parsed to an Integer; hour values that are not numeric are skipped. + List skipHours = new ArrayList(); + List eHours = e.getChildren("hour", getRSSNamespace()); + for (Object hour : eHours) { + Element eHour = (Element) hour; + Integer parsedHour = NumberParser.parseInt(eHour.getText()); + if (parsedHour != null) { + skipHours.add(parsedHour); + } + } + channel.setSkipHours(skipHours); + } + + e = eChannel.getChild("skipDays"); + if (e != null) { + // Skip days are kept as trimmed day-name strings. + List skipDays = new ArrayList(); + List eDays = e.getChildren("day", getRSSNamespace()); + for (Object day : eDays) { + Element eDay = (Element) day; + skipDays.add(eDay.getText().trim()); + } + channel.setSkipDays(skipDays); + } + return channel; + } + + /** + * Parses the root element of an RSS document looking for image information. 
+ * + * It first invokes super.parseImage and then parses and injects the following + * properties if present: url, link, width, height and description. + * + * + * @param rssRoot the root element of the RSS document to parse for image information. + * @return the parsed RSSImage bean. + */ + protected Image parseImage(Element rssRoot) { + Image image = super.parseImage(rssRoot); + if (image != null) { + Element eImage = getImage(rssRoot); + Element e = eImage.getChild("width", getRSSNamespace()); + if (e != null) { + Integer val = NumberParser.parseInt(e.getText()); + if (val != null) { + image.setWidth(val); + } + } + e = eImage.getChild("height", getRSSNamespace()); + if (e != null) { + Integer val = NumberParser.parseInt(e.getText()); + if (val != null) { + image.setHeight(val); + } + } + e = eImage.getChild("description", getRSSNamespace()); + if (e != null) { + image.setDescription(e.getText()); + } + } + return image; + } + + /** + * It looks for the 'item' elements under the 'channel' elemment. + */ + protected List getItems(Element rssRoot) { + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + return (eChannel != null) ? eChannel.getChildren("item", getRSSNamespace()) : Collections.EMPTY_LIST; + } + + /** + * It looks for the 'image' elements under the 'channel' elemment. + */ + protected Element getImage(Element rssRoot) { + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + return (eChannel != null) ? eChannel.getChild("image", getRSSNamespace()) : null; + } + + /** + * To be overriden by RSS 0.91 Netscape parser + */ + protected String getTextInputLabel() { + return "textInput"; + } + + /** + * It looks for the 'textinput' elements under the 'channel' elemment. + */ + protected Element getTextInput(Element rssRoot) { + String elementName = getTextInputLabel(); + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + return (eChannel != null) ? 
eChannel.getChild(elementName, getRSSNamespace()) : null; + } + + /** + * Parses an item element of an RSS document looking for item information. + * + * It first invokes super.parseItem and then parses and injects the description property if present. + * + * + * @param rssRoot the root element of the RSS document in case it's needed for context. + * @param eItem the item element to parse. + * @return the parsed RSSItem bean. + */ + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + Element e = eItem.getChild("description", getRSSNamespace()); + if (e != null) { + item.setDescription(parseItemDescription(rssRoot, e)); + } + Element ce = eItem.getChild("encoded", getContentNamespace()); + if (ce != null) { + Content content = new Content(); + content.setType(Content.HTML); + content.setValue(ce.getText()); + item.setContent(content); + } + return item; + } + + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = new Description(); + desc.setType("text/plain"); + desc.setValue(eDesc.getText()); + return desc; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java new file mode 100644 index 0000000..18ebbed --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java @@ -0,0 +1,143 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.*; +import com.rometools.rome.io.impl.NumberParser; +import org.jdom2.Element; + +import java.util.ArrayList; +import java.util.List; + +/** + */ +@SuppressWarnings({ "rawtypes", "unchecked" }) +public class RSS092Parser extends RSS091UserlandParser { + + public RSS092Parser() { + this("rss_0.92"); + } + + protected RSS092Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "0.92"; + } + + protected WireFeed parseChannel(Element rssRoot) { + Channel channel = (Channel) super.parseChannel(rssRoot); + + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + Element eCloud = eChannel.getChild("cloud", getRSSNamespace()); + if (eCloud != null) { + Cloud cloud = new Cloud(); + String att = eCloud.getAttributeValue("domain");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + cloud.setDomain(att); + } + att = eCloud.getAttributeValue("port");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + cloud.setPort(Integer.parseInt(att.trim())); + } + att = eCloud.getAttributeValue("path");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + cloud.setPath(att); + } + att = eCloud.getAttributeValue("registerProcedure");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + cloud.setRegisterProcedure(att); + } + att = eCloud.getAttributeValue("protocol");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + cloud.setProtocol(att); + } + channel.setCloud(cloud); + } + return channel; + } + + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + + Element e = eItem.getChild("source", getRSSNamespace()); + if (e != null) { + Source source = new Source(); + 
String url = e.getAttributeValue("url");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + source.setUrl(url); + source.setValue(e.getText()); + item.setSource(source); + } + + // 0.92 allows one enclosure occurrence, 0.93 multiple + // just saving to write some code. + List eEnclosures = eItem.getChildren("enclosure");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (!eEnclosures.isEmpty()) { + List enclosures = new ArrayList(); + for (Object eEnclosure : eEnclosures) { + e = (Element) eEnclosure; + + Enclosure enclosure = new Enclosure(); + String att = e.getAttributeValue("url");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + enclosure.setUrl(att); + } + att = e.getAttributeValue("length");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + enclosure.setLength(NumberParser.parseLong(att, 0L)); + + att = e.getAttributeValue("type");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + enclosure.setType(att); + } + enclosures.add(enclosure); + } + item.setEnclosures(enclosures); + } + + List eCats = eItem.getChildren("category");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + item.setCategories(parseCategories(eCats)); + + return item; + } + + protected List parseCategories(List eCats) { + List cats = null; + if (!eCats.isEmpty()) { + cats = new ArrayList(); + for (Object eCat : eCats) { + Category cat = new Category(); + Element e = (Element) eCat; + String att = e.getAttributeValue("domain");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + cat.setDomain(att); + } + cat.setValue(e.getText()); + cats.add(cat); + } + } + return cats; + } + + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = super.parseItemDescription(rssRoot, eDesc); + desc.setType("text/html"); + return desc; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java new file mode 
100644 index 0000000..cbfb14d --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java @@ -0,0 +1,59 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.rss.Item; +import org.jdom2.Element; + +/** + * + */ +public class RSS093Parser extends RSS092Parser { + + public RSS093Parser() { + this("rss_0.93"); + } + + protected RSS093Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "0.93"; + } + + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + Element e = eItem.getChild("pubDate", getRSSNamespace()); + if (e != null) { + item.setPubDate(DateParser.parseDate(e.getText())); + } + e = eItem.getChild("expirationDate", getRSSNamespace()); + if (e != null) { + item.setExpirationDate(DateParser.parseDate(e.getText())); + } + e = eItem.getChild("description", getRSSNamespace()); + if (e != null) { + String type = e.getAttributeValue("type"); + if (type != null) { + item.getDescription().setType(type); + } + } + return item; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java new file mode 100644 index 0000000..0e632bb --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java @@ -0,0 +1,106 
@@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.Channel; +import com.rometools.rome.feed.rss.Description; +import com.rometools.rome.feed.rss.Guid; +import com.rometools.rome.feed.rss.Item; +import org.jdom2.Element; + +import java.util.List; +import java.util.Optional; + +/** + */ +public class RSS094Parser extends RSS093Parser { + + public RSS094Parser() { + this("rss_0.94"); + } + + protected RSS094Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "0.94"; + } + + @SuppressWarnings("UnnecessarySemicolon") + protected WireFeed parseChannel(Element rssRoot) { + Channel channel = (Channel) super.parseChannel(rssRoot); + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + + List+ * + * @return returns "purl.org". + */ + protected Namespace getRSSNamespace() { + return Namespace.getNamespace(RSS_URI); + } + + /** + * Parses an item element of an RSS document looking for item information. + *
+ * It first invokes super.parseItem and then parses and injects the description property if present. + * + * + * @param rssRoot the root element of the RSS document in case it's needed for context. + * @param eItem the item element to parse. + * @return the parsed RSSItem bean. + */ + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + Element e = eItem.getChild("description", getRSSNamespace()); + if (e != null) { + item.setDescription(parseItemDescription(rssRoot, e)); + } + Element ce = eItem.getChild("encoded", getContentNamespace()); + if (ce != null) { + Content content = new Content(); + content.setType(Content.HTML); + content.setValue(ce.getText()); + item.setContent(content); + } + + String uri = eItem.getAttributeValue("about", getRDFNamespace()); + if (uri != null) { + item.setUri(uri); + } + + return item; + } + + protected WireFeed parseChannel(Element rssRoot) { + Channel channel = (Channel) super.parseChannel(rssRoot); + + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + String uri = eChannel.getAttributeValue("about", getRDFNamespace()); + if (uri != null) { + channel.setUri(uri); + } + + return channel; + } + + @SuppressWarnings("unused") + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = new Description(); + desc.setType("text/plain"); + desc.setValue(eDesc.getText()); + return desc; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java new file mode 100644 index 0000000..95c65c5 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java @@ -0,0 +1,67 @@ +package dev.rsems.rometools.rome.io.impl; + +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +import com.rometools.rome.feed.rss.Description; +import org.jdom2.Attribute; +import org.jdom2.Document; +import org.jdom2.Element; + +/** + */ +public class RSS20Parser extends RSS094Parser { + + public RSS20Parser() { + this("rss_2.0"); + } + + protected RSS20Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "2.0"; + } + + protected boolean isHourFormat24(Element rssRoot) { + return false; + } + + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = super.parseItemDescription(rssRoot, eDesc); + desc.setType("text/html"); // change as per https://rome.dev.java.net/issues/show_bug.cgi?id=26 + return desc; + } + + public boolean isMyType(Document document) { + boolean ok; + Element rssRoot = document.getRootElement(); + ok = rssRoot.getName().equals("rss"); + if (ok) { + ok = false; + Attribute version = rssRoot.getAttribute("version"); + if (version != null) { + // At this point, as far ROME is concerned RSS 2.0, 2.00 and + // 2.0.X are all the same, so let's use startsWith for leniency. + ok = version.getValue().startsWith(getRSSVersion()); + } + } + return ok; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java new file mode 100644 index 0000000..9b1c248 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java @@ -0,0 +1,71 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +/** + * To address issue with certain feeds (brought up by Charles Miller): + *- * @param sDate string to parse for a date. - * @return the Date represented by the given W3C date-time string. - * It returns null if it was not possible to parse the given string into a Date. - * - * */ - public static Date parseDate(String sDate) { - Date d = parseW3CDateTime(sDate); - if (d == null) { - d = parseRFC822(sDate); - if (d == null && ADDITIONAL_MASKS.length > 0) { - d = parseUsingMask(ADDITIONAL_MASKS, sDate); - } - if (d == null) { // Schroeder 11-2012 - d = parseUsingMask(masks, sDate); - } - } - return d; - } - - /** - * create a RFC822 representation of a date. - *
- * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. - * - * @param date Date to parse - * @return the RFC822 represented by the given Date - * It returns null if it was not possible to parse the date. - * - */ - @SuppressWarnings("unused") - public static String formatRFC822(Date date) { - SimpleDateFormat dateFormater = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US); - dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); - return dateFormater.format(date); - } - - /** - * create a W3C Date Time representation of a date. - * - * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. - * - * @param date Date to parse - * @return the W3C Date Time represented by the given Date - * It returns null if it was not possible to parse the date. - * - */ - @SuppressWarnings("unused") - public static String formatW3CDateTime(Date date) { - SimpleDateFormat dateFormater = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); - dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); - return dateFormater.format(date); - } - - - public static void main(String[] args) { - System.out.println(parseDate("Mon, 19 Nov 2012 23:22:39 Z")); - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java deleted file mode 100644 index 53ddd17..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.Channel; -import com.rometools.rome.feed.rss.Image; -import com.rometools.rome.feed.rss.Item; -import com.rometools.rome.feed.rss.TextInput; -import com.rometools.rome.io.FeedException; -import com.rometools.rome.io.impl.BaseWireFeedParser; -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -import java.util.*; - -/** - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class RSS090Parser extends BaseWireFeedParser { - private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; - private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/"; - private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/"; - - private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI); - private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI); - private static final Namespace CONTENT_NS = Namespace.getNamespace(CONTENT_URI); - - public RSS090Parser() { - this("rss_0.9", RSS_NS); - } - - protected RSS090Parser(String type, Namespace ns) { - super(type, ns); - } - - public boolean isMyType(Document document) { - boolean ok = false; - - Element rssRoot = document.getRootElement(); - Namespace defaultNS = rssRoot.getNamespace(); - List additionalNSs = rssRoot.getAdditionalNamespaces(); - - ok = defaultNS != null && defaultNS.equals(getRDFNamespace()); - if (ok) { - if (additionalNSs == null) { - ok = 
false; - } else { - ok = false; - for (int i = 0; !ok && i < additionalNSs.size(); i++) { - ok = getRSSNamespace().equals(additionalNSs.get(i)); - } - } - } - return ok; - } - - @SuppressWarnings("unused") - public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException, FeedException { - if (validate) { - validateFeed(document); - } - Element rssRoot = document.getRootElement(); - return parseChannel(rssRoot); - } - - @SuppressWarnings("all") - protected void validateFeed(@SuppressWarnings("unused") Document document) throws FeedException { - // TBD - // here we have to validate the Feed against a schema or whatever - // not sure how to do it - // one posibility would be to inject our own schema for the feed (they don't exist out there) - // to the document, produce an ouput and attempt to parse it again with validation turned on. - // otherwise will have to check the document elements by hand. - } - - /** - * Returns the namespace used by RSS elements in document of the RSS version the parser supports. - *- * This implementation returns the EMTPY namespace. - *
- * - * @return returns the EMPTY namespace. - */ - protected Namespace getRSSNamespace() { - return RSS_NS; - } - - /** - * Returns the namespace used by RDF elements in document of the RSS version the parser supports. - *
- * This implementation returns the EMTPY namespace. - *
- * - * @return returns the EMPTY namespace. - */ - protected Namespace getRDFNamespace() { - return RDF_NS; - } - - /** - * Returns the namespace used by Content Module elements in document. - *
- * This implementation returns the EMTPY namespace. - *
- * - * @return returns the EMPTY namespace. - */ - protected Namespace getContentNamespace() { - return CONTENT_NS; - } - - /** - * Parses the root element of an RSS document into a Channel bean. - *
- * It reads title, link and description and delegates to parseImage, parseItems - * and parseTextInput. This delegation always passes the root element of the RSS - * document as different RSS version may have this information in different parts - * of the XML tree (no assumptions made thanks to the specs variaty) - * - * - * @param rssRoot the root element of the RSS document to parse. - * @return the parsed Channel bean. - */ - protected WireFeed parseChannel(Element rssRoot) { - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - - Channel channel = new Channel(getType()); - - Element e = eChannel.getChild("title", getRSSNamespace()); - if (e != null) { - channel.setTitle(e.getText()); - } - e = eChannel.getChild("link", getRSSNamespace()); - if (e != null) { - channel.setLink(e.getText()); - } - e = eChannel.getChild("description", getRSSNamespace()); - if (e != null) { - channel.setDescription(e.getText()); - } - - channel.setImage(parseImage(rssRoot)); - - channel.setTextInput(parseTextInput(rssRoot)); - - // Unfortunately Microsoft's SSE extension has a special case of - // effectively putting the sharing channel module inside the RSS tag - // and not inside the channel itself. So we also need to look for - // channel modules from the root RSS element. 
- List allFeedModules = new ArrayList(); - List rootModules = parseFeedModules(rssRoot, Locale.getDefault()); - List channelModules = parseFeedModules(eChannel, Locale.getDefault()); - if (rootModules != null) { - allFeedModules.addAll(rootModules); - } - if (channelModules != null) { - allFeedModules.addAll(channelModules); - } - channel.setModules(allFeedModules); - channel.setItems(parseItems(rssRoot)); - - List foreignMarkup = extractForeignMarkup(eChannel, channel, getRSSNamespace()); - if (!foreignMarkup.isEmpty()) { - channel.setForeignMarkup(foreignMarkup); - } - return channel; - } - - /** - * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment. - * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element. - * - */ - protected List getItems(Element rssRoot) { - return rssRoot.getChildren("item", getRSSNamespace()); - } - - /** - * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment. - * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. - * - */ - protected Element getImage(Element rssRoot) { - return rssRoot.getChild("image", getRSSNamespace()); - } - - /** - * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment. - * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. - * - */ - protected Element getTextInput(Element rssRoot) { - return rssRoot.getChild("textinput", getRSSNamespace()); - } - - /** - * Parses the root element of an RSS document looking for image information. - * - * It reads title and url out of the 'image' element. - * - * - * @param rssRoot the root element of the RSS document to parse for image information. - * @return the parsed image bean. 
- */ - protected Image parseImage(Element rssRoot) { - Image image = null; - Element eImage = getImage(rssRoot); - if (eImage != null) { - image = new Image(); - - Element e = eImage.getChild("title", getRSSNamespace()); - if (e != null) { - image.setTitle(e.getText()); - } - e = eImage.getChild("url", getRSSNamespace()); - if (e != null) { - image.setUrl(e.getText()); - } - e = eImage.getChild("link", getRSSNamespace()); - if (e != null) { - image.setLink(e.getText()); - } - } - return image; - } - - /** - * Parses the root element of an RSS document looking for all items information. - * - * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem() - * for each item element. The resulting RSSItem of each item element is stored in a list. - * - * - * @param rssRoot the root element of the RSS document to parse for all items information. - * @return a list with all the parsed RSSItem beans. - */ - protected List parseItems(Element rssRoot) { - Collection eItems = getItems(rssRoot); - - List items = new ArrayList(); - for (Object item : eItems) { - Element eItem = (Element) item; - items.add(parseItem(rssRoot, eItem)); - } - return items; - } - - /** - * Parses an item element of an RSS document looking for item information. - * - * It reads title and link out of the 'item' element. - * - * - * @param rssRoot the root element of the RSS document in case it's needed for context. - * @param eItem the item element to parse. - * @return the parsed RSSItem bean. 
- */ - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = new Item(); - Element e = eItem.getChild("title", getRSSNamespace()); - if (e != null) { - item.setTitle(e.getText()); - } - e = eItem.getChild("link", getRSSNamespace()); - if (e != null) { - item.setLink(e.getText()); - item.setUri(e.getText()); - } - - item.setModules(parseItemModules(eItem, Locale.getDefault())); - - List foreignMarkup = extractForeignMarkup(eItem, item, getRSSNamespace()); - //content:encoded elements are treated special, without a module, they have to be removed from the foreign - //markup to avoid duplication in case of read/write. Note that this fix will break if a content module is - //used - Iterator iterator = foreignMarkup.iterator(); - while (iterator.hasNext()) { - Element ie = (Element) iterator.next(); - if (getContentNamespace().equals(ie.getNamespace()) && ie.getName().equals("encoded")) { - iterator.remove(); - } - } - if (!foreignMarkup.isEmpty()) { - item.setForeignMarkup(foreignMarkup); - } - return item; - } - - /** - * Parses the root element of an RSS document looking for text-input information. - * - * It reads title, description, name and link out of the 'textinput' or 'textInput' element. - * - * - * @param rssRoot the root element of the RSS document to parse for text-input information. - * @return the parsed RSSTextInput bean. 
- */ - protected TextInput parseTextInput(Element rssRoot) { - TextInput textInput = null; - Element eTextInput = getTextInput(rssRoot); - if (eTextInput != null) { - textInput = new TextInput(); - Element e = eTextInput.getChild("title", getRSSNamespace()); - if (e != null) { - textInput.setTitle(e.getText()); - } - e = eTextInput.getChild("description", getRSSNamespace()); - if (e != null) { - textInput.setDescription(e.getText()); - } - e = eTextInput.getChild("name", getRSSNamespace()); - if (e != null) { - textInput.setName(e.getText()); - } - e = eTextInput.getChild("link", getRSSNamespace()); - if (e != null) { - textInput.setLink(e.getText()); - } - } - return textInput; - } - - @Override - @SuppressWarnings("all") - public WireFeed parse(org.jdom2.Document document, boolean b, Locale locale) throws IllegalArgumentException, FeedException { - return null; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java deleted file mode 100644 index c25200f..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.*; -import com.rometools.rome.io.impl.NumberParser; -import org.jdom2.Attribute; -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class RSS091UserlandParser extends RSS090Parser { - - public RSS091UserlandParser() { - this("rss_0.91U"); - } - - protected RSS091UserlandParser(String type) { - super(type, null); - } - - public boolean isMyType(Document document) { - boolean ok; - Element rssRoot = document.getRootElement(); - ok = rssRoot.getName().equals("rss"); - if (ok) { - ok = false; - Attribute version = rssRoot.getAttribute("version"); - if (version != null) { - ok = version.getValue().equals(getRSSVersion()); - } - } - return ok; - } - - protected String getRSSVersion() { - return "0.91"; - } - - protected Namespace getRSSNamespace() { - return Namespace.getNamespace(""); - } - - /** - * To be overriden by RSS 0.91 Netscape and RSS 0.94 - */ - @SuppressWarnings("unused") - protected boolean isHourFormat24(Element rssRoot) { - return true; - } - - /** - * Parses the root element of an RSS document into a Channel bean. - * - * It first invokes super.parseChannel and then parses and injects the following - * properties if present: language, pubDate, rating and copyright. - * - * - * @param rssRoot the root element of the RSS document to parse. - * @return the parsed Channel bean. 
- */ - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - - Element e = eChannel.getChild("language", getRSSNamespace()); - if (e != null) { - channel.setLanguage(e.getText()); - } - e = eChannel.getChild("rating", getRSSNamespace()); - if (e != null) { - channel.setRating(e.getText()); - } - e = eChannel.getChild("copyright", getRSSNamespace()); - if (e != null) { - channel.setCopyright(e.getText()); - } - e = eChannel.getChild("pubDate", getRSSNamespace()); - if (e != null) { - channel.setPubDate(DateParser.parseDate(e.getText())); - } - e = eChannel.getChild("lastBuildDate", getRSSNamespace()); - if (e != null) { - channel.setLastBuildDate(DateParser.parseDate(e.getText())); - } - e = eChannel.getChild("docs", getRSSNamespace()); - if (e != null) { - channel.setDocs(e.getText()); - } - e = eChannel.getChild("docs", getRSSNamespace()); - if (e != null) { - channel.setDocs(e.getText()); - } - e = eChannel.getChild("managingEditor", getRSSNamespace()); - if (e != null) { - channel.setManagingEditor(e.getText()); - } - e = eChannel.getChild("webMaster", getRSSNamespace()); - if (e != null) { - channel.setWebMaster(e.getText()); - } - e = eChannel.getChild("skipHours"); - if (e != null) { - List skipHours = new ArrayList(); - List eHours = e.getChildren("hour", getRSSNamespace()); - for (Object hour : eHours) { - Element eHour = (Element) hour; - skipHours.add(eHour.getText().trim()); - } - channel.setSkipHours(skipHours); - } - - e = eChannel.getChild("skipDays"); - if (e != null) { - List skipDays = new ArrayList(); - List eDays = e.getChildren("day", getRSSNamespace()); - for (Object day : eDays) { - Element eDay = (Element) day; - skipDays.add(eDay.getText().trim()); - } - channel.setSkipDays(skipDays); - } - return channel; - } - - /** - * Parses the root element of an RSS document looking for image information. 
- * - * It first invokes super.parseImage and then parses and injects the following - * properties if present: url, link, width, height and description. - * - * - * @param rssRoot the root element of the RSS document to parse for image information. - * @return the parsed RSSImage bean. - */ - protected Image parseImage(Element rssRoot) { - Image image = super.parseImage(rssRoot); - if (image != null) { - Element eImage = getImage(rssRoot); - Element e = eImage.getChild("width", getRSSNamespace()); - if (e != null) { - Integer val = NumberParser.parseInt(e.getText()); - if (val != null) { - image.setWidth(val); - } - } - e = eImage.getChild("height", getRSSNamespace()); - if (e != null) { - Integer val = NumberParser.parseInt(e.getText()); - if (val != null) { - image.setHeight(val); - } - } - e = eImage.getChild("description", getRSSNamespace()); - if (e != null) { - image.setDescription(e.getText()); - } - } - return image; - } - - /** - * It looks for the 'item' elements under the 'channel' elemment. - */ - protected List getItems(Element rssRoot) { - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - return (eChannel != null) ? eChannel.getChildren("item", getRSSNamespace()) : Collections.EMPTY_LIST; - } - - /** - * It looks for the 'image' elements under the 'channel' elemment. - */ - protected Element getImage(Element rssRoot) { - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - return (eChannel != null) ? eChannel.getChild("image", getRSSNamespace()) : null; - } - - /** - * To be overriden by RSS 0.91 Netscape parser - */ - protected String getTextInputLabel() { - return "textInput"; - } - - /** - * It looks for the 'textinput' elements under the 'channel' elemment. - */ - protected Element getTextInput(Element rssRoot) { - String elementName = getTextInputLabel(); - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - return (eChannel != null) ? 
eChannel.getChild(elementName, getRSSNamespace()) : null; - } - - /** - * Parses an item element of an RSS document looking for item information. - * - * It first invokes super.parseItem and then parses and injects the description property if present. - * - * - * @param rssRoot the root element of the RSS document in case it's needed for context. - * @param eItem the item element to parse. - * @return the parsed RSSItem bean. - */ - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - Element e = eItem.getChild("description", getRSSNamespace()); - if (e != null) { - item.setDescription(parseItemDescription(rssRoot, e)); - } - Element ce = eItem.getChild("encoded", getContentNamespace()); - if (ce != null) { - Content content = new Content(); - content.setType(Content.HTML); - content.setValue(ce.getText()); - item.setContent(content); - } - return item; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = new Description(); - desc.setType("text/plain"); - desc.setValue(eDesc.getText()); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java deleted file mode 100644 index e0a3cf4..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.*; -import com.rometools.rome.io.impl.NumberParser; -import org.jdom2.Element; - -import java.util.ArrayList; -import java.util.List; - -/** - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class RSS092Parser extends RSS091UserlandParser { - - public RSS092Parser() { - this("rss_0.92"); - } - - protected RSS092Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "0.92"; - } - - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - Element eCloud = eChannel.getChild("cloud", getRSSNamespace()); - if (eCloud != null) { - Cloud cloud = new Cloud(); - String att = eCloud.getAttributeValue("domain");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setDomain(att); - } - att = eCloud.getAttributeValue("port");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setPort(Integer.parseInt(att.trim())); - } - att = eCloud.getAttributeValue("path");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setPath(att); - } - att = eCloud.getAttributeValue("registerProcedure");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setRegisterProcedure(att); - } - att = eCloud.getAttributeValue("protocol");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setProtocol(att); - } - channel.setCloud(cloud); - } - return channel; - } - - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - - Element e = eItem.getChild("source", getRSSNamespace()); - if (e != null) { - Source source = new Source(); 
- String url = e.getAttributeValue("url");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - source.setUrl(url); - source.setValue(e.getText()); - item.setSource(source); - } - - // 0.92 allows one enclosure occurrence, 0.93 multiple - // just saving to write some code. - List eEnclosures = eItem.getChildren("enclosure");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (!eEnclosures.isEmpty()) { - List enclosures = new ArrayList(); - for (Object eEnclosure : eEnclosures) { - e = (Element) eEnclosure; - - Enclosure enclosure = new Enclosure(); - String att = e.getAttributeValue("url");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - enclosure.setUrl(att); - } - att = e.getAttributeValue("length");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - enclosure.setLength(NumberParser.parseLong(att, 0L)); - - att = e.getAttributeValue("type");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - enclosure.setType(att); - } - enclosures.add(enclosure); - } - item.setEnclosures(enclosures); - } - - List eCats = eItem.getChildren("category");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - item.setCategories(parseCategories(eCats)); - - return item; - } - - protected List parseCategories(List eCats) { - List cats = null; - if (!eCats.isEmpty()) { - cats = new ArrayList(); - for (Object eCat : eCats) { - Category cat = new Category(); - Element e = (Element) eCat; - String att = e.getAttributeValue("domain");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cat.setDomain(att); - } - cat.setValue(e.getText()); - cats.add(cat); - } - } - return cats; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = super.parseItemDescription(rssRoot, eDesc); - desc.setType("text/html"); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java deleted file 
mode 100644 index 136e1d7..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.rss.Item; -import org.jdom2.Element; - -/** - * - */ -public class RSS093Parser extends RSS092Parser { - - public RSS093Parser() { - this("rss_0.93"); - } - - protected RSS093Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "0.93"; - } - - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - Element e = eItem.getChild("pubDate", getRSSNamespace()); - if (e != null) { - item.setPubDate(DateParser.parseDate(e.getText())); - } - e = eItem.getChild("expirationDate", getRSSNamespace()); - if (e != null) { - item.setExpirationDate(DateParser.parseDate(e.getText())); - } - e = eItem.getChild("description", getRSSNamespace()); - if (e != null) { - String type = e.getAttributeValue("type"); - if (type != null) { - item.getDescription().setType(type); - } - } - return item; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java deleted file mode 100644 index 9f93ffb..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java +++ 
/dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.Channel; -import com.rometools.rome.feed.rss.Description; -import com.rometools.rome.feed.rss.Guid; -import com.rometools.rome.feed.rss.Item; -import org.jdom2.Element; - -import java.util.List; -import java.util.Optional; - -/** - */ -public class RSS094Parser extends RSS093Parser { - - public RSS094Parser() { - this("rss_0.94"); - } - - protected RSS094Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "0.94"; - } - - @SuppressWarnings("UnnecessarySemicolon") - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - - List- * - * @return returns "purl.org". - */ - protected Namespace getRSSNamespace() { - return Namespace.getNamespace(RSS_URI); - } - - /** - * Parses an item element of an RSS document looking for item information. - *
- * It first invokes super.parseItem and then parses and injects the description property if present. - * - * - * @param rssRoot the root element of the RSS document in case it's needed for context. - * @param eItem the item element to parse. - * @return the parsed RSSItem bean. - */ - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - Element e = eItem.getChild("description", getRSSNamespace()); - if (e != null) { - item.setDescription(parseItemDescription(rssRoot, e)); - } - Element ce = eItem.getChild("encoded", getContentNamespace()); - if (ce != null) { - Content content = new Content(); - content.setType(Content.HTML); - content.setValue(ce.getText()); - item.setContent(content); - } - - String uri = eItem.getAttributeValue("about", getRDFNamespace()); - if (uri != null) { - item.setUri(uri); - } - - return item; - } - - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - String uri = eChannel.getAttributeValue("about", getRDFNamespace()); - if (uri != null) { - channel.setUri(uri); - } - - return channel; - } - - @SuppressWarnings("unused") - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = new Description(); - desc.setType("text/plain"); - desc.setValue(eDesc.getText()); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java deleted file mode 100644 index e645ca2..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java +++ /dev/null @@ -1,67 +0,0 @@ -package dev.rsems.syndication.rome.io.impl; - -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -import com.rometools.rome.feed.rss.Description; -import org.jdom2.Attribute; -import org.jdom2.Document; -import org.jdom2.Element; - -/** - */ -public class RSS20Parser extends RSS094Parser { - - public RSS20Parser() { - this("rss_2.0"); - } - - protected RSS20Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "2.0"; - } - - protected boolean isHourFormat24(Element rssRoot) { - return false; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = super.parseItemDescription(rssRoot, eDesc); - desc.setType("text/html"); // change as per https://rome.dev.java.net/issues/show_bug.cgi?id=26 - return desc; - } - - public boolean isMyType(Document document) { - boolean ok; - Element rssRoot = document.getRootElement(); - ok = rssRoot.getName().equals("rss"); - if (ok) { - ok = false; - Attribute version = rssRoot.getAttribute("version"); - if (version != null) { - // At this point, as far ROME is concerned RSS 2.0, 2.00 and - // 2.0.X are all the same, so let's use startsWith for leniency. - ok = version.getValue().startsWith(getRSSVersion()); - } - } - return ok; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java deleted file mode 100644 index db6f016..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -/** - * To address issue with certain feeds (brought up by Charles Miller): - *This is a feed aggregator & feed reader application
+