From 87f05e6894661f747bf1888e66fd89a8ef622cb5 Mon Sep 17 00:00:00 2001 From: Schroeder Date: Mon, 12 Feb 2024 17:41:04 +0100 Subject: [PATCH] Added experimental about page, updated more Rome code, first preparations to adapt feed fetcher code --- .../dev/rsems/feedreader/FeedReaderController.java | 206 +++++++++++- .../rsems/rometools/rome/io/impl/DateParser.java | 291 +++++++++++++++++ .../rsems/rometools/rome/io/impl/RSS090Parser.java | 345 +++++++++++++++++++++ .../rome/io/impl/RSS091UserlandParser.java | 250 +++++++++++++++ .../rsems/rometools/rome/io/impl/RSS092Parser.java | 143 +++++++++ .../rsems/rometools/rome/io/impl/RSS093Parser.java | 59 ++++ .../rsems/rometools/rome/io/impl/RSS094Parser.java | 106 +++++++ .../rsems/rometools/rome/io/impl/RSS10Parser.java | 139 +++++++++ .../rsems/rometools/rome/io/impl/RSS20Parser.java | 67 ++++ .../rometools/rome/io/impl/RSS20wNSParser.java | 71 +++++ .../rsems/syndication/rome/io/impl/DateParser.java | 291 ----------------- .../syndication/rome/io/impl/RSS090Parser.java | 345 --------------------- .../rome/io/impl/RSS091UserlandParser.java | 250 --------------- .../syndication/rome/io/impl/RSS092Parser.java | 143 --------- .../syndication/rome/io/impl/RSS093Parser.java | 59 ---- .../syndication/rome/io/impl/RSS094Parser.java | 106 ------- .../syndication/rome/io/impl/RSS10Parser.java | 139 --------- .../syndication/rome/io/impl/RSS20Parser.java | 67 ---- .../syndication/rome/io/impl/RSS20wNSParser.java | 71 ----- src/main/resources/rome.properties | 22 +- src/main/resources/templates/about.html | 13 + src/main/resources/templates/index.html | 1 + 22 files changed, 1700 insertions(+), 1484 deletions(-) create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/DateParser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java create mode 100644 
src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS10Parser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java create mode 100644 src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/DateParser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS10Parser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java delete mode 100644 src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java create mode 100644 src/main/resources/templates/about.html diff --git a/src/main/java/dev/rsems/feedreader/FeedReaderController.java b/src/main/java/dev/rsems/feedreader/FeedReaderController.java index b4b48fb..cf8eec7 100644 --- a/src/main/java/dev/rsems/feedreader/FeedReaderController.java +++ b/src/main/java/dev/rsems/feedreader/FeedReaderController.java @@ -1,18 +1,220 @@ package dev.rsems.feedreader; +import com.rometools.fetcher.FeedFetcher; +import com.rometools.fetcher.impl.FeedFetcherCache; +import com.rometools.fetcher.impl.HttpURLFeedFetcher; +import com.rometools.fetcher.impl.LinkedHashMapFeedInfoCache; +import dev.rsems.feedreader.fetcher.FetcherEventListenerImpl; +import lombok.extern.slf4j.Slf4j; +import 
org.slf4j.Logger; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Controller; import org.springframework.ui.Model; import org.springframework.web.bind.annotation.GetMapping; -@Controller +import java.text.DateFormat; +import java.text.SimpleDateFormat; + +@Controller @Slf4j public class FeedReaderController { @Value("${spring.application.name}") String appName; + private static final Logger logger = log; + + private static final FeedFetcherCache feedInfoCache = LinkedHashMapFeedInfoCache.getInstance(); + public static final FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache); + private static final FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); + + static { + fetcher.addFetcherEventListener(listener); + } + + private static final int MAX_ITEMS_PER_FEED = 20; + private static final DateFormat TIME_FMT = new SimpleDateFormat("HH:mm"); + private static final DateFormat DAY_FMT = new SimpleDateFormat("EEEE',' dd. M. 
yyyy"); + @GetMapping("/") - public String homePage(Model model) { + public String index(Model model) { model.addAttribute("appName", appName); + +// Stopwatch stw = new Stopwatch(true); +// +// Set feedFetcherTasks = new TreeSet(); +// +// logger.info("[" + "] Reading database and building the feed fetcher tasks list"); +// +// ConnectionSource connectionSource = null; +// try { +// connectionSource = new JdbcConnectionSource("jdbc:mysql://db.rsems.de/feeds?user=feeds&password=fems1211"); +// Dao userFeedDao = DaoManager.createDao(connectionSource, UserFeed.class); +// CloseableIterator ituf = userFeedDao.closeableIterator(); +// try { +// int i = 0; +// while (ituf.hasNext()) { +// UserFeed userFeed = ituf.next(); +// if (userFeed.getVisible()) { +// i++; +// +// feedFetcherTasks.add(new FeedFetcherTask(i, userFeed.getFeed().getName(), userFeed.getFeed().getUrl(), "", userFeed.getFilter())); +// // logger.info("[" + remoteHost + "] #" + new DecimalFormat("00").format(i) + " " + userFeed.getFeed().getName() + " added"); +// } +// } +// } catch (Exception e) { +// logger.error(ExceptionUtils.getMessages(e)); +// logger.error("\n" + ExceptionUtils.getStackTrace(e)); +// throw new FeedException(e); +// } finally { +// ituf.close(); +// } +// } catch (SQLException e) { +// logger.error(ExceptionUtils.getMessages(e)); +// logger.error("\n" + ExceptionUtils.getStackTrace(e)); +// throw new FeedException("Failed to access database", e); +// } finally { +// if (connectionSource != null) { +// try { +// connectionSource.close(); +// } catch (SQLException unhandled) { +// } +// } +// } +// +// logger.info("[" + remoteHost + "] About to invoke " + feedFetcherTasks.size() + " feed retrieval tasks after " +// + stw.getTimeElapsed() + " ms"); +// +// List> feedFutures = null; +// ExecutorService executorService = Executors.newCachedThreadPool(); +// try { +// feedFutures = executorService.invokeAll(feedFetcherTasks); +// } catch (InterruptedException e) { +// 
logger.error(ExceptionUtils.getMessages(e)); +// logger.error("\n" + ExceptionUtils.getStackTrace(e)); +// throw new FeedException(e); +// } +// +// logger.info("[" + remoteHost + "] About to extract retrieved data from task results after " + stw.getTimeElapsed() + " ms"); +// +// ArrayList feeds = new ArrayList(); +// try { +// for (Future ff : feedFutures) { +// feeds.add(ff.get()); +// } +// } catch (Exception e) { +// logger.error(ExceptionUtils.getMessages(e)); +// logger.error("\n" + ExceptionUtils.getStackTrace(e)); +// throw new FeedException(e); +// } finally { +// executorService.shutdown(); +// } +// +// logger.info("[" + remoteHost + "] About to render retrieved data after " + stw.getTimeElapsed() + " ms"); +// +// add(new Label("head.title", Globals.TITLE)); +// RepeatingView feedRv = new RepeatingView("feed"); +// add(feedRv); +// +// int fc = 0; +// for (IFeed feed : feeds) { +// fc++; +// +// long lap = stw.getTimeElapsed(); +// +// WebMarkupContainer feedMarkup = new WebMarkupContainer(feedRv.newChildId()); +// feedRv.add(feedMarkup); +// String errorStatus = feed.getErrorStatus(); +// +// feedMarkup.add(new ExternalLink("feedtitle", (feed.getLink() == null ? "" : feed.getLink()), feed.getName())); +// +// if (errorStatus.equals("")) { +// feedMarkup.add(new ExternalLink("feedurl", feed.getUrl().toExternalForm(), feed.getTitle())); +// feedMarkup +// .add(new Label("feeddate", feed.getDate() != null ? 
Globals.DEFAULT_DISPLAY_DATE_FORMAT.format(feed.getDate()) : " ")); // this is ANSI 160, not space +// } else { +// feedMarkup.add(new ExternalLink("feedurl", feed.getUrl().toExternalForm(), "Error retrieving feed").add(new Behavior() { +// @Override +// public void beforeRender(Component component) { +// component.getResponse().write(""); +// super.beforeRender(component); +// } +// +// @Override +// public void afterRender(Component component) { +// super.afterRender(component); +// component.getResponse().write(""); +// } +// })); +// feedMarkup.add(new Label("feeddate", errorStatus)); +// } +// +// RepeatingView itemRv = new RepeatingView("item"); +// feedMarkup.add(itemRv); +// String lastDay = null; +// ArrayList items = feed.getFeedItems(); +// +// if (feed.getErrorStatus() != "") { +// logger.info("[Client " + remoteHost + "] " + feed.getErrorStatus() + "\n" + feed.getStackTrace()); +// } +// +// Collections.sort(items, FeedItem.LATEST_FIRST); +// int count = 0; +// for (Iterator it = items.iterator(); it.hasNext(); ) { +// FeedItem item = it.next(); +// +// String itemlabel = item.getTitle() == null ? "—" : StringUtils.cleanHtml(item.getTitle(), feed.getUrl().getPath()); +// +// if (!item.getTitle().isEmpty() && !feed.getFilter().matches(itemlabel)) { +// +// WebMarkupContainer itemMarkup = new WebMarkupContainer(itemRv.newChildId()); +// itemRv.add(itemMarkup); +// +// Date itemDate = item.getDate(); +// String day = itemDate == null ? " " : DAY_FMT.format(itemDate); //   not space +// final boolean dayEqualsLastDay = day.equals(lastDay); +// itemMarkup.add(new Label("date", day) { +// @Override +// public boolean isVisible() { +// return !dayEqualsLastDay; +// } +// }); +// if (!dayEqualsLastDay) { +// lastDay = day; +// } +// itemMarkup.add(new Label("itemdate", itemDate == null ? "—" : TIME_FMT.format(itemDate))); +// +// String itemlink = item.getLink() == null ? "" : item.getLink(); +// +// String itemsummary = item.getSummary() == null ? 
"" : StringUtils.cleanHtml(item.getSummary(), feed.getUrl().getPath()); +// itemMarkup.add(new ExternalLink("itemlink", itemlink, itemlabel).add(new AttributeModifier("title", itemsummary))); +// +// String itemAuthor = item.getAuthor() == null || item.getAuthor().equals("") ? "" +// : " (" + item.getAuthor().replaceFirst("(.*?)<(.*?)@(.*?)>", "$1").trim() + ")"; +// itemMarkup.add(new Label("itemauthor", StringUtils.unquote(itemAuthor))); +// +// count++; +// if (count >= MAX_ITEMS_PER_FEED) { +// break; +// } +// +// } +// +// } +// +// logger.info("[" + remoteHost + "] #" + fc + ": " + count + " feed items processed (" + (stw.getTimeElapsed() - lap) + " ms)"); +// +// } +// +// add(new Label("foot", Globals.TITLE + " " + Version.VERSION + " " + Globals.DEFAULT_DISPLAY_DATE_FORMAT.format(new Date()))); +// +// logger.info("[" + remoteHost + "] Finished processing feeds after " + stw.getTimeElapsed() + " ms"); + return "index"; } + + @GetMapping("/about") + public String about(Model model) { + model.addAttribute("appName", appName); + return "about"; + } + } \ No newline at end of file diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/DateParser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/DateParser.java new file mode 100644 index 0000000..8969121 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/DateParser.java @@ -0,0 +1,291 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.io.impl.PropertiesLoader; + +import java.text.DateFormat; +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; + +/** + * A helper class that parses Dates out of Strings with date time in RFC822 and W3CDateTime + * formats plus the variants Atom (0.3) and RSS (0.9, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0) + * specificators added to those formats. + *

+ * It uses the JDK java.text.SimpleDateFormat class attempting the parse using a mask for + * each one of the possible formats. + *

+ * Date parsing enhanced (RS) + *

+ * + * @author Alejandro Abdelnur + * @author Robert Schroeder + * + */ +public class DateParser { + + private static final String[] ADDITIONAL_MASKS; + + static { + ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader().getTokenizedProperty("datetime.extra.masks", "|"); + } + + // order is like this because the SimpleDateFormat.parse does not fail with exception + // if it can parse a valid date out of a substring of the full string given the mask + // so we have to check the most complete format first, then it fails with exception + private static final String[] RFC822_MASKS = { + "EEE, dd MMM yy HH:mm:ss z", + "EEE, dd MMM yy HH:mm z", + "dd MMM yy HH:mm:ss z", + "dd MMM yy HH:mm z" }; + + // order is like this because the SimpleDateFormat.parse does not fail with exception + // if it can parse a valid date out of a substring of the full string given the mask + // so we have to check the most complete format first, then it fails with exception + private static final String[] W3CDATETIME_MASKS = { + "yyyy-MM-dd'T'HH:mm:ss.SSSz", + "yyyy-MM-dd't'HH:mm:ss.SSSz", + "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", + "yyyy-MM-dd't'HH:mm:ss.SSS'z'", + "yyyy-MM-dd'T'HH:mm:ssz", + "yyyy-MM-dd't'HH:mm:ssz", + "yyyy-MM-dd'T'HH:mm:ssZ", + "yyyy-MM-dd't'HH:mm:ssZ", + "yyyy-MM-dd'T'HH:mm:ss'Z'", + "yyyy-MM-dd't'HH:mm:ss'z'", + "yyyy-MM-dd'T'HH:mmz", // together with logic in the parseW3CDateTime they + "yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to be GMT + "yyyy'T'HH:mmz", + "yyyy-MM-dd't'HH:mmz", + "yyyy-MM-dd'T'HH:mm'Z'", + "yyyy-MM-dd't'HH:mm'z'", + "yyyy-MM-dd", "yyyy-MM", + "yyyy" }; + + /** + * The masks used to validate and parse the input to this Atom date. + * These are a lot more forgiving than what the Atom spec allows. + * The forms that are invalid according to the spec are indicated. 
+ */ + private static final String[] masks = { + "yyyy-MM-dd'T'HH:mm:ss.SSSz", + "yyyy-MM-dd't'HH:mm:ss.SSSz", // invalid + "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", + "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid + "yyyy-MM-dd'T'HH:mm:ssz", + "yyyy-MM-dd't'HH:mm:ssz", // invalid + "yyyy-MM-dd'T'HH:mm:ss'Z'", + "yyyy-MM-dd't'HH:mm:ss'z'", // invalid + "yyyy-MM-dd'T'HH:mmz", // invalid + "yyyy-MM-dd't'HH:mmz", // invalid + "yyyy-MM-dd'T'HH:mm'Z'", // invalid + "yyyy-MM-dd't'HH:mm'z'", // invalid + "yyyy-MM-dd", "yyyy-MM", "yyyy" }; + + /** + * Private constructor to avoid DateParser instances creation. + */ + private DateParser() { + } + + /** + * Parses a Date out of a string using an array of masks. + *

+ * It uses the masks in order until one of them succeeds or all fail. + *

+ * + * @param masks array of masks to use for parsing the string + * @param sDate string to parse for a date. + * @return the Date represented by the given string using one of the given masks. + * It returns null if it was not possible to parse the the string with any of the masks. + * + */ + private static Date parseUsingMask(String[] masks, String sDate) { + sDate = (sDate != null) ? sDate.trim() : null; + ParsePosition pp = null; + Date d = null; + for (int i = 0; d == null && i < masks.length; i++) { + DateFormat df = new SimpleDateFormat(masks[i], Locale.US); + //df.setLenient(false); + df.setLenient(true); + try { + pp = new ParsePosition(0); + d = df.parse(sDate, pp); + assert sDate != null; + if (pp.getIndex() != sDate.length()) { + d = null; + } + //System.out.println("pp["+pp.getIndex()+"] s["+sDate+" m["+masks[i]+"] d["+d+"]"); + } catch (Exception ex1) { + //System.out.println("s: "+sDate+" m: "+masks[i]+" d: "+null); + } + } + return d; + } + + /** + * Parses a Date out of a String with a date in RFC822 format. + *

+ * It parses the following formats: + *

    + *
  • "EEE, dd MMM yyyy HH:mm:ss z"
  • + *
  • "EEE, dd MMM yyyy HH:mm z"
  • + *
  • "EEE, dd MMM yy HH:mm:ss z"
  • + *
  • "EEE, dd MMM yy HH:mm z"
  • + *
  • "dd MMM yyyy HH:mm:ss z"
  • + *
  • "dd MMM yyyy HH:mm z"
  • + *
  • "dd MMM yy HH:mm:ss z"
  • + *
  • "dd MMM yy HH:mm z"
  • + *
+ *

+ * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. + *

+ * @param sDate string to parse for a date. + * @return the Date represented by the given RFC822 string. + * It returns null if it was not possible to parse the given string into a Date. + * + */ + public static Date parseRFC822(String sDate) { + int utIndex = sDate.indexOf(" UT"); + if (utIndex > -1) { + String pre = sDate.substring(0, utIndex); + String post = sDate.substring(utIndex + 3); + sDate = pre + " GMT" + post; + } else { // Schroeder 11-2012 + int zIndex = sDate.indexOf(" Z"); + if (zIndex > -1) { + String pre = sDate.substring(0, zIndex); + String post = sDate.substring(zIndex + 2); + sDate = pre + " GMT" + post; + } + } + return parseUsingMask(RFC822_MASKS, sDate); + } + + /** + * Parses a Date out of a String with a date in W3C date-time format. + *

+ * It parses the following formats: + *

    + *
  • "yyyy-MM-dd'T'HH:mm:ssz"
  • + *
  • "yyyy-MM-dd'T'HH:mmz"
  • + *
  • "yyyy-MM-dd"
  • + *
  • "yyyy-MM"
  • + *
  • "yyyy"
  • + *
+ *

+ * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. + *

+ * @param sDate string to parse for a date. + * @return the Date represented by the given W3C date-time string. + * It returns null if it was not possible to parse the given string into a Date. + * + */ + public static Date parseW3CDateTime(String sDate) { + // if sDate has time on it, it injects 'GTM' before de TZ displacement to + // allow the SimpleDateFormat parser to parse it properly + int tIndex = sDate.indexOf("T"); + if (tIndex > -1) { + if (sDate.endsWith("Z")) { + sDate = sDate.substring(0, sDate.length() - 1) + "+00:00"; + } + int tzdIndex = sDate.indexOf("+", tIndex); + if (tzdIndex == -1) { + tzdIndex = sDate.indexOf("-", tIndex); + } + if (tzdIndex > -1) { + String pre = sDate.substring(0, tzdIndex); + int secFraction = pre.indexOf(","); + if (secFraction > -1) { + pre = pre.substring(0, secFraction); + } + String post = sDate.substring(tzdIndex); + sDate = pre + "GMT" + post; + } + } else { + sDate += "T00:00GMT"; + } + return parseUsingMask(W3CDATETIME_MASKS, sDate); + } + + /** + * Parses a Date out of a String with a date in W3C date-time format or + * in a RFC822 format. + *

+ * @param sDate string to parse for a date. + * @return the Date represented by the given W3C date-time string. + * It returns null if it was not possible to parse the given string into a Date. + * + * */ + public static Date parseDate(String sDate) { + Date d = parseW3CDateTime(sDate); + if (d == null) { + d = parseRFC822(sDate); + if (d == null && ADDITIONAL_MASKS.length > 0) { + d = parseUsingMask(ADDITIONAL_MASKS, sDate); + } + if (d == null) { // Schroeder 11-2012 + d = parseUsingMask(masks, sDate); + } + } + return d; + } + + /** + * create a RFC822 representation of a date. + *

+ * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. + *

+ * @param date Date to parse + * @return the RFC822 represented by the given Date + * It returns null if it was not possible to parse the date. + * + */ + @SuppressWarnings("unused") + public static String formatRFC822(Date date) { + SimpleDateFormat dateFormater = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US); + dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); + return dateFormater.format(date); + } + + /** + * create a W3C Date Time representation of a date. + *

+ * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. + *

+ * @param date Date to parse + * @return the W3C Date Time represented by the given Date + * It returns null if it was not possible to parse the date. + * + */ + @SuppressWarnings("unused") + public static String formatW3CDateTime(Date date) { + SimpleDateFormat dateFormater = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); + dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); + return dateFormater.format(date); + } + + + public static void main(String[] args) { + System.out.println(parseDate("Mon, 19 Nov 2012 23:22:39 Z")); + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java new file mode 100644 index 0000000..fa14426 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS090Parser.java @@ -0,0 +1,345 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.Channel; +import com.rometools.rome.feed.rss.Image; +import com.rometools.rome.feed.rss.Item; +import com.rometools.rome.feed.rss.TextInput; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.impl.BaseWireFeedParser; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +import java.util.*; + +/** + */ +@SuppressWarnings({ "rawtypes", "unchecked" }) +public class RSS090Parser extends BaseWireFeedParser { + private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/"; + private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/"; + + private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI); + private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI); + private static final Namespace CONTENT_NS = Namespace.getNamespace(CONTENT_URI); + + public RSS090Parser() { + this("rss_0.9", RSS_NS); + } + + protected RSS090Parser(String type, Namespace ns) { + super(type, ns); + } + + public boolean isMyType(Document document) { + boolean ok = false; + + Element rssRoot = document.getRootElement(); + Namespace defaultNS = rssRoot.getNamespace(); + List additionalNSs = rssRoot.getAdditionalNamespaces(); + + ok = defaultNS != null && defaultNS.equals(getRDFNamespace()); + if (ok) { + if (additionalNSs == null) { + ok = false; + } else { + ok = false; + for (int i = 0; !ok && i < additionalNSs.size(); i++) { + ok = getRSSNamespace().equals(additionalNSs.get(i)); + } + } + } + return ok; + } + + @SuppressWarnings("unused") + public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException, FeedException { + if (validate) { + validateFeed(document); + } + Element rssRoot = document.getRootElement(); + return 
parseChannel(rssRoot); + } + + @SuppressWarnings("all") + protected void validateFeed(@SuppressWarnings("unused") Document document) throws FeedException { + // TBD + // here we have to validate the Feed against a schema or whatever + // not sure how to do it + // one posibility would be to inject our own schema for the feed (they don't exist out there) + // to the document, produce an ouput and attempt to parse it again with validation turned on. + // otherwise will have to check the document elements by hand. + } + + /** + * Returns the namespace used by RSS elements in document of the RSS version the parser supports. + *

+ * This implementation returns the RSS 0.9 namespace. + *

+ * + * @return the RSS 0.9 namespace. + */ + protected Namespace getRSSNamespace() { + return RSS_NS; + } + + /** + * Returns the namespace used by RDF elements in document of the RSS version the parser supports. + *

+ * This implementation returns the RDF syntax namespace. + *

+ * + * @return the RDF syntax namespace. + */ + protected Namespace getRDFNamespace() { + return RDF_NS; + } + + /** + * Returns the namespace used by Content Module elements in document. + *

+ * This implementation returns the RSS 1.0 Content Module namespace. + *

+ * + * @return the RSS 1.0 Content Module namespace. + */ + protected Namespace getContentNamespace() { + return CONTENT_NS; + } + + /** + * Parses the root element of an RSS document into a Channel bean. + *

+ * It reads title, link and description and delegates to parseImage, parseItems + * and parseTextInput. This delegation always passes the root element of the RSS + * document as different RSS versions may have this information in different parts + * of the XML tree (no assumptions made thanks to the variety of the specs) + *

+ * + * @param rssRoot the root element of the RSS document to parse. + * @return the parsed Channel bean. + */ + protected WireFeed parseChannel(Element rssRoot) { + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + + Channel channel = new Channel(getType()); + + Element e = eChannel.getChild("title", getRSSNamespace()); + if (e != null) { + channel.setTitle(e.getText()); + } + e = eChannel.getChild("link", getRSSNamespace()); + if (e != null) { + channel.setLink(e.getText()); + } + e = eChannel.getChild("description", getRSSNamespace()); + if (e != null) { + channel.setDescription(e.getText()); + } + + channel.setImage(parseImage(rssRoot)); + + channel.setTextInput(parseTextInput(rssRoot)); + + // Unfortunately Microsoft's SSE extension has a special case of + // effectively putting the sharing channel module inside the RSS tag + // and not inside the channel itself. So we also need to look for + // channel modules from the root RSS element. + List allFeedModules = new ArrayList(); + List rootModules = parseFeedModules(rssRoot, Locale.getDefault()); + List channelModules = parseFeedModules(eChannel, Locale.getDefault()); + if (rootModules != null) { + allFeedModules.addAll(rootModules); + } + if (channelModules != null) { + allFeedModules.addAll(channelModules); + } + channel.setModules(allFeedModules); + channel.setItems(parseItems(rssRoot)); + + List foreignMarkup = extractForeignMarkup(eChannel, channel, getRSSNamespace()); + if (!foreignMarkup.isEmpty()) { + channel.setForeignMarkup(foreignMarkup); + } + return channel; + } + + /** + * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment. + * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element. + *

+ */ + protected List getItems(Element rssRoot) { + return rssRoot.getChildren("item", getRSSNamespace()); + } + + /** + * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment. + * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. + *

+ */ + protected Element getImage(Element rssRoot) { + return rssRoot.getChild("image", getRSSNamespace()); + } + + /** + * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment. + * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. + *

+ */ + protected Element getTextInput(Element rssRoot) { + return rssRoot.getChild("textinput", getRSSNamespace()); + } + + /** + * Parses the root element of an RSS document looking for image information. + *

+ * It reads title, url and link out of the 'image' element. + *

+ * + * @param rssRoot the root element of the RSS document to parse for image information. + * @return the parsed image bean. + */ + protected Image parseImage(Element rssRoot) { + Image image = null; + Element eImage = getImage(rssRoot); + if (eImage != null) { + image = new Image(); + + Element e = eImage.getChild("title", getRSSNamespace()); + if (e != null) { + image.setTitle(e.getText()); + } + e = eImage.getChild("url", getRSSNamespace()); + if (e != null) { + image.setUrl(e.getText()); + } + e = eImage.getChild("link", getRSSNamespace()); + if (e != null) { + image.setLink(e.getText()); + } + } + return image; + } + + /** + * Parses the root element of an RSS document looking for all items information. + *

+ * It iterates through the item elements list, obtained from the getItems() method, and invokes parseItem() + * for each item element. The resulting RSSItem of each item element is stored in a list. + *

+ * + * @param rssRoot the root element of the RSS document to parse for all items information. + * @return a list with all the parsed RSSItem beans. + */ + protected List parseItems(Element rssRoot) { + Collection eItems = getItems(rssRoot); + + List items = new ArrayList(); + for (Object item : eItems) { + Element eItem = (Element) item; + items.add(parseItem(rssRoot, eItem)); + } + return items; + } + + /** + * Parses an item element of an RSS document looking for item information. + *

+ * It reads title and link out of the 'item' element. + *

+ * + * @param rssRoot the root element of the RSS document in case it's needed for context. + * @param eItem the item element to parse. + * @return the parsed RSSItem bean. + */ + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = new Item(); + Element e = eItem.getChild("title", getRSSNamespace()); + if (e != null) { + item.setTitle(e.getText()); + } + e = eItem.getChild("link", getRSSNamespace()); + if (e != null) { + item.setLink(e.getText()); + item.setUri(e.getText()); + } + + item.setModules(parseItemModules(eItem, Locale.getDefault())); + + List foreignMarkup = extractForeignMarkup(eItem, item, getRSSNamespace()); + //content:encoded elements are treated special, without a module, they have to be removed from the foreign + //markup to avoid duplication in case of read/write. Note that this fix will break if a content module is + //used + Iterator iterator = foreignMarkup.iterator(); + while (iterator.hasNext()) { + Element ie = (Element) iterator.next(); + if (getContentNamespace().equals(ie.getNamespace()) && ie.getName().equals("encoded")) { + iterator.remove(); + } + } + if (!foreignMarkup.isEmpty()) { + item.setForeignMarkup(foreignMarkup); + } + return item; + } + + /** + * Parses the root element of an RSS document looking for text-input information. + *

+ * It reads title, description, name and link out of the 'textinput' or 'textInput' element. + *

+ * + * @param rssRoot the root element of the RSS document to parse for text-input information. + * @return the parsed RSSTextInput bean. + */ + protected TextInput parseTextInput(Element rssRoot) { + TextInput textInput = null; + Element eTextInput = getTextInput(rssRoot); + if (eTextInput != null) { + textInput = new TextInput(); + Element e = eTextInput.getChild("title", getRSSNamespace()); + if (e != null) { + textInput.setTitle(e.getText()); + } + e = eTextInput.getChild("description", getRSSNamespace()); + if (e != null) { + textInput.setDescription(e.getText()); + } + e = eTextInput.getChild("name", getRSSNamespace()); + if (e != null) { + textInput.setName(e.getText()); + } + e = eTextInput.getChild("link", getRSSNamespace()); + if (e != null) { + textInput.setLink(e.getText()); + } + } + return textInput; + } + + @Override + @SuppressWarnings("all") + public WireFeed parse(org.jdom2.Document document, boolean b, Locale locale) throws IllegalArgumentException, FeedException { + return null; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java new file mode 100644 index 0000000..958e127 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS091UserlandParser.java @@ -0,0 +1,250 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.*; +import com.rometools.rome.io.impl.NumberParser; +import org.jdom2.Attribute; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + */ +@SuppressWarnings({ "rawtypes", "unchecked" }) +public class RSS091UserlandParser extends RSS090Parser { + + public RSS091UserlandParser() { + this("rss_0.91U"); + } + + protected RSS091UserlandParser(String type) { + super(type, null); + } + + public boolean isMyType(Document document) { + boolean ok; + Element rssRoot = document.getRootElement(); + ok = rssRoot.getName().equals("rss"); + if (ok) { + ok = false; + Attribute version = rssRoot.getAttribute("version"); + if (version != null) { + ok = version.getValue().equals(getRSSVersion()); + } + } + return ok; + } + + protected String getRSSVersion() { + return "0.91"; + } + + protected Namespace getRSSNamespace() { + return Namespace.getNamespace(""); + } + + /** + * To be overriden by RSS 0.91 Netscape and RSS 0.94 + */ + @SuppressWarnings("unused") + protected boolean isHourFormat24(Element rssRoot) { + return true; + } + + /** + * Parses the root element of an RSS document into a Channel bean. + *

+     * It first invokes super.parseChannel and then parses and injects the following
+     * properties if present: language, rating, copyright, pubDate, lastBuildDate, docs,
+     * managingEditor, webMaster, skipHours and skipDays.
+     *
+     * @param rssRoot the root element of the RSS document to parse.
+     * @return the parsed Channel bean.
+     */
+    protected WireFeed parseChannel(Element rssRoot) {
+        Channel channel = (Channel) super.parseChannel(rssRoot);
+
+        Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
+
+        Element e = eChannel.getChild("language", getRSSNamespace());
+        if (e != null) {
+            channel.setLanguage(e.getText());
+        }
+        e = eChannel.getChild("rating", getRSSNamespace());
+        if (e != null) {
+            channel.setRating(e.getText());
+        }
+        e = eChannel.getChild("copyright", getRSSNamespace());
+        if (e != null) {
+            channel.setCopyright(e.getText());
+        }
+        e = eChannel.getChild("pubDate", getRSSNamespace());
+        if (e != null) {
+            channel.setPubDate(DateParser.parseDate(e.getText()));
+        }
+        e = eChannel.getChild("lastBuildDate", getRSSNamespace());
+        if (e != null) {
+            channel.setLastBuildDate(DateParser.parseDate(e.getText()));
+        }
+        // 'docs' is read once; the second, identical lookup was redundant.
+        e = eChannel.getChild("docs", getRSSNamespace());
+        if (e != null) {
+            channel.setDocs(e.getText());
+        }
+        e = eChannel.getChild("managingEditor", getRSSNamespace());
+        if (e != null) {
+            channel.setManagingEditor(e.getText());
+        }
+        e = eChannel.getChild("webMaster", getRSSNamespace());
+        if (e != null) {
+            channel.setWebMaster(e.getText());
+        }
+
+        // skipHours/skipDays are looked up in the RSS namespace like every other channel
+        // child. For the parsers whose getRSSNamespace() is the empty namespace this is the
+        // same lookup as before; for subclasses that override the namespace it makes the
+        // elements findable at all.
+        e = eChannel.getChild("skipHours", getRSSNamespace());
+        if (e != null) {
+            List skipHours = new ArrayList();
+            List eHours = e.getChildren("hour", getRSSNamespace());
+            for (Object hour : eHours) {
+                Element eHour = (Element) hour;
+                // The channel's skip hours are Integers in the rometools API; storing the
+                // raw text would put Strings into that list. Unparsable hours are skipped.
+                Integer value = NumberParser.parseInt(eHour.getText().trim());
+                if (value != null) {
+                    skipHours.add(value);
+                }
+            }
+            channel.setSkipHours(skipHours);
+        }
+
+        e = eChannel.getChild("skipDays", getRSSNamespace());
+        if (e != null) {
+            List skipDays = new ArrayList();
+            List eDays = e.getChildren("day", getRSSNamespace());
+            for (Object day : eDays) {
+                Element eDay = (Element) day;
+                skipDays.add(eDay.getText().trim());
+            }
+            channel.setSkipDays(skipDays);
+        }
+        return channel;
+    }
+
+    /**
+     * Parses the root element of an RSS document looking for image information.
+     * <p>
+     * Delegates to super.parseImage first; when that yields an image, the width, height
+     * and description children of the image element are read and injected if present.
+     * Width and height are only set when their text parses as an integer.
+     *
+     * @param rssRoot the root element of the RSS document to parse for image information.
+     * @return the parsed Image bean, or null if super.parseImage returned null.
+     */
+    protected Image parseImage(Element rssRoot) {
+        Image image = super.parseImage(rssRoot);
+        if (image == null) {
+            return null;
+        }
+
+        Element imageElement = getImage(rssRoot);
+
+        Element widthElement = imageElement.getChild("width", getRSSNamespace());
+        if (widthElement != null) {
+            Integer width = NumberParser.parseInt(widthElement.getText());
+            if (width != null) {
+                image.setWidth(width);
+            }
+        }
+
+        Element heightElement = imageElement.getChild("height", getRSSNamespace());
+        if (heightElement != null) {
+            Integer height = NumberParser.parseInt(heightElement.getText());
+            if (height != null) {
+                image.setHeight(height);
+            }
+        }
+
+        Element descriptionElement = imageElement.getChild("description", getRSSNamespace());
+        if (descriptionElement != null) {
+            image.setDescription(descriptionElement.getText());
+        }
+        return image;
+    }
+
+    /**
+     * Returns the 'item' elements found under the 'channel' element, or an empty list
+     * when the document has no 'channel' element.
+     */
+    protected List getItems(Element rssRoot) {
+        Element channelElement = rssRoot.getChild("channel", getRSSNamespace());
+        if (channelElement == null) {
+            return Collections.EMPTY_LIST;
+        }
+        return channelElement.getChildren("item", getRSSNamespace());
+    }
+
+    /**
+     * Returns the 'image' element found under the 'channel' element, or null when
+     * either of them is missing.
+     */
+    protected Element getImage(Element rssRoot) {
+        Element channelElement = rssRoot.getChild("channel", getRSSNamespace());
+        if (channelElement == null) {
+            return null;
+        }
+        return channelElement.getChild("image", getRSSNamespace());
+    }
+
+    /**
+     * Name of the text-input element. To be overridden by the RSS 0.91 Netscape parser.
+     */
+    protected String getTextInputLabel() {
+        return "textInput";
+    }
+
+    /**
+     * Returns the text-input element (named by getTextInputLabel) found under the
+     * 'channel' element, or null when either of them is missing.
+     */
+    protected Element getTextInput(Element rssRoot) {
+        String label = getTextInputLabel();
+        Element channelElement = rssRoot.getChild("channel", getRSSNamespace());
+        if (channelElement == null) {
+            return null;
+        }
+        return channelElement.getChild(label, getRSSNamespace());
+    }
+
+    /**
+     * Parses an item element of an RSS document looking for item information.
+     *

+ * It first invokes super.parseItem and then parses and injects the description property if present. + *

+ * + * @param rssRoot the root element of the RSS document in case it's needed for context. + * @param eItem the item element to parse. + * @return the parsed RSSItem bean. + */ + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + Element e = eItem.getChild("description", getRSSNamespace()); + if (e != null) { + item.setDescription(parseItemDescription(rssRoot, e)); + } + Element ce = eItem.getChild("encoded", getContentNamespace()); + if (ce != null) { + Content content = new Content(); + content.setType(Content.HTML); + content.setValue(ce.getText()); + item.setContent(content); + } + return item; + } + + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = new Description(); + desc.setType("text/plain"); + desc.setValue(eDesc.getText()); + return desc; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java new file mode 100644 index 0000000..18ebbed --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS092Parser.java @@ -0,0 +1,143 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ */
+package dev.rsems.rometools.rome.io.impl;
+
+import com.rometools.rome.feed.WireFeed;
+import com.rometools.rome.feed.rss.*;
+import com.rometools.rome.io.impl.NumberParser;
+import org.jdom2.Element;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Parser for RSS 0.92 documents.
+ * <p>
+ * Builds on the RSS 0.91 (Userland) parser and additionally reads the channel 'cloud'
+ * element and the item 'source', 'enclosure' and 'category' elements. Item descriptions
+ * are typed as text/html.
+ * <p>
+ * Attributes are always read without a namespace: unprefixed XML attributes do not
+ * belong to any namespace, so a lookup qualified with the RSS namespace finds nothing.
+ */
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class RSS092Parser extends RSS091UserlandParser {
+
+    public RSS092Parser() {
+        this("rss_0.92");
+    }
+
+    protected RSS092Parser(String type) {
+        super(type);
+    }
+
+    protected String getRSSVersion() {
+        return "0.92";
+    }
+
+    /**
+     * Parses the channel via super.parseChannel and then injects the 'cloud' element,
+     * if present, with whichever of its domain, port, path, registerProcedure and
+     * protocol attributes are set.
+     *
+     * @param rssRoot the root element of the RSS document to parse.
+     * @return the parsed Channel bean.
+     */
+    protected WireFeed parseChannel(Element rssRoot) {
+        Channel channel = (Channel) super.parseChannel(rssRoot);
+
+        Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
+        Element eCloud = eChannel.getChild("cloud", getRSSNamespace());
+        if (eCloud != null) {
+            Cloud cloud = new Cloud();
+            String att = eCloud.getAttributeValue("domain");
+            if (att != null) {
+                cloud.setDomain(att);
+            }
+            att = eCloud.getAttributeValue("port");
+            if (att != null) {
+                // A non-numeric port must not abort the whole feed with a
+                // NumberFormatException; it is simply left unset.
+                Integer port = NumberParser.parseInt(att.trim());
+                if (port != null) {
+                    cloud.setPort(port);
+                }
+            }
+            att = eCloud.getAttributeValue("path");
+            if (att != null) {
+                cloud.setPath(att);
+            }
+            att = eCloud.getAttributeValue("registerProcedure");
+            if (att != null) {
+                cloud.setRegisterProcedure(att);
+            }
+            att = eCloud.getAttributeValue("protocol");
+            if (att != null) {
+                cloud.setProtocol(att);
+            }
+            channel.setCloud(cloud);
+        }
+        return channel;
+    }
+
+    /**
+     * Parses an item via super.parseItem and then injects its 'source', 'enclosure'
+     * and 'category' information.
+     *
+     * @param rssRoot the root element of the RSS document in case it's needed for context.
+     * @param eItem   the item element to parse.
+     * @return the parsed Item bean.
+     */
+    protected Item parseItem(Element rssRoot, Element eItem) {
+        Item item = super.parseItem(rssRoot, eItem);
+
+        Element e = eItem.getChild("source", getRSSNamespace());
+        if (e != null) {
+            Source source = new Source();
+            String url = e.getAttributeValue("url");
+            source.setUrl(url);
+            source.setValue(e.getText());
+            item.setSource(source);
+        }
+
+        // 0.92 allows one enclosure occurrence, 0.93 multiple; reading all of them here
+        // saves a separate implementation in the 0.93 parser.
+        // The children are looked up in the RSS namespace like all other elements; for the
+        // empty namespace this is the same lookup as an unqualified one.
+        List eEnclosures = eItem.getChildren("enclosure", getRSSNamespace());
+        if (!eEnclosures.isEmpty()) {
+            List enclosures = new ArrayList();
+            for (Object eEnclosure : eEnclosures) {
+                e = (Element) eEnclosure;
+
+                Enclosure enclosure = new Enclosure();
+                String att = e.getAttributeValue("url");
+                if (att != null) {
+                    enclosure.setUrl(att);
+                }
+                att = e.getAttributeValue("length");
+                // Falls back to 0 when the length is missing or not a number.
+                enclosure.setLength(NumberParser.parseLong(att, 0L));
+
+                att = e.getAttributeValue("type");
+                if (att != null) {
+                    enclosure.setType(att);
+                }
+                enclosures.add(enclosure);
+            }
+            item.setEnclosures(enclosures);
+        }
+
+        List eCats = eItem.getChildren("category", getRSSNamespace());
+        item.setCategories(parseCategories(eCats));
+
+        return item;
+    }
+
+    /**
+     * Converts 'category' elements into Category beans, taking the value from the
+     * element text and the domain from the optional 'domain' attribute.
+     *
+     * @param eCats the category elements to convert.
+     * @return the list of Category beans, or null if eCats is empty.
+     */
+    protected List parseCategories(List eCats) {
+        List cats = null;
+        if (!eCats.isEmpty()) {
+            cats = new ArrayList();
+            for (Object eCat : eCats) {
+                Category cat = new Category();
+                Element e = (Element) eCat;
+                String att = e.getAttributeValue("domain");
+                if (att != null) {
+                    cat.setDomain(att);
+                }
+                cat.setValue(e.getText());
+                cats.add(cat);
+            }
+        }
+        return cats;
+    }
+
+    /**
+     * Builds the item description via super.parseItemDescription and sets its type
+     * to text/html.
+     */
+    protected Description parseItemDescription(Element rssRoot, Element eDesc) {
+        Description desc = super.parseItemDescription(rssRoot, eDesc);
+        desc.setType("text/html");
+        return desc;
+    }
+
+}
diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java
new file mode 100644
index 0000000..cbfb14d
--- /dev/null
+++ 
b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS093Parser.java @@ -0,0 +1,59 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.rss.Item; +import org.jdom2.Element; + +/** + * + */ +public class RSS093Parser extends RSS092Parser { + + public RSS093Parser() { + this("rss_0.93"); + } + + protected RSS093Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "0.93"; + } + + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + Element e = eItem.getChild("pubDate", getRSSNamespace()); + if (e != null) { + item.setPubDate(DateParser.parseDate(e.getText())); + } + e = eItem.getChild("expirationDate", getRSSNamespace()); + if (e != null) { + item.setExpirationDate(DateParser.parseDate(e.getText())); + } + e = eItem.getChild("description", getRSSNamespace()); + if (e != null) { + String type = e.getAttributeValue("type"); + if (type != null) { + item.getDescription().setType(type); + } + } + return item; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java new file mode 100644 index 0000000..0e632bb --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS094Parser.java @@ -0,0 +1,106 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.Channel; +import com.rometools.rome.feed.rss.Description; +import com.rometools.rome.feed.rss.Guid; +import com.rometools.rome.feed.rss.Item; +import org.jdom2.Element; + +import java.util.List; +import java.util.Optional; + +/** + */ +public class RSS094Parser extends RSS093Parser { + + public RSS094Parser() { + this("rss_0.94"); + } + + protected RSS094Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "0.94"; + } + + @SuppressWarnings("UnnecessarySemicolon") + protected WireFeed parseChannel(Element rssRoot) { + Channel channel = (Channel) super.parseChannel(rssRoot); + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + + List eCats = eChannel.getChildren("category", getRSSNamespace()); + channel.setCategories(parseCategories(eCats)); + + Element eTtl = eChannel.getChild("ttl", getRSSNamespace()); + if (eTtl != null && eTtl.getText() != null) { + Optional ttlValue = Optional.empty(); + try { + ttlValue = Optional.of(Integer.valueOf(eTtl.getText())); + } catch (NumberFormatException ignored) { + ; + } + if (ttlValue.isPresent()) { + channel.setTtl(ttlValue.orElse(null)); + } + } + return channel; + } + + public Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + 
item.setExpirationDate(null); + + Element e = eItem.getChild("author", getRSSNamespace()); + if (e != null) { + item.setAuthor(e.getText()); + } + + e = eItem.getChild("guid", getRSSNamespace()); + if (e != null) { + Guid guid = new Guid(); + String att = e.getAttributeValue("isPermaLink");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att != null) { + guid.setPermaLink(att.equalsIgnoreCase("true")); + } + guid.setValue(e.getText()); + item.setGuid(guid); + } + + e = eItem.getChild("comments", getRSSNamespace()); + if (e != null) { + item.setComments(e.getText()); + } + + return item; + } + + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = super.parseItemDescription(rssRoot, eDesc); + String att = eDesc.getAttributeValue("type");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK + if (att == null) { + att = "text/html"; + } + desc.setType(att); + return desc; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS10Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS10Parser.java new file mode 100644 index 0000000..562652b --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS10Parser.java @@ -0,0 +1,139 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import com.rometools.rome.feed.rss.Channel; +import com.rometools.rome.feed.rss.Content; +import com.rometools.rome.feed.rss.Description; +import com.rometools.rome.feed.rss.Item; + +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +import java.util.List; + +/** + */ +public class RSS10Parser extends RSS090Parser { + + private static final String RSS_URI = "http://purl.org/rss/1.0/"; + private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI); + + public RSS10Parser() { + this("rss_1.0", RSS_NS); + } + + protected RSS10Parser(String type, Namespace ns) { + super(type, ns); + } + + /** + * Indicates if a JDom document is an RSS instance that can be parsed with the parser. + *

+ * It checks for RDF (w3.org) and + * RSS (purl.org) namespaces being defined in the root element. + * + * @param document document to check if it can be parsed with this parser implementation. + * @return true if the document is RSS1., false otherwise. + */ + public boolean isMyType(Document document) { + boolean ok = false; + + Element rssRoot = document.getRootElement(); + Namespace defaultNS = rssRoot.getNamespace(); + @SuppressWarnings("rawtypes") + List additionalNSs = rssRoot.getAdditionalNamespaces(); + + ok = defaultNS != null && defaultNS.equals(getRDFNamespace()); + if (ok) { + if (additionalNSs == null) { + ok = false; + } else { + ok = false; + for (int i = 0; !ok && i < additionalNSs.size(); i++) { + ok = getRSSNamespace().equals(additionalNSs.get(i)); + } + } + } + return ok; + } + + /** + * Returns the namespace used by RSS elements in document of the RSS 1.0 + *

+ * + * @return returns "purl.org". + */ + protected Namespace getRSSNamespace() { + return Namespace.getNamespace(RSS_URI); + } + + /** + * Parses an item element of an RSS document looking for item information. + *

+ * It first invokes super.parseItem and then parses and injects the description property if present. + *

+ * + * @param rssRoot the root element of the RSS document in case it's needed for context. + * @param eItem the item element to parse. + * @return the parsed RSSItem bean. + */ + protected Item parseItem(Element rssRoot, Element eItem) { + Item item = super.parseItem(rssRoot, eItem); + Element e = eItem.getChild("description", getRSSNamespace()); + if (e != null) { + item.setDescription(parseItemDescription(rssRoot, e)); + } + Element ce = eItem.getChild("encoded", getContentNamespace()); + if (ce != null) { + Content content = new Content(); + content.setType(Content.HTML); + content.setValue(ce.getText()); + item.setContent(content); + } + + String uri = eItem.getAttributeValue("about", getRDFNamespace()); + if (uri != null) { + item.setUri(uri); + } + + return item; + } + + protected WireFeed parseChannel(Element rssRoot) { + Channel channel = (Channel) super.parseChannel(rssRoot); + + Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); + String uri = eChannel.getAttributeValue("about", getRDFNamespace()); + if (uri != null) { + channel.setUri(uri); + } + + return channel; + } + + @SuppressWarnings("unused") + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = new Description(); + desc.setType("text/plain"); + desc.setValue(eDesc.getText()); + return desc; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java new file mode 100644 index 0000000..95c65c5 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20Parser.java @@ -0,0 +1,67 @@ +package dev.rsems.rometools.rome.io.impl; + +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +import com.rometools.rome.feed.rss.Description; +import org.jdom2.Attribute; +import org.jdom2.Document; +import org.jdom2.Element; + +/** + */ +public class RSS20Parser extends RSS094Parser { + + public RSS20Parser() { + this("rss_2.0"); + } + + protected RSS20Parser(String type) { + super(type); + } + + protected String getRSSVersion() { + return "2.0"; + } + + protected boolean isHourFormat24(Element rssRoot) { + return false; + } + + protected Description parseItemDescription(Element rssRoot, Element eDesc) { + Description desc = super.parseItemDescription(rssRoot, eDesc); + desc.setType("text/html"); // change as per https://rome.dev.java.net/issues/show_bug.cgi?id=26 + return desc; + } + + public boolean isMyType(Document document) { + boolean ok; + Element rssRoot = document.getRootElement(); + ok = rssRoot.getName().equals("rss"); + if (ok) { + ok = false; + Attribute version = rssRoot.getAttribute("version"); + if (version != null) { + // At this point, as far ROME is concerned RSS 2.0, 2.00 and + // 2.0.X are all the same, so let's use startsWith for leniency. + ok = version.getValue().startsWith(getRSSVersion()); + } + } + return ok; + } + +} diff --git a/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java new file mode 100644 index 0000000..9b1c248 --- /dev/null +++ b/src/main/java/dev/rsems/rometools/rome/io/impl/RSS20wNSParser.java @@ -0,0 +1,71 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package dev.rsems.rometools.rome.io.impl; + +import com.rometools.rome.feed.WireFeed; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Namespace; + +/** + * To address issue with certain feeds (brought up by Charles Miller): + *
+ * "During the debacle that was the rollout of RSS2.0, this namespace was tried, + * and even appeared in Dave Winer's Scripting News feed for a while. It was + * then withdrawn, but the wonderful thing about standards is the moment you + * roll one out, even if it's marked as unfinished and subject to change, + * someone will end up stuck with it forever." + *
+ * Note that there is not counter part on the generator, we only generate the final RSS2 + */ +public class RSS20wNSParser extends RSS20Parser { + private static final String RSS20_URI = "http://backend.userland.com/rss2"; + + public RSS20wNSParser() { + this("rss_2.0wNS"); + } + + protected RSS20wNSParser(String type) { + super(type); + } + + public boolean isMyType(Document document) { + Element rssRoot = document.getRootElement(); + Namespace defaultNS = rssRoot.getNamespace(); + boolean ok = defaultNS != null && defaultNS.equals(getRSSNamespace()); + if (ok) { + ok = super.isMyType(document); + } + return ok; + } + + protected Namespace getRSSNamespace() { + return Namespace.getNamespace(RSS20_URI); + } + + /** + * After we parse the feed we put "rss_2.0" in it (so converters and generators work) + * this parser is a phantom. + * + */ + protected WireFeed parseChannel(Element rssRoot) { + WireFeed wFeed = super.parseChannel(rssRoot); + wFeed.setFeedType("rss_2.0"); + return wFeed; + } + +} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/DateParser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/DateParser.java deleted file mode 100644 index 3c50b90..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/DateParser.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.io.impl.PropertiesLoader; - -import java.text.DateFormat; -import java.text.ParsePosition; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Locale; -import java.util.TimeZone; - -/** - * A helper class that parses Dates out of Strings with date time in RFC822 and W3CDateTime - * formats plus the variants Atom (0.3) and RSS (0.9, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0) - * specificators added to those formats. - *

- * It uses the JDK java.text.SimpleDateFormat class attemtping the parse using a mask for - * each one of the possible formats. - *

- * Date parsing enhanced (RS) - *

- * - * @author Alejandro Abdelnur - * @author Robert Schroeder - * - */ -public class DateParser { - - private static final String[] ADDITIONAL_MASKS; - - static { - ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader().getTokenizedProperty("datetime.extra.masks", "|"); - } - - // order is like this because the SimpleDateFormat.parse does not fail with exception - // if it can parse a valid date out of a substring of the full string given the mask - // so we have to check the most complete format first, then it fails with exception - private static final String[] RFC822_MASKS = { - "EEE, dd MMM yy HH:mm:ss z", - "EEE, dd MMM yy HH:mm z", - "dd MMM yy HH:mm:ss z", - "dd MMM yy HH:mm z" }; - - // order is like this because the SimpleDateFormat.parse does not fail with exception - // if it can parse a valid date out of a substring of the full string given the mask - // so we have to check the most complete format first, then it fails with exception - private static final String[] W3CDATETIME_MASKS = { - "yyyy-MM-dd'T'HH:mm:ss.SSSz", - "yyyy-MM-dd't'HH:mm:ss.SSSz", - "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", - "yyyy-MM-dd't'HH:mm:ss.SSS'z'", - "yyyy-MM-dd'T'HH:mm:ssz", - "yyyy-MM-dd't'HH:mm:ssz", - "yyyy-MM-dd'T'HH:mm:ssZ", - "yyyy-MM-dd't'HH:mm:ssZ", - "yyyy-MM-dd'T'HH:mm:ss'Z'", - "yyyy-MM-dd't'HH:mm:ss'z'", - "yyyy-MM-dd'T'HH:mmz", // together with logic in the parseW3CDateTime they - "yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to be GMT - "yyyy'T'HH:mmz", - "yyyy-MM-dd't'HH:mmz", - "yyyy-MM-dd'T'HH:mm'Z'", - "yyyy-MM-dd't'HH:mm'z'", - "yyyy-MM-dd", "yyyy-MM", - "yyyy" }; - - /** - * The masks used to validate and parse the input to this Atom date. - * These are a lot more forgiving than what the Atom spec allows. - * The forms that are invalid according to the spec are indicated. 
- */ - private static final String[] masks = { - "yyyy-MM-dd'T'HH:mm:ss.SSSz", - "yyyy-MM-dd't'HH:mm:ss.SSSz", // invalid - "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", - "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid - "yyyy-MM-dd'T'HH:mm:ssz", - "yyyy-MM-dd't'HH:mm:ssz", // invalid - "yyyy-MM-dd'T'HH:mm:ss'Z'", - "yyyy-MM-dd't'HH:mm:ss'z'", // invalid - "yyyy-MM-dd'T'HH:mmz", // invalid - "yyyy-MM-dd't'HH:mmz", // invalid - "yyyy-MM-dd'T'HH:mm'Z'", // invalid - "yyyy-MM-dd't'HH:mm'z'", // invalid - "yyyy-MM-dd", "yyyy-MM", "yyyy" }; - - /** - * Private constructor to avoid DateParser instances creation. - */ - private DateParser() { - } - - /** - * Parses a Date out of a string using an array of masks. - *

- * It uses the masks in order until one of them succedes or all fail. - *

- * - * @param masks array of masks to use for parsing the string - * @param sDate string to parse for a date. - * @return the Date represented by the given string using one of the given masks. - * It returns null if it was not possible to parse the the string with any of the masks. - * - */ - private static Date parseUsingMask(String[] masks, String sDate) { - sDate = (sDate != null) ? sDate.trim() : null; - ParsePosition pp = null; - Date d = null; - for (int i = 0; d == null && i < masks.length; i++) { - DateFormat df = new SimpleDateFormat(masks[i], Locale.US); - //df.setLenient(false); - df.setLenient(true); - try { - pp = new ParsePosition(0); - d = df.parse(sDate, pp); - assert sDate != null; - if (pp.getIndex() != sDate.length()) { - d = null; - } - //System.out.println("pp["+pp.getIndex()+"] s["+sDate+" m["+masks[i]+"] d["+d+"]"); - } catch (Exception ex1) { - //System.out.println("s: "+sDate+" m: "+masks[i]+" d: "+null); - } - } - return d; - } - - /** - * Parses a Date out of a String with a date in RFC822 format. - *

- * It parsers the following formats: - *

    - *
  • "EEE, dd MMM yyyy HH:mm:ss z"
  • - *
  • "EEE, dd MMM yyyy HH:mm z"
  • - *
  • "EEE, dd MMM yy HH:mm:ss z"
  • - *
  • "EEE, dd MMM yy HH:mm z"
  • - *
  • "dd MMM yyyy HH:mm:ss z"
  • - *
  • "dd MMM yyyy HH:mm z"
  • - *
  • "dd MMM yy HH:mm:ss z"
  • - *
  • "dd MMM yy HH:mm z"
  • - *
- *

- * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. - *

- * @param sDate string to parse for a date. - * @return the Date represented by the given RFC822 string. - * It returns null if it was not possible to parse the given string into a Date. - * - */ - public static Date parseRFC822(String sDate) { - int utIndex = sDate.indexOf(" UT"); - if (utIndex > -1) { - String pre = sDate.substring(0, utIndex); - String post = sDate.substring(utIndex + 3); - sDate = pre + " GMT" + post; - } else { // Schroeder 11-2012 - int zIndex = sDate.indexOf(" Z"); - if (zIndex > -1) { - String pre = sDate.substring(0, zIndex); - String post = sDate.substring(zIndex + 2); - sDate = pre + " GMT" + post; - } - } - return parseUsingMask(RFC822_MASKS, sDate); - } - - /** - * Parses a Date out of a String with a date in W3C date-time format. - *

- * It parsers the following formats: - *

    - *
  • "yyyy-MM-dd'T'HH:mm:ssz"
  • - *
  • "yyyy-MM-dd'T'HH:mmz"
  • - *
  • "yyyy-MM-dd"
  • - *
  • "yyyy-MM"
  • - *
  • "yyyy"
  • - *
- *

- * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. - *

- * @param sDate string to parse for a date. - * @return the Date represented by the given W3C date-time string. - * It returns null if it was not possible to parse the given string into a Date. - * - */ - public static Date parseW3CDateTime(String sDate) { - // if sDate has time on it, it injects 'GTM' before de TZ displacement to - // allow the SimpleDateFormat parser to parse it properly - int tIndex = sDate.indexOf("T"); - if (tIndex > -1) { - if (sDate.endsWith("Z")) { - sDate = sDate.substring(0, sDate.length() - 1) + "+00:00"; - } - int tzdIndex = sDate.indexOf("+", tIndex); - if (tzdIndex == -1) { - tzdIndex = sDate.indexOf("-", tIndex); - } - if (tzdIndex > -1) { - String pre = sDate.substring(0, tzdIndex); - int secFraction = pre.indexOf(","); - if (secFraction > -1) { - pre = pre.substring(0, secFraction); - } - String post = sDate.substring(tzdIndex); - sDate = pre + "GMT" + post; - } - } else { - sDate += "T00:00GMT"; - } - return parseUsingMask(W3CDATETIME_MASKS, sDate); - } - - /** - * Parses a Date out of a String with a date in W3C date-time format or - * in a RFC822 format. - *

- * @param sDate string to parse for a date. - * @return the Date represented by the given W3C date-time string. - * It returns null if it was not possible to parse the given string into a Date. - * - * */ - public static Date parseDate(String sDate) { - Date d = parseW3CDateTime(sDate); - if (d == null) { - d = parseRFC822(sDate); - if (d == null && ADDITIONAL_MASKS.length > 0) { - d = parseUsingMask(ADDITIONAL_MASKS, sDate); - } - if (d == null) { // Schroeder 11-2012 - d = parseUsingMask(masks, sDate); - } - } - return d; - } - - /** - * create a RFC822 representation of a date. - *

- * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. - *

- * @param date Date to parse - * @return the RFC822 represented by the given Date - * It returns null if it was not possible to parse the date. - * - */ - @SuppressWarnings("unused") - public static String formatRFC822(Date date) { - SimpleDateFormat dateFormater = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US); - dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); - return dateFormater.format(date); - } - - /** - * create a W3C Date Time representation of a date. - *

- * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element. - *

- * @param date Date to parse - * @return the W3C Date Time represented by the given Date - * It returns null if it was not possible to parse the date. - * - */ - @SuppressWarnings("unused") - public static String formatW3CDateTime(Date date) { - SimpleDateFormat dateFormater = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); - dateFormater.setTimeZone(TimeZone.getTimeZone("GMT")); - return dateFormater.format(date); - } - - - public static void main(String[] args) { - System.out.println(parseDate("Mon, 19 Nov 2012 23:22:39 Z")); - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java deleted file mode 100644 index 53ddd17..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS090Parser.java +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.Channel; -import com.rometools.rome.feed.rss.Image; -import com.rometools.rome.feed.rss.Item; -import com.rometools.rome.feed.rss.TextInput; -import com.rometools.rome.io.FeedException; -import com.rometools.rome.io.impl.BaseWireFeedParser; -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -import java.util.*; - -/** - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class RSS090Parser extends BaseWireFeedParser { - private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; - private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/"; - private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/"; - - private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI); - private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI); - private static final Namespace CONTENT_NS = Namespace.getNamespace(CONTENT_URI); - - public RSS090Parser() { - this("rss_0.9", RSS_NS); - } - - protected RSS090Parser(String type, Namespace ns) { - super(type, ns); - } - - public boolean isMyType(Document document) { - boolean ok = false; - - Element rssRoot = document.getRootElement(); - Namespace defaultNS = rssRoot.getNamespace(); - List additionalNSs = rssRoot.getAdditionalNamespaces(); - - ok = defaultNS != null && defaultNS.equals(getRDFNamespace()); - if (ok) { - if (additionalNSs == null) { - ok = false; - } else { - ok = false; - for (int i = 0; !ok && i < additionalNSs.size(); i++) { - ok = getRSSNamespace().equals(additionalNSs.get(i)); - } - } - } - return ok; - } - - @SuppressWarnings("unused") - public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException, FeedException { - if (validate) { - validateFeed(document); - } - Element rssRoot = document.getRootElement(); - return 
parseChannel(rssRoot); - } - - @SuppressWarnings("all") - protected void validateFeed(@SuppressWarnings("unused") Document document) throws FeedException { - // TBD - // here we have to validate the Feed against a schema or whatever - // not sure how to do it - // one posibility would be to inject our own schema for the feed (they don't exist out there) - // to the document, produce an ouput and attempt to parse it again with validation turned on. - // otherwise will have to check the document elements by hand. - } - - /** - * Returns the namespace used by RSS elements in document of the RSS version the parser supports. - *

- * This implementation returns the EMTPY namespace. - *

- * - * @return returns the EMPTY namespace. - */ - protected Namespace getRSSNamespace() { - return RSS_NS; - } - - /** - * Returns the namespace used by RDF elements in document of the RSS version the parser supports. - *

- * This implementation returns the EMTPY namespace. - *

- * - * @return returns the EMPTY namespace. - */ - protected Namespace getRDFNamespace() { - return RDF_NS; - } - - /** - * Returns the namespace used by Content Module elements in document. - *

- * This implementation returns the EMTPY namespace. - *

- * - * @return returns the EMPTY namespace. - */ - protected Namespace getContentNamespace() { - return CONTENT_NS; - } - - /** - * Parses the root element of an RSS document into a Channel bean. - *

- * It reads title, link and description and delegates to parseImage, parseItems - * and parseTextInput. This delegation always passes the root element of the RSS - * document as different RSS version may have this information in different parts - * of the XML tree (no assumptions made thanks to the specs variaty) - *

- * - * @param rssRoot the root element of the RSS document to parse. - * @return the parsed Channel bean. - */ - protected WireFeed parseChannel(Element rssRoot) { - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - - Channel channel = new Channel(getType()); - - Element e = eChannel.getChild("title", getRSSNamespace()); - if (e != null) { - channel.setTitle(e.getText()); - } - e = eChannel.getChild("link", getRSSNamespace()); - if (e != null) { - channel.setLink(e.getText()); - } - e = eChannel.getChild("description", getRSSNamespace()); - if (e != null) { - channel.setDescription(e.getText()); - } - - channel.setImage(parseImage(rssRoot)); - - channel.setTextInput(parseTextInput(rssRoot)); - - // Unfortunately Microsoft's SSE extension has a special case of - // effectively putting the sharing channel module inside the RSS tag - // and not inside the channel itself. So we also need to look for - // channel modules from the root RSS element. - List allFeedModules = new ArrayList(); - List rootModules = parseFeedModules(rssRoot, Locale.getDefault()); - List channelModules = parseFeedModules(eChannel, Locale.getDefault()); - if (rootModules != null) { - allFeedModules.addAll(rootModules); - } - if (channelModules != null) { - allFeedModules.addAll(channelModules); - } - channel.setModules(allFeedModules); - channel.setItems(parseItems(rssRoot)); - - List foreignMarkup = extractForeignMarkup(eChannel, channel, getRSSNamespace()); - if (!foreignMarkup.isEmpty()) { - channel.setForeignMarkup(foreignMarkup); - } - return channel; - } - - /** - * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment. - * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element. - *

- */ - protected List getItems(Element rssRoot) { - return rssRoot.getChildren("item", getRSSNamespace()); - } - - /** - * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment. - * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. - *

- */ - protected Element getImage(Element rssRoot) { - return rssRoot.getChild("image", getRSSNamespace()); - } - - /** - * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment. - * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element. - *

- */ - protected Element getTextInput(Element rssRoot) { - return rssRoot.getChild("textinput", getRSSNamespace()); - } - - /** - * Parses the root element of an RSS document looking for image information. - *

- * It reads title and url out of the 'image' element. - *

- * - * @param rssRoot the root element of the RSS document to parse for image information. - * @return the parsed image bean. - */ - protected Image parseImage(Element rssRoot) { - Image image = null; - Element eImage = getImage(rssRoot); - if (eImage != null) { - image = new Image(); - - Element e = eImage.getChild("title", getRSSNamespace()); - if (e != null) { - image.setTitle(e.getText()); - } - e = eImage.getChild("url", getRSSNamespace()); - if (e != null) { - image.setUrl(e.getText()); - } - e = eImage.getChild("link", getRSSNamespace()); - if (e != null) { - image.setLink(e.getText()); - } - } - return image; - } - - /** - * Parses the root element of an RSS document looking for all items information. - *

- * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem() - * for each item element. The resulting RSSItem of each item element is stored in a list. - *

- * - * @param rssRoot the root element of the RSS document to parse for all items information. - * @return a list with all the parsed RSSItem beans. - */ - protected List parseItems(Element rssRoot) { - Collection eItems = getItems(rssRoot); - - List items = new ArrayList(); - for (Object item : eItems) { - Element eItem = (Element) item; - items.add(parseItem(rssRoot, eItem)); - } - return items; - } - - /** - * Parses an item element of an RSS document looking for item information. - *

- * It reads title and link out of the 'item' element. - *

- * - * @param rssRoot the root element of the RSS document in case it's needed for context. - * @param eItem the item element to parse. - * @return the parsed RSSItem bean. - */ - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = new Item(); - Element e = eItem.getChild("title", getRSSNamespace()); - if (e != null) { - item.setTitle(e.getText()); - } - e = eItem.getChild("link", getRSSNamespace()); - if (e != null) { - item.setLink(e.getText()); - item.setUri(e.getText()); - } - - item.setModules(parseItemModules(eItem, Locale.getDefault())); - - List foreignMarkup = extractForeignMarkup(eItem, item, getRSSNamespace()); - //content:encoded elements are treated special, without a module, they have to be removed from the foreign - //markup to avoid duplication in case of read/write. Note that this fix will break if a content module is - //used - Iterator iterator = foreignMarkup.iterator(); - while (iterator.hasNext()) { - Element ie = (Element) iterator.next(); - if (getContentNamespace().equals(ie.getNamespace()) && ie.getName().equals("encoded")) { - iterator.remove(); - } - } - if (!foreignMarkup.isEmpty()) { - item.setForeignMarkup(foreignMarkup); - } - return item; - } - - /** - * Parses the root element of an RSS document looking for text-input information. - *

- * It reads title, description, name and link out of the 'textinput' or 'textInput' element. - *

- * - * @param rssRoot the root element of the RSS document to parse for text-input information. - * @return the parsed RSSTextInput bean. - */ - protected TextInput parseTextInput(Element rssRoot) { - TextInput textInput = null; - Element eTextInput = getTextInput(rssRoot); - if (eTextInput != null) { - textInput = new TextInput(); - Element e = eTextInput.getChild("title", getRSSNamespace()); - if (e != null) { - textInput.setTitle(e.getText()); - } - e = eTextInput.getChild("description", getRSSNamespace()); - if (e != null) { - textInput.setDescription(e.getText()); - } - e = eTextInput.getChild("name", getRSSNamespace()); - if (e != null) { - textInput.setName(e.getText()); - } - e = eTextInput.getChild("link", getRSSNamespace()); - if (e != null) { - textInput.setLink(e.getText()); - } - } - return textInput; - } - - @Override - @SuppressWarnings("all") - public WireFeed parse(org.jdom2.Document document, boolean b, Locale locale) throws IllegalArgumentException, FeedException { - return null; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java deleted file mode 100644 index c25200f..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS091UserlandParser.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.*; -import com.rometools.rome.io.impl.NumberParser; -import org.jdom2.Attribute; -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class RSS091UserlandParser extends RSS090Parser { - - public RSS091UserlandParser() { - this("rss_0.91U"); - } - - protected RSS091UserlandParser(String type) { - super(type, null); - } - - public boolean isMyType(Document document) { - boolean ok; - Element rssRoot = document.getRootElement(); - ok = rssRoot.getName().equals("rss"); - if (ok) { - ok = false; - Attribute version = rssRoot.getAttribute("version"); - if (version != null) { - ok = version.getValue().equals(getRSSVersion()); - } - } - return ok; - } - - protected String getRSSVersion() { - return "0.91"; - } - - protected Namespace getRSSNamespace() { - return Namespace.getNamespace(""); - } - - /** - * To be overriden by RSS 0.91 Netscape and RSS 0.94 - */ - @SuppressWarnings("unused") - protected boolean isHourFormat24(Element rssRoot) { - return true; - } - - /** - * Parses the root element of an RSS document into a Channel bean. - *

- * It first invokes super.parseChannel and then parses and injects the following - * properties if present: language, pubDate, rating and copyright. - *

- * - * @param rssRoot the root element of the RSS document to parse. - * @return the parsed Channel bean. - */ - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - - Element e = eChannel.getChild("language", getRSSNamespace()); - if (e != null) { - channel.setLanguage(e.getText()); - } - e = eChannel.getChild("rating", getRSSNamespace()); - if (e != null) { - channel.setRating(e.getText()); - } - e = eChannel.getChild("copyright", getRSSNamespace()); - if (e != null) { - channel.setCopyright(e.getText()); - } - e = eChannel.getChild("pubDate", getRSSNamespace()); - if (e != null) { - channel.setPubDate(DateParser.parseDate(e.getText())); - } - e = eChannel.getChild("lastBuildDate", getRSSNamespace()); - if (e != null) { - channel.setLastBuildDate(DateParser.parseDate(e.getText())); - } - e = eChannel.getChild("docs", getRSSNamespace()); - if (e != null) { - channel.setDocs(e.getText()); - } - e = eChannel.getChild("docs", getRSSNamespace()); - if (e != null) { - channel.setDocs(e.getText()); - } - e = eChannel.getChild("managingEditor", getRSSNamespace()); - if (e != null) { - channel.setManagingEditor(e.getText()); - } - e = eChannel.getChild("webMaster", getRSSNamespace()); - if (e != null) { - channel.setWebMaster(e.getText()); - } - e = eChannel.getChild("skipHours"); - if (e != null) { - List skipHours = new ArrayList(); - List eHours = e.getChildren("hour", getRSSNamespace()); - for (Object hour : eHours) { - Element eHour = (Element) hour; - skipHours.add(eHour.getText().trim()); - } - channel.setSkipHours(skipHours); - } - - e = eChannel.getChild("skipDays"); - if (e != null) { - List skipDays = new ArrayList(); - List eDays = e.getChildren("day", getRSSNamespace()); - for (Object day : eDays) { - Element eDay = (Element) day; - skipDays.add(eDay.getText().trim()); - } - channel.setSkipDays(skipDays); - } - return channel; - } 
- - /** - * Parses the root element of an RSS document looking for image information. - *

- * It first invokes super.parseImage and then parses and injects the following - * properties if present: url, link, width, height and description. - *

- * - * @param rssRoot the root element of the RSS document to parse for image information. - * @return the parsed RSSImage bean. - */ - protected Image parseImage(Element rssRoot) { - Image image = super.parseImage(rssRoot); - if (image != null) { - Element eImage = getImage(rssRoot); - Element e = eImage.getChild("width", getRSSNamespace()); - if (e != null) { - Integer val = NumberParser.parseInt(e.getText()); - if (val != null) { - image.setWidth(val); - } - } - e = eImage.getChild("height", getRSSNamespace()); - if (e != null) { - Integer val = NumberParser.parseInt(e.getText()); - if (val != null) { - image.setHeight(val); - } - } - e = eImage.getChild("description", getRSSNamespace()); - if (e != null) { - image.setDescription(e.getText()); - } - } - return image; - } - - /** - * It looks for the 'item' elements under the 'channel' elemment. - */ - protected List getItems(Element rssRoot) { - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - return (eChannel != null) ? eChannel.getChildren("item", getRSSNamespace()) : Collections.EMPTY_LIST; - } - - /** - * It looks for the 'image' elements under the 'channel' elemment. - */ - protected Element getImage(Element rssRoot) { - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - return (eChannel != null) ? eChannel.getChild("image", getRSSNamespace()) : null; - } - - /** - * To be overriden by RSS 0.91 Netscape parser - */ - protected String getTextInputLabel() { - return "textInput"; - } - - /** - * It looks for the 'textinput' elements under the 'channel' elemment. - */ - protected Element getTextInput(Element rssRoot) { - String elementName = getTextInputLabel(); - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - return (eChannel != null) ? eChannel.getChild(elementName, getRSSNamespace()) : null; - } - - /** - * Parses an item element of an RSS document looking for item information. - *

- * It first invokes super.parseItem and then parses and injects the description property if present. - *

- * - * @param rssRoot the root element of the RSS document in case it's needed for context. - * @param eItem the item element to parse. - * @return the parsed RSSItem bean. - */ - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - Element e = eItem.getChild("description", getRSSNamespace()); - if (e != null) { - item.setDescription(parseItemDescription(rssRoot, e)); - } - Element ce = eItem.getChild("encoded", getContentNamespace()); - if (ce != null) { - Content content = new Content(); - content.setType(Content.HTML); - content.setValue(ce.getText()); - item.setContent(content); - } - return item; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = new Description(); - desc.setType("text/plain"); - desc.setValue(eDesc.getText()); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java deleted file mode 100644 index e0a3cf4..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS092Parser.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.*; -import com.rometools.rome.io.impl.NumberParser; -import org.jdom2.Element; - -import java.util.ArrayList; -import java.util.List; - -/** - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class RSS092Parser extends RSS091UserlandParser { - - public RSS092Parser() { - this("rss_0.92"); - } - - protected RSS092Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "0.92"; - } - - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - Element eCloud = eChannel.getChild("cloud", getRSSNamespace()); - if (eCloud != null) { - Cloud cloud = new Cloud(); - String att = eCloud.getAttributeValue("domain");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setDomain(att); - } - att = eCloud.getAttributeValue("port");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setPort(Integer.parseInt(att.trim())); - } - att = eCloud.getAttributeValue("path");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setPath(att); - } - att = eCloud.getAttributeValue("registerProcedure");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setRegisterProcedure(att); - } - att = eCloud.getAttributeValue("protocol");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cloud.setProtocol(att); - } - channel.setCloud(cloud); - } - return channel; - } - - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - - Element e = eItem.getChild("source", getRSSNamespace()); - if (e != null) { - Source source = new Source(); - String url = e.getAttributeValue("url");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - 
source.setUrl(url); - source.setValue(e.getText()); - item.setSource(source); - } - - // 0.92 allows one enclosure occurrence, 0.93 multiple - // just saving to write some code. - List eEnclosures = eItem.getChildren("enclosure");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (!eEnclosures.isEmpty()) { - List enclosures = new ArrayList(); - for (Object eEnclosure : eEnclosures) { - e = (Element) eEnclosure; - - Enclosure enclosure = new Enclosure(); - String att = e.getAttributeValue("url");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - enclosure.setUrl(att); - } - att = e.getAttributeValue("length");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - enclosure.setLength(NumberParser.parseLong(att, 0L)); - - att = e.getAttributeValue("type");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - enclosure.setType(att); - } - enclosures.add(enclosure); - } - item.setEnclosures(enclosures); - } - - List eCats = eItem.getChildren("category");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - item.setCategories(parseCategories(eCats)); - - return item; - } - - protected List parseCategories(List eCats) { - List cats = null; - if (!eCats.isEmpty()) { - cats = new ArrayList(); - for (Object eCat : eCats) { - Category cat = new Category(); - Element e = (Element) eCat; - String att = e.getAttributeValue("domain");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - cat.setDomain(att); - } - cat.setValue(e.getText()); - cats.add(cat); - } - } - return cats; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = super.parseItemDescription(rssRoot, eDesc); - desc.setType("text/html"); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java deleted file mode 100644 index 136e1d7..0000000 --- 
a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS093Parser.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.rss.Item; -import org.jdom2.Element; - -/** - * - */ -public class RSS093Parser extends RSS092Parser { - - public RSS093Parser() { - this("rss_0.93"); - } - - protected RSS093Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "0.93"; - } - - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - Element e = eItem.getChild("pubDate", getRSSNamespace()); - if (e != null) { - item.setPubDate(DateParser.parseDate(e.getText())); - } - e = eItem.getChild("expirationDate", getRSSNamespace()); - if (e != null) { - item.setExpirationDate(DateParser.parseDate(e.getText())); - } - e = eItem.getChild("description", getRSSNamespace()); - if (e != null) { - String type = e.getAttributeValue("type"); - if (type != null) { - item.getDescription().setType(type); - } - } - return item; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java deleted file mode 100644 index 9f93ffb..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS094Parser.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 
2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.Channel; -import com.rometools.rome.feed.rss.Description; -import com.rometools.rome.feed.rss.Guid; -import com.rometools.rome.feed.rss.Item; -import org.jdom2.Element; - -import java.util.List; -import java.util.Optional; - -/** - */ -public class RSS094Parser extends RSS093Parser { - - public RSS094Parser() { - this("rss_0.94"); - } - - protected RSS094Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "0.94"; - } - - @SuppressWarnings("UnnecessarySemicolon") - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - - List eCats = eChannel.getChildren("category", getRSSNamespace()); - channel.setCategories(parseCategories(eCats)); - - Element eTtl = eChannel.getChild("ttl", getRSSNamespace()); - if (eTtl != null && eTtl.getText() != null) { - Optional ttlValue = Optional.empty(); - try { - ttlValue = Optional.of(Integer.valueOf(eTtl.getText())); - } catch (NumberFormatException ignored) { - ; - } - if (ttlValue.isPresent()) { - channel.setTtl(ttlValue.orElse(null)); - } - } - return channel; - } - - public Item parseItem(Element rssRoot, Element eItem) { - Item item = 
super.parseItem(rssRoot, eItem); - item.setExpirationDate(null); - - Element e = eItem.getChild("author", getRSSNamespace()); - if (e != null) { - item.setAuthor(e.getText()); - } - - e = eItem.getChild("guid", getRSSNamespace()); - if (e != null) { - Guid guid = new Guid(); - String att = e.getAttributeValue("isPermaLink");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att != null) { - guid.setPermaLink(att.equalsIgnoreCase("true")); - } - guid.setValue(e.getText()); - item.setGuid(guid); - } - - e = eItem.getChild("comments", getRSSNamespace()); - if (e != null) { - item.setComments(e.getText()); - } - - return item; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = super.parseItemDescription(rssRoot, eDesc); - String att = eDesc.getAttributeValue("type");//getRSSNamespace()); DONT KNOW WHY DOESN'T WORK - if (att == null) { - att = "text/html"; - } - desc.setType(att); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS10Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS10Parser.java deleted file mode 100644 index e66782f..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS10Parser.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import com.rometools.rome.feed.rss.Channel; -import com.rometools.rome.feed.rss.Content; -import com.rometools.rome.feed.rss.Description; -import com.rometools.rome.feed.rss.Item; - -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -import java.util.List; - -/** - */ -public class RSS10Parser extends RSS090Parser { - - private static final String RSS_URI = "http://purl.org/rss/1.0/"; - private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI); - - public RSS10Parser() { - this("rss_1.0", RSS_NS); - } - - protected RSS10Parser(String type, Namespace ns) { - super(type, ns); - } - - /** - * Indicates if a JDom document is an RSS instance that can be parsed with the parser. - *

- * It checks for RDF (w3.org) and - * RSS (purl.org) namespaces being defined in the root element. - * - * @param document document to check if it can be parsed with this parser implementation. - * @return true if the document is RSS1., false otherwise. - */ - public boolean isMyType(Document document) { - boolean ok = false; - - Element rssRoot = document.getRootElement(); - Namespace defaultNS = rssRoot.getNamespace(); - @SuppressWarnings("rawtypes") - List additionalNSs = rssRoot.getAdditionalNamespaces(); - - ok = defaultNS != null && defaultNS.equals(getRDFNamespace()); - if (ok) { - if (additionalNSs == null) { - ok = false; - } else { - ok = false; - for (int i = 0; !ok && i < additionalNSs.size(); i++) { - ok = getRSSNamespace().equals(additionalNSs.get(i)); - } - } - } - return ok; - } - - /** - * Returns the namespace used by RSS elements in document of the RSS 1.0 - *

- * - * @return returns "purl.org". - */ - protected Namespace getRSSNamespace() { - return Namespace.getNamespace(RSS_URI); - } - - /** - * Parses an item element of an RSS document looking for item information. - *

- * It first invokes super.parseItem and then parses and injects the description property if present. - *

- * - * @param rssRoot the root element of the RSS document in case it's needed for context. - * @param eItem the item element to parse. - * @return the parsed RSSItem bean. - */ - protected Item parseItem(Element rssRoot, Element eItem) { - Item item = super.parseItem(rssRoot, eItem); - Element e = eItem.getChild("description", getRSSNamespace()); - if (e != null) { - item.setDescription(parseItemDescription(rssRoot, e)); - } - Element ce = eItem.getChild("encoded", getContentNamespace()); - if (ce != null) { - Content content = new Content(); - content.setType(Content.HTML); - content.setValue(ce.getText()); - item.setContent(content); - } - - String uri = eItem.getAttributeValue("about", getRDFNamespace()); - if (uri != null) { - item.setUri(uri); - } - - return item; - } - - protected WireFeed parseChannel(Element rssRoot) { - Channel channel = (Channel) super.parseChannel(rssRoot); - - Element eChannel = rssRoot.getChild("channel", getRSSNamespace()); - String uri = eChannel.getAttributeValue("about", getRDFNamespace()); - if (uri != null) { - channel.setUri(uri); - } - - return channel; - } - - @SuppressWarnings("unused") - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = new Description(); - desc.setType("text/plain"); - desc.setValue(eDesc.getText()); - return desc; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java deleted file mode 100644 index e645ca2..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20Parser.java +++ /dev/null @@ -1,67 +0,0 @@ -package dev.rsems.syndication.rome.io.impl; - -/* - * Copyright 2004 Sun Microsystems, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -import com.rometools.rome.feed.rss.Description; -import org.jdom2.Attribute; -import org.jdom2.Document; -import org.jdom2.Element; - -/** - */ -public class RSS20Parser extends RSS094Parser { - - public RSS20Parser() { - this("rss_2.0"); - } - - protected RSS20Parser(String type) { - super(type); - } - - protected String getRSSVersion() { - return "2.0"; - } - - protected boolean isHourFormat24(Element rssRoot) { - return false; - } - - protected Description parseItemDescription(Element rssRoot, Element eDesc) { - Description desc = super.parseItemDescription(rssRoot, eDesc); - desc.setType("text/html"); // change as per https://rome.dev.java.net/issues/show_bug.cgi?id=26 - return desc; - } - - public boolean isMyType(Document document) { - boolean ok; - Element rssRoot = document.getRootElement(); - ok = rssRoot.getName().equals("rss"); - if (ok) { - ok = false; - Attribute version = rssRoot.getAttribute("version"); - if (version != null) { - // At this point, as far ROME is concerned RSS 2.0, 2.00 and - // 2.0.X are all the same, so let's use startsWith for leniency. - ok = version.getValue().startsWith(getRSSVersion()); - } - } - return ok; - } - -} diff --git a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java b/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java deleted file mode 100644 index db6f016..0000000 --- a/src/main/java/dev/rsems/syndication/rome/io/impl/RSS20wNSParser.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package dev.rsems.syndication.rome.io.impl; - -import com.rometools.rome.feed.WireFeed; -import org.jdom2.Document; -import org.jdom2.Element; -import org.jdom2.Namespace; - -/** - * To address issue with certain feeds (brought up by Charles Miller): - *
- * "During the debacle that was the rollout of RSS2.0, this namespace was tried, - * and even appeared in Dave Winer's Scripting News feed for a while. It was - * then withdrawn, but the wonderful thing about standards is the moment you - * roll one out, even if it's marked as unfinished and subject to change, - * someone will end up stuck with it forever." - *
- * Note that there is not counter part on the generator, we only generate the final RSS2 - */ -public class RSS20wNSParser extends RSS20Parser { - private static final String RSS20_URI = "http://backend.userland.com/rss2"; - - public RSS20wNSParser() { - this("rss_2.0wNS"); - } - - protected RSS20wNSParser(String type) { - super(type); - } - - public boolean isMyType(Document document) { - Element rssRoot = document.getRootElement(); - Namespace defaultNS = rssRoot.getNamespace(); - boolean ok = defaultNS != null && defaultNS.equals(getRSSNamespace()); - if (ok) { - ok = super.isMyType(document); - } - return ok; - } - - protected Namespace getRSSNamespace() { - return Namespace.getNamespace(RSS20_URI); - } - - /** - * After we parse the feed we put "rss_2.0" in it (so converters and generators work) - * this parser is a phantom. - * - */ - protected WireFeed parseChannel(Element rssRoot) { - WireFeed wFeed = super.parseChannel(rssRoot); - wFeed.setFeedType("rss_2.0"); - return wFeed; - } - -} diff --git a/src/main/resources/rome.properties b/src/main/resources/rome.properties index 0519ed2..a841936 100644 --- a/src/main/resources/rome.properties +++ b/src/main/resources/rome.properties @@ -1,13 +1,13 @@ # Feed Parser implementation classes # -WireFeedParser.classes=dev.rsems.syndication.rome.io.impl.RSS090Parser \ - com.sun.syndication.io.impl.RSS091NetscapeParser \ - com.sun.syndication.io.impl.RSS091UserlandParser \ - com.sun.syndication.io.impl.RSS092Parser \ - dev.rsems.syndication.rome.io.impl.RSS093Parser \ - dev.rsems.syndication.rome.io.impl.RSS094Parser \ - dev.rsems.syndication.rome.io.impl.RSS10Parser \ - dev.rsems.syndication.rome.io.impl.RSS20wNSParser \ - dev.rsems.syndication.rome.io.impl.RSS20Parser \ - com.sun.syndication.io.impl.Atom10Parser \ - com.sun.syndication.io.impl.Atom03Parser +WireFeedParser.classes=dev.rsems.rometools.rome.io.impl.RSS090Parser \ + com.rometools.rome.io.impl.RSS091NetscapeParser \ + 
com.rometools.rome.io.impl.RSS091UserlandParser \ + com.rometools.rome.io.impl.RSS092Parser \ + dev.rsems.rometools.rome.io.impl.RSS093Parser \ + dev.rsems.rometools.rome.io.impl.RSS094Parser \ + dev.rsems.rometools.rome.io.impl.RSS10Parser \ + dev.rsems.rometools.rome.io.impl.RSS20wNSParser \ + dev.rsems.rometools.rome.io.impl.RSS20Parser \ + com.rometools.rome.io.impl.Atom10Parser \ + com.rometools.rome.io.impl.Atom03Parser \ No newline at end of file diff --git a/src/main/resources/templates/about.html b/src/main/resources/templates/about.html new file mode 100644 index 0000000..af36b65 --- /dev/null +++ b/src/main/resources/templates/about.html @@ -0,0 +1,13 @@ + + + + + + + +

About

+

This is a feed aggregator & feed reader application

+
+
+ + \ No newline at end of file diff --git a/src/main/resources/templates/index.html b/src/main/resources/templates/index.html index 1661b03..b2378d4 100644 --- a/src/main/resources/templates/index.html +++ b/src/main/resources/templates/index.html @@ -5,6 +5,7 @@ +

About