/* TODO remove extra spaces between description item 7 of imbed * RssFormatParser.java * * Copyright (C) 2005-2006 Tommi Laukkanen * http://www.substanceofcode.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ // Expand to define logging define //#define DNOLOGGING package com.substanceofcode.rssreader.businesslogic; //#ifdef DLOGGING import net.sf.jlogmicro.util.logging.Logger; import net.sf.jlogmicro.util.logging.Level; //#endif import com.substanceofcode.rssreader.businessentities.RssItunesFeed; import com.substanceofcode.rssreader.businessentities.RssItunesItem; import com.substanceofcode.utils.StringUtil; import com.substanceofcode.utils.EncodingUtil; import com.substanceofcode.utils.XmlParser; import com.substanceofcode.utils.CauseException; import java.io.IOException; import java.io.InputStream; import java.util.Calendar; import java.util.Date; import java.util.Vector; /** * * @author Tommi */ public class RssFormatParser implements FeedFormatParser { //#ifdef DLOGGING private Logger logger = Logger.getLogger("RssFormatParser"); //#endif /** RSS item properties */ private boolean m_hasExt = false; //#ifdef DLOGGING private boolean fineLoggable = logger.isLoggable(Level.FINE); private boolean finestLoggable = logger.isLoggable(Level.FINEST); //#endif private boolean m_convXmlEnts; private String m_title = ""; private String m_author = ""; private String m_description = ""; private String m_link = ""; private String m_language = ""; private String m_date = ""; private String m_enclosure = ""; private ExtParser m_extParser; /** Creates a new instance of RssFormatParser */ public RssFormatParser() { } public RssItunesFeed parse(XmlParser parser, RssItunesFeed cfeed, final boolean convXmlEnts, final int maxItemCount, boolean getTitleOnly) throws IOException, CauseException { Vector items = new Vector(); m_extParser = new ExtParser(convXmlEnts); m_extParser.parseNamespaces(parser); m_hasExt = m_extParser.isHasExt(); m_convXmlEnts = convXmlEnts; RssItunesFeed feed = cfeed; feed.setItems(items); /** Parse to first entry element */ while(!parser.getName().equals("item")) { switch (parser.parse()) { case XmlParser.END_DOCUMENT: System.out.println("No entries found."); return feed; case XmlParser.ELEMENT: String elementName = parser.getName(); if (elementName.length() == 0) { continue; } char elemChar = elementName.charAt(0); if (parseCommon(parser, elemChar, elementName)) { if ((elemChar == 't') && getTitleOnly && elementName.equals("title") ) { feed.setName(m_title); return feed; } continue; } switch (elemChar) { //#ifdef DITUNES case 'l': if (elementName.equals("language")) { m_language = parser.getText(); //#ifdef DLOGGING if (finestLoggable) {logger.finest("m_language=" + m_language);} //#endif continue; } break; //#endif case 'i': if (elementName.equals("image")) { // Skip image text as it includes link // and title. String itext = parser.getText(false); //#ifdef DLOGGING if (finestLoggable) {logger.finest("image=" + itext);} //#endif continue; } break; default: break; } if (m_hasExt) { m_extParser.parseExtItem(parser, elemChar, elementName); } break; default: break; } } feed.setLink(m_link); if (m_date.length() > 0) { Date pubDate = parseRssDate(m_date); feed.setDate(pubDate); } else { feed.setDate(null); } if (m_extParser.isItunes()) { feed = m_extParser.getFeedInstance(feed, m_language, m_title, m_description); } reset(); /** Parse next element */ int parsingResult; while( (parsingResult = parser.parse()) !=XmlParser.END_DOCUMENT ) { String elementName = parser.getName(); if (elementName.length() == 0) { continue; } char elemChar = elementName.charAt(0); switch (elemChar) { case 'i': if (elementName.equals("item") ) { /** Save previous entry */ RssItunesItem item = createItem(); if ( item != null) { items.addElement( item ); if(items.size()==maxItemCount) { return feed; } } /** New entry */ /** reset */ reset(); continue; } break; case 't': // Textinput has required sub element description. // We don't want the overriding description. if (elementName.equals("textinput") ) { String textData = parser.getText(false); //#ifdef DLOGGING if (finestLoggable) {logger.finest("skipping textinput data=" + textData);} //#endif continue; } break; default: } parseItem(parser, elemChar, elementName); } /** Save previous entry */ RssItunesItem item = createItem(); if ( item != null) { items.addElement( item ); } return feed; } /** Save previous entry */ final private RssItunesItem createItem() { boolean hasTitle = (m_title.length()>0); boolean hasDesc = (m_description.length()>0); if(hasTitle || hasDesc) { if (hasTitle && hasDesc) { m_title = m_title.replace('\n', ' '); // If we were not converting HTML, do so now since the // title should not have HTML. if (!m_convXmlEnts) { m_title = EncodingUtil.replaceAlphaEntities( true, m_title ); m_title = StringUtil.removeHtml( m_title ); } } Date pubDate = null; // Check date in case we cannot find it. if ((m_date.length() == 0) && m_extParser.isHasExt()) { m_date = m_extParser.getDate(); } if (m_date.length() > 0) { pubDate = parseRssDate(m_date); } RssItunesItem item; if (m_hasExt) { item = m_extParser.createItem(m_title, m_link, m_description, pubDate, m_enclosure, true, m_author); } else { item = new RssItunesItem(m_title, m_link, m_description, pubDate, m_enclosure, true); } return item; } return null; } private void reset() { m_title = ""; m_author = ""; m_description = ""; m_link = ""; m_language = ""; m_date = ""; m_enclosure = ""; if (m_hasExt) { m_extParser.reset(); } } /* Parse the fields common to feed and item. */ private boolean parseCommon(XmlParser parser, char elemChar, String elementName) throws IOException, CauseException { switch (elemChar) { case 'p': if( elementName.equals("pubDate")) { m_date = parser.getText(); //#ifdef DLOGGING if (finestLoggable) {logger.finest("m_date=" + m_date);} //#endif return true; } break; case 't': if( elementName.equals("title") ) { m_title = parser.getText(m_convXmlEnts); //#ifdef DLOGGING if (finestLoggable) {logger.finest("m_title=" + m_title);} //#endif return true; } break; case 'd': if( elementName.equals("description")) { m_description = parser.getText(m_convXmlEnts); if (m_convXmlEnts) { m_description = StringUtil.removeHtml( m_description ); } //#ifdef DLOGGING if (finestLoggable) {logger.finest("m_description=" + m_description);} //#endif return true; } break; case 'l': if( elementName.equals("link") ) { m_link = parser.getText(); m_link = StringUtil.removeHtml( m_link ); //#ifdef DLOGGING if (finestLoggable) {logger.finest("m_link=" + m_link);} //#endif return true; } break; default: } return false; } /* Parse the item to get it's fields */ void parseItem(XmlParser parser, char elemChar, String elementName) throws IOException, CauseException { switch (elemChar) { //#ifdef DITUNES case 'a': if( elementName.equals("author")) { m_author = parser.getText(m_convXmlEnts); return; } break; //#endif case 'e': if( elementName.equals("enclosure") ) { String cenclosure = parser.getAttributeValue("url"); if (cenclosure != null) { m_enclosure = cenclosure; return; } return; } break; default: } if (parseCommon(parser, elemChar, elementName)) { return; } if (m_hasExt) { m_extParser.parseExtItem(parser, elemChar, elementName); } } /** Get calendar date. **/ public static Date getCal(int dayOfMonth, int month, int year, int hours, int minutes, int seconds) throws Exception { // Create calendar object from date values Calendar cal = Calendar.getInstance(); cal.set(Calendar.DAY_OF_MONTH, dayOfMonth); cal.set(Calendar.MONTH, month); cal.set(Calendar.YEAR, year); cal.set(Calendar.HOUR_OF_DAY, hours); cal.set(Calendar.MINUTE, minutes); cal.set(Calendar.SECOND, seconds); return cal.getTime(); } /** Parse the standard RSS date and Dublin Core (dc) date. */ static Date parseRssDate(String date) { Date pubDate = null; date = date.trim(); int dpos = date.indexOf('-', 2); if ((dpos > 0) && (date.indexOf('-', dpos + 1) > 0)) { pubDate = parseDcDate(date); } else { pubDate = parseStdDate(date); } //#ifdef DLOGGING Logger logger = Logger.getLogger("RssFormatParser"); logger.finest("date,pubDate=" + date + "," + pubDate); //#endif return pubDate; } /** * Parse RSS date format to Date object. * Example of RSS date: * Sat, 23 Sep 2006 22:25:11 +0000 */ public static Date parseStdDate(String dateString) { Date pubDate = null; try { // Split date string to values // 0 = week day // 1 = day of month // 2 = month // 3 = year (could be with either 4 or 2 digits) // 4 = time // 5 = GMT int weekDayIndex = 0; int dayOfMonthIndex = 1; int monthIndex = 2; int yearIndex = 3; int timeIndex = 4; int gmtIndex = 5; int kc = 0; while ((dateString.indexOf(" ") >= 0) && (kc++ < dateString.length())) { dateString = StringUtil.replace(dateString, " ", " "); } String[] values = StringUtil.split(dateString, ' '); int columnCount = values.length; if( columnCount==5 ) { // Expected format: // 09 Nov 2006 23:18:49 EST dayOfMonthIndex = 0; monthIndex = 1; yearIndex = 2; timeIndex = 3; gmtIndex = 4; } else if( columnCount==7 ) { // Expected format: // Thu, 19 Jul 2007 00:00:00 N yearIndex = 4; timeIndex = 5; gmtIndex = 6; } else if( columnCount<5 || columnCount>6 ) { //#ifdef DLOGGING Logger logger = Logger.getLogger("RssFormatParser"); logger.warning("Invalid date format: " + dateString); //#endif for (int ic = 0; ic < dateString.length(); ic++) { System.out.println("date=" + ic + "," + dateString.charAt(ic) + "," + (int)dateString.charAt(ic)); } throw new Exception("Invalid date format: " + dateString); } // Day of month int dayOfMonth = Integer.parseInt( values[ dayOfMonthIndex ] ); // Month String[] months = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; String monthString = values[ monthIndex ]; int month=0; for(int monthEnumIndex=0; monthEnumIndex<11; monthEnumIndex++) { if( monthString.equals( months[ monthEnumIndex ] )) { month = monthEnumIndex; } } // Year int year = Integer.parseInt(values[ yearIndex ]); if(year<100) { year += 2000; } // Time String[] timeValues = StringUtil.split(values[ timeIndex ], ':'); int hours = Integer.parseInt( timeValues[0] ); int minutes = Integer.parseInt( timeValues[1] ); int seconds = Integer.parseInt( timeValues[2] ); pubDate = getCal(dayOfMonth, month, year, hours, minutes, seconds); } catch(Exception ex) { // TODO: Add exception handling code System.err.println("parseStdDate error while converting date string to object: " + dateString + "," + ex.toString()); //#ifdef DLOGGING Logger logger = Logger.getLogger("RssFormatParser"); logger.severe("parseStdDate error while converting date " + "string to object: " + dateString, ex); //#endif } catch(Throwable t) { // TODO: Add exception handling code System.err.println("parseStdDate error while converting date string to object: " + dateString + "," + t.toString()); //#ifdef DLOGGING Logger logger = Logger.getLogger("RssFormatParser"); logger.severe("parseStdDate error while converting date " + "string to object: " + dateString, t); //#endif } return pubDate; } /** * Parse RSS date dc:date or atom format to Date object. * Example of RSS dc:date: * 2007-07-31T02:02:00+00:00 * atom date * [-]CCYY-MM-DDThh:mm:ss[Z|(+|-)hh:mm] */ public static Date parseDcDate(String dateString) { Date pubDate = null; try { // Split date string to values // 0 = year (could be with either 4 or 2 digits) // 1 = month // 2 = day of month/time int yearIndex = 0; int monthIndex = 1; int dayOfMonthTimeIndex = 2; if (dateString.charAt(0) == '-') { dateString = dateString.substring(1); } String[] values = StringUtil.split(dateString, '-'); if( values.length<3 ) { throw new Exception("Invalid date format: " + dateString); } int year = Integer.parseInt(values[ yearIndex ]); // Month int month = Integer.parseInt( values[ monthIndex ] ); // Day of month String sdayOfMonth = values[ dayOfMonthTimeIndex ].substring(0, 2); int dayOfMonth = Integer.parseInt( sdayOfMonth ); String time = values[ dayOfMonthTimeIndex ].substring(3); String [] timeValues = StringUtil.split(time, ':'); int hours = Integer.parseInt( timeValues[0] ); int minutes = Integer.parseInt( timeValues[1] ); timeValues[2] = timeValues[2].substring( 0, 2 ); int seconds = Integer.parseInt( timeValues[2] ); pubDate = getCal(dayOfMonth, month - 1 + Calendar.JANUARY, year, hours, minutes, seconds); } catch(Exception ex) { //#ifdef DLOGGING Logger logger = Logger.getLogger("RssFormatParser"); logger.warning("parseDcDate error while converting date " + "string to object: " + dateString, ex); //#endif // TODO: Add exception handling code System.err.println("parseDcDate error while converting date string to object: " + dateString + "," + ex.toString()); } catch(Throwable t) { //#ifdef DLOGGING Logger logger = Logger.getLogger("RssFormatParser"); logger.severe("parseDcDate error while converting date " + "string to object: " + dateString, t); //#endif // TODO: Add exception handling code System.err.println("parseDcDate error while converting date string to object: " + dateString + "," + t.toString()); } return pubDate; } }