RssFeedParser.java example

Explorer
LEADT-master
/*
 * RssFeedParser.java
 *
 * Copyright (C) 2005-2006 Tommi Laukkanen
 * http://www.substanceofcode.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

// Expand to define test define
//#define DNOTEST
// Expand to define logging define
//#define DNOLOGGING
package com.substanceofcode.rssreader.businesslogic;

import com.substanceofcode.rssreader.businessentities.RssItunesFeed;
import com.substanceofcode.utils.StringUtil;
import com.substanceofcode.utils.XmlParser;
import javax.microedition.io.*;
import java.util.*;
import java.io.*;

import com.substanceofcode.utils.EncodingUtil;
import com.substanceofcode.utils.CauseException;
import com.substanceofcode.utils.CauseMemoryException;
//#ifdef DLOGGING
import net.sf.jlogmicro.util.logging.Logger;
import net.sf.jlogmicro.util.logging.LogManager;
import net.sf.jlogmicro.util.logging.Level;
//#endif

/**
 * RssFeedParser is a utility class for acquiring and parsing an RSS feed.
 * HttpConnection is used to fetch the RSS feed and the project's XmlParser
 * is used for XML parsing.
 *
 * @author  Tommi Laukkanen
 * @version 1.0
 */
public class RssFeedParser extends URLHandler {
    
    private RssItunesFeed m_rssFeed;  // The RSS feed
    private boolean m_getTitleOnly = false;  // The RSS feed
	//#ifdef DLOGGING
    private Logger logger = Logger.getLogger("RssFeedParser");
    private boolean fineLoggable = logger.isLoggable(Level.FINE);
    private boolean finerLoggable = logger.isLoggable(Level.FINER);
    private boolean finestLoggable = logger.isLoggable(Level.FINEST);
	//#endif
    
    /** Create new instance of RssFeedParser */
    public RssFeedParser(RssItunesFeed rssFeed) {
        m_rssFeed = rssFeed;
    }
    
    /** Return RSS feed */
    public RssItunesFeed getRssFeed() {
        return m_rssFeed;
    }
    
    /**
     * Send a GET request to web server and parse feeds from response.
     *
     * @input updFeed Do updated feeds only.
     * @input convXmlEnts Convert XML entities
     * @input maxItemCount Maximum item count for the feed.
     *
     */
    public void parseRssFeed(final boolean updFeed,
			final boolean convXmlEnts, final int maxItemCount)
	throws IOException, CauseException, Exception {
		// Set this here as the instance of this class is reused
		// for update of the current feed.
		m_redirect = false;
		parseRssFeedUrl(m_rssFeed.getUrl(), updFeed, convXmlEnts, maxItemCount);
	}
        
    /**
     * Send a GET request to web server and parse feeds from response.
     *
     * @input url to parse
     * @input updFeed Do updated feeds only.
     * @input convXmlEnts Convert XML entities
     * @input maxItemCount Maximum item count for the feed.
     *
     */
    public void parseRssFeedUrl(final String url, final boolean updFeed,
			final boolean convXmlEnts, final int maxItemCount)
	throws IOException, CauseException, Exception {
        
		try {
			super.handleOpen(url, m_rssFeed.getUsername(),
					  m_rssFeed.getPassword());
			if (m_needRedirect) {
				m_needRedirect = false;
				parseHeaderRedirect(updFeed, m_location, convXmlEnts,
						maxItemCount);
				return;
			}
			// If we find HTML, usually it is redirection
			if ((m_contentType != null) && (m_contentType.indexOf("html") >= 0)) {
				parseHTMLRedirect(updFeed, url, m_inputStream,
								  convXmlEnts, maxItemCount);
			} else {
				if (m_lastMod == 0L) {
					m_rssFeed.setUpddate(null);
				} else {
					// If we're only processing if the feed is updated,
					// check if we previously had a update value.
					// If so and it does equals the new one, return
					if (updFeed) {
						Date updDate = m_rssFeed.getUpddate();
	  					if ((updDate != null) && updDate.equals(new
							Date(m_lastMod))) {
							return;
						}
					}
 				}
				parseRssFeedXml( m_inputStream, convXmlEnts, maxItemCount);
				m_rssFeed.setUpddate(new Date(m_lastMod));
			}
		} catch (CauseMemoryException e) {
			if (m_rssFeed != null) {
				m_rssFeed.setItems(null);
				m_rssFeed.setItems(new Vector());
			}
            CauseMemoryException ce =
					new CauseMemoryException("Out of memory error while " +
					"parsing RSS data: " + e.toString(), e);
			//#ifdef DLOGGING
			logger.severe(ce.getMessage(), e);
			//#endif
			if ((url != null) && url.startsWith("file://")) {
				System.err.println("Cannot process file.");
			}
            throw ce;
        } catch(Exception e) {
			//#ifdef DLOGGING
			logger.severe("parseRssFeedUrl error with " + url, e);
			//#endif
			if ((url != null) && url.startsWith("file://")) {
				System.err.println("Cannot process file.");
			}
            throw new CauseException("Error while parsing RSS data: " 
                    + e.toString(), e);
		} catch (OutOfMemoryError e) {
			if (m_rssFeed != null) {
				m_rssFeed.setItems(null);
				m_rssFeed.setItems(new Vector());
			}
            CauseMemoryException ce =
					new CauseMemoryException("Out of memory error while " +
					"parsing RSS data: " + e.toString(), e);
			//#ifdef DLOGGING
			logger.severe(ce.getMessage(), e);
			//#endif
			if ((url != null) && url.startsWith("file://")) {
				System.err.println("Cannot process file.");
			}
            throw ce;
        } catch(Throwable t) {
			//#ifdef DLOGGING
			logger.severe("parseRssFeedUrl error with " + url, t);
			//#endif
			if ((url != null) && url.startsWith("file://")) {
				System.err.println("Cannot process file.");
			}
            throw new CauseException("Error while parsing RSS data: " 
								+ t.toString(), t);
        } finally {
			super.handleClose();
		}
    }
    
	/** Read HTML and if it has links, redirect and parse the XML. */
	private void parseHeaderRedirect(final boolean updFeed, String url,
								     final boolean convXmlEnts,
									 final int maxItemCount)
	throws IOException, CauseException, Exception {
		if (m_redirect) {
			//#ifdef DLOGGING
			logger.severe("Error 2nd redirect url:  " + url);
			//#endif
			System.out.println("Error 2nd redirect url:  " + url);
			throw new IOException("Error url " + m_redirectUrl +
					" to 2nd redirect url:  " + url);
		}
		m_redirect = true;
		m_redirectUrl = url;
		parseRssFeedUrl(url, updFeed, convXmlEnts, maxItemCount);
		return;

	}

	/** Read HTML and if it has links, redirect and parse the XML. */
	private void parseHTMLRedirect(boolean updFeed, String url,
								   InputStream is, final boolean convXmlEnts,
								   final int maxItemCount)
	throws IOException, CauseException, Exception {
		String newUrl = super.parseHTMLRedirect(url, is);
		RssItunesFeed svFeed = new RssItunesFeed(m_rssFeed);
		parseRssFeedUrl(newUrl, updFeed, convXmlEnts, maxItemCount);
	}

    /**
     * Nasty RSS feed XML parser.
     * Seems to work with all RSS 0.91, 0.92 and 2.0.
     */
    public void parseRssFeedXml(InputStream is, final boolean convXmlEnts,
			final int maxItemCount)
	throws IOException, CauseException {
        /** Initialize item collection */
        m_rssFeed.getItems().removeAllElements();
        
        /** Initialize XML parser and parse feed */
        XmlParser parser = new XmlParser(is);
        
        /** <?xml...*/
        int parsingResult = parser.parse();
		/** if prologue was found, parse after prologue.  **/
		if (parsingResult == XmlParser.PROLOGUE) {
			parser.parse();
		}
        
        FeedFormatParser formatParser = null;
        String entryElementName = parser.getName();
        if(entryElementName.equals("rss") || 
           entryElementName.equals("rdf")) {
            /** Feed is in RSS format */
            formatParser = new RssFormatParser();
            m_rssFeed = formatParser.parse( parser, m_rssFeed,
					convXmlEnts, maxItemCount, m_getTitleOnly );
            
        } else if(entryElementName.equals("feed")) {
            /** Feed is in Atom format */
            formatParser = new AtomFormatParser();
            m_rssFeed = formatParser.parse( parser, m_rssFeed,
					convXmlEnts, maxItemCount, m_getTitleOnly );
            
        } else {
			//#ifdef DLOGGING
			logger.severe("Unable to parse feed type:  " + entryElementName);
			//#endif
            /** Unknown feed */
            throw new IOException("Unable to parse feed. Feed format is not supported.");
            
        }
        
    }
    
    public void setGetTitleOnly(boolean m_getTitleOnly) {
        this.m_getTitleOnly = m_getTitleOnly;
    }

    public boolean isGetTitleOnly() {
        return (m_getTitleOnly);
    }

}