/*
* RssFeedParser.java
*
* Copyright (C) 2005-2006 Tommi Laukkanen
* http://www.substanceofcode.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
// Expand to define test define
//#define DNOTEST
// Expand to define logging define
//#define DNOLOGGING
package com.substanceofcode.rssreader.businesslogic;
import com.substanceofcode.rssreader.businessentities.RssItunesFeed;
import com.substanceofcode.utils.StringUtil;
import com.substanceofcode.utils.XmlParser;
import javax.microedition.io.*;
import java.util.*;
import java.io.*;
import com.substanceofcode.utils.EncodingUtil;
import com.substanceofcode.utils.CauseException;
import com.substanceofcode.utils.CauseMemoryException;
//#ifdef DLOGGING
import net.sf.jlogmicro.util.logging.Logger;
import net.sf.jlogmicro.util.logging.LogManager;
import net.sf.jlogmicro.util.logging.Level;
//#endif
/**
* RssFeedParser is an utility class for aquiring and parsing a RSS feed.
* HttpConnection is used to fetch RSS feed and kXML is used on xml parsing.
*
* @author Tommi Laukkanen
* @version 1.0
*/
public class RssFeedParser extends URLHandler {
private RssItunesFeed m_rssFeed; // The RSS feed
private boolean m_getTitleOnly = false; // The RSS feed
//#ifdef DLOGGING
private Logger logger = Logger.getLogger("RssFeedParser");
private boolean fineLoggable = logger.isLoggable(Level.FINE);
private boolean finerLoggable = logger.isLoggable(Level.FINER);
private boolean finestLoggable = logger.isLoggable(Level.FINEST);
//#endif
/** Create new instance of RssFeedParser */
public RssFeedParser(RssItunesFeed rssFeed) {
m_rssFeed = rssFeed;
}
/** Return RSS feed */
public RssItunesFeed getRssFeed() {
return m_rssFeed;
}
/**
* Send a GET request to web server and parse feeds from response.
*
* @input updFeed Do updated feeds only.
* @input convXmlEnts Convert XML entities
* @input maxItemCount Maximum item count for the feed.
*
*/
public void parseRssFeed(final boolean updFeed,
final boolean convXmlEnts, final int maxItemCount)
throws IOException, CauseException, Exception {
// Set this here as the instance of this class is reused
// for update of the current feed.
m_redirect = false;
parseRssFeedUrl(m_rssFeed.getUrl(), updFeed, convXmlEnts, maxItemCount);
}
/**
* Send a GET request to web server and parse feeds from response.
*
* @input url to parse
* @input updFeed Do updated feeds only.
* @input convXmlEnts Convert XML entities
* @input maxItemCount Maximum item count for the feed.
*
*/
public void parseRssFeedUrl(final String url, final boolean updFeed,
final boolean convXmlEnts, final int maxItemCount)
throws IOException, CauseException, Exception {
try {
super.handleOpen(url, m_rssFeed.getUsername(),
m_rssFeed.getPassword());
if (m_needRedirect) {
m_needRedirect = false;
parseHeaderRedirect(updFeed, m_location, convXmlEnts,
maxItemCount);
return;
}
// If we find HTML, usually it is redirection
if ((m_contentType != null) && (m_contentType.indexOf("html") >= 0)) {
parseHTMLRedirect(updFeed, url, m_inputStream,
convXmlEnts, maxItemCount);
} else {
if (m_lastMod == 0L) {
m_rssFeed.setUpddate(null);
} else {
// If we're only processing if the feed is updated,
// check if we previously had a update value.
// If so and it does equals the new one, return
if (updFeed) {
Date updDate = m_rssFeed.getUpddate();
if ((updDate != null) && updDate.equals(new
Date(m_lastMod))) {
return;
}
}
}
parseRssFeedXml( m_inputStream, convXmlEnts, maxItemCount);
m_rssFeed.setUpddate(new Date(m_lastMod));
}
} catch (CauseMemoryException e) {
if (m_rssFeed != null) {
m_rssFeed.setItems(null);
m_rssFeed.setItems(new Vector());
}
CauseMemoryException ce =
new CauseMemoryException("Out of memory error while " +
"parsing RSS data: " + e.toString(), e);
//#ifdef DLOGGING
logger.severe(ce.getMessage(), e);
//#endif
if ((url != null) && url.startsWith("file://")) {
System.err.println("Cannot process file.");
}
throw ce;
} catch(Exception e) {
//#ifdef DLOGGING
logger.severe("parseRssFeedUrl error with " + url, e);
//#endif
if ((url != null) && url.startsWith("file://")) {
System.err.println("Cannot process file.");
}
throw new CauseException("Error while parsing RSS data: "
+ e.toString(), e);
} catch (OutOfMemoryError e) {
if (m_rssFeed != null) {
m_rssFeed.setItems(null);
m_rssFeed.setItems(new Vector());
}
CauseMemoryException ce =
new CauseMemoryException("Out of memory error while " +
"parsing RSS data: " + e.toString(), e);
//#ifdef DLOGGING
logger.severe(ce.getMessage(), e);
//#endif
if ((url != null) && url.startsWith("file://")) {
System.err.println("Cannot process file.");
}
throw ce;
} catch(Throwable t) {
//#ifdef DLOGGING
logger.severe("parseRssFeedUrl error with " + url, t);
//#endif
if ((url != null) && url.startsWith("file://")) {
System.err.println("Cannot process file.");
}
throw new CauseException("Error while parsing RSS data: "
+ t.toString(), t);
} finally {
super.handleClose();
}
}
/** Read HTML and if it has links, redirect and parse the XML. */
private void parseHeaderRedirect(final boolean updFeed, String url,
final boolean convXmlEnts,
final int maxItemCount)
throws IOException, CauseException, Exception {
if (m_redirect) {
//#ifdef DLOGGING
logger.severe("Error 2nd redirect url: " + url);
//#endif
System.out.println("Error 2nd redirect url: " + url);
throw new IOException("Error url " + m_redirectUrl +
" to 2nd redirect url: " + url);
}
m_redirect = true;
m_redirectUrl = url;
parseRssFeedUrl(url, updFeed, convXmlEnts, maxItemCount);
return;
}
/** Read HTML and if it has links, redirect and parse the XML. */
private void parseHTMLRedirect(boolean updFeed, String url,
InputStream is, final boolean convXmlEnts,
final int maxItemCount)
throws IOException, CauseException, Exception {
String newUrl = super.parseHTMLRedirect(url, is);
RssItunesFeed svFeed = new RssItunesFeed(m_rssFeed);
parseRssFeedUrl(newUrl, updFeed, convXmlEnts, maxItemCount);
}
/**
* Nasty RSS feed XML parser.
* Seems to work with all RSS 0.91, 0.92 and 2.0.
*/
public void parseRssFeedXml(InputStream is, final boolean convXmlEnts,
final int maxItemCount)
throws IOException, CauseException {
/** Initialize item collection */
m_rssFeed.getItems().removeAllElements();
/** Initialize XML parser and parse feed */
XmlParser parser = new XmlParser(is);
/** <?xml...*/
int parsingResult = parser.parse();
/** if prologue was found, parse after prologue. **/
if (parsingResult == XmlParser.PROLOGUE) {
parser.parse();
}
FeedFormatParser formatParser = null;
String entryElementName = parser.getName();
if(entryElementName.equals("rss") ||
entryElementName.equals("rdf")) {
/** Feed is in RSS format */
formatParser = new RssFormatParser();
m_rssFeed = formatParser.parse( parser, m_rssFeed,
convXmlEnts, maxItemCount, m_getTitleOnly );
} else if(entryElementName.equals("feed")) {
/** Feed is in Atom format */
formatParser = new AtomFormatParser();
m_rssFeed = formatParser.parse( parser, m_rssFeed,
convXmlEnts, maxItemCount, m_getTitleOnly );
} else {
//#ifdef DLOGGING
logger.severe("Unable to parse feed type: " + entryElementName);
//#endif
/** Unknown feed */
throw new IOException("Unable to parse feed. Feed format is not supported.");
}
}
public void setGetTitleOnly(boolean m_getTitleOnly) {
this.m_getTitleOnly = m_getTitleOnly;
}
public boolean isGetTitleOnly() {
return (m_getTitleOnly);
}
}