/*
* Created on Aug 6, 2004
*
*/
package net.sf.thingamablog.feed;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Feed utilities.
 */
public class FeedUtils
{
    /** Charset used for HTTP(S) responses that declare no charset, or one that cannot be used. */
    private static final Charset DEFAULT_HTTP_CHARSET = Charset.forName("ISO-8859-1");

    /**
     * Captures the value of the <code>charset</code> parameter of a Content-Type header.
     * The parameter name is matched case-insensitively, an optional opening quote
     * (single or double) is skipped, and the value must be at least one character long.
     */
    private static final Pattern CHARSET_PATTERN =
        Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    /**
     * Creates a reader with the appropriate char encoding for the URL.
     * <p>
     * For <code>http</code> and <code>https</code> URLs the encoding is taken from the
     * <code>charset</code> parameter of the response's Content-Type header. When the header
     * is absent, carries no charset, or names a charset this JVM does not support,
     * ISO-8859-1 is used. For every other protocol the platform's default charset is used.
     * <p>
     * The caller is responsible for closing the returned reader.
     *
     * @param feedUrl the Feed url
     * @return an appropriate reader
     * @throws IOException If an error occurs
     */
    public static Reader getFeedReader(URL feedUrl) throws IOException
    {
        URLConnection conn = feedUrl.openConnection();
        String protocol = feedUrl.getProtocol();
        if("http".equals(protocol) || "https".equals(protocol))
        {
            // The charset is resolved BEFORE the stream is opened, so a bad
            // charset name can no longer leave an opened stream behind.
            Charset charset = resolveHttpCharset(conn.getContentType());
            return new InputStreamReader(conn.getInputStream(), charset);
        }

        // Goes with the platform's default charset encoding
        return new InputStreamReader(conn.getInputStream());
    }

    /**
     * Determines the charset announced by a Content-Type header value.
     *
     * @param contentTypeHeader the raw header value, may be <code>null</code>
     * @return the declared charset, or ISO-8859-1 if none is declared or it is unusable
     */
    private static Charset resolveHttpCharset(String contentTypeHeader)
    {
        if(contentTypeHeader == null)
        {
            return DEFAULT_HTTP_CHARSET;
        }

        Matcher matcher = CHARSET_PATTERN.matcher(contentTypeHeader);
        if(!matcher.find())
        {
            return DEFAULT_HTTP_CHARSET;
        }

        try
        {
            return Charset.forName(matcher.group(1));
        }
        catch(IllegalArgumentException ex)
        {
            // IllegalCharsetNameException and UnsupportedCharsetException both extend
            // IllegalArgumentException: the server announced something we cannot decode
            // with, so fall back rather than fail the whole feed read.
            return DEFAULT_HTTP_CHARSET;
        }
    }
}