/** * OAIListFriendsLoader * Copyright 2010 by Michael Peter Christen * First released 29.04.2010 at http://yacy.net * * This is a part of YaCy, a peer-to-peer based web search engine * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see <http://www.gnu.org/licenses/>. */ package net.yacy.document.importer; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.net.MalformedURLException; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.Properties; import java.util.TreeMap; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.retrieval.Response; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.LoaderDispatcher; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public class OAIListFriendsLoader implements Serializable { private static final long serialVersionUID = -8705115274655024604L; private static final HashMap<String, File> listFriends = new HashMap<String, File>(); public static void init(final LoaderDispatcher loader, final Map<String, File> moreFriends, final ClientIdentification.Agent agent) { listFriends.putAll(moreFriends); if (loader != null) for (final Map.Entry<String, File> oaiFriend: listFriends.entrySet()) { try { loader.loadIfNotExistBackground(new DigestURL(oaiFriend.getKey()), oaiFriend.getValue(), Integer.MAX_VALUE, null, agent); } catch (final MalformedURLException e) { } } } public static Map<String, File> loadListFriendsSources(final File initFile, final File dataPath) { final Properties p = new Properties(); final Map<String, File> m = new HashMap<String, File>(); try { p.loadFromXML(new FileInputStream(initFile)); } catch (final IOException e) { ConcurrentLog.logException(e); return m; } for (final Entry<Object, Object> e: p.entrySet()) m.put((String) e.getKey(), new File(dataPath, (String) e.getValue())); return m; } public Map<String, String> getListFriends(final LoaderDispatcher loader, final ClientIdentification.Agent agent) { final Map<String, String> map = new TreeMap<String, String>(); Map<String, String> m; for (final Map.Entry<String, File> oaiFriend: listFriends.entrySet()) try { if (!oaiFriend.getValue().exists()) { final Response response = loader == null ? null : loader.load(loader.request(new DigestURL(oaiFriend.getKey()), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); if (response != null) FileUtils.copy(response.getContent(), oaiFriend.getValue()); } if (oaiFriend.getValue().exists()) { final byte[] b = FileUtils.read(oaiFriend.getValue()); if (b != null) { m = new Parser(b).map; if (m != null) map.putAll(m); } } } catch (final IOException e) {} return map; } private static final ThreadLocal<SAXParser> tlSax = new ThreadLocal<SAXParser>(); private SAXParser getParser() throws SAXException { SAXParser parser = tlSax.get(); if (parser == null) { try { parser = SAXParserFactory.newInstance().newSAXParser(); } catch (final ParserConfigurationException e) { throw new SAXException(e.getMessage(), e); } tlSax.set(parser); } return parser; } // get a resumption token using a SAX xml parser from am input stream private class Parser extends DefaultHandler { // class variables private final StringBuilder buffer; private boolean parsingValue; private SAXParser saxParser; private final InputStream stream; private Attributes atts; private int recordCounter; private final TreeMap<String, String> map; public Parser(final byte[] b) { this.map = new TreeMap<String, String>(); this.recordCounter = 0; this.buffer = new StringBuilder(); this.parsingValue = false; this.atts = null; this.stream = new ByteArrayInputStream(b); try { this.saxParser = getParser(); this.saxParser.parse(this.stream, this); } catch (final SAXException e) { ConcurrentLog.logException(e); ConcurrentLog.warn("OAIListFriendsLoader.Parser", "OAIListFriends was not parsed:\n" + UTF8.String(b)); } catch (final IOException e) { ConcurrentLog.logException(e); ConcurrentLog.warn("OAIListFriendsLoader.Parser", "OAIListFriends was not parsed:\n" + UTF8.String(b)); } finally { try { this.stream.close(); } catch (final IOException e) { ConcurrentLog.logException(e); } } } /* <?xml version="1.0" encoding="UTF-8"?> <BaseURLs> <baseURL id="http://roar.eprints.org/id/eprint/102">http://research.nla.gov.au/oai</baseURL> <baseURL id="http://roar.eprints.org/id/eprint/174">http://oai.bibsys.no/repository</baseURL> <baseURL id="http://roar.eprints.org/id/eprint/1064">http://oai.repec.openlib.org/</baseURL> </BaseURLs> */ public int getCounter() { return this.recordCounter; } @Override public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException { if ("baseURL".equals(tag)) { this.recordCounter++; this.parsingValue = true; this.atts = atts; } } @Override public void endElement(final String uri, final String name, final String tag) { if (tag == null) return; if ("baseURL".equals(tag)) { this.map.put(this.buffer.toString(), this.atts.getValue("id")); this.buffer.setLength(0); this.parsingValue = false; } } @Override public void characters(final char ch[], final int start, final int length) { if (this.parsingValue) { this.buffer.append(ch, start, length); } } } }