package org.juxtasoftware.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

/**
 * Extract all namespace info from an XML source.
 * <p>
 * The source is scanned line by line with simple text matching; it is not run
 * through an XML parser, so declarations are only recognized when the whole
 * attribute (name, equals sign and quoted value) sits on a single line.
 *
 * @author loufoster
 */
public final class NamespaceExtractor {

    public enum XmlType {GENERIC, TEI, RAM, JUXTA};

    private static final String COMMENT_START = "<!--";
    private static final String COMMENT_END = "-->";

    /** Prefixed declarations whose URL contains this text are not reported. */
    private static final String SCHEMA_INSTANCE_MARKER = "XMLSchema-instance";

    /** Line counter threshold used by {@link #determineXmlType(Reader)}. */
    private static final int MAX_TYPE_SCAN_LINES = 20;

    /**
     * Default namespace declaration: {@code xmlns="url"} or {@code xmlns='url'}.
     * The lookbehind keeps attribute names that merely end in "xmlns" from matching.
     * Group 2 is the URL.
     */
    private static final Pattern DEFAULT_NS_DECL = Pattern.compile(
        "(?<![\\w.:-])xmlns\\s*=\\s*([\"'])(.*?)\\1" );

    /**
     * Prefixed namespace declaration: {@code xmlns:prefix="url"}.
     * Group 1 is the prefix, group 3 is the URL.
     */
    private static final Pattern PREFIXED_NS_DECL = Pattern.compile(
        "(?<![\\w.:-])xmlns:([^\\s=\"'<>/]+)\\s*=\\s*([\"'])(.*?)\\2" );

    /**
     * Schema location without namespace: {@code xsi:noNamespaceSchemaLocation="url"}.
     * Group 2 is the URL.
     */
    private static final Pattern NO_NAMESPACE_DECL = Pattern.compile(
        ":noNamespaceSchemaLocation\\s*=\\s*([\"'])(.*?)\\1" );

    /**
     * Scan the XML source and extract all of the namespace information.
     * <p>
     * Text inside XML comments is ignored, including comments that span several
     * lines and lines that hold more than one comment. A declaration that is not
     * complete on its line (for example a value with no closing quote) is skipped
     * rather than aborting the scan. The reader is wrapped in a
     * {@link BufferedReader} and read to the end; it is not closed here.
     *
     * @param sourceReader reader positioned at the start of the XML text
     * @return the set of namespaces found; empty when there are none
     * @throws IOException if reading from <code>sourceReader</code> fails
     */
    public static Set<NamespaceInfo> extract( final Reader sourceReader ) throws IOException {
        final BufferedReader br = new BufferedReader( sourceReader );
        final Set<NamespaceInfo> namespaces = new HashSet<NamespaceInfo>();
        final CommentStripper comments = new CommentStripper();

        String line;
        while ( (line = br.readLine()) != null ) {
            final String visible = comments.strip( line ).trim();
            if ( visible.length() == 0 ) {
                continue;
            }
            collectNamespaces( visible, namespaces );
        }
        return namespaces;
    }

    /**
     * Add every namespace declared in <code>text</code> to <code>namespaces</code>.
     * Declarations are added in the order default, no-namespace schema location,
     * prefixed, so that when two entries compare equal the same one is retained
     * as before.
     */
    private static void collectNamespaces( final String text, final Set<NamespaceInfo> namespaces ) {
        // default namespace(s)
        final Matcher defaults = DEFAULT_NS_DECL.matcher( text );
        while ( defaults.find() ) {
            namespaces.add( NamespaceInfo.createDefaultNamespace( defaults.group(2) ) );
        }

        // no-namespace schema location(s)
        final Matcher noNamespace = NO_NAMESPACE_DECL.matcher( text );
        while ( noNamespace.find() ) {
            namespaces.add( NamespaceInfo.createNoPrefixNamespace( noNamespace.group(2) ) );
        }

        // specific (prefixed) namespace(s); the schema-instance one is not reported
        final Matcher prefixed = PREFIXED_NS_DECL.matcher( text );
        while ( prefixed.find() ) {
            final String url = prefixed.group(3);
            if ( !url.contains( SCHEMA_INSTANCE_MARKER ) ) {
                namespaces.add( NamespaceInfo.create( prefixed.group(1), url ) );
            }
        }
    }

    /**
     * Examine the first lines of this source and attempt to determine
     * the XML type: TEI, RAM, JUXTA or Generic.
     * <p>
     * Each line is checked for a fixed list of marker strings; the first match
     * decides the type. Scanning stops once the line counter exceeds
     * {@value #MAX_TYPE_SCAN_LINES}. An {@link IOException} while reading is
     * ignored and the type determined so far (GENERIC) is returned. The reader
     * is closed before returning.
     *
     * @param srcReader reader positioned at the start of the XML text
     * @return the detected type, or {@link XmlType#GENERIC} if no marker was found
     */
    public static XmlType determineXmlType(final Reader srcReader) {
        final BufferedReader br = new BufferedReader(srcReader);
        XmlType type = XmlType.GENERIC;
        try {
            int lineCnt = 0;
            String line;
            while ( (line = br.readLine()) != null ) {
                if ( isTeiLine(line) ) {
                    type = XmlType.TEI;
                    break;
                }
                if ( line.contains("ram.xsd") ) {
                    type = XmlType.RAM;
                    break;
                }
                if ( line.contains("juxta-document") ) {
                    type = XmlType.JUXTA;
                    break;
                }

                // if we haven't found it in 20 lines.. give up
                lineCnt++;
                if ( lineCnt > MAX_TYPE_SCAN_LINES ) {
                    break;
                }
            }
        } catch (IOException e ) {
            // type detection is best effort: fall through and report what we have
        } finally {
            IOUtils.closeQuietly(br);
        }
        return type;
    }

    /** True when the line carries one of the strings used to recognize TEI sources. */
    private static boolean isTeiLine( final String line ) {
        return line.contains("http://www.tei-")
            || line.contains("tei2.dtd")
            || line.contains("teiCorpus")
            || line.contains("DOCTYPE TEI")
            || line.contains("<TEI");
    }

    /**
     * Removes XML comments from consecutive lines of one document, remembering
     * between calls whether the previous line ended inside an open comment.
     */
    private static final class CommentStripper {
        private boolean inComment = false;

        /**
         * @param line the next raw line of the document
         * @return the parts of <code>line</code> that are outside any comment
         */
        String strip( final String line ) {
            final StringBuilder visible = new StringBuilder( line.length() );
            int pos = 0;
            while ( pos < line.length() ) {
                if ( this.inComment ) {
                    final int close = line.indexOf( COMMENT_END, pos );
                    if ( close < 0 ) {
                        // comment continues on a following line
                        break;
                    }
                    pos = close + COMMENT_END.length();
                    this.inComment = false;
                } else {
                    final int open = line.indexOf( COMMENT_START, pos );
                    if ( open < 0 ) {
                        visible.append( line, pos, line.length() );
                        break;
                    }
                    visible.append( line, pos, open );
                    pos = open + COMMENT_START.length();
                    this.inComment = true;
                }
            }
            return visible.toString();
        }
    }

    /**
     * Namespace information.
     * NOTES:
     *    a default namespace does not have a prefix in the XML, but must have one in the xslt
     *    some xml docs include noNamespaceSchemaLocation. these have no prefix in XML nor XSLT
     * <p>
     * Equality and hash code are based on the stored prefix and the URL only.
     * Instances created without an explicit prefix carry the placeholder prefix
     * "jxt". Because {@link #setDefaultPrefix(String)} changes the prefix, calling
     * it on an instance that is already held in a hash-based collection changes
     * that instance's hash code.
     *
     * @author loufoster
     */
    public static class NamespaceInfo {
        private String prefix;
        private String url;
        private boolean noPrefix;
        private boolean isDefault = false;

        /** @return a no-prefix, non-default namespace with an empty URL */
        public static NamespaceInfo createBlankNamespace() {
            return new NamespaceInfo();
        }

        /**
         * @param url the namespace URL
         * @return a namespace flagged as default, carrying the placeholder prefix
         */
        public static NamespaceInfo createDefaultNamespace( final String url) {
            NamespaceInfo ns = new NamespaceInfo();
            ns.isDefault = true;
            ns.noPrefix = false;
            ns.url = url;
            return ns;
        }

        /**
         * @param url the schema location
         * @return a namespace flagged as having no prefix
         */
        public static NamespaceInfo createNoPrefixNamespace( final String url) {
            NamespaceInfo ns = new NamespaceInfo();
            ns.isDefault = false;
            ns.noPrefix = true;
            ns.url = url;
            return ns;
        }

        /**
         * @param prefix the prefix declared for the namespace
         * @param url the namespace URL
         * @return a prefixed, non-default namespace
         */
        public static NamespaceInfo create( final String prefix, final String url) {
            NamespaceInfo ns = new NamespaceInfo();
            ns.isDefault = false;
            ns.noPrefix = false;
            ns.url = url;
            ns.prefix = prefix;
            return ns;
        }

        private NamespaceInfo( ) {
            this.isDefault = false;
            this.noPrefix = true;
            this.url = "";
            this.prefix = "jxt";
        }

        /**
         * Replace the prefix and flag this namespace as default.
         *
         * @param string the new prefix
         */
        public void setDefaultPrefix(String string) {
            this.prefix = string;
            this.isDefault = true;
        }

        public boolean hasNoPrefix() {
            return this.noPrefix;
        }

        public boolean isDefault() {
            return this.isDefault;
        }

        /** @return the prefix, or an empty string when this namespace has no prefix */
        public String getPrefix() {
            if ( hasNoPrefix() ) {
                return "";
            }
            return this.prefix;
        }

        public String getUrl() {
            return this.url;
        }

        /** @return this namespace written as an <code>xmlns</code> attribute */
        @Override
        public String toString() {
            final String p = getPrefix();
            // a null prefix is treated like an empty one instead of failing
            if ( p != null && p.length() > 0 ) {
                return "xmlns:"+p+"=\""+getUrl()+"\"";
            }
            return "xmlns=\""+getUrl()+"\"";
        }

        /**
         * @param tag an element name
         * @return <code>prefix:tag</code>, or <code>tag</code> unchanged when this
         *         namespace has no prefix
         */
        public String addNamespacePrefix( final String tag ) {
            if ( hasNoPrefix() == false ) {
                return getPrefix() +":" + tag;
            }
            return tag;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + ((prefix == null) ? 0 : prefix.hashCode());
            result = prime * result + ((url == null) ? 0 : url.hashCode());
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            NamespaceInfo other = (NamespaceInfo) obj;
            if (prefix == null) {
                if (other.prefix != null) {
                    return false;
                }
            } else if (!prefix.equals(other.prefix)) {
                return false;
            }
            if (url == null) {
                if (other.url != null) {
                    return false;
                }
            } else if (!url.equals(other.url)) {
                return false;
            }
            return true;
        }
    }
}