/** * mp3Parser * Copyright 2012 by Stefan Foerster, Norderstedt, Germany * First released 01.10.2012 at http://yacy.net * * $LastChangedDate$ * $LastChangedRevision$ * $LastChangedBy$ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see <http://www.gnu.org/licenses/>. */ package net.yacy.document.parser; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Set; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.VocabularyScraper; import org.jaudiotagger.audio.AudioFile; import org.jaudiotagger.audio.AudioFileIO; import org.jaudiotagger.tag.FieldKey; import org.jaudiotagger.tag.Tag; /** * this parser can parse id3 tags of mp3 audio files */ public class audioTagParser extends AbstractParser implements Parser { public static String EXTENSIONS = "mp3,ogg,oga,m4a,m4p,flac,wma"; public static String MIME_TYPES = "audio/mpeg,audio/MPA,audio/mpa-robust,audio/mp4,audio/flac,audio/x-flac,audio/x-ms-wma,audio/x-ms-asf"; public static String SEPERATOR = ","; public audioTagParser() { super("Audio File Meta-Tag Parser"); final String[] extArray = EXTENSIONS.split(SEPERATOR); for (final String ext : extArray) { this.SUPPORTED_EXTENSIONS.add(ext); } final String[] mimeArray = MIME_TYPES.split(SEPERATOR); for (final String mime : mimeArray) { this.SUPPORTED_MIME_TYPES.add(mime); } } @Override public Document[] parse( final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) throws Parser.Failure, InterruptedException { String filename = location.getFileName(); final String fileext = '.' + MultiProtocolURL.getFileExtension(filename); filename = filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename); String mime = mimeType; // fix mimeType if(!this.SUPPORTED_MIME_TYPES.contains(mimeType)) { if(fileext.equals("mp3")) { mime = "audio/mpeg"; } else if(fileext.equals("ogg")) { mime = "audio/ogg"; } else if(fileext.equals("flac")) { mime = "audio/flac"; } else if(fileext.equals("wma")) { mime = "audio/x-ms-wma"; } else if(fileext.startsWith("m4")) { mime = "audio/mp4"; } } Document[] docs; BufferedOutputStream fout = null; File tempFile = null; AudioFile f; try { if (location.isFile()) { f = AudioFileIO.read(location.getFSFile()); } else { // create a temporary file, as jaudiotagger requires a file rather than an input stream tempFile = File.createTempFile(filename,fileext); fout = new BufferedOutputStream(new FileOutputStream(tempFile)); int c; while ((c = source.read()) != -1) { fout.write(c); } f = AudioFileIO.read(tempFile); } Tag tag = f.getTag(); final Set<String> lang = new HashSet<String>(); lang.add(tag.getFirst(FieldKey.LANGUAGE)); // title final List<String> titles = new ArrayList<String>(); titles.add(tag.getFirst(FieldKey.TITLE)); titles.add(tag.getFirst(FieldKey.ALBUM)); titles.add(filename); // text final List<String> descriptions = new ArrayList<String>(7); final StringBuilder text = new StringBuilder(500); final char space = ' '; String field = tag.getFirst(FieldKey.ARTIST); descriptions.add(FieldKey.ARTIST.name() + ": " + field); text.append(field); text.append(space); field = tag.getFirst(FieldKey.ALBUM); descriptions.add(FieldKey.ALBUM.name() + ": " + field); text.append(field); text.append(space); field = tag.getFirst(FieldKey.TITLE); descriptions.add(FieldKey.TITLE.name() + ": " + field); text.append(field); text.append(space); field = tag.getFirst(FieldKey.COMMENT); descriptions.add(FieldKey.COMMENT.name() + ": " + field); text.append(field); text.append(space); field = tag.getFirst(FieldKey.LYRICS); descriptions.add(FieldKey.LYRICS.name() + ": " + field); text.append(field); text.append(space); field = tag.getFirst(FieldKey.TAGS); descriptions.add(FieldKey.TAGS.name() + ": " + field); text.append(field); text.append(space); field = tag.getFirst(FieldKey.GENRE); descriptions.add(FieldKey.GENRE.name() + ": " + field); text.append(field); text.append(space); text.append(location.toTokens()); // dc:subject final String[] subject = new String[1]; subject[0] = tag.getFirst(FieldKey.GENRE); docs = new Document[]{new Document( location, mime, charset, this, lang, // languages subject, // keywords, dc:subject titles, // title tag.getFirst(FieldKey.ARTIST), // author location.getHost(), // publisher null, // sections descriptions, // abstrct 0.0d, 0.0d, // lon, lat text.toString(), // text null, null, null, false, new Date()) }; return docs; } catch (final Exception e) { // return a generic document as default docs = new Document[]{new Document( location, mimeType, charset, this, null, null, singleList(filename), // title null, // author location.getHost(), null, null, 0.0d, 0.0d, location.toTokens(), null, null, null, false, new Date() )}; } finally { try { if (fout != null) fout.close(); } catch (final IOException e) { // TODO Auto-generated catch block ConcurrentLog.logException(e); } if (tempFile != null) tempFile.delete(); } return docs; } }