/**
* Html2Image
* Copyright 2014 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First published 26.11.2014 on http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.util;
import java.awt.Container;
import java.awt.Dimension;
import java.awt.Graphics;
import java.awt.Image;
import java.awt.MediaTracker;
import java.awt.image.BufferedImage;
import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.File;
import java.io.IOException;
import java.util.List;
import javax.imageio.ImageIO;
import javax.swing.JEditorPane;
import javax.swing.text.Document;
import javax.swing.text.Element;
import javax.swing.text.View;
import javax.swing.text.ViewFactory;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.ImageView;
import net.yacy.document.ImageParser;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.OS;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
/**
* Convert an html page into an on-disk copy in another file format
* (currently pdf and/or jpg)
*/
public class Html2Image {

    // Mac
    // to install wkhtmltopdf, download wkhtmltox-0.12.1_osx-cocoa-x86-64.pkg from http://wkhtmltopdf.org/downloads.html
    // to install imagemagick, download from http://cactuslab.com/imagemagick/assets/ImageMagick-6.8.9-9.pkg.zip
    // the convert command from imagemagick needs ghostscript, if not present on older macs, download a version of gs from http://pages.uoregon.edu/koch/
    private static final File wkhtmltopdfMac = new File("/usr/local/bin/wkhtmltopdf"); // sometimes this is also the path on debian
    private static final File convertMac1 = new File("/opt/local/bin/convert");
    private static final File convertMac2 = new File("/opt/ImageMagick/bin/convert");

    // debian
    // to install: apt-get install wkhtmltopdf imagemagick xvfb
    private static final File wkhtmltopdfDebian = new File("/usr/bin/wkhtmltopdf"); // there is no wkhtmltoimage, use convert to create images
    private static final File convertDebian = new File("/usr/bin/convert");

    /** set to true once a pdf could only be produced through xvfb-run; later calls then skip the plain attempt */
    private static boolean usexvfb = false;

    /**
     * @return true if a wkhtmltopdf binary exists at one of the known installation paths
     */
    public static boolean wkhtmltopdfAvailable() {
        return wkhtmltopdfMac.exists() || wkhtmltopdfDebian.exists();
    }

    /**
     * @return true if an ImageMagick convert binary exists at one of the known installation paths
     */
    public static boolean convertAvailable() {
        return convertMac1.exists() || convertMac2.exists() || convertDebian.exists();
    }

    /**
     * Write a pdf of a web page.
     * The page is first loaded with strict error handling and then, if that fails, with
     * load errors ignored. In each round the given proxy is tried first and, if a proxy
     * was given and the attempt failed, the page is loaded again without proxy.
     * @param url the url of the web page
     * @param proxy must be of the form http://host:port; use YaCy here as proxy which is mostly http://localhost:8090; may be null
     * @param userAgent passed on to the internal method, which currently does not add it to the command line
     * @param acceptLanguage passed on to the internal method, which currently does not add it to the command line
     * @param destination the pdf file to be written
     * @return true if the destination file exists after one of the attempts
     */
    public static boolean writeWkhtmltopdf(String url, String proxy, String userAgent, final String acceptLanguage, File destination) {
        if (!wkhtmltopdfAvailable()) {
            // without a binary every attempt must fail; do not start a series of doomed processes
            ConcurrentLog.warn("Html2Image", "wkhtmltopdf is not installed, could not generate snapshot for " + url);
            return false;
        }
        boolean success = false;
        for (final boolean ignoreErrors : new boolean[]{false, true}) {
            success = writeWkhtmltopdfInternal(url, proxy, destination, userAgent, acceptLanguage, ignoreErrors);
            if (!success && proxy != null) {
                ConcurrentLog.warn("Html2Image", "trying to load without proxy: " + url);
                success = writeWkhtmltopdfInternal(url, null, destination, userAgent, acceptLanguage, ignoreErrors);
            }
            if (success) break;
        }
        if (success) {
            ConcurrentLog.info("Html2Image", "wrote " + destination.toString() + " for " + url);
        } else {
            ConcurrentLog.warn("Html2Image", "could not generate snapshot for " + url);
        }
        return success;
    }

    /**
     * Run wkhtmltopdf once (and, if necessary, a second time wrapped in xvfb-run).
     * @param url the url of the web page
     * @param proxy proxy of the form http://host:port or null to load directly
     * @param destination the pdf file to be written
     * @param userAgent currently unused (the corresponding option is commented out)
     * @param acceptLanguage currently unused (the corresponding option is commented out)
     * @param ignoreErrors if true, the flag to ignore load errors is added to the command line
     * @return true if the destination file exists after the command was executed
     */
    private static boolean writeWkhtmltopdfInternal(final String url, final String proxy, final File destination, final String userAgent, final String acceptLanguage, final boolean ignoreErrors) {
        final File wkhtmltopdf = wkhtmltopdfMac.exists() ? wkhtmltopdfMac : wkhtmltopdfDebian;
        // NOTE(review): url and proxy are concatenated into the command line without escaping;
        // how OS.execSynchronous tokenizes the string is not visible here - confirm that urls
        // containing whitespace or quotes cannot inject additional arguments
        String commandline =
                wkhtmltopdf.getAbsolutePath() + " -q --title '" + url + "' " +
                //acceptLanguage == null ? "" : "--custom-header 'Accept-Language' '" + acceptLanguage + "' " +
                //(userAgent == null ? "" : "--custom-header \"User-Agent\" \"" + userAgent + "\" --custom-header-propagation ") +
                (proxy == null ? "" : "--proxy " + proxy + " ") +
                (ignoreErrors ? (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") : "") + // some versions do not have that flag and fail if attempting to use it...
                //"--footer-font-name 'Courier' --footer-font-size 9 --footer-left [webpage] --footer-right [date]/[time]([page]/[topage]) " +
                "--footer-left [webpage] --footer-right '[date]/[time]([page]/[topage])' --footer-font-size 7 " +
                url + " " + destination.getAbsolutePath();
        try {
            ConcurrentLog.info("Html2Pdf", "creating pdf from url " + url + " with command: " + commandline);
            List<String> message;
            if (!usexvfb) {
                message = OS.execSynchronous(commandline);
                if (destination.exists()) return true;
                ConcurrentLog.warn("Html2Image", "failed to create pdf " + (proxy == null ? "" : "using proxy " + proxy) + " with command: " + commandline);
                for (String m: message) ConcurrentLog.warn("Html2Image", ">> " + m);
            }
            // if this fails, we should try to wrap the X server with a virtual screen using xvfb, this works on headless servers
            commandline = "xvfb-run -a " + commandline;
            message = OS.execSynchronous(commandline);
            if (destination.exists()) {usexvfb = true; return true;}
            ConcurrentLog.warn("Html2Pdf", "failed to create pdf " + (proxy == null ? "" : "using proxy " + proxy) + " and xvfb with command: " + commandline);
            for (String m: message) ConcurrentLog.warn("Html2Image", ">> " + m);
            return false;
        } catch (IOException e) {
            ConcurrentLog.logException(e);
            ConcurrentLog.warn("Html2Pdf", "exception while creation of pdf with command: " + commandline);
            return false;
        }
    }

    /**
     * convert a pdf (first page) to an image. proper values are i.e. width = 1024, height = 1024, density = 300, quality = 75
     * using internal pdf library or external command line tool on linux or mac.
     * Note that width, height and quality are applied by the external tools only; the
     * internal pdfbox rendering uses just the density.
     * @param pdf input pdf file
     * @param image output jpg file
     * @param width width of the resulting image in pixels
     * @param height height of the resulting image in pixels
     * @param density (dpi)
     * @param quality jpg quality in percent, handed to the convert command
     * @return true if the image was written
     */
    public static boolean pdf2image(File pdf, File image, int width, int height, int density, int quality) {
        final File convert = convertMac1.exists() ? convertMac1 : convertMac2.exists() ? convertMac2 : convertDebian;

        // convert pdf to jpg using internal pdfbox capability
        if (OS.isWindows || !convert.exists()) {
            try {
                final PDDocument pdoc = PDDocument.load(pdf);
                try {
                    final BufferedImage bi = new PDFRenderer(pdoc).renderImageWithDPI(0, density, ImageType.RGB);
                    return ImageIO.write(bi, "jpg", image);
                } finally {
                    // the document holds resources of the pdf and must always be released
                    try {
                        pdoc.close();
                    } catch (IOException ignored) {
                        // closing failed after the rendering was done; the result is not affected
                    }
                }
            } catch (IOException ex) {
                // do not fail silently; the external tools below are tried as fallback
                ConcurrentLog.warn("Html2Image", "failed to create image from " + pdf.getAbsolutePath() + " using pdfbox: " + ex.getMessage());
            }
        }

        // convert on mac or linux using external command line utility
        try {
            // i.e. convert -density 300 -trim yacy.pdf[0] -trim -resize 1024x -crop x1024+0+0 -quality 75% yacy-convert-300.jpg
            // note: both -trim are necessary, otherwise it is trimmed only on one side. The [0] selects the first page of the pdf
            String command = convert.getAbsolutePath() + " -density " + density + " -trim " + pdf.getAbsolutePath() + "[0] -trim -resize " + width + "x -crop x" + height + "+0+0 -quality " + quality + "% " + image.getAbsolutePath();
            List<String> message = OS.execSynchronous(command);
            if (image.exists()) return true;
            ConcurrentLog.warn("Html2Image", "failed to create image with command: " + command);
            for (String m: message) ConcurrentLog.warn("Html2Image", ">> " + m);

            // another try for mac: use Image Events using AppleScript in osacript commands...
            if (!OS.isMacArchitecture) return false;
            return pdf2imageWithImageEvents(pdf, image, width, height);
        } catch (IOException e) {
            ConcurrentLog.logException(e);
            return false;
        }
    }

    /**
     * Mac-only fallback of pdf2image: let the "Image Events" application convert a copy of
     * the pdf into a png and scale that png into a jpg of the requested size.
     * All temporary files are removed again, whether the conversion succeeds or not.
     * @param pdf input pdf file
     * @param image output jpg file
     * @param width width of the resulting image in pixels
     * @param height height of the resulting image in pixels
     * @return true if the image file exists after the conversion
     * @throws IOException if copying the pdf, running osascript or reading/writing an image file fails
     */
    private static boolean pdf2imageWithImageEvents(File pdf, File image, int width, int height) throws IOException {
        // the osascript command overwrites a pdf with an png, so we must make a copy first
        final File pngFile = new File(pdf.getAbsolutePath() + ".tmp.pdf");
        final File newPngFile = new File(pngFile.getAbsolutePath() + ".png");
        try {
            org.apache.commons.io.FileUtils.copyFile(pdf, pngFile);
            String[] commandx = {"osascript",
                    "-e", "set ImgFile to \"" + pngFile.getAbsolutePath() + "\"",
                    "-e", "tell application \"Image Events\"",
                    "-e", "set Img to open file ImgFile",
                    "-e", "save Img as PNG",
                    "-e", "end tell"};
            List<String> message = OS.execSynchronous(commandx);
            for (String m: message) ConcurrentLog.warn("Html2Image", ">> " + m);

            // now we must read and convert this file to a jpg with the target size
            if (!pngFile.renameTo(newPngFile)) {
                ConcurrentLog.warn("Html2Image", "failed to rename " + pngFile.getAbsolutePath() + " to " + newPngFile.getAbsolutePath());
                return false;
            }
            // NOTE(review): the name handed to the parser is the path before the rename - confirm that this is intended
            final Image img = ImageParser.parse(pngFile.getAbsolutePath(), FileUtils.read(newPngFile));
            if (img == null) {
                ConcurrentLog.warn("Html2Image", "failed to parse image " + newPngFile.getAbsolutePath());
                return false;
            }
            final Image scaled = img.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING);
            final MediaTracker mediaTracker = new MediaTracker(new Container());
            mediaTracker.addImage(scaled, 0);
            try {
                mediaTracker.waitForID(0);
            } catch (final InterruptedException e) {
                // keep the interrupt visible for the caller; the scaled image may be incomplete
                Thread.currentThread().interrupt();
                ConcurrentLog.warn("Html2Image", "interrupted while scaling image " + newPngFile.getAbsolutePath());
                return false;
            }

            // finally write the image
            final BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            final Graphics g = bi.createGraphics();
            try {
                g.drawImage(scaled, 0, 0, width, height, null);
            } finally {
                g.dispose();
            }
            ImageIO.write(bi, "jpg", image);
            return image.exists();
        } finally {
            // remove the temporary copies; deleting a file which does not exist is harmless
            pngFile.delete();
            newPngFile.delete();
        }
    }

    /**
     * render a html page with a JEditorPane, which can do html up to html v 3.2. No CSS supported!
     * If the page cannot be loaded, the exception is logged and the pane is rendered as it is.
     * @param url the url of the page to be rendered
     * @param size the initial size of the rendering pane; the image gets the preferred size of the pane after loading
     * @param destination the image file; a jpg is written if the file name ends with "jpg" or "jpeg", a png otherwise
     * @throws IOException if the image cannot be written
     */
    public static void writeSwingImage(String url, Dimension size, File destination) throws IOException {

        // set up a pane for rendering
        final JEditorPane htmlPane = new JEditorPane();
        htmlPane.setSize(size);
        htmlPane.setEditable(false);
        final HTMLEditorKit kit = new HTMLEditorKit() {

            private static final long serialVersionUID = 1L;

            @Override
            public Document createDefaultDocument() {
                HTMLDocument doc = (HTMLDocument) super.createDefaultDocument();
                // a negative priority makes the document load synchronously
                doc.setAsynchronousLoadPriority(-1);
                return doc;
            }

            @Override
            public ViewFactory getViewFactory() {
                return new HTMLFactory() {
                    @Override
                    public View create(Element elem) {
                        View view = super.create(elem);
                        if (view instanceof ImageView) {
                            ((ImageView) view).setLoadsSynchronously(true);
                        }
                        return view;
                    }
                };
            }
        };
        htmlPane.setEditorKitForContentType("text/html", kit);
        htmlPane.setContentType("text/html");

        // load the page
        try {
            htmlPane.setPage(url);
        } catch (IOException e) {
            ConcurrentLog.logException(e);
        }

        // render the page
        final Dimension prefSize = htmlPane.getPreferredSize();
        final String name = destination.getName().toLowerCase(java.util.Locale.ROOT);
        final boolean jpg = name.endsWith("jpg") || name.endsWith("jpeg");
        // jpg has no alpha channel, an ARGB image is not written properly by the jpg writer;
        // a BufferedImage must have at least one pixel in each direction
        final BufferedImage img = new BufferedImage(
                Math.max(1, prefSize.width),
                Math.max(1, prefSize.height),
                jpg ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB);
        final Graphics graphics = img.getGraphics();
        try {
            htmlPane.setSize(prefSize);
            htmlPane.paint(graphics);
        } finally {
            graphics.dispose();
        }
        final String format = jpg ? "jpg" : "png";
        if (!ImageIO.write(img, format, destination)) {
            // ImageIO.write reports a missing writer with a return value, not with an exception
            throw new IOException("no image writer found for format " + format + ", did not write " + destination.getAbsolutePath());
        }
    }

    /**
     * Render a web page into an image file using the swing renderer.
     * @param args args[0] is the url of the page, args[1] is the path of the image file
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("usage: Html2Image <url> <destination-image-file>");
            return;
        }
        try {
            Html2Image.writeSwingImage(args[0], new Dimension(1200, 2000), new File(args[1]));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}