/*
* SonarQube
* Copyright (C) 2009-2017 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.scanner.scan.filesystem;
import static java.nio.charset.StandardCharsets.US_ASCII;
import static java.nio.charset.StandardCharsets.UTF_16;
import static java.nio.charset.StandardCharsets.UTF_16BE;
import static java.nio.charset.StandardCharsets.UTF_16LE;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Random;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
public class CharsetDetectorTest {
@Rule
public TemporaryFolder temp = new TemporaryFolder();
@Rule
public ExpectedException exception = ExpectedException.none();
@Test
public void should_detect_charset_from_BOM() {
Path basedir = Paths.get("src/test/resources/org/sonar/scanner/scan/filesystem/");
assertThat(detectCharset(basedir.resolve("without_BOM.txt"), US_ASCII)).isEqualTo(US_ASCII);
assertThat(detectCharset(basedir.resolve("UTF-8.txt"), US_ASCII)).isEqualTo(UTF_8);
assertThat(detectCharset(basedir.resolve("UTF-16BE.txt"), US_ASCII)).isEqualTo(UTF_16BE);
assertThat(detectCharset(basedir.resolve("UTF-16LE.txt"), US_ASCII)).isEqualTo(UTF_16LE);
assertThat(detectCharset(basedir.resolve("UTF-32BE.txt"), US_ASCII)).isEqualTo(MetadataGenerator.UTF_32BE);
assertThat(detectCharset(basedir.resolve("UTF-32LE.txt"), US_ASCII)).isEqualTo(MetadataGenerator.UTF_32LE);
}
@Test
public void should_read_files_from_BOM() throws IOException {
Path basedir = Paths.get("src/test/resources/org/sonar/scanner/scan/filesystem/");
assertThat(readFile(basedir.resolve("without_BOM.txt"), US_ASCII)).isEqualTo("without BOM");
assertThat(readFile(basedir.resolve("UTF-8.txt"), US_ASCII)).isEqualTo("UTF-8");
assertThat(readFile(basedir.resolve("UTF-16BE.txt"), US_ASCII)).isEqualTo("UTF-16BE");
assertThat(readFile(basedir.resolve("UTF-16LE.txt"), US_ASCII)).isEqualTo("UTF-16LE");
assertThat(readFile(basedir.resolve("UTF-32BE.txt"), US_ASCII)).isEqualTo("UTF-32BE");
assertThat(readFile(basedir.resolve("UTF-32LE.txt"), US_ASCII)).isEqualTo("UTF-32LE");
}
@Test
public void always_try_utf8() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
// this is a valid 2 byte UTF-8.
out.write(194);
out.write(128);
Path filePath = temp.newFile().toPath();
Files.write(filePath, out.toByteArray());
assertThat(detectCharset(filePath, UTF_16)).isEqualTo(UTF_8);
}
@Test
public void fail_if_file_doesnt_exist() {
exception.expect(IllegalStateException.class);
exception.expectMessage("Unable to read file " + Paths.get("non_existing").toAbsolutePath());
detectCharset(Paths.get("non_existing"), UTF_8);
}
@Test
public void no_encoding_found() throws IOException {
Path filePath = temp.newFile().toPath();
byte[] b = new byte[512];
new Random().nextBytes(b);
Files.write(filePath, b);
CharsetDetector detector = new CharsetDetector(filePath, UTF_8);
assertThat(detector.run()).isFalse();
assertThat(detector.charset()).isNull();
}
private String readFile(Path file, Charset defaultEncoding) throws IOException {
CharsetDetector detector = new CharsetDetector(file, defaultEncoding);
assertThat(detector.run()).isTrue();
List<String> readLines = IOUtils.readLines(new InputStreamReader(detector.inputStream(), detector.charset()));
return StringUtils.join(readLines, "\n");
}
private Charset detectCharset(Path file, Charset defaultEncoding) {
CharsetDetector detector = new CharsetDetector(file, defaultEncoding);
assertThat(detector.run()).isTrue();
return detector.charset();
}
}