/*
* SonarQube
* Copyright (C) 2009-2017 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.scanner.scan.filesystem;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.commons.io.ByteOrderMark;
import org.junit.Before;
import org.junit.Test;
import org.sonar.scanner.scan.filesystem.CharsetValidation.Result;
import org.sonar.scanner.scan.filesystem.CharsetValidation.Validation;
/**
 * Unit tests for {@link ByteCharsetDetector}, driven by a mocked {@link CharsetValidation}.
 *
 * <p>BOM detection is exercised on real bytes (in-memory and fixture files loaded from the test
 * classpath); charset detection is exercised by stubbing the validation results.
 */
public class ByteCharsetDetectorTest {
  private CharsetValidation validation;
  private ByteCharsetDetector charsets;

  /**
   * Creates a fresh mocked validation and a detector whose second constructor argument is
   * {@code null} (presumably the user-configured charset; tests that need one rebuild the detector).
   */
  @Before
  public void setUp() {
    validation = mock(CharsetValidation.class);
    charsets = new ByteCharsetDetector(validation, null);
  }

  /**
   * The BOM is recognized both from the raw BOM bytes and from the fixture file of each encoding.
   */
  @Test
  public void detectBOM() throws URISyntaxException, IOException {
    byte[] b = ByteOrderMark.UTF_16BE.getBytes();
    assertThat(charsets.detectBOM(b)).isEqualTo(ByteOrderMark.UTF_16BE);

    assertThat(charsets.detectBOM(readFile("UTF-8"))).isEqualTo(ByteOrderMark.UTF_8);
    assertThat(charsets.detectBOM(readFile("UTF-16BE"))).isEqualTo(ByteOrderMark.UTF_16BE);
    assertThat(charsets.detectBOM(readFile("UTF-16LE"))).isEqualTo(ByteOrderMark.UTF_16LE);
    assertThat(charsets.detectBOM(readFile("UTF-32BE"))).isEqualTo(ByteOrderMark.UTF_32BE);
    assertThat(charsets.detectBOM(readFile("UTF-32LE"))).isEqualTo(ByteOrderMark.UTF_32LE);
  }

  /**
   * Reads the fixture {@code org/sonar/scanner/scan/filesystem/<fileName>.txt} from the classpath.
   *
   * @param fileName fixture name without the {@code .txt} extension
   * @return the full content of the fixture
   * @throws URISyntaxException if the resource URL cannot be converted to a URI
   * @throws IOException if the file cannot be read
   */
  private byte[] readFile(String fileName) throws URISyntaxException, IOException {
    String resourceName = "org/sonar/scanner/scan/filesystem/" + fileName + ".txt";
    java.net.URL resource = this.getClass().getClassLoader().getResource(resourceName);
    // getResource returns null for a missing resource; fail with the resource name instead of a bare NPE.
    assertThat(resource).as("test resource %s", resourceName).isNotNull();
    Path path = Paths.get(resource.toURI());
    return Files.readAllBytes(path);
  }

  /** A valid UTF-8 validation result is returned as the detected charset. */
  @Test
  public void tryUTF8First() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_8));

    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.UTF_8);
  }

  /** With UTF-8 invalid and the UTF-16 checks succeeding, UTF-16 is detected. */
  @Test
  public void tryUTF16heuristics() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_16));
    when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);

    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.UTF_16);
  }

  /** With UTF-8 invalid, UTF-16 only "maybe", and no charset given to the detector, nothing is detected. */
  @Test
  public void failAll() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));

    assertThat(charsets.detect(new byte[1])).isNull();
  }

  /**
   * With UTF-8 only "maybe" and no charset given to the detector, nothing is detected even though
   * the UTF-16 checks are stubbed to succeed.
   */
  @Test
  public void failAnsii() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_16));
    when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);

    assertThat(charsets.detect(new byte[1])).isNull();
  }

  /**
   * Same stubbing as {@link #failAnsii()}, but the detector is built with ISO-8859-1 and decoding
   * with it succeeds: ISO-8859-1 is returned.
   */
  @Test
  public void tryUserAnsii() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_16));
    when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);
    when(validation.tryDecode(any(byte[].class), eq(StandardCharsets.ISO_8859_1))).thenReturn(true);

    charsets = new ByteCharsetDetector(validation, StandardCharsets.ISO_8859_1);

    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.ISO_8859_1);
  }

  /**
   * With UTF-8 invalid and UTF-16 only "maybe", a detector built with ISO-8859-1 returns that
   * charset when decoding with it succeeds.
   */
  @Test
  public void tryOtherUserCharset() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));
    when(validation.tryDecode(any(byte[].class), eq(StandardCharsets.ISO_8859_1))).thenReturn(true);

    charsets = new ByteCharsetDetector(validation, StandardCharsets.ISO_8859_1);

    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.ISO_8859_1);
  }

  /** Byte sequences that are not a BOM, are too short, or are empty yield no BOM. */
  @Test
  public void invalidBOM() {
    byte[] b1 = {(byte) 0xFF, (byte) 0xFF};
    assertThat(charsets.detectBOM(b1)).isNull();

    // not enough bytes
    byte[] b2 = {(byte) 0xFE};
    assertThat(charsets.detectBOM(b2)).isNull();

    // empty
    byte[] b3 = new byte[0];
    assertThat(charsets.detectBOM(b3)).isNull();
  }
}