ByteCharsetDetectorTest.java example

Explorer
sonarqube-master
/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.scanner.scan.filesystem;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.commons.io.ByteOrderMark;
import org.junit.Before;
import org.junit.Test;
import org.sonar.scanner.scan.filesystem.CharsetValidation.Result;
import org.sonar.scanner.scan.filesystem.CharsetValidation.Validation;

public class ByteCharsetDetectorTest {
  private CharsetValidation validation;
  private ByteCharsetDetector charsets;

  @Before
  public void setUp() {
    validation = mock(CharsetValidation.class);
    charsets = new ByteCharsetDetector(validation, null);
  }

  @Test
  public void detectBOM() throws URISyntaxException, IOException {
    byte[] b = ByteOrderMark.UTF_16BE.getBytes();
    assertThat(charsets.detectBOM(b)).isEqualTo(ByteOrderMark.UTF_16BE);

    assertThat(charsets.detectBOM(readFile("UTF-8"))).isEqualTo(ByteOrderMark.UTF_8);
    assertThat(charsets.detectBOM(readFile("UTF-16BE"))).isEqualTo(ByteOrderMark.UTF_16BE);
    assertThat(charsets.detectBOM(readFile("UTF-16LE"))).isEqualTo(ByteOrderMark.UTF_16LE);
    assertThat(charsets.detectBOM(readFile("UTF-32BE"))).isEqualTo(ByteOrderMark.UTF_32BE);
    assertThat(charsets.detectBOM(readFile("UTF-32LE"))).isEqualTo(ByteOrderMark.UTF_32LE);
  }

  private byte[] readFile(String fileName) throws URISyntaxException, IOException {
    Path path = Paths.get(this.getClass().getClassLoader().getResource("org/sonar/scanner/scan/filesystem/" + fileName + ".txt").toURI());
    return Files.readAllBytes(path);
  }

  @Test
  public void tryUTF8First() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_8));
    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.UTF_8);
  }

  @Test
  public void tryUTF16heuristics() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_16));
    when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);

    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.UTF_16);
  }

  @Test
  public void failAll() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));

    assertThat(charsets.detect(new byte[1])).isEqualTo(null);
  }

  @Test
  public void failAnsii() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_16));
    when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);

    assertThat(charsets.detect(new byte[1])).isEqualTo(null);
  }

  @Test
  public void tryUserAnsii() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(Result.newValid(StandardCharsets.UTF_16));
    when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);
    when(validation.tryDecode(any(byte[].class), eq(StandardCharsets.ISO_8859_1))).thenReturn(true);

    charsets = new ByteCharsetDetector(validation, StandardCharsets.ISO_8859_1);
    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.ISO_8859_1);
  }

  @Test
  public void tryOtherUserCharset() {
    when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
    when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(new Result(Validation.MAYBE, null));
    when(validation.tryDecode(any(byte[].class), eq(StandardCharsets.ISO_8859_1))).thenReturn(true);

    charsets = new ByteCharsetDetector(validation, StandardCharsets.ISO_8859_1);
    assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.ISO_8859_1);
  }

  @Test
  public void invalidBOM() {
    byte[] b1 = {(byte) 0xFF, (byte) 0xFF};
    assertThat(charsets.detectBOM(b1)).isNull();

    // not enough bytes
    byte[] b2 = {(byte) 0xFE};
    assertThat(charsets.detectBOM(b2)).isNull();

    // empty
    byte[] b3 = new byte[0];
    assertThat(charsets.detectBOM(b3)).isNull();
  }
}