open-keychain/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/util/CharsetVerifierTest.java
2017-02-02 10:57:16 +01:00

142 lines
7.3 KiB
Java

package org.sufficientlysecure.keychain.util;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.sufficientlysecure.keychain.KeychainTestRunner;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link CharsetVerifier}: each case feeds a byte buffer together with a declared
 * mime type and an optional declared charset into the verifier, then checks the
 * binary/text classification, the charset that is reported, and whether that charset was
 * guessed and/or found to be faulty.
 */
@RunWith(KeychainTestRunner.class)
public class CharsetVerifierTest {

    /** Sample text; the trailing '☭' is a multi-byte character in UTF-8. */
    private static final String SAMPLE_TEXT = "bla bluh ☭";

    /**
     * Returns the UTF-8 encoding of {@link #SAMPLE_TEXT} with two bytes overwritten so that the
     * result is no longer well-formed UTF-8.
     *
     * @return a freshly allocated, deliberately malformed UTF-8 byte sequence
     * @throws Exception if the "utf-8" charset name is not supported by the runtime
     */
    private static byte[] invalidUtf8Bytes() throws Exception {
        byte[] bytes = SAMPLE_TEXT.getBytes("utf-8");
        // 0xc3 announces a two-byte sequence, but 0x28 ('(') is not a continuation byte
        bytes[4] = (byte) 0xc3;
        bytes[5] = (byte) 0x28;
        return bytes;
    }

    /** A non-text mime type is reported as binary with no charset, even if the content is text. */
    @Test
    public void testTypeImagePngAlwaysBinary() throws Exception {
        byte[] bytes = SAMPLE_TEXT.getBytes("utf-8");

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "image/png", null);
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        assertTrue("image/png should be marked as definitely binary", charsetVerifier.isDefinitelyBinary());
        assertFalse("image/png should never be marked as text, even if it is", charsetVerifier.isProbablyText());
        assertNull("charset should be null", charsetVerifier.getCharset());
    }

    /**
     * An explicitly declared charset is kept and reported as not guessed, but flagged as faulty
     * when the content does not decode with it.
     */
    @Test
    public void testUtf8SpecifiedButFaulty() throws Exception {
        byte[] bytes = invalidUtf8Bytes();

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/something", "utf-8");
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        assertFalse("text/something should not be marked as binary, even if it is", charsetVerifier.isDefinitelyBinary());
        assertTrue("text/something should be marked as text, even if it isn't valid", charsetVerifier.isProbablyText());
        assertTrue("encoding contained illegal chars, so it should be marked as faulty", charsetVerifier.isCharsetFaulty());
        assertFalse("charset was specified and should not be marked as guessed", charsetVerifier.isCharsetGuessed());
        assertEquals("mimetype should be preserved", "text/something", charsetVerifier.getGuessedMimeType());
        assertEquals("charset should be utf-8 since it was given explicitly", "utf-8", charsetVerifier.getCharset());
        assertEquals("charset should be utf-8 since it was given explicitly", "utf-8", charsetVerifier.getMaybeFaultyCharset());
    }

    /**
     * Without a declared charset, utf-8 is guessed; when the content is not valid utf-8 the
     * guess is only exposed through the maybe-faulty getter and the regular charset is null.
     */
    @Test
    public void testUtf8GuessedAndFaulty() throws Exception {
        byte[] bytes = invalidUtf8Bytes();

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", null);
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        assertFalse("text/plain should not be marked as binary, even if it is", charsetVerifier.isDefinitelyBinary());
        assertTrue("text/plain should be marked as text, even if it isn't valid", charsetVerifier.isProbablyText());
        assertTrue("encoding contained illegal chars, so it should be marked as faulty", charsetVerifier.isCharsetFaulty());
        assertTrue("charset was guessed and should be marked as such", charsetVerifier.isCharsetGuessed());
        assertNull("charset should be null since the guess was faulty", charsetVerifier.getCharset());
        assertEquals("mimetype should be set to text", "text/plain", charsetVerifier.getGuessedMimeType());
        assertEquals("maybe-faulty charset should be utf-8", "utf-8", charsetVerifier.getMaybeFaultyCharset());
    }

    /**
     * Valid utf-8 content declared as application/octet-stream is recognized as text: the mime
     * type is guessed as text/plain and the charset as utf-8.
     */
    @Test
    public void testGuessedEncoding() throws Exception {
        byte[] bytes = SAMPLE_TEXT.getBytes("utf-8");

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "application/octet-stream", null);
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        assertFalse("application/octet-stream with text content is not definitely binary", charsetVerifier.isDefinitelyBinary());
        assertTrue("application/octet-stream with text content should be probably text", charsetVerifier.isProbablyText());
        assertFalse("detected charset should not be faulty", charsetVerifier.isCharsetFaulty());
        assertTrue("charset was guessed and should be marked as such", charsetVerifier.isCharsetGuessed());
        assertEquals("mimetype should be set to text", "text/plain", charsetVerifier.getGuessedMimeType());
        assertEquals("guessed charset is utf-8", "utf-8", charsetVerifier.getCharset());
    }

    /** A byte with no windows-1252 mapping marks the explicitly declared charset as faulty. */
    @Test
    public void testWindows1252Faulty() throws Exception {
        byte[] bytes = SAMPLE_TEXT.getBytes("windows-1252");
        // 0x9d is one of the code points left undefined by windows-1252
        bytes[2] = (byte) 0x9d;

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", "windows-1252");
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        assertFalse("text/plain is never definitely binary", charsetVerifier.isDefinitelyBinary());
        assertTrue("text/plain is always probably text", charsetVerifier.isProbablyText());
        assertTrue("charset contained faulty characters", charsetVerifier.isCharsetFaulty());
        assertFalse("charset was not guessed", charsetVerifier.isCharsetGuessed());
        assertEquals("charset is returned correctly", "windows-1252", charsetVerifier.getCharset());
    }

    /** A high byte that windows-1252 does define is accepted without marking the charset faulty. */
    @Test
    public void testWindows1252Good() throws Exception {
        byte[] bytes = SAMPLE_TEXT.getBytes("windows-1252");
        // this is ‡ in windows-1252
        bytes[2] = (byte) 0x87;

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", "windows-1252");
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        assertFalse("text/plain is never definitely binary", charsetVerifier.isDefinitelyBinary());
        assertTrue("text/plain is always probably text", charsetVerifier.isProbablyText());
        assertFalse("charset contained no faulty characters", charsetVerifier.isCharsetFaulty());
        assertFalse("charset was not guessed", charsetVerifier.isCharsetGuessed());
        assertEquals("charset is returned correctly", "windows-1252", charsetVerifier.getCharset());
    }

    /**
     * Feeding more bytes after a result getter has been called is expected to fail with an
     * {@link IllegalStateException}.
     */
    @Test(expected = IllegalStateException.class)
    public void testReadAfterGetterShouldCrash() throws Exception {
        byte[] bytes = SAMPLE_TEXT.getBytes("utf-8");

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", null);
        charsetVerifier.readBytesFromBuffer(0, bytes.length);

        // querying a result first; the second read below is the call expected to throw
        charsetVerifier.isCharsetFaulty();
        charsetVerifier.readBytesFromBuffer(0, bytes.length);
    }

    /**
     * Feeding the buffer one byte per call yields the same verdict for malformed utf-8 as
     * feeding it in a single call.
     */
    @Test
    public void testStaggeredInput() throws Exception {
        byte[] bytes = invalidUtf8Bytes();

        CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", null);
        // NOTE(review): the (i, i + 1) arguments suggest the second parameter is an exclusive
        // end offset rather than a length - confirm against CharsetVerifier
        for (int i = 0; i < bytes.length; i++) {
            charsetVerifier.readBytesFromBuffer(i, i + 1);
        }

        assertFalse("text/plain should not be marked as binary, even if it is", charsetVerifier.isDefinitelyBinary());
        assertTrue("text/plain should be marked as text, even if it isn't valid", charsetVerifier.isProbablyText());
        assertTrue("encoding contained illegal chars, so it should be marked as faulty", charsetVerifier.isCharsetFaulty());
        assertTrue("charset was guessed and should be marked as such", charsetVerifier.isCharsetGuessed());
        assertNull("charset should be null since the guess was faulty", charsetVerifier.getCharset());
        assertEquals("maybe-faulty charset should be utf-8", "utf-8", charsetVerifier.getMaybeFaultyCharset());
    }
}