/**
* Copyright (C) 2009-2014 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.server.collation;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import com.foundationdb.server.error.AmbiguousCollationException;
import com.foundationdb.server.error.InvalidCollationKeywordException;
import com.foundationdb.server.error.InvalidCollationSchemeException;
import com.foundationdb.server.error.UnsupportedCollationException;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;
/**
 * Parsed form of a collation scheme string.
 *
 * <p>A scheme is a sequence of underscore-separated pieces. The leading pieces
 * form the locale; they may be followed by either {@code keyword=value} pairs
 * or by the case/accent shortcuts ({@code cs}/{@code ci} and
 * {@code co}/{@code cx}), but not by both. When no shortcut is given, the
 * specifier defaults to case sensitive and accent sensitive.
 *
 * <p>{@link #toString()} yields a standardized rendering of the scheme, and
 * {@link #createCollator()} builds the matching ICU collator.
 */
public class CollationSpecifier {
    // Only the region needs to be checked, for ambiguity
    private final static int REGION_NDX = 1;

    private final static String CASE_SENSITIVE = "cs";
    private final static String CASE_INSENSITIVE = "ci";
    private final static String ACCENT_SENSITIVE = "co";
    private final static String ACCENT_INSENSITIVE = "cx";

    private final static String DEFAULT_CASE = CASE_SENSITIVE;
    private final static String DEFAULT_ACCENT = ACCENT_SENSITIVE;

    // Used to check the validity of requested locales
    private final static HashSet<ULocale> locales = new HashSet<ULocale>(Arrays.asList(ULocale.getAvailableLocales()));

    private final String scheme; // the original, user-created scheme; use toString() instead for a standardized version
    private final String locale;
    private final boolean caseSensitive;
    private final boolean accentSensitive;
    private final HashMap<String, String> keywordsToValues = new HashMap<String, String>();

    /**
     * Parses the given scheme.
     *
     * @param scheme the user-supplied collation scheme; matched case-insensitively
     * @throws InvalidCollationSchemeException if case or accent sensitivity is set twice,
     *         if a locale piece follows a keyword or shortcut, if a keyword piece is not
     *         of the form {@code keyword=value}, or if keywords and shortcuts are mixed
     * @throws AmbiguousCollationException if the piece in the region position is a
     *         shortcut whose kind (case or accent) is not set elsewhere in the scheme
     */
    public CollationSpecifier(String scheme) {
        this.scheme = scheme;
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (e.g. a Turkish default would turn "I" into a dotless "ı").
        String[] pieces = scheme.toLowerCase(Locale.ROOT).split("_");

        StringBuilder localeBuilder = new StringBuilder();
        boolean localeStarted = false;
        boolean localeFinished = false;
        boolean caseSet = false;
        boolean accentSet = false;
        boolean parsedCaseSensitive = false;
        boolean parsedAccentSensitive = false;

        // NOTE(review): a shortcut-like token at index 0 (e.g. "cs") is consumed as a
        // shortcut rather than as a language code, so such a language cannot currently
        // be requested -- confirm whether that is intended.
        for (int i = 0; i < pieces.length; i++) {
            String piece = pieces[i];
            if (piece.contains("=")) {
                addKeyword(piece, scheme);
                localeFinished = true;
            } else if (isCaseShortcut(piece) || isAccentShortcut(piece)) {
                if (i == REGION_NDX) {
                    // A shortcut in the region position is kept as part of the locale;
                    // checkAmbiguous() decides later whether that reading is acceptable.
                    if (localeStarted) {
                        localeBuilder.append("_");
                    }
                    localeStarted = true;
                    localeBuilder.append(piece);
                } else if (isCaseShortcut(piece)) {
                    if (caseSet) {
                        throw new InvalidCollationSchemeException(scheme, "can't set the case sensitivity twice");
                    }
                    parsedCaseSensitive = CASE_SENSITIVE.equalsIgnoreCase(piece);
                    localeFinished = true;
                    caseSet = true;
                } else {
                    if (accentSet) {
                        throw new InvalidCollationSchemeException(scheme, "can't set the accent sensitivity twice");
                    }
                    parsedAccentSensitive = ACCENT_SENSITIVE.equalsIgnoreCase(piece);
                    localeFinished = true;
                    accentSet = true;
                }
            } else if (localeFinished) {
                throw new InvalidCollationSchemeException(scheme, "can't define locale after keywords or shortcuts");
            } else {
                if (localeStarted) {
                    localeBuilder.append("_");
                }
                localeStarted = true;
                localeBuilder.append(piece);
            }
        }

        // if the locale is just a language, need to append an underscore
        // to avoid ambiguity in toString()
        if (localeBuilder.indexOf("_") == -1) {
            localeBuilder.append("_");
        }
        locale = localeBuilder.toString();

        checkKeywordsAndShortcuts(caseSet, accentSet);
        checkAmbiguous(pieces, parsedCaseSensitive, parsedAccentSensitive, caseSet, accentSet);

        this.caseSensitive = caseSet
                ? parsedCaseSensitive
                : CASE_SENSITIVE.equalsIgnoreCase(DEFAULT_CASE);
        this.accentSensitive = accentSet
                ? parsedAccentSensitive
                : ACCENT_SENSITIVE.equalsIgnoreCase(DEFAULT_ACCENT);
    }

    /**
     * Rejects schemes that contain both keywords and case/accent shortcuts.
     *
     * @throws InvalidCollationSchemeException if at least one keyword was parsed
     *         and a case or accent shortcut was set
     */
    private void checkKeywordsAndShortcuts(boolean caseSet, boolean accentSet) {
        if (!keywordsToValues.isEmpty() && (caseSet || accentSet)) {
            throw new InvalidCollationSchemeException(scheme, "can't include both keywords and case/accent shortcuts");
        }
    }

    /**
     * Rejects schemes whose region-position piece is a case (or accent) shortcut
     * while case (or accent) sensitivity was not set by another piece, since the
     * piece could then be read either as a region or as a shortcut.
     *
     * @throws AmbiguousCollationException carrying both candidate readings in
     *         standardized form
     */
    private void checkAmbiguous(String[] pieces, boolean caseSensitive, boolean accentSensitive,
                                boolean caseSet, boolean accentSet) {
        if (pieces.length < REGION_NDX + 1) {
            return;
        }
        String region = pieces[REGION_NDX];
        boolean regionIsCase = isCaseShortcut(region);
        boolean regionIsAccent = isAccentShortcut(region);
        if (!((regionIsCase && !caseSet) || (regionIsAccent && !accentSet))) {
            return;
        }

        String providedCase = !caseSet ? DEFAULT_CASE
                : caseSensitive ? CASE_SENSITIVE : CASE_INSENSITIVE;
        String providedAccent = !accentSet ? DEFAULT_ACCENT
                : accentSensitive ? ACCENT_SENSITIVE : ACCENT_INSENSITIVE;

        // Reading 1: the region piece is a shortcut, so it is dropped from the locale.
        String possibility1 = new StringBuilder()
                .append(blankOutSegment(locale, region))
                .append("_")
                .append(regionIsCase ? region : providedCase)
                .append("_")
                .append(regionIsAccent ? region : providedAccent)
                .toString();
        // Reading 2: the region piece really is a region.
        String possibility2 = new StringBuilder()
                .append(locale)
                .append("_")
                .append(providedCase)
                .append("_")
                .append(providedAccent)
                .toString();
        throw new AmbiguousCollationException(scheme, possibility1, possibility2);
    }

    /**
     * Returns {@code locale} with every underscore-delimited segment equal to
     * {@code segment} replaced by the empty string; the underscores are kept.
     * Matching whole segments avoids mangling other pieces that merely contain
     * the text (e.g. "sco" contains "co").
     */
    private static String blankOutSegment(String locale, String segment) {
        // limit -1 keeps trailing empty segments, so a trailing "_" survives
        String[] segments = locale.split("_", -1);
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < segments.length; i++) {
            if (i > 0) {
                result.append("_");
            }
            if (!segments[i].equals(segment)) {
                result.append(segments[i]);
            }
        }
        return result.toString();
    }

    /**
     * Builds an ICU collator for this specifier.
     *
     * <p>The locale must be one of {@link ULocale#getAvailableLocales()}. Keywords,
     * if any, are applied to the locale and then verified against the collator's
     * valid locale. Strength and case level are set from the case/accent flags
     * only when no keywords were given.
     *
     * @return a newly obtained collator configured per this specifier
     * @throws UnsupportedCollationException if the locale is not available
     * @throws InvalidCollationKeywordException if a keyword/value pair is not
     *         reflected in the collator's valid locale
     */
    public RuleBasedCollator createCollator() {
        ULocale ulocale = new ULocale(locale);
        checkLocale(ulocale, scheme);
        ulocale = setKeywords(ulocale, keywordsToValues);

        RuleBasedCollator collator = (RuleBasedCollator) RuleBasedCollator.getInstance(ulocale);
        checkKeywords(collator.getLocale(ULocale.VALID_LOCALE), keywordsToValues, scheme);

        if (shouldSetStrength()) {
            setCollatorStrength(collator, this);
        }
        return collator;
    }

    /**
     * Verifies that every requested keyword is present on {@code locale} with the
     * requested value (compared ignoring case).
     *
     * @throws InvalidCollationKeywordException for the first entry that is missing
     *         or has a different value
     */
    private static void checkKeywords(ULocale locale, Map<String, String> keywordsToValues, String scheme) {
        for (Entry<String, String> entry : keywordsToValues.entrySet()) {
            String actual = locale.getKeywordValue(entry.getKey());
            if (actual == null || !actual.equalsIgnoreCase(entry.getValue())) {
                throw new InvalidCollationKeywordException(scheme, entry.getKey(), entry.getValue());
            }
        }
    }

    /**
     * Maps the specifier's case/accent flags onto collator strength and case level:
     * both sensitive gives TERTIARY; case-only gives PRIMARY with case level on;
     * accent-only gives SECONDARY; neither gives PRIMARY. Case level is off except
     * in the case-only combination.
     */
    private static void setCollatorStrength(RuleBasedCollator collator, CollationSpecifier specifier) {
        boolean byCase = specifier.caseSensitive();
        boolean byAccent = specifier.accentSensitive();
        if (byCase && byAccent) {
            collator.setStrength(Collator.TERTIARY);
            collator.setCaseLevel(false);
        } else if (byCase) {
            collator.setCaseLevel(true);
            collator.setStrength(Collator.PRIMARY);
        } else if (byAccent) {
            collator.setStrength(Collator.SECONDARY);
            collator.setCaseLevel(false);
        } else {
            collator.setStrength(Collator.PRIMARY);
            collator.setCaseLevel(false);
        }
    }

    /** Returns {@code locale} with every keyword/value pair applied to it. */
    private static ULocale setKeywords(ULocale locale, Map<String, String> keywordsToValues) {
        for (Entry<String, String> entry : keywordsToValues.entrySet()) {
            locale = locale.setKeywordValue(entry.getKey(), entry.getValue());
        }
        return locale;
    }

    /**
     * @throws UnsupportedCollationException if {@code locale} is not among the
     *         locales reported by {@link ULocale#getAvailableLocales()}
     */
    private static void checkLocale(ULocale locale, String scheme) {
        if (!locales.contains(locale)) {
            throw new UnsupportedCollationException(scheme);
        }
    }

    /** Returns whether the piece is one of the case shortcuts, ignoring case. */
    private static boolean isCaseShortcut(String caseOrNot) {
        return caseOrNot.equalsIgnoreCase(CASE_INSENSITIVE) ||
               caseOrNot.equalsIgnoreCase(CASE_SENSITIVE);
    }

    /** Returns whether the piece is one of the accent shortcuts, ignoring case. */
    private static boolean isAccentShortcut(String accentOrNot) {
        return accentOrNot.equalsIgnoreCase(ACCENT_INSENSITIVE) ||
               accentOrNot.equalsIgnoreCase(ACCENT_SENSITIVE);
    }

    /**
     * Records one {@code keyword=value} piece.
     *
     * @throws InvalidCollationSchemeException if the piece does not consist of
     *         exactly one non-empty keyword and one non-empty value around a single "="
     */
    private void addKeyword(String keywordAndValue, String scheme) {
        // limit -1 keeps trailing empty strings, so "a=" and "a=b=" are seen as malformed
        String[] pieces = keywordAndValue.split("=", -1);
        if (pieces.length != 2 || pieces[0].isEmpty() || pieces[1].isEmpty()) {
            throw new InvalidCollationSchemeException(scheme, "keywords and values must be of the form `keyword=value`");
        }
        keywordsToValues.put(pieces[0], pieces[1]);
    }

    /** Returns whether this specifier is case sensitive. */
    public boolean caseSensitive() {
        return caseSensitive;
    }

    /** Returns whether this specifier is accent sensitive. */
    public boolean accentSensitive() {
        return accentSensitive;
    }

    /**
     * Returns the keyword/value pairs parsed from the scheme. This is the
     * specifier's own map, not a copy, so changes to it affect this specifier.
     */
    public HashMap<String, String> getKeywordsAndValues() {
        return keywordsToValues;
    }

    /**
     * Returns whether {@link #createCollator()} sets strength from the case/accent
     * flags, which is the case exactly when the scheme has no keywords.
     */
    public Boolean shouldSetStrength() {
        return keywordsToValues.isEmpty();
    }

    /**
     * Returns the standardized scheme: the locale followed by either every
     * {@code keyword=value} pair or, when there are no keywords, the case and
     * accent shortcuts.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder().append(locale);
        if (!keywordsToValues.isEmpty()) {
            for (Entry<String, String> entry : keywordsToValues.entrySet()) {
                builder.append("_")
                       .append(entry.getKey())
                       .append("=")
                       .append(entry.getValue());
            }
        } else {
            builder.append("_")
                   .append(caseSensitive ? CASE_SENSITIVE : CASE_INSENSITIVE)
                   .append("_")
                   .append(accentSensitive ? ACCENT_SENSITIVE : ACCENT_INSENSITIVE);
        }
        return builder.toString();
    }
}