/* * Copyright 2012, Ryan J. McDonough * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.damnhandy.uri.template; import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.BitSet; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * <p> * A light-weight utility class for applying encoding to values that are applied to * expression values. * </p> * * @author <a href="ryan@damnhandy.com">Ryan J. McDonough</a> * @version $Revision: 1.1 $ */ public final class UriUtil { static final Pattern PCT_ENCODDED_STRING = Pattern.compile("%[0-9A-Fa-f]{2}"); static final char[] GENERAL_DELIM_CHARS = {':', '/', ',', '?', '#', '[', ']', '@'}; static final char[] SUB_DELIMS_CHARS = {'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '<', '>', '{', '}'}; private static final BitSet RESERVED; private static final BitSet ESCAPE_CHARS; static { RESERVED = new BitSet(); for (int i = 0; i < GENERAL_DELIM_CHARS.length; i++) { RESERVED.set(GENERAL_DELIM_CHARS[i]); } RESERVED.set(' '); RESERVED.set('%'); RESERVED.set('|'); RESERVED.set('\\'); RESERVED.set('`'); RESERVED.set('"'); RESERVED.set('^'); for (int i = 0; i < SUB_DELIMS_CHARS.length; i++) { RESERVED.set(SUB_DELIMS_CHARS[i]); } ESCAPE_CHARS = new BitSet(); ESCAPE_CHARS.set('<'); ESCAPE_CHARS.set('>'); ESCAPE_CHARS.set('%'); ESCAPE_CHARS.set('\"'); ESCAPE_CHARS.set('{'); ESCAPE_CHARS.set('}'); ESCAPE_CHARS.set('|'); ESCAPE_CHARS.set('\\'); ESCAPE_CHARS.set('^'); ESCAPE_CHARS.set('['); ESCAPE_CHARS.set(']'); ESCAPE_CHARS.set('`'); } private UriUtil() { } /** * @param sourceValue * @return the encoded string */ public static String encodeFragment(String sourceValue) throws UnsupportedEncodingException { // Check if the string has %-encoded values already. // if it does, rebuild the string and encode the non-encoded bit // but don't re-encode the already encoded strings. // // There's probably a cleaner way to do this. Matcher m = PCT_ENCODDED_STRING.matcher(sourceValue); List<int[]> positions = new ArrayList<int[]>(); while (m.find()) { positions.add(new int[]{m.start(), m.end()}); } if(!positions.isEmpty()) { StringBuilder b = new StringBuilder(); int offset = 0; for (int[] pos : positions) { // encode the non-encoded portion of the string b.append(UriUtil.encode(sourceValue.substring(offset, pos[0]),ESCAPE_CHARS)); // the already encodede string does not get encoded twice b.append(sourceValue.substring(pos[0], pos[1])); offset = pos[1]; } b.append(encode(sourceValue.substring(offset, sourceValue.length()),ESCAPE_CHARS)); return b.toString(); } // If there's return encode(sourceValue, ESCAPE_CHARS); } /** * @param sourceValue * @return the encoded string */ public static String encode(String sourceValue) throws UnsupportedEncodingException { return encode(sourceValue, RESERVED); } /** * @param sourceValue * @param chars * @return the encoded string * @throws UnsupportedEncodingException */ private static String encode(String sourceValue, BitSet chars) throws UnsupportedEncodingException { ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] source = sourceValue.getBytes(Charset.forName("UTF-8")); for (int i = 0; i < source.length; i++) { byte c = source[i]; // fixed unsigned problem if (chars.get(c & 0xff) || c <= 0x20) { out.write('%'); char hex1 = Character.toUpperCase(Character.forDigit((c >> 4) & 0xF, 16)); char hex2 = Character.toUpperCase(Character.forDigit(c & 0xF, 16)); out.write(hex1); out.write(hex2); } else { out.write(c); } } return new String(out.toByteArray(), "UTF-8"); } }