001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.vfs2.provider; 018 019import org.apache.commons.vfs2.FileName; 020import org.apache.commons.vfs2.FileSystemException; 021import org.apache.commons.vfs2.FileType; 022import org.apache.commons.vfs2.VFS; 023import org.apache.commons.vfs2.util.Os; 024 025/** 026 * Utilities for dealing with URIs. See RFC 2396 for details. 027 * 028 * 2005) $ 029 */ 030public final class UriParser { 031 /** 032 * The set of valid separators. These are all converted to the normalized one. Does <i>not</i> contain the 033 * normalized separator 034 */ 035 // public static final char[] separators = {'\\'}; 036 public static final char TRANS_SEPARATOR = '\\'; 037 038 /** 039 * The normalised separator to use. 040 */ 041 private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR; 042 043 private static final int HEX_BASE = 16; 044 045 private static final int BITS_IN_HALF_BYTE = 4; 046 047 private static final char LOW_MASK = 0x0F; 048 049 private UriParser() { 050 } 051 052 /** 053 * Extracts the first element of a path. 054 * 055 * @param name StringBuilder containing the path. 056 * @return The first element of the path. 057 */ 058 public static String extractFirstElement(final StringBuilder name) { 059 final int len = name.length(); 060 if (len < 1) { 061 return null; 062 } 063 int startPos = 0; 064 if (name.charAt(0) == SEPARATOR_CHAR) { 065 startPos = 1; 066 } 067 for (int pos = startPos; pos < len; pos++) { 068 if (name.charAt(pos) == SEPARATOR_CHAR) { 069 // Found a separator 070 final String elem = name.substring(startPos, pos); 071 name.delete(startPos, pos + 1); 072 return elem; 073 } 074 } 075 076 // No separator 077 final String elem = name.substring(startPos); 078 name.setLength(0); 079 return elem; 080 } 081 082 /** 083 * Normalises a path. Does the following: 084 * <ul> 085 * <li>Removes empty path elements. 086 * <li>Handles '.' and '..' elements. 087 * <li>Removes trailing separator. 088 * </ul> 089 * 090 * Its assumed that the separators are already fixed. 091 * 092 * @param path The path to normalize. 093 * @return The FileType. 094 * @throws FileSystemException if an error occurs. 095 * 096 * @see #fixSeparators 097 */ 098 public static FileType normalisePath(final StringBuilder path) throws FileSystemException { 099 FileType fileType = FileType.FOLDER; 100 if (path.length() == 0) { 101 return fileType; 102 } 103 104 if (path.charAt(path.length() - 1) != '/') { 105 fileType = FileType.FILE; 106 } 107 108 // Adjust separators 109 // fixSeparators(path); 110 111 // Determine the start of the first element 112 int startFirstElem = 0; 113 if (path.charAt(0) == SEPARATOR_CHAR) { 114 if (path.length() == 1) { 115 return fileType; 116 } 117 startFirstElem = 1; 118 } 119 120 // Iterate over each element 121 int startElem = startFirstElem; 122 int maxlen = path.length(); 123 while (startElem < maxlen) { 124 // Find the end of the element 125 int endElem = startElem; 126 for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++) { 127 } 128 129 final int elemLen = endElem - startElem; 130 if (elemLen == 0) { 131 // An empty element - axe it 132 path.delete(endElem, endElem + 1); 133 maxlen = path.length(); 134 continue; 135 } 136 if (elemLen == 1 && path.charAt(startElem) == '.') { 137 // A '.' element - axe it 138 path.delete(startElem, endElem + 1); 139 maxlen = path.length(); 140 continue; 141 } 142 if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') { 143 // A '..' element - remove the previous element 144 if (startElem == startFirstElem) { 145 // Previous element is missing 146 throw new FileSystemException("vfs.provider/invalid-relative-path.error"); 147 } 148 149 // Find start of previous element 150 int pos = startElem - 2; 151 for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--) { 152 } 153 startElem = pos + 1; 154 155 path.delete(startElem, endElem + 1); 156 maxlen = path.length(); 157 continue; 158 } 159 160 // A regular element 161 startElem = endElem + 1; 162 } 163 164 // Remove trailing separator 165 if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) { 166 path.delete(maxlen - 1, maxlen); 167 } 168 169 return fileType; 170 } 171 172 /** 173 * Normalises the separators in a name. 174 * 175 * @param name The StringBuilder containing the name 176 * @return true if the StringBuilder was modified. 177 */ 178 public static boolean fixSeparators(final StringBuilder name) { 179 boolean changed = false; 180 final int maxlen = name.length(); 181 for (int i = 0; i < maxlen; i++) { 182 final char ch = name.charAt(i); 183 if (ch == TRANS_SEPARATOR) { 184 name.setCharAt(i, SEPARATOR_CHAR); 185 changed = true; 186 } 187 } 188 return changed; 189 } 190 191 /** 192 * Extracts the scheme from a URI. 193 * 194 * @param uri The URI. 195 * @return The scheme name. Returns null if there is no scheme. 196 */ 197 public static String extractScheme(final String uri) { 198 return extractScheme(uri, null); 199 } 200 201 /** 202 * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. 203 * 204 * @param uri The URI. 205 * @param buffer Returns the remainder of the URI. 206 * @return The scheme name. Returns null if there is no scheme. 207 */ 208 public static String extractScheme(final String uri, final StringBuilder buffer) { 209 if (buffer != null) { 210 buffer.setLength(0); 211 buffer.append(uri); 212 } 213 214 final int maxPos = uri.length(); 215 for (int pos = 0; pos < maxPos; pos++) { 216 final char ch = uri.charAt(pos); 217 218 if (ch == ':') { 219 // Found the end of the scheme 220 final String scheme = uri.substring(0, pos); 221 if (scheme.length() <= 1 && Os.isFamily(Os.OS_FAMILY_WINDOWS)) { 222 // This is not a scheme, but a Windows drive letter 223 return null; 224 } 225 if (buffer != null) { 226 buffer.delete(0, pos + 1); 227 } 228 return scheme.intern(); 229 } 230 231 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 232 // A scheme character 233 continue; 234 } 235 if (pos > 0 && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.')) { 236 // A scheme character (these are not allowed as the first 237 // character of the scheme, but can be used as subsequent 238 // characters. 239 continue; 240 } 241 242 // Not a scheme character 243 break; 244 } 245 246 // No scheme in URI 247 return null; 248 } 249 250 /** 251 * Removes %nn encodings from a string. 252 * 253 * @param encodedStr The encoded String. 254 * @return The decoded String. 255 * @throws FileSystemException if an error occurs. 256 */ 257 public static String decode(final String encodedStr) throws FileSystemException { 258 if (encodedStr == null) { 259 return null; 260 } 261 if (encodedStr.indexOf('%') < 0) { 262 return encodedStr; 263 } 264 final StringBuilder buffer = new StringBuilder(encodedStr); 265 decode(buffer, 0, buffer.length()); 266 return buffer.toString(); 267 } 268 269 /** 270 * Removes %nn encodings from a string. 271 * 272 * @param buffer StringBuilder containing the string to decode. 273 * @param offset The position in the string to start decoding. 274 * @param length The number of characters to decode. 275 * @throws FileSystemException if an error occurs. 276 */ 277 public static void decode(final StringBuilder buffer, final int offset, final int length) 278 throws FileSystemException { 279 int index = offset; 280 int count = length; 281 for (; count > 0; count--, index++) { 282 final char ch = buffer.charAt(index); 283 if (ch != '%') { 284 continue; 285 } 286 if (count < 3) { 287 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 288 buffer.substring(index, index + count)); 289 } 290 291 // Decode 292 final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); 293 final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); 294 if (dig1 == -1 || dig2 == -1) { 295 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 296 buffer.substring(index, index + 3)); 297 } 298 final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); 299 300 // Replace 301 buffer.setCharAt(index, value); 302 buffer.delete(index + 1, index + 3); 303 count -= 2; 304 } 305 } 306 307 /** 308 * Encodes and appends a string to a StringBuilder. 309 * 310 * @param buffer The StringBuilder to append to. 311 * @param unencodedValue The String to encode and append. 312 * @param reserved characters to encode. 313 */ 314 public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) { 315 final int offset = buffer.length(); 316 buffer.append(unencodedValue); 317 encode(buffer, offset, unencodedValue.length(), reserved); 318 } 319 320 /** 321 * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters. 322 * 323 * @param buffer The StringBuilder to append to. 324 * @param offset The position in the buffer to start encoding at. 325 * @param length The number of characters to encode. 326 * @param reserved characters to encode. 327 */ 328 public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) { 329 int index = offset; 330 int count = length; 331 for (; count > 0; index++, count--) { 332 final char ch = buffer.charAt(index); 333 boolean match = ch == '%'; 334 if (reserved != null) { 335 for (int i = 0; !match && i < reserved.length; i++) { 336 if (ch == reserved[i]) { 337 match = true; 338 } 339 } 340 } 341 if (match) { 342 // Encode 343 final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE), 344 Character.forDigit(ch & LOW_MASK, HEX_BASE) }; 345 buffer.setCharAt(index, '%'); 346 buffer.insert(index + 1, digits); 347 index += 2; 348 } 349 } 350 } 351 352 /** 353 * Removes %nn encodings from a string. 354 * 355 * @param decodedStr The decoded String. 356 * @return The encoded String. 357 */ 358 public static String encode(final String decodedStr) { 359 return encode(decodedStr, null); 360 } 361 362 /** 363 * Converts "special" characters to their %nn value. 364 * 365 * @param decodedStr The decoded String. 366 * @param reserved Characters to encode. 367 * @return The encoded String 368 */ 369 public static String encode(final String decodedStr, final char[] reserved) { 370 if (decodedStr == null) { 371 return null; 372 } 373 final StringBuilder buffer = new StringBuilder(decodedStr); 374 encode(buffer, 0, buffer.length(), reserved); 375 return buffer.toString(); 376 } 377 378 /** 379 * Encode an array of Strings. 380 * 381 * @param strings The array of Strings to encode. 382 * @return An array of encoded Strings. 383 */ 384 public static String[] encode(final String[] strings) { 385 if (strings == null) { 386 return null; 387 } 388 for (int i = 0; i < strings.length; i++) { 389 strings[i] = encode(strings[i]); 390 } 391 return strings; 392 } 393 394 /** 395 * Decodes the String. 396 * 397 * @param uri The String to decode. 398 * @throws FileSystemException if an error occurs. 399 */ 400 public static void checkUriEncoding(final String uri) throws FileSystemException { 401 decode(uri); 402 } 403 404 public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length, 405 final FileNameParser fileNameParser) throws FileSystemException { 406 int index = offset; 407 int count = length; 408 for (; count > 0; count--, index++) { 409 final char ch = buffer.charAt(index); 410 if (ch == '%') { 411 if (count < 3) { 412 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 413 buffer.substring(index, index + count)); 414 } 415 416 // Decode 417 final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); 418 final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); 419 if (dig1 == -1 || dig2 == -1) { 420 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 421 buffer.substring(index, index + 3)); 422 } 423 final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); 424 425 final boolean match = value == '%' || fileNameParser.encodeCharacter(value); 426 427 if (match) { 428 // this is a reserved character, not allowed to decode 429 index += 2; 430 count -= 2; 431 continue; 432 } 433 434 // Replace 435 buffer.setCharAt(index, value); 436 buffer.delete(index + 1, index + 3); 437 count -= 2; 438 } else if (fileNameParser.encodeCharacter(ch)) { 439 // Encode 440 final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE), 441 Character.forDigit(ch & LOW_MASK, HEX_BASE) }; 442 buffer.setCharAt(index, '%'); 443 buffer.insert(index + 1, digits); 444 index += 2; 445 } 446 } 447 } 448 449 /** 450 * Extract the query String from the URI. 451 * 452 * @param name StringBuilder containing the URI. 453 * @return The query string, if any. null otherwise. 454 */ 455 public static String extractQueryString(final StringBuilder name) { 456 for (int pos = 0; pos < name.length(); pos++) { 457 if (name.charAt(pos) == '?') { 458 final String queryString = name.substring(pos + 1); 459 name.delete(pos, name.length()); 460 return queryString; 461 } 462 } 463 464 return null; 465 } 466}