001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.List; 025import java.util.Stack; 026import java.util.regex.Matcher; 027import java.util.regex.Pattern; 028 029/** 030 * General filename and filepath manipulation utilities. 031 * <p> 032 * When dealing with filenames you can hit problems when moving from a Windows 033 * based development machine to a Unix based production machine. 034 * This class aims to help avoid those problems. 035 * <p> 036 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 037 * using JDK {@link java.io.File File} objects and the two argument constructor 038 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 039 * <p> 040 * Most methods on this class are designed to work the same on both Unix and Windows. 041 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 042 * <p> 043 * Most methods recognise both separators (forward and back), and both 044 * sets of prefixes. See the javadoc of each method for details. 045 * <p> 046 * This class defines six components within a filename 047 * (example C:\dev\project\file.txt): 048 * <ul> 049 * <li>the prefix - C:\</li> 050 * <li>the path - dev\project\</li> 051 * <li>the full path - C:\dev\project\</li> 052 * <li>the name - file.txt</li> 053 * <li>the base name - file</li> 054 * <li>the extension - txt</li> 055 * </ul> 056 * Note that this class works best if directory filenames end with a separator. 057 * If you omit the last separator, it is impossible to determine if the filename 058 * corresponds to a file or a directory. As a result, we have chosen to say 059 * it corresponds to a file. 060 * <p> 061 * This class only supports Unix and Windows style names. 062 * Prefixes are matched as follows: 063 * <pre> 064 * Windows: 065 * a\b\c.txt --> "" --> relative 066 * \a\b\c.txt --> "\" --> current drive absolute 067 * C:a\b\c.txt --> "C:" --> drive relative 068 * C:\a\b\c.txt --> "C:\" --> absolute 069 * \\server\a\b\c.txt --> "\\server\" --> UNC 070 * 071 * Unix: 072 * a/b/c.txt --> "" --> relative 073 * /a/b/c.txt --> "/" --> absolute 074 * ~/a/b/c.txt --> "~/" --> current user 075 * ~ --> "~/" --> current user (slash added) 076 * ~user/a/b/c.txt --> "~user/" --> named user 077 * ~user --> "~user/" --> named user (slash added) 078 * </pre> 079 * Both prefix styles are matched always, irrespective of the machine that you are 080 * currently running on. 081 * <p> 082 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 083 * 084 * @since 1.1 085 */ 086public class FilenameUtils { 087 088 private static final int NOT_FOUND = -1; 089 090 /** 091 * The extension separator character. 092 * @since 1.4 093 */ 094 public static final char EXTENSION_SEPARATOR = '.'; 095 096 /** 097 * The extension separator String. 098 * @since 1.4 099 */ 100 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 101 102 /** 103 * The Unix separator character. 104 */ 105 private static final char UNIX_SEPARATOR = '/'; 106 107 /** 108 * The Windows separator character. 109 */ 110 private static final char WINDOWS_SEPARATOR = '\\'; 111 112 /** 113 * The system separator character. 114 */ 115 private static final char SYSTEM_SEPARATOR = File.separatorChar; 116 117 /** 118 * The separator character that is the opposite of the system separator. 119 */ 120 private static final char OTHER_SEPARATOR; 121 static { 122 if (isSystemWindows()) { 123 OTHER_SEPARATOR = UNIX_SEPARATOR; 124 } else { 125 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 126 } 127 } 128 129 /** 130 * Instances should NOT be constructed in standard programming. 131 */ 132 public FilenameUtils() { 133 super(); 134 } 135 136 //----------------------------------------------------------------------- 137 /** 138 * Determines if Windows file system is in use. 139 * 140 * @return true if the system is Windows 141 */ 142 static boolean isSystemWindows() { 143 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 144 } 145 146 //----------------------------------------------------------------------- 147 /** 148 * Checks if the character is a separator. 149 * 150 * @param ch the character to check 151 * @return true if it is a separator character 152 */ 153 private static boolean isSeparator(final char ch) { 154 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; 155 } 156 157 //----------------------------------------------------------------------- 158 /** 159 * Normalizes a path, removing double and single dot path steps. 160 * <p> 161 * This method normalizes a path to a standard format. 162 * The input may contain separators in either Unix or Windows format. 163 * The output will contain separators in the format of the system. 164 * <p> 165 * A trailing slash will be retained. 166 * A double slash will be merged to a single slash (but UNC names are handled). 167 * A single dot path segment will be removed. 168 * A double dot will cause that path segment and the one before to be removed. 169 * If the double dot has no parent path segment to work with, {@code null} 170 * is returned. 171 * <p> 172 * The output will be the same on both Unix and Windows except 173 * for the separator character. 174 * <pre> 175 * /foo// --> /foo/ 176 * /foo/./ --> /foo/ 177 * /foo/../bar --> /bar 178 * /foo/../bar/ --> /bar/ 179 * /foo/../bar/../baz --> /baz 180 * //foo//./bar --> /foo/bar 181 * /../ --> null 182 * ../foo --> null 183 * foo/bar/.. --> foo/ 184 * foo/../../bar --> null 185 * foo/../bar --> bar 186 * //server/foo/../bar --> //server/bar 187 * //server/../bar --> null 188 * C:\foo\..\bar --> C:\bar 189 * C:\..\bar --> null 190 * ~/foo/../bar/ --> ~/bar/ 191 * ~/../bar --> null 192 * </pre> 193 * (Note the file separator returned will be correct for Windows/Unix) 194 * 195 * @param filename the filename to normalize, null returns null 196 * @return the normalized filename, or null if invalid. Null bytes inside string will be removed 197 */ 198 public static String normalize(final String filename) { 199 return doNormalize(filename, SYSTEM_SEPARATOR, true); 200 } 201 /** 202 * Normalizes a path, removing double and single dot path steps. 203 * <p> 204 * This method normalizes a path to a standard format. 205 * The input may contain separators in either Unix or Windows format. 206 * The output will contain separators in the format specified. 207 * <p> 208 * A trailing slash will be retained. 209 * A double slash will be merged to a single slash (but UNC names are handled). 210 * A single dot path segment will be removed. 211 * A double dot will cause that path segment and the one before to be removed. 212 * If the double dot has no parent path segment to work with, {@code null} 213 * is returned. 214 * <p> 215 * The output will be the same on both Unix and Windows except 216 * for the separator character. 217 * <pre> 218 * /foo// --> /foo/ 219 * /foo/./ --> /foo/ 220 * /foo/../bar --> /bar 221 * /foo/../bar/ --> /bar/ 222 * /foo/../bar/../baz --> /baz 223 * //foo//./bar --> /foo/bar 224 * /../ --> null 225 * ../foo --> null 226 * foo/bar/.. --> foo/ 227 * foo/../../bar --> null 228 * foo/../bar --> bar 229 * //server/foo/../bar --> //server/bar 230 * //server/../bar --> null 231 * C:\foo\..\bar --> C:\bar 232 * C:\..\bar --> null 233 * ~/foo/../bar/ --> ~/bar/ 234 * ~/../bar --> null 235 * </pre> 236 * The output will be the same on both Unix and Windows including 237 * the separator character. 238 * 239 * @param filename the filename to normalize, null returns null 240 * @param unixSeparator {@code true} if a unix separator should 241 * be used or {@code false} if a windows separator should be used. 242 * @return the normalized filename, or null if invalid. Null bytes inside string will be removed 243 * @since 2.0 244 */ 245 public static String normalize(final String filename, final boolean unixSeparator) { 246 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 247 return doNormalize(filename, separator, true); 248 } 249 250 //----------------------------------------------------------------------- 251 /** 252 * Normalizes a path, removing double and single dot path steps, 253 * and removing any final directory separator. 254 * <p> 255 * This method normalizes a path to a standard format. 256 * The input may contain separators in either Unix or Windows format. 257 * The output will contain separators in the format of the system. 258 * <p> 259 * A trailing slash will be removed. 260 * A double slash will be merged to a single slash (but UNC names are handled). 261 * A single dot path segment will be removed. 262 * A double dot will cause that path segment and the one before to be removed. 263 * If the double dot has no parent path segment to work with, {@code null} 264 * is returned. 265 * <p> 266 * The output will be the same on both Unix and Windows except 267 * for the separator character. 268 * <pre> 269 * /foo// --> /foo 270 * /foo/./ --> /foo 271 * /foo/../bar --> /bar 272 * /foo/../bar/ --> /bar 273 * /foo/../bar/../baz --> /baz 274 * //foo//./bar --> /foo/bar 275 * /../ --> null 276 * ../foo --> null 277 * foo/bar/.. --> foo 278 * foo/../../bar --> null 279 * foo/../bar --> bar 280 * //server/foo/../bar --> //server/bar 281 * //server/../bar --> null 282 * C:\foo\..\bar --> C:\bar 283 * C:\..\bar --> null 284 * ~/foo/../bar/ --> ~/bar 285 * ~/../bar --> null 286 * </pre> 287 * (Note the file separator returned will be correct for Windows/Unix) 288 * 289 * @param filename the filename to normalize, null returns null 290 * @return the normalized filename, or null if invalid. Null bytes inside string will be removed 291 */ 292 public static String normalizeNoEndSeparator(final String filename) { 293 return doNormalize(filename, SYSTEM_SEPARATOR, false); 294 } 295 296 /** 297 * Normalizes a path, removing double and single dot path steps, 298 * and removing any final directory separator. 299 * <p> 300 * This method normalizes a path to a standard format. 301 * The input may contain separators in either Unix or Windows format. 302 * The output will contain separators in the format specified. 303 * <p> 304 * A trailing slash will be removed. 305 * A double slash will be merged to a single slash (but UNC names are handled). 306 * A single dot path segment will be removed. 307 * A double dot will cause that path segment and the one before to be removed. 308 * If the double dot has no parent path segment to work with, {@code null} 309 * is returned. 310 * <p> 311 * The output will be the same on both Unix and Windows including 312 * the separator character. 313 * <pre> 314 * /foo// --> /foo 315 * /foo/./ --> /foo 316 * /foo/../bar --> /bar 317 * /foo/../bar/ --> /bar 318 * /foo/../bar/../baz --> /baz 319 * //foo//./bar --> /foo/bar 320 * /../ --> null 321 * ../foo --> null 322 * foo/bar/.. --> foo 323 * foo/../../bar --> null 324 * foo/../bar --> bar 325 * //server/foo/../bar --> //server/bar 326 * //server/../bar --> null 327 * C:\foo\..\bar --> C:\bar 328 * C:\..\bar --> null 329 * ~/foo/../bar/ --> ~/bar 330 * ~/../bar --> null 331 * </pre> 332 * 333 * @param filename the filename to normalize, null returns null 334 * @param unixSeparator {@code true} if a unix separator should 335 * be used or {@code false} if a windows separator should be used. 336 * @return the normalized filename, or null if invalid. Null bytes inside string will be removed 337 * @since 2.0 338 */ 339 public static String normalizeNoEndSeparator(final String filename, final boolean unixSeparator) { 340 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 341 return doNormalize(filename, separator, false); 342 } 343 344 /** 345 * Internal method to perform the normalization. 346 * 347 * @param filename the filename 348 * @param separator The separator character to use 349 * @param keepSeparator true to keep the final separator 350 * @return the normalized filename. Null bytes inside string will be removed. 351 */ 352 private static String doNormalize(final String filename, final char separator, final boolean keepSeparator) { 353 if (filename == null) { 354 return null; 355 } 356 357 failIfNullBytePresent(filename); 358 359 int size = filename.length(); 360 if (size == 0) { 361 return filename; 362 } 363 final int prefix = getPrefixLength(filename); 364 if (prefix < 0) { 365 return null; 366 } 367 368 final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 369 filename.getChars(0, filename.length(), array, 0); 370 371 // fix separators throughout 372 final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR; 373 for (int i = 0; i < array.length; i++) { 374 if (array[i] == otherSeparator) { 375 array[i] = separator; 376 } 377 } 378 379 // add extra separator on the end to simplify code below 380 boolean lastIsDirectory = true; 381 if (array[size - 1] != separator) { 382 array[size++] = separator; 383 lastIsDirectory = false; 384 } 385 386 // adjoining slashes 387 for (int i = prefix + 1; i < size; i++) { 388 if (array[i] == separator && array[i - 1] == separator) { 389 System.arraycopy(array, i, array, i - 1, size - i); 390 size--; 391 i--; 392 } 393 } 394 395 // dot slash 396 for (int i = prefix + 1; i < size; i++) { 397 if (array[i] == separator && array[i - 1] == '.' && 398 (i == prefix + 1 || array[i - 2] == separator)) { 399 if (i == size - 1) { 400 lastIsDirectory = true; 401 } 402 System.arraycopy(array, i + 1, array, i - 1, size - i); 403 size -=2; 404 i--; 405 } 406 } 407 408 // double dot slash 409 outer: 410 for (int i = prefix + 2; i < size; i++) { 411 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' && 412 (i == prefix + 2 || array[i - 3] == separator)) { 413 if (i == prefix + 2) { 414 return null; 415 } 416 if (i == size - 1) { 417 lastIsDirectory = true; 418 } 419 int j; 420 for (j = i - 4 ; j >= prefix; j--) { 421 if (array[j] == separator) { 422 // remove b/../ from a/b/../c 423 System.arraycopy(array, i + 1, array, j + 1, size - i); 424 size -= i - j; 425 i = j + 1; 426 continue outer; 427 } 428 } 429 // remove a/../ from a/../c 430 System.arraycopy(array, i + 1, array, prefix, size - i); 431 size -= i + 1 - prefix; 432 i = prefix + 1; 433 } 434 } 435 436 if (size <= 0) { // should never be less than 0 437 return ""; 438 } 439 if (size <= prefix) { // should never be less than prefix 440 return new String(array, 0, size); 441 } 442 if (lastIsDirectory && keepSeparator) { 443 return new String(array, 0, size); // keep trailing separator 444 } 445 return new String(array, 0, size - 1); // lose trailing separator 446 } 447 448 //----------------------------------------------------------------------- 449 /** 450 * Concatenates a filename to a base path using normal command line style rules. 451 * <p> 452 * The effect is equivalent to resultant directory after changing 453 * directory to the first argument, followed by changing directory to 454 * the second argument. 455 * <p> 456 * The first argument is the base path, the second is the path to concatenate. 457 * The returned path is always normalized via {@link #normalize(String)}, 458 * thus <code>..</code> is handled. 459 * <p> 460 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then 461 * it will be normalized and returned. 462 * Otherwise, the paths will be joined, normalized and returned. 463 * <p> 464 * The output will be the same on both Unix and Windows except 465 * for the separator character. 466 * <pre> 467 * /foo/ + bar --> /foo/bar 468 * /foo + bar --> /foo/bar 469 * /foo + /bar --> /bar 470 * /foo + C:/bar --> C:/bar 471 * /foo + C:bar --> C:bar (*) 472 * /foo/a/ + ../bar --> foo/bar 473 * /foo/ + ../../bar --> null 474 * /foo/ + /bar --> /bar 475 * /foo/.. + /bar --> /bar 476 * /foo + bar/c.txt --> /foo/bar/c.txt 477 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 478 * </pre> 479 * (*) Note that the Windows relative drive prefix is unreliable when 480 * used with this method. 481 * (!) Note that the first parameter must be a path. If it ends with a name, then 482 * the name will be built into the concatenated path. If this might be a problem, 483 * use {@link #getFullPath(String)} on the base path argument. 484 * 485 * @param basePath the base path to attach to, always treated as a path 486 * @param fullFilenameToAdd the filename (or path) to attach to the base 487 * @return the concatenated path, or null if invalid. Null bytes inside string will be removed 488 */ 489 public static String concat(final String basePath, final String fullFilenameToAdd) { 490 final int prefix = getPrefixLength(fullFilenameToAdd); 491 if (prefix < 0) { 492 return null; 493 } 494 if (prefix > 0) { 495 return normalize(fullFilenameToAdd); 496 } 497 if (basePath == null) { 498 return null; 499 } 500 final int len = basePath.length(); 501 if (len == 0) { 502 return normalize(fullFilenameToAdd); 503 } 504 final char ch = basePath.charAt(len - 1); 505 if (isSeparator(ch)) { 506 return normalize(basePath + fullFilenameToAdd); 507 } else { 508 return normalize(basePath + '/' + fullFilenameToAdd); 509 } 510 } 511 512 /** 513 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). 514 * <p> 515 * The files names are expected to be normalized. 516 * </p> 517 * 518 * Edge cases: 519 * <ul> 520 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 521 * <li>A directory does not contain itself: return false</li> 522 * <li>A null child file is not contained in any parent: return false</li> 523 * </ul> 524 * 525 * @param canonicalParent 526 * the file to consider as the parent. 527 * @param canonicalChild 528 * the file to consider as the child. 529 * @return true is the candidate leaf is under by the specified composite. False otherwise. 530 * @throws IOException 531 * if an IO error occurs while checking the files. 532 * @since 2.2 533 * @see FileUtils#directoryContains(File, File) 534 */ 535 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) 536 throws IOException { 537 538 // Fail fast against NullPointerException 539 if (canonicalParent == null) { 540 throw new IllegalArgumentException("Directory must not be null"); 541 } 542 543 if (canonicalChild == null) { 544 return false; 545 } 546 547 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 548 return false; 549 } 550 551 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent); 552 } 553 554 //----------------------------------------------------------------------- 555 /** 556 * Converts all separators to the Unix separator of forward slash. 557 * 558 * @param path the path to be changed, null ignored 559 * @return the updated path 560 */ 561 public static String separatorsToUnix(final String path) { 562 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) { 563 return path; 564 } 565 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 566 } 567 568 /** 569 * Converts all separators to the Windows separator of backslash. 570 * 571 * @param path the path to be changed, null ignored 572 * @return the updated path 573 */ 574 public static String separatorsToWindows(final String path) { 575 if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) { 576 return path; 577 } 578 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 579 } 580 581 /** 582 * Converts all separators to the system separator. 583 * 584 * @param path the path to be changed, null ignored 585 * @return the updated path 586 */ 587 public static String separatorsToSystem(final String path) { 588 if (path == null) { 589 return null; 590 } 591 if (isSystemWindows()) { 592 return separatorsToWindows(path); 593 } else { 594 return separatorsToUnix(path); 595 } 596 } 597 598 //----------------------------------------------------------------------- 599 /** 600 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>. 601 * <p> 602 * This method will handle a file in either Unix or Windows format. 603 * <p> 604 * The prefix length includes the first slash in the full filename 605 * if applicable. Thus, it is possible that the length returned is greater 606 * than the length of the input string. 607 * <pre> 608 * Windows: 609 * a\b\c.txt --> "" --> relative 610 * \a\b\c.txt --> "\" --> current drive absolute 611 * C:a\b\c.txt --> "C:" --> drive relative 612 * C:\a\b\c.txt --> "C:\" --> absolute 613 * \\server\a\b\c.txt --> "\\server\" --> UNC 614 * \\\a\b\c.txt --> error, length = -1 615 * 616 * Unix: 617 * a/b/c.txt --> "" --> relative 618 * /a/b/c.txt --> "/" --> absolute 619 * ~/a/b/c.txt --> "~/" --> current user 620 * ~ --> "~/" --> current user (slash added) 621 * ~user/a/b/c.txt --> "~user/" --> named user 622 * ~user --> "~user/" --> named user (slash added) 623 * //server/a/b/c.txt --> "//server/" 624 * ///a/b/c.txt --> error, length = -1 625 * </pre> 626 * <p> 627 * The output will be the same irrespective of the machine that the code is running on. 628 * ie. both Unix and Windows prefixes are matched regardless. 629 * 630 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 631 * These must be followed by a server name, so double-slashes are not collapsed 632 * to a single slash at the start of the filename. 633 * 634 * @param filename the filename to find the prefix in, null returns -1 635 * @return the length of the prefix, -1 if invalid or null 636 */ 637 public static int getPrefixLength(final String filename) { 638 if (filename == null) { 639 return NOT_FOUND; 640 } 641 final int len = filename.length(); 642 if (len == 0) { 643 return 0; 644 } 645 char ch0 = filename.charAt(0); 646 if (ch0 == ':') { 647 return NOT_FOUND; 648 } 649 if (len == 1) { 650 if (ch0 == '~') { 651 return 2; // return a length greater than the input 652 } 653 return isSeparator(ch0) ? 1 : 0; 654 } else { 655 if (ch0 == '~') { 656 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1); 657 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1); 658 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 659 return len + 1; // return a length greater than the input 660 } 661 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 662 posWin = posWin == NOT_FOUND ? posUnix : posWin; 663 return Math.min(posUnix, posWin) + 1; 664 } 665 final char ch1 = filename.charAt(1); 666 if (ch1 == ':') { 667 ch0 = Character.toUpperCase(ch0); 668 if (ch0 >= 'A' && ch0 <= 'Z') { 669 if (len == 2 || isSeparator(filename.charAt(2)) == false) { 670 return 2; 671 } 672 return 3; 673 } else if (ch0 == UNIX_SEPARATOR) { 674 return 1; 675 } 676 return NOT_FOUND; 677 678 } else if (isSeparator(ch0) && isSeparator(ch1)) { 679 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2); 680 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2); 681 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 682 return NOT_FOUND; 683 } 684 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 685 posWin = posWin == NOT_FOUND ? posUnix : posWin; 686 int pos = Math.min(posUnix, posWin) + 1; 687 String hostnamePart = filename.substring(2, pos - 1); 688 return isValidHostName(hostnamePart) ? pos : NOT_FOUND; 689 } else { 690 return isSeparator(ch0) ? 1 : 0; 691 } 692 } 693 } 694 695 /** 696 * Returns the index of the last directory separator character. 697 * <p> 698 * This method will handle a file in either Unix or Windows format. 699 * The position of the last forward or backslash is returned. 700 * <p> 701 * The output will be the same irrespective of the machine that the code is running on. 702 * 703 * @param filename the filename to find the last path separator in, null returns -1 704 * @return the index of the last separator character, or -1 if there 705 * is no such character 706 */ 707 public static int indexOfLastSeparator(final String filename) { 708 if (filename == null) { 709 return NOT_FOUND; 710 } 711 final int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR); 712 final int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR); 713 return Math.max(lastUnixPos, lastWindowsPos); 714 } 715 716 /** 717 * Returns the index of the last extension separator character, which is a dot. 718 * <p> 719 * This method also checks that there is no directory separator after the last dot. To do this it uses 720 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format. 721 * </p> 722 * <p> 723 * The output will be the same irrespective of the machine that the code is running on. 724 * </p> 725 * 726 * @param filename 727 * the filename to find the last extension separator in, null returns -1 728 * @return the index of the last extension separator character, or -1 if there is no such character 729 */ 730 public static int indexOfExtension(final String filename) { 731 if (filename == null) { 732 return NOT_FOUND; 733 } 734 final int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); 735 final int lastSeparator = indexOfLastSeparator(filename); 736 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 737 } 738 739 //----------------------------------------------------------------------- 740 /** 741 * Gets the prefix from a full filename, such as <code>C:/</code> 742 * or <code>~/</code>. 743 * <p> 744 * This method will handle a file in either Unix or Windows format. 745 * The prefix includes the first slash in the full filename where applicable. 746 * <pre> 747 * Windows: 748 * a\b\c.txt --> "" --> relative 749 * \a\b\c.txt --> "\" --> current drive absolute 750 * C:a\b\c.txt --> "C:" --> drive relative 751 * C:\a\b\c.txt --> "C:\" --> absolute 752 * \\server\a\b\c.txt --> "\\server\" --> UNC 753 * 754 * Unix: 755 * a/b/c.txt --> "" --> relative 756 * /a/b/c.txt --> "/" --> absolute 757 * ~/a/b/c.txt --> "~/" --> current user 758 * ~ --> "~/" --> current user (slash added) 759 * ~user/a/b/c.txt --> "~user/" --> named user 760 * ~user --> "~user/" --> named user (slash added) 761 * </pre> 762 * <p> 763 * The output will be the same irrespective of the machine that the code is running on. 764 * ie. both Unix and Windows prefixes are matched regardless. 765 * 766 * @param filename the filename to query, null returns null 767 * @return the prefix of the file, null if invalid. Null bytes inside string will be removed 768 */ 769 public static String getPrefix(final String filename) { 770 if (filename == null) { 771 return null; 772 } 773 final int len = getPrefixLength(filename); 774 if (len < 0) { 775 return null; 776 } 777 if (len > filename.length()) { 778 failIfNullBytePresent(filename + UNIX_SEPARATOR); 779 return filename + UNIX_SEPARATOR; 780 } 781 final String path = filename.substring(0, len); 782 failIfNullBytePresent(path); 783 return path; 784 } 785 786 /** 787 * Gets the path from a full filename, which excludes the prefix. 788 * <p> 789 * This method will handle a file in either Unix or Windows format. 790 * The method is entirely text based, and returns the text before and 791 * including the last forward or backslash. 792 * <pre> 793 * C:\a\b\c.txt --> a\b\ 794 * ~/a/b/c.txt --> a/b/ 795 * a.txt --> "" 796 * a/b/c --> a/b/ 797 * a/b/c/ --> a/b/c/ 798 * </pre> 799 * <p> 800 * The output will be the same irrespective of the machine that the code is running on. 801 * <p> 802 * This method drops the prefix from the result. 803 * See {@link #getFullPath(String)} for the method that retains the prefix. 804 * 805 * @param filename the filename to query, null returns null 806 * @return the path of the file, an empty string if none exists, null if invalid. 807 * Null bytes inside string will be removed 808 */ 809 public static String getPath(final String filename) { 810 return doGetPath(filename, 1); 811 } 812 813 /** 814 * Gets the path from a full filename, which excludes the prefix, and 815 * also excluding the final directory separator. 816 * <p> 817 * This method will handle a file in either Unix or Windows format. 818 * The method is entirely text based, and returns the text before the 819 * last forward or backslash. 820 * <pre> 821 * C:\a\b\c.txt --> a\b 822 * ~/a/b/c.txt --> a/b 823 * a.txt --> "" 824 * a/b/c --> a/b 825 * a/b/c/ --> a/b/c 826 * </pre> 827 * <p> 828 * The output will be the same irrespective of the machine that the code is running on. 829 * <p> 830 * This method drops the prefix from the result. 831 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 832 * 833 * @param filename the filename to query, null returns null 834 * @return the path of the file, an empty string if none exists, null if invalid. 835 * Null bytes inside string will be removed 836 */ 837 public static String getPathNoEndSeparator(final String filename) { 838 return doGetPath(filename, 0); 839 } 840 841 /** 842 * Does the work of getting the path. 843 * 844 * @param filename the filename 845 * @param separatorAdd 0 to omit the end separator, 1 to return it 846 * @return the path. Null bytes inside string will be removed 847 */ 848 private static String doGetPath(final String filename, final int separatorAdd) { 849 if (filename == null) { 850 return null; 851 } 852 final int prefix = getPrefixLength(filename); 853 if (prefix < 0) { 854 return null; 855 } 856 final int index = indexOfLastSeparator(filename); 857 final int endIndex = index+separatorAdd; 858 if (prefix >= filename.length() || index < 0 || prefix >= endIndex) { 859 return ""; 860 } 861 final String path = filename.substring(prefix, endIndex); 862 failIfNullBytePresent(path); 863 return path; 864 } 865 866 /** 867 * Gets the full path from a full filename, which is the prefix + path. 868 * <p> 869 * This method will handle a file in either Unix or Windows format. 870 * The method is entirely text based, and returns the text before and 871 * including the last forward or backslash. 872 * <pre> 873 * C:\a\b\c.txt --> C:\a\b\ 874 * ~/a/b/c.txt --> ~/a/b/ 875 * a.txt --> "" 876 * a/b/c --> a/b/ 877 * a/b/c/ --> a/b/c/ 878 * C: --> C: 879 * C:\ --> C:\ 880 * ~ --> ~/ 881 * ~/ --> ~/ 882 * ~user --> ~user/ 883 * ~user/ --> ~user/ 884 * </pre> 885 * <p> 886 * The output will be the same irrespective of the machine that the code is running on. 887 * 888 * @param filename the filename to query, null returns null 889 * @return the path of the file, an empty string if none exists, null if invalid 890 */ 891 public static String getFullPath(final String filename) { 892 return doGetFullPath(filename, true); 893 } 894 895 /** 896 * Gets the full path from a full filename, which is the prefix + path, 897 * and also excluding the final directory separator. 898 * <p> 899 * This method will handle a file in either Unix or Windows format. 900 * The method is entirely text based, and returns the text before the 901 * last forward or backslash. 902 * <pre> 903 * C:\a\b\c.txt --> C:\a\b 904 * ~/a/b/c.txt --> ~/a/b 905 * a.txt --> "" 906 * a/b/c --> a/b 907 * a/b/c/ --> a/b/c 908 * C: --> C: 909 * C:\ --> C:\ 910 * ~ --> ~ 911 * ~/ --> ~ 912 * ~user --> ~user 913 * ~user/ --> ~user 914 * </pre> 915 * <p> 916 * The output will be the same irrespective of the machine that the code is running on. 917 * 918 * @param filename the filename to query, null returns null 919 * @return the path of the file, an empty string if none exists, null if invalid 920 */ 921 public static String getFullPathNoEndSeparator(final String filename) { 922 return doGetFullPath(filename, false); 923 } 924 925 /** 926 * Does the work of getting the path. 927 * 928 * @param filename the filename 929 * @param includeSeparator true to include the end separator 930 * @return the path 931 */ 932 private static String doGetFullPath(final String filename, final boolean includeSeparator) { 933 if (filename == null) { 934 return null; 935 } 936 final int prefix = getPrefixLength(filename); 937 if (prefix < 0) { 938 return null; 939 } 940 if (prefix >= filename.length()) { 941 if (includeSeparator) { 942 return getPrefix(filename); // add end slash if necessary 943 } else { 944 return filename; 945 } 946 } 947 final int index = indexOfLastSeparator(filename); 948 if (index < 0) { 949 return filename.substring(0, prefix); 950 } 951 int end = index + (includeSeparator ? 1 : 0); 952 if (end == 0) { 953 end++; 954 } 955 return filename.substring(0, end); 956 } 957 958 /** 959 * Gets the name minus the path from a full filename. 960 * <p> 961 * This method will handle a file in either Unix or Windows format. 962 * The text after the last forward or backslash is returned. 963 * <pre> 964 * a/b/c.txt --> c.txt 965 * a.txt --> a.txt 966 * a/b/c --> c 967 * a/b/c/ --> "" 968 * </pre> 969 * <p> 970 * The output will be the same irrespective of the machine that the code is running on. 971 * 972 * @param filename the filename to query, null returns null 973 * @return the name of the file without the path, or an empty string if none exists. 974 * Null bytes inside string will be removed 975 */ 976 public static String getName(final String filename) { 977 if (filename == null) { 978 return null; 979 } 980 failIfNullBytePresent(filename); 981 final int index = indexOfLastSeparator(filename); 982 return filename.substring(index + 1); 983 } 984 985 /** 986 * Check the input for null bytes, a sign of unsanitized data being passed to to file level functions. 987 * 988 * This may be used for poison byte attacks. 989 * @param path the path to check 990 */ 991 private static void failIfNullBytePresent(final String path) { 992 final int len = path.length(); 993 for (int i = 0; i < len; i++) { 994 if (path.charAt(i) == 0) { 995 throw new IllegalArgumentException("Null byte present in file/path name. There are no " + 996 "known legitimate use cases for such data, but several injection attacks may use it"); 997 } 998 } 999 } 1000 1001 /** 1002 * Gets the base name, minus the full path and extension, from a full filename. 1003 * <p> 1004 * This method will handle a file in either Unix or Windows format. 1005 * The text after the last forward or backslash and before the last dot is returned. 1006 * <pre> 1007 * a/b/c.txt --> c 1008 * a.txt --> a 1009 * a/b/c --> c 1010 * a/b/c/ --> "" 1011 * </pre> 1012 * <p> 1013 * The output will be the same irrespective of the machine that the code is running on. 1014 * 1015 * @param filename the filename to query, null returns null 1016 * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string 1017 * will be removed 1018 */ 1019 public static String getBaseName(final String filename) { 1020 return removeExtension(getName(filename)); 1021 } 1022 1023 /** 1024 * Gets the extension of a filename. 1025 * <p> 1026 * This method returns the textual part of the filename after the last dot. 1027 * There must be no directory separator after the dot. 1028 * <pre> 1029 * foo.txt --> "txt" 1030 * a/b/c.jpg --> "jpg" 1031 * a/b.txt/c --> "" 1032 * a/b/c --> "" 1033 * </pre> 1034 * <p> 1035 * The output will be the same irrespective of the machine that the code is running on. 1036 * 1037 * @param filename the filename to retrieve the extension of. 1038 * @return the extension of the file or an empty string if none exists or {@code null} 1039 * if the filename is {@code null}. 1040 */ 1041 public static String getExtension(final String filename) { 1042 if (filename == null) { 1043 return null; 1044 } 1045 final int index = indexOfExtension(filename); 1046 if (index == NOT_FOUND) { 1047 return ""; 1048 } else { 1049 return filename.substring(index + 1); 1050 } 1051 } 1052 1053 //----------------------------------------------------------------------- 1054 /** 1055 * Removes the extension from a filename. 1056 * <p> 1057 * This method returns the textual part of the filename before the last dot. 1058 * There must be no directory separator after the dot. 1059 * <pre> 1060 * foo.txt --> foo 1061 * a\b\c.jpg --> a\b\c 1062 * a\b\c --> a\b\c 1063 * a.b\c --> a.b\c 1064 * </pre> 1065 * <p> 1066 * The output will be the same irrespective of the machine that the code is running on. 1067 * 1068 * @param filename the filename to query, null returns null 1069 * @return the filename minus the extension 1070 */ 1071 public static String removeExtension(final String filename) { 1072 if (filename == null) { 1073 return null; 1074 } 1075 failIfNullBytePresent(filename); 1076 1077 final int index = indexOfExtension(filename); 1078 if (index == NOT_FOUND) { 1079 return filename; 1080 } else { 1081 return filename.substring(0, index); 1082 } 1083 } 1084 1085 //----------------------------------------------------------------------- 1086 /** 1087 * Checks whether two filenames are equal exactly. 1088 * <p> 1089 * No processing is performed on the filenames other than comparison, 1090 * thus this is merely a null-safe case-sensitive equals. 1091 * 1092 * @param filename1 the first filename to query, may be null 1093 * @param filename2 the second filename to query, may be null 1094 * @return true if the filenames are equal, null equals null 1095 * @see IOCase#SENSITIVE 1096 */ 1097 public static boolean equals(final String filename1, final String filename2) { 1098 return equals(filename1, filename2, false, IOCase.SENSITIVE); 1099 } 1100 1101 /** 1102 * Checks whether two filenames are equal using the case rules of the system. 1103 * <p> 1104 * No processing is performed on the filenames other than comparison. 1105 * The check is case-sensitive on Unix and case-insensitive on Windows. 1106 * 1107 * @param filename1 the first filename to query, may be null 1108 * @param filename2 the second filename to query, may be null 1109 * @return true if the filenames are equal, null equals null 1110 * @see IOCase#SYSTEM 1111 */ 1112 public static boolean equalsOnSystem(final String filename1, final String filename2) { 1113 return equals(filename1, filename2, false, IOCase.SYSTEM); 1114 } 1115 1116 //----------------------------------------------------------------------- 1117 /** 1118 * Checks whether two filenames are equal after both have been normalized. 1119 * <p> 1120 * Both filenames are first passed to {@link #normalize(String)}. 1121 * The check is then performed in a case-sensitive manner. 1122 * 1123 * @param filename1 the first filename to query, may be null 1124 * @param filename2 the second filename to query, may be null 1125 * @return true if the filenames are equal, null equals null 1126 * @see IOCase#SENSITIVE 1127 */ 1128 public static boolean equalsNormalized(final String filename1, final String filename2) { 1129 return equals(filename1, filename2, true, IOCase.SENSITIVE); 1130 } 1131 1132 /** 1133 * Checks whether two filenames are equal after both have been normalized 1134 * and using the case rules of the system. 1135 * <p> 1136 * Both filenames are first passed to {@link #normalize(String)}. 1137 * The check is then performed case-sensitive on Unix and 1138 * case-insensitive on Windows. 1139 * 1140 * @param filename1 the first filename to query, may be null 1141 * @param filename2 the second filename to query, may be null 1142 * @return true if the filenames are equal, null equals null 1143 * @see IOCase#SYSTEM 1144 */ 1145 public static boolean equalsNormalizedOnSystem(final String filename1, final String filename2) { 1146 return equals(filename1, filename2, true, IOCase.SYSTEM); 1147 } 1148 1149 /** 1150 * Checks whether two filenames are equal, optionally normalizing and providing 1151 * control over the case-sensitivity. 1152 * 1153 * @param filename1 the first filename to query, may be null 1154 * @param filename2 the second filename to query, may be null 1155 * @param normalized whether to normalize the filenames 1156 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1157 * @return true if the filenames are equal, null equals null 1158 * @since 1.3 1159 */ 1160 public static boolean equals( 1161 String filename1, String filename2, 1162 final boolean normalized, IOCase caseSensitivity) { 1163 1164 if (filename1 == null || filename2 == null) { 1165 return filename1 == null && filename2 == null; 1166 } 1167 if (normalized) { 1168 filename1 = normalize(filename1); 1169 filename2 = normalize(filename2); 1170 if (filename1 == null || filename2 == null) { 1171 throw new NullPointerException( 1172 "Error normalizing one or both of the file names"); 1173 } 1174 } 1175 if (caseSensitivity == null) { 1176 caseSensitivity = IOCase.SENSITIVE; 1177 } 1178 return caseSensitivity.checkEquals(filename1, filename2); 1179 } 1180 1181 //----------------------------------------------------------------------- 1182 /** 1183 * Checks whether the extension of the filename is that specified. 1184 * <p> 1185 * This method obtains the extension as the textual part of the filename 1186 * after the last dot. There must be no directory separator after the dot. 1187 * The extension check is case-sensitive on all platforms. 1188 * 1189 * @param filename the filename to query, null returns false 1190 * @param extension the extension to check for, null or empty checks for no extension 1191 * @return true if the filename has the specified extension 1192 * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes 1193 */ 1194 public static boolean isExtension(final String filename, final String extension) { 1195 if (filename == null) { 1196 return false; 1197 } 1198 failIfNullBytePresent(filename); 1199 1200 if (extension == null || extension.isEmpty()) { 1201 return indexOfExtension(filename) == NOT_FOUND; 1202 } 1203 final String fileExt = getExtension(filename); 1204 return fileExt.equals(extension); 1205 } 1206 1207 /** 1208 * Checks whether the extension of the filename is one of those specified. 1209 * <p> 1210 * This method obtains the extension as the textual part of the filename 1211 * after the last dot. There must be no directory separator after the dot. 1212 * The extension check is case-sensitive on all platforms. 1213 * 1214 * @param filename the filename to query, null returns false 1215 * @param extensions the extensions to check for, null checks for no extension 1216 * @return true if the filename is one of the extensions 1217 * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes 1218 */ 1219 public static boolean isExtension(final String filename, final String[] extensions) { 1220 if (filename == null) { 1221 return false; 1222 } 1223 failIfNullBytePresent(filename); 1224 1225 if (extensions == null || extensions.length == 0) { 1226 return indexOfExtension(filename) == NOT_FOUND; 1227 } 1228 final String fileExt = getExtension(filename); 1229 for (final String extension : extensions) { 1230 if (fileExt.equals(extension)) { 1231 return true; 1232 } 1233 } 1234 return false; 1235 } 1236 1237 /** 1238 * Checks whether the extension of the filename is one of those specified. 1239 * <p> 1240 * This method obtains the extension as the textual part of the filename 1241 * after the last dot. There must be no directory separator after the dot. 1242 * The extension check is case-sensitive on all platforms. 1243 * 1244 * @param filename the filename to query, null returns false 1245 * @param extensions the extensions to check for, null checks for no extension 1246 * @return true if the filename is one of the extensions 1247 * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes 1248 */ 1249 public static boolean isExtension(final String filename, final Collection<String> extensions) { 1250 if (filename == null) { 1251 return false; 1252 } 1253 failIfNullBytePresent(filename); 1254 1255 if (extensions == null || extensions.isEmpty()) { 1256 return indexOfExtension(filename) == NOT_FOUND; 1257 } 1258 final String fileExt = getExtension(filename); 1259 for (final String extension : extensions) { 1260 if (fileExt.equals(extension)) { 1261 return true; 1262 } 1263 } 1264 return false; 1265 } 1266 1267 //----------------------------------------------------------------------- 1268 /** 1269 * Checks a filename to see if it matches the specified wildcard matcher, 1270 * always testing case-sensitive. 1271 * <p> 1272 * The wildcard matcher uses the characters '?' and '*' to represent a 1273 * single or multiple (zero or more) wildcard characters. 1274 * This is the same as often found on Dos/Unix command lines. 1275 * The check is case-sensitive always. 1276 * <pre> 1277 * wildcardMatch("c.txt", "*.txt") --> true 1278 * wildcardMatch("c.txt", "*.jpg") --> false 1279 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1280 * wildcardMatch("c.txt", "*.???") --> true 1281 * wildcardMatch("c.txt", "*.????") --> false 1282 * </pre> 1283 * N.B. the sequence "*?" does not work properly at present in match strings. 1284 * 1285 * @param filename the filename to match on 1286 * @param wildcardMatcher the wildcard string to match against 1287 * @return true if the filename matches the wildcard string 1288 * @see IOCase#SENSITIVE 1289 */ 1290 public static boolean wildcardMatch(final String filename, final String wildcardMatcher) { 1291 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE); 1292 } 1293 1294 /** 1295 * Checks a filename to see if it matches the specified wildcard matcher 1296 * using the case rules of the system. 1297 * <p> 1298 * The wildcard matcher uses the characters '?' and '*' to represent a 1299 * single or multiple (zero or more) wildcard characters. 1300 * This is the same as often found on Dos/Unix command lines. 1301 * The check is case-sensitive on Unix and case-insensitive on Windows. 1302 * <pre> 1303 * wildcardMatch("c.txt", "*.txt") --> true 1304 * wildcardMatch("c.txt", "*.jpg") --> false 1305 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1306 * wildcardMatch("c.txt", "*.???") --> true 1307 * wildcardMatch("c.txt", "*.????") --> false 1308 * </pre> 1309 * N.B. the sequence "*?" does not work properly at present in match strings. 1310 * 1311 * @param filename the filename to match on 1312 * @param wildcardMatcher the wildcard string to match against 1313 * @return true if the filename matches the wildcard string 1314 * @see IOCase#SYSTEM 1315 */ 1316 public static boolean wildcardMatchOnSystem(final String filename, final String wildcardMatcher) { 1317 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM); 1318 } 1319 1320 /** 1321 * Checks a filename to see if it matches the specified wildcard matcher 1322 * allowing control over case-sensitivity. 1323 * <p> 1324 * The wildcard matcher uses the characters '?' and '*' to represent a 1325 * single or multiple (zero or more) wildcard characters. 1326 * N.B. the sequence "*?" does not work properly at present in match strings. 1327 * 1328 * @param filename the filename to match on 1329 * @param wildcardMatcher the wildcard string to match against 1330 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1331 * @return true if the filename matches the wildcard string 1332 * @since 1.3 1333 */ 1334 public static boolean wildcardMatch(final String filename, final String wildcardMatcher, IOCase caseSensitivity) { 1335 if (filename == null && wildcardMatcher == null) { 1336 return true; 1337 } 1338 if (filename == null || wildcardMatcher == null) { 1339 return false; 1340 } 1341 if (caseSensitivity == null) { 1342 caseSensitivity = IOCase.SENSITIVE; 1343 } 1344 final String[] wcs = splitOnTokens(wildcardMatcher); 1345 boolean anyChars = false; 1346 int textIdx = 0; 1347 int wcsIdx = 0; 1348 final Stack<int[]> backtrack = new Stack<>(); 1349 1350 // loop around a backtrack stack, to handle complex * matching 1351 do { 1352 if (backtrack.size() > 0) { 1353 final int[] array = backtrack.pop(); 1354 wcsIdx = array[0]; 1355 textIdx = array[1]; 1356 anyChars = true; 1357 } 1358 1359 // loop whilst tokens and text left to process 1360 while (wcsIdx < wcs.length) { 1361 1362 if (wcs[wcsIdx].equals("?")) { 1363 // ? so move to next text char 1364 textIdx++; 1365 if (textIdx > filename.length()) { 1366 break; 1367 } 1368 anyChars = false; 1369 1370 } else if (wcs[wcsIdx].equals("*")) { 1371 // set any chars status 1372 anyChars = true; 1373 if (wcsIdx == wcs.length - 1) { 1374 textIdx = filename.length(); 1375 } 1376 1377 } else { 1378 // matching text token 1379 if (anyChars) { 1380 // any chars then try to locate text token 1381 textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]); 1382 if (textIdx == NOT_FOUND) { 1383 // token not found 1384 break; 1385 } 1386 final int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]); 1387 if (repeat >= 0) { 1388 backtrack.push(new int[] {wcsIdx, repeat}); 1389 } 1390 } else { 1391 // matching from current position 1392 if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) { 1393 // couldnt match token 1394 break; 1395 } 1396 } 1397 1398 // matched text token, move text index to end of matched token 1399 textIdx += wcs[wcsIdx].length(); 1400 anyChars = false; 1401 } 1402 1403 wcsIdx++; 1404 } 1405 1406 // full match 1407 if (wcsIdx == wcs.length && textIdx == filename.length()) { 1408 return true; 1409 } 1410 1411 } while (backtrack.size() > 0); 1412 1413 return false; 1414 } 1415 1416 /** 1417 * Splits a string into a number of tokens. 1418 * The text is split by '?' and '*'. 1419 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 1420 * 1421 * @param text the text to split 1422 * @return the array of tokens, never null 1423 */ 1424 static String[] splitOnTokens(final String text) { 1425 // used by wildcardMatch 1426 // package level so a unit test may run on this 1427 1428 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1429 return new String[] { text }; 1430 } 1431 1432 final char[] array = text.toCharArray(); 1433 final ArrayList<String> list = new ArrayList<>(); 1434 final StringBuilder buffer = new StringBuilder(); 1435 char prevChar = 0; 1436 for (final char ch : array) { 1437 if (ch == '?' || ch == '*') { 1438 if (buffer.length() != 0) { 1439 list.add(buffer.toString()); 1440 buffer.setLength(0); 1441 } 1442 if (ch == '?') { 1443 list.add("?"); 1444 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1445 list.add("*"); 1446 } 1447 } else { 1448 buffer.append(ch); 1449 } 1450 prevChar = ch; 1451 } 1452 if (buffer.length() != 0) { 1453 list.add(buffer.toString()); 1454 } 1455 1456 return list.toArray( new String[ list.size() ] ); 1457 } 1458 1459 /** 1460 * Checks whether a given string is a valid host name according to 1461 * RFC 3986. 1462 * 1463 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1464 * RFC calls a "reg-name". Percent encoded names don't seem to be 1465 * valid names in UNC paths.</p> 1466 * 1467 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1468 * @param name the hostname to validate 1469 * @return true if the given name is a valid host name 1470 */ 1471 private static boolean isValidHostName(String name) { 1472 return isIPv4Address(name) || isIPv6Address(name) || isRFC3986HostName(name); 1473 } 1474 1475 private static final Pattern IPV4_PATTERN = 1476 Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); 1477 private static final int IPV4_MAX_OCTET_VALUE = 255; 1478 1479 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1480 private static boolean isIPv4Address(String name) { 1481 Matcher m = IPV4_PATTERN.matcher(name); 1482 if (!m.matches() || m.groupCount() != 4) { 1483 return false; 1484 } 1485 1486 // verify that address subgroups are legal 1487 for (int i = 1; i < 5; i++) { 1488 String ipSegment = m.group(i); 1489 if (ipSegment == null || ipSegment.length() == 0) { 1490 return false; 1491 } 1492 1493 int iIpSegment = 0; 1494 1495 try { 1496 iIpSegment = Integer.parseInt(ipSegment); 1497 } catch(NumberFormatException e) { 1498 return false; 1499 } 1500 1501 if (iIpSegment > IPV4_MAX_OCTET_VALUE) { 1502 return false; 1503 } 1504 1505 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1506 return false; 1507 } 1508 1509 } 1510 1511 return true; 1512 } 1513 1514 private static final int IPV6_MAX_HEX_GROUPS = 8; 1515 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; 1516 private static final int MAX_UNSIGNED_SHORT = 0xffff; 1517 private static final int BASE_16 = 16; 1518 1519 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1520 private static boolean isIPv6Address(String inet6Address) { 1521 boolean containsCompressedZeroes = inet6Address.contains("::"); 1522 if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) { 1523 return false; 1524 } 1525 if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::")) 1526 || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) { 1527 return false; 1528 } 1529 String[] octets = inet6Address.split(":"); 1530 if (containsCompressedZeroes) { 1531 List<String> octetList = new ArrayList<String>(Arrays.asList(octets)); 1532 if (inet6Address.endsWith("::")) { 1533 // String.split() drops ending empty segments 1534 octetList.add(""); 1535 } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { 1536 octetList.remove(0); 1537 } 1538 octets = octetList.toArray(new String[octetList.size()]); 1539 } 1540 if (octets.length > IPV6_MAX_HEX_GROUPS) { 1541 return false; 1542 } 1543 int validOctets = 0; 1544 int emptyOctets = 0; // consecutive empty chunks 1545 for (int index = 0; index < octets.length; index++) { 1546 String octet = octets[index]; 1547 if (octet.length() == 0) { 1548 emptyOctets++; 1549 if (emptyOctets > 1) { 1550 return false; 1551 } 1552 } else { 1553 emptyOctets = 0; 1554 // Is last chunk an IPv4 address? 1555 if (index == octets.length - 1 && octet.contains(".")) { 1556 if (!isIPv4Address(octet)) { 1557 return false; 1558 } 1559 validOctets += 2; 1560 continue; 1561 } 1562 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { 1563 return false; 1564 } 1565 int octetInt = 0; 1566 try { 1567 octetInt = Integer.parseInt(octet, BASE_16); 1568 } catch (NumberFormatException e) { 1569 return false; 1570 } 1571 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { 1572 return false; 1573 } 1574 } 1575 validOctets++; 1576 } 1577 if (validOctets > IPV6_MAX_HEX_GROUPS || (validOctets < IPV6_MAX_HEX_GROUPS && !containsCompressedZeroes)) { 1578 return false; 1579 } 1580 return true; 1581 } 1582 1583 private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$"); 1584 1585 private static boolean isRFC3986HostName(String name) { 1586 String[] parts = name.split("\\.", -1); 1587 for (int i = 0; i < parts.length; i++) { 1588 if (parts[i].length() == 0) { 1589 // trailing dot is legal, otherwise we've hit a .. sequence 1590 return i == parts.length - 1; 1591 } 1592 if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { 1593 return false; 1594 } 1595 } 1596 return true; 1597 } 1598}