001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.fileupload2.core; 018 019import java.io.IOException; 020import java.nio.charset.Charset; 021import java.nio.charset.StandardCharsets; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Locale; 026import java.util.Map; 027import java.util.Objects; 028 029import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder; 030import org.apache.commons.io.IOUtils; 031 032/** 033 * High level API for processing file uploads. 034 * <p> 035 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by 036 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with 037 * a given HTML widget. 038 * </p> 039 * <p> 040 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else. 041 * </p> 042 * 043 * @param <R> The request context type. 044 * @param <I> The FileItem type. 045 * @param <F> the FileItemFactory type. 046 */ 047public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> { 048 049 /** 050 * Boundary parameter key. 051 */ 052 private static final String BOUNDARY_KEY = "boundary"; 053 054 /** 055 * Name parameter key. 056 */ 057 private static final String NAME_KEY = "name"; 058 059 /** 060 * File name parameter key. 061 */ 062 private static final String FILENAME_KEY = "filename"; 063 064 /** 065 * HTTP content type header name. 066 */ 067 public static final String CONTENT_TYPE = "Content-type"; 068 069 /** 070 * HTTP content disposition header name. 071 */ 072 public static final String CONTENT_DISPOSITION = "Content-disposition"; 073 074 /** 075 * HTTP content length header name. 076 */ 077 public static final String CONTENT_LENGTH = "Content-length"; 078 079 /** 080 * Content-disposition value for form data. 081 */ 082 public static final String FORM_DATA = "form-data"; 083 084 /** 085 * Content-disposition value for file attachment. 086 */ 087 public static final String ATTACHMENT = "attachment"; 088 089 /** 090 * Part of HTTP content type header. 091 */ 092 public static final String MULTIPART = "multipart/"; 093 094 /** 095 * HTTP content type header for multipart forms. 096 */ 097 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 098 099 /** 100 * HTTP content type header for multiple uploads. 101 */ 102 public static final String MULTIPART_MIXED = "multipart/mixed"; 103 104 /** 105 * Utility method that determines whether the request contains multipart content. 106 * <p> 107 * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this 108 * method is static, it is not possible to provide its replacement until this method is removed. 109 * </p> 110 * 111 * @param ctx The request context to be evaluated. Must be non-null. 112 * @return {@code true} if the request is multipart; {@code false} otherwise. 113 */ 114 public static final boolean isMultipartContent(final RequestContext ctx) { 115 final var contentType = ctx.getContentType(); 116 if (contentType == null) { 117 return false; 118 } 119 return contentType.toLowerCase(Locale.ROOT).startsWith(MULTIPART); 120 } 121 122 /** 123 * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum. 124 */ 125 private long sizeMax = -1; 126 127 /** 128 * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum. 129 */ 130 private long fileSizeMax = -1; 131 132 /** 133 * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum. 134 */ 135 private long fileCountMax = -1; 136 137 /** 138 * The content encoding to use when reading part headers. 139 */ 140 private Charset headerCharset; 141 142 /** 143 * The progress listener. 144 */ 145 private ProgressListener progressListener = ProgressListener.NOP; 146 147 /** 148 * The factory to use to create new form items. 149 */ 150 private F fileItemFactory; 151 152 /** 153 * Constructs a new instance for subclasses. 154 */ 155 public AbstractFileUpload() { 156 // empty 157 } 158 159 /** 160 * Gets the boundary from the {@code Content-type} header. 161 * 162 * @param contentType The value of the content type header from which to extract the boundary value. 163 * @return The boundary, as a byte array. 164 */ 165 public byte[] getBoundary(final String contentType) { 166 final var parser = new ParameterParser(); 167 parser.setLowerCaseNames(true); 168 // Parameter parser can handle null input 169 final var params = parser.parse(contentType, new char[] { ';', ',' }); 170 final var boundaryStr = params.get(BOUNDARY_KEY); 171 return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null; 172 } 173 174 /** 175 * Gets the field name from the {@code Content-disposition} header. 176 * 177 * @param headers A {@code Map} containing the HTTP request headers. 178 * @return The field name for the current {@code encapsulation}. 179 */ 180 public String getFieldName(final FileItemHeaders headers) { 181 return getFieldName(headers.getHeader(CONTENT_DISPOSITION)); 182 } 183 184 /** 185 * Gets the field name, which is given by the content-disposition header. 186 * 187 * @param contentDisposition The content-dispositions header value. 188 * @return The field name. 189 */ 190 private String getFieldName(final String contentDisposition) { 191 String fieldName = null; 192 if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ROOT).startsWith(FORM_DATA)) { 193 final var parser = new ParameterParser(); 194 parser.setLowerCaseNames(true); 195 // Parameter parser can handle null input 196 final var params = parser.parse(contentDisposition, ';'); 197 fieldName = params.get(NAME_KEY); 198 if (fieldName != null) { 199 fieldName = fieldName.trim(); 200 } 201 } 202 return fieldName; 203 } 204 205 /** 206 * Gets the maximum number of files allowed in a single request. 207 * 208 * @return The maximum number of files allowed in a single request. 209 */ 210 public long getFileCountMax() { 211 return fileCountMax; 212 } 213 214 /** 215 * Gets the factory class used when creating file items. 216 * 217 * @return The factory class for new file items. 218 */ 219 public F getFileItemFactory() { 220 return fileItemFactory; 221 } 222 223 /** 224 * Gets the file name from the {@code Content-disposition} header. 225 * 226 * @param headers The HTTP headers object. 227 * @return The file name for the current {@code encapsulation}. 228 */ 229 public String getFileName(final FileItemHeaders headers) { 230 return getFileName(headers.getHeader(CONTENT_DISPOSITION)); 231 } 232 233 /** 234 * Gets the given content-disposition headers file name. 235 * 236 * @param contentDisposition The content-disposition headers value. 237 * @return The file name 238 */ 239 private String getFileName(final String contentDisposition) { 240 String fileName = null; 241 if (contentDisposition != null) { 242 final var cdl = contentDisposition.toLowerCase(Locale.ROOT); 243 if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) { 244 final var parser = new ParameterParser(); 245 parser.setLowerCaseNames(true); 246 // Parameter parser can handle null input 247 final var params = parser.parse(contentDisposition, ';'); 248 if (params.containsKey(FILENAME_KEY)) { 249 fileName = params.get(FILENAME_KEY); 250 if (fileName != null) { 251 fileName = fileName.trim(); 252 } else { 253 // Even if there is no value, the parameter is present, 254 // so we return an empty file name rather than no file 255 // name. 256 fileName = ""; 257 } 258 } 259 } 260 } 261 return fileName; 262 } 263 264 /** 265 * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}. 266 * 267 * @see #setFileSizeMax(long) 268 * @return Maximum size of a single uploaded file. 269 */ 270 public long getFileSizeMax() { 271 return fileSizeMax; 272 } 273 274 /** 275 * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If 276 * that is also not specified, or {@code null}, the platform default encoding is used. 277 * 278 * @return The encoding used to read part headers. 279 */ 280 public Charset getHeaderCharset() { 281 return headerCharset; 282 } 283 284 /** 285 * Gets a file item iterator. 286 * 287 * @param request The servlet request to be parsed. 288 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted. 289 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 290 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the 291 * uploaded content. 292 */ 293 public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException; 294 295 /** 296 * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 297 * 298 * @param requestContext The context for the request to be parsed. 299 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted. 300 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 301 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the 302 * uploaded content. 303 */ 304 public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException { 305 return new FileItemInputIteratorImpl(this, requestContext); 306 } 307 308 /** 309 * Parses the {@code header-part} and returns as key/value pairs. 310 * <p> 311 * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values. 312 * </p> 313 * 314 * @param headerPart The {@code header-part} of the current {@code encapsulation}. 315 * @return A {@code Map} containing the parsed HTTP request headers. 316 */ 317 public FileItemHeaders getParsedHeaders(final String headerPart) { 318 final var len = headerPart.length(); 319 final var headers = newFileItemHeaders(); 320 var start = 0; 321 for (;;) { 322 var end = parseEndOfLine(headerPart, start); 323 if (start == end) { 324 break; 325 } 326 final var header = new StringBuilder(headerPart.substring(start, end)); 327 start = end + 2; 328 while (start < len) { 329 var nonWs = start; 330 while (nonWs < len) { 331 final var c = headerPart.charAt(nonWs); 332 if (c != ' ' && c != '\t') { 333 break; 334 } 335 ++nonWs; 336 } 337 if (nonWs == start) { 338 break; 339 } 340 // Continuation line found 341 end = parseEndOfLine(headerPart, nonWs); 342 header.append(' ').append(headerPart, nonWs, end); 343 start = end + 2; 344 } 345 parseHeaderLine(headers, header.toString()); 346 } 347 return headers; 348 } 349 350 /** 351 * Gets the progress listener. 352 * 353 * @return The progress listener, if any, or null. 354 */ 355 public ProgressListener getProgressListener() { 356 return progressListener; 357 } 358 359 /** 360 * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}. 361 * 362 * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit. 363 * @see #setSizeMax(long) 364 */ 365 public long getSizeMax() { 366 return sizeMax; 367 } 368 369 /** 370 * Creates a new instance of {@link FileItemHeaders}. 371 * 372 * @return The new instance. 373 */ 374 protected FileItemHeaders newFileItemHeaders() { 375 return AbstractFileItemBuilder.newFileItemHeaders(); 376 } 377 378 /** 379 * Skips bytes until the end of the current line. 380 * 381 * @param headerPart The headers, which are being parsed. 382 * @param end Index of the last byte, which has yet been processed. 383 * @return Index of the \r\n sequence, which indicates end of line. 384 */ 385 private int parseEndOfLine(final String headerPart, final int end) { 386 var index = end; 387 for (;;) { 388 final var offset = headerPart.indexOf('\r', index); 389 if (offset == -1 || offset + 1 >= headerPart.length()) { 390 throw new IllegalStateException("Expected headers to be terminated by an empty line."); 391 } 392 if (headerPart.charAt(offset + 1) == '\n') { 393 return offset; 394 } 395 index = offset + 1; 396 } 397 } 398 399 /** 400 * Parses the next header line. 401 * 402 * @param headers String with all headers. 403 * @param header Map where to store the current header. 404 */ 405 private void parseHeaderLine(final FileItemHeaders headers, final String header) { 406 final var colonOffset = header.indexOf(':'); 407 if (colonOffset == -1) { 408 // This header line is malformed, skip it. 409 return; 410 } 411 final var headerName = header.substring(0, colonOffset).trim(); 412 final var headerValue = header.substring(colonOffset + 1).trim(); 413 headers.addHeader(headerName, headerValue); 414 } 415 416 /** 417 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 418 * 419 * @param request The servlet request to be parsed. 420 * @return A map of {@code FileItem} instances parsed from the request. 421 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 422 */ 423 public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException; 424 425 /** 426 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 427 * 428 * @param ctx The context for the request to be parsed. 429 * @return A map of {@code FileItem} instances parsed from the request. 430 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 431 */ 432 public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException { 433 final var items = parseRequest(ctx); 434 final Map<String, List<I>> itemsMap = new HashMap<>(items.size()); 435 436 for (final I fileItem : items) { 437 final var fieldName = fileItem.getFieldName(); 438 final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>()); 439 mappedItems.add(fileItem); 440 } 441 442 return itemsMap; 443 } 444 445 /** 446 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 447 * 448 * @param request The servlet request to be parsed. 449 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted. 450 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 451 */ 452 public abstract List<I> parseRequest(R request) throws FileUploadException; 453 454 /** 455 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 456 * 457 * @param requestContext The context for the request to be parsed. 458 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted. 459 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 460 */ 461 public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException { 462 final List<I> itemList = new ArrayList<>(); 463 var successful = false; 464 try { 465 final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set."); 466 final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE]; 467 getItemIterator(requestContext).forEachRemaining(fileItemInput -> { 468 if (itemList.size() == fileCountMax) { 469 // The next item will exceed the limit. 470 throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size()); 471 } 472 // Don't use getName() here to prevent an InvalidFileNameException. 473 // @formatter:off 474 final var fileItem = fileItemFactory.fileItemBuilder() 475 .setFieldName(fileItemInput.getFieldName()) 476 .setContentType(fileItemInput.getContentType()) 477 .setFormField(fileItemInput.isFormField()) 478 .setFileName(fileItemInput.getName()) 479 .setFileItemHeaders(fileItemInput.getHeaders()) 480 .get(); 481 // @formatter:on 482 itemList.add(fileItem); 483 try (var inputStream = fileItemInput.getInputStream(); 484 var outputStream = fileItem.getOutputStream()) { 485 IOUtils.copyLarge(inputStream, outputStream, buffer); 486 } catch (final FileUploadException e) { 487 throw e; 488 } catch (final IOException e) { 489 throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e); 490 } 491 }); 492 successful = true; 493 return itemList; 494 } catch (final FileUploadException e) { 495 throw e; 496 } catch (final IOException e) { 497 throw new FileUploadException(e.getMessage(), e); 498 } finally { 499 if (!successful) { 500 for (final I fileItem : itemList) { 501 try { 502 fileItem.delete(); 503 } catch (final Exception ignored) { 504 // ignored TODO perhaps add to tracker delete failure list somehow? 505 } 506 } 507 } 508 } 509 } 510 511 /** 512 * Sets the maximum number of files allowed per request. 513 * 514 * @param fileCountMax The new limit. {@code -1} means no limit. 515 */ 516 public void setFileCountMax(final long fileCountMax) { 517 this.fileCountMax = fileCountMax; 518 } 519 520 /** 521 * Sets the factory class to use when creating file items. 522 * 523 * @param factory The factory class for new file items. 524 */ 525 public void setFileItemFactory(final F factory) { 526 this.fileItemFactory = factory; 527 } 528 529 /** 530 * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}. 531 * 532 * @see #getFileSizeMax() 533 * @param fileSizeMax Maximum size of a single uploaded file. 534 */ 535 public void setFileSizeMax(final long fileSizeMax) { 536 this.fileSizeMax = fileSizeMax; 537 } 538 539 /** 540 * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is 541 * used. If that is also not specified, or {@code null}, the platform default encoding is used. 542 * 543 * @param headerCharset The encoding used to read part headers. 544 */ 545 public void setHeaderCharset(final Charset headerCharset) { 546 this.headerCharset = headerCharset; 547 } 548 549 /** 550 * Sets the progress listener. 551 * 552 * @param progressListener The progress listener, if any. Defaults to null. 553 */ 554 public void setProgressListener(final ProgressListener progressListener) { 555 this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP; 556 } 557 558 /** 559 * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}. 560 * 561 * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit. 562 * @see #getSizeMax() 563 */ 564 public void setSizeMax(final long sizeMax) { 565 this.sizeMax = sizeMax; 566 } 567 568}