1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.fileupload2.core;
18
19 import java.io.IOException;
20 import java.nio.charset.Charset;
21 import java.nio.charset.StandardCharsets;
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.Map;
27 import java.util.Objects;
28
29 import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
30 import org.apache.commons.io.IOUtils;
31
32 /**
33 * High level API for processing file uploads.
34 * <p>
35 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
36 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
37 * a given HTML widget.
38 * </p>
39 * <p>
40 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
41 * </p>
42 *
43 * @param <R> The request context type.
44 * @param <I> The FileItem type.
 * @param <F> The FileItemFactory type.
46 */
47 public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {
48
49 /**
50 * Boundary parameter key.
51 */
52 private static final String BOUNDARY_KEY = "boundary";
53
54 /**
55 * Name parameter key.
56 */
57 private static final String NAME_KEY = "name";
58
59 /**
60 * File name parameter key.
61 */
62 private static final String FILENAME_KEY = "filename";
63
64 /**
65 * HTTP content type header name.
66 */
67 public static final String CONTENT_TYPE = "Content-type";
68
69 /**
70 * HTTP content disposition header name.
71 */
72 public static final String CONTENT_DISPOSITION = "Content-disposition";
73
74 /**
75 * HTTP content length header name.
76 */
77 public static final String CONTENT_LENGTH = "Content-length";
78
79 /**
80 * Content-disposition value for form data.
81 */
82 public static final String FORM_DATA = "form-data";
83
84 /**
85 * Content-disposition value for file attachment.
86 */
87 public static final String ATTACHMENT = "attachment";
88
89 /**
90 * Part of HTTP content type header.
91 */
92 public static final String MULTIPART = "multipart/";
93
94 /**
95 * HTTP content type header for multipart forms.
96 */
97 public static final String MULTIPART_FORM_DATA = "multipart/form-data";
98
99 /**
100 * HTTP content type header for multiple uploads.
101 */
102 public static final String MULTIPART_MIXED = "multipart/mixed";
103
104 /**
105 * Utility method that determines whether the request contains multipart content.
106 * <p>
107 * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
108 * method is static, it is not possible to provide its replacement until this method is removed.
109 * </p>
110 *
111 * @param ctx The request context to be evaluated. Must be non-null.
112 * @return {@code true} if the request is multipart; {@code false} otherwise.
113 */
114 public static final boolean isMultipartContent(final RequestContext ctx) {
115 final var contentType = ctx.getContentType();
116 if (contentType == null) {
117 return false;
118 }
119 return contentType.toLowerCase(Locale.ENGLISH).startsWith(MULTIPART);
120 }
121
122 /**
123 * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum.
124 */
125 private long sizeMax = -1;
126
127 /**
128 * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum.
129 */
130 private long fileSizeMax = -1;
131
132 /**
133 * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
134 */
135 private long fileCountMax = -1;
136
137 /**
138 * The content encoding to use when reading part headers.
139 */
140 private Charset headerCharset;
141
142 /**
143 * The progress listener.
144 */
145 private ProgressListener progressListener = ProgressListener.NOP;
146
147 /**
148 * The factory to use to create new form items.
149 */
150 private F fileItemFactory;
151
152 /**
153 * Gets the boundary from the {@code Content-type} header.
154 *
155 * @param contentType The value of the content type header from which to extract the boundary value.
156 * @return The boundary, as a byte array.
157 */
158 public byte[] getBoundary(final String contentType) {
159 final var parser = new ParameterParser();
160 parser.setLowerCaseNames(true);
161 // Parameter parser can handle null input
162 final var params = parser.parse(contentType, new char[] { ';', ',' });
163 final var boundaryStr = params.get(BOUNDARY_KEY);
164 return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
165 }
166
167 /**
168 * Gets the field name from the {@code Content-disposition} header.
169 *
170 * @param headers A {@code Map} containing the HTTP request headers.
171 * @return The field name for the current {@code encapsulation}.
172 */
173 public String getFieldName(final FileItemHeaders headers) {
174 return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
175 }
176
177 /**
178 * Gets the field name, which is given by the content-disposition header.
179 *
180 * @param contentDisposition The content-dispositions header value.
181 * @return The field name.
182 */
183 private String getFieldName(final String contentDisposition) {
184 String fieldName = null;
185 if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ENGLISH).startsWith(FORM_DATA)) {
186 final var parser = new ParameterParser();
187 parser.setLowerCaseNames(true);
188 // Parameter parser can handle null input
189 final var params = parser.parse(contentDisposition, ';');
190 fieldName = params.get(NAME_KEY);
191 if (fieldName != null) {
192 fieldName = fieldName.trim();
193 }
194 }
195 return fieldName;
196 }
197
198 /**
199 * Gets the maximum number of files allowed in a single request.
200 *
201 * @return The maximum number of files allowed in a single request.
202 */
203 public long getFileCountMax() {
204 return fileCountMax;
205 }
206
207 /**
208 * Gets the factory class used when creating file items.
209 *
210 * @return The factory class for new file items.
211 */
212 public F getFileItemFactory() {
213 return fileItemFactory;
214 }
215
216 /**
217 * Gets the file name from the {@code Content-disposition} header.
218 *
219 * @param headers The HTTP headers object.
220 *
221 * @return The file name for the current {@code encapsulation}.
222 */
223 public String getFileName(final FileItemHeaders headers) {
224 return getFileName(headers.getHeader(CONTENT_DISPOSITION));
225 }
226
227 /**
228 * Gets the given content-disposition headers file name.
229 *
230 * @param contentDisposition The content-disposition headers value.
231 * @return The file name
232 */
233 private String getFileName(final String contentDisposition) {
234 String fileName = null;
235 if (contentDisposition != null) {
236 final var cdl = contentDisposition.toLowerCase(Locale.ENGLISH);
237 if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
238 final var parser = new ParameterParser();
239 parser.setLowerCaseNames(true);
240 // Parameter parser can handle null input
241 final var params = parser.parse(contentDisposition, ';');
242 if (params.containsKey(FILENAME_KEY)) {
243 fileName = params.get(FILENAME_KEY);
244 if (fileName != null) {
245 fileName = fileName.trim();
246 } else {
247 // Even if there is no value, the parameter is present,
248 // so we return an empty file name rather than no file
249 // name.
250 fileName = "";
251 }
252 }
253 }
254 }
255 return fileName;
256 }
257
258 /**
259 * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
260 *
261 * @see #setFileSizeMax(long)
262 * @return Maximum size of a single uploaded file.
263 */
264 public long getFileSizeMax() {
265 return fileSizeMax;
266 }
267
268 /**
269 * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
270 * that is also not specified, or {@code null}, the platform default encoding is used.
271 *
272 * @return The encoding used to read part headers.
273 */
274 public Charset getHeaderCharset() {
275 return headerCharset;
276 }
277
278 /**
279 * Gets a file item iterator.
280 *
281 * @param request The servlet request to be parsed.
282 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
283 * @throws FileUploadException if there are problems reading/parsing the request or storing files.
284 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
285 * uploaded content.
286 */
287 public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;
288
289 /**
290 * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
291 *
292 * @param requestContext The context for the request to be parsed.
293 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
294 * @throws FileUploadException if there are problems reading/parsing the request or storing files.
295 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
296 * uploaded content.
297 */
298 public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
299 return new FileItemInputIteratorImpl(this, requestContext);
300 }
301
302 /**
303 * Parses the {@code header-part} and returns as key/value pairs.
304 * <p>
305 * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
306 * </p>
307 *
308 * @param headerPart The {@code header-part} of the current {@code encapsulation}.
309 * @return A {@code Map} containing the parsed HTTP request headers.
310 */
311 public FileItemHeaders getParsedHeaders(final String headerPart) {
312 final var len = headerPart.length();
313 final var headers = newFileItemHeaders();
314 var start = 0;
315 for (;;) {
316 var end = parseEndOfLine(headerPart, start);
317 if (start == end) {
318 break;
319 }
320 final var header = new StringBuilder(headerPart.substring(start, end));
321 start = end + 2;
322 while (start < len) {
323 var nonWs = start;
324 while (nonWs < len) {
325 final var c = headerPart.charAt(nonWs);
326 if (c != ' ' && c != '\t') {
327 break;
328 }
329 ++nonWs;
330 }
331 if (nonWs == start) {
332 break;
333 }
334 // Continuation line found
335 end = parseEndOfLine(headerPart, nonWs);
336 header.append(' ').append(headerPart, nonWs, end);
337 start = end + 2;
338 }
339 parseHeaderLine(headers, header.toString());
340 }
341 return headers;
342 }
343
344 /**
345 * Gets the progress listener.
346 *
347 * @return The progress listener, if any, or null.
348 */
349 public ProgressListener getProgressListener() {
350 return progressListener;
351 }
352
353 /**
354 * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}.
355 *
356 * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
357 * @see #setSizeMax(long)
358 *
359 */
360 public long getSizeMax() {
361 return sizeMax;
362 }
363
364 /**
365 * Creates a new instance of {@link FileItemHeaders}.
366 *
367 * @return The new instance.
368 */
369 protected FileItemHeaders newFileItemHeaders() {
370 return AbstractFileItemBuilder.newFileItemHeaders();
371 }
372
373 /**
374 * Skips bytes until the end of the current line.
375 *
376 * @param headerPart The headers, which are being parsed.
377 * @param end Index of the last byte, which has yet been processed.
378 * @return Index of the \r\n sequence, which indicates end of line.
379 */
380 private int parseEndOfLine(final String headerPart, final int end) {
381 var index = end;
382 for (;;) {
383 final var offset = headerPart.indexOf('\r', index);
384 if (offset == -1 || offset + 1 >= headerPart.length()) {
385 throw new IllegalStateException("Expected headers to be terminated by an empty line.");
386 }
387 if (headerPart.charAt(offset + 1) == '\n') {
388 return offset;
389 }
390 index = offset + 1;
391 }
392 }
393
394 /**
395 * Parses the next header line.
396 *
397 * @param headers String with all headers.
398 * @param header Map where to store the current header.
399 */
400 private void parseHeaderLine(final FileItemHeaders headers, final String header) {
401 final var colonOffset = header.indexOf(':');
402 if (colonOffset == -1) {
403 // This header line is malformed, skip it.
404 return;
405 }
406 final var headerName = header.substring(0, colonOffset).trim();
407 final var headerValue = header.substring(colonOffset + 1).trim();
408 headers.addHeader(headerName, headerValue);
409 }
410
411 /**
412 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
413 *
414 * @param request The servlet request to be parsed.
415 * @return A map of {@code FileItem} instances parsed from the request.
416 * @throws FileUploadException if there are problems reading/parsing the request or storing files.
417 */
418 public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;
419
420 /**
421 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
422 *
423 * @param ctx The context for the request to be parsed.
424 * @return A map of {@code FileItem} instances parsed from the request.
425 * @throws FileUploadException if there are problems reading/parsing the request or storing files.
426 */
427 public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
428 final var items = parseRequest(ctx);
429 final Map<String, List<I>> itemsMap = new HashMap<>(items.size());
430
431 for (final I fileItem : items) {
432 final var fieldName = fileItem.getFieldName();
433 final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
434 mappedItems.add(fileItem);
435 }
436
437 return itemsMap;
438 }
439
440 /**
441 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
442 *
443 * @param request The servlet request to be parsed.
444 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
445 * @throws FileUploadException if there are problems reading/parsing the request or storing files.
446 */
447 public abstract List<I> parseRequest(R request) throws FileUploadException;
448
449 /**
450 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
451 *
452 * @param requestContext The context for the request to be parsed.
453 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
454 * @throws FileUploadException if there are problems reading/parsing the request or storing files.
455 */
456 public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
457 final List<I> itemList = new ArrayList<>();
458 var successful = false;
459 try {
460 final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
461 final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
462 getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
463 if (itemList.size() == fileCountMax) {
464 // The next item will exceed the limit.
465 throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size());
466 }
467 // Don't use getName() here to prevent an InvalidFileNameException.
468 // @formatter:off
469 final var fileItem = fileItemFactory.fileItemBuilder()
470 .setFieldName(fileItemInput.getFieldName())
471 .setContentType(fileItemInput.getContentType())
472 .setFormField(fileItemInput.isFormField())
473 .setFileName(fileItemInput.getName())
474 .setFileItemHeaders(fileItemInput.getHeaders())
475 .get();
476 // @formatter:on
477 itemList.add(fileItem);
478 try (var inputStream = fileItemInput.getInputStream();
479 var outputStream = fileItem.getOutputStream()) {
480 IOUtils.copyLarge(inputStream, outputStream, buffer);
481 } catch (final FileUploadException e) {
482 throw e;
483 } catch (final IOException e) {
484 throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e);
485 }
486 });
487 successful = true;
488 return itemList;
489 } catch (final FileUploadException e) {
490 throw e;
491 } catch (final IOException e) {
492 throw new FileUploadException(e.getMessage(), e);
493 } finally {
494 if (!successful) {
495 for (final I fileItem : itemList) {
496 try {
497 fileItem.delete();
498 } catch (final Exception ignored) {
499 // ignored TODO perhaps add to tracker delete failure list somehow?
500 }
501 }
502 }
503 }
504 }
505
506 /**
507 * Sets the maximum number of files allowed per request.
508 *
509 * @param fileCountMax The new limit. {@code -1} means no limit.
510 */
511 public void setFileCountMax(final long fileCountMax) {
512 this.fileCountMax = fileCountMax;
513 }
514
515 /**
516 * Sets the factory class to use when creating file items.
517 *
518 * @param factory The factory class for new file items.
519 */
520 public void setFileItemFactory(final F factory) {
521 this.fileItemFactory = factory;
522 }
523
524 /**
525 * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
526 *
527 * @see #getFileSizeMax()
528 * @param fileSizeMax Maximum size of a single uploaded file.
529 */
530 public void setFileSizeMax(final long fileSizeMax) {
531 this.fileSizeMax = fileSizeMax;
532 }
533
534 /**
535 * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
536 * used. If that is also not specified, or {@code null}, the platform default encoding is used.
537 *
538 * @param headerCharset The encoding used to read part headers.
539 */
540 public void setHeaderCharset(final Charset headerCharset) {
541 this.headerCharset = headerCharset;
542 }
543
544 /**
545 * Sets the progress listener.
546 *
547 * @param progressListener The progress listener, if any. Defaults to null.
548 */
549 public void setProgressListener(final ProgressListener progressListener) {
550 this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
551 }
552
553 /**
554 * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}.
555 *
556 * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
557 * @see #getSizeMax()
558 */
559 public void setSizeMax(final long sizeMax) {
560 this.sizeMax = sizeMax;
561 }
562
563 }