001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.IOException;
020import java.nio.charset.Charset;
021import java.nio.charset.StandardCharsets;
022import java.util.ArrayList;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Locale;
026import java.util.Map;
027import java.util.Objects;
028
029import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
030import org.apache.commons.io.IOUtils;
031
032/**
033 * High level API for processing file uploads.
034 * <p>
035 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
036 * <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
037 * a given HTML widget.
038 * </p>
039 * <p>
040 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
041 * </p>
042 *
043 * @param <R> The request context type.
044 * @param <I> The FileItem type.
045 * @param <F> the FileItemFactory type.
046 */
047public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {
048
049    /**
050     * Boundary parameter key.
051     */
052    private static final String BOUNDARY_KEY = "boundary";
053
054    /**
055     * Name parameter key.
056     */
057    private static final String NAME_KEY = "name";
058
059    /**
060     * File name parameter key.
061     */
062    private static final String FILENAME_KEY = "filename";
063
064    /**
065     * Constant for HTTP POST method.
066     */
067    private static final String POST_METHOD = "POST";
068
069    /**
070     * Constant for HTTP PUT method.
071     */
072    private static final String PUT_METHOD = "PUT";
073
074    /**
075     * Constant for HTTP PATCH method.
076     */
077    private static final String PATCH_METHOD = "PATCH";
078
079    /**
080     * HTTP content type header name.
081     */
082    public static final String CONTENT_TYPE = "Content-type";
083
084    /**
085     * HTTP content disposition header name.
086     */
087    public static final String CONTENT_DISPOSITION = "Content-disposition";
088
089    /**
090     * HTTP content length header name.
091     */
092    public static final String CONTENT_LENGTH = "Content-length";
093
094    /**
095     * Content-disposition value for form data.
096     */
097    public static final String FORM_DATA = "form-data";
098
099    /**
100     * Content-disposition value for file attachment.
101     */
102    public static final String ATTACHMENT = "attachment";
103
104    /**
105     * Part of HTTP content type header.
106     */
107    public static final String MULTIPART = "multipart/";
108
109    /**
110     * HTTP content type header for multipart forms.
111     */
112    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
113
114    /**
115     * HTTP content type header for multiple uploads.
116     */
117    public static final String MULTIPART_MIXED = "multipart/mixed";
118
119    /**
120     * Utility method that determines whether the request contains multipart content.
121     * <p>
122     * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
123     * method is static, it is not possible to provide its replacement until this method is removed.
124     * </p>
125     *
126     * @param ctx The request context to be evaluated. Must be non-null.
127     * @return {@code true} if the request is multipart; {@code false} otherwise.
128     */
129    public static final boolean isMultipartContent(final RequestContext ctx) {
130        final var contentType = ctx.getContentType();
131        if (contentType == null) {
132            return false;
133        }
134        return contentType.toLowerCase(Locale.ROOT).startsWith(MULTIPART);
135    }
136
137    /**
138     * Checks if a given request method is a valid multipart request method.
139     *
140     * @param method The request method verb.
141     * @return {@code true} if the request method supports multipart request payloads; {@code false} otherwise.
142     * @since 2.0.0-M5
143     */
144    protected static boolean isMultipartRequestMethod(final String method) {
145        return POST_METHOD.equalsIgnoreCase(method) || PUT_METHOD.equalsIgnoreCase(method) || PATCH_METHOD.equalsIgnoreCase(method);
146    }
147
148    /**
149     * The maximum size permitted for the complete request, as opposed to {@link #maxFileSize}. A value of -1 indicates no maximum.
150     */
151    private long maxSize = -1;
152
153    /**
154     * The maximum size permitted for a single uploaded file, as opposed to {@link #maxSize}. A value of -1 indicates no maximum.
155     */
156    private long maxFileSize = -1;
157
158    /**
159     * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
160     */
161    private long maxFileCount = -1;
162
163    /**
164     * The maximum permitted size of the headers provided with a single part in bytes.
165     */
166    private int maxPartHeaderSize = MultipartInput.DEFAULT_PART_HEADER_SIZE_MAX;
167
168    /**
169     * The content encoding to use when reading part headers.
170     */
171    private Charset headerCharset;
172
173    /**
174     * The progress listener.
175     */
176    private ProgressListener progressListener = ProgressListener.NOP;
177
178    /**
179     * The factory to use to create new form items.
180     */
181    private F fileItemFactory;
182
183    /**
184     * Constructs a new instance for subclasses.
185     */
186    public AbstractFileUpload() {
187        // empty
188    }
189
190    /**
191     * Gets the boundary from the {@code Content-type} header.
192     *
193     * @param contentType The value of the content type header from which to extract the boundary value.
194     * @return The boundary, as a byte array.
195     */
196    public byte[] getBoundary(final String contentType) {
197        final var parser = new ParameterParser();
198        parser.setLowerCaseNames(true);
199        // Parameter parser can handle null input
200        final var params = parser.parse(contentType, new char[] { ';', ',' });
201        final var boundaryStr = params.get(BOUNDARY_KEY);
202        return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
203    }
204
205    /**
206     * Gets the field name from the {@code Content-disposition} header.
207     *
208     * @param headers A {@code Map} containing the HTTP request headers.
209     * @return The field name for the current {@code encapsulation}.
210     */
211    public String getFieldName(final FileItemHeaders headers) {
212        return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
213    }
214
215    /**
216     * Gets the field name, which is given by the content-disposition header.
217     *
218     * @param contentDisposition The content-dispositions header value.
219     * @return The field name.
220     */
221    private String getFieldName(final String contentDisposition) {
222        String fieldName = null;
223        if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ROOT).startsWith(FORM_DATA)) {
224            final var parser = new ParameterParser();
225            parser.setLowerCaseNames(true);
226            // Parameter parser can handle null input
227            final var params = parser.parse(contentDisposition, ';');
228            fieldName = params.get(NAME_KEY);
229            if (fieldName != null) {
230                fieldName = fieldName.trim();
231            }
232        }
233        return fieldName;
234    }
235
236    /**
237     * Gets the factory class used when creating file items.
238     *
239     * @return The factory class for new file items.
240     */
241    public F getFileItemFactory() {
242        return fileItemFactory;
243    }
244
245    /**
246     * Gets the file name from the {@code Content-disposition} header.
247     *
248     * @param headers The HTTP headers object.
249     * @return The file name for the current {@code encapsulation}.
250     */
251    public String getFileName(final FileItemHeaders headers) {
252        return getFileName(headers.getHeader(CONTENT_DISPOSITION));
253    }
254
255    /**
256     * Gets the given content-disposition headers file name.
257     *
258     * @param contentDisposition The content-disposition headers value.
259     * @return The file name
260     */
261    private String getFileName(final String contentDisposition) {
262        String fileName = null;
263        if (contentDisposition != null) {
264            final var cdl = contentDisposition.toLowerCase(Locale.ROOT);
265            if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
266                final var parser = new ParameterParser();
267                parser.setLowerCaseNames(true);
268                // Parameter parser can handle null input
269                final var params = parser.parse(contentDisposition, ';');
270                if (params.containsKey(FILENAME_KEY)) {
271                    fileName = params.get(FILENAME_KEY);
272                    if (fileName != null) {
273                        fileName = fileName.trim();
274                    } else {
275                        // Even if there is no value, the parameter is present,
276                        // so we return an empty file name rather than no file
277                        // name.
278                        fileName = "";
279                    }
280                }
281            }
282        }
283        return fileName;
284    }
285
286    /**
287     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
288     * that is also not specified, or {@code null}, the platform default encoding is used.
289     *
290     * @return The encoding used to read part headers.
291     */
292    public Charset getHeaderCharset() {
293        return headerCharset;
294    }
295
296    /**
297     * Gets a file item iterator.
298     *
299     * @param request The servlet request to be parsed.
300     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
301     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
302     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
303     *                             uploaded content.
304     */
305    public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;
306
307    /**
308     * Gets an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
309     *
310     * @param requestContext The context for the request to be parsed.
311     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
312     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
313     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
314     *                             uploaded content.
315     */
316    public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
317        return new FileItemInputIteratorImpl(this, requestContext);
318    }
319
320    /**
321     * Gets the maximum number of files allowed in a single request.
322     *
323     * @return The maximum number of files allowed in a single request.
324     */
325    public long getMaxFileCount() {
326        return maxFileCount;
327    }
328
329    /**
330     * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getMaxSize()}.
331     *
332     * @see #setMaxFileSize(long)
333     * @return Maximum size of a single uploaded file.
334     */
335    public long getMaxFileSize() {
336        return maxFileSize;
337    }
338
339    /**
340     * Gets the per part size limit for headers.
341     *
342     * @return The maximum size of the headers for a single part in bytes.
343     *
344     * @since 2.0.0-M5
345     */
346    public int getMaxPartHeaderSize() {
347        return maxPartHeaderSize;
348    }
349
350    /**
351     * Gets the maximum allowed size of a complete request, as opposed to {@link #getMaxFileSize()}.
352     *
353     * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
354     * @see #setMaxSize(long)
355     */
356    public long getMaxSize() {
357        return maxSize;
358    }
359
360    /**
361     * Parses the {@code header-part} and returns as key/value pairs.
362     * <p>
363     * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
364     * </p>
365     *
366     * @param headerPart The {@code header-part} of the current {@code encapsulation}.
367     * @return A {@code Map} containing the parsed HTTP request headers.
368     */
369    public FileItemHeaders getParsedHeaders(final String headerPart) {
370        final var len = headerPart.length();
371        final var headers = newFileItemHeaders();
372        var start = 0;
373        for (;;) {
374            var end = parseEndOfLine(headerPart, start);
375            if (start == end) {
376                break;
377            }
378            final var header = new StringBuilder(headerPart.substring(start, end));
379            start = end + 2;
380            while (start < len) {
381                var nonWs = start;
382                while (nonWs < len) {
383                    final var c = headerPart.charAt(nonWs);
384                    if (c != ' ' && c != '\t') {
385                        break;
386                    }
387                    ++nonWs;
388                }
389                if (nonWs == start) {
390                    break;
391                }
392                // Continuation line found
393                end = parseEndOfLine(headerPart, nonWs);
394                header.append(' ').append(headerPart, nonWs, end);
395                start = end + 2;
396            }
397            parseHeaderLine(headers, header.toString());
398        }
399        return headers;
400    }
401
402    /**
403     * Gets the progress listener.
404     *
405     * @return The progress listener, if any, or null.
406     */
407    public ProgressListener getProgressListener() {
408        return progressListener;
409    }
410
411    /**
412     * Creates a new instance of {@link FileItemHeaders}.
413     *
414     * @return The new instance.
415     */
416    protected FileItemHeaders newFileItemHeaders() {
417        return AbstractFileItemBuilder.newFileItemHeaders();
418    }
419
420    /**
421     * Skips bytes until the end of the current line.
422     *
423     * @param headerPart The headers, which are being parsed.
424     * @param end        Index of the last byte, which has yet been processed.
425     * @return Index of the \r\n sequence, which indicates end of line.
426     */
427    private int parseEndOfLine(final String headerPart, final int end) {
428        var index = end;
429        for (;;) {
430            final var offset = headerPart.indexOf('\r', index);
431            if (offset == -1 || offset + 1 >= headerPart.length()) {
432                throw new IllegalStateException("Expected headers to be terminated by an empty line.");
433            }
434            if (headerPart.charAt(offset + 1) == '\n') {
435                return offset;
436            }
437            index = offset + 1;
438        }
439    }
440
441    /**
442     * Parses the next header line.
443     *
444     * @param headers String with all headers.
445     * @param header  Map where to store the current header.
446     */
447    private void parseHeaderLine(final FileItemHeaders headers, final String header) {
448        final var colonOffset = header.indexOf(':');
449        if (colonOffset == -1) {
450            // This header line is malformed, skip it.
451            return;
452        }
453        final var headerName = header.substring(0, colonOffset).trim();
454        final var headerValue = header.substring(colonOffset + 1).trim();
455        headers.addHeader(headerName, headerValue);
456    }
457
458    /**
459     * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
460     *
461     * @param request The servlet request to be parsed.
462     * @return A map of {@code FileItem} instances parsed from the request.
463     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
464     */
465    public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;
466
467    /**
468     * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
469     *
470     * @param ctx The context for the request to be parsed.
471     * @return A map of {@code FileItem} instances parsed from the request.
472     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
473     */
474    public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
475        final var items = parseRequest(ctx);
476        final Map<String, List<I>> itemsMap = new HashMap<>(items.size());
477
478        for (final I fileItem : items) {
479            final var fieldName = fileItem.getFieldName();
480            final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
481            mappedItems.add(fileItem);
482        }
483
484        return itemsMap;
485    }
486
487    /**
488     * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
489     *
490     * @param request The servlet request to be parsed.
491     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
492     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
493     */
494    public abstract List<I> parseRequest(R request) throws FileUploadException;
495
496    /**
497     * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
498     *
499     * @param requestContext The context for the request to be parsed.
500     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
501     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
502     */
503    public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
504        final List<I> itemList = new ArrayList<>();
505        var successful = false;
506        try {
507            final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
508            final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
509            getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
510                final int size = itemList.size();
511                if (size == maxFileCount) {
512                    // The next item will exceed the limit.
513                    throw new FileUploadFileCountLimitException(
514                            String.format("Request '%s' failed: Maximum file count %,d exceeded.", MULTIPART_FORM_DATA, Long.valueOf(maxFileCount)),
515                            getMaxFileCount(), size);
516                }
517                // Don't use getName() here to prevent an InvalidFileNameException.
518                // @formatter:off
519                final var fileItem = fileItemFactory.fileItemBuilder()
520                    .setFieldName(fileItemInput.getFieldName())
521                    .setContentType(fileItemInput.getContentType())
522                    .setFormField(fileItemInput.isFormField())
523                    .setFileName(fileItemInput.getName())
524                    .setFileItemHeaders(fileItemInput.getHeaders())
525                    .get();
526                // @formatter:on
527                itemList.add(fileItem);
528                try (var inputStream = fileItemInput.getInputStream(); var outputStream = fileItem.getOutputStream()) {
529                    IOUtils.copyLarge(inputStream, outputStream, buffer);
530                } catch (final FileUploadException e) {
531                    throw e;
532                } catch (final IOException e) {
533                    throw new FileUploadException(String.format("Request '%s' failed: %s", MULTIPART_FORM_DATA, e.getMessage()), e);
534                }
535            });
536            successful = true;
537            return itemList;
538        } catch (final FileUploadException e) {
539            throw e;
540        } catch (final IOException e) {
541            throw new FileUploadException(e.getMessage(), e);
542        } finally {
543            if (!successful) {
544                for (final I fileItem : itemList) {
545                    try {
546                        fileItem.delete();
547                    } catch (final Exception ignored) {
548                        // ignored TODO perhaps add to tracker delete failure list somehow?
549                    }
550                }
551            }
552        }
553    }
554
555    /**
556     * Sets the factory class to use when creating file items.
557     *
558     * @param factory The factory class for new file items.
559     */
560    public void setFileItemFactory(final F factory) {
561        this.fileItemFactory = factory;
562    }
563
564    /**
565     * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
566     * used. If that is also not specified, or {@code null}, the platform default encoding is used.
567     *
568     * @param headerCharset The encoding used to read part headers.
569     */
570    public void setHeaderCharset(final Charset headerCharset) {
571        this.headerCharset = headerCharset;
572    }
573
574    /**
575     * Sets the maximum number of files allowed per request.
576     *
577     * @param fileCountMax The new limit. {@code -1} means no limit.
578     */
579    public void setMaxFileCount(final long fileCountMax) {
580        this.maxFileCount = fileCountMax;
581    }
582
583    /**
584     * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getMaxSize()}.
585     *
586     * @see #getMaxFileSize()
587     * @param fileSizeMax Maximum size of a single uploaded file.
588     */
589    public void setMaxFileSize(final long fileSizeMax) {
590        this.maxFileSize = fileSizeMax;
591    }
592
593    /**
594     * Sets the per part size limit for headers.
595     *
596     * @param partHeaderSizeMax The maximum size of the headers in bytes.
597     *
598     * @since 2.0.0-M5
599     */
600    public void setMaxPartHeaderSize(final int partHeaderSizeMax) {
601        this.maxPartHeaderSize = partHeaderSizeMax;
602    }
603
604    /**
605     * Sets the maximum allowed size of a complete request, as opposed to {@link #setMaxFileSize(long)}.
606     *
607     * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
608     * @see #getMaxSize()
609     */
610    public void setMaxSize(final long sizeMax) {
611        this.maxSize = sizeMax;
612    }
613
614    /**
615     * Sets the progress listener.
616     *
617     * @param progressListener The progress listener, if any. Defaults to null.
618     */
619    public void setProgressListener(final ProgressListener progressListener) {
620        this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
621    }
622
623}