001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.ByteArrayOutputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.OutputStream;
023import java.io.UnsupportedEncodingException;
024import java.nio.charset.Charset;
025
026import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
027import org.apache.commons.io.Charsets;
028import org.apache.commons.io.IOUtils;
029import org.apache.commons.io.build.AbstractOrigin;
030import org.apache.commons.io.build.AbstractStreamBuilder;
031import org.apache.commons.io.output.NullOutputStream;
032
033/**
034 * Low-level API for processing file uploads.
035 *
036 * <p>
037 * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC
038 * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
039 * </p>
040 * <p>
041 * The format of the stream is defined in the following way:
042 * </p>
043 * <pre>
044 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
045 *   encapsulation := delimiter body CRLF<br>
046 *   delimiter := "--" boundary CRLF<br>
047 *   close-delimiter := "--" boundary "--"<br>
048 *   preamble := &lt;ignore&gt;<br>
049 *   epilogue := &lt;ignore&gt;<br>
050 *   body := header-part CRLF body-part<br>
051 *   header-part := 1*header CRLF<br>
052 *   header := header-name ":" header-value<br>
053 *   header-name := &lt;printable ASCII characters except ":"&gt;<br>
054 *   header-value := &lt;any ASCII characters except CR &amp; LF&gt;<br>
055 *   body-data := &lt;arbitrary data&gt;<br>
056 * </pre>
057 *
058 * <p>
 * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
 * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
061 * </p>
062 * <p>
063 * Here is an example of usage of this class:
064 * </p>
065 *
066 * <pre>
067 * try {
068 *     MultipartInput multipartStream = MultipartInput.builder()
069 *             .setBoundary(boundary)
070 *             .setInputStream(input)
071 *             .get();
072 *     boolean nextPart = multipartStream.skipPreamble();
073 *     OutputStream output;
074 *     while (nextPart) {
075 *         String header = multipartStream.readHeaders();
076 *         // process headers
077 *         // create some output stream
078 *         multipartStream.readBodyData(output);
079 *         nextPart = multipartStream.readBoundary();
080 *     }
081 * } catch (MultipartInput.MalformedStreamException e) {
082 *     // the stream failed to follow required syntax
083 * } catch (IOException e) {
084 *     // a read or write error occurred
085 * }
086 * </pre>
087 */
088public final class MultipartInput {
089
090    /**
091     * Builds a new {@link MultipartInput} instance.
092     * <p>
093     * For example:
094     * </p>
095     *
096     * <pre>{@code
097     * MultipartInput factory = MultipartInput.builder().setPath(path).setBufferSize(DEFAULT_THRESHOLD).get();
098     * }
099     * </pre>
100     */
101    public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {
102
103        /**
104         * Boundary.
105         */
106        private byte[] boundary;
107
108        /**
109         * Progress notifier.
110         */
111        private ProgressNotifier progressNotifier;
112
113        /**
114         * The  per part size limit for headers.
115         */
116        private int maxPartHeaderSize = DEFAULT_PART_HEADER_SIZE_MAX;
117
118        /**
119         * Constructs a new instance.
120         */
121        public Builder() {
122            setBufferSizeDefault(DEFAULT_BUFSIZE);
123        }
124
125        /**
126         * Constructs a new instance.
127         * <p>
128         * This builder uses the InputStream, buffer size, boundary and progress notifier aspects.
129         * </p>
130         * <p>
131         * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
132         * {@link UnsupportedOperationException}.
133         * </p>
134         *
135         * @return a new instance.
136         * @throws IOException                   if an I/O error occurs.
137         * @throws UnsupportedOperationException if the origin cannot provide a Path.
138         * @see AbstractOrigin#getReader(Charset)
139         */
140        @Override
141        public MultipartInput get() throws IOException {
142            return new MultipartInput(this);
143        }
144
145        /**
146         * Gets the per part size limit for headers.
147         *
148         * @return The maximum size of the headers in bytes.
149         * @since 2.0.0-M5
150         */
151        public int getMaxPartHeaderSize() {
152            return maxPartHeaderSize;
153        }
154
155        /**
156         * Sets the boundary.
157         *
158         * @param boundary the boundary.
159         * @return {@code this} instance.
160         */
161        public Builder setBoundary(final byte[] boundary) {
162            this.boundary = boundary;
163            return this;
164        }
165
166        /**
167         * Sets the per part size limit for headers.
168         * @param partHeaderSizeMax The maximum size of the headers in bytes.
169         * @return This builder.
170         * @since 2.0.0-M5
171         */
172        public Builder setMaxPartHeaderSize(final int partHeaderSizeMax) {
173            this.maxPartHeaderSize = partHeaderSizeMax;
174            return this;
175        }
176
177        /**
178         * Sets the progress notifier.
179         *
180         * @param progressNotifier progress notifier.
181         * @return {@code this} instance.
182         */
183        public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
184            this.progressNotifier = progressNotifier;
185            return this;
186        }
187    }
188
189    /**
190     * Signals an attempt to set an invalid boundary token.
191     */
192    public static class FileUploadBoundaryException extends FileUploadException {
193
194        /**
195         * The UID to use when serializing this instance.
196         */
197        private static final long serialVersionUID = 2;
198
199        /**
200         * Constructs an instance with the specified detail message.
201         *
202         * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
203         */
204        public FileUploadBoundaryException(final String message) {
205            super(message);
206        }
207
208    }
209
210    /**
211     * An {@link InputStream} for reading an items contents.
212     */
213    public class ItemInputStream extends InputStream {
214
215        /**
216         * Offset when converting negative bytes to integers.
217         */
218        private static final int BYTE_POSITIVE_OFFSET = 256;
219
220        /**
221         * The number of bytes, which have been read so far.
222         */
223        private long total;
224
225        /**
226         * The number of bytes, which must be hold, because they might be a part of the boundary.
227         */
228        private int pad;
229
230        /**
231         * The current offset in the buffer.
232         */
233        private int pos;
234
235        /**
236         * Whether the stream is already closed.
237         */
238        private boolean closed;
239
240        /**
241         * Creates a new instance.
242         */
243        ItemInputStream() {
244            findSeparator();
245        }
246
247        /**
248         * Returns the number of bytes, which are currently available, without blocking.
249         *
250         * @throws IOException An I/O error occurs.
251         * @return Number of bytes in the buffer.
252         */
253        @Override
254        public int available() throws IOException {
255            if (pos == -1) {
256                return tail - head - pad;
257            }
258            return pos - head;
259        }
260
261        private void checkOpen() throws ItemSkippedException {
262            if (closed) {
263                throw new FileItemInput.ItemSkippedException("checkOpen()");
264            }
265        }
266
267        /**
268         * Closes the input stream.
269         *
270         * @throws IOException An I/O error occurred.
271         */
272        @Override
273        public void close() throws IOException {
274            close(false);
275        }
276
277        /**
278         * Closes the input stream.
279         *
280         * @param closeUnderlying Whether to close the underlying stream (hard close)
281         * @throws IOException An I/O error occurred.
282         */
283        public void close(final boolean closeUnderlying) throws IOException {
284            if (closed) {
285                return;
286            }
287            if (closeUnderlying) {
288                closed = true;
289                input.close();
290            } else {
291                for (;;) {
292                    var avail = available();
293                    if (avail == 0) {
294                        avail = makeAvailable();
295                        if (avail == 0) {
296                            break;
297                        }
298                    }
299                    if (skip(avail) != avail) {
300                        // TODO What to do?
301                    }
302                }
303            }
304            closed = true;
305        }
306
307        /**
308         * Called for finding the separator.
309         */
310        private void findSeparator() {
311            pos = MultipartInput.this.findSeparator();
312            if (pos == -1) {
313                if (tail - head > keepRegion) {
314                    pad = keepRegion;
315                } else {
316                    pad = tail - head;
317                }
318            }
319        }
320
321        /**
322         * Gets the number of bytes, which have been read by the stream.
323         *
324         * @return Number of bytes, which have been read so far.
325         */
326        public long getBytesRead() {
327            return total;
328        }
329
330        /**
331         * Tests whether this instance is closed.
332         *
333         * @return whether this instance is closed.
334         */
335        public boolean isClosed() {
336            return closed;
337        }
338
339        /**
340         * Attempts to read more data.
341         *
342         * @return Number of available bytes
343         * @throws IOException An I/O error occurred.
344         */
345        private int makeAvailable() throws IOException {
346            if (pos != -1) {
347                return 0;
348            }
349
350            // Move the data to the beginning of the buffer.
351            total += tail - head - pad;
352            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
353
354            // Refill buffer with new data.
355            head = 0;
356            tail = pad;
357
358            for (;;) {
359                final var bytesRead = input.read(buffer, tail, bufSize - tail);
360                if (bytesRead == -1) {
361                    // The last pad amount is left in the buffer.
362                    // Boundary can't be in there so signal an error
363                    // condition.
364                    final var msg = "Stream ended unexpectedly";
365                    throw new MalformedStreamException(msg);
366                }
367                if (notifier != null) {
368                    notifier.noteBytesRead(bytesRead);
369                }
370                tail += bytesRead;
371
372                findSeparator();
373                final var av = available();
374
375                if (av > 0 || pos != -1) {
376                    return av;
377                }
378            }
379        }
380
381        /**
382         * Reads the next byte in the stream.
383         *
384         * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
385         * @throws IOException An I/O error occurred.
386         */
387        @Override
388        public int read() throws IOException {
389            checkOpen();
390            if (available() == 0 && makeAvailable() == 0) {
391                return -1;
392            }
393            ++total;
394            final int b = buffer[head++];
395            if (b >= 0) {
396                return b;
397            }
398            return b + BYTE_POSITIVE_OFFSET;
399        }
400
401        /**
402         * Reads bytes into the given buffer.
403         *
404         * @param b   The destination buffer, where to write to.
405         * @param off Offset of the first byte in the buffer.
406         * @param len Maximum number of bytes to read.
407         * @return Number of bytes, which have been actually read, or -1 for EOF.
408         * @throws IOException An I/O error occurred.
409         */
410        @Override
411        public int read(final byte[] b, final int off, final int len) throws IOException {
412            checkOpen();
413            if (len == 0) {
414                return 0;
415            }
416            var res = available();
417            if (res == 0) {
418                res = makeAvailable();
419                if (res == 0) {
420                    return -1;
421                }
422            }
423            res = Math.min(res, len);
424            System.arraycopy(buffer, head, b, off, res);
425            head += res;
426            total += res;
427            return res;
428        }
429
430        /**
431         * Skips the given number of bytes.
432         *
433         * @param bytes Number of bytes to skip.
434         * @return The number of bytes, which have actually been skipped.
435         * @throws IOException An I/O error occurred.
436         */
437        @Override
438        public long skip(final long bytes) throws IOException {
439            checkOpen();
440            var available = available();
441            if (available == 0) {
442                available = makeAvailable();
443                if (available == 0) {
444                    return 0;
445                }
446            }
447            // Fix "Implicit narrowing conversion in compound assignment"
448            // https://github.com/apache/commons-fileupload/security/code-scanning/118
449            // Math.min always returns an int because available is an int.
450            final var res = Math.toIntExact(Math.min(available, bytes));
451            head += res;
452            return res;
453        }
454
455    }
456
457    /**
458     * Signals that the input stream fails to follow the required syntax.
459     */
460    public static class MalformedStreamException extends FileUploadException {
461
462        /**
463         * The UID to use when serializing this instance.
464         */
465        private static final long serialVersionUID = 2;
466
467        /**
468         * Constructs an {@code MalformedStreamException} with the specified detail message.
469         *
470         * @param message The detail message.
471         */
472        public MalformedStreamException(final String message) {
473            super(message);
474        }
475
476        /**
477         * Constructs an {@code MalformedStreamException} with the specified detail message.
478         *
479         * @param message The detail message.
480         * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
481         *                cause is nonexistent or unknown.)
482         */
483        public MalformedStreamException(final String message, final Throwable cause) {
484            super(message, cause);
485        }
486
487    }
488
489    /**
490     * Internal class, which is used to invoke the {@link ProgressListener}.
491     */
492    public static class ProgressNotifier {
493
494        /**
495         * The listener to invoke.
496         */
497        private final ProgressListener progressListener;
498
499        /**
500         * Number of expected bytes, if known, or -1.
501         */
502        private final long contentLength;
503
504        /**
505         * Number of bytes, which have been read so far.
506         */
507        private long bytesRead;
508
509        /**
510         * Number of items, which have been read so far.
511         */
512        private int items;
513
514        /**
515         * Creates a new instance with the given listener and content length.
516         *
517         * @param progressListener The listener to invoke.
518         * @param contentLength    The expected content length.
519         */
520        public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
521            this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
522            this.contentLength = contentLength;
523        }
524
525        /**
526         * Called to indicate that bytes have been read.
527         *
528         * @param byteCount Number of bytes, which have been read.
529         */
530        void noteBytesRead(final int byteCount) {
531            //
532            // Indicates, that the given number of bytes have been read from the input stream.
533            //
534            bytesRead += byteCount;
535            notifyListener();
536        }
537
538        /**
539         * Called to indicate, that a new file item has been detected.
540         */
541        public void noteItem() {
542            ++items;
543            notifyListener();
544        }
545
546        /**
547         * Called for notifying the listener.
548         */
549        private void notifyListener() {
550            progressListener.update(bytesRead, contentLength, items);
551        }
552
553    }
554
555    /**
556     * The Carriage Return ASCII character value.
557     */
558    public static final byte CR = 0x0D;
559
560    /**
561     * The Line Feed ASCII character value.
562     */
563    public static final byte LF = 0x0A;
564
565    /**
566     * The dash (-) ASCII character value.
567     */
568    public static final byte DASH = 0x2D;
569
570    /**
571     * The default length of the buffer used for processing a request.
572     */
573    static final int DEFAULT_BUFSIZE = 4096;
574
575    /**
576     * Default per part header size limit in bytes.
577     * @since 2.0.0-M4
578     */
579    public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512;
580
581    /**
582     * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
583     */
584    static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };
585
    /**
     * A byte sequence that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
     */
589    static final byte[] FIELD_SEPARATOR = { CR, LF };
590
    /**
     * A byte sequence that follows a delimiter of the last encapsulation in the stream ({@code --}).
     */
594    static final byte[] STREAM_TERMINATOR = { DASH, DASH };
595
596    /**
597     * A byte sequence that precedes a boundary ({@code CRLF--}).
598     */
599    static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };
600
601    /**
602     * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
603     *
604     * @param a     The first array to compare.
605     * @param b     The second array to compare.
606     * @param count How many bytes should be compared.
607     * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
608     */
609    static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
610        for (var i = 0; i < count; i++) {
611            if (a[i] != b[i]) {
612                return false;
613            }
614        }
615        return true;
616    }
617
618    /**
619     * Constructs a new {@link Builder}.
620     *
621     * @return a new {@link Builder}.
622     */
623    public static Builder builder() {
624        return new Builder();
625    }
626
627    /**
628     * The input stream from which data is read.
629     */
630    private final InputStream input;
631
632    /**
633     * The length of the boundary token plus the leading {@code CRLF--}.
634     */
635    private int boundaryLength;
636
637    /**
638     * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably.
639     */
640    private final int keepRegion;
641
642    /**
643     * The byte sequence that partitions the stream.
644     */
645    private final byte[] boundary;
646
647    /**
648     * The table for Knuth-Morris-Pratt search algorithm.
649     */
650    private final int[] boundaryTable;
651
652    /**
653     * The length of the buffer used for processing the request.
654     */
655    private final int bufSize;
656
657    /**
658     * The buffer used for processing the request.
659     */
660    private final byte[] buffer;
661
    /**
     * The index of first valid character in the buffer. <br>
     * {@code 0 <= head < bufSize}
     */
666    private int head;
667
    /**
     * The index of last valid character in the buffer + 1. <br>
     * {@code 0 <= tail <= bufSize}
     */
672    private int tail;
673
674    /**
675     * The content encoding to use when reading headers.
676     */
677    private Charset headerCharset;
678
679    /**
680     * The progress notifier, if any, or null.
681     */
682    private final ProgressNotifier notifier;
683
684    /**
685     * The maximum size of the headers in bytes.
686     */
687    private final int maxPartHeaderSize;
688
689    /**
690     * Constructs a {@code MultipartInput} with a custom size buffer.
691     * <p>
692     * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
693     * data. Too small a buffer size setting will degrade performance.
694     * </p>
695     *
696     * @param input      The {@code InputStream} to serve as a data source.
697     * @param boundary   The token used for dividing the stream into {@code encapsulations}.
698     * @param bufferSize The size of the buffer to be used, in bytes.
699     * @param notifier   The notifier, which is used for calling the progress listener, if any.
700     * @throws IOException Thrown if an I/O error occurs.
701     * @throws IllegalArgumentException If the buffer size is too small.
702     */
703    private MultipartInput(final Builder builder) throws IOException {
704        if (builder.boundary == null) {
705            throw new IllegalArgumentException("boundary may not be null");
706        }
707        // We prepend CR/LF to the boundary to chop trailing CR/LF from
708        // body-data tokens.
709        this.boundaryLength = builder.boundary.length + BOUNDARY_PREFIX.length;
710        if (builder.getBufferSize() < this.boundaryLength + 1) {
711            throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
712        }
713        this.input = builder.getInputStream();
714        this.bufSize = Math.max(builder.getBufferSize(), boundaryLength * 2);
715        this.buffer = new byte[this.bufSize];
716        this.notifier = builder.progressNotifier;
717        this.maxPartHeaderSize = builder.getMaxPartHeaderSize();
718        this.boundary = new byte[this.boundaryLength];
719        this.boundaryTable = new int[this.boundaryLength + 1];
720        this.keepRegion = this.boundary.length;
721        System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
722        System.arraycopy(builder.boundary, 0, this.boundary, BOUNDARY_PREFIX.length, builder.boundary.length);
723        computeBoundaryTable();
724        head = 0;
725        tail = 0;
726    }
727
728    /**
729     * Computes the table used for Knuth-Morris-Pratt search algorithm.
730     */
731    private void computeBoundaryTable() {
732        var position = 2;
733        var candidate = 0;
734
735        boundaryTable[0] = -1;
736        boundaryTable[1] = 0;
737
738        while (position <= boundaryLength) {
739            if (boundary[position - 1] == boundary[candidate]) {
740                boundaryTable[position] = candidate + 1;
741                candidate++;
742                position++;
743            } else if (candidate > 0) {
744                candidate = boundaryTable[candidate];
745            } else {
746                boundaryTable[position] = 0;
747                position++;
748            }
749        }
750    }
751
752    /**
753     * Reads {@code body-data} from the current {@code encapsulation} and discards it.
754     * <p>
755     * Use this method to skip encapsulations you don't need or don't understand.
756     * </p>
757     *
758     * @return The amount of data discarded.
759     * @throws MalformedStreamException if the stream ends unexpectedly.
760     * @throws IOException              if an i/o error occurs.
761     */
762    public long discardBodyData() throws MalformedStreamException, IOException {
763        return readBodyData(NullOutputStream.INSTANCE);
764    }
765
766    /**
767     * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
768     *
769     * @param value The value to find.
770     * @param pos   The starting position for searching.
771     * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
772     */
773    protected int findByte(final byte value, final int pos) {
774        for (var i = pos; i < tail; i++) {
775            if (buffer[i] == value) {
776                return i;
777            }
778        }
779
780        return -1;
781    }
782
783    /**
784     * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
785     *
786     * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
787     */
788    protected int findSeparator() {
789        var bufferPos = this.head;
790        var tablePos = 0;
791        while (bufferPos < this.tail) {
792            while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
793                tablePos = boundaryTable[tablePos];
794            }
795            bufferPos++;
796            tablePos++;
797            if (tablePos == boundaryLength) {
798                return bufferPos - boundaryLength;
799            }
800        }
801        return -1;
802    }
803
804    /**
805     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
806     * used.
807     *
808     * @return The encoding used to read part headers.
809     */
810    public Charset getHeaderCharset() {
811        return headerCharset;
812    }
813
814    /**
815     * Returns the per part size limit for headers.
816     *
817     * @return The maximum size of the headers in bytes.
818     * @since 2.0.0-M5
819     */
820    public int getMaxPartHeaderSize() {
821        return maxPartHeaderSize;
822    }
823
824    /**
825     * Creates a new {@link ItemInputStream}.
826     *
827     * @return A new instance of {@link ItemInputStream}.
828     */
829    public ItemInputStream newInputStream() {
830        return new ItemInputStream();
831    }
832
833    /**
834     * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
835     * <p>
836     * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
837     * </p>
838     *
839     * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
840     * @return the amount of data written.
841     * @throws MalformedStreamException if the stream ends unexpectedly.
842     * @throws IOException              if an i/o error occurs.
843     */
844    public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
845        try (var inputStream = newInputStream()) {
846            return IOUtils.copyLarge(inputStream, output);
847        }
848    }
849
850    /**
851     * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
852     *
853     * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
854     * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits
855     * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
856     */
857    public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
858        final var marker = new byte[2];
859        final boolean nextChunk;
860        head += boundaryLength;
861        try {
862            marker[0] = readByte();
863            if (marker[0] == LF) {
864                // Work around IE5 Mac bug with input type=image.
865                // Because the boundary delimiter, not including the trailing
866                // CRLF, must not appear within any file (RFC 2046, section
867                // 5.1.1), we know the missing CR is due to a buggy browser
868                // rather than a file containing something similar to a
869                // boundary.
870                return true;
871            }
872
873            marker[1] = readByte();
874            if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
875                nextChunk = false;
876            } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
877                nextChunk = true;
878            } else {
879                throw new MalformedStreamException("Unexpected characters follow a boundary");
880            }
881        } catch (final FileUploadSizeException e) {
882            throw e;
883        } catch (final IOException e) {
884            throw new MalformedStreamException("Stream ended unexpectedly", e);
885        }
886        return nextChunk;
887    }
888
889    /**
890     * Reads a byte from the {@code buffer}, and refills it as necessary.
891     *
892     * @return The next byte from the input stream.
893     * @throws IOException if there is no more data available.
894     */
895    public byte readByte() throws IOException {
896        // Buffer depleted ?
897        if (head == tail) {
898            head = 0;
899            // Refill.
900            tail = input.read(buffer, head, bufSize);
901            if (tail == -1) {
902                // No more data available.
903                throw new IOException("No more data is available");
904            }
905            if (notifier != null) {
906                notifier.noteBytesRead(tail);
907            }
908        }
909        return buffer[head++];
910    }
911
912    /**
913     * Reads the {@code header-part} of the current {@code encapsulation}.
914     * <p>
     * Headers are returned verbatim as read from the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
916     * </p>
     * <p>
     * The size of the header section is limited to {@code getMaxPartHeaderSize()} bytes; a limit of {@code -1} disables the check.
     * </p>
920     *
921     * @return The {@code header-part} of the current encapsulation.
922     * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits.
923     * @throws MalformedStreamException if the stream ends unexpectedly.
924     */
925    public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
926        var i = 0;
927        byte b;
928        // to support multi-byte characters
929        final var baos = new ByteArrayOutputStream();
930        var size = 0;
931        while (i < HEADER_SEPARATOR.length) {
932            try {
933                b = readByte();
934            } catch (final FileUploadSizeException e) {
935                // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
936                throw e;
937            } catch (final IOException e) {
938                throw new MalformedStreamException("Stream ended unexpectedly", e);
939            }
940            final int phsm = getMaxPartHeaderSize();
941            if (phsm != -1 && ++size > phsm) {
942                throw new FileUploadSizeException(
943                        String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size);
944            }
945            if (b == HEADER_SEPARATOR[i]) {
946                i++;
947            } else {
948                i = 0;
949            }
950            baos.write(b);
951        }
952        try {
953            return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
954        } catch (final UnsupportedEncodingException e) {
955            // not possible
956            throw new IllegalStateException(e);
957        }
958    }
959
960    /**
961     * Changes the boundary token used for partitioning the stream.
962     * <p>
963     * This method allows single pass processing of nested multipart streams.
964     * </p>
965     * <p>
     * The boundary token of the nested stream must have the same length as the boundary token of the parent stream.
967     * </p>
968     * <p>
969     * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
970     * </p>
971     *
972     * @param boundary The boundary to be used for parsing of the nested stream.
973     * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
974     */
975    public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
976        if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
977            throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
978        }
979        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
980        computeBoundaryTable();
981    }
982
983    /**
984     * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding
985     * is used.
986     *
987     * @param headerCharset The encoding used to read part headers.
988     */
    public void setHeaderCharset(final Charset headerCharset) {
        // Stored as-is, null included; readHeaders() supplies Charset.defaultCharset() as the fallback when decoding.
        this.headerCharset = headerCharset;
    }
992
993    /**
994     * Finds the beginning of the first {@code encapsulation}.
995     *
996     * @return {@code true} if an {@code encapsulation} was found in the stream.
997     * @throws IOException if an i/o error occurs.
998     */
999    public boolean skipPreamble() throws IOException {
1000        // First delimiter may be not preceded with a CRLF.
1001        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
1002        boundaryLength = boundary.length - 2;
1003        computeBoundaryTable();
1004        try {
1005            // Discard all data up to the delimiter.
1006            discardBodyData();
1007
1008            // Read boundary - if succeeded, the stream contains an
1009            // encapsulation.
1010            return readBoundary();
1011        } catch (final MalformedStreamException e) {
1012            return false;
1013        } finally {
1014            // Restore delimiter.
1015            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
1016            boundaryLength = boundary.length;
1017            boundary[0] = CR;
1018            boundary[1] = LF;
1019            computeBoundaryTable();
1020        }
1021    }
1022
1023}