Kea 1.5.0
base_n.cc
Go to the documentation of this file.
1// Copyright (C) 2010-2015 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
14#include <util/encode/base64.h>
15
17
18#include <boost/archive/iterators/base64_from_binary.hpp>
19#include <boost/archive/iterators/binary_from_base64.hpp>
20#include <boost/archive/iterators/transform_width.hpp>
21#include <boost/math/common_factor.hpp>
22
23#include <stdint.h>
24#include <stdexcept>
25#include <cassert>
26#include <iterator>
27#include <string>
28#include <vector>
29
30using namespace std;
31using namespace boost::archive::iterators;
32
33namespace isc {
34namespace util {
35namespace encode {
36
37// Some versions of clang cannot handle exceptions in unnamed namespaces
38// so this exception is defined in an 'internal' namespace
namespace clang_unnamed_namespace_workaround {
// Marker exception raised wherever the decoder detects that the baseN
// text ends before a whole number of bytes could be produced.  It carries
// no payload: it is caught inside this file and re-reported to the caller
// as a single, uniform error.
class IncompleteBaseInput : public std::exception {};
} // end namespace clang_unnamed_namespace_workaround
45
46// In the following anonymous namespace, we provide a generic framework
47// to encode/decode baseN format. We use the following tools:
48// - boost base64_from_binary/binary_from_base64: provide mapping table for
49// base64.
50// These classes take another iterator (Base) as a template argument, and
51// their dereference operator (operator*()) first retrieves an input value
52// from Base via Base::operator* and converts the value using their mapping
53// table. The converted value is returned as their own operator*.
54// - base{32hex,16}_from_binary/binary_from_base{32hex,16}: provide mapping
55// table for base32hex and base16. A straightforward variation of their
56// base64 counterparts.
57// - EncodeNormalizer/DecodeNormalizer: supplemental filter handling baseN
58// padding characters (=)
59// - boost transform_width: an iterator framework for handling data stream
60// per bit-group. It takes another iterator (Base) and output/input bit
61// numbers (BitsOut/BitsIn) template arguments. A transform_width object
62// internally maintains a bit stream, which can be retrieved per BitsOut
63// bits via its dereference operator (operator*()). It builds the stream
64// by internally iterating over the Base object via Base::operator++ and
65// Base::operator*, using the least BitsIn bits of the result of
66// Base::operator*. In our usage BitsIn for encoding and BitsOut for
67// decoding are always 8 (# of bits for one byte).
68//
69// Its dereference operator
70// retrieves BitsIn bits from the result of "*Base" (if necessary it
71// internally calls ++Base)
72//
73// A conceptual description of how the encoding and decoding work is as
74// follows:
75// Encoding:
76// input binary data => Normalizer (append sufficient number of 0 bits)
77// => transform_width (extract bit groups from the original
78// stream)
79// => baseXX_from_binary (convert each bit group to an
80// encoded byte using the mapping)
81// Decoding:
82// input baseXX text => Normalizer (convert '='s to the encoded characters
83// corresponding to 0, e.g. 'A's in base64)
84// => binary_from_baseXX (convert each encoded byte into
85// the original group bit)
86// => transform_width (build original byte stream by
87// concatenating the decoded bit
88// stream)
89//
90// Below, we define a set of templated classes to handle different parameters
91// for different encoding algorithms.
92namespace {
93// Common constants used for all baseN encoding.
// Character used to fill the tail of an encoded text up to a full group.
const char BASE_PADDING_CHAR = '=';

// Byte value supplied to the encoder once the real binary input has been
// consumed (see EncodeNormalizer).
const uint8_t BINARY_ZERO_CODE = 0;
96
97// EncodeNormalizer is an input iterator intended to be used as a filter
98// between the binary stream and baseXX_from_binary translator (via
99// transform_width). An EncodeNormalizer object is configured with two
100// iterators (base and base_end), specifying the head and end of the input
101// stream. It internally iterates over the original stream, and returns
102// each byte of the stream intact via its dereference operator until it
103// reaches the end of the stream. After that the EncodeNormalizer object
104// will return 0 no matter how many times it is subsequently incremented.
105// This is necessary because the input binary stream may not contain
106// sufficient bits for a full encoded text while baseXX_from_binary expects
107// a sufficient length of input.
108// Note: this class is intended to be used within this implementation file,
109// and assumes "base < base_end" on construction without validating the
110// arguments. The behavior is undefined if this assumption doesn't hold.
111class EncodeNormalizer : public iterator<input_iterator_tag, uint8_t> {
112public:
113 EncodeNormalizer(const vector<uint8_t>::const_iterator& base,
114 const vector<uint8_t>::const_iterator& base_end) :
115 base_(base), base_end_(base_end), in_pad_(false)
116 {}
117 EncodeNormalizer& operator++() { // prefix version
118 increment();
119 return (*this);
120 }
121 EncodeNormalizer operator++(int) { // postfix version
122 const EncodeNormalizer copy = *this;
123 increment();
124 return (copy);
125 }
126 const uint8_t& operator*() const {
127 if (in_pad_) {
128 return (BINARY_ZERO_CODE);
129 } else {
130 return (*base_);
131 }
132 }
133 bool operator==(const EncodeNormalizer& other) const {
134 return (base_ == other.base_);
135 }
136private:
137 void increment() {
138 if (!in_pad_) {
139 ++base_;
140 }
141 if (base_ == base_end_) {
142 in_pad_ = true;
143 }
144 }
145 vector<uint8_t>::const_iterator base_;
146 const vector<uint8_t>::const_iterator base_end_;
147 bool in_pad_;
148};
149
150// DecodeNormalizer is an input iterator intended to be used as a filter
151// between the encoded baseX stream and binary_from_baseXX.
152// A DecodeNormalizer object is configured with three string iterators
153// (base, base_beginpad, and base_end), specifying the head of the string,
154// the beginning position of baseX padding (when there's padding), and
155// end of the string, respectively. It internally iterates over the original
156// stream, and returns each character of the encoded string via its dereference
157// operator until it reaches base_beginpad. After that the DecodeNormalizer
158// will return the encoding character corresponding to the all-0 value
159// (which is specified on construction via base_zero_code. see also
160// BaseZeroCode below). This translation is necessary because
161// binary_from_baseXX doesn't accept the padding character (i.e. '=').
162// Note: this class is intended to be used within this implementation file,
163// and for simplicity assumes "base < base_beginpad <= base_end" on
164// construction without validating the arguments. The behavior is undefined
165// if this assumption doesn't hold.
166class DecodeNormalizer : public iterator<input_iterator_tag, char> {
167public:
168 DecodeNormalizer(const char base_zero_code,
169 const string::const_iterator& base,
170 const string::const_iterator& base_beginpad,
171 const string::const_iterator& base_end,
172 size_t* char_count) :
173 base_zero_code_(base_zero_code),
174 base_(base), base_beginpad_(base_beginpad), base_end_(base_end),
175 in_pad_(false), char_count_(char_count)
176 {
177 // Skip beginning spaces, if any. We need do it here because
178 // otherwise the first call to operator*() would be confused.
179 skipSpaces();
180 }
181 DecodeNormalizer& operator++() {
182 if (base_ < base_end_) {
183 ++*char_count_;
184 }
185 ++base_;
186 skipSpaces();
187 if (base_ == base_beginpad_) {
188 in_pad_ = true;
189 }
190 return (*this);
191 }
192 void skipSpaces() {
193 // If (char is signed and) *base_ < 0, on Windows platform with Visual
194 // Studio compiler it may trigger _ASSERTE((unsigned)(c + 1) <= 256);
195 // so make sure that the parameter of isspace() is larger than 0.
196 // We don't simply cast it to unsigned char to avoid confusing the
197 // isspace() implementation with a possible extension for values
198 // larger than 127. Also note the check is not ">= 0"; for systems
199 // where char is unsigned that would always be true and would possibly
200 // trigger a compiler warning that could stop the build.
201 while (base_ != base_end_ && *base_ > 0 && isspace(*base_)) {
202 ++base_;
203 }
204 }
205 const char& operator*() const {
206 if (base_ == base_end_) {
207 // binary_from_baseX can call this operator when it needs more bits
208 // even if the internal iterator (base_) has reached its end
209 // (if that happens it means the input is an incomplete baseX
210 // string and should be rejected). So this is the only point
211 // we can catch and reject this type of invalid input.
212 //
213 // More recent versions of Boost fixed the behavior and the
214 // out-of-range call to this operator doesn't happen. It's good,
215 // but in that case we need to catch incomplete baseX input in
216 // a different way. It's done via char_count_ and after the
217 // completion of decoding.
218
219 // throw this now and convert it
220 throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
221 }
222 if (*base_ == BASE_PADDING_CHAR) {
223 // Padding can only happen at the end of the input string. We can
224 // detect any violation of this by checking in_pad_, which is
225 // true iff we are on or after the first valid sequence of padding
226 // characters.
227 if (in_pad_) {
228 return (base_zero_code_);
229 } else {
230 isc_throw(BadValue, "Intermediate padding found");
231 }
232 } else {
233 return (*base_);
234 }
235 }
236 bool operator==(const DecodeNormalizer& other) const {
237 return (base_ == other.base_);
238 }
239private:
240 const char base_zero_code_;
241 string::const_iterator base_;
242 const string::const_iterator base_beginpad_;
243 const string::const_iterator base_end_;
244 bool in_pad_;
245 // Store number of non-space decoded characters (incl. pad) here. Define
246 // it as a pointer so we can carry it over to any copied objects.
247 size_t* char_count_;
248};
249
250// BitsPerChunk: number of bits to be converted using the baseN mapping table.
251// e.g. 6 for base64.
252// BaseZeroCode: the byte character that represents a value of 0 in
253// the corresponding encoding. e.g. 'A' for base64.
254// Encoder: baseX_from_binary<transform_width<EncodeNormalizer,
255// BitsPerChunk, 8> >
256// Decoder: transform_width<binary_from_baseX<DecodeNormalizer>,
257// 8, BitsPerChunk>
258template <int BitsPerChunk, char BaseZeroCode,
259 typename Encoder, typename Decoder>
260struct BaseNTransformer {
261 static string encode(const vector<uint8_t>& binary);
262 static void decode(const char* algorithm,
263 const string& base64, vector<uint8_t>& result);
264
265 // BITS_PER_GROUP is the number of bits for the smallest possible (non
266 // empty) bit string that can be converted to a valid baseN encoded text
267 // without padding. It's the least common multiple of 8 and BitsPerChunk,
268 // e.g. 24 for base64.
269 static const int BITS_PER_GROUP =
270 boost::math::static_lcm<BitsPerChunk, 8>::value;
271
272 // MAX_PADDING_CHARS is the maximum number of padding characters
273 // that can appear in a valid baseN encoded text.
274 // It's group_len - chars_for_byte, where group_len is the number of
275 // encoded characters to represent BITS_PER_GROUP bits, and
276 // chars_for_byte is the number of encoded character that is needed to
277 // represent a single byte, which is ceil(8 / BitsPerChunk).
278 // For example, for base64 we need two encoded characters to represent a
279 // byte, and each group consists of 4 encoded characters, so
280 // MAX_PADDING_CHARS is 4 - 2 = 2.
281 static const int MAX_PADDING_CHARS =
282 BITS_PER_GROUP / BitsPerChunk -
283 (8 / BitsPerChunk + ((8 % BitsPerChunk) == 0 ? 0 : 1));
284};
285
286template <int BitsPerChunk, char BaseZeroCode,
287 typename Encoder, typename Decoder>
288string
289BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::encode(
290 const vector<uint8_t>& binary)
291{
292 // calculate the resulting length.
293 size_t bits = binary.size() * 8;
294 if (bits % BITS_PER_GROUP > 0) {
295 bits += (BITS_PER_GROUP - (bits % BITS_PER_GROUP));
296 }
297 const size_t len = bits / BitsPerChunk;
298
299 string result;
300 result.reserve(len);
301 result.assign(Encoder(EncodeNormalizer(binary.begin(), binary.end())),
302 Encoder(EncodeNormalizer(binary.end(), binary.end())));
303 assert(len >= result.length());
304 result.append(len - result.length(), BASE_PADDING_CHAR);
305 return (result);
306}
307
308template <int BitsPerChunk, char BaseZeroCode,
309 typename Encoder, typename Decoder>
310void
311BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::decode(
312 const char* const algorithm,
313 const string& input,
314 vector<uint8_t>& result)
315{
316 // enumerate the number of trailing padding characters (=), ignoring
317 // white spaces. since baseN_from_binary doesn't accept padding,
318 // we handle it explicitly.
319 size_t padchars = 0;
320 string::const_reverse_iterator srit = input.rbegin();
321 string::const_reverse_iterator srit_end = input.rend();
322 while (srit != srit_end) {
323 char ch = *srit;
324 if (ch == BASE_PADDING_CHAR) {
325 if (++padchars > MAX_PADDING_CHARS) {
326 isc_throw(BadValue, "Too many " << algorithm
327 << " padding characters: " << input);
328 }
329 } else if (!(ch > 0 && isspace(ch))) {
330 // see the note for DecodeNormalizer::skipSpaces() above for ch > 0
331 break;
332 }
333 ++srit;
334 }
335 // then calculate the number of padding bits corresponding to the padding
336 // characters. In general, the padding bits consist of all-zero
337 // trailing bits of the last encoded character followed by zero bits
338 // represented by the padding characters:
339 // 1st pad 2nd pad 3rd pad...
340 // +++===== ======= ===... (+: from encoded chars, =: from pad chars)
341 // 0000...0 0......0 000...
342 // 0 7 8 15 16.... (bits)
343 // The number of bits for the '==...' part is padchars * BitsPerChunk.
344 // So the total number of padding bits is the smallest multiple of 8
345 // that is >= padchars * BitsPerChunk.
346 // (Below, note the common idiom of the bitwise AND with ~7. It clears the
347 // lowest three bits, so has the effect of rounding the result down to the
348 // nearest multiple of 8)
349 const size_t padbits = (padchars * BitsPerChunk + 7) & ~7;
350
351 // In some encoding algorithm, it could happen that a padding byte would
352 // contain a full set of encoded bits, which is not allowed by definition
353 // of padding. For example, if BitsPerChunk is 5, the following
354 // representation could happen:
355 // ++00000= (+: from encoded chars, 0: encoded char for '0', =: pad chars)
356 // 0 7 (bits)
357 // This must actually be encoded as follows:
358 // ++======
359 // 0 7 (bits)
360 // The following check rejects this type of invalid encoding.
361 if (padbits > BitsPerChunk * (padchars + 1)) {
362 isc_throw(BadValue, "Invalid " << algorithm << " padding: " << input);
363 }
364
365 // convert the number of bits in bytes for convenience.
366 const size_t padbytes = padbits / 8;
367
368 try {
369 size_t char_count = 0;
370 result.assign(Decoder(DecodeNormalizer(BaseZeroCode, input.begin(),
371 srit.base(), input.end(),
372 &char_count)),
373 Decoder(DecodeNormalizer(BaseZeroCode, input.end(),
374 input.end(), input.end(),
375 NULL)));
376
377 // Number of bits of the conversion result including padding must be
378 // a multiple of 8; otherwise the decoder reaches the end of input
379 // with some incomplete bits of data, which is invalid.
380 if (((char_count * BitsPerChunk) % 8) != 0) {
381 // catch this immediately below
382 throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
383 }
384 } catch (const clang_unnamed_namespace_workaround::IncompleteBaseInput&) {
385 // we unify error handling for incomplete input here.
386 isc_throw(BadValue, "Incomplete input for " << algorithm
387 << ": " << input);
388 } catch (const dataflow_exception& ex) {
389 // convert any boost exceptions into our local one.
390 isc_throw(BadValue, ex.what());
391 }
392
393 // Confirm the original BaseX text is the canonical encoding of the
394 // data, that is, that the first byte of padding is indeed 0.
395 // (DecodeNormalizer and binary_from_baseXX ensure that the rest of the
396 // padding is all zero).
397 assert(result.size() >= padbytes);
398 if (padbytes > 0 && *(result.end() - padbytes) != 0) {
399 isc_throw(BadValue, "Non 0 bits included in " << algorithm
400 << " padding: " << input);
401 }
402
403 // strip the padded zero-bit fields
404 result.resize(result.size() - padbytes);
405}
406
407//
408// Instantiation for BASE-64
409//
410typedef
411base64_from_binary<transform_width<EncodeNormalizer, 6, 8> > base64_encoder;
412typedef
413transform_width<binary_from_base64<DecodeNormalizer>, 8, 6> base64_decoder;
414typedef BaseNTransformer<6, 'A', base64_encoder, base64_decoder>
415Base64Transformer;
416
417//
418// Instantiation for BASE-32HEX
419//
420typedef
422base32hex_encoder;
423typedef
424transform_width<binary_from_base32hex<DecodeNormalizer>, 8, 5>
425base32hex_decoder;
426typedef BaseNTransformer<5, '0', base32hex_encoder, base32hex_decoder>
427Base32HexTransformer;
428
429//
430// Instantiation for BASE-16 (HEX)
431//
432typedef
434typedef
435transform_width<binary_from_base16<DecodeNormalizer>, 8, 4> base16_decoder;
436typedef BaseNTransformer<4, '0', base16_encoder, base16_decoder>
437Base16Transformer;
438}
439
440string
441encodeBase64(const vector<uint8_t>& binary) {
442 return (Base64Transformer::encode(binary));
443}
444
445void
446decodeBase64(const string& input, vector<uint8_t>& result) {
447 Base64Transformer::decode("base64", input, result);
448}
449
450string
451encodeBase32Hex(const vector<uint8_t>& binary) {
452 return (Base32HexTransformer::encode(binary));
453}
454
455void
456decodeBase32Hex(const string& input, vector<uint8_t>& result) {
457 Base32HexTransformer::decode("base32hex", input, result);
458}
459
460string
461encodeHex(const vector<uint8_t>& binary) {
462 return (Base16Transformer::encode(binary));
463}
464
465void
466decodeHex(const string& input, vector<uint8_t>& result) {
467 Base16Transformer::decode("base16", input, result);
468}
469
470} // namespace encode
471} // namespace util
472} // namespace isc
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
bool operator==(const Element &a, const Element &b)
Definition: data.cc:211
void decodeBase64(const std::string &input, std::vector< uint8_t > &result)
Decode a text encoded in the base64 format into the original data.
Definition: base_n.cc:446
std::string encodeBase64(const std::vector< uint8_t > &binary)
Encode binary data in the base64 format.
Definition: base_n.cc:441
string encodeHex(const vector< uint8_t > &binary)
Encode binary data in the base16 ('hex') format.
Definition: base_n.cc:461
void decodeBase32Hex(const std::string &input, std::vector< uint8_t > &result)
Decode a text encoded in the base32hex format into the original data.
Definition: base_n.cc:456
void decodeHex(const string &input, vector< uint8_t > &result)
Decode a text encoded in the base16 ('hex') format into the original data.
Definition: base_n.cc:466
std::string encodeBase32Hex(const std::vector< uint8_t > &binary)
Encode binary data in the base32hex format.
Definition: base_n.cc:451
Defines the logger used by the top-level component of kea-dhcp-ddns.