Kea 1.5.0
strutil.cc
Go to the documentation of this file.
1// Copyright (C) 2011-2018 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
9#include <util/encode/hex.h>
10#include <util/strutil.h>
11
12#include <boost/algorithm/string/classification.hpp>
13#include <boost/algorithm/string/constants.hpp>
14#include <boost/algorithm/string/split.hpp>
15
16#include <numeric>
17#include <iostream>
18#include <sstream>
19
20// Early versions of C++11 regex were buggy. We use it if we can;
21// otherwise, we fall back to regcomp/regexec. For more info see:
22// https://stackoverflow.com/questions/12530406/is-gcc-4-8-or-earlier-buggy-about-regular-expressions
23#ifdef USE_REGEX
24#include <regex>
25#else
26#include <sys/types.h>
27#include <regex.h>
28#endif
29
30#include <string.h>
31
32using namespace std;
33
34namespace isc {
35namespace util {
36namespace str {
37
// Normalize slashes: every backslash in the supplied name is rewritten as a
// forward slash. The string is modified in place; an empty string is left
// untouched.

void
normalizeSlash(std::string& name) {
    for (char& ch : name) {
        if (ch == '\\') {
            ch = '/';
        }
    }
}
49
// Trim String: return a copy of the input with leading and trailing blanks
// removed. Only space, tab and newline count as blanks; interior blanks are
// kept. An empty or all-blank input yields an empty string.

std::string
trim(const std::string& instring) {
    static const char* const blank_chars = " \t\n";

    // Locate the first character that survives trimming. If there is none
    // (empty or all-blank input) there is nothing to return.
    const std::string::size_type head = instring.find_first_not_of(blank_chars);
    if (head == std::string::npos) {
        return (std::string());
    }

    // At least one non-blank exists, so this search cannot fail.
    const std::string::size_type tail = instring.find_last_not_of(blank_chars);

    return (instring.substr(head, tail - head + 1));
}
72
// Tokenize string. As noted in the header, this is locally written to avoid
// another dependency on a Boost library.
//
// text   - string to split
// delim  - set of characters, each of which acts as a delimiter
// escape - when true, a backslash protects a following delimiter or
//          backslash; a backslash before any other character (or at the
//          end of the input) is kept literally
//
// Runs of delimiters are collapsed and empty tokens are never returned.

std::vector<std::string>
tokens(const std::string& text, const std::string& delim, bool escape) {
    std::vector<std::string> pieces;
    std::string current;
    bool inside = false;          // Currently accumulating a token
    bool pending_escape = false;  // Previous character was an unconsumed '\'

    for (const char ch : text) {
        if (delim.find(ch) != std::string::npos) {
            // Delimiter.
            if (!inside) {
                // Leading or repeated delimiter: nothing to do.
                continue;
            }
            if (pending_escape) {
                // Escaped delimiter: it belongs to the token.
                pending_escape = false;
                current.push_back(ch);
                continue;
            }
            // Unescaped delimiter closes the token.
            if (!current.empty()) {
                pieces.push_back(current);
            }
            inside = false;
            current.clear();
            continue;
        }

        // Any non-delimiter character starts (or continues) a token.
        inside = true;

        if (escape && (ch == '\\')) {
            // Escape character: a second one in a row yields one literal
            // backslash, a first one is only remembered.
            if (pending_escape) {
                current.push_back(ch);
            }
            pending_escape = !pending_escape;
            continue;
        }

        if (pending_escape) {
            // The backslash did not precede anything escapable, so it is
            // an ordinary character after all.
            pending_escape = false;
            current.push_back('\\');
        }
        current.push_back(ch);
    }

    // End of input: a dangling escape is kept, then the last token is saved.
    if (pending_escape) {
        current.push_back('\\');
    }
    if (!current.empty()) {
        pieces.push_back(current);
    }

    return (pieces);
}
142
// Local function to pass to accumulate() for summing up string lengths.

namespace {

size_t
lengthSum(std::string::size_type curlen, const std::string& cur_string) {
    return (curlen + cur_string.size());
}

}

// Provide printf-style formatting.
//
// Each "%s" in the format string is replaced, left to right, by the next
// element of args. Substitution stops when either the arguments or the
// placeholders run out; surplus placeholders are left in the result and
// surplus arguments are ignored.
//
// Note that the search for the next placeholder resumes at the start of the
// text just inserted, so a "%s" contained in an argument is itself treated
// as a placeholder for the following argument.
//
// format - format string
// args   - replacement strings
//
// Returns the formatted string.

std::string
format(const std::string& format, const std::vector<std::string>& args) {

    static const std::string flag = "%s";

    // Start from a copy of the format string and substitute in place.
    std::string result(format);

    // To speed things up, reserve an estimate of the final size: the format
    // string plus all arguments, less one placeholder per argument. The
    // subtraction is only performed when it cannot wrap around: with short
    // or empty arguments the placeholders may outweigh the text, and an
    // underflowed size_t passed to reserve() would throw std::length_error.
    const size_t gross = std::accumulate(args.begin(), args.end(),
                                         format.size(), lengthSum);
    const size_t replaced = args.size() * flag.size();
    if ((gross > replaced) && ((gross - replaced) > result.size())) {
        result.reserve(gross - replaced);
    }

    // Iterate through replacing all tokens
    size_t tokenpos = 0;    // Position of last token replaced
    for (std::vector<std::string>::size_type i = 0; i < args.size(); ++i) {
        tokenpos = result.find(flag, tokenpos);
        if (tokenpos == std::string::npos) {
            // No placeholder left for this or any later argument.
            break;
        }
        result.replace(tokenpos, flag.size(), args[i]);
    }

    return (result);
}
184
185std::string
186getToken(std::istringstream& iss) {
187 string token;
188 iss >> token;
189 if (iss.bad() || iss.fail()) {
190 isc_throw(StringTokenError, "could not read token from string");
191 }
192 return (token);
193}
194
195std::vector<uint8_t>
196quotedStringToBinary(const std::string& quoted_string) {
197 std::vector<uint8_t> binary;
198 // Remove whitespace before and after the quotes.
199 std::string trimmed_string = trim(quoted_string);
200
201 // We require two quote characters, so the length of the string must be
202 // equal to 2 at minimum, and it must start and end with quotes.
203 if ((trimmed_string.length() > 1) && ((trimmed_string[0] == '\'') &&
204 (trimmed_string[trimmed_string.length()-1] == '\''))) {
205 // Remove quotes and trim the text inside the quotes.
206 trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2));
207 // Copy string contents into the vector.
208 binary.assign(trimmed_string.begin(), trimmed_string.end());
209 }
210 // Return resulting vector or empty vector.
211 return (binary);
212}
213
214void
215decodeColonSeparatedHexString(const std::string& hex_string,
216 std::vector<uint8_t>& binary) {
217 std::vector<std::string> split_text;
218 boost::split(split_text, hex_string, boost::is_any_of(":"),
219 boost::algorithm::token_compress_off);
220
221 std::vector<uint8_t> binary_vec;
222 for (size_t i = 0; i < split_text.size(); ++i) {
223
224 // If there are multiple tokens and the current one is empty, it
225 // means that two consecutive colons were specified. This is not
226 // allowed.
227 if ((split_text.size() > 1) && split_text[i].empty()) {
228 isc_throw(isc::BadValue, "two consecutive colons specified in"
229 " a decoded string '" << hex_string << "'");
230
231 // Between a colon we expect at most two characters.
232 } else if (split_text[i].size() > 2) {
233 isc_throw(isc::BadValue, "invalid format of the decoded string"
234 << " '" << hex_string << "'");
235
236 } else if (!split_text[i].empty()) {
237 std::stringstream s;
238 s << "0x";
239
240 for (unsigned int j = 0; j < split_text[i].length(); ++j) {
241 // Check if we're dealing with hexadecimal digit.
242 if (!isxdigit(split_text[i][j])) {
243 isc_throw(isc::BadValue, "'" << split_text[i][j]
244 << "' is not a valid hexadecimal digit in"
245 << " decoded string '" << hex_string << "'");
246 }
247 s << split_text[i][j];
248 }
249
250 // The stream should now have one or two hexadecimal digits.
251 // Let's convert it to a number and store in a temporary
252 // vector.
253 unsigned int binary_value;
254 s >> std::hex >> binary_value;
255
256 binary_vec.push_back(static_cast<uint8_t>(binary_value));
257 }
258
259 }
260
261 // All ok, replace the data in the output vector with a result.
262 binary.swap(binary_vec);
263}
264
265void
266decodeFormattedHexString(const std::string& hex_string,
267 std::vector<uint8_t>& binary) {
268 // If there is at least one colon we assume that the string
269 // comprises octets separated by colons (e.g. MAC address notation).
270 if (hex_string.find(':') != std::string::npos) {
271 decodeColonSeparatedHexString(hex_string, binary);
272
273 } else {
274 std::ostringstream s;
275
276 // If we have odd number of digits we'll have to prepend '0'.
277 if (hex_string.length() % 2 != 0) {
278 s << "0";
279 }
280
281 // It is ok to use '0x' prefix in a string.
282 if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) {
283 // Exclude '0x' from the decoded string.
284 s << hex_string.substr(2);
285
286 } else {
287 // No '0x', so decode the whole string.
288 s << hex_string;
289 }
290
291 try {
292 // Decode the hex string.
293 encode::decodeHex(s.str(), binary);
294
295 } catch (...) {
296 isc_throw(isc::BadValue, "'" << hex_string << "' is not a valid"
297 " string of hexadecimal digits");
298 }
299 }
300}
301
303public:
304 StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement)
305 : char_set_(char_set), char_replacement_(char_replacement) {
306#ifdef USE_REGEX
307 try {
308 scrub_exp_ = std::regex(char_set, std::regex::extended);
309 } catch (const std::exception& ex) {
310 isc_throw(isc::BadValue, "invalid regex: '"
311 << char_set_ << "', " << ex.what());
312 }
313#else
314 int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED);
315 if (ec) {
316 char errbuf[512] = "";
317 static_cast<void>(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf)));
318 regfree(&scrub_exp_);
319 isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf);
320 }
321#endif
322 }
323
326#ifndef USE_REGEX
327 regfree(&scrub_exp_);
328#endif
329 }
330
331 std::string scrub(const std::string& original) {
332#ifdef USE_REGEX
333 std::stringstream result;
334 try {
335 std::regex_replace(std::ostream_iterator<char>(result),
336 original.begin(), original.end(),
337 scrub_exp_, char_replacement_);
338 } catch (const std::exception& ex) {
339 isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '"
340 << char_replacement_ << "' in '" << original << "' failed: ,"
341 << ex.what());
342 }
343
344 return (result.str());
345#else
346 // Iterate over original string, match by match.
347 const char* origStr = original.c_str();
348 const char* startFrom = origStr;
349 const char* endAt = origStr + strlen(origStr);
350 regmatch_t matches[2]; // n matches + 1
351 stringstream result;
352
353 while (startFrom < endAt) {
354 // Look for the next match
355 if (regexec(&scrub_exp_, startFrom, 1, matches, 0) == REG_NOMATCH) {
356 // No matches, so add in the remainder
357 result << startFrom;
358 break;
359 }
360
361 // Shouldn't happen, but one never knows eh?
362 if (matches[0].rm_so == -1) {
363 isc_throw(isc::Unexpected, "matched but so is -1?");
364 }
365
366 // Add everything from starting point up to the current match
367 const char* matchAt = startFrom + matches[0].rm_so;
368 while (startFrom < matchAt) {
369 result << *startFrom;
370 ++startFrom;
371 }
372
373 // Add in the replacement
374 result << char_replacement_;
375
376 // Move past the match.
377 ++startFrom;
378 }
379
380 return (result.str());
381#endif
382 }
383
384private:
385 std::string char_set_;
386 std::string char_replacement_;
387
388#ifdef USE_REGEX
389 regex scrub_exp_;
390#else
391 regex_t scrub_exp_;
392#endif
393};
394
395StringSanitizer::StringSanitizer(const std::string& char_set,
396 const std::string& char_replacement)
397 : impl_(new StringSanitizerImpl(char_set, char_replacement)) {
398}
399
401 delete impl_;
402}
403
404std::string
405StringSanitizer::scrub(const std::string& original) {
406 return (impl_->scrub(original));
407}
408
409} // namespace str
410} // namespace util
411} // namespace isc
A generic exception that is thrown if a parameter given to a method is considered invalid in that context.
A generic exception that is thrown when an unexpected error condition occurs.
StringSanitizerImpl(const std::string &char_set, const std::string &char_replacement)
Definition: strutil.cc:304
std::string scrub(const std::string &original)
Definition: strutil.cc:331
std::string scrub(const std::string &original)
Returns a scrubbed copy of a given string.
Definition: strutil.cc:405
StringSanitizer(const std::string &char_set, const std::string &char_replacement)
Constructor.
Definition: strutil.cc:395
A Set of C++ Utilities for Manipulating Strings.
Definition: strutil.h:30
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
void decodeHex(const string &input, vector< uint8_t > &result)
Decode a text encoded in the base16 ('hex') format into the original data.
Definition: base_n.cc:466
void normalizeSlash(std::string &name)
Normalize Backslash.
Definition: strutil.cc:41
std::string format(const std::string &format, const std::vector< std::string > &args)
Apply Formatting.
Definition: strutil.cc:157
std::string getToken(std::istringstream &iss)
Returns one token from the given stringstream.
Definition: strutil.cc:186
void decodeFormattedHexString(const std::string &hex_string, std::vector< uint8_t > &binary)
Converts a formatted string of hexadecimal digits into a vector.
Definition: strutil.cc:266
std::vector< uint8_t > quotedStringToBinary(const std::string &quoted_string)
Converts a string in quotes into vector.
Definition: strutil.cc:196
void decodeColonSeparatedHexString(const std::string &hex_string, std::vector< uint8_t > &binary)
Converts a string of hexadecimal digits with colons into a vector.
Definition: strutil.cc:215
string trim(const string &instring)
Trim Leading and Trailing Spaces.
Definition: strutil.cc:53
vector< string > tokens(const std::string &text, const std::string &delim, bool escape)
Split String into Tokens.
Definition: strutil.cc:77
Defines the logger used by the top-level component of kea-dhcp-ddns.