C++ Utilities 5.31.1
Useful C++ classes and routines such as argument parser, IO and conversion utilities
Loading...
Searching...
No Matches
stringconversion.cpp
Go to the documentation of this file.
2
3#ifndef CPP_UTILITIES_NO_THREAD_LOCAL
4#include "../feature_detection/features.h"
5#endif
6
7#ifndef CPP_UTILITIES_THREAD_LOCAL
8#define CPP_UTILITIES_THREAD_LOCAL
9#endif
10
11#include <cmath>
12#include <cstdlib>
13#include <iomanip>
14#include <memory>
15#include <sstream>
16
17#ifdef PLATFORM_WINDOWS
18#include <algorithm> // for std::clamp
19#include <limits>
20#endif
21
22#include <errno.h>
23
24#ifndef CPP_UTILITIES_NO_ICONV
25#include <iconv.h>
26#endif
27
28#ifdef PLATFORM_WINDOWS
29#include <windows.h>
30// note: The windows header seriously defines a macro called "max" breaking the (common) use
31// of std::numeric_limits in the subsequent code. So we need to undefine this macro. Note that
32// this is not the case using mingw-w64 but it is happening with windows.h from Windows Kits
33// version 10.0.22000.0 via Visual Studio 2022.
34#ifdef max
35#undef max
36#endif
37#endif
38
39using namespace std;
40
41namespace CppUtilities {
42
43#ifndef CPP_UTILITIES_NO_ICONV
44
46
/*!
 * \brief Output size hint suggesting an output buffer exactly as large as the input.
 *
 * Used as OutputSizeHint template argument of ConversionDescriptor. The functor is
 * stateless, so the call operator is const and can be used through const objects.
 */
struct Keep {
    /// \brief Returns \a value unchanged.
    std::size_t operator()(std::size_t value) const noexcept
    {
        return value;
    }
};
/*!
 * \brief Output size hint suggesting an output buffer twice as large as the input.
 *
 * Used as OutputSizeHint template argument of ConversionDescriptor. The functor is
 * stateless, so the call operator is const and can be used through const objects.
 */
struct Double {
    /// \brief Returns twice the specified \a value.
    std::size_t operator()(std::size_t value) const noexcept
    {
        return value + value;
    }
};
/*!
 * \brief Output size hint suggesting an output buffer half as large as the input.
 *
 * Used as OutputSizeHint template argument of ConversionDescriptor. The functor is
 * stateless, so the call operator is const and can be used through const objects.
 */
struct Half {
    /// \brief Returns half of the specified \a value (integer division, so 1 yields 0).
    std::size_t operator()(std::size_t value) const noexcept
    {
        return value / 2;
    }
};
/*!
 * \brief Output size hint scaling the input size by a caller-supplied factor.
 *
 * Used as OutputSizeHint template argument of ConversionDescriptor.
 */
struct Factor {
    /*!
     * \brief Constructs the hint from the specified \a factor.
     * \remarks Intentionally not explicit: a plain float is passed where a Factor is
     *          expected when constructing a ConversionDescriptor<Factor>.
     */
    Factor(float factor)
        : factor(factor)
    {
    }

    /*!
     * \brief Returns \a value scaled by the factor, truncated towards zero.
     * \remarks Returns 0 if the scaled value is negative or NaN; converting such a
     *          float to an unsigned integer would be undefined behavior.
     */
    std::size_t operator()(std::size_t value) const
    {
        const float scaled = static_cast<float>(value) * factor;
        // the comparison is false for NaN as well, so NaN also maps to 0
        return scaled > 0.0f ? static_cast<std::size_t>(scaled) : 0;
    }

    float factor;
};
74
75template <class OutputSizeHint> class ConversionDescriptor {
76public:
77 ConversionDescriptor(const char *fromCharset, const char *toCharset)
78 : m_ptr(iconv_open(toCharset, fromCharset))
79 , m_outputSizeHint(OutputSizeHint())
80 {
81 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
82 throw ConversionException("Unable to allocate descriptor for character set conversion.");
83 }
84 }
85
86 ConversionDescriptor(const char *fromCharset, const char *toCharset, OutputSizeHint outputSizeHint)
87 : m_ptr(iconv_open(toCharset, fromCharset))
88 , m_outputSizeHint(outputSizeHint)
89 {
90 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
91 throw ConversionException("Unable to allocate descriptor for character set conversion.");
92 }
93 }
94
95 ~ConversionDescriptor()
96 {
97 iconv_close(m_ptr);
98 }
99
100public:
101 StringData convertString(const char *inputBuffer, size_t inputBufferSize)
102 {
103 // setup input and output buffer
104 size_t inputBytesLeft = inputBufferSize;
105 size_t outputSize = m_outputSizeHint(inputBufferSize);
106 size_t outputBytesLeft = outputSize;
107 char *outputBuffer = reinterpret_cast<char *>(malloc(outputSize));
108 size_t bytesWritten;
109
110 char *currentOutputOffset = outputBuffer;
111 for (;; currentOutputOffset = outputBuffer + bytesWritten) {
112 bytesWritten = iconv(m_ptr, const_cast<char **>(&inputBuffer), &inputBytesLeft, &currentOutputOffset, &outputBytesLeft);
113 if (bytesWritten == static_cast<size_t>(-1)) {
114 if (errno == EINVAL) {
115 // ignore incomplete multibyte sequence in the input
116 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
117 break;
118 } else if (errno == E2BIG) {
119 // output buffer has no more room for next converted character
120 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
121 outputBytesLeft = (outputSize += m_outputSizeHint(inputBytesLeft)) - bytesWritten;
122 outputBuffer = reinterpret_cast<char *>(realloc(outputBuffer, outputSize));
123 } else /*if(errno == EILSEQ)*/ {
124 // invalid multibyte sequence in the input
125 free(outputBuffer);
126 throw ConversionException("Invalid multibyte sequence in the input.");
127 }
128 } else {
129 // conversion completed without (further) errors
130 break;
131 }
132 }
133 return StringData(std::unique_ptr<char[], StringDataDeleter>(outputBuffer), currentOutputOffset - outputBuffer);
134 }
135
136private:
137 iconv_t m_ptr;
138 OutputSizeHint m_outputSizeHint;
139};
140
142
153 const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor)
154{
155 return ConversionDescriptor<Factor>(fromCharset, toCharset, outputBufferSizeFactor).convertString(inputBuffer, inputBufferSize);
156}
157
161StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
162{
163 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16LE");
164 return descriptor.convertString(inputBuffer, inputBufferSize);
165}
166
170StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
171{
172 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16LE", "UTF-8");
173 return descriptor.convertString(inputBuffer, inputBufferSize);
174}
175
179StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
180{
181 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16BE");
182 return descriptor.convertString(inputBuffer, inputBufferSize);
183}
184
188StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
189{
190 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16BE", "UTF-8");
191 return descriptor.convertString(inputBuffer, inputBufferSize);
192}
193
197StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
198{
199 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("ISO-8859-1", "UTF-8");
200 return descriptor.convertString(inputBuffer, inputBufferSize);
201}
202
206StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
207{
208 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("UTF-8", "ISO-8859-1");
209 return descriptor.convertString(inputBuffer, inputBufferSize);
210}
211
212#endif
213
214#ifdef PLATFORM_WINDOWS
221std::wstring convertMultiByteToWide(std::error_code &ec, std::string_view inputBuffer)
222{
223 // calculate required size
224 auto widePath = std::wstring();
225 auto bufferSize = static_cast<int>(std::clamp<std::size_t>(inputBuffer.size(), 0, std::numeric_limits<int>::max()));
226 auto size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), bufferSize, nullptr, 0);
227 if (size <= 0) {
228 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
229 return widePath;
230 }
231 // do the actual conversion
232 widePath.resize(static_cast<std::wstring::size_type>(size));
233 size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), bufferSize, widePath.data(), size);
234 if (size <= 0) {
235 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
236 widePath.clear();
237 }
238 return widePath;
239}
240
247WideStringData convertMultiByteToWide(std::error_code &ec, const char *inputBuffer, int inputBufferSize)
248{
249 // calculate required size
250 WideStringData widePath;
251 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, nullptr, 0);
252 if (widePath.second <= 0) {
253 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
254 return widePath;
255 }
256 // do the actual conversion
257 widePath.first = make_unique<wchar_t[]>(static_cast<size_t>(widePath.second));
258 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, widePath.first.get(), widePath.second);
259 if (widePath.second <= 0) {
260 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
261 widePath.first.reset();
262 }
263 return widePath;
264}
265
270WideStringData convertMultiByteToWide(std::error_code &ec, const std::string &inputBuffer)
271{
272 return convertMultiByteToWide(ec, inputBuffer.data(),
273 inputBuffer.size() < static_cast<std::size_t>(std::numeric_limits<int>::max() - 1) ? static_cast<int>(inputBuffer.size() + 1) : -1);
274}
275
282WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize)
283{
284 std::error_code ec;
285 return convertMultiByteToWide(ec, inputBuffer, inputBufferSize);
286}
287
292WideStringData convertMultiByteToWide(const std::string &inputBuffer)
293{
294 std::error_code ec;
295 return convertMultiByteToWide(ec, inputBuffer);
296}
297#endif
298
/*!
 * \brief Truncates all characters after the first occurrence of the specified
 *        \a terminationChar and the termination character as well.
 * \remarks The string is left unchanged if it does not contain \a terminationChar.
 */
void truncateString(std::string &str, char terminationChar)
{
    if (const auto terminatorPos = str.find(terminationChar); terminatorPos != std::string::npos) {
        str.erase(terminatorPos);
    }
}
310
/*!
 * \brief Converts the specified data size in byte to its equivalent std::string representation.
 * \param sizeInByte Specifies the size in bytes.
 * \param includeByte Specifies whether the exact number of bytes is appended in parentheses
 *        whenever the size is not already printed in bytes.
 * \remarks Sizes below 1024 are printed as plain byte count. Larger sizes are printed with
 *          two decimal places using the units KiB, MiB, GiB and TiB.
 */
std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte)
{
    constexpr std::uint64_t kibibyte = 1024;
    constexpr std::uint64_t mebibyte = kibibyte * 1024;
    constexpr std::uint64_t gibibyte = mebibyte * 1024;
    constexpr std::uint64_t tebibyte = gibibyte * 1024;

    std::ostringstream res;
    res.setf(std::ios::fixed, std::ios::floatfield);
    res << std::setprecision(2);

    const auto size = static_cast<double>(sizeInByte);
    if (sizeInByte < kibibyte) {
        res << sizeInByte << " bytes";
    } else if (sizeInByte < mebibyte) {
        res << (size / static_cast<double>(kibibyte)) << " KiB";
    } else if (sizeInByte < gibibyte) {
        res << (size / static_cast<double>(mebibyte)) << " MiB";
    } else if (sizeInByte < tebibyte) {
        res << (size / static_cast<double>(gibibyte)) << " GiB";
    } else {
        res << (size / static_cast<double>(tebibyte)) << " TiB";
    }

    // append the exact count for every size printed with a unit prefix; this includes
    // exactly 1024 bytes, which is printed as "1.00 KiB" by the branch above
    if (includeByte && sizeInByte >= kibibyte) {
        res << " (" << sizeInByte << " byte)";
    }
    return res.str();
}
337
/*!
 * \brief Converts the specified bitrate in kbit/s to its equivalent std::string representation.
 * \param bitrateInKbitsPerSecond Specifies the bitrate in kbit/s.
 * \param useIecBinaryPrefixes Specifies whether the result is expressed in bytes per second
 *        (byte/s, KiB/s, MiB/s, GiB/s) instead of bits per second (bit/s, kbit/s, Mbit/s, Gbit/s).
 * \remarks Values are printed with a precision of three significant digits. NaN yields
 *          "indeterminable".
 */
std::string bitrateToString(double bitrateInKbitsPerSecond, bool useIecBinaryPrefixes)
{
    // one row per unit: the unit applies to bitrates below the limit; the bitrate is
    // multiplied by the factor before printing
    struct UnitScale {
        double exclusiveLimit;
        double factor;
        const char *suffix;
    };
    // the limit of the last row of each table is never read; that row is the fallback
    static constexpr UnitScale byteScales[4] = {
        { 8.0, 125.0, " byte/s" },
        { 8000.0, 0.125, " KiB/s" },
        { 8000000.0, 0.000125, " MiB/s" },
        { 0.0, 0.000000125, " GiB/s" },
    };
    static constexpr UnitScale bitScales[4] = {
        { 1.0, 1000.0, " bit/s" },
        { 1000.0, 1.0, " kbit/s" },
        { 1000000.0, 0.001, " Mbit/s" },
        { 0.0, 0.000001, " Gbit/s" },
    };

    std::stringstream output(std::stringstream::in | std::stringstream::out);
    output << std::setprecision(3);
    if (std::isnan(bitrateInKbitsPerSecond)) {
        output << "indeterminable";
        return output.str();
    }

    const auto &scales = useIecBinaryPrefixes ? byteScales : bitScales;
    std::size_t row = 0;
    while (row < 3 && !(bitrateInKbitsPerSecond < scales[row].exclusiveLimit)) {
        ++row;
    }
    output << (bitrateInKbitsPerSecond * scales[row].factor) << scales[row].suffix;
    return output.str();
}
377
// alphabet used by encodeBase64(); the index of a character is its 6-bit value
const char *const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// character used to fill up the last group of four output characters
const char base64Pad = '=';

/*!
 * \brief Encodes the specified data to Base64.
 * \param data Specifies the first byte to encode. It is not dereferenced if \a dataSize is
 *        zero, so it may be null in that case.
 * \param dataSize Specifies the number of bytes to encode.
 * \returns Returns the encoded string; its length is always a multiple of four.
 */
std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
{
    auto encoded = std::string();
    const std::size_t remainder = dataSize % 3u;
    const std::size_t groupedSize = dataSize - remainder;
    // computed in std::size_t so sizes close to the 32-bit limit do not wrap around
    encoded.reserve((groupedSize / 3 + (remainder ? 1 : 0)) * 4);

    // encode all complete groups of three bytes as four characters each
    std::size_t offset = 0;
    for (; offset != groupedSize; offset += 3) {
        const std::uint32_t group = (static_cast<std::uint32_t>(data[offset]) << 16) | (static_cast<std::uint32_t>(data[offset + 1]) << 8)
            | static_cast<std::uint32_t>(data[offset + 2]);
        encoded.push_back(base64Chars[(group & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(group & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(group & 0x00000FC0) >> 6]);
        encoded.push_back(base64Chars[(group & 0x0000003F)]);
    }

    // encode the remaining one or two bytes and pad the group to four characters
    switch (remainder) {
    case 1: {
        const std::uint32_t group = static_cast<std::uint32_t>(data[offset]) << 16;
        encoded.push_back(base64Chars[(group & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(group & 0x0003F000) >> 12]);
        encoded.push_back(base64Pad);
        encoded.push_back(base64Pad);
        break;
    }
    case 2: {
        const std::uint32_t group = (static_cast<std::uint32_t>(data[offset]) << 16) | (static_cast<std::uint32_t>(data[offset + 1]) << 8);
        encoded.push_back(base64Chars[(group & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(group & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(group & 0x00000FC0) >> 6]);
        encoded.push_back(base64Pad);
        break;
    }
    default:;
    }
    return encoded;
}
421
427std::pair<std::unique_ptr<std::uint8_t[]>, std::uint32_t> decodeBase64(const char *encodedStr, const std::uint32_t strSize)
428{
429 if (!strSize) {
430 return std::make_pair(std::make_unique<std::uint8_t[]>(0), 0); // early return to prevent clazy warning
431 }
432 if (strSize % 4) {
433 throw ConversionException("invalid size of base64");
434 }
435 std::uint32_t decodedSize = (strSize / 4) * 3;
436 const char *const end = encodedStr + strSize;
437 if (*(end - 1) == base64Pad) {
438 --decodedSize;
439 }
440 if (*(end - 2) == base64Pad) {
441 --decodedSize;
442 }
443 auto buffer = std::make_unique<std::uint8_t[]>(decodedSize);
444 auto *iter = buffer.get() - 1;
445 while (encodedStr < end) {
446 std::int32_t temp = 0;
447 for (std::uint8_t quantumPos = 0; quantumPos < 4; ++quantumPos, ++encodedStr) {
448 temp <<= 6;
449 if (*encodedStr >= 'A' && *encodedStr <= 'Z') {
450 temp |= *encodedStr - 'A';
451 } else if (*encodedStr >= 'a' && *encodedStr <= 'z') {
452 temp |= *encodedStr - 'a' + 26;
453 } else if (*encodedStr >= '0' && *encodedStr <= '9') {
454 temp |= *encodedStr - '0' + 2 * 26;
455 } else if (*encodedStr == '+') {
456 temp |= 2 * 26 + 10;
457 } else if (*encodedStr == '/') {
458 temp |= 2 * 26 + 10 + 1;
459 } else if (*encodedStr == base64Pad) {
460 switch (end - encodedStr) {
461 case 1:
462 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
463 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
464 return std::make_pair(std::move(buffer), decodedSize);
465 case 2:
466 *++iter = static_cast<std::uint8_t>((temp >> 10) & 0xFF);
467 return std::make_pair(std::move(buffer), decodedSize);
468 default:
469 throw ConversionException("invalid padding in base64");
470 }
471 } else {
472 throw ConversionException("invalid character in base64");
473 }
474 }
475 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
476 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
477 *++iter = static_cast<std::uint8_t>(temp & 0xFF);
478 }
479 return std::make_pair(std::move(buffer), decodedSize);
480}
481} // namespace CppUtilities
The ConversionException class is thrown by the various conversion functions of this library when a conversion error occurs.
Contains all utilities provided by the c++utilities library.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the termination character as well.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
Encodes the specified data to Base64.
CPP_UTILITIES_EXPORT std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< std::uint8_t[]>, std::uint32_t > decodeBase64(const char *encodedStr, const std::uint32_t strSize)
Decodes the specified Base64 encoded string.
STL namespace.
#define CPP_UTILITIES_THREAD_LOCAL