C++ Utilities 5.26.1
Useful C++ classes and routines such as argument parser, IO and conversion utilities
Loading...
Searching...
No Matches
stringconversion.cpp
Go to the documentation of this file.
2
3#ifndef CPP_UTILITIES_NO_THREAD_LOCAL
4#include "../feature_detection/features.h"
5#endif
6
7#ifndef CPP_UTILITIES_THREAD_LOCAL
8#define CPP_UTILITIES_THREAD_LOCAL
9#endif
10
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <limits>
#include <memory>
#include <new>
#include <sstream>
18
19#include <errno.h>
20#include <iconv.h>
21
22#ifdef PLATFORM_WINDOWS
23#include <windows.h>
24// note: The windows header seriously defines a macro called "max" breaking the (common) use
25// of std::numeric_limits in the subsequent code. So we need to undefine this macro. Note that
26// this is not the case using mingw-w64 but it is happening with windows.h from Windows Kits
27// version 10.0.22000.0 via Visual Studio 2022.
28#ifdef max
29#undef max
30#endif
31#endif
32
33using namespace std;
34
35namespace CppUtilities {
36
38
/*!
 * \brief Output size hint which suggests an output buffer of exactly the input size.
 */
struct Keep {
    std::size_t operator()(std::size_t value)
    {
        // pass the size through unchanged
        return value;
    }
};
/*!
 * \brief Output size hint which suggests an output buffer of twice the input size.
 */
struct Double {
    std::size_t operator()(std::size_t value)
    {
        // unsigned arithmetic: identical to adding the value to itself
        return value * 2;
    }
};
/*!
 * \brief Output size hint which suggests an output buffer of half the input size (rounded down).
 */
struct Half {
    std::size_t operator()(std::size_t value)
    {
        // unsigned arithmetic: shifting right by one is the same as dividing by two
        return value >> 1;
    }
};
/*!
 * \brief Output size hint which scales the input size by a caller-provided factor.
 * \remarks The constructor is intentionally implicit so a plain float can be passed where
 *          a Factor is expected.
 */
struct Factor {
    Factor(float factor)
        : factor(factor)
    {
    }

    std::size_t operator()(std::size_t value)
    {
        // the multiplication is done in single precision; the result is truncated
        return static_cast<std::size_t>(static_cast<float>(value) * factor);
    }

    float factor;
};
66
67template <class OutputSizeHint> class ConversionDescriptor {
68public:
69 ConversionDescriptor(const char *fromCharset, const char *toCharset)
70 : m_ptr(iconv_open(toCharset, fromCharset))
71 , m_outputSizeHint(OutputSizeHint())
72 {
73 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
74 throw ConversionException("Unable to allocate descriptor for character set conversion.");
75 }
76 }
77
78 ConversionDescriptor(const char *fromCharset, const char *toCharset, OutputSizeHint outputSizeHint)
79 : m_ptr(iconv_open(toCharset, fromCharset))
80 , m_outputSizeHint(outputSizeHint)
81 {
82 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
83 throw ConversionException("Unable to allocate descriptor for character set conversion.");
84 }
85 }
86
87 ~ConversionDescriptor()
88 {
89 iconv_close(m_ptr);
90 }
91
92public:
93 StringData convertString(const char *inputBuffer, size_t inputBufferSize)
94 {
95 // setup input and output buffer
96 size_t inputBytesLeft = inputBufferSize;
97 size_t outputSize = m_outputSizeHint(inputBufferSize);
98 size_t outputBytesLeft = outputSize;
99 char *outputBuffer = reinterpret_cast<char *>(malloc(outputSize));
100 size_t bytesWritten;
101
102 char *currentOutputOffset = outputBuffer;
103 for (;; currentOutputOffset = outputBuffer + bytesWritten) {
104 bytesWritten = iconv(m_ptr, const_cast<char **>(&inputBuffer), &inputBytesLeft, &currentOutputOffset, &outputBytesLeft);
105 if (bytesWritten == static_cast<size_t>(-1)) {
106 if (errno == EINVAL) {
107 // ignore incomplete multibyte sequence in the input
108 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
109 break;
110 } else if (errno == E2BIG) {
111 // output buffer has no more room for next converted character
112 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
113 outputBytesLeft = (outputSize += m_outputSizeHint(inputBytesLeft)) - bytesWritten;
114 outputBuffer = reinterpret_cast<char *>(realloc(outputBuffer, outputSize));
115 } else /*if(errno == EILSEQ)*/ {
116 // invalid multibyte sequence in the input
117 free(outputBuffer);
118 throw ConversionException("Invalid multibyte sequence in the input.");
119 }
120 } else {
121 // conversion completed without (further) errors
122 break;
123 }
124 }
125 return StringData(std::unique_ptr<char[], StringDataDeleter>(outputBuffer), currentOutputOffset - outputBuffer);
126 }
127
128private:
129 iconv_t m_ptr;
130 OutputSizeHint m_outputSizeHint;
131};
132
134
145 const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor)
146{
147 return ConversionDescriptor<Factor>(fromCharset, toCharset, outputBufferSizeFactor).convertString(inputBuffer, inputBufferSize);
148}
149
153StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
154{
155 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16LE");
156 return descriptor.convertString(inputBuffer, inputBufferSize);
157}
158
162StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
163{
164 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16LE", "UTF-8");
165 return descriptor.convertString(inputBuffer, inputBufferSize);
166}
167
171StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
172{
173 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16BE");
174 return descriptor.convertString(inputBuffer, inputBufferSize);
175}
176
180StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
181{
182 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16BE", "UTF-8");
183 return descriptor.convertString(inputBuffer, inputBufferSize);
184}
185
189StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
190{
191 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("ISO-8859-1", "UTF-8");
192 return descriptor.convertString(inputBuffer, inputBufferSize);
193}
194
198StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
199{
200 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("UTF-8", "ISO-8859-1");
201 return descriptor.convertString(inputBuffer, inputBufferSize);
202}
203
204#ifdef PLATFORM_WINDOWS
211std::wstring convertMultiByteToWide(std::error_code &ec, std::string_view inputBuffer)
212{
213 // calculate required size
214 auto widePath = std::wstring();
215 auto bufferSize = static_cast<int>(std::clamp<std::size_t>(inputBuffer.size(), 0, std::numeric_limits<int>::max()));
216 auto size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), bufferSize, nullptr, 0);
217 if (size <= 0) {
218 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
219 return widePath;
220 }
221 // do the actual conversion
222 widePath.resize(static_cast<std::wstring::size_type>(size));
223 size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), bufferSize, widePath.data(), size);
224 if (size <= 0) {
225 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
226 widePath.clear();
227 }
228 return widePath;
229}
230
237WideStringData convertMultiByteToWide(std::error_code &ec, const char *inputBuffer, int inputBufferSize)
238{
239 // calculate required size
240 WideStringData widePath;
241 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, nullptr, 0);
242 if (widePath.second <= 0) {
243 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
244 return widePath;
245 }
246 // do the actual conversion
247 widePath.first = make_unique<wchar_t[]>(static_cast<size_t>(widePath.second));
248 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, widePath.first.get(), widePath.second);
249 if (widePath.second <= 0) {
250 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
251 widePath.first.reset();
252 }
253 return widePath;
254}
255
260WideStringData convertMultiByteToWide(std::error_code &ec, const std::string &inputBuffer)
261{
262 return convertMultiByteToWide(ec, inputBuffer.data(),
263 inputBuffer.size() < static_cast<std::size_t>(std::numeric_limits<int>::max() - 1) ? static_cast<int>(inputBuffer.size() + 1) : -1);
264}
265
272WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize)
273{
274 std::error_code ec;
275 return convertMultiByteToWide(ec, inputBuffer, inputBufferSize);
276}
277
282WideStringData convertMultiByteToWide(const std::string &inputBuffer)
283{
284 std::error_code ec;
285 return convertMultiByteToWide(ec, inputBuffer);
286}
287#endif
288
/*!
 * \brief Truncates all characters after the first occurrence of the specified \a terminationChar
 *        and the termination character itself.
 * \remarks The string is left unchanged if it does not contain \a terminationChar.
 */
void truncateString(std::string &str, char terminationChar)
{
    const auto terminatorPos = str.find(terminationChar);
    if (terminatorPos == std::string::npos) {
        return; // nothing to cut off
    }
    str.erase(terminatorPos);
}
300
/*!
 * \brief Converts the specified data size in byte to its equivalent std::string representation.
 * \param sizeInByte The size to format.
 * \param includeByte Whether the exact number of bytes should be appended in parentheses when
 *        the size is printed using a bigger unit (that is, for sizes of at least 1024 bytes).
 * \returns Returns the plain byte count for sizes below 1024 and otherwise the size with two
 *          decimal places in the biggest fitting unit of KiB, MiB, GiB and TiB.
 */
std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte)
{
    constexpr std::uint64_t kibi = 1024ull;
    constexpr std::uint64_t mebi = 1048576ull;
    constexpr std::uint64_t gibi = 1073741824ull;
    constexpr std::uint64_t tebi = 1099511627776ull;

    std::ostringstream res;
    res << std::fixed << std::setprecision(2);
    if (sizeInByte < kibi) {
        res << sizeInByte << " bytes";
    } else if (sizeInByte < mebi) {
        res << (static_cast<double>(sizeInByte) / 1024.0) << " KiB";
    } else if (sizeInByte < gibi) {
        res << (static_cast<double>(sizeInByte) / 1048576.0) << " MiB";
    } else if (sizeInByte < tebi) {
        res << (static_cast<double>(sizeInByte) / 1073741824.0) << " GiB";
    } else {
        res << (static_cast<double>(sizeInByte) / 1099511627776.0) << " TiB";
    }
    // append the exact count whenever a bigger unit was used; this includes exactly 1024 bytes
    // which is already printed as "1.00 KiB"
    if (includeByte && sizeInByte >= kibi) {
        res << ' ' << '(' << sizeInByte << " byte)";
    }
    return res.str();
}
327
/*!
 * \brief Converts the specified bitrate in kbit/s to its equivalent std::string representation.
 * \param bitrateInKbitsPerSecond The bitrate to format; NaN yields "indeterminable".
 * \param useIecBinaryPrefixes Whether the value is printed in byte-based units (byte/s, KiB/s,
 *        MiB/s, GiB/s) instead of bit-based units (bit/s, kbit/s, Mbit/s, Gbit/s).
 * \returns Returns the value with up to three significant digits followed by the unit.
 */
std::string bitrateToString(double bitrateInKbitsPerSecond, bool useIecBinaryPrefixes)
{
    std::ostringstream res;
    res << std::setprecision(3);

    if (std::isnan(bitrateInKbitsPerSecond)) {
        res << "indeterminable";
        return res.str();
    }

    // determine the value to print and its unit
    double scaledBitrate;
    const char *unit;
    if (useIecBinaryPrefixes) {
        if (bitrateInKbitsPerSecond < 8.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 125.0;
            unit = " byte/s";
        } else if (bitrateInKbitsPerSecond < 8000.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 0.125;
            unit = " KiB/s";
        } else if (bitrateInKbitsPerSecond < 8000000.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 0.000125;
            unit = " MiB/s";
        } else {
            scaledBitrate = bitrateInKbitsPerSecond * 0.000000125;
            unit = " GiB/s";
        }
    } else {
        if (bitrateInKbitsPerSecond < 1.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 1000.0;
            unit = " bit/s";
        } else if (bitrateInKbitsPerSecond < 1000.0) {
            scaledBitrate = bitrateInKbitsPerSecond;
            unit = " kbit/s";
        } else if (bitrateInKbitsPerSecond < 1000000.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 0.001;
            unit = " Mbit/s";
        } else {
            scaledBitrate = bitrateInKbitsPerSecond * 0.000001;
            unit = " Gbit/s";
        }
    }
    res << scaledBitrate << unit;
    return res.str();
}
367
//! The Base64 alphabet; the index of a character is the 6-bit value it encodes.
const char *const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
//! The character used to pad the last quantum of Base64 encoded data.
const char base64Pad = '=';

/*!
 * \brief Encodes the specified data to Base64.
 * \param data Pointer to the first byte to encode; only dereferenced if \a dataSize is not zero.
 * \param dataSize The number of bytes to encode.
 * \returns Returns the encoded data which is padded with '=' to a multiple of four characters.
 */
std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
{
    auto encoded = std::string();
    const auto mod = static_cast<std::uint8_t>(dataSize % 3);
    // compute the size using std::size_t so it does not wrap around within 32 bits for big inputs
    encoded.reserve((static_cast<std::size_t>(dataSize / 3) + (mod ? 1u : 0u)) * 4u);

    // encode all complete groups of three bytes as four characters each; the pointer is only
    // moved forward within the input so it never points before the beginning of the data
    for (const std::uint8_t *const end = data + (dataSize - mod); data != end; data += 3) {
        const std::uint32_t temp
            = (static_cast<std::uint32_t>(data[0]) << 16) | (static_cast<std::uint32_t>(data[1]) << 8) | static_cast<std::uint32_t>(data[2]);
        encoded.push_back(base64Chars[(temp & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(temp & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(temp & 0x00000FC0) >> 6]);
        encoded.push_back(base64Chars[(temp & 0x0000003F)]);
    }

    // encode the remaining one or two bytes and pad the quantum
    switch (mod) {
    case 1: {
        const std::uint32_t temp = static_cast<std::uint32_t>(data[0]) << 16;
        encoded.push_back(base64Chars[(temp & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(temp & 0x0003F000) >> 12]);
        encoded.push_back(base64Pad);
        encoded.push_back(base64Pad);
        break;
    }
    case 2: {
        const std::uint32_t temp = (static_cast<std::uint32_t>(data[0]) << 16) | (static_cast<std::uint32_t>(data[1]) << 8);
        encoded.push_back(base64Chars[(temp & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(temp & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(temp & 0x00000FC0) >> 6]);
        encoded.push_back(base64Pad);
        break;
    }
    default:;
    }
    return encoded;
}
411
417std::pair<unique_ptr<std::uint8_t[]>, std::uint32_t> decodeBase64(const char *encodedStr, const std::uint32_t strSize)
418{
419 if (!strSize) {
420 return std::make_pair(std::make_unique<std::uint8_t[]>(0), 0); // early return to prevent clazy warning
421 }
422 if (strSize % 4) {
423 throw ConversionException("invalid size of base64");
424 }
425 std::uint32_t decodedSize = (strSize / 4) * 3;
426 const char *const end = encodedStr + strSize;
427 if (*(end - 1) == base64Pad) {
428 --decodedSize;
429 }
430 if (*(end - 2) == base64Pad) {
431 --decodedSize;
432 }
433 auto buffer = std::make_unique<std::uint8_t[]>(decodedSize);
434 auto *iter = buffer.get() - 1;
435 while (encodedStr < end) {
436 std::int32_t temp = 0;
437 for (std::uint8_t quantumPos = 0; quantumPos < 4; ++quantumPos, ++encodedStr) {
438 temp <<= 6;
439 if (*encodedStr >= 'A' && *encodedStr <= 'Z') {
440 temp |= *encodedStr - 'A';
441 } else if (*encodedStr >= 'a' && *encodedStr <= 'z') {
442 temp |= *encodedStr - 'a' + 26;
443 } else if (*encodedStr >= '0' && *encodedStr <= '9') {
444 temp |= *encodedStr - '0' + 2 * 26;
445 } else if (*encodedStr == '+') {
446 temp |= 2 * 26 + 10;
447 } else if (*encodedStr == '/') {
448 temp |= 2 * 26 + 10 + 1;
449 } else if (*encodedStr == base64Pad) {
450 switch (end - encodedStr) {
451 case 1:
452 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
453 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
454 return std::make_pair(std::move(buffer), decodedSize);
455 case 2:
456 *++iter = static_cast<std::uint8_t>((temp >> 10) & 0xFF);
457 return std::make_pair(std::move(buffer), decodedSize);
458 default:
459 throw ConversionException("invalid padding in base64");
460 }
461 } else {
462 throw ConversionException("invalid character in base64");
463 }
464 }
465 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
466 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
467 *++iter = static_cast<std::uint8_t>(temp & 0xFF);
468 }
469 return std::make_pair(std::move(buffer), decodedSize);
470}
471} // namespace CppUtilities
The ConversionException class is thrown by the various conversion functions of this library when a co...
Contains all utilities provided by the c++utilities library.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< std::uint8_t[]>, std::uint32_t > decodeBase64(const char *encodedStr, const std::uint32_t strSize)
Decodes the specified Base64 encoded string.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the terminat...
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
Encodes the specified data to Base64.
CPP_UTILITIES_EXPORT std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
STL namespace.
#define CPP_UTILITIES_THREAD_LOCAL