C++ Utilities 5.27.0
Useful C++ classes and routines such as argument parser, IO and conversion utilities
Loading...
Searching...
No Matches
stringconversion.cpp
Go to the documentation of this file.
2
3#ifndef CPP_UTILITIES_NO_THREAD_LOCAL
4#include "../feature_detection/features.h"
5#endif
6
7#ifndef CPP_UTILITIES_THREAD_LOCAL
8#define CPP_UTILITIES_THREAD_LOCAL
9#endif
10
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <limits>
#include <memory>
#include <new>
#include <sstream>
18
19#include <errno.h>
20
21#ifndef CPP_UTILITIES_NO_ICONV
22#include <iconv.h>
23#endif
24
25#ifdef PLATFORM_WINDOWS
26#include <windows.h>
27// note: The windows header seriously defines a macro called "max" breaking the (common) use
28// of std::numeric_limits in the subsequent code. So we need to undefine this macro. Note that
29// this is not the case using mingw-w64 but it is happening with windows.h from Windows Kits
30// version 10.0.22000.0 via Visual Studio 2022.
31#ifdef max
32#undef max
33#endif
34#endif
35
36using namespace std;
37
38namespace CppUtilities {
39
40#ifndef CPP_UTILITIES_NO_ICONV
41
43
/*!
 * \brief Output size hint which suggests an output buffer of the same size as the input.
 */
struct Keep {
    std::size_t operator()(std::size_t inputSize)
    {
        // the input size is passed through unchanged
        return inputSize;
    }
};
/*!
 * \brief Output size hint which suggests an output buffer twice as big as the input.
 */
struct Double {
    std::size_t operator()(std::size_t inputSize)
    {
        return 2 * inputSize;
    }
};
/*!
 * \brief Output size hint which suggests an output buffer half as big as the input (rounded down).
 */
struct Half {
    std::size_t operator()(std::size_t inputSize)
    {
        // shifting an unsigned value right by one is the same as the integer division by two
        return inputSize >> 1;
    }
};
/*!
 * \brief Output size hint which scales the input size by a caller-supplied factor.
 * \remarks The constructor is intentionally not explicit; convertString() passes a plain
 *          float where a Factor is expected.
 */
struct Factor {
    Factor(float factor)
        : factor(factor)
    {
    }

    std::size_t operator()(std::size_t inputSize)
    {
        // the product is computed in single precision and truncated towards zero
        return static_cast<std::size_t>(factor * static_cast<float>(inputSize));
    }

    float factor;
};
71
72template <class OutputSizeHint> class ConversionDescriptor {
73public:
74 ConversionDescriptor(const char *fromCharset, const char *toCharset)
75 : m_ptr(iconv_open(toCharset, fromCharset))
76 , m_outputSizeHint(OutputSizeHint())
77 {
78 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
79 throw ConversionException("Unable to allocate descriptor for character set conversion.");
80 }
81 }
82
83 ConversionDescriptor(const char *fromCharset, const char *toCharset, OutputSizeHint outputSizeHint)
84 : m_ptr(iconv_open(toCharset, fromCharset))
85 , m_outputSizeHint(outputSizeHint)
86 {
87 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
88 throw ConversionException("Unable to allocate descriptor for character set conversion.");
89 }
90 }
91
92 ~ConversionDescriptor()
93 {
94 iconv_close(m_ptr);
95 }
96
97public:
98 StringData convertString(const char *inputBuffer, size_t inputBufferSize)
99 {
100 // setup input and output buffer
101 size_t inputBytesLeft = inputBufferSize;
102 size_t outputSize = m_outputSizeHint(inputBufferSize);
103 size_t outputBytesLeft = outputSize;
104 char *outputBuffer = reinterpret_cast<char *>(malloc(outputSize));
105 size_t bytesWritten;
106
107 char *currentOutputOffset = outputBuffer;
108 for (;; currentOutputOffset = outputBuffer + bytesWritten) {
109 bytesWritten = iconv(m_ptr, const_cast<char **>(&inputBuffer), &inputBytesLeft, &currentOutputOffset, &outputBytesLeft);
110 if (bytesWritten == static_cast<size_t>(-1)) {
111 if (errno == EINVAL) {
112 // ignore incomplete multibyte sequence in the input
113 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
114 break;
115 } else if (errno == E2BIG) {
116 // output buffer has no more room for next converted character
117 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
118 outputBytesLeft = (outputSize += m_outputSizeHint(inputBytesLeft)) - bytesWritten;
119 outputBuffer = reinterpret_cast<char *>(realloc(outputBuffer, outputSize));
120 } else /*if(errno == EILSEQ)*/ {
121 // invalid multibyte sequence in the input
122 free(outputBuffer);
123 throw ConversionException("Invalid multibyte sequence in the input.");
124 }
125 } else {
126 // conversion completed without (further) errors
127 break;
128 }
129 }
130 return StringData(std::unique_ptr<char[], StringDataDeleter>(outputBuffer), currentOutputOffset - outputBuffer);
131 }
132
133private:
134 iconv_t m_ptr;
135 OutputSizeHint m_outputSizeHint;
136};
137
139
150 const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor)
151{
152 return ConversionDescriptor<Factor>(fromCharset, toCharset, outputBufferSizeFactor).convertString(inputBuffer, inputBufferSize);
153}
154
158StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
159{
160 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16LE");
161 return descriptor.convertString(inputBuffer, inputBufferSize);
162}
163
167StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
168{
169 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16LE", "UTF-8");
170 return descriptor.convertString(inputBuffer, inputBufferSize);
171}
172
176StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
177{
178 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16BE");
179 return descriptor.convertString(inputBuffer, inputBufferSize);
180}
181
185StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
186{
187 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16BE", "UTF-8");
188 return descriptor.convertString(inputBuffer, inputBufferSize);
189}
190
194StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
195{
196 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("ISO-8859-1", "UTF-8");
197 return descriptor.convertString(inputBuffer, inputBufferSize);
198}
199
203StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
204{
205 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("UTF-8", "ISO-8859-1");
206 return descriptor.convertString(inputBuffer, inputBufferSize);
207}
208
209#endif
210
211#ifdef PLATFORM_WINDOWS
218std::wstring convertMultiByteToWide(std::error_code &ec, std::string_view inputBuffer)
219{
220 // calculate required size
221 auto widePath = std::wstring();
222 auto bufferSize = static_cast<int>(std::clamp<std::size_t>(inputBuffer.size(), 0, std::numeric_limits<int>::max()));
223 auto size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), bufferSize, nullptr, 0);
224 if (size <= 0) {
225 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
226 return widePath;
227 }
228 // do the actual conversion
229 widePath.resize(static_cast<std::wstring::size_type>(size));
230 size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), bufferSize, widePath.data(), size);
231 if (size <= 0) {
232 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
233 widePath.clear();
234 }
235 return widePath;
236}
237
244WideStringData convertMultiByteToWide(std::error_code &ec, const char *inputBuffer, int inputBufferSize)
245{
246 // calculate required size
247 WideStringData widePath;
248 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, nullptr, 0);
249 if (widePath.second <= 0) {
250 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
251 return widePath;
252 }
253 // do the actual conversion
254 widePath.first = make_unique<wchar_t[]>(static_cast<size_t>(widePath.second));
255 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, widePath.first.get(), widePath.second);
256 if (widePath.second <= 0) {
257 ec = std::error_code(static_cast<int>(GetLastError()), std::system_category());
258 widePath.first.reset();
259 }
260 return widePath;
261}
262
267WideStringData convertMultiByteToWide(std::error_code &ec, const std::string &inputBuffer)
268{
269 return convertMultiByteToWide(ec, inputBuffer.data(),
270 inputBuffer.size() < static_cast<std::size_t>(std::numeric_limits<int>::max() - 1) ? static_cast<int>(inputBuffer.size() + 1) : -1);
271}
272
279WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize)
280{
281 std::error_code ec;
282 return convertMultiByteToWide(ec, inputBuffer, inputBufferSize);
283}
284
289WideStringData convertMultiByteToWide(const std::string &inputBuffer)
290{
291 std::error_code ec;
292 return convertMultiByteToWide(ec, inputBuffer);
293}
294#endif
295
/*!
 * \brief Cuts \a str off at the first occurrence of \a terminationChar; the termination
 *        character itself is removed as well.
 * \remarks The string is left unchanged if it does not contain \a terminationChar.
 */
void truncateString(std::string &str, char terminationChar)
{
    if (const auto terminationPos = str.find(terminationChar); terminationPos != std::string::npos) {
        str.erase(terminationPos);
    }
}
307
/*!
 * \brief Converts the specified data size in byte to its string representation.
 * \param sizeInByte The size in bytes.
 * \param includeByte Whether the exact number of bytes is appended in parentheses when the
 *        size is displayed in KiB or a bigger unit.
 * \returns E.g. "512 bytes", "1.50 KiB" or "1.50 KiB (1536 byte)". Sizes of 1 TiB and above
 *          are always displayed in TiB.
 */
std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte)
{
    constexpr std::uint64_t kib = 1024;
    constexpr std::uint64_t mib = kib * 1024;
    constexpr std::uint64_t gib = mib * 1024;
    constexpr std::uint64_t tib = gib * 1024;

    std::ostringstream res;
    res << std::fixed << std::setprecision(2);
    if (sizeInByte < kib) {
        res << sizeInByte << " bytes";
    } else if (sizeInByte < mib) {
        res << (static_cast<double>(sizeInByte) / 1024.0) << " KiB";
    } else if (sizeInByte < gib) {
        res << (static_cast<double>(sizeInByte) / 1048576.0) << " MiB";
    } else if (sizeInByte < tib) {
        res << (static_cast<double>(sizeInByte) / 1073741824.0) << " GiB";
    } else {
        res << (static_cast<double>(sizeInByte) / 1099511627776.0) << " TiB";
    }
    // append the exact size whenever a scaled unit was used above; this includes exactly
    // 1024 bytes, which is displayed as "1.00 KiB"
    if (includeByte && sizeInByte >= kib) {
        res << ' ' << '(' << sizeInByte << " byte)";
    }
    return res.str();
}
334
/*!
 * \brief Converts the specified bitrate in kbit/s to its string representation.
 * \param bitrateInKbitsPerSecond The bitrate in kbit/s.
 * \param useIecBinaryPrefixes Whether the result is expressed in byte/s, KiB/s, MiB/s and
 *        GiB/s instead of bit/s, kbit/s, Mbit/s and Gbit/s.
 * \returns The value with up to three significant digits followed by the unit, or
 *          "indeterminable" if the bitrate is NaN.
 */
std::string bitrateToString(double bitrateInKbitsPerSecond, bool useIecBinaryPrefixes)
{
    // a unit is used for values below its upper bound; the last entry of each table is the
    // fallback for everything the bounded entries do not cover
    struct Scale {
        double upperBound;
        double factor;
        const char *unit;
    };
    static constexpr Scale byteScales[4] = {
        { 8.0, 125.0, " byte/s" },
        { 8000.0, 0.125, " KiB/s" },
        { 8000000.0, 0.000125, " MiB/s" },
        { 0.0, 0.000000125, " GiB/s" },
    };
    static constexpr Scale bitScales[4] = {
        { 1.0, 1000.0, " bit/s" },
        { 1000.0, 1.0, " kbit/s" },
        { 1000000.0, 0.001, " Mbit/s" },
        { 0.0, 0.000001, " Gbit/s" },
    };

    if (std::isnan(bitrateInKbitsPerSecond)) {
        return "indeterminable";
    }

    const Scale *const scales = useIecBinaryPrefixes ? byteScales : bitScales;
    const Scale *const fallback = scales + 3;
    // yields the fallback entry if no bounded entry matches
    const Scale *const selected
        = std::find_if(scales, fallback, [bitrateInKbitsPerSecond](const Scale &scale) { return bitrateInKbitsPerSecond < scale.upperBound; });

    std::ostringstream formatted;
    formatted << std::setprecision(3) << (bitrateInKbitsPerSecond * selected->factor) << selected->unit;
    return formatted.str();
}
374
// alphabet used by encodeBase64(), indexed by the 6-bit value to encode
const char *const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// character used to fill up the last group of four characters
const char base64Pad = '=';

/*!
 * \brief Encodes the specified data to Base64.
 * \param data Start of the data to encode; only read if \a dataSize is not zero.
 * \param dataSize Number of bytes to encode.
 * \returns The encoded string, padded with '=' to a multiple of four characters.
 */
std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
{
    auto encoded = std::string();
    const auto trailingBytes = dataSize % 3;
    // computed in std::size_t so the character count cannot wrap around in 32 bits
    encoded.reserve((static_cast<std::size_t>(dataSize / 3) + (trailingBytes != 0 ? 1 : 0)) * 4);

    // encode all complete groups of three bytes as four characters each
    // note: The pointer is only ever advanced within [data, data + dataSize].
    for (const std::uint8_t *const groupsEnd = data + (dataSize - trailingBytes); data != groupsEnd; data += 3) {
        const auto group = (static_cast<std::uint32_t>(data[0]) << 16) | (static_cast<std::uint32_t>(data[1]) << 8) | data[2];
        encoded.push_back(base64Chars[(group >> 18) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 12) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 6) & 0x3F]);
        encoded.push_back(base64Chars[group & 0x3F]);
    }

    // encode the remaining one or two bytes and pad the group
    switch (trailingBytes) {
    case 1: {
        const auto group = static_cast<std::uint32_t>(data[0]) << 16;
        encoded.push_back(base64Chars[(group >> 18) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 12) & 0x3F]);
        encoded.push_back(base64Pad);
        encoded.push_back(base64Pad);
        break;
    }
    case 2: {
        const auto group = (static_cast<std::uint32_t>(data[0]) << 16) | (static_cast<std::uint32_t>(data[1]) << 8);
        encoded.push_back(base64Chars[(group >> 18) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 12) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 6) & 0x3F]);
        encoded.push_back(base64Pad);
        break;
    }
    default:;
    }
    return encoded;
}
418
424std::pair<unique_ptr<std::uint8_t[]>, std::uint32_t> decodeBase64(const char *encodedStr, const std::uint32_t strSize)
425{
426 if (!strSize) {
427 return std::make_pair(std::make_unique<std::uint8_t[]>(0), 0); // early return to prevent clazy warning
428 }
429 if (strSize % 4) {
430 throw ConversionException("invalid size of base64");
431 }
432 std::uint32_t decodedSize = (strSize / 4) * 3;
433 const char *const end = encodedStr + strSize;
434 if (*(end - 1) == base64Pad) {
435 --decodedSize;
436 }
437 if (*(end - 2) == base64Pad) {
438 --decodedSize;
439 }
440 auto buffer = std::make_unique<std::uint8_t[]>(decodedSize);
441 auto *iter = buffer.get() - 1;
442 while (encodedStr < end) {
443 std::int32_t temp = 0;
444 for (std::uint8_t quantumPos = 0; quantumPos < 4; ++quantumPos, ++encodedStr) {
445 temp <<= 6;
446 if (*encodedStr >= 'A' && *encodedStr <= 'Z') {
447 temp |= *encodedStr - 'A';
448 } else if (*encodedStr >= 'a' && *encodedStr <= 'z') {
449 temp |= *encodedStr - 'a' + 26;
450 } else if (*encodedStr >= '0' && *encodedStr <= '9') {
451 temp |= *encodedStr - '0' + 2 * 26;
452 } else if (*encodedStr == '+') {
453 temp |= 2 * 26 + 10;
454 } else if (*encodedStr == '/') {
455 temp |= 2 * 26 + 10 + 1;
456 } else if (*encodedStr == base64Pad) {
457 switch (end - encodedStr) {
458 case 1:
459 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
460 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
461 return std::make_pair(std::move(buffer), decodedSize);
462 case 2:
463 *++iter = static_cast<std::uint8_t>((temp >> 10) & 0xFF);
464 return std::make_pair(std::move(buffer), decodedSize);
465 default:
466 throw ConversionException("invalid padding in base64");
467 }
468 } else {
469 throw ConversionException("invalid character in base64");
470 }
471 }
472 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
473 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
474 *++iter = static_cast<std::uint8_t>(temp & 0xFF);
475 }
476 return std::make_pair(std::move(buffer), decodedSize);
477}
478} // namespace CppUtilities
The ConversionException class is thrown by the various conversion functions of this library when a co...
Contains all utilities provided by the c++utilities library.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< std::uint8_t[]>, std::uint32_t > decodeBase64(const char *encodedStr, const std::uint32_t strSize)
Decodes the specified Base64 encoded string.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the terminat...
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
Encodes the specified data to Base64.
CPP_UTILITIES_EXPORT std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
STL namespace.
#define CPP_UTILITIES_THREAD_LOCAL