mirror of
				https://github.com/dolphin-emu/dolphin.git
				synced 2025-10-24 17:09:06 +00:00 
			
		
		
		
	Add a function that safely returns whether a character is printable i.e. whether 0x20 <= c <= 0x7e is true. This is done in several places in our codebase and it's easy to run into undefined behaviour if the C version defined in <cctype> is used instead of this one, since its behaviour is undefined if the character is not representable as an unsigned char. This fixes MemoryViewWidget.
		
			
				
	
	
		
			603 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			603 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2008 Dolphin Emulator Project
 | |
| // Licensed under GPLv2+
 | |
| // Refer to the license.txt file included.
 | |
| 
 | |
| #include "Common/StringUtil.h"
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <cstdarg>
 | |
| #include <cstddef>
 | |
| #include <cstdio>
 | |
| #include <cstdlib>
 | |
| #include <cstring>
 | |
| #include <iomanip>
 | |
| #include <istream>
 | |
| #include <iterator>
 | |
| #include <limits.h>
 | |
| #include <locale>
 | |
| #include <sstream>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| 
 | |
| #include <fmt/format.h>
 | |
| 
 | |
| #include "Common/CommonFuncs.h"
 | |
| #include "Common/CommonPaths.h"
 | |
| #include "Common/CommonTypes.h"
 | |
| #include "Common/Logging/Log.h"
 | |
| #include "Common/Swap.h"
 | |
| 
 | |
| #ifdef _WIN32
 | |
| #include <Windows.h>
 | |
| constexpr u32 CODEPAGE_SHIFT_JIS = 932;
 | |
| constexpr u32 CODEPAGE_WINDOWS_1252 = 1252;
 | |
| #else
 | |
| #include <codecvt>
 | |
| #include <errno.h>
 | |
| #include <iconv.h>
 | |
| #include <locale.h>
 | |
| #endif
 | |
| 
 | |
| #if !defined(_WIN32) && !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__)
 | |
// Returns a handle for the "C" locale. The handle is created by newlocale() the first time this
// function runs and the very same handle is handed back on every later call.
static locale_t GetCLocale()
{
  static const locale_t s_c_locale = newlocale(LC_ALL_MASK, "C", nullptr);
  return s_c_locale;
}
 | |
| #endif
 | |
| 
 | |
| std::string HexDump(const u8* data, size_t size)
 | |
| {
 | |
|   constexpr size_t BYTES_PER_LINE = 16;
 | |
| 
 | |
|   std::string out;
 | |
|   for (size_t row_start = 0; row_start < size; row_start += BYTES_PER_LINE)
 | |
|   {
 | |
|     out += fmt::format("{:06x}: ", row_start);
 | |
|     for (size_t i = 0; i < BYTES_PER_LINE; ++i)
 | |
|     {
 | |
|       if (row_start + i < size)
 | |
|       {
 | |
|         out += fmt::format("{:02x} ", data[row_start + i]);
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         out += "   ";
 | |
|       }
 | |
|     }
 | |
|     out += " ";
 | |
|     for (size_t i = 0; i < BYTES_PER_LINE; ++i)
 | |
|     {
 | |
|       if (row_start + i < size)
 | |
|       {
 | |
|         char c = static_cast<char>(data[row_start + i]);
 | |
|         out += IsPrintableCharacter(c) ? c : '.';
 | |
|       }
 | |
|     }
 | |
|     out += "\n";
 | |
|   }
 | |
|   return out;
 | |
| }
 | |
| 
 | |
| // faster than sscanf
 | |
| bool AsciiToHex(const std::string& _szValue, u32& result)
 | |
| {
 | |
|   // Set errno to a good state.
 | |
|   errno = 0;
 | |
| 
 | |
|   char* endptr = nullptr;
 | |
|   const u32 value = strtoul(_szValue.c_str(), &endptr, 16);
 | |
| 
 | |
|   if (!endptr || *endptr)
 | |
|     return false;
 | |
| 
 | |
|   if (errno == ERANGE)
 | |
|     return false;
 | |
| 
 | |
|   result = value;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
 | |
| {
 | |
|   int writtenCount;
 | |
| 
 | |
| #ifdef _WIN32
 | |
|   // You would think *printf are simple, right? Iterate on each character,
 | |
|   // if it's a format specifier handle it properly, etc.
 | |
|   //
 | |
|   // Nooooo. Not according to the C standard.
 | |
|   //
 | |
|   // According to the C99 standard (7.19.6.1 "The fprintf function")
 | |
|   //     The format shall be a multibyte character sequence
 | |
|   //
 | |
|   // Because some character encodings might have '%' signs in the middle of
 | |
|   // a multibyte sequence (SJIS for example only specifies that the first
 | |
|   // byte of a 2 byte sequence is "high", the second byte can be anything),
 | |
|   // printf functions have to decode the multibyte sequences and try their
 | |
|   // best to not screw up.
 | |
|   //
 | |
|   // Unfortunately, on Windows, the locale for most languages is not UTF-8
 | |
|   // as we would need. Notably, for zh_TW, Windows chooses EUC-CN as the
 | |
|   // locale, and completely fails when trying to decode UTF-8 as EUC-CN.
 | |
|   //
 | |
|   // On the other hand, the fix is simple: because we use UTF-8, no such
 | |
|   // multibyte handling is required as we can simply assume that no '%' char
 | |
|   // will be present in the middle of a multibyte sequence.
 | |
|   //
 | |
|   // This is why we look up the default C locale here and use _vsnprintf_l.
 | |
|   static _locale_t c_locale = nullptr;
 | |
|   if (!c_locale)
 | |
|     c_locale = _create_locale(LC_ALL, "C");
 | |
|   writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
 | |
| #else
 | |
| #if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__)
 | |
|   locale_t previousLocale = uselocale(GetCLocale());
 | |
| #endif
 | |
|   writtenCount = vsnprintf(out, outsize, format, args);
 | |
| #if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__)
 | |
|   uselocale(previousLocale);
 | |
| #endif
 | |
| #endif
 | |
| 
 | |
|   if (writtenCount > 0 && writtenCount < outsize)
 | |
|   {
 | |
|     out[writtenCount] = '\0';
 | |
|     return true;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     out[outsize - 1] = '\0';
 | |
|     return false;
 | |
|   }
 | |
| }
 | |
| 
 | |
| std::string StringFromFormat(const char* format, ...)
 | |
| {
 | |
|   va_list args;
 | |
|   va_start(args, format);
 | |
|   std::string res = StringFromFormatV(format, args);
 | |
|   va_end(args);
 | |
|   return res;
 | |
| }
 | |
| 
 | |
| std::string StringFromFormatV(const char* format, va_list args)
 | |
| {
 | |
|   char* buf = nullptr;
 | |
| #ifdef _WIN32
 | |
|   int required = _vscprintf(format, args);
 | |
|   buf = new char[required + 1];
 | |
|   CharArrayFromFormatV(buf, required + 1, format, args);
 | |
| 
 | |
|   std::string temp = buf;
 | |
|   delete[] buf;
 | |
| #else
 | |
| #if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__)
 | |
|   locale_t previousLocale = uselocale(GetCLocale());
 | |
| #endif
 | |
|   if (vasprintf(&buf, format, args) < 0)
 | |
|   {
 | |
|     ERROR_LOG(COMMON, "Unable to allocate memory for string");
 | |
|     buf = nullptr;
 | |
|   }
 | |
| 
 | |
| #if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__)
 | |
|   uselocale(previousLocale);
 | |
| #endif
 | |
| 
 | |
|   std::string temp = buf;
 | |
|   free(buf);
 | |
| #endif
 | |
|   return temp;
 | |
| }
 | |
| 
 | |
| // For Debugging. Read out an u8 array.
 | |
| std::string ArrayToString(const u8* data, u32 size, int line_len, bool spaces)
 | |
| {
 | |
|   std::ostringstream oss;
 | |
|   oss << std::setfill('0') << std::hex;
 | |
| 
 | |
|   for (int line = 0; size; ++data, --size)
 | |
|   {
 | |
|     oss << std::setw(2) << static_cast<int>(*data);
 | |
| 
 | |
|     if (line_len == ++line)
 | |
|     {
 | |
|       oss << '\n';
 | |
|       line = 0;
 | |
|     }
 | |
|     else if (spaces)
 | |
|       oss << ' ';
 | |
|   }
 | |
| 
 | |
|   return oss.str();
 | |
| }
 | |
| 
 | |
// Turns "  hello " into "hello". Spaces, tabs, carriage returns and line feeds are removed from
// both ends; input consisting only of those characters yields an empty view.
std::string_view StripSpaces(std::string_view str)
{
  constexpr std::string_view whitespace = " \t\r\n";

  const size_t first = str.find_first_not_of(whitespace);
  if (first == std::string_view::npos)
    return "";

  const size_t last = str.find_last_not_of(whitespace);
  return str.substr(first, last - first + 1);
}
 | |
| 
 | |
// "\"hello\"" is turned to "hello"
// This one assumes that the string has already been space stripped in both
// ends, as done by StripSpaces above, for example.
// Only a string that both starts and ends with a double quote, and is at least two characters
// long, is unwrapped; everything else (including a lone quote character) is returned unchanged.
std::string_view StripQuotes(std::string_view s)
{
  // Requiring two characters keeps `s.size() - 2` from wrapping around for a one-character input.
  if (s.size() >= 2 && s.front() == '\"' && s.back() == '\"')
    return s.substr(1, s.size() - 2);

  return s;
}
 | |
| 
 | |
| bool TryParse(const std::string& str, bool* const output)
 | |
| {
 | |
|   float value;
 | |
|   const bool is_valid_float = TryParse(str, &value);
 | |
|   if ((is_valid_float && value == 1) || !strcasecmp("true", str.c_str()))
 | |
|     *output = true;
 | |
|   else if ((is_valid_float && value == 0) || !strcasecmp("false", str.c_str()))
 | |
|     *output = false;
 | |
|   else
 | |
|     return false;
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| std::string ValueToString(u16 value)
 | |
| {
 | |
|   return fmt::format("0x{:04x}", value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(u32 value)
 | |
| {
 | |
|   return fmt::format("0x{:08x}", value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(u64 value)
 | |
| {
 | |
|   return fmt::format("0x{:016x}", value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(float value)
 | |
| {
 | |
|   return fmt::format("{:#.9g}", value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(double value)
 | |
| {
 | |
|   return fmt::format("{:#.17g}", value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(int value)
 | |
| {
 | |
|   return std::to_string(value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(s64 value)
 | |
| {
 | |
|   return std::to_string(value);
 | |
| }
 | |
| 
 | |
| std::string ValueToString(bool value)
 | |
| {
 | |
|   return value ? "True" : "False";
 | |
| }
 | |
| 
 | |
// Splits `full_path` into directory (including the trailing separator), file name (without
// extension) and extension (including the leading dot). Each output pointer may be null, in which
// case that component is simply not reported. Returns false only for an empty `full_path`.
bool SplitPath(std::string_view full_path, std::string* path, std::string* filename,
               std::string* extension)
{
  if (full_path.empty())
    return false;

#ifdef _WIN32
  // Windows needs the : included for something like just "C:" to be considered a directory
  constexpr std::string_view separators = "/:";
#else
  constexpr std::string_view separators = "/";
#endif

  // Index of the first character after the directory part.
  const size_t last_separator = full_path.find_last_of(separators);
  const size_t name_begin = (last_separator == std::string_view::npos) ? 0 : last_separator + 1;

  // A dot only starts an extension when it is located inside the file name part.
  const size_t last_dot = full_path.rfind('.');
  const bool has_extension = last_dot != std::string_view::npos && last_dot >= name_begin;
  const size_t name_end = has_extension ? last_dot : full_path.size();

  if (path)
    *path = full_path.substr(0, name_begin);

  if (filename)
    *filename = full_path.substr(name_begin, name_end - name_begin);

  if (extension)
    *extension = full_path.substr(name_end);

  return true;
}
 | |
| 
 | |
| void BuildCompleteFilename(std::string& complete_filename, std::string_view path,
 | |
|                            std::string_view filename)
 | |
| {
 | |
|   complete_filename = path;
 | |
| 
 | |
|   // check for seperator
 | |
|   if (DIR_SEP_CHR != *complete_filename.rbegin())
 | |
|     complete_filename += DIR_SEP_CHR;
 | |
| 
 | |
|   // add the filename
 | |
|   complete_filename += filename;
 | |
| }
 | |
| 
 | |
// Splits `str` at every occurrence of `delim`. Empty fields between or before delimiters are
// kept; an empty field after a final delimiter is not produced, and an empty input yields an
// empty vector.
std::vector<std::string> SplitString(const std::string& str, const char delim)
{
  std::vector<std::string> fields;

  std::istringstream stream(str);
  std::string field;
  while (std::getline(stream, field, delim))
    fields.push_back(field);

  return fields;
}
 | |
| 
 | |
// Concatenates `strings`, placing `delimiter` between consecutive elements (but not after the
// last one). An empty vector yields an empty string. The delimiter is used with its full length,
// so it may contain any characters, including embedded NULs.
std::string JoinStrings(const std::vector<std::string>& strings, const std::string& delimiter)
{
  if (strings.empty())
    return "";

  // Compute the final length up front so the result is allocated once.
  size_t total_length = delimiter.size() * (strings.size() - 1);
  for (const std::string& piece : strings)
    total_length += piece.size();

  std::string joined;
  joined.reserve(total_length);

  auto piece = strings.begin();
  joined += *piece;
  for (++piece; piece != strings.end(); ++piece)
  {
    joined += delimiter;
    joined += *piece;
  }

  return joined;
}
 | |
| 
 | |
// Returns `str` with every tab character replaced by `tab_size` spaces.
std::string TabsToSpaces(int tab_size, std::string str)
{
  const std::string spaces(tab_size, ' ');

  // Resume the search right after the inserted spaces; they cannot contain a tab, so there is no
  // need to rescan the part of the string that has already been processed.
  size_t pos = 0;
  while ((pos = str.find('\t', pos)) != std::string::npos)
  {
    str.replace(pos, 1, spaces);
    pos += spaces.size();
  }

  return str;
}
 | |
| 
 | |
// Returns `result` with every non-overlapping occurrence of `src` replaced by `dest`, scanning
// from left to right. Text inserted by a replacement is never searched again. When `src` is empty
// or equal to `dest` there is nothing to do and `result` is returned unchanged.
std::string ReplaceAll(std::string result, std::string_view src, std::string_view dest)
{
  // An empty pattern matches at every position, which would keep inserting `dest` forever.
  if (src.empty() || src == dest)
    return result;

  size_t pos = 0;
  while ((pos = result.find(src, pos)) != std::string::npos)
  {
    result.replace(pos, src.size(), dest);
    // Continue behind the replacement so it is not matched again.
    pos += dest.length();
  }

  return result;
}
 | |
| 
 | |
// True when the first `begin.size()` characters of `str` are exactly `begin`.
// An empty `begin` matches every string.
bool StringBeginsWith(std::string_view str, std::string_view begin)
{
  if (str.size() < begin.size())
    return false;

  return str.substr(0, begin.size()) == begin;
}
 | |
| 
 | |
// True when the last `end.size()` characters of `str` are exactly `end`.
// An empty `end` matches every string.
bool StringEndsWith(std::string_view str, std::string_view end)
{
  if (str.size() < end.size())
    return false;

  return str.substr(str.size() - end.size()) == end;
}
 | |
| 
 | |
// Removes the last character of `*s`, but only if the string is non-empty and that character
// equals `c`.
void StringPopBackIf(std::string* s, char c)
{
  if (s->empty())
    return;

  if (s->back() != c)
    return;

  s->pop_back();
}
 | |
| 
 | |
| #ifdef _WIN32
 | |
| 
 | |
| std::wstring CPToUTF16(u32 code_page, std::string_view input)
 | |
| {
 | |
|   auto const size =
 | |
|       MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0);
 | |
| 
 | |
|   std::wstring output;
 | |
|   output.resize(size);
 | |
| 
 | |
|   if (size == 0 ||
 | |
|       size != MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()),
 | |
|                                   &output[0], static_cast<int>(output.size())))
 | |
|   {
 | |
|     output.clear();
 | |
|   }
 | |
| 
 | |
|   return output;
 | |
| }
 | |
| 
 | |
| std::string UTF16ToCP(u32 code_page, std::wstring_view input)
 | |
| {
 | |
|   std::string output;
 | |
| 
 | |
|   if (0 != input.size())
 | |
|   {
 | |
|     // "If cchWideChar [input buffer size] is set to 0, the function fails." -MSDN
 | |
|     auto const size = WideCharToMultiByte(
 | |
|         code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0, nullptr, false);
 | |
| 
 | |
|     output.resize(size);
 | |
| 
 | |
|     if (size != WideCharToMultiByte(code_page, 0, input.data(), static_cast<int>(input.size()),
 | |
|                                     &output[0], static_cast<int>(output.size()), nullptr, nullptr))
 | |
|     {
 | |
|       const DWORD error_code = GetLastError();
 | |
|       ERROR_LOG(COMMON, "WideCharToMultiByte Error in String '%s': %lu",
 | |
|                 std::wstring(input).c_str(), error_code);
 | |
|       output.clear();
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return output;
 | |
| }
 | |
| 
 | |
| std::wstring UTF8ToUTF16(std::string_view input)
 | |
| {
 | |
|   return CPToUTF16(CP_UTF8, input);
 | |
| }
 | |
| 
 | |
| std::string UTF16ToUTF8(std::wstring_view input)
 | |
| {
 | |
|   return UTF16ToCP(CP_UTF8, input);
 | |
| }
 | |
| 
 | |
| std::string SHIFTJISToUTF8(std::string_view input)
 | |
| {
 | |
|   return UTF16ToUTF8(CPToUTF16(CODEPAGE_SHIFT_JIS, input));
 | |
| }
 | |
| 
 | |
| std::string UTF8ToSHIFTJIS(std::string_view input)
 | |
| {
 | |
|   return UTF16ToCP(CODEPAGE_SHIFT_JIS, UTF8ToUTF16(input));
 | |
| }
 | |
| 
 | |
| std::string CP1252ToUTF8(std::string_view input)
 | |
| {
 | |
|   return UTF16ToUTF8(CPToUTF16(CODEPAGE_WINDOWS_1252, input));
 | |
| }
 | |
| 
 | |
| std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
 | |
| {
 | |
|   const char16_t* str_end = std::find(str, str + max_size, '\0');
 | |
|   std::wstring result(static_cast<size_t>(str_end - str), '\0');
 | |
|   std::transform(str, str_end, result.begin(), static_cast<u16 (&)(u16)>(Common::swap16));
 | |
|   return UTF16ToUTF8(result);
 | |
| }
 | |
| 
 | |
| #else
 | |
| 
 | |
| template <typename T>
 | |
| std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_view<T> input)
 | |
| {
 | |
|   std::string result;
 | |
| 
 | |
|   iconv_t const conv_desc = iconv_open(tocode, fromcode);
 | |
|   if ((iconv_t)-1 == conv_desc)
 | |
|   {
 | |
|     ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     size_t const in_bytes = sizeof(T) * input.size();
 | |
|     size_t const out_buffer_size = 4 * in_bytes;
 | |
| 
 | |
|     std::string out_buffer;
 | |
|     out_buffer.resize(out_buffer_size);
 | |
| 
 | |
|     auto src_buffer = input.data();
 | |
|     size_t src_bytes = in_bytes;
 | |
|     auto dst_buffer = out_buffer.data();
 | |
|     size_t dst_bytes = out_buffer.size();
 | |
| 
 | |
|     while (src_bytes != 0)
 | |
|     {
 | |
|       size_t const iconv_result =
 | |
|           iconv(conv_desc, (char**)(&src_buffer), &src_bytes, &dst_buffer, &dst_bytes);
 | |
| 
 | |
|       if ((size_t)-1 == iconv_result)
 | |
|       {
 | |
|         if (EILSEQ == errno || EINVAL == errno)
 | |
|         {
 | |
|           // Try to skip the bad character
 | |
|           if (src_bytes != 0)
 | |
|           {
 | |
|             --src_bytes;
 | |
|             ++src_buffer;
 | |
|           }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
 | |
|           break;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     out_buffer.resize(out_buffer_size - dst_bytes);
 | |
|     out_buffer.swap(result);
 | |
| 
 | |
|     iconv_close(conv_desc);
 | |
|   }
 | |
| 
 | |
|   return result;
 | |
| }
 | |
| 
 | |
// Shorthand for CodeTo with "UTF-8" as the target encoding.
template <typename T>
std::string CodeToUTF8(const char* fromcode, std::basic_string_view<T> input)
{
  constexpr const char* target_encoding = "UTF-8";
  return CodeTo(target_encoding, fromcode, input);
}
 | |
| 
 | |
| std::string CP1252ToUTF8(std::string_view input)
 | |
| {
 | |
|   // return CodeToUTF8("CP1252//TRANSLIT", input);
 | |
|   // return CodeToUTF8("CP1252//IGNORE", input);
 | |
|   return CodeToUTF8("CP1252", input);
 | |
| }
 | |
| 
 | |
| std::string SHIFTJISToUTF8(std::string_view input)
 | |
| {
 | |
|   // return CodeToUTF8("CP932", input);
 | |
|   return CodeToUTF8("SJIS", input);
 | |
| }
 | |
| 
 | |
| std::string UTF8ToSHIFTJIS(std::string_view input)
 | |
| {
 | |
|   return CodeTo("SJIS", "UTF-8", input);
 | |
| }
 | |
| 
 | |
// Converts a wide string to UTF-8 with std::wstring_convert and std::codecvt_utf8<wchar_t>.
std::string UTF16ToUTF8(std::wstring_view input)
{
  using Utf8Converter = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;

  const wchar_t* const first = input.data();
  const wchar_t* const last = first + input.size();

  Utf8Converter converter;
  return converter.to_bytes(first, last);
}
 | |
| 
 | |
| std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
 | |
| {
 | |
|   const char16_t* str_end = std::find(str, str + max_size, '\0');
 | |
|   return CodeToUTF8("UTF-16BE", std::u16string_view(str, static_cast<size_t>(str_end - str)));
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef HAS_STD_FILESYSTEM
 | |
// This is a replacement for path::u8path, which is deprecated starting with C++20.
// With MSVC the text is first converted by UTF8ToUTF16; with other compilers the path is
// constructed directly from the given characters.
std::filesystem::path StringToPath(std::string_view path)
{
#ifdef _MSC_VER
  return std::filesystem::path(UTF8ToUTF16(path));
#else
  std::filesystem::path result(path);
  return result;
#endif
}
 | |
| 
 | |
// This is a replacement for path::u8string that always has the return type std::string.
// path::u8string returns std::u8string starting with C++20, which is annoying to convert.
// With MSVC the native representation is converted by UTF16ToUTF8; with other compilers the
// native representation is returned as it is.
std::string PathToString(const std::filesystem::path& path)
{
#ifdef _MSC_VER
  return UTF16ToUTF8(path.native());
#else
  const auto& native_text = path.native();
  return native_text;
#endif
}
 | |
| #endif
 |