diff --git a/rpcs3/Emu/SysCalls/Modules/cellL10n.cpp b/rpcs3/Emu/SysCalls/Modules/cellL10n.cpp index 5a25c36c76..696483a1f6 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellL10n.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellL10n.cpp @@ -6,8 +6,11 @@ #include #endif -#ifndef _MSC_VER +#ifdef _MSC_VER +typedef int HostCode; +#else #include +typedef const char *HostCode; #endif #include "cellL10n.h" @@ -16,73 +19,9 @@ extern Module<> cellL10n; // Translate code id to code name. some codepage may has another name. // If this makes your compilation fail, try replace the string code with one in "iconv -l" -bool _L10nCodeParse(s32 code, std::string& retCode) -{ - if ((code >= _L10N_CODE_) || (code < 0)) return false; - switch (code) - { - // I don't know these Unicode Variants is LB or BE. - case L10N_UTF8: retCode = "UTF-8"; return true; - case L10N_UTF16: retCode = "UTF-16"; return true; - case L10N_UTF32: retCode = "UTF-32"; return true; - case L10N_UCS2: retCode = "UCS-2"; return true; - case L10N_UCS4: retCode = "UCS-4"; return true; - case L10N_ISO_8859_1: retCode = "ISO-8859-1"; return true; - case L10N_ISO_8859_2: retCode = "ISO-8859-2"; return true; - case L10N_ISO_8859_3: retCode = "ISO-8859-3"; return true; - case L10N_ISO_8859_4: retCode = "ISO-8859-4"; return true; - case L10N_ISO_8859_5: retCode = "ISO-8859-5"; return true; - case L10N_ISO_8859_6: retCode = "ISO-8859-6"; return true; - case L10N_ISO_8859_7: retCode = "ISO-8859-7"; return true; - case L10N_ISO_8859_8: retCode = "ISO-8859-8"; return true; - case L10N_ISO_8859_9: retCode = "ISO-8859-9"; return true; - case L10N_ISO_8859_10: retCode = "ISO-8859-10"; return true; - case L10N_ISO_8859_11: retCode = "ISO-8859-11"; return true; - case L10N_ISO_8859_13: retCode = "ISO-8859-13"; return true; // No ISO-8859-12 ha ha. 
- case L10N_ISO_8859_14: retCode = "ISO-8859-14"; return true; - case L10N_ISO_8859_15: retCode = "ISO-8859-15"; return true; - case L10N_ISO_8859_16: retCode = "ISO-8859-16"; return true; - case L10N_CODEPAGE_437: retCode = "CP437"; return true; - case L10N_CODEPAGE_850: retCode = "CP850"; return true; - case L10N_CODEPAGE_863: retCode = "CP863"; return true; - case L10N_CODEPAGE_866: retCode = "CP866"; return true; - case L10N_CODEPAGE_932: retCode = "CP932"; return true; - case L10N_CODEPAGE_936: retCode = "CP936"; return true; - case L10N_CODEPAGE_949: retCode = "CP949"; return true; - case L10N_CODEPAGE_950: retCode = "CP950"; return true; - case L10N_CODEPAGE_1251:retCode = "CP1251"; return true; // CYRL - case L10N_CODEPAGE_1252:retCode = "CP1252"; return true; // ANSI - case L10N_EUC_CN: retCode = "EUC-CN"; return true; // GB2312 - case L10N_EUC_JP: retCode = "EUC-JP"; return true; - case L10N_EUC_KR: retCode = "EUC-KR"; return true; - case L10N_ISO_2022_JP: retCode = "ISO-2022-JP"; return true; - case L10N_ARIB: retCode = "ARABIC"; return true; // TODO: think that should be ARABIC. 
- case L10N_HZ: retCode = "HZ"; return true; - case L10N_GB18030: retCode = "GB18030"; return true; - case L10N_RIS_506: retCode = "SHIFT-JIS"; return true; // MusicShiftJIS, MS_KANJI - // These are only supported with FW 3.10 and above - case L10N_CODEPAGE_852: retCode = "CP852"; return true; - case L10N_CODEPAGE_1250:retCode = "CP1250"; return true; // EE - case L10N_CODEPAGE_737: retCode = "CP737"; return true; - case L10N_CODEPAGE_1253:retCode = "CP1253"; return true; // Greek - case L10N_CODEPAGE_857: retCode = "CP857"; return true; - case L10N_CODEPAGE_1254:retCode = "CP1254"; return true; // Turk - case L10N_CODEPAGE_775: retCode = "CP775"; return true; - case L10N_CODEPAGE_1257:retCode = "CP1257"; return true; // WINBALTRIM - case L10N_CODEPAGE_855: retCode = "CP855"; return true; - case L10N_CODEPAGE_858: retCode = "CP858"; return true; - case L10N_CODEPAGE_860: retCode = "CP860"; return true; - case L10N_CODEPAGE_861: retCode = "CP861"; return true; - case L10N_CODEPAGE_865: retCode = "CP865"; return true; - case L10N_CODEPAGE_869: retCode = "CP869"; return true; - default: return false; - } -} - -// Translate code id to code name. -// If this makes your compilation fail, try replace the string code with one in "iconv -l" -bool _L10nCodeParse(s32 code, u32& retCode) +bool _L10nCodeParse(s32 code, HostCode& retCode) { +#ifdef _MSC_VER retCode = 0; if ((code >= _L10N_CODE_) || (code < 0)) return false; switch (code) @@ -144,13 +83,73 @@ bool _L10nCodeParse(s32 code, u32& retCode) case L10N_CODEPAGE_869: retCode = 869; return true; default: return false; } +#else + if ((code >= _L10N_CODE_) || (code < 0)) return false; + switch (code) + { + // I don't know these Unicode Variants is LB or BE. 
+ case L10N_UTF8: retCode = "UTF-8"; return true; + case L10N_UTF16: retCode = "UTF-16"; return true; + case L10N_UTF32: retCode = "UTF-32"; return true; + case L10N_UCS2: retCode = "UCS-2"; return true; + case L10N_UCS4: retCode = "UCS-4"; return true; + case L10N_ISO_8859_1: retCode = "ISO-8859-1"; return true; + case L10N_ISO_8859_2: retCode = "ISO-8859-2"; return true; + case L10N_ISO_8859_3: retCode = "ISO-8859-3"; return true; + case L10N_ISO_8859_4: retCode = "ISO-8859-4"; return true; + case L10N_ISO_8859_5: retCode = "ISO-8859-5"; return true; + case L10N_ISO_8859_6: retCode = "ISO-8859-6"; return true; + case L10N_ISO_8859_7: retCode = "ISO-8859-7"; return true; + case L10N_ISO_8859_8: retCode = "ISO-8859-8"; return true; + case L10N_ISO_8859_9: retCode = "ISO-8859-9"; return true; + case L10N_ISO_8859_10: retCode = "ISO-8859-10"; return true; + case L10N_ISO_8859_11: retCode = "ISO-8859-11"; return true; + case L10N_ISO_8859_13: retCode = "ISO-8859-13"; return true; // No ISO-8859-12 ha ha. 
+ case L10N_ISO_8859_14: retCode = "ISO-8859-14"; return true; + case L10N_ISO_8859_15: retCode = "ISO-8859-15"; return true; + case L10N_ISO_8859_16: retCode = "ISO-8859-16"; return true; + case L10N_CODEPAGE_437: retCode = "CP437"; return true; + case L10N_CODEPAGE_850: retCode = "CP850"; return true; + case L10N_CODEPAGE_863: retCode = "CP863"; return true; + case L10N_CODEPAGE_866: retCode = "CP866"; return true; + case L10N_CODEPAGE_932: retCode = "CP932"; return true; + case L10N_CODEPAGE_936: retCode = "CP936"; return true; + case L10N_CODEPAGE_949: retCode = "CP949"; return true; + case L10N_CODEPAGE_950: retCode = "CP950"; return true; + case L10N_CODEPAGE_1251:retCode = "CP1251"; return true; // CYRL + case L10N_CODEPAGE_1252:retCode = "CP1252"; return true; // ANSI + case L10N_EUC_CN: retCode = "EUC-CN"; return true; // GB2312 + case L10N_EUC_JP: retCode = "EUC-JP"; return true; + case L10N_EUC_KR: retCode = "EUC-KR"; return true; + case L10N_ISO_2022_JP: retCode = "ISO-2022-JP"; return true; + case L10N_ARIB: retCode = "ARABIC"; return true; // TODO: think that should be ARABIC. 
+ case L10N_HZ: retCode = "HZ"; return true; + case L10N_GB18030: retCode = "GB18030"; return true; + case L10N_RIS_506: retCode = "SHIFT-JIS"; return true; // MusicShiftJIS, MS_KANJI + // These are only supported with FW 3.10 and above + case L10N_CODEPAGE_852: retCode = "CP852"; return true; + case L10N_CODEPAGE_1250:retCode = "CP1250"; return true; // EE + case L10N_CODEPAGE_737: retCode = "CP737"; return true; + case L10N_CODEPAGE_1253:retCode = "CP1253"; return true; // Greek + case L10N_CODEPAGE_857: retCode = "CP857"; return true; + case L10N_CODEPAGE_1254:retCode = "CP1254"; return true; // Turk + case L10N_CODEPAGE_775: retCode = "CP775"; return true; + case L10N_CODEPAGE_1257:retCode = "CP1257"; return true; // WINBALTRIM + case L10N_CODEPAGE_855: retCode = "CP855"; return true; + case L10N_CODEPAGE_858: retCode = "CP858"; return true; + case L10N_CODEPAGE_860: retCode = "CP860"; return true; + case L10N_CODEPAGE_861: retCode = "CP861"; return true; + case L10N_CODEPAGE_865: retCode = "CP865"; return true; + case L10N_CODEPAGE_869: retCode = "CP869"; return true; + default: return false; + } +#endif } -// TODO: check and complete transforms. note: unicode to/from other Unicode Formats is needed. #ifdef _MSC_VER // Use code page to transform std::string to std::wstring. -s32 _OEM2Wide(u32 oem_code, const std::string src, std::wstring& dst) +s32 _OEM2Wide(HostCode oem_code, const std::string src, std::wstring& dst) { //Such length returned should include the '\0' character. s32 length = MultiByteToWideChar(oem_code, 0, src.c_str(), -1, NULL, 0); @@ -167,7 +166,7 @@ s32 _OEM2Wide(u32 oem_code, const std::string src, std::wstring& dst) } // Use Code page to transform std::wstring to std::string. -s32 _Wide2OEM(u32 oem_code, const std::wstring src, std::string& dst) +s32 _Wide2OEM(HostCode oem_code, const std::wstring src, std::string& dst) { //Such length returned should include the '\0' character. 
s32 length = WideCharToMultiByte(oem_code, 0, src.c_str(), -1, NULL, 0, NULL, NULL); @@ -184,7 +183,7 @@ s32 _Wide2OEM(u32 oem_code, const std::wstring src, std::string& dst) } // Convert Codepage to Codepage (all char*) -std::string _OemToOem(u32 src_code, u32 dst_code, const std::string str) +std::string _OemToOem(HostCode src_code, HostCode dst_code, const std::string str) { std::wstring wide; std::string result; _OEM2Wide(src_code, str, wide); @@ -192,12 +191,11 @@ std::string _OemToOem(u32 src_code, u32 dst_code, const std::string str) return result; } -/* -// Original piece of code. and this is for windows using with _OEM2Wide,_Wide2OEM,_OemToOem. -// The Char -> Char Execution of this function has already been tested using VS and CJK text with encoding. -s32 _L10nConvertStr(s32 src_code, const void *src, size_t * src_len, s32 dst_code, void *dst, size_t * dst_len) +#endif + +s32 _ConvertStr(s32 src_code, const void *src, s32 src_len, s32 dst_code, void *dst, s32 *dst_len, bool allowIncomplete) { - u32 srcCode = 0, dstCode = 0; //OEM code pages + HostCode srcCode = 0, dstCode = 0; //OEM code pages bool src_page_converted = _L10nCodeParse(src_code, srcCode); //Check if code is in list. 
bool dst_page_converted = _L10nCodeParse(dst_code, dstCode); @@ -205,48 +203,64 @@ s32 _L10nConvertStr(s32 src_code, const void *src, size_t * src_len, s32 dst_cod || ((!dst_page_converted) && (dstCode == 0))) return ConverterUnknown; - if (strnlen_s((char*)src, *src_len) != *src_len) return SRCIllegal; - //std::string wrapped_source = (char*)Memory.VirtualToRealAddr(src.addr()); - std::string wrapped_source((char*)src); - //if (wrapped_source.length != src_len.GetValue()) return SRCIllegal; +#ifdef _MSC_VER + std::string wrapped_source = std::string(static_cast(src), src_len); std::string target = _OemToOem(srcCode, dstCode, wrapped_source); if (target.length() > *dst_len) return DSTExhausted; - Memory.WriteString(dst.addr(), target); + memcpy(dst, target.c_str(), target.length()); + *dst_len = target.length(); return ConversionOK; -} -// This is the one used with iconv library for linux/mac. Also char->char. -// I've tested the code with console apps using codeblocks. -s32 _L10nConvertStr(s32 src_code, const void* src, size_t * src_len, s32 dst_code, void * dst, size_t * dst_len) -{ - std::string srcCode, dstCode; +#else s32 retValue = ConversionOK; - if ((_L10nCodeParse(src_code, srcCode)) && (_L10nCodeParse(dst_code, dstCode))) + iconv_t ict = iconv_open(dstCode, srcCode); + if (ict == (iconv_t)-1) return ConverterUnknown; // host iconv does not support this conversion + size_t srcLen = src_len, dstLen = *dst_len; + size_t ictd = iconv(ict, (char **)&src, &srcLen, (char **)&dst, &dstLen); + *dst_len -= dstLen; + if (ictd == (size_t)-1) { - iconv_t ict = iconv_open(srcCode.c_str(), dstCode.c_str()); - //char *srcBuf = (char*)Memory.VirtualToRealAddr(src.addr()); - //char *dstBuf = (char*)Memory.VirtualToRealAddr(dst.addr()); - char *srcBuf = (char*)src, *dstBuf = (char*)dst; - size_t srcLen = *src_len, dstLen = *dst_len; - size_t ictd = iconv(ict, &srcBuf, &srcLen, &dstBuf, &dstLen); - if (ictd != *src_len) + if (errno == EILSEQ) + retValue = SRCIllegal; //Invalid multi-byte sequence + else if (errno == E2BIG) + retValue = DSTExhausted;//Not enough space + else if 
(errno == EINVAL) { - if (errno == EILSEQ) - retValue = SRCIllegal; //Invalid multi-byte sequence - else if (errno == E2BIG) - retValue = DSTExhausted;//Not enough space - else if (errno == EINVAL) + if (allowIncomplete) + *dst_len = -1; // TODO: correct value? + else retValue = SRCIllegal; } - iconv_close(ict); - //retValue = ConversionOK; } - else retValue = ConverterUnknown; + iconv_close(ict); return retValue; -}*/ #endif +} + +s32 _L10nConvertStr(s32 src_code, vm::cptr src, vm::cptr src_len, s32 dst_code, vm::ptr dst, vm::ptr dst_len) +{ + s32 dstLen = *dst_len; + s32 result = _ConvertStr(src_code, src.get_ptr(), *src_len, dst_code, dst.get_ptr(), &dstLen, false); + *dst_len = dstLen; + return result; +} + +s32 _L10nConvertChar(s32 src_code, const void *src, s32 src_len, s32 dst_code, vm::ptr dst, vm::ptr dst_len) +{ + s32 dstLen = 0x7FFFFFFF; + s32 result = _ConvertStr(src_code, src, src_len, dst_code, dst.get_ptr(), &dstLen, true); + *dst_len = dstLen; + return result; +} + +s32 _L10nConvertCharNoResult(s32 src_code, const void *src, s32 src_len, s32 dst_code, vm::ptr dst) +{ + s32 dstLen = 0x7FFFFFFF; + s32 result = _ConvertStr(src_code, src, src_len, dst_code, dst.get_ptr(), &dstLen, true); // NOTE(review): result is ignored, so a failed conversion still returns dstLen - confirm this is intended + return dstLen; +} s32 UCS2toEUCJP() { @@ -428,9 +442,10 @@ s32 ARIBstoUTF8s() throw EXCEPTION(""); } -s32 SJISstoUTF8s() +s32 SJISstoUTF8s(vm::cptr src, vm::cptr src_len, vm::ptr dst, vm::ptr dst_len) { - throw EXCEPTION(""); + cellL10n.Warning("SJISstoUTF8s(src=*0x%x, src_len=*0x%x, dst=*0x%x, dst_len=*0x%x)", src, src_len, dst, dst_len); + return _L10nConvertStr(L10N_CODEPAGE_932, src, src_len, L10N_UTF8, dst, dst_len); } s32 sjiszen2han() @@ -533,9 +548,10 @@ s32 EUCKRtoUHC() throw EXCEPTION(""); } -s32 UCS2toSJIS() +s32 UCS2toSJIS(u16 ch, vm::ptr dst) { - throw EXCEPTION(""); + cellL10n.Warning("UCS2toSJIS(ch=%d, dst=*0x%x)", ch, dst); // ch is one UCS-2 code unit; convert it with UCS-2 as the source encoding + return _L10nConvertCharNoResult(L10N_UCS2, &ch, sizeof(ch), L10N_CODEPAGE_932, dst); } s32 MSJISstoUTF8s() @@ -673,7 +689,7 @@ 
s32 UTF8toUTF32() throw EXCEPTION(""); } -s32 jstrchk(vm::cptr jstr) +s32 jstrchk(vm::cptr jstr) { cellL10n.Warning("jstrchk(jstr=*0x%x) -> utf8", jstr); @@ -755,56 +771,10 @@ s32 UHCtoUCS2() throw EXCEPTION(""); } -s32 L10nConvertStr(s32 src_code, vm::cptr src, vm::ptr src_len, s32 dst_code, vm::ptr dst, vm::ptr dst_len) +s32 L10nConvertStr(s32 src_code, vm::cptr src, vm::ptr src_len, s32 dst_code, vm::ptr dst, vm::ptr dst_len) { - cellL10n.Error("L10nConvertStr(src_code=%d, srca=*0x%x, src_len=*0x%x, dst_code=%d, dst=*0x%x, dst_len=*0x%x)", src_code, src, src_len, dst_code, dst, dst_len); -#ifdef _MSC_VER - u32 srcCode = 0, dstCode = 0; //OEM code pages - bool src_page_converted = _L10nCodeParse(src_code, srcCode); //Check if code is in list. - bool dst_page_converted = _L10nCodeParse(dst_code, dstCode); - - if (((!src_page_converted) && (srcCode == 0)) - || ((!dst_page_converted) && (dstCode == 0))) - return ConverterUnknown; - - //if (strnlen_s((char*)src, *src_len) != *src_len) return SRCIllegal; - std::string wrapped_source = (char*)src.get_ptr(); - //std::string wrapped_source((char*)src); - if (wrapped_source.length() != *src_len) return SRCIllegal; - std::string target = _OemToOem(srcCode, dstCode, wrapped_source); - - if (target.length() > *dst_len) return DSTExhausted; - - memcpy(dst.get_ptr(), target.c_str(), target.size()); - - return ConversionOK; -#else - std::string srcCode, dstCode; - s32 retValue = ConversionOK; - if ((_L10nCodeParse(src_code, srcCode)) && (_L10nCodeParse(dst_code, dstCode))) - { - iconv_t ict = iconv_open(srcCode.c_str(), dstCode.c_str()); - char *srcBuf = (char*)src.get_ptr(); - char *dstBuf = (char*)dst.get_ptr(); - //char *srcBuf = (char*)src, *dstBuf = (char*)dst; - //size_t srcLen = *src_len, dstLen = *dst_len; - size_t srcLen = *src_len, dstLen = *dst_len; - size_t ictd = iconv(ict, &srcBuf, &srcLen, &dstBuf, &dstLen); - if (ictd != *src_len)//if (ictd != *src_len) - { - if (errno == EILSEQ) - retValue = SRCIllegal; 
//Invalid multi-byte sequence - else if (errno == E2BIG) - retValue = DSTExhausted;//Not enough space - else if (errno == EINVAL) - retValue = SRCIllegal; - } - iconv_close(ict); - //retValue = ConversionOK; - } - else retValue = ConverterUnknown; - return retValue; -#endif + cellL10n.Error("L10nConvertStr(src_code=%d, src=*0x%x, src_len=*0x%x, dst_code=%d, dst=*0x%x, dst_len=*0x%x)", src_code, src, src_len, dst_code, dst, dst_len); + return _L10nConvertStr(src_code, src, src_len, dst_code, dst, dst_len); } s32 GBKstoUTF8s() @@ -887,14 +857,14 @@ s32 UTF16toUTF32() throw EXCEPTION(""); } -s32 l10n_convert_str(s32 cd, vm::cptr src, vm::ptr src_len, vm::ptr dst, vm::ptr dst_len) +s32 l10n_convert_str(s32 cd, vm::cptr src, vm::ptr src_len, vm::ptr dst, vm::ptr dst_len) { cellL10n.Warning("l10n_convert_str(cd=%d, src=*0x%x, src_len=*0x%x, dst=*0x%x, dst_len=*0x%x)", cd, src, src_len, dst, dst_len); s32 src_code = cd >> 16; s32 dst_code = cd & 0xffff; - return L10nConvertStr(src_code, src, src_len, dst_code, dst, dst_len); + return _L10nConvertStr(src_code, src, src_len, dst_code, dst, dst_len); } s32 EUCJPstoJISs() @@ -1058,7 +1028,7 @@ s32 UTF8toBIG5() throw EXCEPTION(""); } -s32 UTF16stoUTF8s(vm::cptr utf16, vm::ref utf16_len, vm::ptr utf8, vm::ref utf8_len) +s32 UTF16stoUTF8s(vm::cptr utf16, vm::ref utf16_len, vm::ptr utf8, vm::ref utf8_len) { cellL10n.Error("UTF16stoUTF8s(utf16=*0x%x, utf16_len=*0x%x, utf8=*0x%x, utf8_len=*0x%x)", utf16, utf16_len, utf8, utf8_len); @@ -1110,9 +1080,10 @@ s32 GB18030toUTF8() throw EXCEPTION(""); } -s32 UTF8toSJIS() +s32 UTF8toSJIS(u8 ch, vm::ptr dst, vm::ptr dst_len) { - throw EXCEPTION(""); + cellL10n.Warning("UTF8toSJIS(ch=%d, dst=*0x%x, dst_len=*0x%x)", ch, dst, dst_len); + return _L10nConvertChar(L10N_UTF8, &ch, sizeof(ch), L10N_CODEPAGE_932, dst, dst_len); } s32 ARIBstoUCS2s() @@ -1150,9 +1121,10 @@ s32 UTF8stoUTF16s() throw EXCEPTION(""); } -s32 SJISstoUCS2s() +s32 SJISstoUCS2s(vm::cptr src, vm::cptr src_len, vm::ptr dst, 
vm::ptr dst_len) { - throw EXCEPTION(""); + cellL10n.Warning("SJISstoUCS2s(src=*0x%x, src_len=*0x%x, dst=*0x%x, dst_len=*0x%x)", src, src_len, dst, dst_len); + return _L10nConvertStr(L10N_CODEPAGE_932, src, src_len, L10N_UCS2, dst, dst_len); } s32 BIG5stoUCS2s()