From 4274bdc5d158eefd5a5ca2fcb1176dcbcfdbeb67 Mon Sep 17 00:00:00 2001 From: cflakes Date: Sun, 25 Apr 2010 21:45:13 +0000 Subject: [PATCH] Add the cool charset module to "extra". It normalizes character sets, so if your client sends e.g. ISO-8859-1, and users on your IRC server want UTF-8, this module can help you. Or, the other way around, if users on your IRC server send messages with weird charsets that your client doesn't understand, this module can convert them to UTF-8 or something for you (as long as you know the names of the source charsets, at least). More explanation will be added to the wiki shortly. git-svn-id: https://znc.svn.sourceforge.net/svnroot/znc/trunk@1942 726aef4b-f618-498e-8847-2d620e286838 --- modules/extra/charset.cpp | 204 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 modules/extra/charset.cpp diff --git a/modules/extra/charset.cpp b/modules/extra/charset.cpp new file mode 100644 index 00000000..e9928465 --- /dev/null +++ b/modules/extra/charset.cpp @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2004-2010 See the AUTHORS file for details. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include "Modules.h" +#include + +class CCharsetMod : public CModule +{ +private: + VCString m_vsClientCharsets; + VCString m_vsServerCharsets; + + size_t GetConversionLength(iconv_t& ic, const CString& sData) + { + if(sData.empty()) return 0; + + size_t uLength = 0; + char tmpbuf[1024]; + const char *pIn = sData.c_str(); + size_t uInLen = sData.size(); + bool bBreak; + + do + { + char *pOut = tmpbuf; + size_t uBufSize = 1024; + bBreak = (uInLen < 1); + + if(iconv(ic, // this is ugly, but keeps the code short: + (uInLen < 1 ? NULL : const_cast(&pIn)), + (uInLen < 1 ? NULL : &uInLen), + &pOut, &uBufSize) == (size_t)-1) + // explanation: iconv needs a last call with input = NULL to + // copy/convert possibly left data chunks into the output buffer. + { + if(errno == EINVAL) + { + iconv_close(ic); + return (size_t)-1; + } + else if(errno != E2BIG) + { + return (size_t)-2; + } + } + + uLength += (pOut - tmpbuf); + } while(!bBreak); + + return uLength; + } + + bool ConvertCharset(const CString& sFrom, const CString& sTo, CString& sData) + { + if(sData.empty()) return true; + + iconv_t ic = iconv_open(sTo.c_str(), sFrom.c_str()); + if(ic == (iconv_t)-1) return false; + + size_t uLength = GetConversionLength(ic, sData); + + if(uLength == (size_t)-1) + { + // incompatible input encoding. + iconv_close(ic); + return false; + } + else if(uLength != (size_t)-2) + { + iconv(ic, NULL, NULL, NULL, NULL); // reset + + size_t uResultBufSize = uLength + 1; + char *pResult = new char[uResultBufSize]; + memset(pResult, 0, uResultBufSize); + char *pResultWalker = pResult; + + const char* pIn = sData.c_str(); + size_t uInLen = sData.size(); + + size_t uResult = iconv(ic, const_cast(&pIn), &uInLen, &pResultWalker, &uResultBufSize); + + iconv_close(ic); + + if(uResult != (size_t)-1) + { + sData.erase(); + sData.append(pResult, uLength); + + delete[] pResult; + return true; + } + else + { + delete[] pResult; + } + } + + int tmp_errno = errno; + iconv_close(ic); + errno = tmp_errno; + return false; + } + + bool ConvertCharset(const VCString& vsFrom, const CString& sTo, CString& sData) + { + CString sDataCopy(sData); + + iconv_t icTest = iconv_open(sTo.c_str(), sTo.c_str()); + if(icTest != (iconv_t)-1) + { + size_t uTest = GetConversionLength(icTest, sData); + + if(uTest != (size_t)-1 && uTest != (size_t)-2) + { + return true; + } + + iconv_close(icTest); + } + + bool bConverted = false; + + for(VCString::const_iterator itf = vsFrom.begin(); itf != vsFrom.end(); itf++) + { + if(ConvertCharset(*itf, sTo, sDataCopy)) + { + sData = sDataCopy; + bConverted = true; + break; + } + else + { + sDataCopy = sData; + } + } + + return bConverted; + } + +public: + MODCONSTRUCTOR(CCharsetMod) {} + + bool OnLoad(const CString& sArgs, CString& sMessage) + { + if(sArgs.Token(1).empty() || !sArgs.Token(2).empty()) + { + sMessage = "This module needs two charset lists as arguments: " + " " + ""; + return false; + } + + VCString vsFrom, vsTo; + sArgs.Token(0).Split(",", vsFrom); + sArgs.Token(1).Split(",", vsTo); + + // probe conversions: + for(VCString::const_iterator itf = vsFrom.begin(); itf != vsFrom.end(); itf++) + { + for(VCString::const_iterator itt = vsTo.begin(); itt != vsTo.end(); itt++) + { + iconv_t icTest = iconv_open(itt->c_str(), itf->c_str()); + if(icTest == (iconv_t)-1) + { + sMessage = "Conversion from '" + *itf + "' to '" + *itt + "' is not possible."; + return false; + } + iconv_close(icTest); + + icTest = iconv_open(itf->c_str(), itt->c_str()); + if(icTest == (iconv_t)-1) + { + sMessage = "Conversion from '" + *itt + "' to '" + *itf + "' is not possible."; + return false; + } + iconv_close(icTest); + } + } + + m_vsClientCharsets = vsFrom; + m_vsServerCharsets = vsTo; + + return true; + } + + EModRet OnRaw(CString& sLine) + { + ConvertCharset(m_vsServerCharsets, m_vsClientCharsets[0], sLine); + return CONTINUE; + } + + EModRet OnUserRaw(CString& sLine) + { + ConvertCharset(m_vsClientCharsets, m_vsServerCharsets[0], sLine); + return CONTINUE; + } +}; + +MODULEDEFS(CCharsetMod, "Normalizes character encodings.")