diff --git a/String.cpp b/String.cpp
index 4d65f4bd..3a8d82df 100644
--- a/String.cpp
+++ b/String.cpp
@@ -3,6 +3,162 @@
 #include "FileUtils.h"
 #include "md5.h"
 
+// HTML entity for each byte value, indexed by the byte; a 0 entry means the
+// byte has no entity and is emitted unchanged. Every string must be the
+// literal "&...;" sequence: Escape_n() appends these entries verbatim when
+// encoding and strcmp()s them against "&...;" input when decoding.
+const char* g_szHTMLescapes[256] = {
+	"&#0;", 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-9
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10-19
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-29
+	0, 0, 0, 0, "&quot;", 0, 0, 0, "&amp;", "&#39;", // 30-39
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-49
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-59
+	"&lt;", 0, "&gt;", 0, 0, 0, 0, 0, 0, 0, // 60-69
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-79
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-89
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90-99
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 100-109
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 110-119
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 120-129
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 130-139
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 140-149
+	0, 0, 0, "&trade;", 0, 0, 0, 0, 0, 0, // 150-159
+	"&nbsp;", // 160
+	"&iexcl;", // 161
+	"&cent;", // 162
+	"&pound;", // 163
+	"&curren;", // 164
+	"&yen;", // 165
+	"&brvbar;", // 166
+	"&sect;", // 167
+	"&uml;", // 168
+	"&copy;", // 169
+	"&ordf;", // 170
+	"&laquo;", // 171
+	"&not;", // 172
+	"&shy;", // 173
+	"&reg;", // 174
+	"&macr;", // 175
+	"&deg;", // 176
+	"&plusmn;", // 177
+	"&sup2;", // 178
+	"&sup3;", // 179
+	"&acute;", // 180
+	"&micro;", // 181
+	"&para;", // 182
+	"&middot;", // 183
+	"&cedil;", // 184
+	"&sup1;", // 185
+	"&ordm;", // 186
+	"&raquo;", // 187
+	"&frac14;", // 188
+	"&frac12;", // 189
+	"&frac34;", // 190
+	"&iquest;", // 191
+	"&Agrave;", // 192
+	"&Aacute;", // 193
+	"&Acirc;", // 194
+	"&Atilde;", // 195
+	"&Auml;", // 196
+	"&Aring;", // 197
+	"&AElig;", // 198
+	"&Ccedil;", // 199
+	"&Egrave;", // 200
+	"&Eacute;", // 201
+	"&Ecirc;", // 202
+	"&Euml;", // 203
+	"&Igrave;", // 204
+	"&Iacute;", // 205
+	"&Icirc;", // 206
+	"&Iuml;", // 207
+	"&ETH;", // 208
+	"&Ntilde;", // 209
+	"&Ograve;", // 210
+	"&Oacute;", // 211
+	"&Ocirc;", // 212
+	"&Otilde;", // 213
+	"&Ouml;", // 214
+	"&times;", // 215
+	"&Oslash;", // 216
+	"&Ugrave;", // 217
+	"&Uacute;", // 218
+	"&Ucirc;", // 219
+	"&Uuml;", // 220
+	"&Yacute;", // 221
+	"&THORN;", // 222
+	"&szlig;", // 223
+	"&agrave;", // 224
+	"&aacute;", // 225
+	"&acirc;", // 226
+	"&atilde;", // 227
+	"&auml;", // 228
+	"&aring;", // 229
+	"&aelig;", // 230
+	"&ccedil;", // 231
+	"&egrave;", // 232
+	"&eacute;", // 233
+	"&ecirc;", // 234
+	"&euml;", // 235
+	"&igrave;", // 236
+	"&iacute;", // 237
+	"&icirc;", // 238
+	"&iuml;", // 239
+	"&eth;", // 240
+	"&ntilde;", // 241
+	"&ograve;", // 242
+	"&oacute;", // 243
+	"&ocirc;", // 244
+	"&otilde;", // 245
+	"&ouml;", // 246
+	"&divide;", // 247
+	"&oslash;", // 248
+	"&ugrave;", // 249
+	"&uacute;", // 250
+	"&ucirc;", // 251
+	"&uuml;", // 252
+	"&yacute;", // 253
+	"&thorn;", // 254
+	"&yuml;", // 255
+};
+
+// Scans at most iMaxBytes bytes of the NUL-terminated buffer src for c.
+// Returns a pointer to the first occurrence, or NULL if c is not found
+// before the limit or the terminating NUL.
+// pFill (optional): receives the bytes scanned up to and including c,
+//   NUL-terminated, so it must have room for iMaxBytes + 1 bytes; it is set
+//   to an empty string when c is not found.
+// piCount (optional): receives the offset of c from src, or 0 if not found.
+unsigned char* CString::strnchr(const unsigned char* src, unsigned char c, unsigned int iMaxBytes, unsigned char* pFill, unsigned int* piCount) const {
+	for (unsigned int a = 0; a < iMaxBytes && *src; a++, src++) {
+		if (pFill) {
+			pFill[a] = *src;
+		}
+
+		if (*src == c) {
+			if (pFill) {
+				pFill[a +1] = 0;
+			}
+
+			if (piCount) {
+				*piCount = a;
+			}
+
+			return (unsigned char*) src;
+		}
+	}
+
+	if (pFill) {
+		*pFill = 0;
+	}
+
+	if (piCount) {
+		*piCount = 0;
+	}
+
+	return NULL;
+}
+
 int CString::CaseCmp(const CString& s) const {
 	return strcasecmp(c_str(), s.c_str());
 }
@@ -101,10 +257,56 @@ CString CString::Escape_n(EEscape eFrom, EEscape eTo) const {
 	unsigned int iLength = length();
 	sRet.reserve(iLength *3);
 	unsigned char ch = 0;
+	// Longest "&...;" sequence (in bytes, counting '&' and ';') recognised when decoding HTML.
+	const unsigned int iMaxLen = 20;
+	unsigned char pTmp[iMaxLen +1]; // +1: strnchr() NUL-terminates after the ';'
+	unsigned int iCounted = 0;
 
 	for (unsigned int a = 0; a < iLength; a++, p = pStart + a) {
 		switch (eFrom) {
 			case EHTML:
+				if ((*p == '&') && (strnchr((unsigned char*) p, ';', iMaxLen, pTmp, &iCounted))) {
+					// pTmp now holds the candidate entity ("&...;") and iCounted the offset of its ';'.
+					ch = 0; // forget the previous character so a failed lookup is detectable below
+
+					if ((iCounted >= 3) && (pTmp[1] == '#')) {
+						// Numeric reference: decimal "&#97;" or hexadecimal "&#x3C;"
+						int base = 10;
+
+						if ((pTmp[2] & 0xDF) == 'X') {
+							base = 16;
+						}
+
+						const char* pszDigits = (const char*) (pTmp +2 + (base == 16));
+						char* endptr = NULL;
+						long b = strtol(pszDigits, &endptr, base);
+
+						// Require at least one digit and a value that fits in a byte,
+						// in case they do something like "&#7777777777;" or "&#x;"
+						if ((endptr != pszDigits) && (*endptr == ';') && (b >= 0) && (b <= 255)) {
+							ch = (unsigned char) b;
+							a += iCounted;
+							break;
+						}
+					}
+
+					for (unsigned int c = 0; c < 256; c++) {
+						// Most table slots are 0 (no entity); strcmp() must not see them.
+						if (g_szHTMLescapes[c] && (strcmp(g_szHTMLescapes[c], (const char*) pTmp) == 0)) {
+							ch = c;
+							break;
+						}
+					}
+
+					if (ch > 0) {
+						a += iCounted;
+					} else {
+						ch = *p; // Not a valid escape, just record the &
+					}
+				} else {
+					ch = *p;
+				}
+				break;
 			case EAscii:
 				ch = *p;
 				break;
@@ -136,6 +338,14 @@ CString CString::Escape_n(EEscape eFrom, EEscape eTo) const {
 
 		switch (eTo) {
 			case EHTML:
+				// Bytes with a table entry become their entity; all others pass through.
+				if (g_szHTMLescapes[ch]) {
+					sRet += g_szHTMLescapes[ch];
+				} else {
+					sRet += ch;
+				}
+
+				break;
 			case EAscii:
 				sRet += ch;
 				break;
diff --git a/String.h b/String.h
index 0154eeef..d68c0327 100644
--- a/String.h
+++ b/String.h
@@ -34,6 +34,9 @@ static const unsigned char base64_table[256] = {
 	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
 };
 
+// HTML entity string for each byte value (0 where the byte has none); defined in String.cpp
+extern const char* g_szHTMLescapes[256];
+
 class CString : public string {
 public:
 	typedef enum {
@@ -47,6 +50,8 @@ public:
 	CString(const string& s) : string(s) {}
 	virtual ~CString() {}
 
+	// Finds c within the first iMaxBytes bytes of src; pFill, if given, needs iMaxBytes + 1 bytes.
+	unsigned char* strnchr(const unsigned char* src, unsigned char c, unsigned int iMaxBytes, unsigned char* pFill = NULL, unsigned int* piCount = NULL) const;
 	int CaseCmp(const CString& s) const;
 	int StrCmp(const CString& s) const;
 	static bool WildCmp(const CString& sWild, const CString& sString);