kmime_parsers.cpp
00001 /* 00002 kmime_parsers.cpp 00003 00004 KMime, the KDE internet mail/usenet news message library. 00005 Copyright (c) 2001 the KMime authors. 00006 See file AUTHORS for details 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 You should have received a copy of the GNU General Public License 00013 along with this program; if not, write to the Free Software Foundation, 00014 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US 00015 */ 00016 #include "kmime_parsers.h" 00017 00018 #include <tqregexp.h> 00019 00020 using namespace KMime::Parser; 00021 00022 namespace KMime { 00023 namespace Parser { 00024 00025 00026 MultiPart::MultiPart(const TQCString &src, const TQCString &boundary) 00027 { 00028 s_rc=src; 00029 b_oundary=boundary; 00030 } 00031 00032 00033 bool MultiPart::parse() 00034 { 00035 TQCString b="--"+b_oundary, part; 00036 int pos1=0, pos2=0, blen=b.length(); 00037 00038 p_arts.clear(); 00039 00040 //find the first valid boundary 00041 while(1) { 00042 if( (pos1=s_rc.find(b.data(), pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all 00043 break; 00044 pos1+=blen; //boundary found but not valid => skip it; 00045 } 00046 00047 if(pos1>-1) { 00048 pos1+=blen; 00049 if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken 00050 pos1=-1; //we give up 00051 else if( (pos1-blen)>1 ) //preamble present 00052 p_reamble=s_rc.left(pos1-blen); 00053 } 00054 00055 00056 while(pos1>-1 && pos2>-1) { 00057 00058 //skip the rest of the line for the first boundary - the message-part starts here 00059 if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak 00060 //now find the next valid boundary 00061 pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary 00062 while(1) { 00063 if( (pos2=s_rc.find(b.data(), pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found 00064 break; 00065 pos2+=blen; //boundary is invalid => skip it; 00066 } 00067 00068 if(pos2==-1) { // no more boundaries found 00069 part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string 00070 p_arts.append(part); 00071 pos1=-1; 00072 pos2=-1; //break; 00073 } 00074 else { 00075 part=s_rc.mid(pos1, pos2-pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1) 00076 p_arts.append(part); 00077 pos2+=blen; //pos2 points now to the first charakter after the boundary 00078 if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary 00079 pos1=pos2+2; //pos1 points now to the character directly after the end-boundary 00080 if( (pos1=s_rc.find('\n', pos1))>-1 ) //skipt the rest of this line 00081 e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilouge 00082 pos1=-1; 00083 pos2=-1; //break 00084 } 00085 else { 00086 pos1=pos2; //the search continues ... 00087 } 00088 } 00089 } 00090 } 00091 00092 return (!p_arts.isEmpty()); 00093 } 00094 00095 //============================================================================================ 00096 00097 00098 NonMimeParser::NonMimeParser(const TQCString &src) : 00099 s_rc(src), p_artNr(-1), t_otalNr(-1) 00100 {} 00101 00105 TQCString NonMimeParser::guessMimeType(const TQCString& fileName) 00106 { 00107 TQCString tmp, mimeType; 00108 int pos; 00109 00110 if(!fileName.isEmpty()) { 00111 pos=fileName.findRev('.'); 00112 if(pos++ != -1) { 00113 tmp=fileName.mid(pos, fileName.length()-pos).upper(); 00114 if(tmp=="JPG" || tmp=="JPEG") mimeType="image/jpeg"; 00115 else if(tmp=="GIF") mimeType="image/gif"; 00116 else if(tmp=="PNG") mimeType="image/png"; 00117 else if(tmp=="TIFF" || tmp=="TIF") mimeType="image/tiff"; 00118 else if(tmp=="XPM") mimeType="image/x-xpm"; 00119 else if(tmp=="XBM") mimeType="image/x-xbm"; 00120 else if(tmp=="BMP") mimeType="image/x-bmp"; 00121 else if(tmp=="TXT" || 00122 tmp=="ASC" || 00123 tmp=="H" || 00124 tmp=="C" || 00125 tmp=="CC" || 00126 tmp=="CPP") mimeType="text/plain"; 00127 else if(tmp=="HTML" || tmp=="HTM") mimeType="text/html"; 00128 else mimeType="application/octet-stream"; 00129 } 00130 else mimeType="application/octet-stream"; 00131 } 00132 else mimeType="application/octet-stream"; 00133 00134 return mimeType; 00135 } 00136 00137 //============================================================================================ 00138 00139 00140 UUEncoded::UUEncoded(const TQCString &src, const TQCString &subject) : 00141 NonMimeParser(src), s_ubject(subject) 00142 {} 00143 00144 00145 bool UUEncoded::parse() 00146 { 00147 int currentPos=0; 00148 bool success=true, firstIteration=true; 00149 00150 while (success) { 00151 int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0; 00152 bool containsBegin=false, containsEnd=false; 00153 TQCString tmp,fileName; 00154 00155 if( (beginPos=s_rc.find(TQRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) { 00156 containsBegin=true; 00157 uuStart=s_rc.find('\n', beginPos); 00158 if(uuStart==-1) {//no more line breaks found, we give up 00159 success = false; 00160 break; 00161 } else 00162 uuStart++; //points now at the beginning of the next line 00163 } 00164 else beginPos=currentPos; 00165 00166 if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 ) 00167 endPos=s_rc.length(); //no end found 00168 else 00169 containsEnd=true; 00170 00171 if ((containsBegin && containsEnd) || firstIteration) { 00172 00173 //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos); 00174 //all lines in a uuencoded text start with 'M' 00175 for(int idx=uuStart; idx<endPos; idx++) 00176 if(s_rc[idx]=='\n') { 00177 lineCount++; 00178 if(idx+1<endPos && s_rc[idx+1]=='M') { 00179 idx++; 00180 MCount++; 00181 } 00182 } 00183 00184 //printf("lineCount=%d , MCount=%d\n", lineCount, MCount); 00185 if( MCount==0 || (lineCount-MCount)>10 || 00186 ((!containsBegin || !containsEnd) && (MCount<15)) ) { // harder check for splitted-articles 00187 success = false; 00188 break; //too many "non-M-Lines" found, we give up 00189 } 00190 00191 if( (!containsBegin || !containsEnd) && !s_ubject.isNull()) { // message may be split up => parse subject 00192 TQRegExp rx("[0-9]+/[0-9]+"); 00193 pos=rx.search(TQString(s_ubject), 0); 00194 len=rx.matchedLength(); 00195 if(pos!=-1) { 00196 tmp=s_ubject.mid(pos, len); 00197 pos=tmp.find('/'); 00198 p_artNr=tmp.left(pos).toInt(); 00199 t_otalNr=tmp.right(tmp.length()-pos-1).toInt(); 00200 } else { 00201 success = false; 00202 break; //no "part-numbers" found in the subject, we give up 00203 } 00204 } 00205 00206 //everything before "begin" is text 00207 if(beginPos>0) 00208 t_ext.append(s_rc.mid(currentPos,beginPos-currentPos)); 00209 00210 if(containsBegin) 00211 fileName = s_rc.mid(beginPos+10, uuStart-beginPos-11); //everything between "begin ### " and the next LF is considered as the filename 00212 else 00213 fileName = ""; 00214 f_ilenames.append(fileName); 00215 b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything beetween "begin" and "end" is uuencoded 00216 m_imeTypes.append(guessMimeType(fileName)); 00217 firstIteration=false; 00218 00219 int next = s_rc.find('\n', endPos+1); 00220 if(next==-1) { //no more line breaks found, we give up 00221 success = false; 00222 break; 00223 } else 00224 next++; //points now at the beginning of the next line 00225 currentPos = next; 00226 00227 } else { 00228 success = false; 00229 } 00230 } 00231 00232 // append trailing text part of the article 00233 t_ext.append(s_rc.right(s_rc.length()-currentPos)); 00234 00235 return ((b_ins.count()>0) || isPartial()); 00236 } 00237 00238 00239 //============================================================================================ 00240 00241 00242 YENCEncoded::YENCEncoded(const TQCString &src) : 00243 NonMimeParser(src) 00244 {} 00245 00246 00247 bool YENCEncoded::yencMeta(TQCString& src, const TQCString& name, int* value) 00248 { 00249 bool found = false; 00250 TQCString sought=name + "="; 00251 00252 int iPos=src.find( sought.data() ); 00253 if (iPos>-1) { 00254 int pos1=src.find(' ', iPos); 00255 int pos2=src.find('\r', iPos); 00256 int pos3=src.find('\t', iPos); 00257 int pos4=src.find('\n', iPos); 00258 if (pos2>=0 && (pos1<0 || pos1>pos2)) 00259 pos1=pos2; 00260 if (pos3>=0 && (pos1<0 || pos1>pos3)) 00261 pos1=pos3; 00262 if (pos4>=0 && (pos1<0 || pos1>pos4)) 00263 pos1=pos4; 00264 iPos=src.findRev( '=', pos1)+1; 00265 if (iPos<pos1) { 00266 char c=src.at( iPos); 00267 if ( c>='0' && c<='9') { 00268 found=true; 00269 *value=src.mid( iPos, pos1-iPos).toInt(); 00270 } 00271 } 00272 } 00273 return found; 00274 } 00275 00276 00277 bool YENCEncoded::parse() 00278 { 00279 int currentPos=0; 00280 bool success=true; 00281 00282 while (success) { 00283 int beginPos=currentPos, yencStart=currentPos; 00284 bool containsPart=false; 00285 TQCString fileName,mimeType; 00286 00287 if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) { 00288 yencStart=s_rc.find( '\n', beginPos); 00289 if (yencStart==-1) { // no more line breaks found, give up 00290 success = false; 00291 break; 00292 } else { 00293 yencStart++; 00294 if (s_rc.find("=ypart", yencStart)==yencStart) { 00295 containsPart=true; 00296 yencStart=s_rc.find( '\n', yencStart); 00297 if ( yencStart== -1) { 00298 success=false; 00299 break; 00300 } 00301 yencStart++; 00302 } 00303 } 00304 // Try to identify yenc meta data 00305 00306 // Filenames can contain any embedded chars until end of line 00307 TQCString meta=s_rc.mid(beginPos, yencStart-beginPos); 00308 int namePos=meta.find("name="); 00309 if (namePos== -1) { 00310 success=false; 00311 break; 00312 } 00313 int eolPos=meta.find('\r', namePos); 00314 if (eolPos== -1) 00315 eolPos=meta.find('\n', namePos); 00316 if (eolPos== -1) { 00317 success=false; 00318 break; 00319 } 00320 fileName=meta.mid(namePos+5, eolPos-(namePos+5)); 00321 00322 // Other metadata is integer 00323 int yencLine; 00324 if (!yencMeta(meta, "line", ¥cLine)) { 00325 success=false; 00326 break; 00327 } 00328 int yencSize; 00329 if (!yencMeta( meta, "size", ¥cSize)) { 00330 success=false; 00331 break; 00332 } 00333 00334 int partBegin, partEnd; 00335 if (containsPart) { 00336 if (!yencMeta(meta, "part", &p_artNr)) { 00337 success=false; 00338 break; 00339 } 00340 if (!yencMeta(meta, "begin", &partBegin) || ! 00341 yencMeta(meta, "end", &partEnd)) { 00342 success=false; 00343 break; 00344 } 00345 if (!yencMeta(meta, "total", &t_otalNr)) 00346 t_otalNr=p_artNr+1; 00347 if (yencSize==partEnd-partBegin+1) 00348 t_otalNr=1; else 00349 yencSize=partEnd-partBegin+1; 00350 } 00351 00352 // We have a valid yenc header; now we extract the binary data 00353 int totalSize=0; 00354 int pos=yencStart; 00355 int len=s_rc.length(); 00356 bool lineStart=true; 00357 int lineLength=0; 00358 bool containsEnd=false; 00359 TQByteArray binary = TQByteArray(yencSize); 00360 while (pos<len) { 00361 int ch=s_rc.at(pos); 00362 if (ch<0) 00363 ch+=256; 00364 if (ch=='\r') 00365 { 00366 if (lineLength!=yencLine && totalSize!=yencSize) 00367 break; 00368 pos++; 00369 } 00370 else if (ch=='\n') 00371 { 00372 lineStart=true; 00373 lineLength=0; 00374 pos++; 00375 } 00376 else 00377 { 00378 if (ch=='=') 00379 { 00380 if (pos+1<len) 00381 { 00382 ch=s_rc.at( pos+1); 00383 if (lineStart && ch=='y') 00384 { 00385 containsEnd=true; 00386 break; 00387 } 00388 pos+=2; 00389 ch-=64+42; 00390 if (ch<0) 00391 ch+=256; 00392 if (totalSize>=yencSize) 00393 break; 00394 binary.at(totalSize++)=ch; 00395 lineLength++; 00396 } 00397 else 00398 break; 00399 } 00400 else 00401 { 00402 ch-=42; 00403 if (ch<0) 00404 ch+=256; 00405 if (totalSize>=yencSize) 00406 break; 00407 binary.at(totalSize++)=ch; 00408 lineLength++; 00409 pos++; 00410 } 00411 lineStart=false; 00412 } 00413 } 00414 00415 if (!containsEnd) 00416 { 00417 success=false; 00418 break; 00419 } 00420 if (totalSize!=yencSize) 00421 { 00422 success=false; 00423 break; 00424 } 00425 00426 // pos now points to =yend; get end data 00427 eolPos=s_rc.find('\n', pos); 00428 if (eolPos== -1) 00429 { 00430 success=false; 00431 break; 00432 } 00433 meta=s_rc.mid(pos, eolPos-pos); 00434 if (!yencMeta(meta, "size", &totalSize)) 00435 { 00436 success=false; 00437 break; 00438 } 00439 if (totalSize!=yencSize) 00440 { 00441 success=false; 00442 break; 00443 } 00444 00445 f_ilenames.append(fileName); 00446 m_imeTypes.append(guessMimeType( fileName)); 00447 b_ins.append(binary); 00448 00449 //everything before "begin" is text 00450 if(beginPos>0) 00451 t_ext.append(s_rc.mid(currentPos,beginPos-currentPos)); 00452 currentPos = eolPos+1; 00453 00454 } else { 00455 success = false; 00456 } 00457 } 00458 00459 // append trailing text part of the article 00460 t_ext.append(s_rc.right(s_rc.length()-currentPos)); 00461 00462 return b_ins.count()>0; 00463 } 00464 00465 } // namespace Parser 00466 } // namespace KMime