article.cpp
00001 /* 00002 This file is part of Akregator. 00003 00004 Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net> 00005 2005 Frank Osterfeld <frank.osterfeld at kdemail.net> 00006 This program is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 This program is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this program; if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00019 00020 As a special exception, permission is given to link this program 00021 with any edition of TQt, and distribute the resulting executable, 00022 without including the source code for TQt in the source distribution. 00023 */ 00024 00025 #include "article.h" 00026 #include "feed.h" 00027 #include "feedstorage.h" 00028 #include "storage.h" 00029 #include "librss/librss.h" 00030 #include "shared.h" 00031 #include "utils.h" 00032 00033 #include <tqdatetime.h> 00034 #include <tqdom.h> 00035 #include <tqregexp.h> 00036 #include <tqstringlist.h> 00037 #include <tqvaluelist.h> 00038 00039 #include <krfcdate.h> 00040 #include <kdebug.h> 00041 #include <kurl.h> 00042 00043 00044 namespace Akregator { 00045 00046 struct Article::Private : public Shared 00047 { 00058 enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10}; 00059 00060 TQString guid; 00061 Backend::FeedStorage* archive; 00062 Feed* feed; 00063 00064 // the variables below are initialized to null values in the Article constructor 00065 // and then loaded on demand instead. 00066 // 00067 // to read their values, you should therefore use the accessor methods of the Article 00068 // hash(), pubDate(), statusBits() rather than accessing them directly. 00069 uint hash; 00070 TQDateTime pubDate; 00071 int status; 00072 }; 00073 00074 Article::Article() : d(new Private) 00075 { 00076 d->hash = 0; 00077 d->status = 0; 00078 d->feed = 0; 00079 d->archive = 0; 00080 } 00081 00082 Article::Article(const TQString& guid, Feed* feed) : d(new Private) 00083 { 00084 // this constructor should be as cheap as possible, so avoid calls to 00085 // read information from the archive in here if possible 00086 // 00087 // d->hash, d->pubDate and d->status are loaded on-demand by 00088 // the hash(), pubDate() and statusBits() methods respectively 00089 00090 d->feed = feed; 00091 d->guid = guid; 00092 d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()); 00093 d->status = 0; 00094 } 00095 00096 void Article::initialize(RSS::Article article, Backend::FeedStorage* archive) 00097 { 00098 d->archive = archive; 00099 d->status = Private::New; 00100 d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url() 00101 + article.commentsLink().url() ); 00102 00103 d->guid = article.guid(); 00104 00105 if (!d->archive->contains(d->guid)) 00106 { 00107 d->archive->addEntry(d->guid); 00108 00109 if (article.meta("deleted") == "true") 00110 { // if article is in deleted state, we just add the status and omit the rest 00111 d->status = Private::Read | Private::Deleted; 00112 d->archive->setStatus(d->guid, d->status); 00113 } 00114 else 00115 { // article is not deleted, let's add it to the archive 00116 00117 d->archive->setHash(d->guid, hash() ); 00118 TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title(); 00119 d->archive->setTitle(d->guid, title); 00120 d->archive->setDescription(d->guid, article.description()); 00121 d->archive->setLink(d->guid, article.link().url()); 00122 d->archive->setComments(d->guid, article.comments()); 00123 d->archive->setCommentsLink(d->guid, article.commentsLink().url()); 00124 d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink()); 00125 d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true"); 00126 d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime(); 00127 d->archive->setPubDate(d->guid, d->pubDate.toTime_t()); 00128 d->archive->setAuthor(d->guid, article.author()); 00129 00130 TQValueList<RSS::Category> cats = article.categories(); 00131 TQValueList<RSS::Category>::ConstIterator end = cats.end(); 00132 00133 for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it) 00134 { 00135 Backend::Category cat; 00136 00137 cat.term = (*it).category(); 00138 cat.scheme = (*it).domain(); 00139 cat.name = (*it).category(); 00140 00141 d->archive->addCategory(d->guid, cat); 00142 } 00143 00144 if (!article.enclosure().isNull()) 00145 { 00146 d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length()); 00147 } 00148 else 00149 { 00150 d->archive->removeEnclosure(d->guid); 00151 } 00152 00153 TQString status = article.meta("status"); 00154 00155 if (!status.isEmpty()) 00156 { 00157 int statusInt = status.toInt(); 00158 if (statusInt == New) 00159 statusInt = Unread; 00160 setStatus(statusInt); 00161 } 00162 setKeep(article.meta("keep") == "true"); 00163 } 00164 } 00165 else 00166 { 00167 // always update comments count, as it's not used for hash calculation 00168 d->archive->setComments(d->guid, article.comments()); 00169 if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified? 00170 { // if yes, update 00171 d->pubDate.setTime_t(d->archive->pubDate(d->guid)); 00172 d->archive->setHash(d->guid, hash() ); 00173 TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title(); 00174 d->archive->setTitle(d->guid, title); 00175 d->archive->setDescription(d->guid, article.description()); 00176 d->archive->setLink(d->guid, article.link().url()); 00177 d->archive->setCommentsLink(d->guid, article.commentsLink().url()); 00178 d->archive->setAuthor(d->guid, article.author()); 00179 } 00180 } 00181 } 00182 00183 Article::Article(RSS::Article article, Feed* feed) : d(new Private) 00184 { 00185 //assert(feed) 00186 d->feed = feed; 00187 initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl())); 00188 } 00189 00190 Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private) 00191 { 00192 d->feed = 0; 00193 initialize(article, archive); 00194 } 00195 00196 bool Article::isNull() const 00197 { 00198 return d->archive == 0; // TODO: use proper null state 00199 } 00200 00201 void Article::offsetPubDate(int secs) 00202 { 00203 d->pubDate = pubDate().addSecs(secs); 00204 d->archive->setPubDate(d->guid, d->pubDate.toTime_t()); 00205 00206 } 00207 00208 void Article::setDeleted() 00209 { 00210 if (isDeleted()) 00211 return; 00212 00213 setStatus(Read); 00214 d->status = Private::Deleted | Private::Read; 00215 d->archive->setStatus(d->guid, d->status); 00216 d->archive->setDeleted(d->guid); 00217 00218 if (d->feed) 00219 d->feed->setArticleDeleted(*this); 00220 } 00221 00222 bool Article::isDeleted() const 00223 { 00224 return (statusBits() & Private::Deleted) != 0; 00225 } 00226 00227 Article::Article(const Article &other) : d(new Private) 00228 { 00229 *this = other; 00230 } 00231 00232 Article::~Article() 00233 { 00234 if (d->deref()) 00235 { 00236 delete d; 00237 d = 0; 00238 } 00239 } 00240 00241 Article &Article::operator=(const Article &other) 00242 { 00243 if (this != &other) { 00244 other.d->ref(); 00245 if (d && d->deref()) 00246 delete d; 00247 d = other.d; 00248 } 00249 return *this; 00250 } 00251 00252 00253 bool Article::operator<(const Article &other) const 00254 { 00255 return pubDate() > other.pubDate() || 00256 (pubDate() == other.pubDate() && guid() < other.guid() ); 00257 } 00258 00259 bool Article::operator<=(const Article &other) const 00260 { 00261 return (pubDate() > other.pubDate() || *this == other); 00262 } 00263 00264 bool Article::operator>(const Article &other) const 00265 { 00266 return pubDate() < other.pubDate() || 00267 (pubDate() == other.pubDate() && guid() > other.guid() ); 00268 } 00269 00270 bool Article::operator>=(const Article &other) const 00271 { 00272 return (pubDate() > other.pubDate() || *this == other); 00273 } 00274 00275 bool Article::operator==(const Article &other) const 00276 { 00277 return d->guid == other.guid(); 00278 } 00279 00280 int Article::statusBits() const 00281 { 00282 // delayed loading of status information from archive 00283 if ( d->status == 0 ) 00284 { 00285 d->status = d->archive->status(d->guid); 00286 } 00287 00288 return d->status; 00289 } 00290 00291 int Article::status() const 00292 { 00293 if ((statusBits() & Private::Read) != 0) 00294 return Read; 00295 00296 if ((statusBits() & Private::New) != 0) 00297 return New; 00298 else 00299 return Unread; 00300 } 00301 00302 void Article::setStatus(int stat) 00303 { 00304 // use status() rather than statusBits() here to filter out status flags that we are not 00305 // interested in 00306 int oldStatus = status(); 00307 00308 if (oldStatus != stat) 00309 { 00310 switch (stat) 00311 { 00312 case Read: 00313 d->status = ( d->status | Private::Read) & ~Private::New; 00314 break; 00315 case Unread: 00316 d->status = ( d->status & ~Private::Read) & ~Private::New; 00317 break; 00318 case New: 00319 d->status = ( d->status | Private::New) & ~Private::Read; 00320 break; 00321 } 00322 d->archive->setStatus(d->guid, d->status); 00323 if (d->feed) 00324 d->feed->setArticleChanged(*this, oldStatus); 00325 } 00326 } 00327 00328 TQString Article::title() const 00329 { 00330 return d->archive->title(d->guid); 00331 } 00332 00333 TQString Article::author() const 00334 { 00335 return d->archive->author(d->guid); 00336 } 00337 00338 KURL Article::link() const 00339 { 00340 return d->archive->link(d->guid); 00341 } 00342 00343 TQString Article::description() const 00344 { 00345 return d->archive->description(d->guid); 00346 } 00347 00348 TQString Article::guid() const 00349 { 00350 return d->guid; 00351 } 00352 00353 KURL Article::commentsLink() const 00354 { 00355 return d->archive->commentsLink(d->guid); 00356 } 00357 00358 00359 int Article::comments() const 00360 { 00361 00362 return d->archive->comments(d->guid); 00363 } 00364 00365 00366 bool Article::guidIsPermaLink() const 00367 { 00368 return d->archive->guidIsPermaLink(d->guid); 00369 } 00370 00371 bool Article::guidIsHash() const 00372 { 00373 return d->archive->guidIsHash(d->guid); 00374 } 00375 00376 uint Article::hash() const 00377 { 00378 // delayed loading of hash from archive 00379 if ( d->hash == 0 ) 00380 { 00381 d->hash = d->archive->hash(d->guid); 00382 } 00383 00384 return d->hash; 00385 } 00386 00387 bool Article::keep() const 00388 { 00389 return ( statusBits() & Private::Keep) != 0; 00390 } 00391 00392 RSS::Enclosure Article::enclosure() const 00393 { 00394 bool hasEnc; 00395 TQString url, type; 00396 int length; 00397 d->archive->enclosure(d->guid, hasEnc, url, type, length); 00398 return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure(); 00399 00400 00401 } 00402 00403 00404 void Article::setKeep(bool keep) 00405 { 00406 d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep); 00407 d->archive->setStatus(d->guid, d->status); 00408 if (d->feed) 00409 d->feed->setArticleChanged(*this); 00410 } 00411 00412 void Article::addTag(const TQString& tag) 00413 { 00414 d->archive->addTag(d->guid, tag); 00415 if (d->feed) 00416 d->feed->setArticleChanged(*this); 00417 } 00418 00419 void Article::removeTag(const TQString& tag) 00420 { 00421 d->archive->removeTag(d->guid, tag); 00422 if (d->feed) 00423 d->feed->setArticleChanged(*this); 00424 } 00425 00426 bool Article::hasTag(const TQString& tag) const 00427 { 00428 return d->archive->tags(d->guid).contains(tag); 00429 } 00430 00431 TQStringList Article::tags() const 00432 { 00433 return d->archive->tags(d->guid); 00434 } 00435 00436 Feed* Article::feed() const 00437 { return d->feed; } 00438 00439 const TQDateTime& Article::pubDate() const 00440 { 00441 // delayed loading of publication date information from archive 00442 if ( d->pubDate.isNull() ) 00443 { 00444 d->pubDate.setTime_t(d->archive->pubDate(d->guid)); 00445 } 00446 00447 return d->pubDate; 00448 } 00449 00450 TQString Article::buildTitle(const TQString& description) 00451 { 00452 TQString s = description; 00453 if (description.stripWhiteSpace().isEmpty()) 00454 return ""; 00455 00456 int i = s.find('>',500); /*avoid processing too much */ 00457 if (i != -1) 00458 s = s.left(i+1); 00459 TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false); 00460 TQString tagName, toReplace, replaceWith; 00461 while (rx.search(s) != -1 ) 00462 { 00463 tagName=rx.cap(2); 00464 if (tagName=="SCRIPT"||tagName=="script") 00465 toReplace=rx.cap(0); // strip tag AND tag contents 00466 else if (tagName.startsWith("br") || tagName.startsWith("BR")) 00467 { 00468 toReplace=rx.cap(1); 00469 replaceWith=" "; 00470 } 00471 else 00472 toReplace=rx.cap(1); // strip just tag 00473 s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed 00474 } 00475 if (s.length()> 90) 00476 s=s.left(90)+"..."; 00477 return s.simplifyWhiteSpace(); 00478 } 00479 } // namespace Akregator