meinproc.cpp
00001 #ifdef HAVE_CONFIG_H 00002 #include "config.h" 00003 #endif 00004 00005 #include <string.h> 00006 #include <sys/time.h> 00007 #include <unistd.h> 00008 #include <libxml/xmlversion.h> 00009 #include <libxml/xmlmemory.h> 00010 #include <libxml/debugXML.h> 00011 #include <libxml/HTMLtree.h> 00012 #include <libxml/xmlIO.h> 00013 #include <libxml/parserInternals.h> 00014 #include <libxslt/xsltconfig.h> 00015 #include <libxslt/xsltInternals.h> 00016 #include <libxslt/transform.h> 00017 #include <libxslt/xsltutils.h> 00018 #include <tqstring.h> 00019 #include <kstandarddirs.h> 00020 #include <kinstance.h> 00021 #include <xslt.h> 00022 #include <tqfile.h> 00023 #include <tqdir.h> 00024 #include <kcmdlineargs.h> 00025 #include <klocale.h> 00026 #include <kaboutdata.h> 00027 #include <stdlib.h> 00028 #include <kdebug.h> 00029 #include <tqtextcodec.h> 00030 #include <tqfileinfo.h> 00031 #include <kprocess.h> 00032 #include <tqvaluevector.h> 00033 00034 extern int xmlLoadExtDtdDefaultValue; 00035 00036 class MyPair { 00037 public: 00038 TQString word; 00039 int base;}; 00040 00041 typedef TQValueList<MyPair> PairList; 00042 00043 void parseEntry(PairList &list, xmlNodePtr cur, int base) 00044 { 00045 if ( !cur ) 00046 return; 00047 00048 base += atoi( ( const char* )xmlGetProp(cur, ( const xmlChar* )"header") ); 00049 if ( base > 10 ) // 10 is the maximum 00050 base = 10; 00051 00052 /* We don't care what the top level element name is */ 00053 cur = cur->xmlChildrenNode; 00054 while (cur != NULL) { 00055 00056 if ( cur->type == XML_TEXT_NODE ) { 00057 TQString words = TQString::fromUtf8( ( char* )cur->content ); 00058 TQStringList wlist = TQStringList::split( ' ', words.simplifyWhiteSpace() ); 00059 for ( TQStringList::ConstIterator it = wlist.begin(); 00060 it != wlist.end(); ++it ) 00061 { 00062 MyPair m; 00063 m.word = *it; 00064 m.base = base; 00065 list.append( m ); 00066 } 00067 } else if ( !xmlStrcmp( cur->name, (const xmlChar *) "entry") ) 00068 parseEntry( list, cur, base ); 00069 00070 cur = cur->next; 00071 } 00072 00073 } 00074 00075 static KCmdLineOptions options[] = 00076 { 00077 { "stylesheet <xsl>", I18N_NOOP( "Stylesheet to use" ), 0 }, 00078 { "stdout", I18N_NOOP( "Output whole document to stdout" ), 0 }, 00079 { "o", 0, 0 }, 00080 { "output <file>", I18N_NOOP("Output whole document to file" ), 0 }, 00081 { "htdig", I18N_NOOP( "Create a ht://dig compatible index" ), 0 }, 00082 { "check", I18N_NOOP( "Check the document for validity" ), 0 }, 00083 { "cache <file>", I18N_NOOP( "Create a cache file for the document" ), 0}, 00084 { "srcdir <dir>", I18N_NOOP( "Set the srcdir, for kdelibs" ), 0}, 00085 { "param <key>=<value>", I18N_NOOP( "Parameters to pass to the stylesheet" ), 0}, 00086 { "+xml", I18N_NOOP("The file to transform"), 0}, 00087 KCmdLineLastOption // End of options. 00088 }; 00089 00090 00091 00092 00093 int main(int argc, char **argv) { 00094 00095 // xsltSetGenericDebugFunc(stderr, NULL); 00096 00097 KAboutData aboutData( "meinproc", I18N_NOOP("XML-Translator" ), 00098 "$Revision$", 00099 I18N_NOOP("KDE Translator for XML")); 00100 00101 KCmdLineArgs::init(argc, argv, &aboutData); 00102 KCmdLineArgs::addCmdLineOptions( options ); 00103 00104 KLocale::setMainCatalogue("kio_help"); 00105 KInstance ins("meinproc"); 00106 KGlobal::locale(); 00107 00108 00109 KCmdLineArgs *args = KCmdLineArgs::parsedArgs(); 00110 if ( args->count() != 1 ) { 00111 args->usage(); 00112 return ( 1 ); 00113 } 00114 00115 // Need to set SRCDIR before calling fillInstance 00116 TQString srcdir; 00117 if ( args->isSet( "srcdir" ) ) 00118 srcdir = TQDir( TQFile::decodeName( args->getOption( "srcdir" ) ) ).absPath(); 00119 fillInstance(ins,srcdir); 00120 00121 LIBXML_TEST_VERSION 00122 00123 TQString checkFilename = TQFile::decodeName(args->arg( 0 )); 00124 TQFileInfo checkFile(checkFilename); 00125 if (!checkFile.exists()) 00126 { 00127 kdError() << "File '" << checkFilename << "' does not exist." << endl; 00128 return ( 2 ); 00129 } 00130 if (!checkFile.isFile()) 00131 { 00132 kdError() << "'" << checkFilename << "' is not a file." << endl; 00133 return ( 2 ); 00134 } 00135 if (!checkFile.isReadable()) 00136 { 00137 kdError() << "File '" << checkFilename << "' is not readable." << endl; 00138 return ( 2 ); 00139 } 00140 00141 if ( args->isSet( "check" ) ) { 00142 #if !defined(PATH_MAX) && defined(__GLIBC__) 00143 char *pwd_buffer; 00144 #else 00145 char pwd_buffer[PATH_MAX]; 00146 #endif 00147 TQFileInfo file( TQFile::decodeName(args->arg( 0 )) ); 00148 #if !defined(PATH_MAX) && defined(__GLIBC__) 00149 if ( !(pwd_buffer = getcwd( NULL, 0 ) ) ) 00150 #else 00151 if ( !getcwd( pwd_buffer, sizeof(pwd_buffer) - 1 ) ) 00152 #endif 00153 { 00154 kdError() << "getcwd failed." << endl; 00155 return 2; 00156 } 00157 00158 TQString catalogs; 00159 catalogs += locate( "dtd", "customization/catalog.xml" ); 00160 catalogs += " "; 00161 catalogs += locate( "dtd", "docbook/xml-dtd-4.1.2/catalog.xml" ); 00162 00163 setenv( "XML_CATALOG_FILES", TQFile::encodeName( catalogs ).data(), 1); 00164 TQString exe; 00165 #if defined( XMLLINT ) 00166 exe = XMLLINT; 00167 #endif 00168 if ( (::access( TQFile::encodeName( exe ), X_OK )!=0) ) { 00169 exe = KStandardDirs::findExe( "xmllint" ); 00170 if (exe.isEmpty()) 00171 exe = locate( "exe", "xmllint" ); 00172 } 00173 if ( ::access( TQFile::encodeName( exe ), X_OK )==0 ) { 00174 chdir( TQFile::encodeName( file.dirPath( true ) ) ); 00175 TQString cmd = exe; 00176 cmd += " --valid --noout "; 00177 cmd += KProcess::quote(file.fileName()); 00178 cmd += " 2>&1"; 00179 FILE *xmllint = popen( TQFile::encodeName( cmd ), "r"); 00180 char buf[ 512 ]; 00181 bool noout = true; 00182 unsigned int n; 00183 while ( ( n = fread(buf, 1, sizeof( buf ), xmllint ) ) ) { 00184 noout = false; 00185 buf[ n ] = '\0'; 00186 fputs( buf, stderr ); 00187 } 00188 pclose( xmllint ); 00189 chdir( pwd_buffer ); 00190 if ( !noout ) { 00191 #if !defined(PATH_MAX) && defined(__GLIBC__) 00192 free( pwd_buffer ); 00193 #endif 00194 return 1; 00195 } 00196 } else { 00197 kdWarning() << "couldn't find xmllint" << endl; 00198 } 00199 #if !defined(PATH_MAX) && defined(__GLIBC__) 00200 free( pwd_buffer ); 00201 #endif 00202 } 00203 00204 xmlSubstituteEntitiesDefault(1); 00205 xmlLoadExtDtdDefaultValue = 1; 00206 00207 TQValueVector<const char *> params; 00208 if (args->isSet( "output" ) ) { 00209 params.append( qstrdup( "outputFile" ) ); 00210 params.append( qstrdup( TQString(TQFile::decodeName( args->getOption( "output" ) )).latin1() ) ); 00211 } 00212 { 00213 const QCStringList paramList = args->getOptionList( "param" ); 00214 QCStringList::ConstIterator it = paramList.begin(); 00215 QCStringList::ConstIterator end = paramList.end(); 00216 for ( ; it != end; ++it ) { 00217 const TQCString tuple = *it; 00218 const int ch = tuple.find( '=' ); 00219 if ( ch == -1 ) { 00220 kdError() << "Key-Value tuple '" << tuple << "' lacks a '='!" << endl; 00221 return( 2 ); 00222 } 00223 params.append( qstrdup( tuple.left( ch ) ) ); 00224 params.append( qstrdup( tuple.mid( ch + 1 ) ) ); 00225 } 00226 } 00227 params.append( NULL ); 00228 00229 bool index = args->isSet( "htdig" ); 00230 TQString tss = args->getOption( "stylesheet" ); 00231 if ( tss.isEmpty() ) 00232 tss = "customization/kde-chunk.xsl"; 00233 if ( index ) 00234 tss = "customization/htdig_index.xsl" ; 00235 00236 tss = locate( "dtd", tss ); 00237 00238 if ( index ) { 00239 xsltStylesheetPtr style_sheet = 00240 xsltParseStylesheetFile((const xmlChar *)tss.latin1()); 00241 00242 if (style_sheet != NULL) { 00243 00244 xmlDocPtr doc = xmlParseFile( TQFile::encodeName( args->arg( 0 ) ) ); 00245 00246 xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, ¶ms[0]); 00247 00248 xmlFreeDoc(doc); 00249 xsltFreeStylesheet(style_sheet); 00250 if (res != NULL) { 00251 xmlNodePtr cur = xmlDocGetRootElement(res); 00252 if (!cur || xmlStrcmp(cur->name, (const xmlChar *) "entry")) { 00253 fprintf(stderr,"document of the wrong type, root node != entry"); 00254 xmlFreeDoc(res); 00255 return(1); 00256 } 00257 PairList list; 00258 parseEntry( list, cur, 0 ); 00259 int wi = 0; 00260 for ( PairList::ConstIterator it = list.begin(); it != list.end(); 00261 ++it, ++wi ) 00262 fprintf( stdout, "w\t%s\t%d\t%d\n", ( *it ).word.utf8().data(), 00263 1000*wi/(int)list.count(), ( *it ).base ); 00264 00265 xmlFreeDoc(res); 00266 } else { 00267 kdDebug() << "couldn't parse document " << args->arg( 0 ) << endl; 00268 } 00269 } else { 00270 kdDebug() << "couldn't parse style sheet " << tss << endl; 00271 } 00272 00273 } else { 00274 TQString output = transform(args->arg( 0 ) , tss, params); 00275 if (output.isEmpty()) { 00276 fprintf(stderr, "unable to parse %s\n", args->arg( 0 )); 00277 return(1); 00278 } 00279 00280 TQString cache = args->getOption( "cache" ); 00281 if ( !cache.isEmpty() ) { 00282 if ( !saveToCache( output, cache ) ) { 00283 kdError() << TQString(i18n( "Could not write to cache file %1." ).arg( cache )) << endl; 00284 } 00285 goto end; 00286 } 00287 00288 if (output.find( "<FILENAME " ) == -1 || args->isSet( "stdout" ) || args->isSet("output") ) 00289 { 00290 TQFile file; 00291 if (args->isSet( "stdout" ) ) { 00292 file.open( IO_WriteOnly, stdout ); 00293 } else { 00294 if (args->isSet( "output" ) ) 00295 file.setName( TQFile::decodeName(args->getOption( "output" ))); 00296 else 00297 file.setName( "index.html" ); 00298 file.open(IO_WriteOnly); 00299 } 00300 replaceCharsetHeader( output ); 00301 00302 TQCString data = output.local8Bit(); 00303 file.writeBlock(data.data(), data.length()); 00304 file.close(); 00305 } else { 00306 int index = 0; 00307 while (true) { 00308 index = output.find("<FILENAME ", index); 00309 if (index == -1) 00310 break; 00311 int filename_index = index + strlen("<FILENAME filename=\""); 00312 00313 TQString filename = output.mid(filename_index, 00314 output.find("\"", filename_index) - 00315 filename_index); 00316 00317 TQString filedata = splitOut(output, index); 00318 TQFile file(filename); 00319 file.open(IO_WriteOnly); 00320 replaceCharsetHeader( filedata ); 00321 TQCString data = fromUnicode( filedata ); 00322 file.writeBlock(data.data(), data.length()); 00323 file.close(); 00324 00325 index += 8; 00326 } 00327 } 00328 } 00329 end: 00330 xmlCleanupParser(); 00331 xmlMemoryDump(); 00332 return(0); 00333 } 00334