meinproc.cpp
00001 #include <config.h> 00002 #include <string.h> 00003 #include <sys/time.h> 00004 #include <unistd.h> 00005 #include <libxml/xmlversion.h> 00006 #include <libxml/xmlmemory.h> 00007 #include <libxml/debugXML.h> 00008 #include <libxml/HTMLtree.h> 00009 #include <libxml/xmlIO.h> 00010 #include <libxml/parserInternals.h> 00011 #include <libxslt/xsltconfig.h> 00012 #include <libxslt/xsltInternals.h> 00013 #include <libxslt/transform.h> 00014 #include <libxslt/xsltutils.h> 00015 #include <tqstring.h> 00016 #include <kstandarddirs.h> 00017 #include <kinstance.h> 00018 #include <xslt.h> 00019 #include <tqfile.h> 00020 #include <tqdir.h> 00021 #include <tdecmdlineargs.h> 00022 #include <tdelocale.h> 00023 #include <tdeaboutdata.h> 00024 #include <stdlib.h> 00025 #include <kdebug.h> 00026 #include <tqtextcodec.h> 00027 #include <tqfileinfo.h> 00028 #include <kprocess.h> 00029 #include <tqvaluevector.h> 00030 00031 extern int xmlLoadExtDtdDefaultValue; 00032 00033 class MyPair { 00034 public: 00035 TQString word; 00036 int base;}; 00037 00038 typedef TQValueList<MyPair> PairList; 00039 00040 void parseEntry(PairList &list, xmlNodePtr cur, int base) 00041 { 00042 if ( !cur ) 00043 return; 00044 00045 base += atoi( ( const char* )xmlGetProp(cur, ( const xmlChar* )"header") ); 00046 if ( base > 10 ) // 10 is the maximum 00047 base = 10; 00048 00049 /* We don't care what the top level element name is */ 00050 cur = cur->xmlChildrenNode; 00051 while (cur != NULL) { 00052 00053 if ( cur->type == XML_TEXT_NODE ) { 00054 TQString words = TQString::fromUtf8( ( char* )cur->content ); 00055 TQStringList wlist = TQStringList::split( ' ', words.simplifyWhiteSpace() ); 00056 for ( TQStringList::ConstIterator it = wlist.begin(); 00057 it != wlist.end(); ++it ) 00058 { 00059 MyPair m; 00060 m.word = *it; 00061 m.base = base; 00062 list.append( m ); 00063 } 00064 } else if ( !xmlStrcmp( cur->name, (const xmlChar *) "entry") ) 00065 parseEntry( list, cur, base ); 00066 00067 cur = cur->next; 00068 } 00069 00070 } 00071 00072 static TDECmdLineOptions options[] = 00073 { 00074 { "stylesheet <xsl>", I18N_NOOP( "Stylesheet to use" ), 0 }, 00075 { "stdout", I18N_NOOP( "Output whole document to stdout" ), 0 }, 00076 { "o", 0, 0 }, 00077 { "output <file>", I18N_NOOP("Output whole document to file" ), 0 }, 00078 { "htdig", I18N_NOOP( "Create a ht://dig compatible index" ), 0 }, 00079 { "check", I18N_NOOP( "Check the document for validity" ), 0 }, 00080 { "cache <file>", I18N_NOOP( "Create a cache file for the document" ), 0}, 00081 { "srcdir <dir>", I18N_NOOP( "Set the srcdir, for tdelibs" ), 0}, 00082 { "param <key>=<value>", I18N_NOOP( "Parameters to pass to the stylesheet" ), 0}, 00083 { "+xml", I18N_NOOP("The file to transform"), 0}, 00084 TDECmdLineLastOption // End of options. 00085 }; 00086 00087 00088 00089 00090 int main(int argc, char **argv) { 00091 00092 // xsltSetGenericDebugFunc(stderr, NULL); 00093 00094 TDEAboutData aboutData( "meinproc", I18N_NOOP("XML-Translator" ), 00095 "$Revision$", 00096 I18N_NOOP("TDE Translator for XML")); 00097 00098 TDECmdLineArgs::init(argc, argv, &aboutData); 00099 TDECmdLineArgs::addCmdLineOptions( options ); 00100 00101 TDELocale::setMainCatalogue("tdeio_help"); 00102 TDEInstance ins("meinproc"); 00103 TDEGlobal::locale(); 00104 00105 00106 TDECmdLineArgs *args = TDECmdLineArgs::parsedArgs(); 00107 if ( args->count() != 1 ) { 00108 args->usage(); 00109 return ( 1 ); 00110 } 00111 00112 // Need to set SRCDIR before calling fillInstance 00113 TQString srcdir; 00114 if ( args->isSet( "srcdir" ) ) 00115 srcdir = TQDir( TQFile::decodeName( args->getOption( "srcdir" ) ) ).absPath(); 00116 fillInstance(ins,srcdir); 00117 00118 LIBXML_TEST_VERSION 00119 00120 TQString checkFilename = TQFile::decodeName(args->arg( 0 )); 00121 TQFileInfo checkFile(checkFilename); 00122 if (!checkFile.exists()) 00123 { 00124 kdError() << "File '" << checkFilename << "' does not exist." << endl; 00125 return ( 2 ); 00126 } 00127 if (!checkFile.isFile()) 00128 { 00129 kdError() << "'" << checkFilename << "' is not a file." << endl; 00130 return ( 2 ); 00131 } 00132 if (!checkFile.isReadable()) 00133 { 00134 kdError() << "File '" << checkFilename << "' is not readable." << endl; 00135 return ( 2 ); 00136 } 00137 00138 if ( args->isSet( "check" ) ) { 00139 #if !defined(PATH_MAX) && defined(__GLIBC__) 00140 char *pwd_buffer; 00141 #else 00142 char pwd_buffer[PATH_MAX]; 00143 #endif 00144 TQFileInfo file( TQFile::decodeName(args->arg( 0 )) ); 00145 #if !defined(PATH_MAX) && defined(__GLIBC__) 00146 if ( !(pwd_buffer = getcwd( NULL, 0 ) ) ) 00147 #else 00148 if ( !getcwd( pwd_buffer, sizeof(pwd_buffer) - 1 ) ) 00149 #endif 00150 { 00151 kdError() << "getcwd failed." << endl; 00152 return 2; 00153 } 00154 00155 TQString catalogs; 00156 catalogs += locate( "dtd", "customization/catalog.xml" ); 00157 catalogs += " "; 00158 catalogs += locate( "dtd", "docbook/xml-dtd-4.1.2/catalog.xml" ); 00159 00160 setenv( "XML_CATALOG_FILES", TQFile::encodeName( catalogs ).data(), 1); 00161 TQString exe; 00162 #if defined( XMLLINT ) 00163 exe = XMLLINT; 00164 #endif 00165 if ( (::access( TQFile::encodeName( exe ), X_OK )!=0) ) { 00166 exe = TDEStandardDirs::findExe( "xmllint" ); 00167 if (exe.isEmpty()) 00168 exe = locate( "exe", "xmllint" ); 00169 } 00170 if ( ::access( TQFile::encodeName( exe ), X_OK )==0 ) { 00171 chdir( TQFile::encodeName( file.dirPath( true ) ) ); 00172 TQString cmd = exe; 00173 cmd += " --valid --noout "; 00174 cmd += TDEProcess::quote(file.fileName()); 00175 cmd += " 2>&1"; 00176 FILE *xmllint = popen( TQFile::encodeName( cmd ), "r"); 00177 char buf[ 512 ]; 00178 bool noout = true; 00179 unsigned int n; 00180 while ( ( n = fread(buf, 1, sizeof( buf ), xmllint ) ) ) { 00181 noout = false; 00182 buf[ n ] = '\0'; 00183 fputs( buf, stderr ); 00184 } 00185 pclose( xmllint ); 00186 chdir( pwd_buffer ); 00187 if ( !noout ) { 00188 #if !defined(PATH_MAX) && defined(__GLIBC__) 00189 free( pwd_buffer ); 00190 #endif 00191 return 1; 00192 } 00193 } else { 00194 kdWarning() << "couldn't find xmllint" << endl; 00195 } 00196 #if !defined(PATH_MAX) && defined(__GLIBC__) 00197 free( pwd_buffer ); 00198 #endif 00199 } 00200 00201 xmlSubstituteEntitiesDefault(1); 00202 xmlLoadExtDtdDefaultValue = 1; 00203 00204 TQValueVector<const char *> params; 00205 if (args->isSet( "output" ) ) { 00206 params.append( tqstrdup( "outputFile" ) ); 00207 params.append( tqstrdup( TQString(TQFile::decodeName( args->getOption( "output" ) )).latin1() ) ); 00208 } 00209 { 00210 const QCStringList paramList = args->getOptionList( "param" ); 00211 QCStringList::ConstIterator it = paramList.begin(); 00212 QCStringList::ConstIterator end = paramList.end(); 00213 for ( ; it != end; ++it ) { 00214 const TQCString tuple = *it; 00215 const int ch = tuple.find( '=' ); 00216 if ( ch == -1 ) { 00217 kdError() << "Key-Value tuple '" << tuple << "' lacks a '='!" << endl; 00218 return( 2 ); 00219 } 00220 params.append( tqstrdup( tuple.left( ch ) ) ); 00221 params.append( tqstrdup( tuple.mid( ch + 1 ) ) ); 00222 } 00223 } 00224 params.append( NULL ); 00225 00226 bool index = args->isSet( "htdig" ); 00227 TQString tss = args->getOption( "stylesheet" ); 00228 if ( tss.isEmpty() ) 00229 tss = "customization/tde-chunk.xsl"; 00230 if ( index ) 00231 tss = "customization/htdig_index.xsl" ; 00232 00233 tss = locate( "dtd", tss ); 00234 00235 if ( index ) { 00236 xsltStylesheetPtr style_sheet = 00237 xsltParseStylesheetFile((const xmlChar *)tss.latin1()); 00238 00239 if (style_sheet != NULL) { 00240 00241 xmlDocPtr doc = xmlParseFile( TQFile::encodeName( args->arg( 0 ) ) ); 00242 00243 xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, ¶ms[0]); 00244 00245 xmlFreeDoc(doc); 00246 xsltFreeStylesheet(style_sheet); 00247 if (res != NULL) { 00248 xmlNodePtr cur = xmlDocGetRootElement(res); 00249 if (!cur || xmlStrcmp(cur->name, (const xmlChar *) "entry")) { 00250 fprintf(stderr,"document of the wrong type, root node != entry"); 00251 xmlFreeDoc(res); 00252 return(1); 00253 } 00254 PairList list; 00255 parseEntry( list, cur, 0 ); 00256 int wi = 0; 00257 for ( PairList::ConstIterator it = list.begin(); it != list.end(); 00258 ++it, ++wi ) 00259 fprintf( stdout, "w\t%s\t%d\t%d\n", ( *it ).word.utf8().data(), 00260 1000*wi/(int)list.count(), ( *it ).base ); 00261 00262 xmlFreeDoc(res); 00263 } else { 00264 kdDebug() << "couldn't parse document " << args->arg( 0 ) << endl; 00265 } 00266 } else { 00267 kdDebug() << "couldn't parse style sheet " << tss << endl; 00268 } 00269 00270 } else { 00271 TQString output = transform(args->arg( 0 ) , tss, params); 00272 if (output.isEmpty()) { 00273 fprintf(stderr, "unable to parse %s\n", args->arg( 0 )); 00274 return(1); 00275 } 00276 00277 TQString cache = args->getOption( "cache" ); 00278 if ( !cache.isEmpty() ) { 00279 if ( !saveToCache( output, cache ) ) { 00280 kdError() << TQString(i18n( "Could not write to cache file %1." ).arg( cache )) << endl; 00281 } 00282 goto end; 00283 } 00284 00285 if (output.find( "<FILENAME " ) == -1 || args->isSet( "stdout" ) || args->isSet("output") ) 00286 { 00287 TQFile file; 00288 if (args->isSet( "stdout" ) ) { 00289 file.open( IO_WriteOnly, stdout ); 00290 } else { 00291 if (args->isSet( "output" ) ) 00292 file.setName( TQFile::decodeName(args->getOption( "output" ))); 00293 else 00294 file.setName( "index.html" ); 00295 file.open(IO_WriteOnly); 00296 } 00297 replaceCharsetHeader( output ); 00298 00299 TQCString data = output.local8Bit(); 00300 file.writeBlock(data.data(), data.length()); 00301 file.close(); 00302 } else { 00303 int index = 0; 00304 while (true) { 00305 index = output.find("<FILENAME ", index); 00306 if (index == -1) 00307 break; 00308 int filename_index = index + strlen("<FILENAME filename=\""); 00309 00310 TQString filename = output.mid(filename_index, 00311 output.find("\"", filename_index) - 00312 filename_index); 00313 00314 TQString filedata = splitOut(output, index); 00315 TQFile file(filename); 00316 file.open(IO_WriteOnly); 00317 replaceCharsetHeader( filedata ); 00318 TQCString data = fromUnicode( filedata ); 00319 file.writeBlock(data.data(), data.length()); 00320 file.close(); 00321 00322 index += 8; 00323 } 00324 } 00325 } 00326 end: 00327 xmlCleanupParser(); 00328 xmlMemoryDump(); 00329 return(0); 00330 } 00331