encodingdetector.h
00001 /* 00002 This file was taken from the KDE 4.x libraries and backported to TQt 3. 00003 00004 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 00005 Copyright (C) 2007 Nick Shaforostoff (shafff@ukr.net) 00006 00007 This library is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU Library General Public 00009 License as published by the Free Software Foundation; either 00010 version 2 of the License, or (at your option) any later version. 00011 00012 This library is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 Library General Public License for more details. 00016 00017 You should have received a copy of the GNU Library General Public License 00018 along with this library; see the file COPYING.LIB. If not, write to 00019 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00020 Boston, MA 02110-1301, USA. 00021 00022 */ 00023 #ifndef ENCODINGDETECTOR_H 00024 #define ENCODINGDETECTOR_H 00025 00026 #include <tqstring.h> 00027 00028 class TQTextCodec; 00029 class TQTextDecoder; 00030 class EncodingDetectorPrivate; 00031 00057 class EncodingDetector 00058 { 00059 public: 00060 enum EncodingChoiceSource 00061 { 00062 DefaultEncoding, 00063 AutoDetectedEncoding, 00064 BOM, 00065 EncodingFromXMLHeader, 00066 EncodingFromMetaTag, 00067 EncodingFromHTTPHeader, 00068 UserChosenEncoding 00069 }; 00070 00071 enum AutoDetectScript 00072 { 00073 None, 00074 SemiautomaticDetection, 00075 Arabic, 00076 Baltic, 00077 CentralEuropean, 00078 ChineseSimplified, 00079 ChineseTraditional, 00080 Cyrillic, 00081 Greek, 00082 Hebrew, 00083 Japanese, 00084 Korean, 00085 NorthernSaami, 00086 SouthEasternEurope, 00087 Thai, 00088 Turkish, 00089 Unicode, 00090 WesternEuropean 00091 }; 00092 00096 EncodingDetector(); 00097 00101 EncodingDetector(TQTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script=None); 00102 ~EncodingDetector(); 00103 00104 //const TQTextCodec* codec() const; 00105 00109 bool setEncoding(const char *encoding, EncodingChoiceSource type); 00110 00115 const char* encoding() const; 00116 00117 bool visuallyOrdered() const; 00118 00119 // void setAutoDetectLanguage( const TQString& ); 00120 // const TQString& autoDetectLanguage() const; 00121 00122 void setAutoDetectLanguage( AutoDetectScript ); 00123 AutoDetectScript autoDetectLanguage() const; 00124 00125 EncodingChoiceSource encodingChoiceSource() const; 00126 00131 bool analyze( const char *data, int len ); 00132 00137 bool analyze( const TQByteArray &data ); 00138 00142 static AutoDetectScript scriptForName(const TQString& lang); 00143 static TQString nameForScript(AutoDetectScript); 00144 static AutoDetectScript scriptForLanguageCode(const TQString &lang); 00145 static bool hasAutoDetectionForScript(AutoDetectScript); 00146 00147 protected: 00155 bool errorsIfUtf8 (const char* data, int length); 00156 00160 TQTextDecoder* decoder(); 00161 00162 private: 00163 EncodingDetectorPrivate* const d; 00164 }; 00165 00166 #endif