42 #include "ispell_checker.h" 46 #include <tqfileinfo.h> 50 typedef struct str_ispell_map
// Candidate directories that are searched for ispell hash dictionaries.
// The loops in this file walk the array with `while ((tmp = ispell_dirs[i++]))`,
// so a terminating null entry is expected; the terminator (and any further
// entries) are not visible in this excerpt.
57 static const char *ispell_dirs [] = {
58 "/usr/" SYSTEM_LIBDIR
"/ispell",
60 "/usr/local/" SYSTEM_LIBDIR
"/ispell",
61 "/usr/local/lib/ispell",
62 "/usr/local/share/ispell",
// Lookup table with one row per supported language tag. Each row holds, in
// order: the language/locale tag, the ispell hash file name, and the name of
// the character encoding. Elsewhere in this file the three fields are read
// as mapping->lang, mapping->dict and mapping->enc.
// Both bare language codes ("de") and language_COUNTRY codes ("de_CH") are
// listed, so a regional variant can point at its own hash file.
67 static const IspellMap ispell_map [] = {
68 {
"ca" ,
"catala.hash" ,
"iso-8859-1" },
69 {
"ca_ES" ,
"catala.hash" ,
"iso-8859-1" },
70 {
"cs" ,
"czech.hash" ,
"iso-8859-2" },
71 {
"cs_CZ" ,
"czech.hash" ,
"iso-8859-2" },
72 {
"da" ,
"dansk.hash" ,
"iso-8859-1" },
73 {
"da_DK" ,
"dansk.hash" ,
"iso-8859-1" },
74 {
"de" ,
"deutsch.hash" ,
"iso-8859-1" },
75 {
"de_CH" ,
"swiss.hash" ,
"iso-8859-1" },
76 {
"de_AT" ,
"deutsch.hash" ,
"iso-8859-1" },
77 {
"de_DE" ,
"deutsch.hash" ,
"iso-8859-1" },
78 {
"el" ,
"ellhnika.hash" ,
"iso-8859-7" },
79 {
"el_GR" ,
"ellhnika.hash" ,
"iso-8859-7" },
80 {
"en" ,
"british.hash" ,
"iso-8859-1" },
81 {
"en_AU" ,
"british.hash" ,
"iso-8859-1" },
82 {
"en_BZ" ,
"british.hash" ,
"iso-8859-1" },
83 {
"en_CA" ,
"british.hash" ,
"iso-8859-1" },
84 {
"en_GB" ,
"british.hash" ,
"iso-8859-1" },
85 {
"en_IE" ,
"british.hash" ,
"iso-8859-1" },
86 {
"en_JM" ,
"british.hash" ,
"iso-8859-1" },
87 {
"en_NZ" ,
"british.hash" ,
"iso-8859-1" },
88 {
"en_TT" ,
"british.hash" ,
"iso-8859-1" },
89 {
"en_ZA" ,
"british.hash" ,
"iso-8859-1" },
90 {
"en_ZW" ,
"british.hash" ,
"iso-8859-1" },
91 {
"en_PH" ,
"american.hash" ,
"iso-8859-1" },
92 {
"en_US" ,
"american.hash" ,
"iso-8859-1" },
93 {
"eo" ,
"esperanto.hash" ,
"iso-8859-3" },
94 {
"es" ,
"espanol.hash" ,
"iso-8859-1" },
95 {
"es_AR" ,
"espanol.hash" ,
"iso-8859-1" },
96 {
"es_BO" ,
"espanol.hash" ,
"iso-8859-1" },
97 {
"es_CL" ,
"espanol.hash" ,
"iso-8859-1" },
98 {
"es_CO" ,
"espanol.hash" ,
"iso-8859-1" },
99 {
"es_CR" ,
"espanol.hash" ,
"iso-8859-1" },
100 {
"es_DO" ,
"espanol.hash" ,
"iso-8859-1" },
101 {
"es_EC" ,
"espanol.hash" ,
"iso-8859-1" },
102 {
"es_ES" ,
"espanol.hash" ,
"iso-8859-1" },
103 {
"es_GT" ,
"espanol.hash" ,
"iso-8859-1" },
104 {
"es_HN" ,
"espanol.hash" ,
"iso-8859-1" },
105 {
"es_MX" ,
"espanol.hash" ,
"iso-8859-1" },
106 {
"es_NI" ,
"espanol.hash" ,
"iso-8859-1" },
107 {
"es_PA" ,
"espanol.hash" ,
"iso-8859-1" },
108 {
"es_PE" ,
"espanol.hash" ,
"iso-8859-1" },
109 {
"es_PR" ,
"espanol.hash" ,
"iso-8859-1" },
110 {
"es_PY" ,
"espanol.hash" ,
"iso-8859-1" },
111 {
"es_SV" ,
"espanol.hash" ,
"iso-8859-1" },
112 {
"es_UY" ,
"espanol.hash" ,
"iso-8859-1" },
113 {
"es_VE" ,
"espanol.hash" ,
"iso-8859-1" },
114 {
"fi" ,
"finnish.hash" ,
"iso-8859-1" },
115 {
"fi_FI" ,
"finnish.hash" ,
"iso-8859-1" },
116 {
"fr" ,
"francais.hash" ,
"iso-8859-1" },
117 {
"fr_BE" ,
"francais.hash" ,
"iso-8859-1" },
118 {
"fr_CA" ,
"francais.hash" ,
"iso-8859-1" },
119 {
"fr_CH" ,
"francais.hash" ,
"iso-8859-1" },
120 {
"fr_FR" ,
"francais.hash" ,
"iso-8859-1" },
121 {
"fr_LU" ,
"francais.hash" ,
"iso-8859-1" },
122 {
"fr_MC" ,
"francais.hash" ,
"iso-8859-1" },
123 {
"hu" ,
"hungarian.hash" ,
"iso-8859-2" },
124 {
"hu_HU" ,
"hungarian.hash" ,
"iso-8859-2" },
125 {
"ga" ,
"irish.hash" ,
"iso-8859-1" },
126 {
"ga_IE" ,
"irish.hash" ,
"iso-8859-1" },
127 {
"gl" ,
"galician.hash" ,
"iso-8859-1" },
128 {
"gl_ES" ,
"galician.hash" ,
"iso-8859-1" },
129 {
"ia" ,
"interlingua.hash" ,
"iso-8859-1" },
130 {
"it" ,
"italian.hash" ,
"iso-8859-1" },
131 {
"it_IT" ,
"italian.hash" ,
"iso-8859-1" },
132 {
"it_CH" ,
"italian.hash" ,
"iso-8859-1" },
133 {
"la" ,
"mlatin.hash" ,
"iso-8859-1" },
134 {
"la_IT" ,
"mlatin.hash" ,
"iso-8859-1" },
135 {
"lt" ,
"lietuviu.hash" ,
"iso-8859-13" },
136 {
"lt_LT" ,
"lietuviu.hash" ,
"iso-8859-13" },
137 {
"nl" ,
"nederlands.hash" ,
"iso-8859-1" },
138 {
"nl_NL" ,
"nederlands.hash" ,
"iso-8859-1" },
139 {
"nl_BE" ,
"nederlands.hash" ,
"iso-8859-1" },
140 {
"nb" ,
"norsk.hash" ,
"iso-8859-1" },
141 {
"nb_NO" ,
"norsk.hash" ,
"iso-8859-1" },
142 {
"nn" ,
"nynorsk.hash" ,
"iso-8859-1" },
143 {
"nn_NO" ,
"nynorsk.hash" ,
"iso-8859-1" },
144 {
"no" ,
"norsk.hash" ,
"iso-8859-1" },
145 {
"no_NO" ,
"norsk.hash" ,
"iso-8859-1" },
146 {
"pl" ,
"polish.hash" ,
"iso-8859-2" },
147 {
"pl_PL" ,
"polish.hash" ,
"iso-8859-2" },
148 {
"pt" ,
"brazilian.hash" ,
"iso-8859-1" },
149 {
"pt_BR" ,
"brazilian.hash" ,
"iso-8859-1" },
150 {
"pt_PT" ,
"portugues.hash" ,
"iso-8859-1" },
151 {
"ru" ,
"russian.hash" ,
"koi8-r" },
152 {
"ru_MD" ,
"russian.hash" ,
"koi8-r" },
153 {
"ru_RU" ,
"russian.hash" ,
"koi8-r" },
154 {
"sc" ,
"sardinian.hash" ,
"iso-8859-1" },
155 {
"sc_IT" ,
"sardinian.hash" ,
"iso-8859-1" },
156 {
"sk" ,
"slovak.hash" ,
"iso-8859-2" },
157 {
"sk_SK" ,
"slovak.hash" ,
"iso-8859-2" },
158 {
"sl" ,
"slovensko.hash" ,
"iso-8859-2" },
159 {
"sl_SI" ,
"slovensko.hash" ,
"iso-8859-2" },
160 {
"sv" ,
"svenska.hash" ,
"iso-8859-1" },
161 {
"sv_SE" ,
"svenska.hash" ,
"iso-8859-1" },
162 {
"uk" ,
"ukrainian.hash" ,
"koi8-u" },
163 {
"uk_UA" ,
"ukrainian.hash" ,
"koi8-u" },
164 {
"yi" ,
"yiddish-yivo.hash" ,
"utf-8" }
// Number of rows in ispell_map, derived from the array's size so it cannot
// drift out of sync when rows are added or removed.
167 static const size_t size_ispell_map = (
sizeof(ispell_map) /
sizeof((ispell_map)[0]) );
// File-local map filled by the directory scan in this file: the key is a
// language tag (mapping->lang) and the value is the matching "*.hash"
// directory entry. allDics() returns its keys.
168 static TQMap<TQString, TQString> ispell_dict_map;
// Selects the text codec used for dictionary I/O from an encoding name.
// inEncoding: encoding name; the codec lookup below only happens when it is
//             non-null and non-empty.
172 ISpellChecker::try_autodetect_charset(
const char *
const inEncoding)
// Guard against both a null pointer and an empty string.
174 if (inEncoding && strlen(inEncoding))
// Store whatever TQTextCodec::codecForName() returns for that name.
176 m_translate_in = TQTextCodec::codecForName(inEncoding);
// Constructor. Only part of the initializer list and body is visible in this
// excerpt: the checker starts out as "not successfully initialized" and both
// flag index tables are zero-filled.
183 ISpellChecker::ISpellChecker()
// Set to true later by requestDictionary() once a dictionary has been loaded.
186 m_bSuccessfulInit(false),
// Zero-fill the whole m_sflagindex and m_pflagindex arrays.
208 memset(m_sflagindex,0,
sizeof(m_sflagindex));
209 memset(m_pflagindex,0,
sizeof(m_pflagindex));
// FREEP(p): calls free(p) only when p is non-null; wrapped in do/while(0) so
// it behaves as a single statement.
// Destructor: releases the resources held by the checker (only some of the
// cleanup statements are visible in this excerpt).
213 #define FREEP(p) do { if (p) free(p); } while (0) 216 ISpellChecker::~ISpellChecker()
// clearindex() on the two flag index tables appears to run only when
// initialization succeeded (the closing brace is not visible here).
218 if (m_bSuccessfulInit) {
221 clearindex (m_pflagindex);
222 clearindex (m_sflagindex);
// Release the hash string storage if it was allocated.
226 FREEP(m_hashstrings);
// NOTE(review): everywhere in the visible code m_translate_in comes from
// TQTextCodec::codecForName(); confirm that the caller is allowed to delete
// a codec obtained that way before relying on this.
230 delete m_translate_in;
// Checks a single word against the loaded dictionary.
// utf8Word: the word to check (a TQString, despite the parameter name).
// The return statements are not visible in this excerpt.
235 ISpellChecker::checkWord(
const TQString& utf8Word )
// Buffer that receives the word in ispell's internal ichar_t representation.
237 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
// m_bSuccessfulInit is only set once requestDictionary() has loaded a dictionary.
238 if (!m_bSuccessfulInit)
// Reject null or empty words and words that would not fit into iWord.
241 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
// Convert the Unicode word to bytes with the codec in m_translate_in.
250 int len_out = utf8Word.length();
252 out = m_translate_in->fromUnicode( utf8Word, len_out );
// Convert the encoded bytes into iWord; the code continues only when
// strtoichar() returns 0.
255 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
// Test whether either the plain lookup or the compound-word lookup returns 1.
257 if (good(iWord, 0, 0, 1, 0) == 1 ||
258 compoundgood(iWord, 1) == 1)
// Produces spelling suggestions for a word.
// utf8Word: the word to find suggestions for (a TQString, despite the name).
// Returns an empty TQStringList on every early-exit path visible below;
// otherwise the suggestions are collected in sugg_arr.
268 ISpellChecker::suggestWord(
const TQString& utf8Word)
// Buffer that receives the word in ispell's internal ichar_t representation.
270 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
// No dictionary loaded: nothing to suggest.
273 if (!m_bSuccessfulInit)
274 return TQStringList();
// Reject empty words and words that would not fit into iWord.
// NOTE(review): isEmpty() and length() == 0 look redundant with each other.
276 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
277 utf8Word.length() == 0)
278 return TQStringList();
282 return TQStringList();
// Convert the Unicode word to bytes with the codec in m_translate_in.
287 int len_out = utf8Word.length();
288 out = m_translate_in->fromUnicode( utf8Word, len_out );
// Generate candidates only when the conversion into iWord returned 0.
291 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
292 makepossibilities(iWord);
294 return TQStringList();
// Walk the m_pcount candidates in m_possibilities and convert each one to a
// TQString before appending it to the result list.
296 TQStringList sugg_arr;
297 for (c = 0; c < m_pcount; c++)
// Two conversions are visible: treat the candidate as UTF-8, or decode it
// with m_translate_in. The condition choosing between them is not visible.
304 utf8Word = TQString::fromUtf8( m_possibilities[c] );
309 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
312 sugg_arr.append( utf8Word );
// Builds the list of candidate dictionary paths, one per entry in ispell_dirs.
// names: output vector that receives the candidate paths.
// dict:  dictionary file name; presumably appended to each directory, but
//        that statement is not visible in this excerpt.
319 s_buildHashNames (std::vector<std::string> & names,
const char * dict)
321 const char * tmp = 0;
// Walk ispell_dirs until its null entry ends the loop.
326 while ( (tmp = ispell_dirs[i++]) ) {
// Start the candidate path with the directory plus a path separator.
327 TQCString maybeFile = TQCString( tmp ) +
'/';
329 names.push_back( maybeFile.data() );
// Directory scan that fills ispell_dict_map (the enclosing function header is
// not visible in this excerpt). For every directory in ispell_dirs it lists
// the "*.hash" entries and records each language whose dictionary file name
// matches an entry.
336 const char * tmp = 0;
// Walk ispell_dirs until its null entry ends the loop.
339 while ( (tmp = ispell_dirs[i++]) ) {
// `dir` is declared on a line that is not visible here.
341 TQStringList lst = dir.entryList(
"*.hash" );
342 for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
343 TQFileInfo info( *it );
// Compare the file name against every row of ispell_map.
// NOTE(review): this inner `i` shadows the `i` indexing ispell_dirs above.
344 for (
size_t i = 0; i < size_ispell_map; i++)
346 const IspellMap * mapping = (
const IspellMap *)(&(ispell_map[i]));
// strcmp() == 0 means the file is the dictionary for this row's language.
347 if (!strcmp (info.fileName().latin1(), mapping->dict))
// Key: language tag; value: the directory entry as returned by entryList().
349 ispell_dict_map.insert( mapping->lang, *it );
// Returns the language tags recorded in ispell_dict_map.
// When the map is still empty something is done first (that statement is not
// visible in this excerpt); presumably the directory scan that fills the map.
356 TQValueList<TQString>
357 ISpellChecker::allDics()
359 if ( ispell_dict_map.empty() )
362 return ispell_dict_map.keys();
// Tries to load the named dictionary from each candidate location.
// szdict: dictionary file name handed to s_buildHashNames().
// Returns the first candidate path for which linit() reports success
// (a value >= 0), or TQString::null when none could be loaded.
366 ISpellChecker::loadDictionary (
const char * szdict)
368 std::vector<std::string> dict_names;
// Collect the candidate paths for this dictionary.
370 s_buildHashNames (dict_names, szdict);
372 for (
size_t i = 0; i < dict_names.size(); i++)
// linit() takes a non-const char*, hence the const_cast.
374 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
375 return dict_names[i].c_str();
378 return TQString::null;
// Looks up a language tag in ispell_map, loads the matching hash file and
// configures the encoding for it.
// szLang: language tag to look up; compared with strcmp(), so the match is
//         exact and case-sensitive.
// The return statements are not visible in this excerpt.
388 ISpellChecker::loadDictionaryForLanguage (
const char * szLang )
392 const char * encoding = NULL;
393 const char * szFile = NULL;
// Linear search through the table for a row whose tag equals szLang.
395 for (
size_t i = 0; i < size_ispell_map; i++)
397 const IspellMap * mapping = (
const IspellMap *)(&(ispell_map[i]));
398 if (!strcmp (szLang, mapping->lang))
// Remember the hash file name and encoding of the matching row.
400 szFile = mapping->dict;
401 encoding = mapping->enc;
// No matching row, or a row with an empty file name.
406 if (!szFile || !strlen(szFile))
409 alloc_ispell_struct();
// loadDictionary() yields the path that was loaded, or an empty string.
411 hashname = loadDictionary(szFile);
412 if (hashname.isEmpty())
416 setDictionaryEncoding (hashname, encoding);
// Establishes the text codec (m_translate_in) and the preferred string type
// (prefstringchar) for a dictionary that has just been loaded.
// hashname: name of the loaded hash file (not referenced in the visible lines).
// encoding: encoding name forwarded to try_autodetect_charset().
422 ISpellChecker::setDictionaryEncoding(
const TQString& hashname,
const char * encoding )
// First try the encoding name that was supplied for this dictionary.
425 try_autodetect_charset(encoding);
// Look up the "utf8" string type. deftflag is passed as an output pointer
// only while it is still negative.
430 prefstringchar = findfiletype(
"utf8", 1, deftflag < 0 ? &deftflag
431 : static_cast<int *>(NULL));
// No "utf8" string type: probe "latin1" .. "latin15" in turn.
433 if (prefstringchar < 0)
435 std::string teststring;
436 for(
int n1 = 1; n1 <= 15; n1++)
// Build the name by formatting the number into it. The earlier
// `"latin" + n1` was pointer arithmetic on the literal: it produced
// "atin", "tin", ... and read past the end of the literal for larger n1,
// so no "latinN" type could ever be found here. The same TQString::arg()
// idiom is used by the second probing loop below.
438 teststring =
TQString("latin%1").arg(n1).latin1();
439 prefstringchar = findfiletype(teststring.c_str(), 1,
440 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
441 if (prefstringchar >= 0)
// Second "utf8" probe; on success the UTF-8 codec is installed.
450 prefstringchar = findfiletype(
"utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
451 if (prefstringchar >= 0)
453 m_translate_in = TQTextCodec::codecForName(
"utf8");
// Second "latin1" .. "latin15" probe; on success the codec with the same
// name is installed.
463 for(
int n1 = 1; n1 <= 15; n1++)
465 TQString teststring = TQString(
"latin%1").arg(n1);
466 prefstringchar = findfiletype(teststring.latin1(), 1,
467 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
468 if (prefstringchar >= 0)
471 m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
// Last resort visible here: install the "latin1" codec.
480 m_translate_in = TQTextCodec::codecForName(
"latin1");
485 ISpellChecker::requestDictionary(
const char *szLang)
487 if (!loadDictionaryForLanguage (szLang))
490 std::string shortened_dict (szLang);
493 if ((uscore_pos = shortened_dict.rfind (
'_')) != ((
size_t)-1)) {
494 shortened_dict = shortened_dict.substr(0, uscore_pos);
495 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
501 m_bSuccessfulInit =
true;
503 if (prefstringchar < 0)
506 m_defdupchar = prefstringchar;