• Skip to content
  • Skip to link menu
Trinity API Reference
  • Trinity API Reference
  • kspell2
 

kspell2

  • kspell2
  • plugins
  • ispell
makedent.cpp
1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* enchant
3  * Copyright (C) 2003 Dom Lachowicz
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  * In addition, as a special exception, Dom Lachowicz
21  * gives permission to link the code of this program with
22  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
23  * spell checker backend) and distribute linked combinations including
24  * the two. You must obey the GNU Lesser General Public License in all
25  * respects for all of the code used other than said providers. If you modify
26  * this file, you may extend this exception to your version of the
27  * file, but you are not obligated to do so. If you do not wish to
28  * do so, delete this exception statement from your version.
29  */
30 
31 /*
32  * Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  *
39  * 1. Redistributions of source code must retain the above copyright
40  * notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  * notice, this list of conditions and the following disclaimer in the
43  * documentation and/or other materials provided with the distribution.
44  * 3. All modifications to the source code must be clearly marked as
45  * such. Binary redistributions based on modified source code
46  * must be clearly marked as modified versions in the documentation
47  * and/or other materials provided with the distribution.
48  * 4. All advertising materials mentioning features or use of this software
49  * must display the following acknowledgment:
50  * This product includes software developed by Geoff Kuenning and
51  * other unpaid contributors.
52  * 5. The name of Geoff Kuenning may not be used to endorse or promote
53  * products derived from this software without specific prior
54  * written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  */
68 
69 /*
70  * $Log$
71  * Revision 1.2 2004/02/01 04:46:46 zrusin
72  * Both ispell and aspell plugins are not working properly. We can start switching.
73  *
74  * Revision 1.1 2004/01/31 16:44:12 zrusin
75  * ISpell plugin.
76  *
77  * Revision 1.4 2003/08/14 17:51:28 dom
78  * update license - exception clause should be Lesser GPL
79  *
80  * Revision 1.3 2003/07/28 20:40:27 dom
81  * fix up the license clause, further win32-registry proof some directory getting functions
82  *
83  * Revision 1.2 2003/07/16 22:52:49 dom
84  * LGPL + exception license
85  *
86  * Revision 1.1 2003/07/15 01:15:08 dom
87  * ispell enchant backend
88  *
89  * Revision 1.3 2003/02/12 02:10:38 hippietrail
90  *
91  * C casts -> C++ casts
92  * Improved const-correctness due to changing casts
93  * Fixed some warnings
94  *
95  * Revision 1.2 2003/01/29 05:50:12 hippietrail
96  *
97  * Fixed my mess in EncodingManager.
98  * Changed many C casts to C++ casts.
99  *
100  * Revision 1.1 2003/01/24 05:52:35 hippietrail
101  *
102  * Refactored ispell code. Old ispell global variables had been put into
103  * an allocated structure, a pointer to which was passed to many functions.
104  * I have now made all such functions and variables private members of the
105  * ISpellChecker class. It was C OO, now it's C++ OO.
106  *
107  * I've fixed the makefiles and tested compilation but am unable to test
108  * operation. Please back out my changes if they cause problems which
109  * are not obvious or easy to fix.
110  *
111  * Revision 1.8 2003/01/06 18:48:40 dom
112  * ispell cleanup, start of using new 'add' save features
113  *
114  * Revision 1.7 2003/01/04 19:09:04 dom
115  * some tidying... bug pissing me off...
116  *
117  * Revision 1.6 2002/09/19 05:31:18 hippietrail
118  *
119  * More Ispell cleanup. Conditional globals and DEREF macros are removed.
120  * K&R function declarations removed, converted to Doxygen style comments
121  * where possible. No code has been changed (I hope). Compiles for me but
122  * unable to test.
123  *
124  * Revision 1.5 2002/09/17 03:03:30 hippietrail
125  *
126  * After seeking permission on the developer list I've reformatted all the
127  * spelling source which seemed to have parts which used 2, 3, 4, and 8
128  * spaces for tabs. It should all look good with our standard 4-space
129  * tabs now.
130  * I've concentrated just on indentation in the actual code. More prettying
131  * could be done.
132  * * NO code changes were made *
133  *
134  * Revision 1.4 2002/09/13 17:20:13 mpritchett
135  * Fix more warnings for Linux build
136  *
137  * Revision 1.3 2002/03/22 14:31:57 dom
138  * fix mg's compile problem
139  *
140  * Revision 1.2 2001/05/12 16:05:42 thomasf
141  * Big pseudo changes to ispell to make it pass around a structure rather
142  * than rely on all sorts of gloabals willy nilly here and there. Also
143  * fixed our spelling class to work with accepting suggestions once more.
144  * This code is dirty, gross and ugly (not to mention still not supporting
145  * multiple hash sized just yet) but it works on my machine and will no
146  * doubt break other machines.
147  *
148  * Revision 1.1 2001/04/15 16:01:24 tomas_f
149  * moving to spell/xp
150  *
151  * Revision 1.6 1999/12/21 18:46:29 sterwill
152  * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
153  *
154  * Revision 1.5 1999/10/20 03:19:35 paul
155  * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more.
156  *
157  * Revision 1.4 1999/04/13 17:12:51 jeff
158  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
159  * Fixed crash on Win32 with the new code.
160  *
161  * Revision 1.3 1998/12/29 14:55:33 eric
162  *
163  * I've doctored the ispell code pretty extensively here. It is now
164  * warning-free on Win32. It also *works* on Win32 now, since I
165  * replaced all the I/O calls with ANSI standard ones.
166  *
167  * Revision 1.3 1998/12/29 14:55:33 eric
168  *
169  * I've doctored the ispell code pretty extensively here. It is now
170  * warning-free on Win32. It also *works* on Win32 now, since I
171  * replaced all the I/O calls with ANSI standard ones.
172  *
173  * Revision 1.2 1998/12/28 23:11:30 eric
174  *
175  * modified spell code and integration to build on Windows.
176  * This is still a hack.
177  *
178  * Actually, it doesn't yet WORK on Windows. It just builds.
179  * SpellCheckInit is failing for some reason.
180  *
181  * Revision 1.1 1998/12/28 18:04:43 davet
182  * Spell checker code stripped from ispell. At this point, there are
183  * two external routines... the Init routine, and a check-a-word routine
184  * which returns a boolean value, and takes a 16 bit char string.
185  * The code resembles the ispell code as much as possible still.
186  *
187  * Revision 1.45 1994/12/27 23:08:52 geoff
188  * Add code to makedent to reject words that contain non-word characters.
189  * This helps protect people who use ISO 8-bit characters when ispell
190  * isn't configured for that option.
191  *
192  * Revision 1.44 1994/10/25 05:46:20 geoff
193  * Fix some incorrect declarations in the lint versions of some routines.
194  *
195  * Revision 1.43 1994/09/16 03:32:34 geoff
196  * Issue an error message for bad affix flags
197  *
198  * Revision 1.42 1994/02/07 04:23:43 geoff
199  * Correctly identify the deformatter when changing file types
200  *
201  * Revision 1.41 1994/01/25 07:11:55 geoff
202  * Get rid of all old RCS log lines in preparation for the 3.1 release.
203  *
204  */
205 
206 #include <stdlib.h>
207 #include <string.h>
208 #include <ctype.h>
209 
210 #include "ispell_checker.h"
211 #include "msgs.h"
212 
213 int makedent P ((char * lbuf, int lbuflen, struct dent * ent));
214 /*int combinecaps P ((struct dent * hdr, struct dent * newent));
215 #ifndef NO_CAPITALIZATION_SUPPORT
216 static void forcevheader P ((struct dent * hdrp, struct dent * oldp,
217  struct dent * newp));
218 #endif / * NO_CAPITALIZATION_SUPPORT * /
219 static int combine_two_entries P ((struct dent * hdrp,
220  struct dent * oldp, struct dent * newp));
221 static int acoversb P ((struct dent * enta, struct dent * entb));
222 */
223 /*static int issubset P ((struct dent * ent1, struct dent * ent2));
224 static void combineaffixes P ((struct dent * ent1, struct dent * ent2));*/
225 
226 void toutent P ((FILE * outfile, struct dent * hent,
227  int onlykeep));
228 /*static void toutword P ((FILE * outfile, char * word,
229  struct dent * cent));
230 static void flagout P ((FILE * outfile, int flag));
231 */
232 #ifndef ICHAR_IS_CHAR
233 ichar_t * icharcpy P ((ichar_t * out, ichar_t * in));
234 int icharlen P ((ichar_t * str));
235 int icharcmp P ((ichar_t * s1, ichar_t * s2));
236 int icharncmp P ((ichar_t * s1, ichar_t * s2, int n));
237 #endif /* ICHAR_IS_CHAR */
238 
239 /*static int has_marker;*/
240 
241 /*
242  * Fill in a directory entry, including setting the capitalization flags, and
243  * allocate and initialize memory for the d->word field. Returns -1
244  * if there was trouble. The input word must be in canonical form.
245 int makedent (lbuf, lbuflen, d)
246 This function is not used by AbiWord. I don't know if it'll be needed for
247 other abi documents
248  */
249 
250 #ifndef NO_CAPITALIZATION_SUPPORT
251 
259 long
260 ISpellChecker::whatcap (ichar_t *word)
261 {
262  ichar_t * p;
263 
264  for (p = word; *p; p++)
265  {
266  if (mylower (*p))
267  break;
268  }
269  if (*p == '\0')
270  return ALLCAPS;
271  else
272  {
273  for ( ; *p; p++)
274  {
275  if (myupper (*p))
276  break;
277  }
278  if (*p == '\0')
279  {
280  /*
281  ** No uppercase letters follow the lowercase ones.
282  ** If there is more than one uppercase letter, it's
283  ** "followcase". If only the first one is capitalized,
284  ** it's "capitalize". If there are no capitals
285  ** at all, it's ANYCASE.
286  */
287  if (myupper (word[0]))
288  {
289  for (p = word + 1; *p != '\0'; p++)
290  {
291  if (myupper (*p))
292  return FOLLOWCASE;
293  }
294  return CAPITALIZED;
295  }
296  else
297  return ANYCASE;
298  }
299  else
300  return FOLLOWCASE; /* .../lower/upper */
301  }
302 }
303 
312 int ISpellChecker::addvheader ( struct dent *dp)
313 {
314  struct dent * tdent; /* Copy of entry */
315 
316  /*
317  ** Add a second entry with the correct capitalization, and then make
318  ** dp into a special dummy entry.
319  */
320  tdent = static_cast<struct dent *>(malloc(sizeof (struct dent)));
321  if (tdent == NULL)
322  {
323  fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
324  return -1;
325  }
326  *tdent = *dp;
327  if (captype (tdent->flagfield) != FOLLOWCASE)
328  tdent->word = NULL;
329  else
330  {
331  /* Followcase words need a copy of the capitalization */
332  tdent->word = static_cast<char *>(malloc (static_cast<unsigned int>(strlen(tdent->word)) + 1));
333  if (tdent->word == NULL)
334  {
335  fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
336  free (reinterpret_cast<char *>(tdent));
337  return -1;
338  }
339  strcpy (tdent->word, dp->word);
340  }
341  chupcase (dp->word);
342  dp->next = tdent;
343  dp->flagfield &= ~CAPTYPEMASK;
344  dp->flagfield |= (ALLCAPS | MOREVARIANTS);
345  return 0;
346 }
347 #endif /* NO_CAPITALIZATION_SUPPORT */
348 
349 /*
350 ** Combine and resolve the entries describing two capitalizations of the same
351 ** word. This may require allocating yet more entries.
352 **
353 ** Hdrp is a pointer into a hash table. If the word covered by hdrp has
354 ** variations, hdrp must point to the header. Newp is a pointer to temporary
355 ** storage, and space is malloc'ed if newp is to be kept. The newp->word
356 ** field must have been allocated with mymalloc, so that this routine may free
357 ** the space if it keeps newp but not the word.
358 **
359 ** Return value: 0 if the word was added, 1 if the word was combined
360 ** with an existing entry, and -1 if trouble occurred (e.g., malloc).
361 ** If 1 is returned, newp->word may have been freed using myfree.
362 **
363 ** Life is made much more difficult by the KEEP flag's possibilities. We
364 ** must ensure that a !KEEP word doesn't find its way into the personal
365 ** dictionary as a result of this routine's actions. However, a !KEEP
366 ** word that has affixes must have come from the main dictionary, so it
367 ** is acceptable to combine entries in that case (got that?).
368 **
369 ** The net result of all this is a set of rules that is a bloody pain
370 ** to figure out. Basically, we want to choose one of the following actions:
371 **
372 ** (1) Add newp's affixes and KEEP flag to oldp, and discard newp.
373 ** (2) Add oldp's affixes and KEEP flag to newp, replace oldp with
374 ** newp, and discard newp.
375 #ifndef NO_CAPITALIZATION_SUPPORT
376 ** (3) Insert newp as a new entry in the variants list. If there is
377 ** currently no variant header, this requires adding one. Adding a
378 ** header splits into two sub-cases:
379 **
380 ** (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it
381 ** into the header.
382 ** (3b) Otherwise, add a new entry to serve as the header.
383 ** To ease list linking, this is done by copying oldp into
384 ** the new entry, and then performing (3a).
385 **
386 ** After newp has been added as a variant, its affixes and KEEP
387 ** flag are OR-ed into the variant header.
388 #endif
389 **
390 ** So how to choose which? The default is always case (3), which adds newp
391 ** as a new entry in the variants list. Cases (1) and (2) are symmetrical
392 ** except for which entry is discarded. We can use case (1) or (2) whenever
393 ** one entry "covers" the other. "Covering" is defined as follows:
394 **
395 ** (4) For entries with matching capitalization types, A covers B
396 ** if:
397 **
398 ** (4a) B's affix flags are a subset of A's, or the KEEP flags
399 ** match, and
400 ** (4b) either the KEEP flags match, or A's KEEP flag is set.
401 ** (Since A has more suffixes, combining B with it won't
402 ** cause any extra suffixes to be added to the dictionary.)
403 ** (4c) If the words are FOLLOWCASE, the capitalizations match
404 ** exactly.
405 **
406 #ifndef NO_CAPITALIZATION_SUPPORT
407 ** (5) For entries with mismatched capitalization types, A covers B
408 ** if (4a) and (4b) are true, and:
409 **
410 ** (5a) B is ALLCAPS, or
411 ** (5b) A is ANYCASE, and B is CAPITALIZED.
412 #endif
413 **
414 ** For any "hdrp" without variants, oldp is the same as hdrp. Otherwise,
415 ** the above tests are applied using each variant in turn for oldp.
416 int combinecaps (hdrp, newp)
417 static void forcevheader (hdrp, oldp, newp)
418 static int combine_two_entries (hdrp, oldp, newp)
419 static int acoversb (enta, entb)
420 */
421 
422 /*
423  * \param s
424  */
425 void
426 ISpellChecker::upcase (ichar_t *s)
427 {
428 
429  while (*s)
430  {
431  *s = mytoupper (*s);
432  s++;
433  }
434 }
435 
436 /*
437  * \param s
438  */
439 void
440 ISpellChecker::lowcase (ichar_t *s)
441 {
442 
443  while (*s)
444  {
445  *s = mytolower (*s);
446  s++;
447  }
448 }
449 
456 void
457 ISpellChecker::chupcase (char *s)
458 {
459  ichar_t * is;
460 
461  is = strtosichar (s, 1);
462  upcase (is);
463  ichartostr (s, is, strlen (s) + 1, 1);
464 }
465 
466 /*
467 ** See if one affix field is a subset of another. Returns NZ if ent1
468 ** is a subset of ent2. The KEEP flag is not taken into consideration.
469 static int issubset (ent1, ent2)
470 static void combineaffixes (ent1, ent2)
471 */
472 
473 /*
474 ** Write out a dictionary entry, including capitalization variants.
475 ** If onlykeep is true, only those variants with KEEP set will be
476 ** written.
477 Removed -- not used by Abiword
478 void toutent_ (toutfile, hent, onlykeep)
479 static void toutword (toutfile, word, cent)
480 static void flagout (toutfile, flag)
481 */
482 
498 int
499 ISpellChecker::stringcharlen (char *bufp, int canonical)
500 {
501 #ifdef SLOWMULTIPLY
502  static char * sp[MAXSTRINGCHARS];
503  static int inited = 0;
504 #endif /* SLOWMULTIPLY */
505  char * bufcur;
506  char * stringcur;
507  int stringno;
508  int lowstringno;
509  int highstringno;
510  int dupwanted;
511 
512 #ifdef SLOWMULTIPLY
513  if (!inited)
514  {
515  inited = 1;
516  for (stringno = 0; stringno < MAXSTRINGCHARS; stringno++)
517  sp[stringno] = &hashheader.stringchars[stringno][0];
518  }
519 #endif /* SLOWMULTIPLY */
520  lowstringno = 0;
521  highstringno = m_hashheader.nstrchars - 1;
522  dupwanted = canonical ? 0 : m_defdupchar;
523  while (lowstringno <= highstringno)
524  {
525  stringno = (lowstringno + highstringno) >> 1;
526 #ifdef SLOWMULTIPLY
527  stringcur = sp[stringno];
528 #else /* SLOWMULTIPLY */
529  stringcur = &m_hashheader.stringchars[stringno][0];
530 #endif /* SLOWMULTIPLY */
531  bufcur = bufp;
532  while (*stringcur)
533  {
534 #ifdef NO8BIT
535  if (((*bufcur++ ^ *stringcur) & 0x7F) != 0)
536 #else /* NO8BIT */
537  if (*bufcur++ != *stringcur)
538 #endif /* NO8BIT */
539  break;
540  /*
541  ** We can't use autoincrement above because of the
542  ** test below.
543  */
544  stringcur++;
545  }
546  if (*stringcur == '\0')
547  {
548  if (m_hashheader.dupnos[stringno] == dupwanted)
549  {
550  /* We have a match */
551  m_laststringch = m_hashheader.stringdups[stringno];
552 #ifdef SLOWMULTIPLY
553  return stringcur - sp[stringno];
554 #else /* SLOWMULTIPLY */
555  return stringcur - &m_hashheader.stringchars[stringno][0];
556 #endif /* SLOWMULTIPLY */
557  }
558  else
559  --stringcur;
560  }
561  /* No match - choose which side to search on */
562 #ifdef NO8BIT
563  if ((*--bufcur & 0x7F) < (*stringcur & 0x7F))
564  highstringno = stringno - 1;
565  else if ((*bufcur & 0x7F) > (*stringcur & 0x7F))
566  lowstringno = stringno + 1;
567 #else /* NO8BIT */
568  if (*--bufcur < *stringcur)
569  highstringno = stringno - 1;
570  else if (*bufcur > *stringcur)
571  lowstringno = stringno + 1;
572 #endif /* NO8BIT */
573  else if (dupwanted < m_hashheader.dupnos[stringno])
574  highstringno = stringno - 1;
575  else
576  lowstringno = stringno + 1;
577  }
578  m_laststringch = static_cast<unsigned int>(-1);
579  return 0; /* Not a string character */
580 }
581 
/* STRING-CHARACTER TEST MACROS
** These macros test whether the text at a POINTER (not a character) starts
** with a string character, by calling isstringstart() on the first byte
** and then stringcharlen() on the pointer.  stringcharlen() also updates
** m_laststringch as a side effect, which callers sometimes use.
**
** The "l_" version additionally assigns the length of the string character
** to "len" when the first byte passes isstringstart().
**
** The "l1_" version goes one step further and guarantees that "len" is
** valid for *all* characters, being set to 1 even if the macro yields
** false.
**
** They stay macros because "len" must be assigned in the caller's scope.
**
** All three cast the first byte to unsigned char before handing it to
** isstringstart().  Without the cast a high-bit byte on a platform with
** signed char is sign-extended when converted to ichar_t and fails the
** range check inside isstringstart(); previously only l1_isstringch
** carried the cast.
*/
#define isstringch(ptr, canon) \
	(isstringstart (static_cast<unsigned char>(*(ptr))) \
	 && stringcharlen ((ptr), (canon)) > 0)

#define l_isstringch(ptr, len, canon) \
	(isstringstart (static_cast<unsigned char>(*(ptr))) \
	 && (len = stringcharlen ((ptr), (canon))) > 0)

#define l1_isstringch(ptr, len, canon) \
	(len = 1, \
	 isstringstart (static_cast<unsigned char>(*(ptr))) \
	 && ((len = stringcharlen ((ptr), (canon))) > 0 \
	     ? 1 : (len = 1, 0)))

/*** END STRING-CHARACTER TEST MACROS ***/
629 
641 int
642 ISpellChecker::strtoichar (ichar_t *out, char *in, int outlen, int canonical)
643 {
644  int len = 1; /* Length of next character */
645 
646  outlen /= sizeof (ichar_t); /* Convert to an ichar_t count */
647  for ( ; --outlen > 0 && *in != '\0'; in += len)
648  {
649  if (l1_isstringch (in, len , canonical)) {
650  *out++ = SET_SIZE + m_laststringch;
651  } else {
652  *out++ = (unsigned char)( *in );
653  }
654  }
655  *out = 0;
656  return outlen <= 0;
657 }
658 
674 int
675 ISpellChecker::ichartostr ( char *out, ichar_t *in, int outlen, int canonical)
676 {
677  int ch; /* Next character to store */
678  int i; /* Index into duplicates list */
679  char * scharp; /* Pointer into a string char */
680 
681  while (--outlen > 0 && (ch = *in++) != 0)
682  {
683  if (ch < SET_SIZE)
684  *out++ = static_cast<char>(ch);
685  else
686  {
687  ch -= SET_SIZE;
688  if (!canonical)
689  {
690  for (i = m_hashheader.nstrchars; --i >= 0; )
691  {
692  if (m_hashheader.dupnos[i] == m_defdupchar
693  && (static_cast<int>(m_hashheader.stringdups[i])) == ch)
694  {
695  ch = i;
696  break;
697  }
698  }
699  }
700  scharp = m_hashheader.stringchars[static_cast<unsigned>(ch)];
701  while ((*out++ = *scharp++) != '\0')
702  ;
703  out--;
704  }
705  }
706  *out = '\0';
707  return outlen <= 0;
708 }
709 
718 ichar_t *
719 ISpellChecker::strtosichar ( char *in, int canonical)
720 {
721  static ichar_t out[STRTOSICHAR_SIZE / sizeof (ichar_t)];
722 
723  if (strtoichar (out, in, sizeof out, canonical))
724  fprintf (stderr, WORD_TOO_LONG (in));
725  return out;
726 }
727 
736 char *
737 ISpellChecker::ichartosstr (ichar_t *in, int canonical)
738 {
739  static char out[ICHARTOSSTR_SIZE];
740 
741  if (ichartostr (out, in, sizeof out, canonical))
742  fprintf (stderr, WORD_TOO_LONG (out));
743  return out;
744 }
745 
754 char *
755 ISpellChecker::printichar (int in)
756 {
757  static char out[MAXSTRINGCHARLEN + 1];
758 
759  if (in < SET_SIZE)
760  {
761  out[0] = static_cast<char>(in);
762  out[1] = '\0';
763  }
764  else
765  strcpy (out, m_hashheader.stringchars[static_cast<unsigned>(in) - SET_SIZE]);
766  return out;
767 }
768 
769 #ifndef ICHAR_IS_CHAR
770 
778 ichar_t *
779 icharcpy (ichar_t *out, ichar_t *in)
780 {
781  ichar_t * origout; /* Copy of destination for return */
782 
783  origout = out;
784  while ((*out++ = *in++) != 0)
785  ;
786  return origout;
787 }
788 
796 int
797 icharlen (ichar_t * in)
798 {
799  int len; /* Length so far */
800 
801  for (len = 0; *in++ != 0; len++)
802  ;
803  return len;
804 }
805 
814 int
815 icharcmp (ichar_t * s1, ichar_t * s2)
816 {
817 
818  while (*s1 != 0)
819  {
820  if (*s1++ != *s2++)
821  return *--s1 - *--s2;
822  }
823  return *s1 - *s2;
824 }
825 
835 int
836 icharncmp (ichar_t *s1, ichar_t *s2, int n)
837 {
838 
839  while (--n >= 0 && *s1 != 0)
840  {
841  if (*s1++ != *s2++)
842  return *--s1 - *--s2;
843  }
844  if (n < 0)
845  return 0;
846  else
847  return *s1 - *s2;
848 }
849 
850 #endif /* ICHAR_IS_CHAR */
851 
852 /*
853  * \param istate
854  * \param name
855  * \param searchnames
856  * \param deformatter
857  *
858  * \return
859  */
860 int
861 ISpellChecker::findfiletype (const char *name, int searchnames, int *deformatter)
862 {
863  char * cp; /* Pointer into suffix list */
864  int cplen; /* Length of current suffix */
865  int i; /* Index into type table */
866  int len; /* Length of the name */
867 
868  /*
869  * Note: for now, the deformatter is set to 1 for tex, 0 for nroff.
870  * Further, we assume that it's one or the other, so that a test
871  * for tex is sufficient. This needs to be generalized.
872  */
873  len = strlen (name);
874  if (searchnames)
875  {
876  for (i = 0; i < m_hashheader.nstrchartype; i++)
877  {
878  if (strcmp (name, m_chartypes[i].name) == 0)
879  {
880  if (deformatter != NULL)
881  *deformatter =
882  (strcmp (m_chartypes[i].deformatter, "tex") == 0);
883  return i;
884  }
885  }
886  }
887  for (i = 0; i < m_hashheader.nstrchartype; i++)
888  {
889  for (cp = m_chartypes[i].suffixes; *cp != '\0'; cp += cplen + 1)
890  {
891  cplen = strlen (cp);
892  if (len >= cplen && strcmp (&name[len - cplen], cp) == 0)
893  {
894  if (deformatter != NULL)
895  *deformatter =
896  (strcmp (m_chartypes[i].deformatter, "tex") == 0);
897  return i;
898  }
899  }
900  }
901  return -1;
902 }
903 
904 /*
905  HACK: macros replaced with function implementations
906  so we could do a side-effect-free check for unicode
907  characters which aren't in hashheader
908 
909  TODO: this is just a workaround to keep us from crashing.
910  more sophisticated logic needed here.
911 */
912 char ISpellChecker::myupper(ichar_t c)
913 {
914  if (c < (SET_SIZE + MAXSTRINGCHARS))
915  return m_hashheader.upperchars[c];
916  else
917  return 0;
918 }
919 
920 char ISpellChecker::mylower(ichar_t c)
921 {
922  if (c < (SET_SIZE + MAXSTRINGCHARS))
923  return m_hashheader.lowerchars[c];
924  else
925  return 0;
926 }
927 
928 int myspace(ichar_t c)
929 {
930  return ((c > 0) && (c < 0x80) && isspace(static_cast<unsigned char>(c)));
931 }
932 
933 char ISpellChecker::iswordch(ichar_t c)
934 {
935  if (c < (SET_SIZE + MAXSTRINGCHARS))
936  return m_hashheader.wordchars[c];
937  else
938  return 0;
939 }
940 
941 char ISpellChecker::isboundarych(ichar_t c)
942 {
943  if (c < (SET_SIZE + MAXSTRINGCHARS))
944  return m_hashheader.boundarychars[c];
945  else
946  return 0;
947 }
948 
949 char ISpellChecker::isstringstart(ichar_t c)
950 {
951  if (c < (SET_SIZE))
952  return m_hashheader.stringstarts[static_cast<unsigned char>(c)];
953  else
954  return 0;
955 }
956 
957 ichar_t ISpellChecker::mytolower(ichar_t c)
958 {
959  if (c < (SET_SIZE + MAXSTRINGCHARS))
960  return m_hashheader.lowerconv[c];
961  else
962  return c;
963 }
964 
965 ichar_t ISpellChecker::mytoupper (ichar_t c)
966 {
967  if (c < (SET_SIZE + MAXSTRINGCHARS))
968  return m_hashheader.upperconv[c];
969  else
970  return c;
971 }
972 

kspell2

Skip menu "kspell2"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

kspell2

Skip menu "kspell2"
  • arts
  • dcop
  • dnssd
  • interfaces
  •     interface
  •     library
  •   kspeech
  •   ktexteditor
  • kabc
  • kate
  • kcmshell
  • kdecore
  • kded
  • kdefx
  • kdeprint
  • kdesu
  • kdeui
  • kdoctools
  • khtml
  • kimgio
  • kinit
  • kio
  •   bookmarks
  •   httpfilter
  •   kfile
  •   kio
  •   kioexec
  •   kpasswdserver
  •   kssl
  • kioslave
  •   http
  • kjs
  • kmdi
  •   kmdi
  • knewstuff
  • kparts
  • krandr
  • kresources
  • kspell2
  • kunittest
  • kutils
  • kwallet
  • libkmid
  • libkscreensaver
Generated for kspell2 by doxygen 1.8.13
This website is maintained by Timothy Pearson.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. |