• Skip to content
  • Skip to link menu
Trinity API Reference
  • Trinity API Reference
  • tdespell2
 

tdespell2

  • tdespell2
  • plugins
  • ispell
ispell.h
1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* enchant
3  * Copyright (C) 2003 Dom Lachowicz
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  * In addition, as a special exception, Dom Lachowicz
21  * gives permission to link the code of this program with
22  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
23  * spell checker backend) and distribute linked combinations including
24  * the two. You must obey the GNU Lesser General Public License in all
25  * respects for all of the code used other than said providers. If you modify
26  * this file, you may extend this exception to your version of the
27  * file, but you are not obligated to do so. If you do not wish to
28  * do so, delete this exception statement from your version.
29  */
30 
31 #ifndef ISPELL_H
32 #define ISPELL_H
33 
34 #include <sys/types.h>
35 
36 /*
37  * $Id$
38  */
39 
40 /*
41  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
42  * All rights reserved.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  *
48  * 1. Redistributions of source code must retain the above copyright
49  * notice, this list of conditions and the following disclaimer.
50  * 2. Redistributions in binary form must reproduce the above copyright
51  * notice, this list of conditions and the following disclaimer in the
52  * documentation and/or other materials provided with the distribution.
53  * 3. All modifications to the source code must be clearly marked as
54  * such. Binary redistributions based on modified source code
55  * must be clearly marked as modified versions in the documentation
56  * and/or other materials provided with the distribution.
57  * 4. All advertising materials mentioning features or use of this software
58  * must display the following acknowledgment:
59  * This product includes software developed by Geoff Kuenning and
60  * other unpaid contributors.
61  * 5. The name of Geoff Kuenning may not be used to endorse or promote
62  * products derived from this software without specific prior
63  * written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
66  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
69  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75  * SUCH DAMAGE.
76  */
77 
78 /*
79  * $Log$
80  * Revision 1.1 2004/01/31 16:44:12 zrusin
81  * ISpell plugin.
82  *
83  * Revision 1.4 2003/08/14 17:51:27 dom
84  * update license - exception clause should be Lesser GPL
85  *
86  * Revision 1.3 2003/07/28 20:40:26 dom
87  * fix up the license clause, further win32-registry proof some directory getting functions
88  *
89  * Revision 1.2 2003/07/16 22:52:40 dom
90  * LGPL + exception license
91  *
92  * Revision 1.1 2003/07/15 01:15:06 dom
93  * ispell enchant backend
94  *
95  * Revision 1.10 2003/01/24 05:52:33 hippietrail
96  *
97  * Refactored ispell code. Old ispell global variables had been put into
98  * an allocated structure, a pointer to which was passed to many functions.
99  * I have now made all such functions and variables private members of the
100  * ISpellChecker class. It was C OO, now it's C++ OO.
101  *
102  * I've fixed the makefiles and tested compilation but am unable to test
103  * operation. Please back out my changes if they cause problems which
104  * are not obvious or easy to fix.
105  *
106  * Revision 1.9 2002/09/19 05:31:15 hippietrail
107  *
108  * More Ispell cleanup. Conditional globals and DEREF macros are removed.
109  * K&R function declarations removed, converted to Doxygen style comments
110  * where possible. No code has been changed (I hope). Compiles for me but
111  * unable to test.
112  *
113  * Revision 1.8 2002/09/17 03:03:29 hippietrail
114  *
115  * After seeking permission on the developer list I've reformatted all the
116  * spelling source which seemed to have parts which used 2, 3, 4, and 8
117  * spaces for tabs. It should all look good with our standard 4-space
118  * tabs now.
119  * I've concentrated just on indentation in the actual code. More prettying
120  * could be done.
121  * * NO code changes were made *
122  *
123  * Revision 1.7 2002/03/22 14:31:57 dom
124  * fix mg's compile problem
125  *
126  * Revision 1.6 2002/03/05 16:55:52 dom
127  * compound word support, tested against swedish
128  *
129  * Revision 1.5 2001/08/10 18:32:40 dom
130  * Spelling and iconv updates. god, i hate iconv
131  *
132  * Revision 1.4 2001/06/26 16:33:27 dom
133  * 128 StringChars and some other stuff
134  *
135  * Revision 1.3 2001/05/12 16:05:42 thomasf
136  * Big pseudo changes to ispell to make it pass around a structure rather
137  * than rely on all sorts of globals willy nilly here and there. Also
138  * fixed our spelling class to work with accepting suggestions once more.
139  * This code is dirty, gross and ugly (not to mention still not supporting
140  * multiple hash sized just yet) but it works on my machine and will no
141  * doubt break other machines.
142  *
143  * Revision 1.2 2001/04/18 00:59:36 thomasf
144  * Removed the duplicate declarations of variables that was causing build
145  * to bail. This new ispell stuff is a total mess.
146  *
147  * Revision 1.1 2001/04/15 16:01:24 tomas_f
148  * moving to spell/xp
149  *
150  * Revision 1.13 2001/04/13 12:33:12 tamlin
151  * ispell can now be used from C++
152  *
153  * Revision 1.12 2001/03/25 01:30:02 tomb
154  * 1. Fixed ispell #define problems on Win32
155  * 2. Changed the way that togglable toolbars are tracked so that Full
156  * Screen mode works right on Windows
157  * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h
158  * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded
159  * 5. Added #define for Auto Save checkbox (though I haven't updated the
160  * Prefs dialog yet)
161  *
162  * Revision 1.11 2001/03/24 23:28:41 dom
163  * Make C++ aware and watch out for VOID on Win32
164  *
165  * Revision 1.10 1999/12/21 18:46:29 sterwill
166  * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
167  *
168  * Revision 1.9 1999/10/20 03:19:35 paul
169  * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more.
170  *
171  * Revision 1.8 1999/09/29 23:33:32 justin
172  * Updates to the underlying ispell-based code to support suggested corrections.
173  *
174  * Revision 1.7 1999/04/13 17:12:51 jeff
175  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
176  * Fixed crash on Win32 with the new code.
177  *
178  * Revision 1.6 1999/01/07 05:14:22 sterwill
179  * So it builds on Unix... it might break win32 in ispell, since ut_types
180  * is no longer included. This is a temporary solution to a larger problem
181  * of including C++ headers in C source files.
182  *
183  * Revision 1.6 1999/01/07 05:14:22 sterwill
184  * So it builds on Unix... it might break win32 in ispell, since ut_types
185  * is no longer included. This is a temporary solution to a larger problem
186  * of including C++ headers in C source files.
187  *
188  * Revision 1.5 1999/01/07 05:02:25 sterwill
189  * Checking in half-broken to avoid tree lossage
190  *
191  * Revision 1.4 1999/01/07 01:07:48 paul
192  * Fixed spell leaks.
193  *
194  * Revision 1.3 1998/12/29 15:03:54 eric
195  *
196  * minor fix to ispell.h to get things to compile on Linux again.
197  *
198  * Revision 1.2 1998/12/29 14:55:33 eric
199  *
200  * I've doctored the ispell code pretty extensively here. It is now
201  * warning-free on Win32. It also *works* on Win32 now, since I
202  * replaced all the I/O calls with ANSI standard ones.
203  *
204  * Revision 1.1 1998/12/28 18:04:43 davet
205  * Spell checker code stripped from ispell. At this point, there are
206  * two external routines... the Init routine, and a check-a-word routine
207  * which returns a boolean value, and takes a 16 bit char string.
208  * The code resembles the ispell code as much as possible still.
209  *
210  * Revision 1.68 1995/03/06 02:42:41 geoff
211  * Be vastly more paranoid about parenthesizing macro arguments. This
212  * fixes a bug in defmt.c where a complex argument was passed to
213  * isstringch.
214  *
215  * Revision 1.67 1995/01/03 19:24:12 geoff
216  * Get rid of a non-global declaration.
217  *
218  * Revision 1.66 1994/12/27 23:08:49 geoff
219  * Fix a lot of subtly bad assumptions about the widths of ints and longs
220  * which only show up on 64-bit machines like the Cray and the DEC Alpha.
221  *
222  * Revision 1.65 1994/11/02 06:56:10 geoff
223  * Remove the anyword feature, which I've decided is a bad idea.
224  *
225  * Revision 1.64 1994/10/25 05:46:18 geoff
226  * Add the FF_ANYWORD flag for defining an affix that will apply to any
227  * word, even if not explicitly specified. (Good for French.)
228  *
229  * Revision 1.63 1994/09/16 04:48:28 geoff
230  * Make stringdups and laststringch unsigned ints, and dupnos a plain
231  * int, so that we can handle more than 128 stringchars and stringchar
232  * types.
233  *
234  * Revision 1.62 1994/09/01 06:06:39 geoff
235  * Change erasechar/killchar to uerasechar/ukillchar to avoid
236  * shared-library problems on HP systems.
237  *
238  * Revision 1.61 1994/08/31 05:58:35 geoff
239  * Add contextoffset, used in -a mode to handle extremely long lines.
240  *
241  * Revision 1.60 1994/05/17 06:44:15 geoff
242  * Add support for controlled compound formation and the COMPOUNDONLY
243  * option to affix flags.
244  *
245  * Revision 1.59 1994/03/15 06:25:16 geoff
246  * Change deftflag's initialization so we can tell if -t/-n appeared.
247  *
248  * Revision 1.58 1994/02/07 05:53:28 geoff
249  * Add typecasts to the 7-bit versions of ichar* routines
250  *
251  * Revision 1.57 1994/01/25 07:11:48 geoff
252  * Get rid of all old RCS log lines in preparation for the 3.1 release.
253  *
254  */
255 
256 #include <stdio.h>
257 /* #include "ut_types.h" */
258 
259 #include "ispell_def.h"
260 
261 #ifdef __cplusplus
262 extern "C" {
263 #endif /* c++ */
264 
/* Upper bound on how much adding affixes can lengthen a word. */
#if !defined(MAXAFFIXLEN)
#  define MAXAFFIXLEN 20
#endif /* MAXAFFIXLEN */
269 
270 /*
271 ** Number of mask bits (affix flags) supported. Must be 32, 64, 128, or
272 ** 256. If MASKBITS is 32 or 64, there are really only 26 or 58 flags
273 ** available, respectively. If it is 32, the flags are named with the
274 ** 26 English uppercase letters; lowercase will be converted to uppercase.
275 ** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII
276 ** order, including the 6 special characters from 'Z' to 'a': "[\]^_`".
277 ** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters,
278 ** respectively, are theoretically available, though a few (newline, slash,
279 ** null byte) are pretty hard to actually use successfully.
280 **
281 ** Note that a number of non-English affix files depend on having a
282 ** larger value for MASKBITS. See the affix files for more
283 ** information.
284 */
285 
#if !defined(MASKBITS)
#  define MASKBITS 64	/* default number of affix-flag mask bits */
#endif /* MASKBITS */

/* Declared here and defined outside this header; MASKSIZE is derived from it. */
extern int gnMaskBits;
291 
292 /*
293 ** C type to use for masks. This should be a type that the processor
294 ** accesses efficiently.
295 **
296 ** MASKTYPE_WIDTH must correctly reflect the number of bits in a
297 ** MASKTYPE. Unfortunately, it is also required to be a constant at
298 ** preprocessor time, which means you can't use the sizeof operator to
299 ** define it.
300 **
301 ** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get
302 ** division-by-zero errors!
303 */
#if !defined(MASKTYPE)
#  define MASKTYPE long
#endif /* MASKTYPE */
#if !defined(MASKTYPE_WIDTH)
/*
 * NOTE(review): the default pairs "long" with a width of 32.  On targets
 * where long is 64 bits the two do not describe the same size -- confirm
 * that callers override one of them or tolerate the unused upper bits.
 */
#  define MASKTYPE_WIDTH 32
#endif /* MASKTYPE_WIDTH */
310 
311  /* program: this should be coded now in init */
312 
/* Raise MASKBITS to MASKTYPE_WIDTH when it was configured narrower than that. */
#if MASKTYPE_WIDTH > MASKBITS
#  undef MASKBITS
#  define MASKBITS MASKTYPE_WIDTH
#endif /* MASKTYPE_WIDTH > MASKBITS */
317 
318 /*
319 ** Maximum hash table fullness percentage. Larger numbers trade space
320 ** for time.
321 **/
#if !defined(MAXPCT)
#  define MAXPCT 70	/* the hash table is expanded once it is 70% full */
#endif /* MAXPCT */
325 
326 /*
327 ** Maximum number of "string" characters that can be defined in a
328 ** language (affix) file. Don't forget that an upper/lower string
329 ** character counts as two!
330 */
#if !defined(MAXSTRINGCHARS)
#  define MAXSTRINGCHARS 128	/* default limit on defined "string" characters */
#endif /* MAXSTRINGCHARS */
334 
335 /*
336 ** Maximum length of a "string" character. The default is appropriate for
337 ** nroff-style characters starting with a backslash.
338 */
#if !defined(MAXSTRINGCHARLEN)
#  define MAXSTRINGCHARLEN 10	/* default longest "string" character */
#endif /* MAXSTRINGCHARLEN */
342 
343 /*
344 ** Maximum number of "hits" expected on a word. This is basically the
345 ** number of different ways different affixes can produce the same word.
346 ** For example, with "english.aff", "brothers" can be produced 3 ways:
347 ** "brothers," "brother+s", or "broth+ers". If this is too low, no major
348 ** harm will be done, but ispell may occasionally forget a capitalization.
349 */
#if !defined(MAX_HITS)
#  define MAX_HITS 10	/* default number of lookup "hits" expected per word */
#endif /* MAX_HITS */
353 
354 /*
355 ** Maximum number of capitalization variations expected in any word.
356 ** Besides the obvious all-lower, all-upper, and capitalized versions,
357 ** this includes followcase variants. If this is too low, no real
358 ** harm will be done, but ispell may occasionally fail to suggest a
359 ** correct capitalization.
360 */
#if !defined(MAX_CAPS)
#  define MAX_CAPS 10	/* default number of capitalization variants per word */
#endif /* MAX_CAPS */
364 
/* File-name buffer size, used only when no earlier header supplied MAXPATHLEN. */
#if !defined(MAXPATHLEN)
#  define MAXPATHLEN 512
#endif /* MAXPATHLEN */
369 
370 /*
371 ** Maximum language-table search size. Smaller numbers make ispell
372 ** run faster, at the expense of more memory (the lowest reasonable value
373 ** is 2). If a given character appears in a significant position in
374 ** more than MAXSEARCH suffixes, it will be given its own index table.
375 ** If you change this, define INDEXDUMP in lookup.c to be sure your
376 ** index table looks reasonable.
377 */
#if !defined(MAXSEARCH)
#  define MAXSEARCH 4	/* default language-table search size */
#endif /* MAXSEARCH */
381 
/*
 * Prototype shim.  With a standard C or C++ compiler P(x) keeps the
 * parameter list and VOID means void.  Otherwise P(x) collapses to "()",
 * VOID falls back to char, and the const keyword is defined away.
 * A VOID already defined by the includer is left alone in both cases.
 */
#if !defined(__STDC__) && !defined(__cplusplus)
#  define P(x) ()
#  ifndef VOID
#    define VOID char
#  endif
#  define const
#else
#  define P(x) x
#  ifndef VOID
#    define VOID void
#  endif
#endif /* !__STDC__ && !__cplusplus */
394 
/* Size of the plain character set: 128 when NO8BIT is defined, 256 otherwise. */
#ifndef NO8BIT
#  define SET_SIZE 256
#else
#  define SET_SIZE 128
#endif /* NO8BIT */
400 
/* Number of MASKTYPE words covered by the run-time mask width gnMaskBits. */
#define MASKSIZE (gnMaskBits / MASKTYPE_WIDTH)

/*
 * TSTMASKBIT(mask, bit) is nonzero when flag number `bit` is set in the
 * MASKTYPE array `mask`.  The word is picked with bit / MASKTYPE_WIDTH and
 * the position inside it with bit & (MASKTYPE_WIDTH - 1), so the macro
 * relies on MASKTYPE_WIDTH being a power of two.  Under lint a prototype
 * is declared instead of the macro.
 */
#ifndef lint
#define TSTMASKBIT(mask, bit) \
    ((mask)[(bit) / MASKTYPE_WIDTH] \
     & ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1))))
#else /* lint */
extern int TSTMASKBIT P ((MASKTYPE * mask, int bit));
#endif /* lint */
411 
/*
 * Settings that depend on the mask width.
 *
 * Above 64 mask bits FULLMASKSET is defined and FLAGBASE is 0.  Up to
 * 64 bits FLAGBASE is MASKTYPE_WIDTH - 6, which places the entry flags
 * in the top six bits of a mask word.
 */
#if MASKBITS > 64
#  define FULLMASKSET
#  define FLAGBASE 0
#else
#  define FLAGBASE ((MASKTYPE_WIDTH) - 6)
#endif /* MASKBITS > 64 */
425 
/*
 * Data type for internal word storage.  When the plain character set
 * plus all string characters fit in 256 codes (and lint is not running),
 * ichar_t is an unsigned char and the ichar* helpers are thin macros over
 * the C string functions; those macros expect <string.h> to be visible at
 * the point of use.  Otherwise ichar_t is an unsigned short, so a string
 * character can be encoded as a single unit, and the helpers are real
 * functions declared here.
 */
#if (SET_SIZE + MAXSTRINGCHARS) <= 256
#ifndef lint
#define ICHAR_IS_CHAR
#endif /* lint */
#endif

#ifdef ICHAR_IS_CHAR
typedef unsigned char ichar_t; /* Internal character */
#define icharlen(s) strlen ((char *) (s))
#define icharcpy(a, b) strcpy ((char *) (a), (char *) (b))
#define icharcmp(a, b) strcmp ((char *) (a), (char *) (b))
#define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n))
#define chartoichar(x) ((ichar_t) (x))
#else
typedef unsigned short ichar_t; /* Internal character */
/* Go through unsigned char first so a negative char is not sign-extended. */
#define chartoichar(x) ((ichar_t) (unsigned char) (x))

ichar_t* icharcpy (ichar_t* out, ichar_t* in);
int icharlen (ichar_t* in);
int icharcmp (ichar_t* s1, ichar_t* s2);
int icharncmp (ichar_t* s1, ichar_t* s2, int n);
#endif /* ICHAR_IS_CHAR */

/*
 * Structure used to record data about successful lookups; these values
 * are used in the ins_root_cap routine to produce correct capitalizations.
 *
 * It does not depend on the width of ichar_t, so it is declared outside
 * the ICHAR_IS_CHAR conditional and is available in both configurations.
 */
struct success
{
    struct dent * dictent; /* Header of dict entry chain for wd */
    struct flagent * prefix; /* Prefix flag used, or NULL */
    struct flagent * suffix; /* Suffix flag used, or NULL */
};
464 
465 struct dent
466 {
467  struct dent * next;
468  char * word;
469  MASKTYPE mask[2];
470 #ifdef FULLMASKSET
471  char flags;
472 #endif
473 };
474 
475 /*
476 ** Flags in the directory entry. If FULLMASKSET is undefined, these are
477 ** stored in the highest bits of the last longword of the mask field. If
478 ** FULLMASKSET is defined, they are stored in the extra "flags" field.
479 #ifndef NO_CAPITALIZATION_SUPPORT
480 **
481 ** If a word has only one capitalization form, and that form is not
482 ** FOLLOWCASE, it will have exactly one entry in the dictionary. The
483 ** legal capitalizations will be indicated by the 2-bit capitalization
484 ** field, as follows:
485 **
486 ** ALLCAPS The word must appear in all capitals.
487 ** CAPITALIZED The word must be capitalized (e.g., London).
488 ** It will also be accepted in all capitals.
489 ** ANYCASE The word may appear in lowercase, capitalized,
490 ** or all-capitals.
491 **
492 ** Regardless of the capitalization flags, the "word" field of the entry
493 ** will point to an all-uppercase copy of the word. This is to simplify
494 ** the large portion of the code that doesn't care about capitalization.
495 ** Ispell will generate the correct version when needed.
496 **
497 ** If a word has more than one capitalization, there will be multiple
498 ** entries for it, linked together by the "next" field. The initial
499 ** entry for such words will be a dummy entry, primarily for use by code
500 ** that ignores capitalization. The "word" field of this entry will
501 ** again point to an all-uppercase copy of the word. The "mask" field
502 ** will contain the logical OR of the mask fields of all variants.
503 ** A header entry is indicated by a capitalization type of ALLCAPS,
504 ** with the MOREVARIANTS bit set.
505 **
506 ** The following entries will define the individual variants. Each
507 ** entry except the last has the MOREVARIANTS flag set, and each
508 ** contains one of the following capitalization options:
509 **
510 ** ALLCAPS The word must appear in all capitals.
511 ** CAPITALIZED The word must be capitalized (e.g., London).
512 ** It will also be accepted in all capitals.
513 ** FOLLOWCASE The word must be capitalized exactly like the
514 ** sample in the entry. Prefix (suffix) characters
515 ** must be rendered in the case of the first (last)
516 ** "alphabetic" character. It will also be accepted
517 ** in all capitals. ("Alphabetic" means "mentioned
518 ** in a 'casechars' statement".)
519 ** ANYCASE The word may appear in lowercase, capitalized,
520 ** or all-capitals.
521 **
522 ** The "mask" field for the entry contains only the affix flag bits that
523 ** are legal for that capitalization. The "word" field will be null
524 ** except for FOLLOWCASE entries, where it will point to the
525 ** correctly-capitalized spelling of the root word.
526 **
527 ** It is worth discussing why the ALLCAPS option is used in
528 ** the header entry. The header entry accepts an all-capitals
529 ** version of the root plus every affix (this is always legal, since
530 ** words get capitalized in headers and so forth). Further, all of
531 ** the following variant entries will reject any all-capitals form
532 ** that is illegal due to an affix.
533 **
534 ** Finally, note that variations in the KEEP flag can cause a multiple-variant
535 ** entry as well. For example, if the personal dictionary contains "ALPHA",
536 ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a
537 ** multiple-variant entry will be created so that "alpha" will be accepted
538 ** but only "ALPHA" will actually be kept.
539 #endif
540 */
/*
 * flagfield names the struct dent member that carries the entry flags:
 * the separate "flags" member with FULLMASKSET, otherwise mask[1].
 */
#ifndef FULLMASKSET
#  define flagfield mask[1]
#else
#  define flagfield flags
#endif /* FULLMASKSET */

/* Entry flag bits, counted upward from FLAGBASE. */
#define USED ((MASKTYPE) 1 << (FLAGBASE + 0))
#define KEEP ((MASKTYPE) 1 << (FLAGBASE + 1))

#ifndef NO_CAPITALIZATION_SUPPORT
/* Two-bit capitalization type, followed by the "more variants follow" bit. */
#  define ANYCASE ((MASKTYPE) 0 << (FLAGBASE + 2))
#  define ALLCAPS ((MASKTYPE) 1 << (FLAGBASE + 2))
#  define CAPITALIZED ((MASKTYPE) 2 << (FLAGBASE + 2))
#  define FOLLOWCASE ((MASKTYPE) 3 << (FLAGBASE + 2))
#  define CAPTYPEMASK ((MASKTYPE) 3 << (FLAGBASE + 2))
#  define MOREVARIANTS ((MASKTYPE) 1 << (FLAGBASE + 4))
#  define ALLFLAGS (USED | KEEP | CAPTYPEMASK | MOREVARIANTS)
/* Extract the capitalization type from a flag word. */
#  define captype(x) ((x) & CAPTYPEMASK)
#else /* NO_CAPITALIZATION_SUPPORT */
#  define ALLFLAGS (USED | KEEP)
#endif /* NO_CAPITALIZATION_SUPPORT */
560 
561 /*
562  * Language tables used to encode prefix and suffix information.
563  */
564 struct flagent
565 {
566  ichar_t * strip; /* String to strip off */
567  ichar_t * affix; /* Affix to append */
568  short flagbit; /* Flag bit this ent matches */
569  short stripl; /* Length of strip */
570  short affl; /* Length of affix */
571  short numconds; /* Number of char conditions */
572  short flagflags; /* Modifiers on this flag */
573  char conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds */
574 };
575 
/* Bit values stored in the flagflags member of struct flagent. */
#define FF_CROSSPRODUCT (1 << 0)    /* Affix does cross-products */
#define FF_COMPOUNDONLY (1 << 1)    /* Affix works in compounds */
/* Either of the two pointer kinds a struct flagptr can hold. */
union ptr_union
{
    struct flagptr *fp;     /* Pointer to more indexing */
    struct flagent *ent;    /* First of a list of entries */
};

/* Index node: an entry list, or a further level of indexing. */
struct flagptr
{
    union ptr_union pu;     /* Entry list or more indexes */
    int numents;            /* If zero, pu.fp is the valid member */
};
593 
/* Describes a single string character type. */
struct strchartype
{
    char *name;         /* Name of the type */
    char *deformatter;  /* Deformatter to use */
    char *suffixes;     /* File suffixes, separated by null bytes */
};
603 
604 /*
605  * Header placed at the beginning of the hash file.
606  */
607 struct hashheader
608 {
609  unsigned short magic; /* Magic number for ID */
610  unsigned short compileoptions; /* How we were compiled */
611  short maxstringchars; /* Max # strchrs we support */
612  short maxstringcharlen; /* Max strchr len supported */
613  short compoundmin; /* Min lth of compound parts */
614  short compoundbit; /* Flag 4 compounding roots */
615  int stringsize; /* Size of string table */
616  int lstringsize; /* Size of lang. str tbl */
617  int tblsize; /* No. entries in hash tbl */
618  int stblsize; /* No. entries in sfx tbl */
619  int ptblsize; /* No. entries in pfx tbl */
620  int sortval; /* Largest sort ID assigned */
621  int nstrchars; /* No. strchars defined */
622  int nstrchartype; /* No. strchar types */
623  int strtypestart; /* Start of strtype table */
624  char nrchars[5]; /* Nroff special characters */
625  char texchars[13]; /* TeX special characters */
626  char compoundflag; /* Compund-word handling */
627  char defhardflag; /* Default tryveryhard flag */
628  char flagmarker; /* "Start-of-flags" char */
629  unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */
630  ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */
631  ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */
632  char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */
633  char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */
634  char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */
635  char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */
636  char stringstarts[SET_SIZE]; /* NZ if char can start str */
637  char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */
638  unsigned int stringdups[MAXSTRINGCHARS]; /* No. of "base" char */
639  int dupnos[MAXSTRINGCHARS]; /* Dup char ID # */
640  unsigned short magic2; /* Second magic for dbl chk */
641 };
642 
/* hash table magic number */
#define MAGIC 0x9602

/*
 * Compile options, put in the hash header for consistency checking.
 * COMPILEOPTIONS is the OR of three independent fields:
 *   MAGIC8BIT            0x01 when NO8BIT is defined, else 0x00
 *   MAGICCAPITALIZATION  0x02 unless NO_CAPITALIZATION_SUPPORT is defined
 *   MAGICMASKSET         0x00 / 0x04 / 0x08 / 0x0C for MASKBITS up to
 *                        32 / 64 / 128 / anything larger
 */
#ifdef NO8BIT
# define MAGIC8BIT 0x01
#else
# define MAGIC8BIT 0x00
#endif

#ifdef NO_CAPITALIZATION_SUPPORT
# define MAGICCAPITALIZATION 0x00
#else
# define MAGICCAPITALIZATION 0x02
#endif

/*
 * MAGICMASKSET is defined exactly once.  Defining a default first and then
 * overriding it for other widths would redefine the macro with a different
 * value, which the C standard does not allow.
 */
#if MASKBITS <= 32
# define MAGICMASKSET 0x00
#elif MASKBITS <= 64
# define MAGICMASKSET 0x04
#elif MASKBITS <= 128
# define MAGICMASKSET 0x08
#else
# define MAGICMASKSET 0x0C
#endif

#define COMPILEOPTIONS (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET)
673 
/*
 * Named slots of the nroff special-character array.  Each name expands
 * to an element of nrchars in whatever object called "hashheader" is in
 * scope at the point of use.
 */
#define NRLEFTPAREN     hashheader.nrchars[0]
#define NRRIGHTPAREN    hashheader.nrchars[1]
#define NRDOT           hashheader.nrchars[2]
#define NRBACKSLASH     hashheader.nrchars[3]
#define NRSTAR          hashheader.nrchars[4]
682 
/*
 * Named slots of the TeX special-character array.  Each name expands
 * to an element of texchars in whatever object called "hashheader" is in
 * scope at the point of use.
 */
#define TEXLEFTPAREN    hashheader.texchars[0]
#define TEXRIGHTPAREN   hashheader.texchars[1]
#define TEXLEFTSQUARE   hashheader.texchars[2]
#define TEXRIGHTSQUARE  hashheader.texchars[3]
#define TEXLEFTCURLY    hashheader.texchars[4]
#define TEXRIGHTCURLY   hashheader.texchars[5]
#define TEXLEFTANGLE    hashheader.texchars[6]
#define TEXRIGHTANGLE   hashheader.texchars[7]
#define TEXBACKSLASH    hashheader.texchars[8]
#define TEXDOLLAR       hashheader.texchars[9]
#define TEXSTAR         hashheader.texchars[10]
#define TEXDOT          hashheader.texchars[11]
#define TEXPERCENT      hashheader.texchars[12]
699 
/* Values the compoundflag setting can take. */
#define COMPOUND_NEVER      0   /* Compound words are never good */
#define COMPOUND_ANYTIME    1   /* Accept run-together words */
#define COMPOUND_CONTROLLED 2   /* Compounds controlled by affix flags */
706 /*
707 ** These macros are similar to the ones above, but they take into account
708 ** the possibility of string characters. Note well that they take a POINTER,
709 ** not a character.
710 **
711 ** The "l_" versions set "len" to the length of the string character as a
712 ** handy side effect. (Note that the global "laststringch" is also set,
713 ** and sometimes used, by these macros.)
714 **
715 ** The "l1_" versions go one step further and guarantee that the "len"
716 ** field is valid for *all* characters, being set to 1 even if the macro
717 ** returns false. This macro is a great example of how NOT to write
718 ** readable C.
719 */
720 /*TF NOTE: This is actually defined in code (makedent) now */
721 #if 0
722 #define isstringch(ptr, canon) (isstringstart (*(ptr)) \
723  && stringcharlen ((ptr), (canon)) > 0)
724 #define l_isstringch(ptr, len, canon) \
725  (isstringstart (*(ptr)) \
726  && (len = stringcharlen ((ptr), (canon))) \
727  > 0)
728 #define l1_isstringch(ptr, len, canon) \
729  (len = 1, \
730  isstringstart ((unsigned char)(*(ptr))) \
731  && ((len = \
732  stringcharlen ((ptr), (canon))) \
733  > 0 \
734  ? 1 : (len = 1, 0)))
735 #endif
736 
/*
 * Sizes of the buffers returned by ichartosstr/strtosichar.  Both use the
 * same element count; STRTOSICHAR_SIZE scales it by sizeof (ichar_t).
 */
#define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4)
#define STRTOSICHAR_SIZE (ICHARTOSSTR_SIZE * sizeof (ichar_t))
743 /* TF CHANGE: We should fill this as a structure
744  and then use it throughout.
745 */
746 
/*
 * INIT(decl, init) declares one global variable.  When MAIN is defined
 * the macro expands to the definition with its initializer; otherwise it
 * expands to a plain extern declaration, so every includer sees the same
 * declaration text.
 */
#ifndef MAIN
#  define INIT(decl, init) extern decl
#else
#  define INIT(decl, init) decl = init
#endif /* MAIN */
757 
758 #ifdef MINIMENU
759 INIT (int minimenusize, 2); /* MUST be either 2 or zero */
760 #else /* MINIMENU */
761 INIT (int minimenusize, 0); /* MUST be either 2 or zero */
762 #endif /* MINIMENU */
763 
764 INIT (int eflag, 0); /* NZ for expand mode */
765 INIT (int dumpflag, 0); /* NZ to do dump mode */
766 INIT (int fflag, 0); /* NZ if -f specified */
767 #ifndef USG
768 INIT (int sflag, 0); /* NZ to stop self after EOF */
769 #endif
770 INIT (int vflag, 0); /* NZ to display characters as M-xxx */
771 INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */
772 INIT (int deftflag, -1); /* NZ for TeX mode by default */
773 INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */
774 INIT (int prefstringchar, -1); /* Preferred string character type */
775 
776 INIT (int terse, 0); /* NZ for "terse" mode */
777 
778 INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */
779 
780 INIT (int minword, MINWORD); /* Longest always-legal word */
781 INIT (int sortit, 1); /* Sort suggestions alphabetically */
782 INIT (int compoundflag, -1); /* How to treat compounds: see above */
783 INIT (int tryhardflag, -1); /* Always call tryveryhard */
784 
785 INIT (char * currentfile, NULL); /* Name of current input file */
786 
787 /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */
788 INIT (int math_mode, 0);
789 /* P -- paragraph or LR mode
790  * b -- parsing a \begin statement
791  * e -- parsing an \end statement
792  * r -- parsing a \ref type of argument.
793  * m -- looking for a \begin{minipage} argument.
794  */
795 INIT (char LaTeX_Mode, 'P');
796 
797 #ifdef __cplusplus
798 }
799 #endif /* c++ */
800 
801 #endif /* ISPELL_H */

tdespell2

Skip menu "tdespell2"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

tdespell2

Skip menu "tdespell2"
  • arts
  • dcop
  • dnssd
  • interfaces
  •   kspeech
  •     interface
  •     library
  •   tdetexteditor
  • kate
  • kded
  • kdoctools
  • kimgio
  • kjs
  • libtdemid
  • libtdescreensaver
  • tdeabc
  • tdecmshell
  • tdecore
  • tdefx
  • tdehtml
  • tdeinit
  • tdeio
  •   bookmarks
  •   httpfilter
  •   kpasswdserver
  •   kssl
  •   tdefile
  •   tdeio
  •   tdeioexec
  • tdeioslave
  •   http
  • tdemdi
  •   tdemdi
  • tdenewstuff
  • tdeparts
  • tdeprint
  • tderandr
  • tderesources
  • tdespell2
  • tdesu
  • tdeui
  • tdeunittest
  • tdeutils
  • tdewallet
Generated for tdespell2 by doxygen 1.8.1.2
This website is maintained by Timothy Pearson.