/* tgood.cpp */
/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* enchant
 * Copyright (C) 2003 Dom Lachowicz
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * In addition, as a special exception, Dom Lachowicz
 * gives permission to link the code of this program with
 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
 * spell checker backend) and distribute linked combinations including
 * the two. You must obey the GNU Lesser General Public License in all
 * respects for all of the code used other than said providers. If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 */

/*
 * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All modifications to the source code must be clearly marked as
 *    such. Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 * 4. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgment:
 *      This product includes software developed by Geoff Kuenning and
 *      other unpaid contributors.
 * 5. The name of Geoff Kuenning may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Table-driven version of good.c.
 *
 * Geoff Kuenning, July 1987
 */

/*
 * $Log$
 * Revision 1.1 2004/01/31 16:44:12 zrusin
 * ISpell plugin.
 *
 * Revision 1.4 2003/08/14 17:51:29 dom
 * update license - exception clause should be Lesser GPL
 *
 * Revision 1.3 2003/07/28 20:40:28 dom
 * fix up the license clause, further win32-registry proof some directory getting functions
 *
 * Revision 1.2 2003/07/16 22:52:56 dom
 * LGPL + exception license
 *
 * Revision 1.1 2003/07/15 01:15:09 dom
 * ispell enchant backend
 *
 * Revision 1.2 2003/01/29 05:50:12 hippietrail
 *
 * Fixed my mess in EncodingManager.
 * Changed many C casts to C++ casts.
 *
 * Revision 1.1 2003/01/24 05:52:36 hippietrail
 *
 * Refactored ispell code. Old ispell global variables had been put into
 * an allocated structure, a pointer to which was passed to many functions.
 * I have now made all such functions and variables private members of the
 * ISpellChecker class. It was C OO, now it's C++ OO.
 *
 * I've fixed the makefiles and tested compilation but am unable to test
 * operation. Please back out my changes if they cause problems which
 * are not obvious or easy to fix.
 *
 * Revision 1.6 2003/01/06 18:48:42 dom
 * ispell cleanup, start of using new 'add' save features
 *
 * Revision 1.5 2002/09/19 05:31:20 hippietrail
 *
 * More Ispell cleanup. Conditional globals and DEREF macros are removed.
 * K&R function declarations removed, converted to Doxygen style comments
 * where possible. No code has been changed (I hope). Compiles for me but
 * unable to test.
 *
 * Revision 1.4 2002/09/17 03:03:31 hippietrail
 *
 * After seeking permission on the developer list I've reformatted all the
 * spelling source which seemed to have parts which used 2, 3, 4, and 8
 * spaces for tabs. It should all look good with our standard 4-space
 * tabs now.
 * I've concentrated just on indentation in the actual code. More prettying
 * could be done.
 * * NO code changes were made *
 *
 * Revision 1.3 2002/09/13 17:20:14 mpritchett
 * Fix more warnings for Linux build
 *
 * Revision 1.2 2001/05/12 16:05:42 thomasf
 * Big pseudo changes to ispell to make it pass around a structure rather
 * than rely on all sorts of gloabals willy nilly here and there. Also
 * fixed our spelling class to work with accepting suggestions once more.
 * This code is dirty, gross and ugly (not to mention still not supporting
 * multiple hash sized just yet) but it works on my machine and will no
 * doubt break other machines.
 *
 * Revision 1.1 2001/04/15 16:01:24 tomas_f
 * moving to spell/xp
 *
 * Revision 1.7 1999/10/20 06:03:56 sterwill
 * Changed C++-style comments to C-style comments in C code.
 *
 * Revision 1.6 1999/10/20 03:19:35 paul
 * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more.
 *
 * Revision 1.5 1999/04/13 17:12:51 jeff
 * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
 * Fixed crash on Win32 with the new code.
 *
 * Revision 1.4 1998/12/29 14:55:33 eric
 *
 * I've doctored the ispell code pretty extensively here. It is now
 * warning-free on Win32. It also *works* on Win32 now, since I
 * replaced all the I/O calls with ANSI standard ones.
 *
 * Revision 1.4 1998/12/29 14:55:33 eric
 *
 * I've doctored the ispell code pretty extensively here. It is now
 * warning-free on Win32. It also *works* on Win32 now, since I
 * replaced all the I/O calls with ANSI standard ones.
 *
 * Revision 1.3 1998/12/28 23:11:30 eric
 *
 * modified spell code and integration to build on Windows.
 * This is still a hack.
 *
 * Actually, it doesn't yet WORK on Windows. It just builds.
 * SpellCheckInit is failing for some reason.
 *
 * Revision 1.2 1998/12/28 22:16:22 eric
 *
 * These changes begin to incorporate the spell checker into AbiWord. Most
 * of this is a hack.
 *
 * 1. added other/spell to the -I list in config/abi_defs
 * 2. replaced other/spell/Makefile with one which is more like
 *    our build system.
 * 3. added other/spell to other/Makefile so that the build will now
 *    dive down and build the spell check library.
 * 4. added the AbiSpell library to the Makefiles in wp/main
 * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
 *    This call is a HACK and should be replaced with something
 *    proper later.
 * 6. added code to fv_View.cpp as follows:
 *    whenever you double-click on a word, the spell checker
 *    verifies that word and prints its status to stdout.
 *
 * Caveats:
 * 1. This will break the Windows build. I'm going to work on fixing it
 *    now.
 * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
 *    The dictionary location is currently hard-coded. This will be
 *    fixed as well.
 *
 * Anyway, such as it is, it works.
 *
 * Revision 1.1 1998/12/28 18:04:43 davet
 * Spell checker code stripped from ispell. At this point, there are
 * two external routines... the Init routine, and a check-a-word routine
 * which returns a boolean value, and takes a 16 bit char string.
 * The code resembles the ispell code as much as possible still.
 *
 * Revision 1.32 1994/11/02 06:56:16 geoff
 * Remove the anyword feature, which I've decided is a bad idea.
 *
 * Revision 1.31 1994/10/25 05:46:25 geoff
 * Add support for the FF_ANYWORD (affix applies to all words, even if
 * flag bit isn't set) flag option.
 *
 * Revision 1.30 1994/05/24 06:23:08 geoff
 * Don't create a hit if "allhits" is clear and capitalization
 * mismatches. This cures a bug where a word could be in the dictionary
 * and yet not found.
 *
 * Revision 1.29 1994/05/17 06:44:21 geoff
 * Add support for controlled compound formation and the COMPOUNDONLY
 * option to affix flags.
 *
 * Revision 1.28 1994/01/25 07:12:13 geoff
 * Get rid of all old RCS log lines in preparation for the 3.1 release.
00223 * 00224 */ 00225 00226 #include <ctype.h> 00227 #include <stdlib.h> 00228 #include <string.h> 00229 00230 #include "ispell_checker.h" 00231 00243 void ISpellChecker::chk_aff (ichar_t *word, ichar_t *ucword, 00244 int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts) 00245 { 00246 ichar_t * cp; /* Pointer to char to index on */ 00247 struct flagptr * ind; /* Flag index table to test */ 00248 00249 pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &m_pflagindex[0], 00250 ignoreflagbits, allhits); 00251 cp = ucword; 00252 /* HACK: bail on unrecognized chars */ 00253 if (*cp >= (SET_SIZE + MAXSTRINGCHARS)) 00254 return; 00255 ind = &m_pflagindex[*cp++]; 00256 while (ind->numents == 0 && ind->pu.fp != NULL) 00257 { 00258 if (*cp == 0) 00259 return; 00260 if (ind->pu.fp[0].numents) 00261 { 00262 pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &ind->pu.fp[0], 00263 ignoreflagbits, allhits); 00264 if (m_numhits && !allhits && /* !cflag && */ !ignoreflagbits) 00265 return; 00266 } 00267 /* HACK: bail on unrecognized chars */ 00268 if (*cp >= (SET_SIZE + MAXSTRINGCHARS)) 00269 return; 00270 ind = &ind->pu.fp[*cp++]; 00271 } 00272 pfx_list_chk (word, ucword, len, pfxopts, sfxopts, ind, ignoreflagbits, 00273 allhits); 00274 if (m_numhits && !allhits && /* !cflag &&*/ !ignoreflagbits) 00275 return; 00276 chk_suf (word, ucword, len, sfxopts, static_cast<struct flagent *>(NULL), 00277 ignoreflagbits, allhits); 00278 } 00279 00292 void ISpellChecker::pfx_list_chk (ichar_t *word, ichar_t *ucword, int len, int optflags, 00293 int sfxopts, struct flagptr * ind, int ignoreflagbits, int allhits) 00294 { 00295 int cond; /* Condition number */ 00296 ichar_t * cp; /* Pointer into end of ucword */ 00297 struct dent * dent; /* Dictionary entry we found */ 00298 int entcount; /* Number of entries to process */ 00299 struct flagent * 00300 flent; /* Current table entry */ 00301 int preadd; /* Length added to tword2 as prefix */ 00302 int tlen; /* Length of tword */ 
00303 ichar_t tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Tmp cpy */ 00304 ichar_t tword2[sizeof tword]; /* 2nd copy for ins_root_cap */ 00305 00306 for (flent = ind->pu.ent, entcount = ind->numents; 00307 entcount > 0; 00308 flent++, entcount--) 00309 { 00310 /* 00311 * If this is a compound-only affix, ignore it unless we're 00312 * looking for that specific thing. 00313 */ 00314 if ((flent->flagflags & FF_COMPOUNDONLY) != 0 00315 && (optflags & FF_COMPOUNDONLY) == 0) 00316 continue; 00317 00318 /* 00319 * See if the prefix matches. 00320 */ 00321 tlen = len - flent->affl; 00322 if (tlen > 0 00323 && (flent->affl == 0 00324 || icharncmp (flent->affix, ucword, flent->affl) == 0) 00325 && tlen + flent->stripl >= flent->numconds) 00326 { 00327 /* 00328 * The prefix matches. Remove it, replace it by the "strip" 00329 * string (if any), and check the original conditions. 00330 */ 00331 if (flent->stripl) 00332 icharcpy (tword, flent->strip); 00333 icharcpy (tword + flent->stripl, ucword + flent->affl); 00334 cp = tword; 00335 for (cond = 0; cond < flent->numconds; cond++) 00336 { 00337 if ((flent->conds[*cp++] & (1 << cond)) == 0) 00338 break; 00339 } 00340 if (cond >= flent->numconds) 00341 { 00342 /* 00343 * The conditions match. See if the word is in the 00344 * dictionary. 
00345 */ 00346 tlen += flent->stripl; 00347 00348 if (ignoreflagbits) 00349 { 00350 if ((dent = ispell_lookup (tword, 1)) != NULL) 00351 { 00352 cp = tword2; 00353 if (flent->affl) 00354 { 00355 icharcpy (cp, flent->affix); 00356 cp += flent->affl; 00357 *cp++ = '+'; 00358 } 00359 preadd = cp - tword2; 00360 icharcpy (cp, tword); 00361 cp += tlen; 00362 if (flent->stripl) 00363 { 00364 *cp++ = '-'; 00365 icharcpy (cp, flent->strip); 00366 } 00367 } 00368 } 00369 else if ((dent = ispell_lookup (tword, 1)) != NULL 00370 && TSTMASKBIT (dent->mask, flent->flagbit)) 00371 { 00372 if (m_numhits < MAX_HITS) 00373 { 00374 m_hits[m_numhits].dictent = dent; 00375 m_hits[m_numhits].prefix = flent; 00376 m_hits[m_numhits].suffix = NULL; 00377 m_numhits++; 00378 } 00379 if (!allhits) 00380 { 00381 #ifndef NO_CAPITALIZATION_SUPPORT 00382 if (cap_ok (word, &m_hits[0], len)) 00383 return; 00384 m_numhits = 0; 00385 #else /* NO_CAPITALIZATION_SUPPORT */ 00386 return; 00387 #endif /* NO_CAPITALIZATION_SUPPORT */ 00388 } 00389 } 00390 /* 00391 * Handle cross-products. 
00392 */ 00393 if (flent->flagflags & FF_CROSSPRODUCT) 00394 chk_suf (word, tword, tlen, sfxopts | FF_CROSSPRODUCT, 00395 flent, ignoreflagbits, allhits); 00396 } 00397 } 00398 } 00399 } 00400 00412 void 00413 ISpellChecker::chk_suf (ichar_t *word, ichar_t *ucword, 00414 int len, int optflags, struct flagent *pfxent, 00415 int ignoreflagbits, int allhits) 00416 { 00417 ichar_t * cp; /* Pointer to char to index on */ 00418 struct flagptr * ind; /* Flag index table to test */ 00419 00420 suf_list_chk (word, ucword, len, &m_sflagindex[0], optflags, pfxent, 00421 ignoreflagbits, allhits); 00422 cp = ucword + len - 1; 00423 /* HACK: bail on unrecognized chars */ 00424 if (*cp >= (SET_SIZE + MAXSTRINGCHARS)) 00425 return; 00426 ind = &m_sflagindex[*cp]; 00427 while (ind->numents == 0 && ind->pu.fp != NULL) 00428 { 00429 if (cp == ucword) 00430 return; 00431 if (ind->pu.fp[0].numents) 00432 { 00433 suf_list_chk (word, ucword, len, &ind->pu.fp[0], 00434 optflags, pfxent, ignoreflagbits, allhits); 00435 if (m_numhits != 0 && !allhits && /* !cflag && */ !ignoreflagbits) 00436 return; 00437 } 00438 /* HACK: bail on unrecognized chars */ 00439 if (*(cp-1) >= (SET_SIZE + MAXSTRINGCHARS)) 00440 return; 00441 ind = &ind->pu.fp[*--cp]; 00442 } 00443 suf_list_chk (word, ucword, len, ind, optflags, pfxent, 00444 ignoreflagbits, allhits); 00445 } 00446 00457 void ISpellChecker::suf_list_chk (ichar_t *word, ichar_t *ucword, 00458 int len, struct flagptr *ind, int optflags, 00459 struct flagent *pfxent, int ignoreflagbits, int allhits) 00460 { 00461 ichar_t * cp; /* Pointer into end of ucword */ 00462 int cond; /* Condition number */ 00463 struct dent * dent; /* Dictionary entry we found */ 00464 int entcount; /* Number of entries to process */ 00465 struct flagent * 00466 flent; /* Current table entry */ 00467 int preadd; /* Length added to tword2 as prefix */ 00468 int tlen; /* Length of tword */ 00469 ichar_t tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Tmp cpy */ 00470 ichar_t 
tword2[sizeof tword]; /* 2nd copy for ins_root_cap */ 00471 00472 icharcpy (tword, ucword); 00473 for (flent = ind->pu.ent, entcount = ind->numents; 00474 entcount > 0; 00475 flent++, entcount--) 00476 { 00477 if ((optflags & FF_CROSSPRODUCT) != 0 00478 && (flent->flagflags & FF_CROSSPRODUCT) == 0) 00479 continue; 00480 /* 00481 * If this is a compound-only affix, ignore it unless we're 00482 * looking for that specific thing. 00483 */ 00484 if ((flent->flagflags & FF_COMPOUNDONLY) != 0 00485 && (optflags & FF_COMPOUNDONLY) == 0) 00486 continue; 00487 00488 /* 00489 * See if the suffix matches. 00490 */ 00491 tlen = len - flent->affl; 00492 if (tlen > 0 00493 && (flent->affl == 0 00494 || icharcmp (flent->affix, ucword + tlen) == 0) 00495 && tlen + flent->stripl >= flent->numconds) 00496 { 00497 /* 00498 * The suffix matches. Remove it, replace it by the "strip" 00499 * string (if any), and check the original conditions. 00500 */ 00501 icharcpy (tword, ucword); 00502 cp = tword + tlen; 00503 if (flent->stripl) 00504 { 00505 icharcpy (cp, flent->strip); 00506 tlen += flent->stripl; 00507 cp = tword + tlen; 00508 } 00509 else 00510 *cp = '\0'; 00511 for (cond = flent->numconds; --cond >= 0; ) 00512 { 00513 if ((flent->conds[*--cp] & (1 << cond)) == 0) 00514 break; 00515 } 00516 if (cond < 0) 00517 { 00518 /* 00519 * The conditions match. See if the word is in the 00520 * dictionary. 
00521 */ 00522 if (ignoreflagbits) 00523 { 00524 if ((dent = ispell_lookup (tword, 1)) != NULL) 00525 { 00526 cp = tword2; 00527 if ((optflags & FF_CROSSPRODUCT) 00528 && pfxent->affl != 0) 00529 { 00530 icharcpy (cp, pfxent->affix); 00531 cp += pfxent->affl; 00532 *cp++ = '+'; 00533 } 00534 preadd = cp - tword2; 00535 icharcpy (cp, tword); 00536 cp += tlen; 00537 if ((optflags & FF_CROSSPRODUCT) 00538 && pfxent->stripl != 0) 00539 { 00540 *cp++ = '-'; 00541 icharcpy (cp, pfxent->strip); 00542 cp += pfxent->stripl; 00543 } 00544 if (flent->stripl) 00545 { 00546 *cp++ = '-'; 00547 icharcpy (cp, flent->strip); 00548 cp += flent->stripl; 00549 } 00550 if (flent->affl) 00551 { 00552 *cp++ = '+'; 00553 icharcpy (cp, flent->affix); 00554 cp += flent->affl; 00555 } 00556 } 00557 } 00558 else if ((dent = ispell_lookup (tword, 1)) != NULL 00559 && TSTMASKBIT (dent->mask, flent->flagbit) 00560 && ((optflags & FF_CROSSPRODUCT) == 0 00561 || TSTMASKBIT (dent->mask, pfxent->flagbit))) 00562 { 00563 if (m_numhits < MAX_HITS) 00564 { 00565 m_hits[m_numhits].dictent = dent; 00566 m_hits[m_numhits].prefix = pfxent; 00567 m_hits[m_numhits].suffix = flent; 00568 m_numhits++; 00569 } 00570 if (!allhits) 00571 { 00572 #ifndef NO_CAPITALIZATION_SUPPORT 00573 if (cap_ok (word, &m_hits[0], len)) 00574 return; 00575 m_numhits = 0; 00576 #else /* NO_CAPITALIZATION_SUPPORT */ 00577 return; 00578 #endif /* NO_CAPITALIZATION_SUPPORT */ 00579 } 00580 } 00581 } 00582 } 00583 } 00584 } 00585 00597 int ISpellChecker::expand_pre (char *croot, ichar_t *rootword, MASKTYPE mask[], 00598 int option, char *extra) 00599 { 00600 int entcount; /* No. 
of entries to process */ 00601 int explength; /* Length of expansions */ 00602 struct flagent * 00603 flent; /* Current table entry */ 00604 00605 for (flent = m_pflaglist, entcount = m_numpflags, explength = 0; 00606 entcount > 0; 00607 flent++, entcount--) 00608 { 00609 if (TSTMASKBIT (mask, flent->flagbit)) 00610 explength += 00611 pr_pre_expansion (croot, rootword, flent, mask, option, extra); 00612 } 00613 return explength; 00614 } 00615 00628 int ISpellChecker::pr_pre_expansion ( char *croot, ichar_t *rootword, 00629 struct flagent *flent, MASKTYPE mask[], int option, 00630 char *extra) 00631 { 00632 int cond; /* Current condition number */ 00633 ichar_t * nextc; /* Next case choice */ 00634 int tlen; /* Length of tword */ 00635 ichar_t tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */ 00636 00637 tlen = icharlen (rootword); 00638 if (flent->numconds > tlen) 00639 return 0; 00640 tlen -= flent->stripl; 00641 if (tlen <= 0) 00642 return 0; 00643 tlen += flent->affl; 00644 for (cond = 0, nextc = rootword; cond < flent->numconds; cond++) 00645 { 00646 if ((flent->conds[mytoupper (*nextc++)] & (1 << cond)) == 0) 00647 return 0; 00648 } 00649 /* 00650 * The conditions are satisfied. Copy the word, add the prefix, 00651 * and make it the proper case. This code is carefully written 00652 * to match that ins_cap and cap_ok. Note that the affix, as 00653 * inserted, is uppercase. 00654 * 00655 * There is a tricky bit here: if the root is capitalized, we 00656 * want a capitalized result. If the root is followcase, however, 00657 * we want to duplicate the case of the first remaining letter 00658 * of the root. In other words, "Loved/U" should generate "Unloved", 00659 * but "LOved/U" should generate "UNLOved" and "lOved/U" should 00660 * produce "unlOved". 
00661 */ 00662 if (flent->affl) 00663 { 00664 icharcpy (tword, flent->affix); 00665 nextc = tword + flent->affl; 00666 } 00667 icharcpy (nextc, rootword + flent->stripl); 00668 if (myupper (rootword[0])) 00669 { 00670 /* We must distinguish followcase from capitalized and all-upper */ 00671 for (nextc = rootword + 1; *nextc; nextc++) 00672 { 00673 if (!myupper (*nextc)) 00674 break; 00675 } 00676 if (*nextc) 00677 { 00678 /* It's a followcase or capitalized word. Figure out which. */ 00679 for ( ; *nextc; nextc++) 00680 { 00681 if (myupper (*nextc)) 00682 break; 00683 } 00684 if (*nextc) 00685 { 00686 /* It's followcase. */ 00687 if (!myupper (tword[flent->affl])) 00688 forcelc (tword, flent->affl); 00689 } 00690 else 00691 { 00692 /* It's capitalized */ 00693 forcelc (tword + 1, tlen - 1); 00694 } 00695 } 00696 } 00697 else 00698 { 00699 /* Followcase or all-lower, we don't care which */ 00700 if (!myupper (*nextc)) 00701 forcelc (tword, flent->affl); 00702 } 00703 if (option == 3) 00704 printf ("\n%s", croot); 00705 if (option != 4) 00706 printf (" %s%s", ichartosstr (tword, 1), extra); 00707 if (flent->flagflags & FF_CROSSPRODUCT) 00708 return tlen 00709 + expand_suf (croot, tword, mask, FF_CROSSPRODUCT, option, extra); 00710 else 00711 return tlen; 00712 } 00713 00726 int ISpellChecker::expand_suf (char *croot, ichar_t *rootword, MASKTYPE mask[], 00727 int optflags, int option, char *extra) 00728 { 00729 int entcount; /* No. 
of entries to process */ 00730 int explength; /* Length of expansions */ 00731 struct flagent * 00732 flent; /* Current table entry */ 00733 00734 for (flent = m_sflaglist, entcount = m_numsflags, explength = 0; 00735 entcount > 0; 00736 flent++, entcount--) 00737 { 00738 if (TSTMASKBIT (mask, flent->flagbit)) 00739 { 00740 if ((optflags & FF_CROSSPRODUCT) == 0 00741 || (flent->flagflags & FF_CROSSPRODUCT)) 00742 explength += 00743 pr_suf_expansion (croot, rootword, flent, option, extra); 00744 } 00745 } 00746 return explength; 00747 } 00748 00760 int ISpellChecker::pr_suf_expansion (char *croot, ichar_t *rootword, 00761 struct flagent *flent, int option, char *extra) 00762 { 00763 int cond; /* Current condition number */ 00764 ichar_t * nextc; /* Next case choice */ 00765 int tlen; /* Length of tword */ 00766 ichar_t tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */ 00767 00768 tlen = icharlen (rootword); 00769 cond = flent->numconds; 00770 if (cond > tlen) 00771 return 0; 00772 if (tlen - flent->stripl <= 0) 00773 return 0; 00774 for (nextc = rootword + tlen; --cond >= 0; ) 00775 { 00776 if ((flent->conds[mytoupper (*--nextc)] & (1 << cond)) == 0) 00777 return 0; 00778 } 00779 /* 00780 * The conditions are satisfied. Copy the word, add the suffix, 00781 * and make it match the case of the last remaining character of the 00782 * root. Again, this code carefully matches ins_cap and cap_ok. 
00783 */ 00784 icharcpy (tword, rootword); 00785 nextc = tword + tlen - flent->stripl; 00786 if (flent->affl) 00787 { 00788 icharcpy (nextc, flent->affix); 00789 if (!myupper (nextc[-1])) 00790 forcelc (nextc, flent->affl); 00791 } 00792 else 00793 *nextc = 0; 00794 if (option == 3) 00795 printf ("\n%s", croot); 00796 if (option != 4) 00797 printf (" %s%s", ichartosstr (tword, 1), extra); 00798 return tlen + flent->affl - flent->stripl; 00799 } 00800 00805 void ISpellChecker::forcelc (ichar_t *dst, int len) /* Force to lowercase */ 00806 { 00807 00808 for ( ; --len >= 0; dst++) 00809 *dst = mytolower (*dst); 00810 }