LCOV - code coverage report
Current view: top level - extensions/spellcheck/hunspell/src - suggestmgr.cxx (source / functions) Hit Total Coverage
Test: output.info Lines: 0 1281 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 43 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* ***** BEGIN LICENSE BLOCK *****
       2             :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       3             :  *
       4             :  * Copyright (C) 2002-2017 Németh László
       5             :  *
       6             :  * The contents of this file are subject to the Mozilla Public License Version
       7             :  * 1.1 (the "License"); you may not use this file except in compliance with
       8             :  * the License. You may obtain a copy of the License at
       9             :  * http://www.mozilla.org/MPL/
      10             :  *
      11             :  * Software distributed under the License is distributed on an "AS IS" basis,
      12             :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13             :  * for the specific language governing rights and limitations under the
      14             :  * License.
      15             :  *
      16             :  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
      17             :  *
      18             :  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
      19             :  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
      20             :  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
      21             :  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
      22             :  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
      23             :  *
      24             :  * Alternatively, the contents of this file may be used under the terms of
      25             :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      26             :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27             :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28             :  * of those above. If you wish to allow use of your version of this file only
      29             :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30             :  * use your version of this file under the terms of the MPL, indicate your
      31             :  * decision by deleting the provisions above and replace them with the notice
      32             :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33             :  * the provisions above, a recipient may use your version of this file under
      34             :  * the terms of any one of the MPL, the GPL or the LGPL.
      35             :  *
      36             :  * ***** END LICENSE BLOCK ***** */
      37             : /*
      38             :  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
      39             :  * And Contributors.  All rights reserved.
      40             :  *
      41             :  * Redistribution and use in source and binary forms, with or without
      42             :  * modification, are permitted provided that the following conditions
      43             :  * are met:
      44             :  *
      45             :  * 1. Redistributions of source code must retain the above copyright
      46             :  *    notice, this list of conditions and the following disclaimer.
      47             :  *
      48             :  * 2. Redistributions in binary form must reproduce the above copyright
      49             :  *    notice, this list of conditions and the following disclaimer in the
      50             :  *    documentation and/or other materials provided with the distribution.
      51             :  *
      52             :  * 3. All modifications to the source code must be clearly marked as
      53             :  *    such.  Binary redistributions based on modified source code
      54             :  *    must be clearly marked as modified versions in the documentation
      55             :  *    and/or other materials provided with the distribution.
      56             :  *
      57             :  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
      58             :  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      59             :  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
      60             :  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      61             :  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
      62             :  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
      63             :  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
      64             :  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      65             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      66             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      67             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      68             :  * SUCH DAMAGE.
      69             :  */
      70             : 
      71             : #include <stdlib.h>
      72             : #include <string.h>
      73             : #include <stdio.h>
      74             : #include <ctype.h>
      75             : 
      76             : #include "suggestmgr.hxx"
      77             : #include "htypes.hxx"
      78             : #include "csutil.hxx"
      79             : 
      80             : const w_char W_VLINE = {'\0', '|'};  // w_char counterpart of the '|' tested in badcharkey(); badcharkey_utf() does not try keyboard-string neighbours equal to it
      81             : 
      82           0 : SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) {
      83             :   // register affix manager and check in string of chars to
      84             :   // try when building candidate suggestions
      85           0 :   pAMgr = aptr;
      86             : 
      87           0 :   csconv = NULL;
      88             : 
      89           0 :   ckeyl = 0;
      90           0 :   ckey = NULL;
      91             : 
      92           0 :   ctryl = 0;
      93           0 :   ctry = NULL;
      94             : 
      95           0 :   utf8 = 0;
      96           0 :   langnum = 0;
      97           0 :   complexprefixes = 0;
      98             : 
      99           0 :   maxSug = maxn;
     100           0 :   nosplitsugs = 0;
     101           0 :   maxngramsugs = MAXNGRAMSUGS;
     102           0 :   maxcpdsugs = MAXCOMPOUNDSUGS;
     103             : 
     104           0 :   if (pAMgr) {
     105           0 :     langnum = pAMgr->get_langnum();
     106           0 :     ckey = pAMgr->get_key_string();
     107           0 :     nosplitsugs = pAMgr->get_nosplitsugs();
     108           0 :     if (pAMgr->get_maxngramsugs() >= 0)
     109           0 :       maxngramsugs = pAMgr->get_maxngramsugs();
     110           0 :     utf8 = pAMgr->get_utf8();
     111           0 :     if (pAMgr->get_maxcpdsugs() >= 0)
     112           0 :       maxcpdsugs = pAMgr->get_maxcpdsugs();
     113           0 :     if (!utf8) {
     114           0 :       csconv = get_current_cs(pAMgr->get_encoding());
     115             :     }
     116           0 :     complexprefixes = pAMgr->get_complexprefixes();
     117             :   }
     118             : 
     119           0 :   if (ckey) {
     120           0 :     if (utf8) {
     121           0 :       ckeyl = u8_u16(ckey_utf, ckey);
     122             :     } else {
     123           0 :       ckeyl = strlen(ckey);
     124             :     }
     125             :   }
     126             : 
     127           0 :   if (tryme) {
     128           0 :     ctry = mystrdup(tryme);
     129           0 :     if (ctry)
     130           0 :       ctryl = strlen(ctry);
     131           0 :     if (ctry && utf8) {
     132           0 :       ctryl = u8_u16(ctry_utf, tryme);
     133             :     }
     134             :   }
     135           0 : }
     136             : 
     137           0 : SuggestMgr::~SuggestMgr() {
     138           0 :   pAMgr = NULL;
     139           0 :   if (ckey)
     140           0 :     free(ckey);
     141           0 :   ckey = NULL;
     142           0 :   ckeyl = 0;
     143           0 :   if (ctry)
     144           0 :     free(ctry);
     145           0 :   ctry = NULL;
     146           0 :   ctryl = 0;
     147           0 :   maxSug = 0;
     148             : #ifdef MOZILLA_CLIENT
     149           0 :   delete[] csconv;
     150             : #endif
     151           0 : }
     152             : 
     153           0 : void SuggestMgr::testsug(std::vector<std::string>& wlst,
     154             :                         const std::string& candidate,
     155             :                         int cpdsuggest,
     156             :                         int* timer,
     157             :                         clock_t* timelimit) {
     158           0 :   int cwrd = 1;
     159           0 :   if (wlst.size() == maxSug)
     160           0 :     return;
     161           0 :   for (size_t k = 0; k < wlst.size(); ++k) {
     162           0 :     if (wlst[k] == candidate) {
     163           0 :       cwrd = 0;
     164           0 :       break;
     165             :     }
     166             :   }
     167           0 :   if ((cwrd) && checkword(candidate, cpdsuggest, timer, timelimit)) {
     168           0 :     wlst.push_back(candidate);
     169             :   }
     170             : }
     171             : 
     172             : // generate suggestions for a misspelled word
     173             : //    pass in address of array of char * pointers
     174             : // onlycompoundsug: probably bad suggestions (need for ngram sugs, too)
     175           0 : void SuggestMgr::suggest(std::vector<std::string>& slst,
     176             :                         const char* w,
     177             :                         int* onlycompoundsug) {
     178           0 :   int nocompoundtwowords = 0;
     179           0 :   std::vector<w_char> word_utf;
     180           0 :   int wl = 0;
     181           0 :   size_t nsugorig = slst.size();
     182           0 :   std::string w2;
     183           0 :   const char* word = w;
     184           0 :   size_t oldSug = 0;
     185             : 
     186             :   // word reversing wrapper for complex prefixes
     187           0 :   if (complexprefixes) {
     188           0 :     w2.assign(w);
     189           0 :     if (utf8)
     190           0 :       reverseword_utf(w2);
     191             :     else
     192           0 :       reverseword(w2);
     193           0 :     word = w2.c_str();
     194             :   }
     195             : 
     196           0 :   if (utf8) {
     197           0 :     wl = u8_u16(word_utf, word);
     198           0 :     if (wl == -1) {
     199           0 :       return;
     200             :     }
     201             :   }
     202             : 
     203           0 :   for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0);
     204             :        cpdsuggest++) {
     205             :     // limit compound suggestion
     206           0 :     if (cpdsuggest > 0)
     207           0 :       oldSug = slst.size();
     208             : 
     209             :     // suggestions for an uppercase word (html -> HTML)
     210           0 :     if (slst.size() < maxSug) {
     211           0 :       if (utf8)
     212           0 :         capchars_utf(slst, &word_utf[0], wl, cpdsuggest);
     213             :       else
     214           0 :         capchars(slst, word, cpdsuggest);
     215             :     }
     216             : 
     217             :     // perhaps we made a typical fault of spelling
     218           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     219           0 :       replchars(slst, word, cpdsuggest);
     220             :     }
     221             : 
     222             :     // perhaps we made chose the wrong char from a related set
     223           0 :     if ((slst.size() < maxSug) &&
     224           0 :         (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     225           0 :       mapchars(slst, word, cpdsuggest);
     226             :     }
     227             : 
     228             :     // only suggest compound words when no other suggestion
     229           0 :     if ((cpdsuggest == 0) && (slst.size() > nsugorig))
     230           0 :       nocompoundtwowords = 1;
     231             : 
     232             :     // did we swap the order of chars by mistake
     233           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     234           0 :       if (utf8)
     235           0 :         swapchar_utf(slst, &word_utf[0], wl, cpdsuggest);
     236             :       else
     237           0 :         swapchar(slst, word, cpdsuggest);
     238             :     }
     239             : 
     240             :     // did we swap the order of non adjacent chars by mistake
     241           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     242           0 :       if (utf8)
     243           0 :         longswapchar_utf(slst, &word_utf[0], wl, cpdsuggest);
     244             :       else
     245           0 :         longswapchar(slst, word, cpdsuggest);
     246             :     }
     247             : 
     248             :     // did we just hit the wrong key in place of a good char (case and keyboard)
     249           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     250           0 :       if (utf8)
     251           0 :         badcharkey_utf(slst, &word_utf[0], wl, cpdsuggest);
     252             :       else
     253           0 :         badcharkey(slst, word, cpdsuggest);
     254             :     }
     255             : 
     256             :     // did we add a char that should not be there
     257           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     258           0 :       if (utf8)
     259           0 :         extrachar_utf(slst, &word_utf[0], wl, cpdsuggest);
     260             :       else
     261           0 :         extrachar(slst, word, cpdsuggest);
     262             :     }
     263             : 
     264             :     // did we forgot a char
     265           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     266           0 :       if (utf8)
     267           0 :         forgotchar_utf(slst, &word_utf[0], wl, cpdsuggest);
     268             :       else
     269           0 :         forgotchar(slst, word, cpdsuggest);
     270             :     }
     271             : 
     272             :     // did we move a char
     273           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     274           0 :       if (utf8)
     275           0 :         movechar_utf(slst, &word_utf[0], wl, cpdsuggest);
     276             :       else
     277           0 :         movechar(slst, word, cpdsuggest);
     278             :     }
     279             : 
     280             :     // did we just hit the wrong key in place of a good char
     281           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     282           0 :       if (utf8)
     283           0 :         badchar_utf(slst, &word_utf[0], wl, cpdsuggest);
     284             :       else
     285           0 :         badchar(slst, word, cpdsuggest);
     286             :     }
     287             : 
     288             :     // did we double two characters
     289           0 :     if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     290           0 :       if (utf8)
     291           0 :         doubletwochars_utf(slst, &word_utf[0], wl, cpdsuggest);
     292             :       else
     293           0 :         doubletwochars(slst, word, cpdsuggest);
     294             :     }
     295             : 
     296             :     // perhaps we forgot to hit space and two words ran together
     297           0 :     if (!nosplitsugs && (slst.size() < maxSug) &&
     298           0 :         (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
     299           0 :       twowords(slst, word, cpdsuggest);
     300             :     }
     301             : 
     302             :   }  // repeating ``for'' statement compounding support
     303             : 
     304           0 :   if (!nocompoundtwowords && (!slst.empty()) && onlycompoundsug)
     305           0 :     *onlycompoundsug = 1;
     306             : }
     307             : 
     308             : // suggestions for an uppercase word (html -> HTML)
     309           0 : void SuggestMgr::capchars_utf(std::vector<std::string>& wlst,
     310             :                               const w_char* word,
     311             :                               int wl,
     312             :                               int cpdsuggest) {
     313           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     314           0 :   mkallcap_utf(candidate_utf, langnum);
     315           0 :   std::string candidate;
     316           0 :   u16_u8(candidate, candidate_utf);
     317           0 :   testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     318           0 : }
     319             : 
     320             : // suggestions for an uppercase word (html -> HTML)
     321           0 : void SuggestMgr::capchars(std::vector<std::string>& wlst,
     322             :                           const char* word,
     323             :                           int cpdsuggest) {
     324           0 :   std::string candidate(word);
     325           0 :   mkallcap(candidate, csconv);
     326           0 :   testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     327           0 : }
     328             : 
     329             : // suggestions for when chose the wrong char out of a related set
     330           0 : int SuggestMgr::mapchars(std::vector<std::string>& wlst,
     331             :                          const char* word,
     332             :                          int cpdsuggest) {
     333           0 :   std::string candidate;
     334             :   clock_t timelimit;
     335             :   int timer;
     336             : 
     337           0 :   int wl = strlen(word);
     338           0 :   if (wl < 2 || !pAMgr)
     339           0 :     return wlst.size();
     340             : 
     341           0 :   const std::vector<mapentry>& maptable = pAMgr->get_maptable();
     342           0 :   if (maptable.empty())
     343           0 :     return wlst.size();
     344             : 
     345           0 :   timelimit = clock();
     346           0 :   timer = MINTIMER;
     347             :   return map_related(word, candidate, 0, wlst, cpdsuggest,
     348           0 :                      maptable, &timer, &timelimit);
     349             : }
     350             : 
     351           0 : int SuggestMgr::map_related(const char* word,
     352             :                             std::string& candidate,
     353             :                             int wn,
     354             :                             std::vector<std::string>& wlst,
     355             :                             int cpdsuggest,
     356             :                             const std::vector<mapentry>& maptable,
     357             :                             int* timer,
     358             :                             clock_t* timelimit) {
     359           0 :   if (*(word + wn) == '\0') {
     360           0 :     int cwrd = 1;
     361           0 :     for (size_t m = 0; m < wlst.size(); ++m) {
     362           0 :       if (wlst[m] == candidate) {
     363           0 :         cwrd = 0;
     364           0 :         break;
     365             :       }
     366             :     }
     367           0 :     if ((cwrd) && checkword(candidate, cpdsuggest, timer, timelimit)) {
     368           0 :       if (wlst.size() < maxSug) {
     369           0 :         wlst.push_back(candidate);
     370             :       }
     371             :     }
     372           0 :     return wlst.size();
     373             :   }
     374           0 :   int in_map = 0;
     375           0 :   for (size_t j = 0; j < maptable.size(); ++j) {
     376           0 :     for (size_t k = 0; k < maptable[j].size(); ++k) {
     377           0 :       size_t len = maptable[j][k].size();
     378           0 :       if (strncmp(maptable[j][k].c_str(), word + wn, len) == 0) {
     379           0 :         in_map = 1;
     380           0 :         size_t cn = candidate.size();
     381           0 :         for (size_t l = 0; l < maptable[j].size(); ++l) {
     382           0 :           candidate.resize(cn);
     383           0 :           candidate.append(maptable[j][l]);
     384           0 :           map_related(word, candidate, wn + len, wlst,
     385           0 :                            cpdsuggest, maptable, timer, timelimit);
     386           0 :           if (!(*timer))
     387           0 :             return wlst.size();
     388             :         }
     389             :       }
     390             :     }
     391             :   }
     392           0 :   if (!in_map) {
     393           0 :     candidate.push_back(*(word + wn));
     394           0 :     map_related(word, candidate, wn + 1, wlst, cpdsuggest,
     395           0 :                 maptable, timer, timelimit);
     396             :   }
     397           0 :   return wlst.size();
     398             : }
     399             : 
     400             : // suggestions for a typical fault of spelling, that
     401             : // differs with more, than 1 letter from the right form.
     402           0 : int SuggestMgr::replchars(std::vector<std::string>& wlst,
     403             :                           const char* word,
     404             :                           int cpdsuggest) {
     405           0 :   std::string candidate;
     406           0 :   int wl = strlen(word);
     407           0 :   if (wl < 2 || !pAMgr)
     408           0 :     return wlst.size();
     409           0 :   const std::vector<replentry>& reptable = pAMgr->get_reptable();
     410           0 :   for (size_t i = 0; i < reptable.size(); ++i) {
     411           0 :     const char* r = word;
     412             :     // search every occurence of the pattern in the word
     413           0 :     while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
     414           0 :       int type = (r == word) ? 1 : 0;
     415           0 :       if (r - word + reptable[i].pattern.size() == strlen(word))
     416           0 :         type += 2;
     417           0 :       while (type && reptable[i].outstrings[type].empty())
     418           0 :         type = (type == 2 && r != word) ? 0 : type - 1;
     419           0 :       const std::string&out = reptable[i].outstrings[type];
     420           0 :       if (out.empty()) {
     421           0 :         ++r;
     422           0 :         continue;
     423             :       }
     424           0 :       candidate.assign(word);
     425           0 :       candidate.resize(r - word);
     426           0 :       candidate.append(reptable[i].outstrings[type]);
     427           0 :       candidate.append(r + reptable[i].pattern.size());
     428           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     429             :       // check REP suggestions with space
     430           0 :       size_t sp = candidate.find(' ');
     431           0 :       if (sp != std::string::npos) {
     432           0 :         size_t prev = 0;
     433           0 :         while (sp != std::string::npos) {
     434           0 :           std::string prev_chunk = candidate.substr(prev, sp - prev);
     435           0 :           if (checkword(prev_chunk, 0, NULL, NULL)) {
     436           0 :             size_t oldns = wlst.size();
     437           0 :             std::string post_chunk = candidate.substr(sp + 1);
     438           0 :             testsug(wlst, post_chunk, cpdsuggest, NULL, NULL);
     439           0 :             if (oldns < wlst.size()) {
     440           0 :               wlst[wlst.size() - 1] = candidate;
     441             :             }
     442             :           }
     443           0 :           prev = sp + 1;
     444           0 :           sp = candidate.find(' ', prev);
     445             :         }
     446             :       }
     447           0 :       r++;  // search for the next letter
     448             :     }
     449             :   }
     450           0 :   return wlst.size();
     451             : }
     452             : 
     453             : // perhaps we doubled two characters (pattern aba -> ababa, for example vacation
     454             : // -> vacacation)
     455           0 : int SuggestMgr::doubletwochars(std::vector<std::string>& wlst,
     456             :                                const char* word,
     457             :                                int cpdsuggest) {
     458           0 :   int state = 0;
     459           0 :   int wl = strlen(word);
     460           0 :   if (wl < 5 || !pAMgr)
     461           0 :     return wlst.size();
     462           0 :   for (int i = 2; i < wl; i++) {
     463           0 :     if (word[i] == word[i - 2]) {
     464           0 :       state++;
     465           0 :       if (state == 3) {
     466           0 :         std::string candidate(word, word + i - 1);
     467           0 :         candidate.insert(candidate.end(), word + i + 1, word + wl);
     468           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     469           0 :         state = 0;
     470             :       }
     471             :     } else {
     472           0 :       state = 0;
     473             :     }
     474             :   }
     475           0 :   return wlst.size();
     476             : }
     477             : 
     478             : // perhaps we doubled two characters (pattern aba -> ababa, for example vacation
     479             : // -> vacacation)
     480           0 : int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
     481             :                                    const w_char* word,
     482             :                                    int wl,
     483             :                                    int cpdsuggest) {
     484           0 :   int state = 0;
     485           0 :   if (wl < 5 || !pAMgr)
     486           0 :     return wlst.size();
     487           0 :   for (int i = 2; i < wl; i++) {
     488           0 :     if (word[i] == word[i - 2]) {
     489           0 :       state++;
     490           0 :       if (state == 3) {
     491           0 :         std::vector<w_char> candidate_utf(word, word + i - 1);
     492           0 :         candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
     493           0 :         std::string candidate;
     494           0 :         u16_u8(candidate, candidate_utf);
     495           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     496           0 :         state = 0;
     497             :       }
     498             :     } else {
     499           0 :       state = 0;
     500             :     }
     501             :   }
     502           0 :   return wlst.size();
     503             : }
     504             : 
     505             : // error is wrong char in place of correct one (case and keyboard related
     506             : // version)
     507           0 : int SuggestMgr::badcharkey(std::vector<std::string>& wlst,
     508             :                            const char* word,
     509             :                            int cpdsuggest) {
     510           0 :   std::string candidate(word);
     511             : 
     512             :   // swap out each char one by one and try uppercase and neighbor
     513             :   // keyboard chars in its place to see if that makes a good word
     514           0 :   for (size_t i = 0; i < candidate.size(); ++i) {
     515           0 :     char tmpc = candidate[i];
     516             :     // check with uppercase letters
     517           0 :     candidate[i] = csconv[((unsigned char)tmpc)].cupper;
     518           0 :     if (tmpc != candidate[i]) {
     519           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     520           0 :       candidate[i] = tmpc;
     521             :     }
     522             :     // check neighbor characters in keyboard string
     523           0 :     if (!ckey)
     524           0 :       continue;
     525           0 :     char* loc = strchr(ckey, tmpc);
     526           0 :     while (loc) {
     527           0 :       if ((loc > ckey) && (*(loc - 1) != '|')) {
     528           0 :         candidate[i] = *(loc - 1);
     529           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     530             :       }
     531           0 :       if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) {
     532           0 :         candidate[i] = *(loc + 1);
     533           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     534             :       }
     535           0 :       loc = strchr(loc + 1, tmpc);
     536             :     }
     537           0 :     candidate[i] = tmpc;
     538             :   }
     539           0 :   return wlst.size();
     540             : }
     541             : 
     542             : // Error: a wrong char was typed in place of the correct one (case and keyboard
     543             : // related version for w_char input). Returns the resulting size of wlst.
     544           0 : int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst,
     545             :                                const w_char* word,
     546             :                                int wl,
     547             :                                int cpdsuggest) {
     548           0 :   std::string candidate;
     549           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     550             :   // replace each char in turn with its uppercase form and with its neighbors
     551             :   // in the keyboard string (ckey_utf), testing every variant with testsug()
     552           0 :   for (int i = 0; i < wl; i++) {
     553           0 :     w_char tmpc = candidate_utf[i];
     554             :     // check with uppercase letters (only when uppercasing changes the char)
     555           0 :     candidate_utf[i] = upper_utf(candidate_utf[i], 1);
     556           0 :     if (tmpc != candidate_utf[i]) {
     557           0 :       u16_u8(candidate, candidate_utf);
     558           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     559           0 :       candidate_utf[i] = tmpc;
     560             :     }
     561             :     // check neighbor characters in keyboard string; W_VLINE neighbors are skipped
     562           0 :     if (!ckey)  // no keyboard string: nothing more to try for this char
     563           0 :       continue;
     564           0 :     size_t loc = 0;  // index of the current occurrence of tmpc in ckey_utf
     565           0 :     while ((loc < ckeyl) && ckey_utf[loc] != tmpc)
     566           0 :       ++loc;
     567           0 :     while (loc < ckeyl) {
     568           0 :       if ((loc > 0) && ckey_utf[loc - 1] != W_VLINE) {
     569           0 :         candidate_utf[i] = ckey_utf[loc - 1];
     570           0 :         u16_u8(candidate, candidate_utf);
     571           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     572             :       }
     573           0 :       if (((loc + 1) < ckeyl) && (ckey_utf[loc + 1] != W_VLINE)) {
     574           0 :         candidate_utf[i] = ckey_utf[loc + 1];
     575           0 :         u16_u8(candidate, candidate_utf);
     576           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     577             :       }
     578           0 :       do {  // advance to the next occurrence of tmpc, if any
     579           0 :         loc++;
     580           0 :       } while ((loc < ckeyl) && ckey_utf[loc] != tmpc);
     581             :     }
     582           0 :     candidate_utf[i] = tmpc;  // restore the original char
     583             :   }
     584           0 :   return wlst.size();
     585             : }
     586             : 
     587             : // Error: wrong char in place of the correct one (char version); returns the size of wlst
     588           0 : int SuggestMgr::badchar(std::vector<std::string>& wlst, const char* word, int cpdsuggest) {
     589           0 :   std::string candidate(word);
     590           0 :   clock_t timelimit = clock();
     591           0 :   int timer = MINTIMER;
     592             :   // for each char of the TRY string (ctry), substitute it at every position of
     593             :   // the word, scanning from the last char to the first, and test the result
     594           0 :   for (size_t j = 0; j < ctryl; ++j) {
     595           0 :     for (std::string::reverse_iterator aI = candidate.rbegin(), aEnd = candidate.rend(); aI != aEnd; ++aI) {
     596           0 :       char tmpc = *aI;
     597           0 :       if (ctry[j] == tmpc)  // same char: substitution would be a no-op
     598           0 :         continue;
     599           0 :       *aI = ctry[j];
     600           0 :       testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
     601           0 :       if (!timer)  // timer is only changed through the pointer given to testsug()
     602           0 :         return wlst.size();
     603           0 :       *aI = tmpc;  // restore the original char
     604             :     }
     605             :   }
     606           0 :   return wlst.size();
     607             : }
     608             : 
     609             : // Error: wrong char in place of the correct one (w_char version); returns the size of wlst
     610           0 : int SuggestMgr::badchar_utf(std::vector<std::string>& wlst,
     611             :                             const w_char* word,
     612             :                             int wl,
     613             :                             int cpdsuggest) {
     614           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     615           0 :   std::string candidate;
     616           0 :   clock_t timelimit = clock();
     617           0 :   int timer = MINTIMER;
     618             :   // for each char of the TRY string (ctry_utf), substitute it at every position
     619             :   // of the word, scanning from the last char to the first, and test the result
     620           0 :   for (size_t j = 0; j < ctryl; ++j) {
     621           0 :     for (int i = wl - 1; i >= 0; i--) {
     622           0 :       w_char tmpc = candidate_utf[i];
     623           0 :       if (tmpc == ctry_utf[j])  // same char: substitution would be a no-op
     624           0 :         continue;
     625           0 :       candidate_utf[i] = ctry_utf[j];
     626           0 :       u16_u8(candidate, candidate_utf);
     627           0 :       testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
     628           0 :       if (!timer)  // timer is only changed through the pointer given to testsug()
     629           0 :         return wlst.size();
     630           0 :       candidate_utf[i] = tmpc;  // restore the original char
     631             :     }
     632             :   }
     633           0 :   return wlst.size();
     634             : }
     635             : 
     636             : // Error: the word has an extra letter it does not need (w_char version); returns the size of wlst
     637           0 : int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst,
     638             :                               const w_char* word,
     639             :                               int wl,
     640             :                               int cpdsuggest) {
     641           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     642           0 :   if (candidate_utf.size() < 2)  // words shorter than 2 chars are not processed
     643           0 :     return wlst.size();
     644             :   // try omitting one char of word at a time, starting from the last char
     645           0 :   for (size_t i = 0; i < candidate_utf.size(); ++i) {
     646           0 :     size_t index = candidate_utf.size() - 1 - i;
     647           0 :     w_char tmpc = candidate_utf[index];
     648           0 :     candidate_utf.erase(candidate_utf.begin() + index);
     649           0 :     std::string candidate;
     650           0 :     u16_u8(candidate, candidate_utf);
     651           0 :     testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     652           0 :     candidate_utf.insert(candidate_utf.begin() + index, tmpc);  // put the char back
     653             :   }
     654           0 :   return wlst.size();
     655             : }
     656             : 
     657             : // Error: the word has an extra letter it does not need (char version); returns the size of wlst
     658           0 : int SuggestMgr::extrachar(std::vector<std::string>& wlst,
     659             :                           const char* word,
     660             :                           int cpdsuggest) {
     661           0 :   std::string candidate(word);
     662           0 :   if (candidate.size() < 2)  // words shorter than 2 chars are not processed
     663           0 :     return wlst.size();
     664             :   // try omitting one char of word at a time, starting from the last char
     665           0 :   for (size_t i = 0; i < candidate.size(); ++i) {
     666           0 :     size_t index = candidate.size() - 1 - i;
     667           0 :     char tmpc = candidate[index];
     668           0 :     candidate.erase(candidate.begin() + index);
     669           0 :     testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     670           0 :     candidate.insert(candidate.begin() + index, tmpc);  // put the char back
     671             :   }
     672           0 :   return wlst.size();
     673             : }
     674             : 
     675             : // Error: the word is missing a letter it needs (char version); returns the size of wlst
     676           0 : int SuggestMgr::forgotchar(std::vector<std::string>& wlst,
     677             :                            const char* word,
     678             :                            int cpdsuggest) {
     679           0 :   std::string candidate(word);
     680           0 :   clock_t timelimit = clock();
     681           0 :   int timer = MINTIMER;
     682             : 
     683             :   // try inserting each TRY character (ctry) at every position, going from the
     684             :   // end of the word (after the last letter) back to the front
     685           0 :   for (size_t k = 0; k < ctryl; ++k) {
     686           0 :     for (size_t i = 0; i <= candidate.size(); ++i) {
     687           0 :       size_t index = candidate.size() - i;
     688           0 :       candidate.insert(candidate.begin() + index, ctry[k]);
     689           0 :       testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
     690           0 :       if (!timer)  // timer is only changed through the pointer given to testsug()
     691           0 :         return wlst.size();
     692           0 :       candidate.erase(candidate.begin() + index);  // remove the inserted char again
     693             :     }
     694             :   }
     695           0 :   return wlst.size();
     696             : }
     697             : 
     698             : // Error: the word is missing a letter it needs (w_char version); returns the size of wlst
     699           0 : int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst,
     700             :                                const w_char* word,
     701             :                                int wl,
     702             :                                int cpdsuggest) {
     703           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     704           0 :   clock_t timelimit = clock();
     705           0 :   int timer = MINTIMER;
     706             : 
     707             :   // try inserting each TRY character (ctry_utf) at the end of the word and then
     708             :   // before every letter, going from the last letter back to the first
     709           0 :   for (size_t k = 0; k < ctryl; ++k) {
     710           0 :     for (size_t i = 0; i <= candidate_utf.size(); ++i) {
     711           0 :       size_t index = candidate_utf.size() - i;
     712           0 :       candidate_utf.insert(candidate_utf.begin() + index, ctry_utf[k]);
     713           0 :       std::string candidate;
     714           0 :       u16_u8(candidate, candidate_utf);
     715           0 :       testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
     716           0 :       if (!timer)  // timer is only changed through the pointer given to testsug()
     717           0 :         return wlst.size();
     718           0 :       candidate_utf.erase(candidate_utf.begin() + index);  // remove the inserted char again
     719             :     }
     720             :   }
     721           0 :   return wlst.size();
     722             : }
     723             : 
     724             : /* Error: the input should have been two words. Returns the resulting size of wlst. */
     725           0 : int SuggestMgr::twowords(std::vector<std::string>& wlst,
     726             :                          const char* word,
     727             :                          int cpdsuggest) {
     728             :   int c2;
     729           0 :   int forbidden = 0;
     730             :   int cwrd;
     731             : 
     732           0 :   int wl = strlen(word);
     733           0 :   if (wl < 3)  // words shorter than 3 bytes are not split
     734           0 :     return wlst.size();
     735             : 
     736           0 :   if (langnum == LANG_hu)
     737           0 :     forbidden = check_forbidden(word, wl);
     738             : 
     739           0 :   char* candidate = (char*)malloc(wl + 2);  // NOTE(review): malloc result is not checked for NULL
     740           0 :   strcpy(candidate + 1, word);  // word is stored at offset 1; byte 0 is filled in by the loop
     741             : 
     742             :   // split the string into two pieces after every char (the first piece is shifted
     743             :   // one byte left so *p can hold the separator); if both are good words, suggest them
     744           0 :   for (char* p = candidate + 1; p[1] != '\0'; p++) {
     745           0 :     p[-1] = *p;
     746             :     // go to end of the UTF-8 character (copy its continuation bytes, 10xxxxxx, too)
     747           0 :     while (utf8 && ((p[1] & 0xc0) == 0x80)) {
     748           0 :       *p = p[1];
     749           0 :       p++;
     750             :     }
     751           0 :     if (utf8 && p[1] == '\0')
     752           0 :       break;  // last UTF-8 character
     753           0 :     *p = '\0';  // terminate the first piece; the second piece starts at p + 1
     754           0 :     int c1 = checkword(candidate, cpdsuggest, NULL, NULL);
     755           0 :     if (c1) {
     756           0 :       c2 = checkword((p + 1), cpdsuggest, NULL, NULL);
     757           0 :       if (c2) {
     758           0 :         *p = ' ';
     759             : 
     760             :         // special Hungarian code (needs better compound word support)
     761           0 :         if ((langnum == LANG_hu) && !forbidden &&
     762             :             // if the split falls within 3 repeating letters, use - instead of space
     763           0 :             (((p[-1] == p[1]) &&
     764           0 :               (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) ||
     765             :              // or multiple compounding with more than 6 syllables
     766           0 :              ((c1 == 3) && (c2 >= 2))))
     767           0 :           *p = '-';
     768             : 
     769           0 :         cwrd = 1;  // 1 = candidate is not yet in wlst
     770           0 :         for (size_t k = 0; k < wlst.size(); ++k) {
     771           0 :           if (wlst[k] == candidate) {
     772           0 :             cwrd = 0;
     773           0 :             break;
     774             :           }
     775             :         }
     776           0 :         if (wlst.size() < maxSug) {
     777           0 :           if (cwrd) {
     778           0 :             wlst.push_back(candidate);
     779             :           }
     780             :         } else {
     781           0 :           free(candidate);
     782           0 :           return wlst.size();
     783             :         }
     784             :         // add two word suggestion with dash, if TRY string contains
     785             :         // "a" or "-" (and both pieces appear to be longer than one byte)
     786             :         // NOTE: cwrd is not reset to 1 here (left unmodified for REP two-word suggestions)
     787           0 :         if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) &&
     788           0 :             mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) {
     789           0 :           *p = '-';
     790           0 :           for (size_t k = 0; k < wlst.size(); ++k) {
     791           0 :             if (wlst[k] == candidate) {
     792           0 :               cwrd = 0;
     793           0 :               break;
     794             :             }
     795             :           }
     796           0 :           if (wlst.size() < maxSug) {
     797           0 :             if (cwrd) {
     798           0 :               wlst.push_back(candidate);
     799             :             }
     800             :           } else {
     801           0 :             free(candidate);
     802           0 :             return wlst.size();
     803             :           }
     804             :         }
     805             :       }
     806             :     }
     807             :   }
     808           0 :   free(candidate);
     809           0 :   return wlst.size();
     810             : }
     811             : 
     812             : // Error: adjacent letters were swapped (char version); returns the size of wlst
     813           0 : int SuggestMgr::swapchar(std::vector<std::string>& wlst,
     814             :                          const char* word,
     815             :                          int cpdsuggest) {
     816           0 :   std::string candidate(word);
     817           0 :   if (candidate.size() < 2)  // words shorter than 2 chars are not processed
     818           0 :     return wlst.size();
     819             : 
     820             :   // try swapping adjacent chars one by one (each swap is undone after testing)
     821           0 :   for (size_t i = 0; i < candidate.size() - 1; ++i) {
     822           0 :     std::swap(candidate[i], candidate[i+1]);
     823           0 :     testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     824           0 :     std::swap(candidate[i], candidate[i+1]);
     825             :   }
     826             : 
     827             :   // try double swaps for short words (4 or 5 chars): first two + last two chars,
     828             :   // e.g. ahev -> have, owudl -> would; for 5 chars also chars 2-3 + last two
     829           0 :   if (candidate.size() == 4 || candidate.size() == 5) {
     830           0 :     candidate[0] = word[1];
     831           0 :     candidate[1] = word[0];
     832           0 :     candidate[2] = word[2];
     833           0 :     candidate[candidate.size() - 2] = word[candidate.size() - 1];
     834           0 :     candidate[candidate.size() - 1] = word[candidate.size() - 2];
     835           0 :     testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     836           0 :     if (candidate.size() == 5) {
     837           0 :       candidate[0] = word[0];
     838           0 :       candidate[1] = word[2];
     839           0 :       candidate[2] = word[1];
     840           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     841             :     }
     842             :   }
     843             : 
     844           0 :   return wlst.size();
     845             : }
     846             : 
     847             : // Error: adjacent letters were swapped (w_char version); returns the size of wlst
     848           0 : int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst,
     849             :                              const w_char* word,
     850             :                              int wl,
     851             :                              int cpdsuggest) {
     852           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     853           0 :   if (candidate_utf.size() < 2)  // words shorter than 2 chars are not processed
     854           0 :     return wlst.size();
     855             : 
     856           0 :   std::string candidate;
     857             :   // try swapping adjacent chars one by one (each swap is undone after testing)
     858           0 :   for (size_t i = 0; i < candidate_utf.size() - 1; ++i) {
     859           0 :     std::swap(candidate_utf[i], candidate_utf[i+1]);
     860           0 :     u16_u8(candidate, candidate_utf);
     861           0 :     testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     862           0 :     std::swap(candidate_utf[i], candidate_utf[i+1]);
     863             :   }
     864             : 
     865             :   // try double swaps for short words (4 or 5 chars): first two + last two chars, e.g.
     866             :   // ahev -> have, owudl -> would; for 5 chars also chars 2-3 + last two, e.g. suodn -> sound
     867           0 :   if (candidate_utf.size() == 4 || candidate_utf.size() == 5) {
     868           0 :     candidate_utf[0] = word[1];
     869           0 :     candidate_utf[1] = word[0];
     870           0 :     candidate_utf[2] = word[2];
     871           0 :     candidate_utf[candidate_utf.size() - 2] = word[candidate_utf.size() - 1];
     872           0 :     candidate_utf[candidate_utf.size() - 1] = word[candidate_utf.size() - 2];
     873           0 :     u16_u8(candidate, candidate_utf);
     874           0 :     testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     875           0 :     if (candidate_utf.size() == 5) {
     876           0 :       candidate_utf[0] = word[0];
     877           0 :       candidate_utf[1] = word[2];
     878           0 :       candidate_utf[2] = word[1];
     879           0 :       u16_u8(candidate, candidate_utf);
     880           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     881             :     }
     882             :   }
     883           0 :   return wlst.size();
     884             : }
     885             : 
     886             : // Error: non-adjacent letters were swapped (char version); returns the size of wlst
     887           0 : int SuggestMgr::longswapchar(std::vector<std::string>& wlst,
     888             :                              const char* word,
     889             :                              int cpdsuggest) {
     890           0 :   std::string candidate(word);
     891             :   // try swapping every pair of chars more than one position apart (both loops cover the whole word, so each pair is visited as (p,q) and (q,p))
     892           0 :   for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) {
     893           0 :     for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) {
     894           0 :       if (std::abs(std::distance(q, p)) > 1) {
     895           0 :         std::swap(*p, *q);
     896           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     897           0 :         std::swap(*p, *q);  // undo the swap
     898             :       }
     899             :     }
     900             :   }
     901           0 :   return wlst.size();
     902             : }
     903             : 
     904             : // Error: non-adjacent letters were swapped (w_char version); returns the size of wlst
     905           0 : int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst,
     906             :                                  const w_char* word,
     907             :                                  int wl,
     908             :                                  int cpdsuggest) {
     909           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     910             :   // try swapping every pair of chars more than one position apart (both loops cover the whole word, so each pair is visited as (p,q) and (q,p))
     911           0 :   for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
     912           0 :     for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
     913           0 :       if (std::abs(std::distance(q, p)) > 1) {
     914           0 :         std::swap(*p, *q);
     915           0 :         std::string candidate;
     916           0 :         u16_u8(candidate, candidate_utf);
     917           0 :         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     918           0 :         std::swap(*p, *q);  // undo the swap
     919             :       }
     920             :     }
     921             :   }
     922           0 :   return wlst.size();
     923             : }
     924             : 
     925             : // Error: a letter was moved to another position (char version); returns the size of wlst
     926           0 : int SuggestMgr::movechar(std::vector<std::string>& wlst,
     927             :                          const char* word,
     928             :                          int cpdsuggest) {
     929           0 :   std::string candidate(word);
     930           0 :   if (candidate.size() < 2)  // words shorter than 2 chars are not processed
     931           0 :     return wlst.size();
     932             : 
     933             :   // try moving a char toward the end of the word by 2 to 9 positions
     934           0 :   for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) {
     935           0 :     for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) < 10; ++q) {
     936           0 :       std::swap(*q, *(q - 1));
     937           0 :       if (std::distance(p, q) < 2)
     938           0 :         continue;  // omit swap char (distance 1 is just an adjacent swap)
     939           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     940             :     }
     941           0 :     std::copy(word, word + candidate.size(), candidate.begin());  // restore the original word
     942             :   }
     943             :   // try moving a char toward the start of the word by 2 to 9 positions
     944           0 :   for (std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) {
     945           0 :     for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) < 10; ++q) {
     946           0 :       std::swap(*q, *(q - 1));
     947           0 :       if (std::distance(p, q) < 2)
     948           0 :         continue;  // omit swap char (distance 1 is just an adjacent swap)
     949           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     950             :     }
     951           0 :     std::copy(word, word + candidate.size(), candidate.begin());  // restore the original word
     952             :   }
     953             : 
     954           0 :   return wlst.size();
     955             : }
     956             : 
     957             : // Error: a letter was moved to another position (w_char version); returns the size of wlst
     958           0 : int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
     959             :                              const w_char* word,
     960             :                              int wl,
     961             :                              int cpdsuggest) {
     962           0 :   std::vector<w_char> candidate_utf(word, word + wl);
     963           0 :   if (candidate_utf.size() < 2)  // words shorter than 2 chars are not processed
     964           0 :     return wlst.size();
     965             : 
     966             :   // try moving a char toward the end of the word by 2 to 9 positions
     967           0 :   for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
     968           0 :     for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
     969           0 :       std::swap(*q, *(q - 1));
     970           0 :       if (std::distance(p, q) < 2)
     971           0 :         continue;  // omit swap char (distance 1 is just an adjacent swap)
     972           0 :       std::string candidate;
     973           0 :       u16_u8(candidate, candidate_utf);
     974           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     975             :     }
     976           0 :     std::copy(word, word + candidate_utf.size(), candidate_utf.begin());  // restore the original word
     977             :   }
     978             :   // try moving a char toward the start of the word by 2 to 9 positions
     979           0 :   for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
     980           0 :     for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
     981           0 :       std::swap(*q, *(q - 1));
     982           0 :       if (std::distance(p, q) < 2)
     983           0 :         continue;  // omit swap char (distance 1 is just an adjacent swap)
     984           0 :       std::string candidate;
     985           0 :       u16_u8(candidate, candidate_utf);
     986           0 :       testsug(wlst, candidate, cpdsuggest, NULL, NULL);
     987             :     }
     988           0 :     std::copy(word, word + candidate_utf.size(), candidate_utf.begin());  // restore the original word
     989             :   }
     990             : 
     991           0 :   return wlst.size();
     992             : }
     993             : 
     994             : // generate a set of suggestions for very poorly spelled words
     995           0 : void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
     996             :                           const char* w,
     997             :                           const std::vector<HashMgr*>& rHMgr) {
     998             :   int lval;
     999             :   int sc;
    1000             :   int lp, lpphon;
    1001           0 :   int nonbmp = 0;
    1002             : 
    1003             :   // exhaustively search through all root words
    1004             :   // keeping track of the MAX_ROOTS most similar root words
    1005             :   struct hentry* roots[MAX_ROOTS];
    1006             :   char* rootsphon[MAX_ROOTS];
    1007             :   int scores[MAX_ROOTS];
    1008             :   int scoresphon[MAX_ROOTS];
    1009           0 :   for (int i = 0; i < MAX_ROOTS; i++) {
    1010           0 :     roots[i] = NULL;
    1011           0 :     scores[i] = -100 * i;
    1012           0 :     rootsphon[i] = NULL;
    1013           0 :     scoresphon[i] = -100 * i;
    1014             :   }
    1015           0 :   lp = MAX_ROOTS - 1;
    1016           0 :   lpphon = MAX_ROOTS - 1;
    1017           0 :   int low = NGRAM_LOWERING;
    1018             : 
    1019           0 :   std::string w2;
    1020           0 :   const char* word = w;
    1021             : 
    1022             :   // word reversing wrapper for complex prefixes
    1023           0 :   if (complexprefixes) {
    1024           0 :     w2.assign(w);
    1025           0 :     if (utf8)
    1026           0 :       reverseword_utf(w2);
    1027             :     else
    1028           0 :       reverseword(w2);
    1029           0 :     word = w2.c_str();
    1030             :   }
    1031             : 
    1032           0 :   std::vector<w_char> u8;
    1033           0 :   int nc = strlen(word);
    1034           0 :   int n = (utf8) ? u8_u16(u8, word) : nc;
    1035             : 
    1036             :   // set character based ngram suggestion for words with non-BMP Unicode
    1037             :   // characters
    1038           0 :   if (n == -1) {
    1039           0 :     utf8 = 0;  // XXX not state-free
    1040           0 :     n = nc;
    1041           0 :     nonbmp = 1;
    1042           0 :     low = 0;
    1043             :   }
    1044             : 
    1045           0 :   struct hentry* hp = NULL;
    1046           0 :   int col = -1;
    1047           0 :   phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
    1048           0 :   std::string target;
    1049           0 :   std::string candidate;
    1050           0 :   std::vector<w_char> w_candidate;
    1051           0 :   if (ph) {
    1052           0 :     if (utf8) {
    1053           0 :       u8_u16(w_candidate, word);
    1054           0 :       mkallcap_utf(w_candidate, langnum);
    1055           0 :       u16_u8(candidate, w_candidate);
    1056             :     } else {
    1057           0 :       candidate.assign(word);
    1058           0 :       if (!nonbmp)
    1059           0 :         mkallcap(candidate, csconv);
    1060             :     }
    1061           0 :     target = phonet(candidate, *ph);  // XXX phonet() is 8-bit (nc, not n)
    1062             :   }
    1063             : 
    1064           0 :   FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL;
    1065           0 :   FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL;
    1066           0 :   FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
    1067           0 :   FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
    1068             : 
    1069           0 :   std::vector<w_char> w_word, w_target;
    1070           0 :   if (utf8) {
    1071           0 :     u8_u16(w_word, word);
    1072           0 :     u8_u16(w_target, target);
    1073             :   }
    1074             :   
    1075           0 :   std::string f;
    1076           0 :   std::vector<w_char> w_f;
    1077             :   
    1078           0 :   for (size_t i = 0; i < rHMgr.size(); ++i) {
    1079           0 :     while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
    1080           0 :       if ((hp->astr) && (pAMgr) &&
    1081           0 :           (TESTAFF(hp->astr, forbiddenword, hp->alen) ||
    1082           0 :            TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
    1083           0 :            TESTAFF(hp->astr, nosuggest, hp->alen) ||
    1084           0 :            TESTAFF(hp->astr, nongramsuggest, hp->alen) ||
    1085           0 :            TESTAFF(hp->astr, onlyincompound, hp->alen)))
    1086           0 :         continue;
    1087             : 
    1088           0 :       if (utf8) {
    1089           0 :         u8_u16(w_f, HENTRY_WORD(hp));
    1090             : 
    1091           0 :         int leftcommon = leftcommonsubstring(w_word, w_f);
    1092           0 :         if (low) {
    1093             :           // lowering dictionary word
    1094           0 :           mkallsmall_utf(w_f, langnum);
    1095             :         }
    1096           0 :         sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
    1097             :       } else {
    1098           0 :         f.assign(HENTRY_WORD(hp));
    1099             : 
    1100           0 :         int leftcommon = leftcommonsubstring(word, f.c_str());
    1101           0 :         if (low) {
    1102             :           // lowering dictionary word
    1103           0 :           mkallsmall(f, csconv);
    1104             :         }
    1105           0 :         sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
    1106             :       }
    1107             : 
    1108             :       // check special pronunciation
    1109           0 :       f.clear();
    1110           0 :       if ((hp->var & H_OPT_PHON) &&
    1111           0 :           copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
    1112             :         int sc2;
    1113           0 :         if (utf8) {
    1114           0 :           u8_u16(w_f, f);
    1115             : 
    1116           0 :           int leftcommon = leftcommonsubstring(w_word, w_f);
    1117           0 :           if (low) {
    1118             :             // lowering dictionary word
    1119           0 :             mkallsmall_utf(w_f, langnum);
    1120             :           }
    1121           0 :           sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
    1122             :         } else {
    1123           0 :           int leftcommon = leftcommonsubstring(word, f.c_str());
    1124           0 :           if (low) {
    1125             :             // lowering dictionary word
    1126           0 :             mkallsmall(f, csconv);
    1127             :           }
    1128           0 :           sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
    1129             :         }
    1130           0 :         if (sc2 > sc)
    1131           0 :           sc = sc2;
    1132             :       }
    1133             : 
    1134           0 :       int scphon = -20000;
    1135           0 :       if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
    1136           0 :         if (utf8) {
    1137           0 :           u8_u16(w_candidate, HENTRY_WORD(hp));
    1138           0 :           mkallcap_utf(w_candidate, langnum);
    1139           0 :           u16_u8(candidate, w_candidate);
    1140             :         } else {
    1141           0 :           candidate = HENTRY_WORD(hp);
    1142           0 :           mkallcap(candidate, csconv);
    1143             :         }
    1144           0 :         f = phonet(candidate, *ph);
    1145           0 :         if (utf8) {
    1146           0 :           u8_u16(w_f, f);
    1147           0 :           scphon = 2 * ngram(3, w_target, w_f,
    1148             :                              NGRAM_LONGER_WORSE);
    1149             :         } else {
    1150           0 :           scphon = 2 * ngram(3, target, f,
    1151             :                              NGRAM_LONGER_WORSE);
    1152             :         }
    1153             :       }
    1154             : 
    1155           0 :       if (sc > scores[lp]) {
    1156           0 :         scores[lp] = sc;
    1157           0 :         roots[lp] = hp;
    1158           0 :         lval = sc;
    1159           0 :         for (int j = 0; j < MAX_ROOTS; j++)
    1160           0 :           if (scores[j] < lval) {
    1161           0 :             lp = j;
    1162           0 :             lval = scores[j];
    1163             :           }
    1164             :       }
    1165             : 
    1166           0 :       if (scphon > scoresphon[lpphon]) {
    1167           0 :         scoresphon[lpphon] = scphon;
    1168           0 :         rootsphon[lpphon] = HENTRY_WORD(hp);
    1169           0 :         lval = scphon;
    1170           0 :         for (int j = 0; j < MAX_ROOTS; j++)
    1171           0 :           if (scoresphon[j] < lval) {
    1172           0 :             lpphon = j;
    1173           0 :             lval = scoresphon[j];
    1174             :           }
    1175             :       }
    1176             :     }
    1177             :   }
    1178             : 
    1179             :   // find minimum threshold for a passable suggestion
    1180             :   // mangle original word three differnt ways
    1181             :   // and score them to generate a minimum acceptable score
    1182           0 :   std::vector<w_char> w_mw;
    1183           0 :   int thresh = 0;
    1184           0 :   for (int sp = 1; sp < 4; sp++) {
    1185           0 :     if (utf8) {
    1186           0 :       w_mw = w_word;
    1187           0 :       for (int k = sp; k < n; k += 4) {
    1188           0 :         w_mw[k].l = '*';
    1189           0 :         w_mw[k].h = 0;
    1190             :       }
    1191             : 
    1192           0 :       if (low) {
    1193             :         // lowering dictionary word
    1194           0 :         mkallsmall_utf(w_mw, langnum);
    1195             :       }
    1196             : 
    1197           0 :       thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
    1198             :     } else {
    1199           0 :       std::string mw = word;
    1200           0 :       for (int k = sp; k < n; k += 4)
    1201           0 :         mw[k] = '*';
    1202             : 
    1203           0 :       if (low) {
    1204             :         // lowering dictionary word
    1205           0 :         mkallsmall(mw, csconv);
    1206             :       }
    1207             : 
    1208           0 :       thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
    1209             :     }
    1210             :   }
    1211           0 :   thresh = thresh / 3;
    1212           0 :   thresh--;
    1213             : 
    1214             :   // now expand affixes on each of these root words and
    1215             :   // and use length adjusted ngram scores to select
    1216             :   // possible suggestions
    1217             :   char* guess[MAX_GUESS];
    1218             :   char* guessorig[MAX_GUESS];
    1219             :   int gscore[MAX_GUESS];
    1220           0 :   for (int i = 0; i < MAX_GUESS; i++) {
    1221           0 :     guess[i] = NULL;
    1222           0 :     guessorig[i] = NULL;
    1223           0 :     gscore[i] = -100 * i;
    1224             :   }
    1225             : 
    1226           0 :   lp = MAX_GUESS - 1;
    1227             : 
    1228             :   struct guessword* glst;
    1229           0 :   glst = (struct guessword*)calloc(MAX_WORDS, sizeof(struct guessword));
    1230           0 :   if (!glst) {
    1231           0 :     if (nonbmp)
    1232           0 :       utf8 = 1;
    1233           0 :     return;
    1234             :   }
    1235             : 
    1236           0 :   for (int i = 0; i < MAX_ROOTS; i++) {
    1237           0 :     if (roots[i]) {
    1238           0 :       struct hentry* rp = roots[i];
    1239             : 
    1240           0 :       f.clear();
    1241           0 :       const char *field = NULL;
    1242           0 :       if ((rp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(rp), MORPH_PHON))
    1243           0 :           field = f.c_str();
    1244           0 :       int nw = pAMgr->expand_rootword(
    1245           0 :           glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word,
    1246           0 :           nc, field);
    1247             : 
    1248           0 :       for (int k = 0; k < nw; k++) {
    1249           0 :         if (utf8) {
    1250           0 :           u8_u16(w_f, glst[k].word);
    1251             : 
    1252           0 :           int leftcommon = leftcommonsubstring(w_word, w_f);
    1253           0 :           if (low) {
    1254             :             // lowering dictionary word
    1255           0 :             mkallsmall_utf(w_f, langnum);
    1256             :           }
    1257             : 
    1258           0 :           sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
    1259             :         } else {
    1260           0 :           f = glst[k].word;
    1261             : 
    1262           0 :           int leftcommon = leftcommonsubstring(word, f.c_str());
    1263           0 :           if (low) {
    1264             :             // lowering dictionary word
    1265           0 :             mkallsmall(f, csconv);
    1266             :           }
    1267             : 
    1268           0 :           sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
    1269             :         }
    1270             : 
    1271           0 :         if (sc > thresh) {
    1272           0 :           if (sc > gscore[lp]) {
    1273           0 :             if (guess[lp]) {
    1274           0 :               free(guess[lp]);
    1275           0 :               if (guessorig[lp]) {
    1276           0 :                 free(guessorig[lp]);
    1277           0 :                 guessorig[lp] = NULL;
    1278             :               }
    1279             :             }
    1280           0 :             gscore[lp] = sc;
    1281           0 :             guess[lp] = glst[k].word;
    1282           0 :             guessorig[lp] = glst[k].orig;
    1283           0 :             lval = sc;
    1284           0 :             for (int j = 0; j < MAX_GUESS; j++)
    1285           0 :               if (gscore[j] < lval) {
    1286           0 :                 lp = j;
    1287           0 :                 lval = gscore[j];
    1288             :               }
    1289             :           } else {
    1290           0 :             free(glst[k].word);
    1291           0 :             if (glst[k].orig)
    1292           0 :               free(glst[k].orig);
    1293             :           }
    1294             :         } else {
    1295           0 :           free(glst[k].word);
    1296           0 :           if (glst[k].orig)
    1297           0 :             free(glst[k].orig);
    1298             :         }
    1299             :       }
    1300             :     }
    1301             :   }
    1302           0 :   free(glst);
    1303             : 
    1304             :   // now we are done generating guesses
    1305             :   // sort in order of decreasing score
    1306             : 
    1307           0 :   bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
    1308           0 :   if (ph)
    1309           0 :     bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
    1310             : 
    1311             :   // weight suggestions with a similarity index, based on
    1312             :   // the longest common subsequent algorithm and resort
    1313             : 
    1314           0 :   int is_swap = 0;
    1315           0 :   int re = 0;
    1316           0 :   double fact = 1.0;
    1317           0 :   if (pAMgr) {
    1318           0 :     int maxd = pAMgr->get_maxdiff();
    1319           0 :     if (maxd >= 0)
    1320           0 :       fact = (10.0 - maxd) / 5.0;
    1321             :   }
    1322             : 
    1323           0 :   std::vector<w_char> w_gl;
    1324           0 :   for (int i = 0; i < MAX_GUESS; i++) {
    1325           0 :     if (guess[i]) {
    1326             :       // lowering guess[i]
    1327           0 :       std::string gl;
    1328             :       int len;
    1329           0 :       if (utf8) {
    1330           0 :         len = u8_u16(w_gl, guess[i]);
    1331           0 :         mkallsmall_utf(w_gl, langnum);
    1332           0 :         u16_u8(gl, w_gl);
    1333             :       } else {
    1334           0 :         gl.assign(guess[i]);
    1335           0 :         if (!nonbmp)
    1336           0 :           mkallsmall(gl, csconv);
    1337           0 :         len = strlen(guess[i]);
    1338             :       }
    1339             : 
    1340           0 :       int _lcs = lcslen(word, gl.c_str());
    1341             : 
    1342             :       // same characters with different casing
    1343           0 :       if ((n == len) && (n == _lcs)) {
    1344           0 :         gscore[i] += 2000;
    1345           0 :         break;
    1346             :       }
    1347             :       // using 2-gram instead of 3, and other weightening
    1348             : 
    1349           0 :       if (utf8) {
    1350           0 :         u8_u16(w_gl, gl);
    1351             :         //w_gl is lowercase already at this point
    1352           0 :         re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
    1353           0 :         if (low) {
    1354           0 :           w_f = w_word;
    1355             :           // lowering dictionary word
    1356           0 :           mkallsmall_utf(w_f, langnum);
    1357           0 :           re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
    1358             :         } else {
    1359           0 :           re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
    1360             :         }
    1361             :       } else {
    1362             :         //gl is lowercase already at this point
    1363           0 :         re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
    1364           0 :         if (low) {
    1365           0 :           f = word;
    1366             :           // lowering dictionary word
    1367           0 :           mkallsmall(f, csconv);
    1368           0 :           re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
    1369             :         } else {
    1370           0 :           re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
    1371             :         }
    1372             :       }
    1373             : 
    1374             :       int ngram_score, leftcommon_score;
    1375           0 :       if (utf8) {
    1376             :         //w_gl is lowercase already at this point
    1377           0 :         ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
    1378           0 :         leftcommon_score = leftcommonsubstring(w_word, w_gl);
    1379             :       } else {
    1380             :         //gl is lowercase already at this point
    1381           0 :         ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
    1382           0 :         leftcommon_score = leftcommonsubstring(word, gl.c_str());
    1383             :       }
    1384           0 :       gscore[i] =
    1385             :           // length of longest common subsequent minus length difference
    1386           0 :           2 * _lcs - abs((int)(n - len)) +
    1387             :           // weight length of the left common substring
    1388           0 :           leftcommon_score +
    1389             :           // weight equal character positions
    1390           0 :           (!nonbmp && commoncharacterpositions(word, gl.c_str(), &is_swap)
    1391           0 :                ? 1
    1392           0 :                : 0) +
    1393             :           // swap character (not neighboring)
    1394           0 :           ((is_swap) ? 10 : 0) +
    1395             :           // ngram
    1396           0 :           ngram_score +
    1397             :           // weighted ngrams
    1398           0 :           re +
    1399             :           // different limit for dictionaries with PHONE rules
    1400           0 :           (ph ? (re < len * fact ? -1000 : 0)
    1401           0 :               : (re < (n + len) * fact ? -1000 : 0));
    1402             :     }
    1403             :   }
    1404             : 
    1405           0 :   bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
    1406             : 
    1407             :   // phonetic version
    1408           0 :   if (ph)
    1409           0 :     for (int i = 0; i < MAX_ROOTS; i++) {
    1410           0 :       if (rootsphon[i]) {
    1411             :         // lowering rootphon[i]
    1412           0 :         std::string gl;
    1413             :         int len;
    1414           0 :         if (utf8) {
    1415           0 :           len = u8_u16(w_gl, rootsphon[i]);
    1416           0 :           mkallsmall_utf(w_gl, langnum);
    1417           0 :           u16_u8(gl, w_gl);
    1418             :         } else {
    1419           0 :           gl.assign(rootsphon[i]);
    1420           0 :           if (!nonbmp)
    1421           0 :             mkallsmall(gl, csconv);
    1422           0 :           len = strlen(rootsphon[i]);
    1423             :         }
    1424             : 
    1425             :         // weight length of the left common substring
    1426             :         int leftcommon_score;
    1427           0 :         if (utf8)
    1428           0 :           leftcommon_score = leftcommonsubstring(w_word, w_gl);
    1429             :         else
    1430           0 :           leftcommon_score = leftcommonsubstring(word, gl.c_str());
    1431             :         // heuristic weigthing of ngram scores
    1432           0 :         scoresphon[i] += 2 * lcslen(word, gl) - abs((int)(n - len)) +
    1433           0 :                          leftcommon_score;
    1434             :       }
    1435             :     }
    1436             : 
    1437           0 :   if (ph)
    1438           0 :     bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
    1439             : 
    1440             :   // copy over
    1441           0 :   size_t oldns = wlst.size();
    1442             : 
    1443           0 :   int same = 0;
    1444           0 :   for (int i = 0; i < MAX_GUESS; i++) {
    1445           0 :     if (guess[i]) {
    1446           0 :       if ((wlst.size() < oldns + maxngramsugs) && (wlst.size() < maxSug) &&
    1447           0 :           (!same || (gscore[i] > 1000))) {
    1448           0 :         int unique = 1;
    1449             :         // leave only excellent suggestions, if exists
    1450           0 :         if (gscore[i] > 1000)
    1451           0 :           same = 1;
    1452           0 :         else if (gscore[i] < -100) {
    1453           0 :           same = 1;
    1454             :           // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
    1455           0 :           if (wlst.size() > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) {
    1456           0 :             free(guess[i]);
    1457           0 :             if (guessorig[i])
    1458           0 :               free(guessorig[i]);
    1459           0 :             continue;
    1460             :           }
    1461             :         }
    1462           0 :         for (size_t j = 0; j < wlst.size(); ++j) {
    1463             :           // don't suggest previous suggestions or a previous suggestion with
    1464             :           // prefixes or affixes
    1465           0 :           if ((!guessorig[i] && strstr(guess[i], wlst[j].c_str())) ||
    1466           0 :               (guessorig[i] && strstr(guessorig[i], wlst[j].c_str())) ||
    1467             :               // check forbidden words
    1468           0 :               !checkword(guess[i], 0, NULL, NULL)) {
    1469           0 :             unique = 0;
    1470           0 :             break;
    1471             :           }
    1472             :         }
    1473           0 :         if (unique) {
    1474           0 :           if (guessorig[i]) {
    1475           0 :             wlst.push_back(guessorig[i]);
    1476             :           } else {
    1477           0 :             wlst.push_back(guess[i]);
    1478             :           }
    1479             :         }
    1480           0 :         free(guess[i]);
    1481           0 :         if (guessorig[i])
    1482           0 :           free(guessorig[i]);
    1483             :       } else {
    1484           0 :         free(guess[i]);
    1485           0 :         if (guessorig[i])
    1486           0 :           free(guessorig[i]);
    1487             :       }
    1488             :     }
    1489             :   }
    1490             : 
    1491           0 :   oldns = wlst.size();
    1492           0 :   if (ph)
    1493           0 :     for (int i = 0; i < MAX_ROOTS; i++) {
    1494           0 :       if (rootsphon[i]) {
    1495           0 :         if ((wlst.size() < oldns + MAXPHONSUGS) && (wlst.size() < maxSug)) {
    1496           0 :           int unique = 1;
    1497           0 :           for (size_t j = 0; j < wlst.size(); ++j) {
    1498             :             // don't suggest previous suggestions or a previous suggestion with
    1499             :             // prefixes or affixes
    1500           0 :             if (strstr(rootsphon[i], wlst[j].c_str()) ||
    1501             :                 // check forbidden words
    1502           0 :                 !checkword(rootsphon[i], 0, NULL, NULL)) {
    1503           0 :               unique = 0;
    1504           0 :               break;
    1505             :             }
    1506             :           }
    1507           0 :           if (unique) {
    1508           0 :             wlst.push_back(rootsphon[i]);
    1509             :           }
    1510             :         }
    1511             :       }
    1512             :     }
    1513             : 
    1514           0 :   if (nonbmp)
    1515           0 :     utf8 = 1;
    1516             : }
    1517             : 
    1518             : // Decide whether a candidate suggestion is spelled correctly.
    1519             : // Checks plain dictionary entries as well as prefixed/suffixed forms via pAMgr.
    1520             : // Returns 0 if rejected: time limit exceeded, flagged entry, unknown, or no pAMgr.
    1521             : // Non-zero results follow the obsolete MySpell-HU categorisation:
    1522             : // 1 is a plain hit; return values 2 and 3 mark compounding with hyphen (-),
    1523             : // 3 marks roots without suffix and is also returned for an accepted compound.
    1524           0 : int SuggestMgr::checkword(const std::string& word,
    1525             :                           int cpdsuggest,  // 1: test the word only as a compound
    1526             :                           int* timer,  // optional call countdown (may be NULL)
    1527             :                           clock_t* timelimit) {  // optional reference clock() value (may be NULL)
    1528             :   // check time limit; clock() is only consulted when the countdown reaches zero
    1529           0 :   if (timer) {
    1530           0 :     (*timer)--;
    1531           0 :     if (!(*timer) && timelimit) {
    1532           0 :       if ((clock() - *timelimit) > TIMELIMIT)
    1533           0 :         return 0;  // clock() moved more than TIMELIMIT past *timelimit: give up
    1534           0 :       *timer = MAXPLUSTIMER;  // re-arm the countdown until the next clock() check
    1535             :     }
    1536             :   }
    1537             :   // every real check needs the affix manager; without it we fall through to 0
    1538           0 :   if (pAMgr) {
    1539           0 :     struct hentry* rv = NULL;
    1540           0 :     int nosuffix = 0;
    1541             :     // compound-only mode: this branch always returns, the affix checks below are skipped
    1542           0 :     if (cpdsuggest == 1) {
    1543           0 :       if (pAMgr->get_compound()) {
    1544           0 :         struct hentry* rv2 = NULL;
    1545             :         struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
    1546           0 :         rv = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 1, 0);  // EXT
    1547           0 :         if (rv &&  // compound accepted and the plain entry (if any) lacks forbiddenword/nosuggest
    1548           0 :             (!(rv2 = pAMgr->lookup(word.c_str())) || !rv2->astr ||
    1549           0 :              !(TESTAFF(rv2->astr, pAMgr->get_forbiddenword(), rv2->alen) ||
    1550           0 :                TESTAFF(rv2->astr, pAMgr->get_nosuggest(), rv2->alen))))
    1551           0 :           return 3;  // XXX obsolete categorisation + only ICONV needs affix
    1552             :                      // flag check?
    1553             :       }
    1554           0 :       return 0;  // not acceptable as a compound
    1555             :     }
    1556             :     // ordinary mode: plain dictionary lookup first
    1557           0 :     rv = pAMgr->lookup(word.c_str());
    1558             : 
    1559           0 :     if (rv) {
    1560           0 :       if ((rv->astr) &&
    1561           0 :           (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
    1562           0 :            TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen)))
    1563           0 :         return 0;  // first homonym has the forbiddenword or nosuggest flag
    1564           0 :       while (rv) {  // skip homonyms flagged needaffix, ONLYUPCASEFLAG or onlyincompound
    1565           0 :         if (rv->astr &&
    1566           0 :             (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
    1567           0 :              TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
    1568           0 :              TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) {
    1569           0 :           rv = rv->next_homonym;
    1570             :         } else
    1571           0 :           break;
    1572             :       }
    1573             :     } else
    1574           0 :       rv = pAMgr->prefix_check(word.c_str(), word.size(),
    1575           0 :                                0);  // only prefix, and prefix + suffix XXX
    1576             :     // a hit so far did not involve suffix_check; otherwise fall back to it
    1577           0 :     if (rv) {
    1578           0 :       nosuffix = 1;
    1579             :     } else {
    1580           0 :       rv = pAMgr->suffix_check(word.c_str(), word.size(), 0, NULL,
    1581           0 :                                FLAG_NULL, FLAG_NULL, IN_CPD_NOT);  // only suffix
    1582             :     }
    1583             :     // still nothing: try the *_twosfx checks, but only if have_contclass() is set
    1584           0 :     if (!rv && pAMgr->have_contclass()) {
    1585           0 :       rv = pAMgr->suffix_check_twosfx(word.c_str(), word.size(), 0, NULL, FLAG_NULL);
    1586           0 :       if (!rv)
    1587           0 :         rv = pAMgr->prefix_check_twosfx(word.c_str(), word.size(), 1, FLAG_NULL);
    1588             :     }
    1589             : 
    1590             :     // check forbidden words (plus ONLYUPCASEFLAG, nosuggest and onlyincompound)
    1591           0 :     if ((rv) && (rv->astr) &&
    1592           0 :         (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
    1593           0 :          TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
    1594           0 :          TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) ||
    1595           0 :          TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen)))
    1596           0 :       return 0;
    1597             : 
    1598           0 :     if (rv) {  // XXX obsolete: 2 + nosuffix for entries carrying the compound flag, else 1
    1599           0 :       if ((pAMgr->get_compoundflag()) &&
    1600           0 :           TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen))
    1601           0 :         return 2 + nosuffix;
    1602           0 :       return 1;
    1603             :     }
    1604             :   }
    1605           0 :   return 0;  // no pAMgr, or no analysis found
    1606             : }
    1607             : // Returns 1 if the entry resolved for word carries the forbiddenword flag, else 0 (also 0 without pAMgr).
    1608           0 : int SuggestMgr::check_forbidden(const char* word, int len) {
    1609           0 :   if (pAMgr) {
    1610           0 :     struct hentry* rv = pAMgr->lookup(word);
    1611           0 :     if (rv && rv->astr &&
    1612           0 :         (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
    1613           0 :          TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen)))
    1614           0 :       rv = NULL;  // entries flagged needaffix or onlyincompound are not used
    1615           0 :     if (!(pAMgr->prefix_check(word, len, 1)))  // NOTE(review): a successful prefix_check result is discarded
    1616           0 :       rv = pAMgr->suffix_check(word, len, 0, NULL,
    1617           0 :                                FLAG_NULL, FLAG_NULL, IN_CPD_NOT);  // prefix+suffix, suffix
    1618             :     // check forbidden words; rv is the suffix_check result if prefix_check failed, else the lookup result
    1619           0 :     if ((rv) && (rv->astr) &&
    1620           0 :         TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen))
    1621           0 :       return 1;
    1622             :   }
    1623           0 :   return 0;
    1624             : }
    1625             : // Morphological analysis of in_w: homonym lines first, then affix and (if still empty) compound analyses.
    1626           0 : std::string SuggestMgr::suggest_morph(const std::string& in_w) {
    1627           0 :   std::string result;
    1628             : 
    1629           0 :   struct hentry* rv = NULL;
    1630             : 
    1631           0 :   if (!pAMgr)
    1632           0 :     return std::string();  // no affix manager: empty result
    1633             : 
    1634           0 :   std::string w(in_w);  // working copy; may be reversed below
    1635             : 
    1636             :   // word reversing wrapper for complex prefixes
    1637           0 :   if (complexprefixes) {
    1638           0 :     if (utf8)
    1639           0 :       reverseword_utf(w);
    1640             :     else
    1641           0 :       reverseword(w);
    1642             :   }
    1643             : 
    1644           0 :   rv = pAMgr->lookup(w.c_str());
    1645             :   // one "\n"-terminated line per homonym not flagged forbiddenword, needaffix or onlyincompound
    1646           0 :   while (rv) {
    1647           0 :     if ((!rv->astr) ||
    1648           0 :         !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
    1649           0 :           TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
    1650           0 :           TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) {
    1651           0 :       if (!HENTRY_FIND(rv, MORPH_STEM)) {  // HENTRY_FIND found no MORPH_STEM: emit MORPH_STEM + w instead
    1652           0 :         result.append(" ");
    1653           0 :         result.append(MORPH_STEM);
    1654           0 :         result.append(w);
    1655             :       }
    1656           0 :       if (HENTRY_DATA(rv)) {  // then the data stored with the entry
    1657           0 :         result.append(" ");
    1658           0 :         result.append(HENTRY_DATA2(rv));
    1659             :       }
    1660           0 :       result.append("\n");
    1661             :     }
    1662           0 :     rv = rv->next_homonym;
    1663             :   }
    1664             :   // append whatever affix_check_morph() produces for the word
    1665           0 :   std::string st = pAMgr->affix_check_morph(w.c_str(), w.size());
    1666           0 :   if (!st.empty()) {
    1667           0 :     result.append(st);
    1668             :   }
    1669             :   // compound analysis runs only if get_compound() is set and nothing was produced so far
    1670           0 :   if (pAMgr->get_compound() && result.empty()) {
    1671             :     struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
    1672           0 :     pAMgr->compound_check_morph(w.c_str(), w.size(), 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, result,
    1673           0 :                                 NULL);
    1674             :   }
    1675             :   // NOTE(review): presumably drops duplicate MSEP_REC-separated lines - confirm in line_uniq()
    1676           0 :   line_uniq(result, MSEP_REC);
    1677             : 
    1678           0 :   return result;
    1679             : }
    1680             : // Count suffix tags (MORPH_DERI_SFX, MORPH_INFL_SFX, MORPH_TERM_SFX) found in morph; 0 for NULL or empty.
    1681           0 : static int get_sfxcount(const char* morph) {
    1682           0 :   if (!morph || !*morph)
    1683           0 :     return 0;
    1684           0 :   int n = 0;
    1685           0 :   const char* old = morph;
    1686           0 :   morph = strstr(morph, MORPH_DERI_SFX);  // first tag: try DERI, then INFL, then TERM
    1687           0 :   if (!morph)
    1688           0 :     morph = strstr(old, MORPH_INFL_SFX);
    1689           0 :   if (!morph)
    1690           0 :     morph = strstr(old, MORPH_TERM_SFX);
    1691           0 :   while (morph) {  // each pass counts the current tag and searches strictly after it
    1692           0 :     n++;
    1693           0 :     old = morph;
    1694           0 :     morph = strstr(morph + 1, MORPH_DERI_SFX);  // NOTE(review): same priority order, so a lower-priority tag lying before a later higher-priority one is never counted
    1695           0 :     if (!morph)
    1696           0 :       morph = strstr(old + 1, MORPH_INFL_SFX);
    1697           0 :     if (!morph)
    1698           0 :       morph = strstr(old + 1, MORPH_TERM_SFX);
    1699             :   }
    1700           0 :   return n;
    1701             : }
    1702             : // Generate forms of entry rv (and of its allomorphs) for pattern via pAMgr->morphgen(), one per line.
    1703             : /* affixation */
    1704           0 : std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
    1705           0 :   std::string result;
    1706           0 :   int sfxcount = get_sfxcount(pattern);
    1707             :   // give up (empty result) when the entry's data has more suffix tags than the pattern
    1708           0 :   if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount)
    1709           0 :     return result;
    1710             :   // forms of the entry itself; NOTE(review): pAMgr is dereferenced without a null check in this function
    1711           0 :   if (HENTRY_DATA(rv)) {
    1712           0 :     std::string aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
    1713           0 :                                       HENTRY_DATA(rv), pattern, 0);
    1714           0 :     if (!aff.empty()) {
    1715           0 :       result.append(aff);
    1716           0 :       result.append("\n");
    1717             :     }
    1718             :   }
    1719             : 
    1720             :   // check all allomorphs: each MORPH_ALLOMORPH field in the entry's data names a word to look up
    1721           0 :   char* p = NULL;
    1722           0 :   if (HENTRY_DATA(rv))
    1723           0 :     p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
    1724           0 :   while (p) {
    1725           0 :     p += MORPH_TAG_LEN;  // step over the tag to the field value
    1726           0 :     int plen = fieldlen(p);
    1727           0 :     std::string allomorph(p, plen);
    1728           0 :     struct hentry* rv2 = pAMgr->lookup(allomorph.c_str());
    1729           0 :     while (rv2) {  // walk every homonym of the allomorph
    1730             :       //            if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
    1731             :       //            sfxcount) {
    1732           0 :       if (HENTRY_DATA(rv2)) {
    1733           0 :         char* st = (char*)strstr(HENTRY_DATA2(rv2), MORPH_STEM);
    1734           0 :         if (st && (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv),
    1735           0 :                            fieldlen(st + MORPH_TAG_LEN)) == 0)) {  // NOTE(review): only the stem field's length is compared, so a stem that is a prefix of rv's word also matches
    1736           0 :           std::string aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr,
    1737           0 :                                             rv2->alen, HENTRY_DATA(rv2), pattern, 0);
    1738           0 :           if (!aff.empty()) {
    1739           0 :             result.append(aff);
    1740           0 :             result.append("\n");
    1741             :           }
    1742             :         }
    1743             :       }
    1744           0 :       rv2 = rv2->next_homonym;
    1745             :     }
    1746           0 :     p = strstr(p + plen, MORPH_ALLOMORPH);  // next allomorph field, if any
    1747             :   }
    1748             : 
    1749           0 :   return result;
    1750             : }
    1751             : 
    1752           0 : std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const std::string& in_pattern) {
    1753           0 :   if (desc.empty() || !pAMgr)
    1754           0 :     return std::string();
    1755             : 
    1756           0 :   const char* pattern = in_pattern.c_str();
    1757           0 :   std::string result2;
    1758           0 :   std::string newpattern;
    1759           0 :   struct hentry* rv = NULL;
    1760             : 
    1761             :   // search affixed forms with and without derivational suffixes
    1762             :   while (1) {
    1763           0 :     for (size_t k = 0; k < desc.size(); ++k) {
    1764           0 :       std::string result;
    1765             : 
    1766             :       // add compound word parts (except the last one)
    1767           0 :       const char* s = desc[k].c_str();
    1768           0 :       const char* part = strstr(s, MORPH_PART);
    1769           0 :       if (part) {
    1770           0 :         const char* nextpart = strstr(part + 1, MORPH_PART);
    1771           0 :         while (nextpart) {
    1772           0 :           std::string field;
    1773           0 :           copy_field(field, part, MORPH_PART);
    1774           0 :           result.append(field);
    1775           0 :           part = nextpart;
    1776           0 :           nextpart = strstr(part + 1, MORPH_PART);
    1777             :         }
    1778           0 :         s = part;
    1779             :       }
    1780             : 
    1781           0 :       std::string tok(s);
    1782           0 :       size_t pos = tok.find(" | ");
    1783           0 :       while (pos != std::string::npos) {
    1784           0 :         tok[pos + 1] = MSEP_ALT;
    1785           0 :         pos = tok.find(" | ", pos);
    1786             :       }
    1787           0 :       std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
    1788           0 :       for (size_t i = 0; i < pl.size(); ++i) {
    1789             :         // remove inflectional and terminal suffixes
    1790           0 :         size_t is = pl[i].find(MORPH_INFL_SFX);
    1791           0 :         if (is != std::string::npos)
    1792           0 :           pl[i].resize(is);
    1793           0 :         size_t ts = pl[i].find(MORPH_TERM_SFX);
    1794           0 :         while (ts != std::string::npos) {
    1795           0 :           pl[i][ts] = '_';
    1796           0 :           ts = pl[i].find(MORPH_TERM_SFX);
    1797             :         }
    1798           0 :         const char* st = strstr(s, MORPH_STEM);
    1799           0 :         if (st) {
    1800           0 :           copy_field(tok, st, MORPH_STEM);
    1801           0 :           rv = pAMgr->lookup(tok.c_str());
    1802           0 :           while (rv) {
    1803           0 :             std::string newpat(pl[i]);
    1804           0 :             newpat.append(pattern);
    1805           0 :             std::string sg = suggest_hentry_gen(rv, newpat.c_str());
    1806           0 :             if (sg.empty())
    1807           0 :               sg = suggest_hentry_gen(rv, pattern);
    1808           0 :             if (!sg.empty()) {
    1809           0 :               std::vector<std::string> gen = line_tok(sg, MSEP_REC);
    1810           0 :               for (size_t j = 0; j < gen.size(); ++j) {
    1811           0 :                 result2.push_back(MSEP_REC);
    1812           0 :                 result2.append(result);
    1813           0 :                 if (pl[i].find(MORPH_SURF_PFX) != std::string::npos) {
    1814           0 :                   std::string field;
    1815           0 :                   copy_field(field, pl[i], MORPH_SURF_PFX);
    1816           0 :                   result2.append(field);
    1817             :                 }
    1818           0 :                 result2.append(gen[j]);
    1819             :               }
    1820             :             }
    1821           0 :             rv = rv->next_homonym;
    1822             :           }
    1823             :         }
    1824             :       }
    1825             :     }
    1826             : 
    1827           0 :     if (!result2.empty() || !strstr(pattern, MORPH_DERI_SFX))
    1828           0 :       break;
    1829             : 
    1830           0 :     newpattern.assign(pattern);
    1831           0 :     mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX);
    1832           0 :     pattern = newpattern.c_str();
    1833           0 :   }
    1834           0 :   return result2;
    1835             : }
    1836             : 
    1837             : // generate an n-gram score comparing s1 and s2, UTF16 version
    1838           0 : int SuggestMgr::ngram(int n,
    1839             :                       const std::vector<w_char>& su1,
    1840             :                       const std::vector<w_char>& su2,
    1841             :                       int opt) {
    1842           0 :   int nscore = 0;
    1843             :   int ns;
    1844             :   int l1;
    1845             :   int l2;
    1846           0 :   int test = 0;
    1847             : 
    1848           0 :   l1 = su1.size();
    1849           0 :   l2 = su2.size();
    1850           0 :   if (l2 == 0)
    1851           0 :     return 0;
    1852           0 :   for (int j = 1; j <= n; j++) {
    1853           0 :     ns = 0;
    1854           0 :     for (int i = 0; i <= (l1 - j); i++) {
    1855           0 :       int k = 0;
    1856           0 :       for (int l = 0; l <= (l2 - j); l++) {
    1857           0 :         for (k = 0; k < j; k++) {
    1858           0 :           const w_char& c1 = su1[i + k];
    1859           0 :           const w_char& c2 = su2[l + k];
    1860           0 :           if ((c1.l != c2.l) || (c1.h != c2.h))
    1861             :             break;
    1862             :         }
    1863           0 :         if (k == j) {
    1864           0 :           ns++;
    1865           0 :           break;
    1866             :         }
    1867             :       }
    1868           0 :       if (k != j && opt & NGRAM_WEIGHTED) {
    1869           0 :         ns--;
    1870           0 :         test++;
    1871           0 :         if (i == 0 || i == l1 - j)
    1872           0 :           ns--;  // side weight
    1873             :       }
    1874             :     }
    1875           0 :     nscore = nscore + ns;
    1876           0 :     if (ns < 2 && !(opt & NGRAM_WEIGHTED))
    1877           0 :       break;
    1878             :   }
    1879             : 
    1880           0 :   ns = 0;
    1881           0 :   if (opt & NGRAM_LONGER_WORSE)
    1882           0 :     ns = (l2 - l1) - 2;
    1883           0 :   if (opt & NGRAM_ANY_MISMATCH)
    1884           0 :     ns = abs(l2 - l1) - 2;
    1885           0 :   ns = (nscore - ((ns > 0) ? ns : 0));
    1886           0 :   return ns;
    1887             : }
    1888             : 
    1889             : // generate an n-gram score comparing s1 and s2, non-UTF16 version
    1890           0 : int SuggestMgr::ngram(int n,
    1891             :                       const std::string& s1,
    1892             :                       const std::string& s2,
    1893             :                       int opt) {
    1894           0 :   int nscore = 0;
    1895             :   int ns;
    1896             :   int l1;
    1897             :   int l2;
    1898           0 :   int test = 0;
    1899             : 
    1900           0 :   l2 = s2.size();
    1901           0 :   if (l2 == 0)
    1902           0 :     return 0;
    1903           0 :   l1 = s1.size();
    1904           0 :   for (int j = 1; j <= n; j++) {
    1905           0 :     ns = 0;
    1906           0 :     for (int i = 0; i <= (l1 - j); i++) {
    1907             :       //s2 is haystack, s1[i..i+j) is needle
    1908           0 :       if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
    1909           0 :         ns++;
    1910           0 :       } else if (opt & NGRAM_WEIGHTED) {
    1911           0 :         ns--;
    1912           0 :         test++;
    1913           0 :         if (i == 0 || i == l1 - j)
    1914           0 :           ns--;  // side weight
    1915             :       }
    1916             :     }
    1917           0 :     nscore = nscore + ns;
    1918           0 :     if (ns < 2 && !(opt & NGRAM_WEIGHTED))
    1919           0 :       break;
    1920             :   }
    1921             : 
    1922           0 :   ns = 0;
    1923           0 :   if (opt & NGRAM_LONGER_WORSE)
    1924           0 :     ns = (l2 - l1) - 2;
    1925           0 :   if (opt & NGRAM_ANY_MISMATCH)
    1926           0 :     ns = abs(l2 - l1) - 2;
    1927           0 :   ns = (nscore - ((ns > 0) ? ns : 0));
    1928           0 :   return ns;
    1929             : }
    1930             : 
    1931             : // length of the left common substring of s1 and (decapitalised) s2, UTF version
    1932           0 : int SuggestMgr::leftcommonsubstring(
    1933             :     const std::vector<w_char>& su1,
    1934             :     const std::vector<w_char>& su2) {
    1935           0 :   int l1 = su1.size();
    1936           0 :   int l2 = su2.size();
    1937             :   // decapitalize dictionary word
    1938           0 :   if (complexprefixes) {
    1939           0 :     if (su1[l1 - 1] == su2[l2 - 1])
    1940           0 :       return 1;
    1941             :   } else {
    1942           0 :     unsigned short idx = su2.empty() ? 0 : (su2[0].h << 8) + su2[0].l;
    1943           0 :     unsigned short otheridx = su1.empty() ? 0 : (su1[0].h << 8) + su1[0].l;
    1944           0 :     if (otheridx != idx && (otheridx != unicodetolower(idx, langnum)))
    1945           0 :       return 0;
    1946             :     int i;
    1947           0 :     for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) &&
    1948           0 :                 (su1[i].h == su2[i].h);
    1949             :          i++)
    1950             :       ;
    1951           0 :     return i;
    1952             :   }
    1953           0 :   return 0;
    1954             : }
    1955             : 
    1956             : // length of the left common substring of s1 and (decapitalised) s2, non-UTF
    1957           0 : int SuggestMgr::leftcommonsubstring(
    1958             :     const char* s1,
    1959             :     const char* s2) {
    1960           0 :   if (complexprefixes) {
    1961           0 :     int l1 = strlen(s1);
    1962           0 :     int l2 = strlen(s2);
    1963           0 :     if (l1 <= l2 && s2[l1 - 1] == s2[l2 - 1])
    1964           0 :       return 1;
    1965           0 :   } else if (csconv) {
    1966           0 :     const char* olds = s1;
    1967             :     // decapitalise dictionary word
    1968           0 :     if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower))
    1969           0 :       return 0;
    1970           0 :     do {
    1971           0 :       s1++;
    1972           0 :       s2++;
    1973           0 :     } while ((*s1 == *s2) && (*s1 != '\0'));
    1974           0 :     return (int)(s1 - olds);
    1975             :   }
    1976           0 :   return 0;
    1977             : }
    1978             : 
    1979           0 : int SuggestMgr::commoncharacterpositions(const char* s1,
    1980             :                                          const char* s2,
    1981             :                                          int* is_swap) {
    1982           0 :   int num = 0;
    1983           0 :   int diff = 0;
    1984             :   int diffpos[2];
    1985           0 :   *is_swap = 0;
    1986           0 :   if (utf8) {
    1987           0 :     std::vector<w_char> su1;
    1988           0 :     std::vector<w_char> su2;
    1989           0 :     int l1 = u8_u16(su1, s1);
    1990           0 :     int l2 = u8_u16(su2, s2);
    1991             : 
    1992           0 :     if (l1 <= 0 || l2 <= 0)
    1993           0 :       return 0;
    1994             : 
    1995             :     // decapitalize dictionary word
    1996           0 :     if (complexprefixes) {
    1997           0 :       su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
    1998             :     } else {
    1999           0 :       su2[0] = lower_utf(su2[0], langnum);
    2000             :     }
    2001           0 :     for (int i = 0; (i < l1) && (i < l2); i++) {
    2002           0 :       if (su1[i] == su2[i]) {
    2003           0 :         num++;
    2004             :       } else {
    2005           0 :         if (diff < 2)
    2006           0 :           diffpos[diff] = i;
    2007           0 :         diff++;
    2008             :       }
    2009             :     }
    2010           0 :     if ((diff == 2) && (l1 == l2) &&
    2011           0 :         (su1[diffpos[0]] == su2[diffpos[1]]) &&
    2012           0 :         (su1[diffpos[1]] == su2[diffpos[0]]))
    2013           0 :       *is_swap = 1;
    2014             :   } else {
    2015             :     size_t i;
    2016           0 :     std::string t(s2);
    2017             :     // decapitalize dictionary word
    2018           0 :     if (complexprefixes) {
    2019           0 :       size_t l2 = t.size();
    2020           0 :       t[l2 - 1] = csconv[(unsigned char)t[l2 - 1]].clower;
    2021             :     } else {
    2022           0 :       mkallsmall(t, csconv);
    2023             :     }
    2024           0 :     for (i = 0; i < t.size() && (*(s1 + i) != 0); ++i) {
    2025           0 :       if (*(s1 + i) == t[i]) {
    2026           0 :         num++;
    2027             :       } else {
    2028           0 :         if (diff < 2)
    2029           0 :           diffpos[diff] = i;
    2030           0 :         diff++;
    2031             :       }
    2032             :     }
    2033           0 :     if ((diff == 2) && (*(s1 + i) == 0) && i == t.size() &&
    2034           0 :         (*(s1 + diffpos[0]) == t[diffpos[1]]) &&
    2035           0 :         (*(s1 + diffpos[1]) == t[diffpos[0]]))
    2036           0 :       *is_swap = 1;
    2037             :   }
    2038           0 :   return num;
    2039             : }
    2040             : 
    2041           0 : int SuggestMgr::mystrlen(const char* word) {
    2042           0 :   if (utf8) {
    2043           0 :     std::vector<w_char> w;
    2044           0 :     return u8_u16(w, word);
    2045             :   } else
    2046           0 :     return strlen(word);
    2047             : }
    2048             : 
    2049             : // sort in decreasing order of score
    2050           0 : void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n) {
    2051           0 :   int m = 1;
    2052           0 :   while (m < n) {
    2053           0 :     int j = m;
    2054           0 :     while (j > 0) {
    2055           0 :       if (rsc[j - 1] < rsc[j]) {
    2056           0 :         int sctmp = rsc[j - 1];
    2057           0 :         char* wdtmp = rword[j - 1];
    2058           0 :         rsc[j - 1] = rsc[j];
    2059           0 :         rword[j - 1] = rword[j];
    2060           0 :         rsc[j] = sctmp;
    2061           0 :         rword[j] = wdtmp;
    2062           0 :         if (rword2) {
    2063           0 :           wdtmp = rword2[j - 1];
    2064           0 :           rword2[j - 1] = rword2[j];
    2065           0 :           rword2[j] = wdtmp;
    2066             :         }
    2067           0 :         j--;
    2068             :       } else
    2069           0 :         break;
    2070             :     }
    2071           0 :     m++;
    2072             :   }
    2073           0 :   return;
    2074             : }
    2075             : 
    2076             : // longest common subsequence
    2077           0 : void SuggestMgr::lcs(const char* s,
    2078             :                      const char* s2,
    2079             :                      int* l1,
    2080             :                      int* l2,
    2081             :                      char** result) {
    2082             :   int n, m;
    2083           0 :   std::vector<w_char> su;
    2084           0 :   std::vector<w_char> su2;
    2085             :   char* b;
    2086             :   char* c;
    2087             :   int i;
    2088             :   int j;
    2089           0 :   if (utf8) {
    2090           0 :     m = u8_u16(su, s);
    2091           0 :     n = u8_u16(su2, s2);
    2092             :   } else {
    2093           0 :     m = strlen(s);
    2094           0 :     n = strlen(s2);
    2095             :   }
    2096           0 :   c = (char*)malloc((m + 1) * (n + 1));
    2097           0 :   b = (char*)malloc((m + 1) * (n + 1));
    2098           0 :   if (!c || !b) {
    2099           0 :     if (c)
    2100           0 :       free(c);
    2101           0 :     if (b)
    2102           0 :       free(b);
    2103           0 :     *result = NULL;
    2104           0 :     return;
    2105             :   }
    2106           0 :   for (i = 1; i <= m; i++)
    2107           0 :     c[i * (n + 1)] = 0;
    2108           0 :   for (j = 0; j <= n; j++)
    2109           0 :     c[j] = 0;
    2110           0 :   for (i = 1; i <= m; i++) {
    2111           0 :     for (j = 1; j <= n; j++) {
    2112           0 :       if (((utf8) && (su[i - 1] == su2[j - 1])) ||
    2113           0 :           ((!utf8) && (s[i - 1] == s2[j - 1]))) {
    2114           0 :         c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;
    2115           0 :         b[i * (n + 1) + j] = LCS_UPLEFT;
    2116           0 :       } else if (c[(i - 1) * (n + 1) + j] >= c[i * (n + 1) + j - 1]) {
    2117           0 :         c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j];
    2118           0 :         b[i * (n + 1) + j] = LCS_UP;
    2119             :       } else {
    2120           0 :         c[i * (n + 1) + j] = c[i * (n + 1) + j - 1];
    2121           0 :         b[i * (n + 1) + j] = LCS_LEFT;
    2122             :       }
    2123             :     }
    2124             :   }
    2125           0 :   *result = b;
    2126           0 :   free(c);
    2127           0 :   *l1 = m;
    2128           0 :   *l2 = n;
    2129             : }
    2130             : 
    2131           0 : int SuggestMgr::lcslen(const char* s, const char* s2) {
    2132             :   int m;
    2133             :   int n;
    2134             :   int i;
    2135             :   int j;
    2136             :   char* result;
    2137           0 :   int len = 0;
    2138           0 :   lcs(s, s2, &m, &n, &result);
    2139           0 :   if (!result)
    2140           0 :     return 0;
    2141           0 :   i = m;
    2142           0 :   j = n;
    2143           0 :   while ((i != 0) && (j != 0)) {
    2144           0 :     if (result[i * (n + 1) + j] == LCS_UPLEFT) {
    2145           0 :       len++;
    2146           0 :       i--;
    2147           0 :       j--;
    2148           0 :     } else if (result[i * (n + 1) + j] == LCS_UP) {
    2149           0 :       i--;
    2150             :     } else
    2151           0 :       j--;
    2152             :   }
    2153           0 :   free(result);
    2154           0 :   return len;
    2155             : }
    2156             : 
    2157           0 : int SuggestMgr::lcslen(const std::string& s, const std::string& s2) {
    2158           0 :   return lcslen(s.c_str(), s2.c_str());
    2159             : }

Generated by: LCOV version 1.13