LCOV - code coverage report
Current view: top level - extensions/spellcheck/hunspell/src - csutil.hxx (source / functions) Hit Total Coverage
Test: output.info Lines: 0 16 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* ***** BEGIN LICENSE BLOCK *****
       2             :  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
       3             :  *
       4             :  * Copyright (C) 2002-2017 Németh László
       5             :  *
       6             :  * The contents of this file are subject to the Mozilla Public License Version
       7             :  * 1.1 (the "License"); you may not use this file except in compliance with
       8             :  * the License. You may obtain a copy of the License at
       9             :  * http://www.mozilla.org/MPL/
      10             :  *
      11             :  * Software distributed under the License is distributed on an "AS IS" basis,
      12             :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      13             :  * for the specific language governing rights and limitations under the
      14             :  * License.
      15             :  *
      16             :  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
      17             :  *
      18             :  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
      19             :  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
      20             :  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
      21             :  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
      22             :  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
      23             :  *
      24             :  * Alternatively, the contents of this file may be used under the terms of
      25             :  * either the GNU General Public License Version 2 or later (the "GPL"), or
      26             :  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
      27             :  * in which case the provisions of the GPL or the LGPL are applicable instead
      28             :  * of those above. If you wish to allow use of your version of this file only
      29             :  * under the terms of either the GPL or the LGPL, and not to allow others to
      30             :  * use your version of this file under the terms of the MPL, indicate your
      31             :  * decision by deleting the provisions above and replace them with the notice
      32             :  * and other provisions required by the GPL or the LGPL. If you do not delete
      33             :  * the provisions above, a recipient may use your version of this file under
      34             :  * the terms of any one of the MPL, the GPL or the LGPL.
      35             :  *
      36             :  * ***** END LICENSE BLOCK ***** */
      37             : /*
      38             :  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
      39             :  * And Contributors.  All rights reserved.
      40             :  *
      41             :  * Redistribution and use in source and binary forms, with or without
      42             :  * modification, are permitted provided that the following conditions
      43             :  * are met:
      44             :  *
      45             :  * 1. Redistributions of source code must retain the above copyright
      46             :  *    notice, this list of conditions and the following disclaimer.
      47             :  *
      48             :  * 2. Redistributions in binary form must reproduce the above copyright
      49             :  *    notice, this list of conditions and the following disclaimer in the
      50             :  *    documentation and/or other materials provided with the distribution.
      51             :  *
      52             :  * 3. All modifications to the source code must be clearly marked as
      53             :  *    such.  Binary redistributions based on modified source code
      54             :  *    must be clearly marked as modified versions in the documentation
      55             :  *    and/or other materials provided with the distribution.
      56             :  *
      57             :  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
      58             :  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      59             :  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
      60             :  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      61             :  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
      62             :  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
      63             :  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
      64             :  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      65             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      66             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      67             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      68             :  * SUCH DAMAGE.
      69             :  */
      70             : 
      71             : #ifndef CSUTIL_HXX_
      72             : #define CSUTIL_HXX_
      73             : 
      74             : #include "hunvisapi.h"
      75             : 
      76             : // First some base level utility routines
      77             : 
      78             : #include <fstream>
      79             : #include <string>
      80             : #include <vector>
      81             : #include <string.h>
      82             : #include "w_char.hxx"
      83             : #include "htypes.hxx"
      84             : 
      85             : #ifdef MOZILLA_CLIENT
      86             : #include "nscore.h"  // for mozalloc headers
      87             : #endif
      88             : 
      89             : // casing: capitalization type constants (presumably the values returned by get_captype()/get_captype_utf8() -- TODO confirm)
      90             : #define NOCAP 0
      91             : #define INITCAP 1
      92             : #define ALLCAP 2
      93             : #define HUHCAP 3
      94             : #define HUHINITCAP 4
      95             : 
      96             : // default encoding and keystring
      97             : #define SPELL_ENCODING "ISO8859-1"
      98             : #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"  // the three QWERTY letter rows, separated by '|'
      99             : 
     100             : // default morphological field tags (each tag is two characters followed by ':')
     101             : #define MORPH_STEM "st:"
     102             : #define MORPH_ALLOMORPH "al:"
     103             : #define MORPH_POS "po:"
     104             : #define MORPH_DERI_PFX "dp:"
     105             : #define MORPH_INFL_PFX "ip:"
     106             : #define MORPH_TERM_PFX "tp:"
     107             : #define MORPH_DERI_SFX "ds:"
     108             : #define MORPH_INFL_SFX "is:"
     109             : #define MORPH_TERM_SFX "ts:"
     110             : #define MORPH_SURF_PFX "sp:"
     111             : #define MORPH_FREQ "fr:"
     112             : #define MORPH_PHON "ph:"
     113             : #define MORPH_HYPH "hy:"
     114             : #define MORPH_PART "pa:"
     115             : #define MORPH_FLAG "fl:"
     116             : #define MORPH_HENTRY "_H:"
     117             : #define MORPH_TAG_LEN strlen(MORPH_STEM)  // evaluates to 3; valid for every tag above because they all have the same length
     118             : 
     119             : #define MSEP_FLD ' '  // NOTE(review): presumably the separator between morphological fields -- confirm
     120             : #define MSEP_REC '\n'  // NOTE(review): presumably the separator between records -- confirm
     121             : #define MSEP_ALT '\v'  // NOTE(review): presumably the separator between alternatives -- confirm
     122             : 
     123             : // default flags
     124             : #define DEFAULTFLAGS 65510
     125             : #define FORBIDDENWORD 65510  // same numeric value as DEFAULTFLAGS
     126             : #define ONLYUPCASEFLAG 65511
     127             : 
     128             : // fix long pathname problem of WIN32 by using w_char std::fstream::open override
     129             : LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
     130             :                                      std::ios_base::openmode mode);
     131             : 
     132             : // convert UTF-16 characters to UTF-8
     133             : LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
     134             :                                              const std::vector<w_char>& src);
     135             : 
     136             : // convert UTF-8 characters to UTF-16
     137             : LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
     138             :                                     const std::string& src);
     139             : 
     140             : // remove end of line char(s)
     141             : LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s);
     142             : 
     143             : // duplicate string
     144             : LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
     145             : 
     146             : // parse into tokens with char delimiter
     147             : LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str,
     148             :                                                               std::string::const_iterator& start);
     149             : 
     150             : // replace search by replace in str and return str
     151             : LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
     152             :                                                const std::string& search,
     153             :                                                const std::string& replace);
     154             : 
     155             : // append apd to the end of every line in str
     156             : LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
     157             :                                                  const std::string& apd);
     158             : 
     159             : // tokenize text into lines at breakchar
     160             : LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text,
     161             :                                                            char breakchar);
     162             : 
     163             : // tokenize text into lines at breakchar and uniq in place
     164             : LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& text, char breakchar);
     165             : 
     166             : LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar);
     167             : 
     168             : // reverse word
     169             : LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
     170             : 
     171             : // reverse word (UTF-8 encoded)
     172             : LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
     173             : 
     174             : // remove duplicates
     175             : LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list);
     176             : 
     177             : // character encoding information: one record of case data per character (a pointer to these is returned by get_current_cs())
     178             : struct cs_info {
     179             :   unsigned char ccase;  // presumably a case flag for the character -- TODO confirm
     180             :   unsigned char clower;  // presumably the lowercase form of the character -- TODO confirm
     181             :   unsigned char cupper;  // presumably the uppercase form of the character -- TODO confirm
     182             : };
     183             : 
     184             : LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl();
     185             : LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
     186             : LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
     187             :                                                        int langnum);
     188             : LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum);
     189             : LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);
     190             : LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
     191             :                                                        int langnum);
     192             : LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
     193             : 
     194             : LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
     195             : 
     196             : // get language identifiers of language codes
     197             : LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
     198             : 
     199             : // get characters of the given 8bit encoding with lower- and uppercase forms
     200             : LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc);
     201             : 
     202             : // convert std::string to all caps
     203             : LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
     204             :                                                const struct cs_info* csconv);
     205             : 
     206             : // convert std::string to all lowercase
     207             : LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,
     208             :                                                  const struct cs_info* csconv);
     209             : 
     210             : // convert first letter of string to lowercase
     211             : LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s,
     212             :                                                  const struct cs_info* csconv);
     213             : 
     214             : // convert first letter of string to capital
     215             : LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
     216             :                                                 const struct cs_info* csconv);
     217             : 
     218             : // convert first letter of UTF-16 string (w_char vector) to capital
     219             : LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
     220             : mkinitcap_utf(std::vector<w_char>& u, int langnum);
     221             : 
     222             : // convert UTF-16 string (w_char vector) to lowercase
     223             : LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
     224             : mkallsmall_utf(std::vector<w_char>& u, int langnum);
     225             : 
     226             : // convert first letter of UTF-16 string (w_char vector) to lowercase
     227             : LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
     228             : mkinitsmall_utf(std::vector<w_char>& u, int langnum);
     229             : 
     230             : // convert UTF-16 string (w_char vector) to all caps
     231             : LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
     232             : mkallcap_utf(std::vector<w_char>& u, int langnum);
     233             : 
     234             : // get type of capitalization
     235             : LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
     236             : 
     237             : // get type of capitalization (input is a UTF-16 w_char vector, despite the _utf8 name)
     238             : LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
     239             : 
     240             : // strip all ignored characters (given as UTF-16 w_chars) in the string
     241             : LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
     242             :     std::string& word,
     243             :     const std::vector<w_char>& ignored_chars);
     244             : 
     245             : // strip all ignored characters in the string
     246             : LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
     247             :     std::string& word,
     248             :     const std::string& ignored_chars);
     249             : 
     250             : LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
     251             :                                            std::string& out,
     252             :                                            int ln);
     253             : 
     254             : LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
     255             :                                           std::string& out,
     256             :                                           std::vector<w_char>& out_utf16,
     257             :                                           int utf8,
     258             :                                           int ln);
     259             : 
     260             : LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r);
     261             : 
     262             : LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
     263             :                                          const std::string& morph,
     264             :                                          const std::string& var);
     265             : 
     266             : // conversion function for protected memory
     267             : LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
     268             : 
     269             : // conversion function for protected memory
     270             : LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s);
     271             : 
     272             : // hash entry accessors (inline functions, not macros): HENTRY_DATA returns the data stored behind the word of h, or NULL when h->var is 0
     273           0 : LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) {
     274             :   char* ret;
     275           0 :   if (!h->var)  // no option bits set: treated as "no data"
     276           0 :     ret = NULL;
     277           0 :   else if (h->var & H_OPT_ALIASM)  // the bytes at that position encode a pointer, decoded by get_stored_pointer()
     278           0 :     ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
     279             :   else
     280           0 :     ret = HENTRY_WORD(h) + h->blen + 1;  // data starts blen + 1 bytes after the word start (presumably just past the word's terminator)
     281           0 :   return ret;
     282             : }
     283             : 
     284             : LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA(
     285             :     const struct hentry* h) {  // const overload: takes a const entry and returns a const pointer to its data
     286             :   const char* ret;
     287             :   if (!h->var)  // no option bits set: treated as "no data"
     288             :     ret = NULL;
     289             :   else if (h->var & H_OPT_ALIASM)  // the bytes at that position encode a pointer, decoded by get_stored_pointer()
     290             :     ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
     291             :   else
     292             :     ret = HENTRY_WORD(h) + h->blen + 1;  // data starts blen + 1 bytes after the word start
     293             :   return ret;
     294             : }
     295             : 
     296             : // NULL-free version of HENTRY_DATA for warning-free OOo build: returns "" instead of NULL when h->var is 0
     297           0 : LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(
     298             :     const struct hentry* h) {
     299             :   const char* ret;
     300           0 :   if (!h->var)  // no option bits set: hand back an empty string literal
     301           0 :     ret = "";
     302           0 :   else if (h->var & H_OPT_ALIASM)  // the bytes at that position encode a pointer, decoded by get_stored_pointer()
     303           0 :     ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
     304             :   else
     305           0 :     ret = HENTRY_WORD(h) + h->blen + 1;  // data starts blen + 1 bytes after the word start
     306           0 :   return ret;
     307             : }
     308             : 
     309           0 : LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h,
     310             :                                                   const char* p) {  // p: substring to look for in the entry's data
     311           0 :   return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);  // first occurrence of p, or NULL if no data / no match; NOTE(review): HENTRY_DATA(h) is evaluated twice
     312             : }
     313             : 
     314             : #endif

Generated by: LCOV version 1.13