LCOV - code coverage report
Current view: top level - intl/hyphenation/hyphen - hyphen.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 531 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 23 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
       2             :  * licenses follows.
       3             :  */
       4             : 
       5             : /* LibHnj - a library for high quality hyphenation and justification
       6             :  * Copyright (C) 1998 Raph Levien, 
       7             :  *           (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 
       8             :  *           (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
       9             :  *           (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo)
      10             :  *
      11             :  * This library is free software; you can redistribute it and/or
      12             :  * modify it under the terms of the GNU Library General Public
      13             :  * License as published by the Free Software Foundation; either
      14             :  * version 2 of the License, or (at your option) any later version.
      15             :  *
      16             :  * This library is distributed in the hope that it will be useful,
      17             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      18             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      19             :  * Library General Public License for more details.
      20             :  *
      21             :  * You should have received a copy of the GNU Library General Public
      22             :  * License along with this library; if not, write to the 
      23             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
      24             :  * Boston, MA  02111-1307  USA.
      25             : */
      26             : 
      27             : /*
      28             :  * The contents of this file are subject to the Mozilla Public License
      29             :  * Version 1.0 (the "MPL"); you may not use this file except in
      30             :  * compliance with the MPL.  You may obtain a copy of the MPL at
      31             :  * http://www.mozilla.org/MPL/
      32             :  *
      33             :  * Software distributed under the MPL is distributed on an "AS IS" basis,
      34             :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
      35             :  * for the specific language governing rights and limitations under the
      36             :  * MPL.
      37             :  *
      38             :  */
      39             : #include <stdlib.h> /* for NULL, malloc */
      40             : #include <stdio.h>  /* for fprintf */
      41             : #include <string.h> /* for strdup */
      42             : 
      43             : #ifdef UNX
      44             : #include <unistd.h> /* for exit */
      45             : #endif
      46             : 
      47             : #define noVERBOSE
      48             : 
      49             : /* calculate hyphenmin values with long ligature length (2 or 3 characters
      50             :  * instead of 1 or 2) for comparison with hyphenation without ligatures */
      51             : #define noLONG_LIGATURE
      52             : 
      53             : #ifdef LONG_LIGATURE
      54             : #define LIG_xx  1
      55             : #define LIG_xxx 2
      56             : #else
      57             : #define LIG_xx  0
      58             : #define LIG_xxx 1
      59             : #endif
      60             : 
      61             : #include "hnjalloc.h"
      62             : #include "hyphen.h"
      63             : 
      64             : static char *
      65           0 : hnj_strdup (const char *s)
      66             : {
      67             :   char *newstr;
      68             :   int l;
      69             : 
      70           0 :   l = strlen (s);
      71           0 :   newstr = (char *) hnj_malloc (l + 1);
      72           0 :   memcpy (newstr, s, l);
      73           0 :   newstr[l] = 0;
      74           0 :   return newstr;
      75             : }
      76             : 
      77             : /* remove cross-platform text line end characters */
      78           0 : void hnj_strchomp(char * s)
      79             : {
      80           0 :   int k = strlen(s);
      81           0 :   if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
      82           0 :   if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
      83           0 : }
      84             : 
      85             : /* a little bit of a hash table implementation. This simply maps strings
      86             :    to state numbers */
      87             : 
      88             : typedef struct _HashTab HashTab;
      89             : typedef struct _HashEntry HashEntry;
      90             : 
      91             : /* A cheap, but effective, hack. */
      92             : #define HASH_SIZE 31627
      93             : 
      94             : struct _HashTab {
      95             :   HashEntry *entries[HASH_SIZE];
      96             : };
      97             : 
      98             : struct _HashEntry {
      99             :   HashEntry *next;
     100             :   char *key;
     101             :   int val;
     102             : };
     103             : 
     104             : /* a char* hash function from ASU - adapted from Gtk+ */
     105             : static unsigned int
     106           0 : hnj_string_hash (const char *s)
     107             : {
     108             :   const char *p;
     109           0 :   unsigned int h=0, g;
     110           0 :   for(p = s; *p != '\0'; p += 1) {
     111           0 :     h = ( h << 4 ) + *p;
     112           0 :     if ( ( g = h & 0xf0000000 ) ) {
     113           0 :       h = h ^ (g >> 24);
     114           0 :       h = h ^ g;
     115             :     }
     116             :   }
     117           0 :   return h /* % M */;
     118             : }
     119             : 
     120             : static HashTab *
     121           0 : hnj_hash_new (void)
     122             : {
     123             :   HashTab *hashtab;
     124             :   int i;
     125             : 
     126           0 :   hashtab = (HashTab *) hnj_malloc (sizeof(HashTab));
     127           0 :   for (i = 0; i < HASH_SIZE; i++)
     128           0 :     hashtab->entries[i] = NULL;
     129             : 
     130           0 :   return hashtab;
     131             : }
     132             : 
     133             : static void
     134           0 : hnj_hash_free (HashTab *hashtab)
     135             : {
     136             :   int i;
     137             :   HashEntry *e, *next;
     138             : 
     139           0 :   for (i = 0; i < HASH_SIZE; i++)
     140           0 :     for (e = hashtab->entries[i]; e; e = next)
     141             :       {
     142           0 :         next = e->next;
     143           0 :         hnj_free (e->key);
     144           0 :         hnj_free (e);
     145             :       }
     146             : 
     147           0 :   hnj_free (hashtab);
     148           0 : }
     149             : 
     150             : /* assumes that key is not already present! */
     151             : static void
     152           0 : hnj_hash_insert (HashTab *hashtab, const char *key, int val)
     153             : {
     154             :   int i;
     155             :   HashEntry *e;
     156             : 
     157           0 :   i = hnj_string_hash (key) % HASH_SIZE;
     158           0 :   e = (HashEntry *) hnj_malloc (sizeof(HashEntry));
     159           0 :   e->next = hashtab->entries[i];
     160           0 :   e->key = hnj_strdup (key);
     161           0 :   e->val = val;
     162           0 :   hashtab->entries[i] = e;
     163           0 : }
     164             : 
     165             : /* return val if found, otherwise -1 */
     166             : static int
     167           0 : hnj_hash_lookup (HashTab *hashtab, const char *key)
     168             : {
     169             :   int i;
     170             :   HashEntry *e;
     171           0 :   i = hnj_string_hash (key) % HASH_SIZE;
     172           0 :   for (e = hashtab->entries[i]; e; e = e->next)
     173           0 :     if (!strcmp (key, e->key))
     174           0 :       return e->val;
     175           0 :   return -1;
     176             : }
     177             : 
     178             : /* Get the state number, allocating a new state if necessary. */
     179             : static int
     180           0 : hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string)
     181             : {
     182             :   int state_num;
     183             : 
     184           0 :   state_num = hnj_hash_lookup (hashtab, string);
     185             : 
     186           0 :   if (state_num >= 0)
     187           0 :     return state_num;
     188             : 
     189           0 :   hnj_hash_insert (hashtab, string, dict->num_states);
     190             :   /* predicate is true if dict->num_states is a power of two */
     191           0 :   if (!(dict->num_states & (dict->num_states - 1)))
     192             :     {
     193           0 :       dict->states = (HyphenState *) hnj_realloc (dict->states,
     194             :                                   (dict->num_states << 1) *
     195             :                                   sizeof(HyphenState));
     196             :     }
     197           0 :   dict->states[dict->num_states].match = NULL;
     198           0 :   dict->states[dict->num_states].repl = NULL;
     199           0 :   dict->states[dict->num_states].fallback_state = -1;
     200           0 :   dict->states[dict->num_states].num_trans = 0;
     201           0 :   dict->states[dict->num_states].trans = NULL;
     202           0 :   return dict->num_states++;
     203             : }
     204             : 
     205             : /* add a transition from state1 to state2 through ch - assumes that the
     206             :    transition does not already exist */
     207             : static void
     208           0 : hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
     209             : {
     210             :   int num_trans;
     211             : 
     212           0 :   num_trans = dict->states[state1].num_trans;
     213           0 :   if (num_trans == 0)
     214             :     {
     215           0 :       dict->states[state1].trans = (HyphenTrans *) hnj_malloc (sizeof(HyphenTrans));
     216             :     }
     217           0 :   else if (!(num_trans & (num_trans - 1)))
     218             :     {
     219           0 :       dict->states[state1].trans = (HyphenTrans *) hnj_realloc (dict->states[state1].trans,
     220             :                                                 (num_trans << 1) *
     221             :                                                 sizeof(HyphenTrans));
     222             :     }
     223           0 :   dict->states[state1].trans[num_trans].ch = ch;
     224           0 :   dict->states[state1].trans[num_trans].new_state = state2;
     225           0 :   dict->states[state1].num_trans++;
     226           0 : }
     227             : 
     228             : #ifdef VERBOSE
     229             : HashTab *global[1];
     230             : 
     231             : static char *
     232             : get_state_str (int state, int level)
     233             : {
     234             :   int i;
     235             :   HashEntry *e;
     236             : 
     237             :   for (i = 0; i < HASH_SIZE; i++)
     238             :     for (e = global[level]->entries[i]; e; e = e->next)
     239             :       if (e->val == state)
     240             :         return e->key;
     241             :   return NULL;
     242             : }
     243             : #endif
     244             : 
     245           0 : void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
     246             :   int i, j;
     247             :   char word[MAX_CHARS];
     248             :   char pattern[MAX_CHARS];
     249             :   char * repl;
     250             :   signed char replindex;
     251             :   signed char replcut;
     252           0 :   int state_num = 0;
     253             :   int last_state;
     254             :   char ch;
     255             :   int found;
     256             : 
     257           0 :           if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
     258           0 :             dict->lhmin = atoi(buf + 13);
     259           0 :             return;
     260           0 :           } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
     261           0 :             dict->rhmin = atoi(buf + 14);
     262           0 :             return;
     263           0 :           } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
     264           0 :             dict->clhmin = atoi(buf + 21);
     265           0 :             return;
     266           0 :           } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
     267           0 :             dict->crhmin = atoi(buf + 22);
     268           0 :             return;
     269           0 :           } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
     270           0 :             char * space = buf + 8;
     271           0 :             while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
     272           0 :             if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
     273           0 :             if (dict->nohyphen) {
     274           0 :                 char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
     275           0 :                 *nhe = 0;
     276           0 :                 for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
     277           0 :                         if (*nhe == ',') {
     278           0 :                             dict->nohyphenl++;
     279           0 :                             *nhe = 0;
     280             :                         }
     281             :                 }
     282             :             }
     283           0 :             return;
     284             :           } 
     285           0 :           j = 0;
     286           0 :           pattern[j] = '0';
     287           0 :           repl = strchr(buf, '/');
     288           0 :           replindex = 0;
     289           0 :           replcut = 0;
     290           0 :           if (repl) {
     291           0 :             char * index = strchr(repl + 1, ',');
     292           0 :             *repl = '\0';
     293           0 :             if (index) {
     294           0 :                 char * index2 = strchr(index + 1, ',');
     295           0 :                 *index = '\0';
     296           0 :                 if (index2) {
     297           0 :                     *index2 = '\0';
     298           0 :                     replindex = (signed char) atoi(index + 1) - 1;
     299           0 :                     replcut = (signed char) atoi(index2 + 1);                
     300             :                 }
     301             :             } else {
     302           0 :                 hnj_strchomp(repl + 1);
     303           0 :                 replindex = 0;
     304           0 :                 replcut = (signed char) strlen(buf);
     305             :             }
     306           0 :             repl = hnj_strdup(repl + 1);
     307             :           }
     308           0 :           for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++)
     309             :             {
     310           0 :               if (buf[i] >= '0' && buf[i] <= '9')
     311           0 :                 pattern[j] = buf[i];
     312             :               else
     313             :                 {
     314           0 :                   word[j] = buf[i];
     315           0 :                   pattern[++j] = '0';
     316             :                 }
     317             :             }
     318           0 :           word[j] = '\0';
     319           0 :           pattern[j + 1] = '\0';
     320             : 
     321           0 :           i = 0;
     322           0 :           if (!repl) {
     323             :             /* Optimize away leading zeroes */
     324           0 :             for (; pattern[i] == '0'; i++);
     325             :           } else {
     326           0 :             if (*word == '.') i++;
     327             :             /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
     328           0 :             if (dict->utf8) {
     329           0 :                 int pu = -1;        /* unicode character position */
     330           0 :                 int ps = -1;        /* unicode start position (original replindex) */
     331           0 :                 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
     332           0 :                 for (; pc < (strlen(word) + 1); pc++) {
     333             :                 /* beginning of an UTF-8 character (not '10' start bits) */
     334           0 :                     if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
     335           0 :                     if ((ps < 0) && (replindex == pu)) {
     336           0 :                         ps = replindex;
     337           0 :                         replindex = (signed char) pc;
     338             :                     }
     339           0 :                     if ((ps >= 0) && ((pu - ps) == replcut)) {
     340           0 :                         replcut = (signed char) (pc - replindex);
     341           0 :                         break;
     342             :                     }
     343             :                 }
     344           0 :                 if (*word == '.') replindex--;
     345             :             }
     346             :           }
     347             : 
     348             : #ifdef VERBOSE
     349             :           printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
     350             : #endif
     351           0 :           found = hnj_hash_lookup (hashtab, word);
     352           0 :           state_num = hnj_get_state (dict, hashtab, word);
     353           0 :           dict->states[state_num].match = hnj_strdup (pattern + i);
     354           0 :           dict->states[state_num].repl = repl;
     355           0 :           dict->states[state_num].replindex = replindex;
     356           0 :           if (!replcut) {
     357           0 :             dict->states[state_num].replcut = (signed char) strlen(word);
     358             :           } else {
     359           0 :             dict->states[state_num].replcut = replcut;
     360             :           }
     361             : 
     362             :           /* now, put in the prefix transitions */
     363           0 :           for (; found < 0 && j > 0; --j)
     364             :             {
     365           0 :               last_state = state_num;
     366           0 :               ch = word[j - 1];
     367           0 :               word[j - 1] = '\0';
     368           0 :               found = hnj_hash_lookup (hashtab, word);
     369           0 :               state_num = hnj_get_state (dict, hashtab, word);
     370           0 :               hnj_add_trans (dict, state_num, last_state, ch);
     371             :             }
     372             : }
     373             : 
     374             : HyphenDict *
     375           0 : hnj_hyphen_load (const char *fn)
     376             : {
     377             :   HyphenDict *result;
     378             :   FILE *f;
     379           0 :   f = fopen (fn, "r");
     380           0 :   if (f == NULL)
     381           0 :     return NULL;
     382             : 
     383           0 :   result = hnj_hyphen_load_file(f);
     384             : 
     385           0 :   fclose(f);
     386           0 :   return result;
     387             : }
     388             : 
     389             : HyphenDict *
     390           0 : hnj_hyphen_load_file (FILE *f)
     391             : {
     392             :   HyphenDict *dict[2];
     393             :   HashTab *hashtab;
     394             :   char buf[MAX_CHARS];
     395           0 :   int nextlevel = 0;
     396             :   int i, j, k;
     397             :   HashEntry *e;
     398           0 :   int state_num = 0;
     399             : // loading one or two dictionaries (separated by NEXTLEVEL keyword)
     400           0 : for (k = 0; k < 2; k++) { 
     401           0 :   hashtab = hnj_hash_new ();
     402             : #ifdef VERBOSE
     403             :   global[k] = hashtab;
     404             : #endif
     405           0 :   hnj_hash_insert (hashtab, "", 0);
     406           0 :   dict[k] = (HyphenDict *) hnj_malloc (sizeof(HyphenDict));
     407           0 :   dict[k]->num_states = 1;
     408           0 :   dict[k]->states = (HyphenState *) hnj_malloc (sizeof(HyphenState));
     409           0 :   dict[k]->states[0].match = NULL;
     410           0 :   dict[k]->states[0].repl = NULL;
     411           0 :   dict[k]->states[0].fallback_state = -1;
     412           0 :   dict[k]->states[0].num_trans = 0;
     413           0 :   dict[k]->states[0].trans = NULL;
     414           0 :   dict[k]->nextlevel = NULL;
     415           0 :   dict[k]->lhmin = 0;
     416           0 :   dict[k]->rhmin = 0;
     417           0 :   dict[k]->clhmin = 0;
     418           0 :   dict[k]->crhmin = 0;
     419           0 :   dict[k]->nohyphen = NULL;
     420           0 :   dict[k]->nohyphenl = 0;
     421             : 
     422             :   /* read in character set info */
     423           0 :   if (k == 0) {
     424           0 :     for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
     425           0 :     if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
     426           0 :       for (i=0;i<MAX_NAME;i++)
     427           0 :         if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
     428           0 :           dict[k]->cset[i] = 0;
     429             :     } else {
     430           0 :       dict[k]->cset[0] = 0;
     431             :     }
     432           0 :     dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
     433             :   } else {
     434           0 :     strncpy(dict[k]->cset, dict[0]->cset, sizeof(dict[k]->cset)-1);
     435           0 :     dict[k]->cset[sizeof(dict[k]->cset)-1] = '\0';
     436           0 :     dict[k]->utf8 = dict[0]->utf8;
     437             :   }
     438             : 
     439           0 :   if (k == 0 || nextlevel) {
     440           0 :     while (fgets (buf, sizeof(buf), f) != NULL) {
     441           0 :       if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
     442           0 :         nextlevel = 1;
     443           0 :         break;
     444           0 :       } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
     445             :     }
     446           0 :   } else if (k == 1) {
     447             :     /* default first level: hyphen and ASCII apostrophe */
     448           0 :     if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN ',-\n", dict[k], hashtab);
     449           0 :     else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99,-\n", dict[k], hashtab);
     450           0 :     strncpy(buf, "1-1\n", MAX_CHARS-1); // buf rewritten by hnj_hyphen_load here
     451           0 :     buf[MAX_CHARS-1] = '\0';
     452           0 :     hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
     453           0 :     hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
     454           0 :     if (dict[0]->utf8) {
     455           0 :       hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
     456           0 :       hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
     457             :     }
     458             :   }
     459             : 
     460             :   /* Could do unioning of matches here (instead of the preprocessor script).
     461             :      If we did, the pseudocode would look something like this:
     462             : 
     463             :      foreach state in the hash table
     464             :         foreach i = [1..length(state) - 1]
     465             :            state to check is substr (state, i)
     466             :            look it up
     467             :            if found, and if there is a match, union the match in.
     468             : 
     469             :      It's also possible to avoid the quadratic blowup by doing the
     470             :      search in order of increasing state string sizes - then you
     471             :      can break the loop after finding the first match.
     472             : 
     473             :      This step should be optional in any case - if there is a
     474             :      preprocessed rule table, it's always faster to use that.
     475             : 
     476             : */
     477             : 
     478             :   /* put in the fallback states */
     479           0 :   for (i = 0; i < HASH_SIZE; i++)
     480           0 :     for (e = hashtab->entries[i]; e; e = e->next)
     481             :       {
     482           0 :         if (*(e->key)) for (j = 1; 1; j++)
     483             :           {          
     484           0 :             state_num = hnj_hash_lookup (hashtab, e->key + j);
     485           0 :             if (state_num >= 0)
     486           0 :               break;
     487             :           }
     488             :         /* KBH: FIXME state 0 fallback_state should always be -1? */
     489           0 :         if (e->val)
     490           0 :           dict[k]->states[e->val].fallback_state = state_num;
     491             :       }
     492             : #ifdef VERBOSE
     493             :   for (i = 0; i < HASH_SIZE; i++)
     494             :     for (e = hashtab->entries[i]; e; e = e->next)
     495             :       {
     496             :         printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val,
     497             :                 dict[k]->states[e->val].fallback_state);
     498             :         for (j = 0; j < dict[k]->states[e->val].num_trans; j++)
     499             :           printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch,
     500             :                   dict[k]->states[e->val].trans[j].new_state);
     501             :       }
     502             : #endif
     503             : 
     504             : #ifndef VERBOSE
     505           0 :   hnj_hash_free (hashtab);
     506             : #endif
     507           0 :   state_num = 0;
     508             : }
     509           0 :   if (nextlevel) dict[0]->nextlevel = dict[1];
     510             :   else {
     511           0 :     dict[1] -> nextlevel = dict[0];
     512           0 :     dict[1]->lhmin = dict[0]->lhmin;
     513           0 :     dict[1]->rhmin = dict[0]->rhmin;
     514           0 :     dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
     515           0 :     dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
     516             : #ifdef VERBOSE
     517             :     HashTab *r = global[0];
     518             :     global[0] = global[1];
     519             :     global[1] = r;
     520             : #endif
     521           0 :     return dict[1];
     522             :   }
     523           0 :   return dict[0];
     524             : }
     525             : 
     526           0 : void hnj_hyphen_free (HyphenDict *dict)
     527             : {
     528             :   int state_num;
     529             :   HyphenState *hstate;
     530             : 
     531           0 :   for (state_num = 0; state_num < dict->num_states; state_num++)
     532             :     {
     533           0 :       hstate = &dict->states[state_num];
     534           0 :       if (hstate->match)
     535           0 :         hnj_free (hstate->match);
     536           0 :       if (hstate->repl)
     537           0 :         hnj_free (hstate->repl);
     538           0 :       if (hstate->trans)
     539           0 :         hnj_free (hstate->trans);
     540             :     }
     541           0 :   if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel);
     542             : 
     543           0 :   if (dict->nohyphen) hnj_free(dict->nohyphen);
     544             : 
     545           0 :   hnj_free (dict->states);
     546             : 
     547           0 :   hnj_free (dict);
     548           0 : }
     549             : 
     550             : #define MAX_WORD 256
     551             : 
     552           0 : int hnj_hyphen_hyphenate (HyphenDict *dict,
     553             :                            const char *word, int word_size,
     554             :                            char *hyphens)
     555             : {
     556             :   char *prep_word;
     557             :   int i, j, k;
     558             :   int state;
     559             :   char ch;
     560             :   HyphenState *hstate;
     561             :   char *match;
     562             :   int offset;
     563             : 
     564           0 :   prep_word = (char*) hnj_malloc (word_size + 3);
     565             : 
     566           0 :   j = 0;
     567           0 :   prep_word[j++] = '.';
     568             : 
     569           0 :   for (i = 0; i < word_size; i++) {
     570           0 :     if (word[i] <= '9' && word[i] >= '0') {
     571           0 :       prep_word[j++] = '.';
     572             :     } else {
     573           0 :       prep_word[j++] = word[i];
     574             :     }
     575             :   }
     576             : 
     577           0 :   prep_word[j++] = '.';
     578           0 :   prep_word[j] = '\0';
     579             : 
     580           0 :   for (i = 0; i < word_size + 5; i++)
     581           0 :     hyphens[i] = '0';
     582             : 
     583             : #ifdef VERBOSE
     584             :   printf ("prep_word = %s\n", prep_word);
     585             : #endif
     586             : 
     587             :   /* now, run the finite state machine */
     588           0 :   state = 0;
     589           0 :   for (i = 0; i < j; i++)
     590             :     {
     591           0 :       ch = prep_word[i];
     592             :       for (;;)
     593             :         {
     594             : 
     595           0 :           if (state == -1) {
     596             :             /* return 1; */
     597             :             /*  KBH: FIXME shouldn't this be as follows? */
     598           0 :             state = 0;
     599           0 :             goto try_next_letter;
     600             :           }          
     601             : 
     602             : #ifdef VERBOSE
     603             :           char *state_str;
     604             :           state_str = get_state_str (state, 0);
     605             : 
     606             :           for (k = 0; k < i - strlen (state_str); k++)
     607             :             putchar (' ');
     608             :           printf ("%s", state_str);
     609             : #endif
     610             : 
     611           0 :           hstate = &dict->states[state];
     612           0 :           for (k = 0; k < hstate->num_trans; k++)
     613           0 :             if (hstate->trans[k].ch == ch)
     614             :               {
     615           0 :                 state = hstate->trans[k].new_state;
     616           0 :                 goto found_state;
     617             :               }
     618           0 :           state = hstate->fallback_state;
     619             : #ifdef VERBOSE
     620             :           printf (" falling back, fallback_state %d\n", state);
     621             : #endif
     622             :         }
     623             :     found_state:
     624             : #ifdef VERBOSE
     625             :       printf ("found state %d\n",state);
     626             : #endif
     627             :       /* Additional optimization is possible here - especially,
     628             :          elimination of trailing zeroes from the match. Leading zeroes
     629             :          have already been optimized. */
     630           0 :       match = dict->states[state].match;
     631             :       /* replacing rules not handled by hyphen_hyphenate() */
     632           0 :       if (match && !dict->states[state].repl)
     633             :         {
     634           0 :           offset = i + 1 - strlen (match);
     635             : #ifdef VERBOSE
     636             :           for (k = 0; k < offset; k++)
     637             :             putchar (' ');
     638             :           printf ("%s\n", match);
     639             : #endif
     640             :           /* This is a linear search because I tried a binary search and
     641             :              found it to be just a teeny bit slower. */
     642           0 :           for (k = 0; match[k]; k++)
     643           0 :             if (hyphens[offset + k] < match[k])
     644           0 :               hyphens[offset + k] = match[k];
     645             :         }
     646             : 
     647             :       /* KBH: we need this to make sure we keep looking in a word */
     648             :       /* for patterns even if the current character is not known in state 0 */
     649             :       /* since patterns for hyphenation may occur anywhere in the word */
     650             :       try_next_letter: ;
     651             : 
     652             :     }
     653             : #ifdef VERBOSE
     654             :   for (i = 0; i < j; i++)
     655             :     putchar (hyphens[i]);
     656             :   putchar ('\n');
     657             : #endif
     658             : 
     659           0 :   for (i = 0; i < j - 4; i++)
     660             : #if 0
     661             :     if (hyphens[i + 1] & 1)
     662             :       hyphens[i] = '-';
     663             : #else
     664           0 :     hyphens[i] = hyphens[i + 1];
     665             : #endif
     666           0 :   hyphens[0] = '0';
     667           0 :   for (; i < word_size; i++)
     668           0 :     hyphens[i] = '0';
     669           0 :   hyphens[word_size] = '\0';
     670             : 
     671           0 :   hnj_free (prep_word);
     672             :     
     673           0 :   return 0;    
     674             : }
     675             : 
     676             : /* Unicode ligature length */
     677           0 : int hnj_ligature(unsigned char c) {
     678           0 :     switch (c) {
     679             :         case 0x80:                      /* ff */
     680             :         case 0x81:                      /* fi */
     681           0 :         case 0x82: return LIG_xx;       /* fl */
     682             :         case 0x83:                      /* ffi */
     683           0 :         case 0x84: return LIG_xxx;      /* ffl */
     684             :         case 0x85:                      /* long st */
     685           0 :         case 0x86: return LIG_xx;       /* st */
     686             :     }
     687           0 :     return 0;
     688             : }
     689             : 
     690             : /* character length of the first n byte of the input word */
     691           0 : int hnj_hyphen_strnlen(const char * word, int n, int utf8)
     692             : {
     693           0 :     int i = 0;
     694           0 :     int j = 0;
     695           0 :     while (j < n && word[j] != '\0') {
     696           0 :       i++;
     697             :       // Unicode ligature support
     698           0 :       if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j + 1] == 0xAC))  {
     699           0 :         i += hnj_ligature(word[j + 2]);
     700             :       }
     701           0 :       for (j++; utf8 && (word[j] & 0xc0) == 0x80; j++);
     702             :     }
     703           0 :     return i;
     704             : }
     705             : 
     706           0 : int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
     707             :         char *** rep, int ** pos, int ** cut, int lhmin)
     708             : {
     709           0 :     int i = 1, j;
     710             : 
     711             :     // Unicode ligature support
     712           0 :     if (utf8 && ((unsigned char) word[0] == 0xEF) && ((unsigned char) word[1] == 0xAC))  {
     713           0 :       i += hnj_ligature(word[2]);
     714             :     }
     715             : 
     716             :     // ignore numbers
     717           0 :     for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
     718             : 
     719           0 :     for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
     720             :       // check length of the non-standard part
     721           0 :       if (*rep && *pos && *cut && (*rep)[j]) {
     722           0 :         char * rh = strchr((*rep)[j], '=');
     723           0 :         if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) +
     724           0 :           hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) {
     725           0 :             free((*rep)[j]);
     726           0 :             (*rep)[j] = NULL;
     727           0 :             hyphens[j] = '0';
     728             :           }
     729             :        } else {
     730           0 :          hyphens[j] = '0';
     731             :        }
     732           0 :        j++;
     733             : 
     734             :        // Unicode ligature support
     735           0 :        if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j + 1] == 0xAC))  {
     736           0 :          i += hnj_ligature(word[j + 2]);
     737             :        }
     738           0 :     } while (utf8 && (word[j] & 0xc0) == 0x80);
     739           0 :     return 0;
     740             : }
     741             : 
     742           0 : int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
     743             :         char *** rep, int ** pos, int ** cut, int rhmin)
     744             : {
     745           0 :     int i = 0;
     746             :     int j;
     747             : 
     748             :     // ignore numbers
     749           0 :     for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
     750             : 
     751           0 :     for (j = word_size - 1; i < rhmin && j > 0; j--) {
     752             :       // check length of the non-standard part
     753           0 :       if (*rep && *pos && *cut && (*rep)[j]) {
     754           0 :         char * rh = strchr((*rep)[j], '=');
     755           0 :         if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) +
     756           0 :           hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) {
     757           0 :             free((*rep)[j]);
     758           0 :             (*rep)[j] = NULL;
     759           0 :             hyphens[j] = '0';
     760             :           }
     761             :        } else {
     762           0 :          hyphens[j] = '0';
     763             :        }
     764           0 :        if (!utf8 || (word[j] & 0xc0) == 0xc0 || (word[j] & 0x80) != 0x80) i++;
     765             :     }
     766           0 :     return 0;
     767             : }
     768             : 
     769             : // recursive function for compound level hyphenation
     770           0 : int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
     771             :     char * hyphens, char *** rep, int ** pos, int ** cut,
     772             :     int clhmin, int crhmin, int lend, int rend)
     773             : {
     774             :   char *prep_word;
     775             :   int i, j, k;
     776             :   int state;
     777             :   char ch;
     778             :   HyphenState *hstate;
     779             :   char *match;
     780             :   char *repl;
     781             :   signed char replindex;
     782             :   signed char replcut;
     783             :   int offset;
     784             :   int * matchlen;
     785             :   int * matchindex;
     786             :   char ** matchrepl;  
     787           0 :   int isrepl = 0;
     788             :   int nHyphCount;
     789             : 
     790           0 :   size_t prep_word_size = word_size + 3;
     791           0 :   prep_word = (char*) hnj_malloc (prep_word_size);
     792           0 :   matchlen = (int*) hnj_malloc ((word_size + 3) * sizeof(int));
     793           0 :   matchindex = (int*) hnj_malloc ((word_size + 3) * sizeof(int));
     794           0 :   matchrepl = (char**) hnj_malloc ((word_size + 3) * sizeof(char *));
     795             : 
     796           0 :   j = 0;
     797           0 :   prep_word[j++] = '.';
     798             :   
     799           0 :   for (i = 0; i < word_size; i++) {
     800           0 :     if (word[i] <= '9' && word[i] >= '0') {
     801           0 :       prep_word[j++] = '.';
     802             :     } else {
     803           0 :       prep_word[j++] = word[i];
     804             :     }
     805             :   }
     806             : 
     807             : 
     808             : 
     809           0 :   prep_word[j++] = '.';
     810           0 :   prep_word[j] = '\0';
     811             : 
     812           0 :   for (i = 0; i < j; i++)
     813           0 :     hyphens[i] = '0';    
     814             : 
     815             : #ifdef VERBOSE
     816             :   printf ("prep_word = %s\n", prep_word);
     817             : #endif
     818             : 
     819             :   /* now, run the finite state machine */
     820           0 :   state = 0;
     821           0 :   for (i = 0; i < j; i++)
     822             :     {
     823           0 :       ch = prep_word[i];
     824             :       for (;;)
     825             :         {
     826             : 
     827           0 :           if (state == -1) {
     828             :             /* return 1; */
     829             :             /*  KBH: FIXME shouldn't this be as follows? */
     830           0 :             state = 0;
     831           0 :             goto try_next_letter;
     832             :           }          
     833             : 
     834             : #ifdef VERBOSE
     835             :           char *state_str;
     836             :           state_str = get_state_str (state, 1);
     837             : 
     838             :           for (k = 0; k < i - strlen (state_str); k++)
     839             :             putchar (' ');
     840             :           printf ("%s", state_str);
     841             : #endif
     842             : 
     843           0 :           hstate = &dict->states[state];
     844           0 :           for (k = 0; k < hstate->num_trans; k++)
     845           0 :             if (hstate->trans[k].ch == ch)
     846             :               {
     847           0 :                 state = hstate->trans[k].new_state;
     848           0 :                 goto found_state;
     849             :               }
     850           0 :           state = hstate->fallback_state;
     851             : #ifdef VERBOSE
     852             :           printf (" falling back, fallback_state %d\n", state);
     853             : #endif
     854             :         }
     855             :     found_state:
     856             : #ifdef VERBOSE
     857             :       printf ("found state %d\n",state);
     858             : #endif
     859             :       /* Additional optimization is possible here - especially,
     860             :          elimination of trailing zeroes from the match. Leading zeroes
     861             :          have already been optimized. */
     862           0 :       match = dict->states[state].match;
     863           0 :       repl = dict->states[state].repl;
     864           0 :       replindex = dict->states[state].replindex;
     865           0 :       replcut = dict->states[state].replcut;
     866             :       /* replacing rules not handled by hyphen_hyphenate() */
     867           0 :       if (match)
     868             :         {
     869           0 :           offset = i + 1 - strlen (match);
     870             : #ifdef VERBOSE
     871             :           for (k = 0; k < offset; k++)
     872             :             putchar (' ');
     873             :           printf ("%s (%s)\n", match, repl);
     874             : #endif
     875           0 :           if (repl) {
     876           0 :             if (!isrepl) for(; isrepl < word_size; isrepl++) {
     877           0 :                 matchrepl[isrepl] = NULL;
     878           0 :                 matchindex[isrepl] = -1;
     879             :             }
     880           0 :             matchlen[offset + replindex] = replcut;
     881             :           }
     882             :           /* This is a linear search because I tried a binary search and
     883             :              found it to be just a teeny bit slower. */
     884           0 :           for (k = 0; match[k]; k++) {
     885           0 :             if ((hyphens[offset + k] < match[k])) {
     886           0 :               hyphens[offset + k] = match[k];
     887           0 :               if (match[k]&1) {
     888           0 :                 matchrepl[offset + k] = repl;
     889           0 :                 if (repl && (k >= replindex) && (k <= replindex + replcut)) {
     890           0 :                     matchindex[offset + replindex] = offset + k;
     891             :                 }
     892             :               }
     893             :             }
     894             :           }
     895             :           
     896             :         }
     897             : 
     898             :       /* KBH: we need this to make sure we keep looking in a word */
     899             :       /* for patterns even if the current character is not known in state 0 */
     900             :       /* since patterns for hyphenation may occur anywhere in the word */
     901             :       try_next_letter: ;
     902             : 
     903             :     }
     904             : #ifdef VERBOSE
     905             :   for (i = 0; i < j; i++)
     906             :     putchar (hyphens[i]);
     907             :   putchar ('\n');
     908             : #endif
     909             : 
     910           0 :   for (i = 0; i < j - 3; i++)
     911             : #if 0
     912             :     if (hyphens[i + 1] & 1)
     913             :       hyphens[i] = '-';
     914             : #else
     915           0 :     hyphens[i] = hyphens[i + 1];
     916             : #endif
     917           0 :   for (; i < word_size; i++)
     918           0 :     hyphens[i] = '0';
     919           0 :   hyphens[word_size] = '\0';
     920             : 
     921             :        /* now create a new char string showing hyphenation positions */
     922             :        /* count the hyphens and allocate space for the new hyphenated string */
     923           0 :        nHyphCount = 0;
     924           0 :        for (i = 0; i < word_size; i++)
     925           0 :           if (hyphens[i]&1)
     926           0 :              nHyphCount++;
     927           0 :        j = 0;
     928           0 :        for (i = 0; i < word_size; i++) {
     929           0 :            if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { 
     930           0 :                 if (rep && pos && cut) {
     931           0 :                     if (!*rep)
     932           0 :                         *rep = (char **) calloc(word_size, sizeof(char *));
     933           0 :                     if (!*pos)
     934           0 :                         *pos = (int *) calloc(word_size, sizeof(int));
     935           0 :                     if (!*cut) {
     936           0 :                         *cut = (int *) calloc(word_size, sizeof(int));
     937             :                     }
     938           0 :                     (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]);
     939           0 :                     (*pos)[matchindex[i] - 1] = matchindex[i] - i;
     940           0 :                     (*cut)[matchindex[i] - 1] = matchlen[i];
     941             :                 }
     942           0 :                 j += strlen(matchrepl[matchindex[i]]);
     943           0 :                 i += matchlen[i] - 1;
     944             :           }
     945             :        }
     946             : 
     947           0 :   hnj_free (matchrepl);
     948           0 :   hnj_free (matchlen);
     949           0 :   hnj_free (matchindex);
     950             : 
     951             :   // recursive hyphenation of the first (compound) level segments
     952           0 :   if (dict->nextlevel) {
     953             :      char ** rep2;
     954             :      int * pos2;
     955             :      int * cut2;
     956             :      char * hyphens2;
     957           0 :      int begin = 0;
     958             : 
     959           0 :      rep2 = (char**) hnj_malloc (word_size * sizeof(char *));
     960           0 :      pos2 = (int*) hnj_malloc (word_size * sizeof(int));
     961           0 :      cut2 = (int*) hnj_malloc (word_size * sizeof(int));
     962           0 :      hyphens2 = (char*) hnj_malloc (word_size + 3);
     963           0 :      for (i = 0; i < word_size; i++) rep2[i] = NULL;
     964           0 :      for (i = 0; i < word_size; i++) if 
     965           0 :         (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
     966           0 :         if (i - begin > 1) {
     967           0 :             int hyph = 0;
     968           0 :             prep_word[i + 2] = '\0';
     969             :             /* non-standard hyphenation at compound boundary (Schiffahrt) */
     970           0 :             if (rep && *rep && *pos && *cut && (*rep)[i]) {
     971           0 :                 char * l = strchr((*rep)[i], '=');
     972           0 :                 size_t offset = 2 + i - (*pos)[i];
     973           0 :                 strncpy(prep_word + offset, (*rep)[i], prep_word_size - offset - 1);
     974           0 :                 prep_word[prep_word_size - 1] = '\0';
     975           0 :                 if (l) {
     976           0 :                     hyph = (l - (*rep)[i]) - (*pos)[i];
     977           0 :                     prep_word[2 + i + hyph] = '\0';
     978             :                 }
     979             :             }
     980           0 :             hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph,
     981             :                 hyphens2, &rep2, &pos2, &cut2, clhmin,
     982           0 :                 crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend));
     983           0 :             for (j = 0; j < i - begin - 1; j++) {
     984           0 :                 hyphens[begin + j] = hyphens2[j];
     985           0 :                 if (rep2[j] && rep && pos && cut) {
     986           0 :                     if (!*rep && !*pos && !*cut) {
     987             :                         int k;
     988           0 :                         *rep = (char **) malloc(sizeof(char *) * word_size);
     989           0 :                         *pos = (int *) malloc(sizeof(int) * word_size);
     990           0 :                         *cut = (int *) malloc(sizeof(int) * word_size);
     991           0 :                         for (k = 0; k < word_size; k++) {
     992           0 :                             (*rep)[k] = NULL;
     993           0 :                             (*pos)[k] = 0;
     994           0 :                             (*cut)[k] = 0;
     995             :                         }
     996             :                     }
     997           0 :                     (*rep)[begin + j] = rep2[j];
     998           0 :                     (*pos)[begin + j] = pos2[j];
     999           0 :                     (*cut)[begin + j] = cut2[j];
    1000             :                 }
    1001             :             }
    1002           0 :             prep_word[i + 2] = word[i + 1];
    1003           0 :             if (*rep && *pos && *cut && (*rep)[i]) {
    1004           0 :                 size_t offset = 1;
    1005           0 :                 strncpy(prep_word + offset, word, prep_word_size - offset - 1);
    1006           0 :                 prep_word[prep_word_size - 1] = '\0';
    1007             :             }
    1008             :         }
    1009           0 :         begin = i + 1;
    1010           0 :         for (j = 0; j < word_size; j++) rep2[j] = NULL;
    1011             :      }
    1012             :      
    1013             :      // non-compound
    1014           0 :      if (begin == 0) {
    1015           0 :         hnj_hyphen_hyph_(dict->nextlevel, word, word_size,
    1016             :             hyphens, rep, pos, cut, clhmin, crhmin, lend, rend);
    1017           0 :         if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
    1018             :             rep, pos, cut, clhmin);
    1019           0 :         if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
    1020             :             rep, pos, cut, crhmin);
    1021             :      }
    1022             :      
    1023           0 :      free(rep2);
    1024           0 :      free(cut2);
    1025           0 :      free(pos2);
    1026           0 :      free(hyphens2);
    1027             :   }
    1028             : 
    1029           0 :   hnj_free (prep_word);
    1030           0 :   return 0;
    1031             : }
    1032             : 
    1033             : /* UTF-8 normalization of hyphen and non-standard positions */
    1034           0 : int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
    1035             :         char *** rep, int ** pos, int ** cut)
    1036             : {
    1037             :   int i, j, k;
    1038           0 :   if ((((unsigned char) word[0]) >> 6) == 2) {
    1039           0 :     fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word);
    1040           0 :     return 1;
    1041             :   }
    1042             : 
    1043             :   /* calculate UTF-8 character positions */
    1044           0 :   for (i = 0, j = -1; i < word_size; i++) {
    1045             :     /* beginning of an UTF-8 character (not '10' start bits) */
    1046           0 :     if ((((unsigned char) word[i]) >> 6) != 2) j++;
    1047           0 :     hyphens[j] = hyphens[i];
    1048           0 :     if (rep && pos && cut && *rep && *pos && *cut) {
    1049           0 :         int l = (*pos)[i];
    1050           0 :         (*pos)[j] = 0;
    1051           0 :         for (k = 0; k < l; k++) {
    1052           0 :             if ((((unsigned char) word[i - k]) >> 6) != 2) (*pos)[j]++;
    1053             :         }
    1054           0 :         k = i - l + 1;
    1055           0 :         l = k + (*cut)[i];
    1056           0 :         (*cut)[j] = 0;        
    1057           0 :         for (; k < l; k++) {
    1058           0 :             if ((((unsigned char) word[k]) >> 6) != 2) (*cut)[j]++;
    1059             :         }
    1060           0 :         (*rep)[j] = (*rep)[i];
    1061           0 :         if (j < i) {
    1062           0 :             (*rep)[i] = NULL;
    1063           0 :             (*pos)[i] = 0;
    1064           0 :             (*cut)[i] = 0;
    1065             :         }
    1066             :     }
    1067             :   }
    1068           0 :   hyphens[j + 1] = '\0';
    1069             : #ifdef VERBOSE
    1070             :   printf ("nums: %s\n", hyphens);
    1071             : #endif
    1072           0 :   return 0;
    1073             : }
    1074             : 
    1075             : /* get the word with all possible hyphenations (output: hyphword) */
    1076           0 : void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, 
    1077             :     char * hyphword, char *** rep, int ** pos, int ** cut)
    1078             : {
    1079           0 :   int hyphenslen = l + 5;
    1080             : 
    1081             :   int i, j;
    1082           0 :   for (i = 0, j = 0; i < l; i++, j++) {
    1083           0 :     if (hyphens[i]&1) {
    1084           0 :       hyphword[j] = word[i];
    1085           0 :       if (*rep && *pos && *cut && (*rep)[i]) {
    1086           0 :         size_t offset = j - (*pos)[i] + 1;
    1087           0 :         strncpy(hyphword + offset, (*rep)[i], hyphenslen - offset - 1);
    1088           0 :         hyphword[hyphenslen-1] = '\0';
    1089           0 :         j += strlen((*rep)[i]) - (*pos)[i];
    1090           0 :         i += (*cut)[i] - (*pos)[i];
    1091           0 :       } else hyphword[++j] = '=';
    1092           0 :     } else hyphword[j] = word[i];
    1093             :   }
    1094           0 :   hyphword[j] = '\0';
    1095           0 : }
    1096             : 
    1097             : 
    1098             : /* main api function with default hyphenmin parameters */
    1099           0 : int hnj_hyphen_hyphenate2 (HyphenDict *dict,
    1100             :                            const char *word, int word_size, char * hyphens,
    1101             :                            char *hyphword, char *** rep, int ** pos, int ** cut)
    1102             : {
    1103           0 :   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
    1104           0 :     dict->clhmin, dict->crhmin, 1, 1);
    1105           0 :   hnj_hyphen_lhmin(dict->utf8, word, word_size,
    1106           0 :     hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2));
    1107           0 :   hnj_hyphen_rhmin(dict->utf8, word, word_size,
    1108           0 :     hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2));
    1109             : 
    1110             :   /* nohyphen */
    1111           0 :   if (dict->nohyphen) {
    1112           0 :     char * nh = dict->nohyphen;
    1113             :     int nhi;
    1114           0 :     for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
    1115           0 :         char * nhy = (char *) strstr(word, nh);
    1116           0 :         while (nhy) {
    1117           0 :             hyphens[nhy - word + strlen(nh) - 1] = '0';
    1118           0 :             if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = '0';
    1119           0 :             nhy = (char *) strstr(nhy + 1, nh);
    1120             :         }
    1121           0 :         nh = nh + strlen(nh) + 1;
    1122             :     }
    1123             :   }
    1124             : 
    1125           0 :   if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
    1126           0 :   if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
    1127             : #ifdef VERBOSE
    1128             :   printf ("nums: %s\n", hyphens);
    1129             : #endif
    1130           0 :   return 0;
    1131             : }
    1132             : 
    1133             : /* previous main api function with hyphenmin parameters */
    1134           0 : int hnj_hyphen_hyphenate3 (HyphenDict *dict,
    1135             :         const char *word, int word_size, char * hyphens,
    1136             :         char *hyphword, char *** rep, int ** pos, int ** cut,
    1137             :         int lhmin, int rhmin, int clhmin, int crhmin)
    1138             : {
    1139           0 :   lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
    1140           0 :   rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
    1141           0 :   clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
    1142           0 :   crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
    1143           0 :   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
    1144             :     clhmin, crhmin, 1, 1);
    1145           0 :   hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
    1146             :     rep, pos, cut, (lhmin > 0 ? lhmin : 2));
    1147           0 :   hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
    1148             :     rep, pos, cut, (rhmin > 0 ? rhmin : 2));
    1149           0 :   if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
    1150             : 
    1151             :   /* nohyphen */
    1152           0 :   if (dict->nohyphen) {
    1153           0 :     char * nh = dict->nohyphen;
    1154             :     int nhi;
    1155           0 :     for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
    1156           0 :         char * nhy = (char *) strstr(word, nh);
    1157           0 :         while (nhy) {
    1158           0 :             hyphens[nhy - word + strlen(nh) - 1] = 0;
    1159           0 :             if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = 0;
    1160           0 :             nhy = (char *) strstr(nhy + 1, nh);
    1161             :         }
    1162           0 :         nh = nh + strlen(nh) + 1;
    1163             :     }
    1164             :   }
    1165             : 
    1166           0 :   if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
    1167           0 :   return 0;
    1168             : }

Generated by: LCOV version 1.13