/*****************************************************************************/
/*                                                                           */
/*  TSORT.C                                                                  */
/*                                                                           */
/*  Sorting program for TeXspell word lists                                  */
/*                                                                           */
/*  Reinhard Mller, Inst. fr DatenverarbeitungsAnlagen  TU Braunschweig    */
/*                                                                           */
/*  01.03.94    Version 0.1 using 'qsort' from MSC 6.0 library               */
/*              (very very slow on already sorted lists)                     */
/*  02.03.94    Version 0.2 selfmade (copied & improved) QSort procedure.    */
/*              Compare modified to have special end values (166 & 241-254)  */
/*              BEFORE longer words with same starting part.                 */
/*  24.03.94    Bug in Qsort removed (if low = high = 0 Qsort is recursively */
/*              called with 0, 65535 !)                                      */
/*  26.03.94    Version 0.3 combining entries where special characters would */
/*              determine sorting order.                                     */
/*  28.03.94    Model checking, allow 'huge' array and prepared for GCC      */
/*****************************************************************************/
/* Version text that main() prints after the program name on stderr.         */
#define VERSION     "Version 0.4  22.06.94"

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Truth values stored in variables of type 'boolean'.                       */
#define TRUE  1
#define FALSE 0

/* Every later use of the identifier 'index' is rewritten to 'Index'.        */
/* NOTE(review): presumably this avoids a clash with the legacy index()      */
/* string function that some C libraries declare - confirm.                  */
#define index Index
/* 'index' is a signed type in every configuration below. If it is ever
   changed to an unsigned type, the two places that rely on reaching a
   negative value must be adapted. In RemDup() change
        for (iEntry = iWordCount - 2; iEntry >= 0; iEntry--)
to      for (iEntry = iWordCount - 2; iEntry < MAX_WORDS; iEntry--)
and in QSort() change
        if (iLow >= iHigh)
to      if ((iLow >= iHigh) || (iHigh == -1))
*/

/* Platform configuration: selects MAX_WORDS (capacity of the word pointer   */
/* table apcWords) and the signed integer type 'index' used to subscript it. */
#if defined (M_I86)

	/* Microsoft C */
/* Use the opportunity to reject memory models that cannot hold the table.   */
/* NOTE(review): only M_I86CM and M_I86LM are tested although the message    */
/* also allows Huge; this relies on the compiler defining one of them in     */
/* huge model as well - confirm.                                             */

	#if !defined (M_I86CM) && !defined (M_I86LM)
	#error Wrong memory model used! Must be either Compact, Large or Huge
	#endif

	#if defined (M_I86HM)
		#define MAX_WORDS   65534    /* kept below 65535, which a 16 bit index would read as -1 */
		typedef long        index;          /* actual size will be 32 bit (signed)   */
	#else   /* not huge */
		#define MAX_WORDS   16300    /* maximum number of FAR pointers within 64 K   */
		typedef int         index;          /* actual size will be 16 bit (signed)   */
	#endif  /* M_I86HM = huge model */

#elif defined (__TURBOC__)
	/* Turbo C */

	#define MAX_WORDS 15550
	typedef int index;

#else /* non i86 processor or GNU-C */

	#define MAX_WORDS   65534   /* kept below 65535, which a 16 bit index would read as -1 */
	typedef int         index;         /* actual size depends on machine type ! */

#endif  /* M_I86 */


/* Basic types. 'byte' is unsigned so that character codes above 127 (such   */
/* as 166 and 241..254, which RemDup() and PrepWord() test for) compare as   */
/* positive numbers.                                                         */
typedef unsigned int    word;       /* actual size depends on machine type ! */
typedef unsigned char   byte;
typedef unsigned int    boolean;    /* holds TRUE or FALSE; size depends on machine type */

/* Pascal style loop: 'repeat { ... } until (cond);' expands to              */
/* 'do { ... } while (!(cond));'.                                            */
#define repeat      do
#define until(x)    while(!(x))


#define WORD_LEN    1024     /* maximum length of input line                 */
char    acWord [WORD_LEN + 1];      /* line buffer shared by ReadWords(), PrepWord() and RemDup() */
char *  apcWords [MAX_WORDS + 1];   /* table of heap copies (strdup) of all word lines            */
index   iWordCount;                 /* number of entries currently used in apcWords               */
FILE *  fpIn;                       /* input file currently being read by main()                  */
FILE *  fpOut;                      /* output file for the sorted and merged list                 */
FILE *  fpLog;                      /* stream for progress and error messages (main() uses stdout) */


/*****************************************************************************/
/*          Print Error Message and Exit from Program                        */
/*****************************************************************************/

void errexit (short ret_code, char *psFormat, ...)
{
    va_list             marker;

    if (psFormat)
    {
        fprintf (fpLog, "*** ERROR: ");
        va_start (marker, psFormat);       /* Initialize variable arguments  */
        vfprintf (fpLog, psFormat, marker);
        va_end (marker);                   /* Reset variable arguments       */
    }
    if (ret_code < 0)
        return;                                      /* just message; NO exit*/
    exit (ret_code);
}   /* errexit */


/*****************************************************************************/
/*          Read one input file                                              */
/*****************************************************************************/

/* Reads all lines of one input file and appends a heap copy of every valid  */
/* line to apcWords, counting them in iWordCount.                            */
/*   fpInput   stream opened for reading; it is read up to its end and is    */
/*             not closed here.                                              */
/* A line is valid when everything before its final '\n' consists of the     */
/* characters in acValid; other lines are reported on stderr and skipped.    */
/* Terminates via errexit() when memory or table space runs out, or when     */
/* the stream reports a read error.                                          */
/*                                                                           */
/* The loop is driven by the result of fgets() instead of feof(): testing    */
/* feof() after the read dropped a final line without '\n' and never ended   */
/* on a read error.                                                          */
void ReadWords (FILE * fpInput)
{
    /* NOTE(review): the third literal looks mis-encoded in this copy of the */
    /* file; presumably it held the code page bytes of the German umlauts    */
    /* and sharp s - confirm against the original before relying on it.      */
    static const char   acValid [] = "abcdefghijklmnopqrstuvwxyz"
                                     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                     "ә᠂酊냈"
                                     ",";
    size_t              nLen;

    /* fgets() stores at most WORD_LEN - 1 characters, so acWord (WORD_LEN   */
    /* + 1 bytes) always has room for one appended '\n'.                     */
    while (fgets (acWord, WORD_LEN, fpInput) != NULL)
    {
        nLen = strlen (acWord);

        /* Last line of the file without newline: supply it, because the     */
        /* rest of the program expects every stored line to end with '\n'.   */
        if ((nLen > 0) && (acWord [nLen - 1] != '\n') && feof (fpInput))
        {
            acWord [nLen++] = '\n';
            acWord [nLen]   = '\0';
        }

        /* Reject empty reads, fragments of over-long lines (no '\n') and    */
        /* lines containing characters outside acValid.                      */
        if ((nLen == 0) || (acWord [nLen - 1] != '\n') ||
            (strspn (acWord, acValid) != nLen - 1))
        {
            fprintf (stderr, "Word removed: %s\n", acWord);
            continue;
        }

        /* 'index' may be int or long, so print it as long.                  */
        if ((apcWords [iWordCount] = strdup (acWord)) == NULL)
            errexit (4, "Out of memory for word storage (%ld)!\n",
                     (long) iWordCount);
        if (++iWordCount >= MAX_WORDS)
            errexit (4, "Too many words (%ld)!\n", (long) iWordCount);
    }   /* while */

    if (ferror (fpInput))
        errexit (3, "Read error on input file!\n");
}   /* ReadWords */

/*****************************************************************************/
/*          Compare two words                                                */
/*****************************************************************************/

/* Compares the lines stored at apcWords [a] and apcWords [b] with strcmp()  */
/* and yields its result (negative, zero or positive). Both arguments are    */
/* table indices, not strings.                                               */
#define CmpWords(a,b) (strcmp (apcWords [a], apcWords [b]))

/* int CmpWords (index iIndex1, index iIndex2)                  */
/* {                                                            */
/*     return (strcmp (apcWords [iIndex1], apcWords [iIndex2]));*/
/* }                                                            */

/*****************************************************************************/
/*          Swap two words                                                   */
/*****************************************************************************/

/* Exchanges the two line pointers stored at the given positions of          */
/* apcWords; the strings themselves are not touched.                         */
void SwapWords (index iIndex1, index iIndex2)
{
    char ** ppcFirst  = &apcWords [iIndex1];
    char ** ppcSecond = &apcWords [iIndex2];
    char *  pcHold    = *ppcFirst;

    *ppcFirst  = *ppcSecond;
    *ppcSecond = pcHold;
}

/*****************************************************************************/
/*          Quicksort                                                        */
/*****************************************************************************/

/* Based on the QuickSort algorithm in Microsoft's 'sortdemo.c'              */
#if defined (MSDOS)
#pragma check_stack (on)
#endif

/* Sorts the entries apcWords [iLow] .. apcWords [iHigh] (both inclusive)    */
/* into ascending CmpWords() order using quicksort.                          */
/*   iLow, iHigh   first and last index of the range; a range with           */
/*                 iLow >= iHigh is already sorted and left untouched.       */
/* Only the smaller partition is handled by a recursive call; the larger     */
/* one is processed by the enclosing loop. Every recursive call therefore    */
/* covers at most half of its caller's range, so the recursion depth stays   */
/* logarithmic even for degenerate input such as many equal words. The       */
/* resulting order is the same as when recursing into both partitions.       */
/* 'index' must be signed: iUp - 1 becomes -1 when the pivot ends at 0.      */
void QSort (index iLow, index iHigh)
{
    index   iUp;
    index   iDown;

    for (;;)
    {
        if (iLow >= iHigh)                           /* just one -> all done */
            return;

        if ((iHigh - iLow) == 1)
        {
            if (CmpWords (iLow, iHigh) > 0)          /* if only two elements:*/
                SwapWords (iLow, iHigh);             /*  test and swap       */
            return;
        }

        /* Take the pivot from the middle and park it at the end: this keeps */
        /* already sorted lists fast. The midpoint is computed without       */
        /* adding the two bounds, so it cannot overflow 'index'.             */
        SwapWords (iLow + (iHigh - iLow) / 2, iHigh);

        /* Both cursors keep their position between rounds; what has been    */
        /* compared once is not tested again.                                */
        iUp = iLow;
        iDown = iHigh;
        do
        {
            /* Move in from both sides towards the pivot element.            */
            while ((iUp < iDown) &&
                   (CmpWords (iUp, iHigh) <= 0))
                iUp++;

            while ((iDown > iUp) &&
                   (CmpWords (iDown, iHigh) >= 0))
                iDown--;

            /* The cursors have not met, so the two elements they stopped at */
            /* are on the wrong sides: swap them.                            */
            if (iUp < iDown)
                SwapWords (iUp, iDown);

        } while (iUp < iDown);

        /* Move pivot element back to its proper place in the array.         */
        SwapWords (iUp, iHigh);

        /* Recurse into the smaller part, continue the loop with the larger. */
        if ((iUp - iLow) < (iHigh - iUp))
        {
            QSort (iLow, iUp - 1);
            iLow = iUp + 1;
        }
        else
        {
            QSort (iUp + 1, iHigh);
            iHigh = iUp - 1;
        }
    }
}   /* QSort */

#if defined (MSDOS)
#pragma check_stack
#endif

/*****************************************************************************/
/*          Prepare word string to be combined                               */
/*****************************************************************************/

/* Brings entry iEntry into the form RemDup() needs for merging.             */
/*   iEntry   index of the line in apcWords; the line is assumed to still    */
/*            end with '\n', which is removed first.                         */
/*   wPos     length of the start this line shares with its neighbour.       */
/* After removing the newline:                                               */
/*   - if the line ends at wPos, the character 246 (nop extension) is put    */
/*     there; the byte freed by the removed '\n' keeps the string            */
/*     terminated;                                                           */
/*   - if the character at wPos is not ',' and a ',' follows later, the      */
/*     line is rebuilt in acWord as                                          */
/*         <start> { ',' <common part> <individual part> }                   */
/*     where the common part is the text from wPos up to the first ',' and   */
/*     the individual parts are the ',' terminated pieces after it, up to    */
/*     the end of the line or the character 166. The entry is then replaced  */
/*     by a heap copy of acWord;                                             */
/*   - otherwise the line is left as it is (without its newline).            */
/* Terminates via errexit() on a malformed entry, when the rebuilt line      */
/* would exceed WORD_LEN characters, or when memory runs out. The length is  */
/* checked BEFORE each piece is appended, so acWord can no longer be         */
/* overrun by an entry that expands beyond the buffer.                       */
void PrepWord (index iEntry, word wPos)
{
    char *  pcLine;                                  /* the whole entry      */
    char *  pcCommon;                                /* entry at wPos        */
    char *  pcPart;                                  /* current indiv. part  */
    char *  pcNext;                                  /* start of next part   */
    size_t  nUsed;                                   /* chars used in acWord */
    size_t  nCommon;
    size_t  nPart;

    pcLine = apcWords [iEntry];
    pcLine [strlen (pcLine) - 1] = '\0';             /* remove trailing '\n' */
    pcCommon = pcLine + wPos;

    if (*pcCommon == '\0')
    {
        *pcCommon = (char) 246;                      /* --> nop extension    */
        return;
    }
    if ((*pcCommon == ',') || ((pcPart = strchr (pcCommon, ',')) == NULL))
        return;                                      /* nothing to expand    */

    /* new base word: the first wPos characters of the entry                 */
    nUsed = wPos;
    if (nUsed > WORD_LEN)
        errexit (4, "Prepared line too long \"%s\"!\n", pcLine);
    memcpy (acWord, pcLine, nUsed);
    acWord [nUsed] = '\0';

    *pcPart++ = '\0';                                /* terminate common part*/
    nCommon = strlen (pcCommon);

    while (*pcPart && ((unsigned char) *pcPart != 166))
    {
        if ((pcNext = strchr (pcPart, ',')) == NULL)
            errexit (3, "Invalid entry \"%s\"!\n", apcWords [iEntry]);
        *pcNext++ = '\0';
        nPart = strlen (pcPart);

        /* separator + common part + individual part must still fit          */
        if (nUsed + 1 + nCommon + nPart > WORD_LEN)
            errexit (4, "Prepared line too long \"%s\"!\n", acWord);

        acWord [nUsed++] = ',';                      /* separator            */
        memcpy (acWord + nUsed, pcCommon, nCommon);  /* common part          */
        nUsed += nCommon;
        memcpy (acWord + nUsed, pcPart, nPart + 1);  /* individual part + \0 */
        nUsed += nPart;

        pcPart = pcNext;
    }

    free (apcWords [iEntry]);
    if ((apcWords [iEntry] = strdup (acWord)) == NULL)
        errexit (4, "Out of memory for prepared word line \"%s\"!\n", acWord);
}   /* PrepWord */


/*****************************************************************************/
/*          Remove duplicate entries                                         */
/*****************************************************************************/

/* Walks the sorted table apcWords from its end towards its start and        */
/* compares every entry with its successor:                                  */
/*   - identical lines: the successor is freed and removed from the table;   */
/*   - lines whose common start contains one of the special characters in    */
/*     acTstStr (',', 166, 241..254), or whose first differing character is  */
/*     such a character in either line: both lines are passed through        */
/*     PrepWord() and replaced by ONE line built from the common start and   */
/*     the sorted, duplicate-free list of their extensions;                  */
/*   - all other pairs are left untouched.                                   */
/* Every removal and merge is reported on fpLog. The global acWord is used   */
/* as work buffer; apcWords and iWordCount are updated in place. Errors end  */
/* the program via errexit().                                                */
void RemDup (void)
{
    index           iEntry;
    index           iMove;
    index           iExt;
    int             iComp;                           /* MUST be signed       */
    word            wLen;
    byte *          pc1;
    byte *          pc2;
    byte *          pc3;
    byte *          pc4;                             /* just for gcc         */
    register byte   b1;
    register byte   b2;
    boolean         bSorted;

    /* set of special characters, usable with strpbrk()                      */
    static char acTstStr [] = {',', 166, 241, 242, 243, 244,
                                245, 246, 247, 248, 249,
                                250, 251, 252, 253, 254, '\0'};
    /* tail appended to a merged line that lists several extensions          */
    static char acEndStr [] = {',', 166, '\n', '\0'};

#define MAX_EXT 64
    static char *   apcExt [MAX_EXT];                /* heap copies of the extensions */
    index           iExtCount;

    /* runs down to 0 and ends when iEntry becomes negative (signed index)   */
    for (iEntry = iWordCount - 2; iEntry >= 0; iEntry--)
    {
        /* find the first position where the entry and its successor differ  */
        pc1 = apcWords [iEntry];
        pc2 = apcWords [iEntry + 1];
        while (*pc1 && (*pc1 == *pc2))
        {
            pc1++;
            pc2++;
        }
        b1 = *pc1;
        b2 = *pc2;
        if (!b1 && !b2)                          /* both lines are identical */
        {
            pc1 = apcWords [iEntry + 1];
            pc1 [strlen (pc1) - 1] = '\0';           /* remove trailing '\n' */
            fprintf (fpLog, "removed duplicate of \"%s\"\n", pc1);
            free (pc1);
            iWordCount--;
            /* close the gap left by the removed successor                   */
            for (iMove = iEntry + 1; iMove < iWordCount; iMove++)
                apcWords [iMove] = apcWords [iMove + 1];
            continue;
        }
        /* cut the entry at the mismatch and look for a special character    */
        /* inside the common start                                           */
        *pc1 = '\0';
        if ((pc3 = strpbrk (apcWords [iEntry], acTstStr)) != NULL)
        {
            /* NOTE(review): pc3 lies before pc1, so 'pc3 - pc1' is negative */
            /* and the subtraction moves pc2 forward instead of back to the  */
            /* matching offset. b2 is only used by the test below, which b1  */
            /* (a special character here) already decides - confirm intent.  */
            *pc1 = b1;                               /* first restore        */
            pc2 -= pc3 - pc1;                        /*  then use pc1        */
            pc1 = pc3;                               /*   before setting new */
            b1 = *pc1;                               /* update b1            */
            b2 = *pc2;                               /*      & b2            */
        }
        *pc1 = b1;                                   /* restore if not done  */

        if ((b1 != ',') && (b1 != 166) && ((b1 < 241) || (b1 > 254)) &&
            (b2 != ',') && (b2 != 166) && ((b2 < 241) || (b2 > 254)))
        {
           continue;                                 /* different strings    */
        }

        /* both lines still end with '\n' here, hence none in the format     */
        fprintf (fpLog, "combined %s"
                        "       & %s",
                        apcWords [iEntry], apcWords [iEntry + 1]);

        /* wLen = length of the start both lines have in common              */
        pc4 = apcWords [iEntry];
        wLen = pc1 - pc4;
        PrepWord (iEntry, wLen);
        PrepWord (iEntry + 1, wLen);
        /* PrepWord() may have replaced the entries: fetch them again        */
        pc1 = apcWords [iEntry] + wLen;
        pc2 = apcWords [iEntry + 1] + wLen;

        b1 = *pc1;
        *pc1 = '\0';
        strcpy (acWord, apcWords [iEntry]);          /* copy common part     */
        pc3 = strchr (acWord, '\0') + 1;             /* work area behind it  */
        *pc1 = b1;


        /* NOTE(review): the length test below runs after strcpy()/strcat()  */
        /* have written; a result that is too long has already overrun       */
        /* acWord when the error is reported.                                */
        strcpy (pc3, pc1);                           /* ext. of first entry  */
        strcat (pc3, ",");
        strcat (pc3, pc2);                           /* ext. of second entry */
        pc4 = acWord;
        if ((pc3 - pc4 + strlen (pc3)) > WORD_LEN)
            errexit (4, "New line too long \"%s\"!\n", acWord);
        free (apcWords [iEntry]);                    /* BOTH will            */
        free (apcWords [iEntry + 1]);                /*  be replaced by ONE  */
        iWordCount--;
        /* apcWords [iEntry] stays unset until the merged line is stored     */
        for (iMove = iEntry + 1; iMove < iWordCount; iMove++)
            apcWords [iMove] = apcWords [iMove + 1];

        iExtCount = 0;

        /* ---  read all extensions into separate strings  ----------------- */

        pc1 = pc3;
        while (*pc1)
        {
            if ((*pc1 == ',') || (*pc1 == 166))
                pc1++;                               /* skip separators      */
            else
            {
                /* pc2 = first special character of this extension, or its   */
                /* end if there is none                                      */
                if ((pc2 = strpbrk (pc1, acTstStr)) == NULL)
                {
                    pc2 = strchr (pc1, '\0');
                    pc2 [1] = '\0';                  /* double termination   */
                }
                if (*pc2 == 166)
                    errexit (3, "Illegal char(s) before {166} (\"%s\")!\n", pc1);
                if (!*pc2 || (*pc2 == ','))
                    *pc2++ = 246;                    /* nop extension        */
                else
                {
                    /* a character 241..254 must close the extension         */
                    pc2++;
                    if (*pc2 && (*pc2 != ','))
                        errexit (3, "Illegal char(s) after special (\"%s\")!\n", pc1);
                }
                /* cut behind the extension just long enough to copy it      */
                b2 = *pc2;
                *pc2 = '\0';
                if ((apcExt [iExtCount] = strdup (pc1)) == NULL)
                    errexit (4, "Out of memory extension in \"%s\"!\n", acWord);
                if (++iExtCount >= MAX_EXT)
                    errexit (4, "Too many extensions in \"%s\"!\n", acWord);
                *pc2 = b2;
                pc1 = pc2;
            }
        }

        /* ---  sort all extensions (using bubble sort)  ------------------- */
        /* order: shorter before longer, equal lengths by strcmp(); equal    */
        /* extensions are dropped                                            */

        repeat
        {
            bSorted = TRUE;
            for (iExt = 0; iExt < iExtCount - 1; iExt++)
            {
                if ((iComp = strlen (apcExt [iExt])
                           - strlen (apcExt [iExt + 1])) == 0)
                    iComp = strcmp (apcExt [iExt], apcExt [iExt + 1]);
                if (iComp > 0)
                {
                    pc1 = apcExt [iExt];
                    apcExt [iExt] = apcExt [iExt + 1];
                    apcExt [iExt + 1] = pc1;
                    bSorted = FALSE;
                }
                else if (iComp == 0)
                {
                    free (apcExt [iExt + 1]);
                    iExtCount--;
                    for (iMove = iExt + 1; iMove < iExtCount; iMove++)
                        apcExt [iMove] = apcExt [iMove + 1];
                    iExt--;                            /* use same iExt again    */
                }
            }
        } until (bSorted);

        /* ---  write all extensions into acWord  -------------------------- */

        /* NOTE(review): with exactly one extension the line gets neither    */
        /* acEndStr nor a '\n', and apcExt [0] is not freed - confirm that   */
        /* this is intended.                                                 */
        if (iExtCount == 1)
            strcat (acWord, apcExt [0]);
        else
        {
            pc1 = strchr (acWord, '\0');
            for (iExt = 0; iExt < iExtCount; iExt++)
            {
                *pc1++ = ',';
                strcpy (pc1, apcExt [iExt]);
                free (apcExt [iExt]);
                pc1 = strchr (pc1, '\0');
            }
            strcpy (pc1, acEndStr);
        }

        /* ---  store the merged line as the new entry  -------------------- */

        if ((apcWords [iEntry] = strdup (acWord)) == NULL)
            errexit (4, "Out of memory for new word line \"%s\"!\n", acWord);
        fprintf (fpLog, "      to %s", acWord);

        /* the loop's iEntry-- then compares the merged line with its new    */
        /* successor                                                         */
        if (iEntry <= iWordCount - 2)                /* if not at end of list*/
            iEntry++;                                /*  redo current one    */

    }   /* for iEntry */
}   /* RemDup */

/*****************************************************************************/
/*          ******  main  ******                                             */
/*****************************************************************************/

/* Program entry: tsort inputfile [inputfile2] [...] outputfile              */
/*   argc, argv   at least two file names are required; the last one names   */
/*                the output file, which must not exist yet.                 */
/* Reads all input files, sorts the collected lines, merges duplicates and   */
/* related entries and writes the result to the output file. Progress is     */
/* reported on fpLog (stdout).                                               */
/* Exit status: 0 on success, 2 on wrong usage, 3 when a file cannot be      */
/* opened, already exists or cannot be written; other codes come from        */
/* errexit(). Write errors on the output file are detected and reported      */
/* instead of being ignored.                                                 */
int main (int argc, char ** argv)
{
    int     iFile;
    index   iEntry;
    char *  psOutName;
#if defined (MSDOS)
    char *  pc1;

    if ((pc1 = strrchr (*argv, '\\')) != NULL)
        *argv = pc1 + 1;                             /* truncate path        */
    if ((pc1 = strchr (*argv, '.')) != NULL)         /*  and extension       */
        *pc1 = '\0';                                 /*   from program name  */
    strupr (*argv);
#endif

    fprintf (stderr, "%s  %s\n", *argv, VERSION);

    iWordCount = 0;
    fpLog = stdout;                      /* may be redirected even under DOS */

/* -------  Check the arguments  ------------------------------------------- */

    if (argc < 3)                        /* need at least input and output   */
    {
        fprintf (stderr, "Usage: %s inputfile [inputfile2] [...] outputfile\n\n"
                         "This program can be used to check, sort and merge "
                         "texspell dictionaries.\n",
                         *argv);
        exit (2);
    }
    psOutName = argv [argc - 1];

/* -------  Open the output file, refusing to overwrite  ------------------- */

    if ((fpOut = fopen (psOutName, "r")) != NULL)
    {
        fprintf (fpLog, "outputfile \"%s\" already exist!\n", psOutName);
        exit (3);
    }
    if ((fpOut = fopen (psOutName, "wt")) == NULL)
    {
        fprintf (fpLog, "unable to open outputfile \"%s\"!\n", psOutName);
        exit (3);
    }

/* -------  Read (all) input file(s)  -------------------------------------- */

    for (iFile = 1; iFile < argc - 1; iFile++)
    {
        if ((fpIn = fopen (argv [iFile], "rt")) == NULL)
        {
            fprintf (fpLog, "unable to open inputfile \"%s\"!\n",
                argv [iFile]);
            exit (3);
        }
        fprintf (fpLog, "Reading \"%s\"\n", argv [iFile]);
        ReadWords (fpIn);
        fclose (fpIn);
    }

    apcWords [iWordCount] = NULL;                    /* not really needed    */

/* -------  Sort all strings  ---------------------------------------------- */

    /* 'index' may be int or long, so print it as long                       */
    fprintf (fpLog, "Sorting %ld entries\n", (long) iWordCount);

    QSort (0, iWordCount - 1);

/* -------  Remove all duplicates  ----------------------------------------- */

    fprintf (fpLog, "Searching for duplicates\n");
    RemDup ();

/* -------  Write the output file  ----------------------------------------- */

    if (fpOut != stdout)
        fprintf (fpLog, "Writing \"%s\"\n", psOutName);

    for (iEntry = 0; iEntry < iWordCount; iEntry++)
    {
        if (fputs (apcWords [iEntry], fpOut) == EOF)
        {
            fprintf (fpLog, "error writing outputfile \"%s\"!\n", psOutName);
            exit (3);
        }
    }

/* -------  Clean up  ------------------------------------------------------ */

    /* fclose() flushes the buffered data, so its result must be checked too */
    if ((fpOut != stdout) && (fclose (fpOut) == EOF))
    {
        fprintf (fpLog, "error writing outputfile \"%s\"!\n", psOutName);
        exit (3);
    }
    return (0);                                      /* all done and fine    */
}   /* main */
