/* wordref.c (emx+gcc) -- Copyright (c) 1996 by Eberhard Mattes */

/* This sample program demonstrates how to use the BSD database
   library (B-trees) and how to handle signals.

   It's a sample program -- fgrep takes less disk space and is
   probably faster. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <db.h>
#include <sys/param.h>
#ifdef __EMX__
#include <sys/nls.h>
#endif

/* We use three B-tree files:

   object | file name       | key                  | data
   -------+-----------------+----------------------+----------
   DBF0   | "wordref.db0"   | word, fileno, lineno | -
   DBF1   | "wordref.db1"   | fileno               | file name
   DBF2   | "wordref.db2"   | file name            | fileno

   fileno and lineno are stored in the key of DBF0 to obtain output
   sorted by fileno and lineno. */

/* The key structure for DBF0.  The structure is packed and ends in a
   one-element array that stands for the variable-length,
   null-terminated word: code building such a key allocates
   sizeof (struct key0) plus the length of the word, the single array
   element providing the room for the terminating null character.
   compare0() orders these keys by word, then fileno, then lineno. */

struct key0
{
  unsigned fileno;              /* File number (see DBF1 and DBF2) */
  unsigned lineno;              /* Line number */
  char word[1];                 /* Null-terminated string */
} __attribute__ ((__packed__));

/* The key structure for DBF1.  The record's data is the file name
   belonging to this file number; compare1() orders these keys
   numerically. */

struct key1
{
  unsigned fileno;              /* File number */
};

/* The data structure for DBF2.  The record's key is the file name
   (without a terminating null character); this structure holds the
   file number assigned to that name. */

struct data2
{
  unsigned fileno;              /* File number */
};

/* This structure describes one of the three database files.  The
   COMPARE, DUP and CACHE_SIZE members are the parameters handed to
   dbopen() when the file is opened; DB is NULL as long as the file is
   not open. */

struct dbfile
{
  DB *db;                       /* Database file */
  int (*compare)(const DBT*, const DBT*); /* Comparison function */
  int dup;                      /* Allow duplicates */
  unsigned cache_size;          /* Cache size */
  char fname[MAXPATHLEN];       /* File name of the database file */
};

/* Define an array holding the three database files and define
   shorthands for convenience.  The array index is also the digit at
   the end of the database file name ("wordref.db0", ...). */

static struct dbfile dbfile[3];

/* DBF0: (word, fileno, lineno) keys.  DBF1: fileno -> file name.
   DBF2: file name -> fileno. */

#define DBF0    dbfile[0]
#define DBF1    dbfile[1]
#define DBF2    dbfile[2]

/* To avoid looking up DBF1 and DBF2 over and over, we keep the last
   (file name, fileno) pair in these two variables.  The values are
   valid only if cur_fname is not the empty string. */

static char cur_fname[MAXPATHLEN];
static unsigned cur_fileno;

/* This is the set of signals to block while performing an operation
   that must not be interrupted.  It is filled in by init() and used
   by block() and unblock(). */

static sigset_t block_set;

/* This variable is non-zero if the database files should be closed
   during atexit() processing.  It is set by wordref_open() and reset
   by wordref_close() and at the end of main(). */

static int cleanup;

/* This variable is set to 'l' or 'u' by the -l and -u options,
   respectively, of the `add' command.  If it's zero, the case of
   words won't be changed. */

static char word_case;

/* This variable is set to true by the `-c' option of the `add'
   command.  If it's true, underscores and non-leading digits are
   treated as word characters. */

static char lang_c;


/* Yield the smaller of A and B.  This is a macro: an argument may be
   evaluated more than once, so don't pass expressions having side
   effects. */

#define MIN(a,b) ((b) > (a) ? (a) : (b))


/* Print a summary of the command line syntax to stdout and terminate
   the process with exit code 1.  This function does not return. */

static void usage (void)
{
  static const char help_text[] =
    "Usage:\n"
    "  wordref clear                 Delete the database\n"
    "  wordref add [-clu] <file>     Add words of <file> to the database\n"
    "                                  -c   words are C identifiers\n"
    "                                  -l   convert words to lower case\n"
    "                                  -u   convert words to upper case\n"
    "  wordref find <word>           Query database for <word>\n"
    "  wordref list                  List the database contents";

  /* puts() appends the final newline. */

  puts (help_text);
  exit (1);
}


/* Block the signals in BLOCK_SET. */

static void block (void)
{
  sigprocmask (SIG_BLOCK, &block_set, NULL);
}


/* Unblock the signals in BLOCK_SET. */

static void unblock (void)
{
  sigprocmask (SIG_UNBLOCK, &block_set, NULL);
}


/* Close any open database files.  Return a non-zero termination code
   if an error occurred; return 0 if successful. */

static int wordref_close (void)
{
  int i, result;

  block ();
  result = 0; cleanup = 0;
  for (i = 0; i < 3; ++i)
    {
      if (dbfile[i].db != NULL)
        {
          if (dbfile[i].db->close (dbfile[i].db) != 0)
            {
              perror (dbfile[i].fname);
              result = 2;
            }
          dbfile[i].db = NULL;
        }
    }
  unblock ();
  return result;
}


/* Handle asynchronous signals which cause process termination, such
   as SIGINT and SIGTERM. */

static void term_sig (int signo)
{
  struct sigaction sa;
  sigset_t set;

  /* stderr is buffered, so this might happen to work. */

  fprintf (stderr, "Got signal %d, cleaning up...\n", signo);

  /* Close any open database files. */

  wordref_close ();

  /* Regenerate the signal SIGNO with default action installed to
     terminate the process.  Don't forget to unblock the signal. */

  sa.sa_handler = SIG_DFL;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  if (sigaction (signo, &sa, NULL) != 0)
    perror ("sigaction");
  sigemptyset (&set);
  sigaddset (&set, signo);
  sigprocmask (SIG_UNBLOCK, &set, NULL);
  raise (signo);
  exit (3);
}


/* Handle synchronous process termination.  This function is
   registered with atexit(); it closes the database files if CLEANUP
   says that they may still be open. */

static void wordref_atexit (void)
{
  /* Nothing to do unless the database files have been opened and not
     yet been closed. */

  if (!cleanup)
    return;

  fprintf (stderr, "Cleaning up...\n");
  wordref_close ();
}


/* Call get() of database file DBF with signals blocked and with
   error checking.  KEY, DATA and FLAGS are passed to get() unchanged.
   If get() returns -1, print an error message and terminate the
   process with exit code 2.  Otherwise return the value returned by
   get(); the callers take 0 to mean that the key has been found. */

static int db_get (struct dbfile *dbf, DBT *key, DBT *data, unsigned flags)
{
  int rc, saved_errno;

  block ();
  rc = dbf->db->get (dbf->db, key, data, flags);

  /* unblock() calls sigprocmask() which is allowed to modify errno.
     Save the value set by get() so that perror() reports the right
     error. */

  saved_errno = errno;
  unblock ();
  if (rc == -1)
    {
      errno = saved_errno;
      perror (dbf->fname);
      exit (2);
    }
  return rc;
}


/* Call seq() of database file DBF with signals blocked and with
   error checking.  KEY, DATA and FLAGS are passed to seq() unchanged.
   If seq() returns -1, print an error message and terminate the
   process with exit code 2.  Otherwise return the value returned by
   seq(); the callers take 0 to mean that a record has been
   retrieved. */

static int db_seq (struct dbfile *dbf, DBT *key, DBT *data, unsigned flags)
{
  int rc, saved_errno;

  block ();
  rc = dbf->db->seq (dbf->db, key, data, flags);

  /* unblock() calls sigprocmask() which is allowed to modify errno.
     Save the value set by seq() so that perror() reports the right
     error. */

  saved_errno = errno;
  unblock ();
  if (rc == -1)
    {
      errno = saved_errno;
      perror (dbf->fname);
      exit (2);
    }
  return rc;
}


/* Call put() of database file DBF with signals blocked and with
   error checking.  KEY, DATA and FLAGS are passed to put() unchanged.
   If put() returns -1, print an error message and terminate the
   process with exit code 2.  Otherwise return the value returned by
   put(). */

static int db_put (struct dbfile *dbf, DBT *key, DBT *data, unsigned flags)
{
  int rc, saved_errno;

  block ();
  rc = dbf->db->put (dbf->db, key, data, flags);

  /* unblock() calls sigprocmask() which is allowed to modify errno.
     Save the value set by put() so that perror() reports the right
     error. */

  saved_errno = errno;
  unblock ();
  if (rc == -1)
    {
      errno = saved_errno;
      perror (dbf->fname);
      exit (2);
    }
  return rc;
}


/* Comparison function for DBF0.  DBT1 and DBT2 point to keys of type
   struct key0.  Keys are ordered by word (using strcmp()), keys with
   equal words are ordered by fileno, and keys with equal words and
   equal filenos are ordered by lineno.  Return a negative number,
   zero, or a positive number if the first key is less than, equal
   to, or greater than the second key, respectively. */

static int compare0 (const DBT *dbt1, const DBT *dbt2)
{
  const struct key0 *lhs = dbt1->data;
  const struct key0 *rhs = dbt2->data;
  int order = strcmp (lhs->word, rhs->word);

  /* The word is the most significant part of the key. */

  if (order != 0)
    return order;

  /* Same word: the file number decides, then the line number. */

  if (lhs->fileno != rhs->fileno)
    return lhs->fileno < rhs->fileno ? -1 : 1;
  if (lhs->lineno != rhs->lineno)
    return lhs->lineno < rhs->lineno ? -1 : 1;
  return 0;
}


/* Comparison function for DBF1.  DBT1 and DBT2 point to keys of type
   struct key1, which are ordered numerically by fileno.  Return -1,
   0, or 1 if the first key is less than, equal to, or greater than
   the second key, respectively. */

static int compare1 (const DBT *dbt1, const DBT *dbt2)
{
  const struct key1 *lhs = (const struct key1 *)dbt1->data;
  const struct key1 *rhs = (const struct key1 *)dbt2->data;
  unsigned a = lhs->fileno;
  unsigned b = rhs->fileno;

  /* Each comparison yields 0 or 1, so the difference is -1, 0, or
     1. */

  return (a > b) - (a < b);
}


/* Comparison function for DBF2.  The keys are file names.  Note that
   the file name in DBF2 is not null-terminated, therefore the bytes
   are compared with memcmp() over the length both keys have in
   common; if those bytes are equal, the shorter key is the smaller
   one.  Return a negative number, zero, or a positive number if the
   first key is less than, equal to, or greater than the second key,
   respectively. */

static int compare2 (const DBT *dbt1, const DBT *dbt2)
{
  size_t common = dbt1->size < dbt2->size ? dbt1->size : dbt2->size;
  int order = memcmp (dbt1->data, dbt2->data, common);

  if (order != 0)
    return order;

  /* One key is a prefix of the other one (or the keys are equal). */

  if (dbt1->size == dbt2->size)
    return 0;
  return dbt1->size < dbt2->size ? -1 : 1;
}


/* Open one database (B-tree) file, pointed to by DBF, using the
   comparison function, duplicate setting and cache size recorded in
   DBF.  If CREATE is non-zero, the file is opened for reading and
   writing and created if it does not exist; otherwise, it's opened
   for reading only.  On success, the handle is stored to DBF->db.  On
   failure, print an error message and terminate the process with
   exit code 2. */

static void wordref_open_one (struct dbfile *dbf, int create)
{
  BTREEINFO info;
  DB *db;
  int oflags;

  /* Clear the entire structure first: BTREEINFO of the BSD database
     library has members which are not set below (maxkeypage, for
     instance) and dbopen() must not see indeterminate values. */

  memset (&info, 0, sizeof (info));
  info.flags = dbf->dup ? R_DUP : 0;
  info.cachesize = dbf->cache_size;
  info.psize = 4096;
  info.lorder = 0;
  info.minkeypage = 0;
  info.compare = dbf->compare;
  info.prefix = NULL;
  oflags = create ? O_CREAT | O_RDWR : O_RDONLY;

  /* Keep signals blocked until the handle is stored to DBF->db; that
     way, the signal handler never misses an open database file. */

  block ();
  db = dbopen (dbf->fname, oflags, S_IREAD | S_IWRITE, DB_BTREE, &info);
  if (db == NULL)
    {
      /* Print the message before unblock() gets a chance to modify
         errno. */

      perror (dbf->fname);
      unblock ();
      exit (2);
    }
  dbf->db = db;
  unblock ();
}


/* Open all the database files.  Create non-existing files if CREATE
   is non-zero. */

static void wordref_open (int create)
{
  int i;

  cleanup = 1;
  for (i = 0; i < 3; ++i)
    wordref_open_one (&dbfile[i], create);
}


/* Add the word pointed to by WORD of WORD_LEN characters to the
   database.  The word is in line LINENO of file FNAME.  The array
   pointed to by WORD need not be null-terminated and is not
   modified: case conversion (if requested by WORD_CASE) is done on a
   private, null-terminated copy of the word.

   If FNAME is not yet known to the database, a new file number is
   assigned and recorded in DBF1 and DBF2.  The process is terminated
   with exit code 2 if FNAME is too long for the file name cache or
   if a database operation fails. */

static void add_word (const char *fname, unsigned lineno,
                      char *word, size_t word_len)
{
  DBT key, data;
  struct key0 *k0;
  struct key1 k1;
  struct data2 d2;
  size_t fname_len;
#ifndef __EMX__
  size_t i;
#endif

  /* Build the DBF0 key right away, to get a null-terminated copy of
     the word.  sizeof (struct key0) includes one character of the
     word, that's the room for the terminating null character. */

  k0 = alloca (sizeof (struct key0) + word_len);
  memcpy (k0->word, word, word_len);
  k0->word[word_len] = 0;
  k0->lineno = lineno;

  /* Case-convert the copy of the word if requested. */

  switch (word_case)
    {
    case 'l':
#ifdef __EMX__
      _nls_strlwr (k0->word);
#else
      /* <ctype.h> functions want the value of an unsigned char. */

      for (i = 0; i < word_len; ++i)
        k0->word[i] = (char)tolower ((unsigned char)k0->word[i]);
#endif
      break;
    case 'u':
#ifdef __EMX__
      _nls_strupr (k0->word);
#else
      for (i = 0; i < word_len; ++i)
        k0->word[i] = (char)toupper ((unsigned char)k0->word[i]);
#endif
      break;
    default:
      break;
    }

  /* If the fileno for the file name is cached, use the cached
     value. */

  if (cur_fname[0] == 0 || strcmp (cur_fname, fname) != 0)
    {
      /* The file name will be copied to CUR_FNAME no matter whether
         it is found in DBF2 or not; avoid writing beyond the end of
         CUR_FNAME. */

      fname_len = strlen (fname);
      if (fname_len >= sizeof (cur_fname))
        {
          fputs ("File name too long\n", stderr);
          exit (2);
        }

      /* Search DBF2 for the file name. */

      key.data = (void *)fname; key.size = fname_len;
      data.data = NULL; data.size = 0;
      if (db_get (&DBF2, &key, &data, 0) == 0)
        {
          /* Found.  Use the fileno from the database.  Copy the
             record instead of dereferencing a casted pointer, as
             nothing visible here guarantees that the memory returned
             by the database is suitably aligned. */

          memcpy (&d2, data.data, sizeof (d2));
          cur_fileno = d2.fileno;
        }
      else
        {
          /* Not found.  The file name is a new one, not yet in the
             database.  We have to add a new record to DBF1 and DBF2.
             First, we have to find a unique fileno; we use the
             biggest fileno in DBF1 plus 1. */

          key.data = NULL; key.size = 0;
          data.data = NULL; data.size = 0;
          if (db_seq (&DBF1, &key, &data, R_LAST) == 0)
            {
              memcpy (&k1, key.data, sizeof (k1));
              cur_fileno = k1.fileno + 1;
            }
          else
            cur_fileno = 1;

          /* Add a new fileno -> file name mapping to DBF1.  Note that
             the file name in DBF1 is not null-terminated! */

          k1.fileno = cur_fileno;
          key.data = &k1; key.size = sizeof (k1);
          data.data = (void *)fname; data.size = fname_len;
          db_put (&DBF1, &key, &data, 0);

          /* Add a new file name -> fileno mapping to DBF2.  Note that
             the file name in DBF2 is not null-terminated! */

          key.data = (void *)fname; key.size = fname_len;
          d2.fileno = cur_fileno;
          data.data = &d2; data.size = sizeof (d2);
          db_put (&DBF2, &key, &data, 0);
        }

      /* Update the cache; the length has been checked above. */

      strcpy (cur_fname, fname);
    }

  /* OK, now we have a file number in CUR_FILENO.  Add the (word,
     fileno, lineno) key to DBF0.  There's no data. */

  k0->fileno = cur_fileno;
  key.data = k0; key.size = sizeof (struct key0) + word_len;
  data.data = NULL; data.size = 0;
  db_put (&DBF0, &key, &data, 0);
}


/* Implement the `add' command for one file, FNAME: read the file,
   split it into words, and add all the words to the database.  A
   word is a sequence of letters; if LANG_C is set, underscores and
   (except for the first character) digits are word characters as
   well.  Terminate the process with exit code 2 if the file cannot
   be opened or if we run out of memory.  Return the value returned
   by wordref_close(), that is, 0 on success. */

static int wordref_add_file (const char *fname)
{
  FILE *f;
  size_t max_word_len, word_len;
  unsigned lineno, word_count;
  char *word, *new_word;
  int c;

  /* Open the file and abort if we fail to do that. */

  f = fopen (fname, "r");
  if (f == NULL)
    {
      perror (fname);
      exit (2);
    }

  /* Print a progress message and open the database files. */

  printf ("Processing \"%s\"...\n", fname);
  wordref_open (1);

  /* This loop reads words, discarding characters which don't belong
     to words.  All words are added to the database by calling
     add_word().  The word is stored in the array pointed to by WORD;
     the array has a size of MAX_WORD_LEN characters.  Extend the
     array as needed.  The word has WORD_LEN characters.  There's
     always room for a terminating null character, which is stored
     just before add_word() is called because add_word() may apply
     string functions to the word. */

  word = NULL; word_len = 0; max_word_len = 0; lineno = 1; word_count = 0;
  c = fgetc (f);
  while (c != EOF)
    {
      if (isalpha (c)
          || (lang_c && (c == '_' || (word_len > 0 && isdigit (c)))))
        {
          /* Character C belongs to a word.  Add the character to WORD
             and update WORD_LEN. */

          if (word_len + 1 >= max_word_len)
            {
              /* We have to extend WORD to hold the character and the
                 terminating null character. */

              max_word_len += 512;
              new_word = realloc (word, max_word_len);
              if (new_word == NULL)
                {
                  fputs ("Out of memory\n", stderr);
                  exit (2);
                }
              word = new_word;
            }
          word[word_len++] = (char)c;
        }
      else
        {
          /* Character C does not belong to a word.  If there's a word
             in WORD, add that word to the database. */

          if (word_len != 0)
            {
              word[word_len] = 0;
              add_word (fname, lineno, word, word_len);
              word_len = 0; ++word_count;
            }

          /* Update the line counter when reaching the end of a
             line. */

          if (c == '\n')
            ++lineno;
        }
      c = fgetc (f);
    }

  /* Add the last word if the last word of the file is directly
     followed by EOF. */

  if (word_len != 0)
    {
      word[word_len] = 0;
      add_word (fname, lineno, word, word_len);
      ++word_count;
    }

  /* Release the word buffer; otherwise, we would leak one buffer per
     file.  Then close the files and print another progress
     message. */

  free (word);
  fclose (f);
  printf ("\"%s\" contains %u words.\n", fname, word_count);
  return wordref_close ();
}


/* Implement the `add' command.  ARGC and ARGV describe the command
   line starting at the command name, so getopt() sees the options
   following `add'.  Option -c sets LANG_C; options -l and -u set
   WORD_CASE and exclude each other.  All remaining arguments are
   names of files to be added; at least one is required.  Terminate
   the process with the code returned by wordref_add_file() as soon
   as a file fails. */

static void wordref_add (int argc, char *argv[])
{
  int opt, idx, status;

  /* Parse the options. */

  for (;;)
    {
      opt = getopt (argc, argv, "clu");
      if (opt == -1)
        break;
      if (opt == 'c')
        lang_c = 1;
      else if (opt == 'l' || opt == 'u')
        {
          /* Only one of -l and -u can be given, and only once. */

          if (word_case != 0)
            usage ();
          word_case = (char)opt;
        }
      else
        usage ();
    }

  /* There must be at least one file name. */

  if (optind >= argc)
    usage ();

  /* Process the files one by one. */

  idx = optind;
  while (idx < argc)
    {
      status = wordref_add_file (argv[idx]);
      if (status != 0)
        exit (status);
      ++idx;
    }
}


/* Show the DBF0 record (key) pointed to by K0 as "file:line" or, if
   SHOW_WORD is non-zero, as "file:line:word".  The file name is
   taken from the (file name, fileno) cache or, if the file number is
   not cached, from DBF1.  Terminate the process with exit code 2 if
   the file number is not in DBF1 or if the file name stored there
   does not fit into the cache. */

static void show_record (struct key0 *k0, int show_word)
{
  DBT key, data;
  struct key1 k1;

  /* Get the file name for the file number.  If the file number is in
     the cache, just use the cached file name. */

  if (cur_fname[0] == 0 || k0->fileno != cur_fileno)
    {
      /* The file number is not cached.  We have to search DBF1. */

      k1.fileno = k0->fileno;
      key.data = &k1; key.size = sizeof (k1);
      data.data = NULL; data.size = 0;
      if (db_get (&DBF1, &key, &data, 0) != 0)
        {
          fprintf (stderr, "File number %u not found.\n", k0->fileno);
          exit (2);
        }

      /* The size comes from the database file; don't trust it.
         Avoid writing beyond the end of CUR_FNAME, keeping room for
         the terminating null character. */

      if (data.size >= sizeof (cur_fname))
        {
          fputs ("File name too long\n", stderr);
          exit (2);
        }

      /* Update the cache.  Note that the file name in DBF1 is not
         null-terminated. */

      memcpy (cur_fname, data.data, data.size);
      cur_fname[data.size] = 0;
      cur_fileno = k0->fileno;
    }

  /* Print the values. */

  if (show_word)
    printf ("%s:%u:%s\n", cur_fname, k0->lineno, k0->word);
  else
    printf ("%s:%u\n", cur_fname, k0->lineno);
}


/* Implement the `find' command: print "file:line" for all the
   occurrences of WORD recorded in the database.  Terminate the
   process if the database files cannot be opened or closed. */

static void wordref_find (const char *word)
{
  DBT key, data;
  struct key0 *probe;
  const size_t word_len = strlen (word);
  int rc;

  /* Open the database files, don't create them if they don't
     exist (this will result in an error message). */

  wordref_open (0);

  /* Build the key (word, 0, 0).  Positioning the cursor at the
     smallest key greater than or equal to that key finds the first
     line of the first file containing the word. */

  probe = alloca (sizeof (struct key0) + word_len);
  probe->fileno = 0;
  probe->lineno = 0;
  memcpy (probe->word, word, word_len);
  probe->word[word_len] = 0;

  key.data = probe;
  key.size = sizeof (struct key0) + word_len;
  data.data = NULL;
  data.size = 0;

  /* Walk the keys starting at the cursor position.  Each key found
     might belong to a different (greater) word; stop looping at the
     first key which doesn't match the word. */

  for (rc = db_seq (&DBF0, &key, &data, R_CURSOR);
       rc == 0;
       rc = db_seq (&DBF0, &key, &data, R_NEXT))
    {
      struct key0 *hit = key.data;

      if (strcmp (hit->word, word) != 0)
        break;
      show_record (hit, 0);
    }

  /* Close the database files and quit on error. */

  rc = wordref_close ();
  if (rc != 0)
    exit (rc);
}


/* Implement the `list' command: print "file:line:word" for all the
   records of DBF0, in key order.  Terminate the process if the
   database files cannot be opened or closed. */

static void wordref_list (void)
{
  DBT key, data;
  int rc;

  /* Open the database files, don't create them if they don't
     exist (this will result in an error message). */

  wordref_open (0);

  /* Scan DBF0 from the first record to the last one, showing each
     record including the word. */

  key.data = NULL;
  key.size = 0;
  data.data = NULL;
  data.size = 0;
  for (rc = db_seq (&DBF0, &key, &data, R_FIRST);
       rc == 0;
       rc = db_seq (&DBF0, &key, &data, R_NEXT))
    show_record ((struct key0 *)key.data, 1);

  /* Close the database files and quit on error. */

  rc = wordref_close ();
  if (rc != 0)
    exit (rc);
}


/* Implement the `clear' command: delete all three database files.
   An error message is printed for each file which cannot be deleted;
   the success message is printed only if all the files have been
   deleted. */

static void wordref_clear (void)
{
  int failures = 0;
  int i;

  for (i = 0; i < 3; ++i)
    {
      if (remove (dbfile[i].fname) == 0)
        continue;
      perror (dbfile[i].fname);
      ++failures;
    }

  if (failures == 0)
    printf ("Database deleted.\n");
}


/* Install a signal-catching function, term_sig (), for signal
   SIGNO. */

static void catch_sig (int signo)
{
  struct sigaction sa;

  sa.sa_handler = term_sig;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  if (sigaction (signo, &sa, NULL) != 0)
    {
      perror ("sigaction");
      exit (2);
    }
}


/* Initialize this program: set up the descriptions of the three
   database files, set up signal processing, register the atexit()
   function, and clear the (file name, fileno) cache.  This function
   must be called before any of the commands is run. */

static void init (void)
{
  const char *base = "wordref";
  int i;

  /* Set up the database file descriptions.  The file names are
     "wordref.db0" through "wordref.db2"; none of the files is open
     yet. */

  for (i = 0; i < 3; ++i)
    {
      sprintf (dbfile[i].fname, "%s.db%d", base, i);
      dbfile[i].db = NULL;
    }
  DBF0.compare = compare0; DBF0.dup = 1;
  DBF0.cache_size = 2048 * 1024;
  DBF1.compare = compare1; DBF1.dup = 0;
  DBF1.cache_size = 64 * 1024;
  DBF2.compare = compare2; DBF2.dup = 0;
  DBF2.cache_size = 64 * 1024;

  /* Set up signal processing.  BLOCK_SET is the set of signals held
     back while a database operation is in progress. */

  sigemptyset (&block_set);
  sigaddset (&block_set, SIGINT);
#ifdef SIGBREAK
  sigaddset (&block_set, SIGBREAK);
#endif
  sigaddset (&block_set, SIGTERM);
  sigaddset (&block_set, SIGHUP);

  /* Catch every signal of BLOCK_SET.  Blocking a signal only defers
     it: a signal without handler (SIGTERM used to be one) terminates
     the process as soon as it is unblocked, without the database
     files being closed. */

  catch_sig (SIGINT);
#ifdef SIGBREAK
  catch_sig (SIGBREAK);
#endif
  catch_sig (SIGTERM);
  catch_sig (SIGHUP);

  /* Set up atexit() processing. */

  atexit (wordref_atexit);

  /* Clear the cache. */

  cur_fname[0] = 0;

#ifdef __EMX__
  _nls_init ();
#endif
}


/* The program starts here.  Initialize, then run the command named
   by the first command line argument.  Return 0 if the command has
   been processed successfully; the process is terminated with a
   non-zero exit code by the function detecting the problem if
   something goes wrong. */

int main (int argc, char *argv[])
{
  const char *cmd;

  /* Support wildcards: `wordref add *.doc'. */

#ifdef __EMX__
  _wildcard (&argc, &argv);
#endif

  /* Initialize. */

  init ();

  /* Pick the command name.  The empty string matches none of the
     commands, so a missing command ends up in usage(). */

  cmd = argc >= 2 ? argv[1] : "";

  /* Call the function implementing the command given on the command
     line.  Tell them how to run this program if there's something
     rotten on the command line, such as an unknown command or the
     wrong number of arguments. */

  if (argc == 2 && strcmp (cmd, "clear") == 0)
    wordref_clear ();
  else if (argc == 2 && strcmp (cmd, "list") == 0)
    wordref_list ();
  else if (argc == 3 && strcmp (cmd, "find") == 0)
    wordref_find (argv[2]);
  else if (argc >= 3 && strcmp (cmd, "add") == 0)
    wordref_add (argc - 1, argv + 1);
  else
    usage ();

  /* We come here after successful processing of a command.  There's
     nothing left to be done by the atexit() function. */

  cleanup = 0;
  return 0;
}
