Subversion Repositories planix.SVN

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed

/* Parse a string, yielding a struct partime that describes it.  */

/* Copyright 1993, 1994, 1995, 1997 Paul Eggert
   Distributed under license by the Free Software Foundation, Inc.

   This file is part of RCS.

   RCS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   RCS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with RCS; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   Report problems and direct all questions to:

        rcs-bugs@cs.purdue.edu

 */

#if has_conf_h
# include <conf.h>
#else
# if HAVE_CONFIG_H
#  include <config.h>
# else
#  ifndef __STDC__
#   define const
#  endif
# endif
# if HAVE_LIMITS_H
#  include <limits.h>
# endif
# ifndef LONG_MIN
# define LONG_MIN (-1-2147483647L)
# endif
# if STDC_HEADERS
#  include <stdlib.h>
# endif
# include <time.h>
# ifdef __STDC__
#  define P(x) x
# else
#  define P(x) ()
# endif
#endif

#include <ctype.h>
#if STDC_HEADERS
# define CTYPE_DOMAIN(c) 1
#else
# define CTYPE_DOMAIN(c) ((unsigned) (c) <= 0177)
#endif
#define ISALNUM(c)      (CTYPE_DOMAIN (c) && isalnum (c))
#define ISALPHA(c)      (CTYPE_DOMAIN (c) && isalpha (c))
#define ISSPACE(c)      (CTYPE_DOMAIN (c) && isspace (c))
#define ISUPPER(c)      (CTYPE_DOMAIN (c) && isupper (c))
#define ISDIGIT(c)      ((unsigned) (c) - '0' <= 9)

#include <partime.h>

char const partimeId[] =
  "$Id: partime.c,v 5.16 1997/05/19 06:33:53 eggert Exp $";


/* Lookup tables for names of months, weekdays, time zones.  */

#define NAME_LENGTH_MAXIMUM 4

struct name_val
  {
    char name[NAME_LENGTH_MAXIMUM];
    int val;
  };


static char const *parse_decimal P ((char const *, int, int, int, int, int *, int *));
static char const *parse_fixed P ((char const *, int, int *));
static char const *parse_pattern_letter P ((char const *, int, struct partime *));
static char const *parse_prefix P ((char const *, struct partime *, int *));
static char const *parse_ranged P ((char const *, int, int, int, int *));
static int lookup P ((char const *, struct name_val const[]));
static int merge_partime P ((struct partime *, struct partime const *));
static void undefine P ((struct partime *));


static struct name_val const month_names[] =
{
  {"jan", 0},
  {"feb", 1},
  {"mar", 2},
  {"apr", 3},
  {"may", 4},
  {"jun", 5},
  {"jul", 6},
  {"aug", 7},
  {"sep", 8},
  {"oct", 9},
  {"nov", 10},
  {"dec", 11},
  {"", TM_UNDEFINED}
};

static struct name_val const weekday_names[] =
{
  {"sun", 0},
  {"mon", 1},
  {"tue", 2},
  {"wed", 3},
  {"thu", 4},
  {"fri", 5},
  {"sat", 6},
  {"", TM_UNDEFINED}
};

#define hr60nonnegative(t) ((t)/100 * 60  +  (t)%100)
#define hr60(t) ((t)<0 ? -hr60nonnegative(-(t)) : hr60nonnegative(t))
#define zs(t,s) {s, hr60(t)}
#define zd(t,s,d) zs(t, s),  zs((t)+100, d)

static struct name_val const zone_names[] =
{
  zs (-1000, "hst"),            /* Hawaii */
  zd (-1000, "hast", "hadt"),   /* Hawaii-Aleutian */
  zd (- 900, "akst", "akdt"),   /* Alaska */
  zd (- 800, "pst" , "pdt" ),   /* Pacific */
  zd (- 700, "mst" , "mdt" ),   /* Mountain */
  zd (- 600, "cst" , "cdt" ),   /* Central */
  zd (- 500, "est" , "edt" ),   /* Eastern */
  zd (- 400, "ast" , "adt" ),   /* Atlantic */
  zd (- 330, "nst" , "ndt" ),   /* Newfoundland */
  zs (  000, "utc" ),           /* Coordinated Universal */
  zs (  000, "uct" ),           /* " */
  zs (  000, "cut" ),           /* " */
  zs (  000, "ut"),             /* Universal */
  zs (  000, "z"),              /* Zulu (required by ISO 8601) */
  zd (  000, "gmt" , "bst" ),   /* Greenwich Mean, British Summer */
  zd (  000, "wet" , "west"),   /* Western European */
  zd (  100, "cet" , "cest"),   /* Central European */
  zd (  100, "met" , "mest"),   /* Middle European (bug in old tz versions) */
  zd (  100, "mez" , "mesz"),   /* Mittel-Europaeische Zeit */
  zd (  200, "eet" , "eest"),   /* Eastern European */
  zs (  530, "ist" ),           /* India */
  zd (  900, "jst" , "jdt" ),   /* Japan */
  zd (  900, "kst" , "kdt" ),   /* Korea */
  zd ( 1200, "nzst", "nzdt"),   /* New Zealand */
  {"lt", 1},
#if 0
  /* The following names are duplicates or are not well attested.
     There are lots more where these came from.  */
  zs (-1100, "sst" ),           /* Samoan */
  zd (- 900, "yst" , "ydt" ),   /* Yukon - name is no longer used */
  zd (- 500, "ast" , "adt" ),   /* Acre */
  zd (- 400, "wst" , "wdt" ),   /* Western Brazil */
  zd (- 400, "cst" , "cdt" ),   /* Chile */
  zd (- 200, "fst" , "fdt" ),   /* Fernando de Noronha */
  zs (  000, "wat" ),           /* West African */
  zs (  100, "cat" ),           /* Central African */
  zs (  200, "sat" ),           /* South African */
  zd (  200, "ist" , "idt" ),   /* Israel */
  zs (  300, "eat" ),           /* East African */
  zd (  300, "msk" , "msd" ),   /* Moscow */
  zd (  330, "ist" , "idt" ),   /* Iran */
  zs (  800, "hkt" ),           /* Hong Kong */
  zs (  800, "sgt" ),           /* Singapore */
  zd (  800, "cst" , "cdt" ),   /* China */
  zd (  800, "wst" , "wst" ),   /* Western Australia */
  zd (  930, "cst" , "cst" ),   /* Central Australia */
  zs ( 1000, "gst" ),           /* Guam */
  zd ( 1000, "est" , "est" ),   /* Eastern Australia */
#endif
  {"", -1}
};

/* Look for a prefix of S in TABLE, returning val for first matching entry.  */
static int
lookup (s, table)
     char const *s;
     struct name_val const table[];
{
  int j;
  char buf[NAME_LENGTH_MAXIMUM];

  for (j = 0; j < NAME_LENGTH_MAXIMUM; j++)
    {
      unsigned char c = *s++;
      if (! ISALPHA (c))
        {
          buf[j] = '\0';
          break;
        }
      buf[j] = ISUPPER (c) ? tolower (c) : c;
    }

  for (;; table++)
    for (j = 0; ; j++)
      if (j == NAME_LENGTH_MAXIMUM  ||  ! table[0].name[j])
        return table[0].val;
      else if (buf[j] != table[0].name[j])
        break;
}


/* Set *T to ``undefined'' values.  */
static void
undefine (t)
     struct partime *t;
{
  t->tm.tm_sec = t->tm.tm_min = t->tm.tm_hour = t->tm.tm_mday = t->tm.tm_mon
    = t->tm.tm_year = t->tm.tm_wday = t->tm.tm_yday
    = t->ymodulus = t->yweek
    = TM_UNDEFINED;
  t->zone = TM_UNDEFINED_ZONE;
}

/* Array of patterns to look for in a date string.
   Order is important: we look for the first matching pattern
   whose values do not contradict values that we already know about.
   See `parse_pattern_letter' below for the meaning of the pattern codes.  */
static char const *const patterns[] =
{
  /* These traditional patterns must come first,
     to prevent an ISO 8601 format from misinterpreting their prefixes.  */
  "E_n_y", "x", /* RFC 822 */
  "E_n", "n_E", "n", "t:m:s_A", "t:m_A", "t_A", /* traditional */
  "y/N/D$", /* traditional RCS */

  /* ISO 8601:1988 formats, generalized a bit.  */
  "y-N-D$", "4ND$", "Y-N$",
  "RND$", "-R=N$", "-R$", "--N=D$", "N=DT",
  "--N$", "---D$", "DT",
  "Y-d$", "4d$", "R=d$", "-d$", "dT",
  "y-W-X", "yWX", "y=W",
  "-r-W-X", "r-W-XT", "-rWX", "rWXT", "-W=X", "W=XT", "-W",
  "-w-X", "w-XT", "---X$", "XT", "4$",
  "T",
  "h:m:s$", "hms$", "h:m$", "hm$", "h$", "-m:s$", "-ms$", "-m$", "--s$",
  "Y", "Z",

  0
};

/* Parse an initial prefix of STR, setting *T accordingly.
   Return the first character after the prefix, or 0 if it couldn't be parsed.
   Start with pattern *PI; if success, set *PI to the next pattern to try.
   Set *PI to -1 if we know there are no more patterns to try;
   if *PI is initially negative, give up immediately.  */
static char const *
parse_prefix (str, t, pi)
     char const *str;
     struct partime *t;
     int *pi;
{
  int i = *pi;
  char const *pat;
  unsigned char c;

  if (i < 0)
    return 0;

  /* Remove initial noise.  */
  while (! ISALNUM (c = *str) && c != '-' && c != '+')
    {
      if (! c)
        {
          undefine (t);
          *pi = -1;
          return str;
        }
      str++;
    }

  /* Try a pattern until one succeeds.  */
  while ((pat = patterns[i++]) != 0)
    {
      char const *s = str;
      undefine (t);
      do
        {
          if (! (c = *pat++))
            {
              *pi = i;
              return s;
            }
        }
      while ((s = parse_pattern_letter (s, c, t)) != 0);
    }

  return 0;
}

/* Parse an initial prefix of S of length DIGITS; it must be a number.
   Store the parsed number into *RES.
   Return the first character after the prefix, or 0 if it wasn't parsed.  */
static char const *
parse_fixed (s, digits, res)
     char const *s;
     int digits, *res;
{
  int n = 0;
  char const *lim = s + digits;
  while (s < lim)
    {
      unsigned d = *s++ - '0';
      if (9 < d)
        return 0;
      n = 10 * n + d;
    }
  *res = n;
  return s;
}

/* Parse an initial prefix of S of length DIGITS;
   it must be a number in the range LO through HI.
   Store the parsed number into *RES.
   Return the first character after the prefix, or 0 if it wasn't parsed.  */
static char const *
parse_ranged (s, digits, lo, hi, res)
     char const *s;
     int digits, lo, hi, *res;
{
  s = parse_fixed (s, digits, res);
  return s && lo <= *res && *res <= hi ? s : 0;
}

/* Parse an initial prefix of S of length DIGITS;
   it must be a number in the range LO through HI
   and it may be followed by a fraction to be computed using RESOLUTION.
   Store the parsed number into *RES; store the fraction times RESOLUTION,
   rounded to the nearest integer, into *FRES.
   Return the first character after the prefix, or 0 if it wasn't parsed.  */
static char const *
parse_decimal (s, digits, lo, hi, resolution, res, fres)
     char const *s;
     int digits, lo, hi, resolution, *res, *fres;
{
  s = parse_fixed (s, digits, res);
  if (s && lo <= *res && *res <= hi)
    {
      int f = 0;
      if ((s[0] == ',' || s[0] == '.') && ISDIGIT (s[1]))
        {
          char const *s1 = ++s;
          int num10 = 0, denom10 = 10, product;
          while (ISDIGIT (*++s))
            {
              int d = denom10 * 10;
              if (d / 10  !=  denom10)
                return 0; /* overflow */
              denom10 = d;
            }
          s = parse_fixed (s1, (int) (s - s1), &num10);
          product = num10 * resolution;
          f = (product + (denom10 >> 1)) / denom10;
          f -= f & (product % denom10  ==  denom10 >> 1); /* round to even */
          if (f < 0  ||  product/resolution != num10)
            return 0; /* overflow */
        }
      *fres = f;
      return s;
    }
  return 0;
}

/* Parse an initial prefix of S; it must denote a time zone.
   Set *ZONE to the number of seconds east of GMT,
   or to TM_LOCAL_ZONE if it is the local time zone.
   Return the first character after the prefix, or 0 if it wasn't parsed.  */
char *
parzone (s, zone)
     char const *s;
     long *zone;
{
  char sign;
  int hh, mm, ss;
  int minutesEastOfUTC;
  long offset, z;

  /* The formats are LT, n, n DST, nDST, no, o
     where n is a time zone name
     and o is a time zone offset of the form [-+]hh[:mm[:ss]].  */
  switch (*s)
    {
    case '-':
    case '+':
      z = 0;
      break;

    default:
      minutesEastOfUTC = lookup (s, zone_names);
      if (minutesEastOfUTC == -1)
        return 0;

      /* Don't bother to check rest of spelling.  */
      while (ISALPHA ((unsigned char) *s))
        s++;

      /* Don't modify LT.  */
      if (minutesEastOfUTC == 1)
        {
          *zone = TM_LOCAL_ZONE;
          return (char *) s;
        }

      z = minutesEastOfUTC * 60L;

      /* Look for trailing " DST".  */
      if ((s[-1] == 'T' || s[-1] == 't')
          && (s[-2] == 'S' || s[-2] == 's')
          && (s[-3] == 'D' || s[-3] == 'd'))
        goto trailing_dst;
      while (ISSPACE ((unsigned char) *s))
        s++;
      if ((s[0] == 'D' || s[0] == 'd')
          && (s[1] == 'S' || s[1] == 's')
          && (s[2] == 'T' || s[2] == 't'))
        {
          s += 3;
        trailing_dst:
          *zone = z + 60*60;
          return (char *) s;
        }

      switch (*s)
        {
        case '-':
        case '+':
          break;

        default:
          *zone = z;
          return (char *) s;
        }

      break;
    }

  sign = *s++;

  if (! (s = parse_ranged (s, 2, 0, 23, &hh)))
    return 0;
  mm = ss = 0;
  if (*s == ':')
    s++;
  if (ISDIGIT (*s))
    {
      if (! (s = parse_ranged (s, 2, 0, 59, &mm)))
        return 0;
      if (*s == ':' && s[-3] == ':' && ISDIGIT (s[1])
          && ! (s = parse_ranged (s + 1, 2, 0, 59, &ss)))
        return 0;
    }
  if (ISDIGIT (*s))
    return 0;
  offset = (hh * 60 + mm) * 60L + ss;
  *zone = z + (sign == '-' ? -offset : offset);
  /* ?? Are fractions allowed here?  If so, they're not implemented.  */
  return (char *) s;
}

/* Parse an initial prefix of S, matching the pattern whose code is C.
   Set *T accordingly.
   Return the first character after the prefix, or 0 if it wasn't parsed.  */
static char const *
parse_pattern_letter (s, c, t)
     char const *s;
     int c;
     struct partime *t;
{
  switch (c)
    {
    case '$': /* The next character must be a non-digit.  */
      if (ISDIGIT (*s))
        return 0;
      break;

    case '-':
    case '/':
    case ':':
      /* These characters stand for themselves.  */
      if (*s++ != c)
        return 0;
      break;

    case '4': /* 4-digit year */
      s = parse_fixed (s, 4, &t->tm.tm_year);
      break;

    case '=': /* optional '-' */
      s += *s == '-';
      break;

    case 'A': /* AM or PM */
      /* This matches the regular expression [AaPp][Mm]?.
         It must not be followed by a letter or digit;
         otherwise it would match prefixes of strings like "PST".  */
      switch (*s++)
        {
        case 'A':
        case 'a':
          if (t->tm.tm_hour == 12)
            t->tm.tm_hour = 0;
          break;

        case 'P':
        case 'p':
          if (t->tm.tm_hour != 12)
            t->tm.tm_hour += 12;
          break;

        default:
          return 0;
        }
      switch (*s)
        {
        case 'M':
        case 'm':
          s++;
          break;
        }
      if (ISALNUM ((unsigned char) *s))
        return 0;
      break;

    case 'D': /* day of month [01-31] */
      s = parse_ranged (s, 2, 1, 31, &t->tm.tm_mday);
      break;

    case 'd': /* day of year [001-366] */
      s = parse_ranged (s, 3, 1, 366, &t->tm.tm_yday);
      t->tm.tm_yday--;
      break;

    case 'E': /* extended day of month [1-9, 01-31] */
      s = parse_ranged (s, (ISDIGIT (s[0]) && ISDIGIT (s[1])) + 1, 1, 31,
                        &t->tm.tm_mday);
      break;

    case 'h': /* hour [00-23 followed by optional fraction] */
      {
        int frac;
        s = parse_decimal (s, 2, 0, 23, 60 * 60, &t->tm.tm_hour, &frac);
        t->tm.tm_min = frac / 60;
        t->tm.tm_sec = frac % 60;
      }
      break;

    case 'm': /* minute [00-59 followed by optional fraction] */
      s = parse_decimal (s, 2, 0, 59, 60, &t->tm.tm_min, &t->tm.tm_sec);
      break;

    case 'n': /* month name [e.g. "Jan"] */
      if (! TM_DEFINED (t->tm.tm_mon = lookup (s, month_names)))
        return 0;
      /* Don't bother to check rest of spelling.  */
      while (ISALPHA ((unsigned char) *s))
        s++;
      break;

    case 'N': /* month [01-12] */
      s = parse_ranged (s, 2, 1, 12, &t->tm.tm_mon);
      t->tm.tm_mon--;
      break;

    case 'r': /* year % 10 (remainder in origin-0 decade) [0-9] */
      s = parse_fixed (s, 1, &t->tm.tm_year);
      t->ymodulus = 10;
      break;

    case_R:
    case 'R': /* year % 100 (remainder in origin-0 century) [00-99] */
      s = parse_fixed (s, 2, &t->tm.tm_year);
      t->ymodulus = 100;
      break;

    case 's': /* second [00-60 followed by optional fraction] */
      {
        int frac;
        s = parse_decimal (s, 2, 0, 60, 1, &t->tm.tm_sec, &frac);
        t->tm.tm_sec += frac;
      }
      break;

    case 'T': /* 'T' or 't' */
      switch (*s++)
        {
        case 'T':
        case 't':
          break;
        default:
          return 0;
        }
      break;

    case 't': /* traditional hour [1-9 or 01-12] */
      s = parse_ranged (s, (ISDIGIT (s[0]) && ISDIGIT (s[1])) + 1, 1, 12,
                        &t->tm.tm_hour);
      break;

    case 'w': /* 'W' or 'w' only (stands for current week) */
      switch (*s++)
        {
        case 'W':
        case 'w':
          break;
        default:
          return 0;
        }
      break;

    case 'W': /* 'W' or 'w', followed by a week of year [00-53] */
      switch (*s++)
        {
        case 'W':
        case 'w':
          break;
        default:
          return 0;
        }
      s = parse_ranged (s, 2, 0, 53, &t->yweek);
      break;

    case 'X': /* weekday (1=Mon ... 7=Sun) [1-7] */
      s = parse_ranged (s, 1, 1, 7, &t->tm.tm_wday);
      t->tm.tm_wday--;
      break;

    case 'x': /* weekday name [e.g. "Sun"] */
      if (! TM_DEFINED (t->tm.tm_wday = lookup (s, weekday_names)))
        return 0;
      /* Don't bother to check rest of spelling.  */
      while (ISALPHA ((unsigned char) *s))
        s++;
      break;

    case 'y': /* either R or Y */
      if (ISDIGIT (s[0]) && ISDIGIT (s[1]) && ! ISDIGIT (s[2]))
        goto case_R;
      /* fall into */
    case 'Y': /* year in full [4 or more digits] */
      {
        int len = 0;
        while (ISDIGIT (s[len]))
          len++;
        if (len < 4)
          return 0;
        s = parse_fixed (s, len, &t->tm.tm_year);
      }
      break;

    case 'Z': /* time zone */
      s = parzone (s, &t->zone);
      break;

    case '_': /* possibly empty sequence of non-alphanumerics */
      while (! ISALNUM ((unsigned char) *s) && *s)
        s++;
      break;

    default: /* bad pattern */
      return 0;
    }

  return s;
}

/* If there is no conflict, merge into *T the additional information in *U
   and return 0.  Otherwise do nothing and return -1.  */
static int
merge_partime (t, u)
     struct partime *t;
     struct partime const *u;
{
# define conflict(a,b) ((a) != (b)  &&  TM_DEFINED (a)  &&  TM_DEFINED (b))
  if (conflict (t->tm.tm_sec, u->tm.tm_sec)
      || conflict (t->tm.tm_min, u->tm.tm_min)
      || conflict (t->tm.tm_hour, u->tm.tm_hour)
      || conflict (t->tm.tm_mday, u->tm.tm_mday)
      || conflict (t->tm.tm_mon, u->tm.tm_mon)
      || conflict (t->tm.tm_year, u->tm.tm_year)
      || conflict (t->tm.tm_wday, u->tm.tm_yday)
      || conflict (t->ymodulus, u->ymodulus)
      || conflict (t->yweek, u->yweek)
      || (t->zone != u->zone
          && t->zone != TM_UNDEFINED_ZONE
          && u->zone != TM_UNDEFINED_ZONE))
    return -1;
# undef conflict
# define merge_(a,b) if (TM_DEFINED (b)) (a) = (b);
  merge_ (t->tm.tm_sec, u->tm.tm_sec)
  merge_ (t->tm.tm_min, u->tm.tm_min)
  merge_ (t->tm.tm_hour, u->tm.tm_hour)
  merge_ (t->tm.tm_mday, u->tm.tm_mday)
  merge_ (t->tm.tm_mon, u->tm.tm_mon)
  merge_ (t->tm.tm_year, u->tm.tm_year)
  merge_ (t->tm.tm_wday, u->tm.tm_yday)
  merge_ (t->ymodulus, u->ymodulus)
  merge_ (t->yweek, u->yweek)
# undef merge_
  if (u->zone != TM_UNDEFINED_ZONE)
    t->zone = u->zone;
  return 0;
}

/* Parse a date/time prefix of S, putting the parsed result into *T.
   Return the first character after the prefix.
   The prefix may contain no useful information;
   in that case, *T will contain only undefined values.  */
char *
partime (s, t)
     char const *s;
     struct partime *t;
{
  struct partime p;

  undefine (t);

  while (*s)
    {
      int i = 0;
      char const *s1;

      do
        {
          if (! (s1 = parse_prefix (s, &p, &i)))
            return (char *) s;
        }
      while (merge_partime (t, &p) != 0);

      s = s1;
    }

  return (char *) s;
}