/* file: utf7.l */
%{
#ifdef __MSDOS__
# include
# include
# include
#else
# include
#endif
#include
#include
#include
#include
/* Set to 1 by main() when -c is given; outchar() then prints "U+code"
 * for characters it cannot represent instead of a placeholder byte. */
int printcode = 0;

/* The two bytes of the 16-bit unit currently being assembled by utf();
 * nextout() combines them with outcode [0] as the high byte. */
unsigned int outcode [2];

/* Position (0..3) of the next base64 character within its group of four. */
unsigned int instep;

/* Index (0 or 1) of the byte of outcode that is currently being filled. */
unsigned int outstep;

/* Name of this program as stored by get_programname(); used in messages. */
char *programname;

void get_programname (char const *argv0);
void syntax (void);
void errit (char const *format, ...);
void utf (void);
void nextout (void);
void outchar (long unsigned);
/* Under __MSDOS__, map strcasecmp onto stricmp.  (strcasecmp is not
 * referenced anywhere in the code visible in this file.) */
#ifdef __MSDOS__
#define strcasecmp(s1, s2) (stricmp(s1, s2))
#endif
/* NOTE(review): YY_NO_UNPUT and YY_SKIP_YYWRAP are presumably read by the
 * flex skeleton (to leave out yyunput and to skip its own declaration of
 * yywrap) -- confirm against the flex version in use. */
#define YY_NO_UNPUT
#define YY_SKIP_YYWRAP
/* Drop any macro definition of yywrap so that the function defined just
 * below is the one that gets compiled and called. */
#ifdef yywrap
# undef yywrap
#endif
/*
 * End-of-input hook for the scanner.  Always returns 1, which by lex/flex
 * convention reports that no further input follows.
 */
int yywrap (void)
{
    return 1;
}
%}
/*
 * Scanner rules for decoding UTF-7.
 *
 * Outside a shifted sequence (INITIAL) "+-" stands for a literal '+' and a
 * lone '+' opens a base64 sequence (state _utf).  Text matching no rule is
 * copied through by the scanner's default action.  Inside _utf every base64
 * character is fed to utf(); '-' closes the sequence and is swallowed, any
 * other character closes the sequence and is printed.
 *
 * _utf is an inclusive start condition, so rules without a prefix are active
 * inside it as well.  The two '+' rules are therefore tied to INITIAL:
 * otherwise they would win over the base64 rule, and a '+' (value 62) inside
 * a sequence would restart the decoder, or "+-" would print '+' without
 * closing the sequence.
 */
%Start _utf
%%
<INITIAL>"+-"          { fputc ('+', stdout); }
<INITIAL>"+"           { instep = outstep = 0;
                         BEGIN _utf; }
<_utf>[A-Za-z0-9+/]    { utf (); }
<_utf>"-"              { BEGIN INITIAL; }
<_utf>.|\n             { fputc (yytext [0], stdout);
                         BEGIN INITIAL; }
%%
/*
 * Decode the base64 character in yytext [0] and merge its six bits into
 * the byte pair being assembled in outcode.
 *
 * Four base64 characters carry three bytes; instep records which of the
 * four positions the current character occupies.  Each time a byte is
 * completed nextout() is called, which emits a character after every
 * second byte.
 *
 * A character outside the base64 alphabet is ignored.  The scanner rule
 * never passes one, but this keeps the six-bit value from ever being used
 * uninitialised.
 */
void utf (void)
{
    unsigned
        ch,
        bits;

    ch = (unsigned char) yytext [0];
    if (ch >= 'A' && ch <= 'Z')
        bits = ch - 'A';
    else if (ch >= 'a' && ch <= 'z')
        bits = ch - 'a' + 26;
    else if (ch >= '0' && ch <= '9')
        bits = ch - '0' + 52;
    else if (ch == '+')
        bits = 62;
    else if (ch == '/')
        bits = 63;
    else
        return;

    switch (instep) {
        case 0:
            /* six high bits of a new byte */
            outcode [outstep] = (bits << 2);
            break;
        case 1:
            /* two bits finish the byte, four start the next one */
            outcode [outstep] |= (bits >> 4);
            nextout ();
            outcode [outstep] = (bits << 4);
            break;
        case 2:
            /* four bits finish the byte, two start the next one */
            outcode [outstep] |= (bits >> 2);
            nextout ();
            outcode [outstep] = (bits << 6);
            break;
        case 3:
            /* all six bits finish the byte */
            outcode [outstep] |= bits;
            nextout ();
            break;
        default:
            break;
    }

    if (++instep == 4)
        instep = 0;
}
/*
 * Called whenever one byte of outcode has been completed.  After the first
 * byte it only advances outstep; after the second it rewinds outstep and
 * hands the 16-bit value (outcode [0] as high byte, outcode [1] as low
 * byte) to outchar().
 */
void nextout (void)
{
    unsigned
        high,
        low;

    if (outstep == 0) {
        outstep = 1;
        return;
    }

    outstep = 0;
    /* utf() may leave bits above the low eight in each slot; mask them off */
    high = outcode [0] & 0xFF;
    low = outcode [1] & 0xFF;
    outchar ((high << 8) | low);
}
/*
 * Write the character with code c to stdout.
 *
 *  - codes below 256 are written as that single byte (iso-8859-1);
 *  - eight further codes are written as their iso-8859-15 byte;
 *  - the IJ and ij ligatures are replaced by two letters;
 *  - anything else is printed as "U+code" when printcode is set, and as
 *    byte 191 otherwise.
 */
void outchar (long unsigned c)
{
    /* characters that have a byte in iso-8859-15 but not in iso-8859-1 */
    static const struct {
        long unsigned code;
        int byte;
    } latin9 [] = {
        { 0x20AC, 0xA4 },   /* euro */
        { 0x0160, 0xA6 },   /* S caron */
        { 0x0161, 0xA8 },   /* s caron */
        { 0x017D, 0xB4 },   /* Z caron */
        { 0x017E, 0xB8 },   /* z caron */
        { 0x0152, 0xBC },   /* OE ligature */
        { 0x0153, 0xBD },   /* oe ligature */
        { 0x0178, 0xBE }    /* Y diaeresis */
    };
    /* characters replaced by a short piece of text */
    static const struct {
        long unsigned code;
        char const *text;
    } replacement [] = {
        { 0x0132, "IJ" },
        { 0x0133, "ij" }
    };
    unsigned
        k;

    /* iso-8859-1 */
    if (c < 256) {
        fputc ((int) c, stdout);
        return;
    }

    /* iso-8859-15 */
    for (k = 0; k < sizeof latin9 / sizeof latin9 [0]; k++) {
        if (latin9 [k].code == c) {
            fputc (latin9 [k].byte, stdout);
            return;
        }
    }

    /* substitutions */
    for (k = 0; k < sizeof replacement / sizeof replacement [0]; k++) {
        if (replacement [k].code == c) {
            fputs (replacement [k].text, stdout);
            return;
        }
    }

    /* no representation available */
    if (! printcode) {
        fputc (191, stdout);
        return;
    }
    if (c < 0x10000)
        printf ("U+%04X", (unsigned) c);
    else
        printf ("U+%08lX", c);
}
/*
 * Entry point.  Accepts any number of leading "-c" options followed by at
 * most one file name.  Without a file name the input is stdin, unless stdin
 * is a terminal, in which case the usage text is shown.  More than one
 * remaining argument also shows the usage text.
 */
int main (int argc, char *argv [])
{
    get_programname (argv [0]);

    /* consume leading -c options */
    for (; argc > 1 && strcmp (argv [1], "-c") == 0; argv++, argc--)
        printcode = 1;

    if (argc == 1) {
        if (isatty (fileno (stdin)))
            syntax ();
        yyin = stdin;
    } else if (argc == 2) {
        yyin = fopen (argv [1], "r");
        if (yyin == NULL)
            errit ("Opening file \"%s\": %s", argv [1], strerror (errno));
    } else {
        syntax ();
    }

    yylex ();

    if (yyin != stdin)
        fclose (yyin);

    return 0;
}
/*
 * Store a freshly allocated copy of the program's base name in programname.
 * Under __MSDOS__ the name part is extracted with fnsplit(); elsewhere it
 * is whatever follows the last '/' of argv0, or all of argv0 when it
 * contains no '/'.
 */
void get_programname (char const *argv0)
{
#ifdef __MSDOS__
    char
        name [MAXFILE];

    fnsplit (argv0, NULL, NULL, name, NULL);
    programname = strdup (name);
#else   /* unix */
    char const
        *slash = strrchr (argv0, '/');

    programname = strdup (slash ? slash + 1 : argv0);
#endif
}
/*
 * Print "Error <programname>: " followed by the printf-style message built
 * from format and the remaining arguments to stderr, then terminate the
 * program with exit status 1.  Does not return.
 */
void errit (char const *format, ...)
{
    va_list
        list;

    fprintf (stderr, "\nError %s: ", programname);
    va_start (list, format);
    vfprintf (stderr, format, list);
    /* every va_start needs a matching va_end in the same function */
    va_end (list);
    fprintf (stderr, "\n\n");
    exit (1);
}
/*
 * Print the usage text to stderr and terminate the program with exit
 * status 1.  Does not return.
 */
void syntax (void)
{
    /* usage line, the only part that needs the program name */
    fprintf (stderr, "\nSyntax: %s [-c] [utf-7 encoded file]\n\n", programname);

    /* fixed description of the output and of the -c option */
    fputs ("The file will be translated to iso-8859-1 *and* iso-8859-15\n"
           "\n"
           " -c : print U+code for characters not in iso-8859-1/15\n"
           "\n",
           stderr);

    exit (1);
}