Add support for distinct host and target character sets.

This commit is contained in:
Kevin Buettner 2002-09-20 00:24:01 +00:00
parent bb7eb0390b
commit 234b45d446
11 changed files with 2265 additions and 485 deletions

View File

@ -1,3 +1,34 @@
2002-09-19 Jim Blandy <jimb@redhat.com>
Add support for distinct host and target character sets.
* charset.c, charset.h: New files.
* c-exp.y: #include "charset.h".
(yylex): Convert character and string literals to the target
character set, before returning them as the semantic value of the
token.
* c-lang.c: #include "charset.h".
(c_emit_char): Use charset-specific methods to recognize
characters with backslash escape forms, to decide which characters
to print literally and which to print using numeric escape
sequences, and to convert target characters to host characters
before printing.
* utils.c: #include "charset.h".
(no_control_char_error): New function.
(parse_escape): Use charset-specific methods to recognize
backslash escapes, parse `control character' notation, and convert
characters from the host character set to the target character set.
* configure.in: Set the default host character set.
Check where to find iconv, and what its argument types might be.
* acinclude.m4 (AM_ICONV): New macro, borrowed from GCC.
* Makefile.in (SFILES): List charset.c.
(COMMON_OBS): List charset.o.
(charset.o): New rule.
(charset_h): New header dependency variable.
(c-lang.o, utils.o, c-exp.tab.o): Note dependency on $(charset_h).
(LIBICONV): New variable, set by configure.
(CLIBS): Include $(LIBICONV) here.
* aclocal.m4, config.in, configure: Regenerated.
2002-09-19 Joel Brobecker <brobecker@gnat.com>
* ada-exp.y: Add missing semicolons to end rules. Fixes a

View File

@ -138,6 +138,9 @@ INTL_DEPS = @INTLDEPS@
INTL_SRC = $(srcdir)/$(INTL_DIR)
INTL_CFLAGS = -I$(INTL_DIR) -I$(INTL_SRC)
# Where is the ICONV library? This can be empty if libc has iconv.
LIBICONV = @LIBICONV@
#
# CLI sub directory definitions
#
@ -369,6 +372,7 @@ INSTALLED_LIBS=-lbfd -lreadline -lopcodes -liberty \
-lmmalloc -lintl -liberty
CLIBS = $(SIM) $(BFD) $(READLINE) $(OPCODES) $(INTL) $(LIBIBERTY) \
$(TERMCAP) $(XM_CLIBS) $(TM_CLIBS) $(NAT_CLIBS) $(GDBTKLIBS) @LIBS@ \
$(LIBICONV) \
$(MMALLOC) $(LIBIBERTY) $(WIN32LIBS)
CDEPS = $(XM_CDEPS) $(TM_CDEPS) $(NAT_CDEPS) $(SIM) $(BFD) $(READLINE) \
$(OPCODES) $(MMALLOC) $(INTL_DEPS) $(LIBIBERTY) $(CONFIG_DEPS)
@ -526,6 +530,7 @@ TARGET_FLAGS_TO_PASS = \
SFILES = ada-exp.y ada-lang.c ada-typeprint.c ada-valprint.c ada-tasks.c \
ax-general.c ax-gdb.c bcache.c blockframe.c breakpoint.c \
charset.c \
buildsym.c c-exp.y c-lang.c c-typeprint.c c-valprint.c \
coffread.c \
complaints.c completer.c corefile.c cp-valprint.c dbxread.c \
@ -767,6 +772,8 @@ tuiSourceWin_h = $(srcdir)/tui/tuiSourceWin.h
tuiStack_h = $(srcdir)/tui/tuiStack.h
tuiWin_h = $(srcdir)/tui/tuiWin.h
charset_h = charset.h
# Header files that need to have srcdir added. Note that in the cases
# where we use a macro like $(gdbcmd_h), things are carefully arranged
# so that each .h file is listed exactly once (M-x tags-search works
@ -818,6 +825,7 @@ TAGFILES_NO_SRCDIR = $(SFILES) $(HFILES_NO_SRCDIR) $(ALLDEPFILES) \
TAGFILES_WITH_SRCDIR = $(HFILES_WITH_SRCDIR)
COMMON_OBS = version.o blockframe.o breakpoint.o findvar.o regcache.o \
charset.o \
source.o values.o eval.o valops.o valarith.o valprint.o printcmd.o \
symtab.o symfile.o symmisc.o linespec.o infcmd.o infrun.o \
expprint.o environ.o stack.o thread.o \
@ -1439,6 +1447,7 @@ z8k-tdep.o: $(srcdir)/z8k-tdep.c
c-exp.tab.o: c-exp.tab.c $(defs_h) $(gdb_string_h) $(expression_h) \
$(value_h) $(parser_defs_h) $(language_h) $(c_lang_h) $(bfd_h) \
$(charset_h) \
$(symfile_h) $(objfiles_h)
jv-exp.tab.o: jv-exp.tab.c jv-lang.h $(defs_h) $(expression_h) \
@ -2578,4 +2587,8 @@ xdr_rdb.o: vx-share/xdr_rdb.c $(defs_h) vx-share/vxTypes.h \
vx-share/vxWorks.h vx-share/xdr_rdb.h
$(CC) -c $(INTERNAL_CFLAGS) $(srcdir)/vx-share/xdr_rdb.c
charset.o: charset.c $(defs_h) $(charset_h) $(gdbcmd_h) gdb_assert.h
c-lang.o: $(charset_h)
utils.o: $(charset_h)
### end of the gdb Makefile.in.

View File

@ -976,3 +976,71 @@ case "x$am_cv_prog_cc_stdc" in
*) CC="$CC $am_cv_prog_cc_stdc" ;;
esac
])
dnl From Bruno Haible.
dnl AM_ICONV: locate iconv() and describe it to the build.
dnl Results: defines HAVE_ICONV and ICONV_CONST when iconv is usable, and
dnl substitutes LIBICONV (either empty or "-liconv") into the output files.
AC_DEFUN([AM_ICONV],
[
dnl Some systems have iconv in libc, some have it in libiconv (OSF/1 and
dnl those with the standalone portable GNU libiconv installed).
dnl --with-libiconv-prefix takes a colon-separated list of directories; each
dnl existing DIR/include and DIR/lib is appended to CPPFLAGS / LDFLAGS.
AC_ARG_WITH([libiconv-prefix],
[ --with-libiconv-prefix=DIR search for libiconv in DIR/include and DIR/lib], [
for dir in `echo "$withval" | tr : ' '`; do
if test -d $dir/include; then CPPFLAGS="$CPPFLAGS -I$dir/include"; fi
if test -d $dir/lib; then LDFLAGS="$LDFLAGS -L$dir/lib"; fi
done
])
dnl First try to link an iconv call with the current LIBS; if that fails,
dnl retry with -liconv appended and remember (am_cv_lib_iconv) that the
dnl extra library was needed.  LIBS itself is restored afterwards.
AC_CACHE_CHECK(for iconv, am_cv_func_iconv, [
am_cv_func_iconv="no, consider installing GNU libiconv"
am_cv_lib_iconv=no
AC_TRY_LINK([#include <stdlib.h>
#include <iconv.h>],
[iconv_t cd = iconv_open("","");
iconv(cd,NULL,NULL,NULL,NULL);
iconv_close(cd);],
am_cv_func_iconv=yes)
if test "$am_cv_func_iconv" != yes; then
am_save_LIBS="$LIBS"
LIBS="$LIBS -liconv"
AC_TRY_LINK([#include <stdlib.h>
#include <iconv.h>],
[iconv_t cd = iconv_open("","");
iconv(cd,NULL,NULL,NULL,NULL);
iconv_close(cd);],
am_cv_lib_iconv=yes
am_cv_func_iconv=yes)
LIBS="$am_save_LIBS"
fi
])
dnl When iconv was found, probe its declaration: a non-const `char **inbuf'
dnl prototype is redeclared next to <iconv.h>.  If that compiles, ICONV_CONST
dnl is empty; if it conflicts with the header, ICONV_CONST becomes `const'.
if test "$am_cv_func_iconv" = yes; then
AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.])
AC_MSG_CHECKING([for iconv declaration])
AC_CACHE_VAL(am_cv_proto_iconv, [
AC_TRY_COMPILE([
#include <stdlib.h>
#include <iconv.h>
extern
#ifdef __cplusplus
"C"
#endif
#if defined(__STDC__) || defined(__cplusplus)
size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);
#else
size_t iconv();
#endif
], [], am_cv_proto_iconv_arg1="", am_cv_proto_iconv_arg1="const")
am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"])
am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'`
AC_MSG_RESULT([$]{ac_t:-
}[$]am_cv_proto_iconv)
AC_DEFINE_UNQUOTED(ICONV_CONST, $am_cv_proto_iconv_arg1,
[Define as const if the declaration of iconv() needs const.])
fi
dnl LIBICONV is "-liconv" only when the link test needed the separate
dnl library; otherwise it is substituted as the empty string.
LIBICONV=
if test "$am_cv_lib_iconv" = yes; then
LIBICONV="-liconv"
fi
AC_SUBST(LIBICONV)
])

68
gdb/aclocal.m4 vendored
View File

@ -879,6 +879,74 @@ ifelse(yes,no,[
AC_DEFUN([CY_GNU_GETTEXT],)
])
dnl From Bruno Haible.
dnl AM_ICONV: locate iconv() and describe it to the build.
dnl Results: defines HAVE_ICONV and ICONV_CONST when iconv is usable, and
dnl substitutes LIBICONV (either empty or "-liconv") into the output files.
AC_DEFUN([AM_ICONV],
[
dnl Some systems have iconv in libc, some have it in libiconv (OSF/1 and
dnl those with the standalone portable GNU libiconv installed).
dnl --with-libiconv-prefix takes a colon-separated list of directories; each
dnl existing DIR/include and DIR/lib is appended to CPPFLAGS / LDFLAGS.
AC_ARG_WITH([libiconv-prefix],
[ --with-libiconv-prefix=DIR search for libiconv in DIR/include and DIR/lib], [
for dir in `echo "$withval" | tr : ' '`; do
if test -d $dir/include; then CPPFLAGS="$CPPFLAGS -I$dir/include"; fi
if test -d $dir/lib; then LDFLAGS="$LDFLAGS -L$dir/lib"; fi
done
])
dnl First try to link an iconv call with the current LIBS; if that fails,
dnl retry with -liconv appended and remember (am_cv_lib_iconv) that the
dnl extra library was needed.  LIBS itself is restored afterwards.
AC_CACHE_CHECK(for iconv, am_cv_func_iconv, [
am_cv_func_iconv="no, consider installing GNU libiconv"
am_cv_lib_iconv=no
AC_TRY_LINK([#include <stdlib.h>
#include <iconv.h>],
[iconv_t cd = iconv_open("","");
iconv(cd,NULL,NULL,NULL,NULL);
iconv_close(cd);],
am_cv_func_iconv=yes)
if test "$am_cv_func_iconv" != yes; then
am_save_LIBS="$LIBS"
LIBS="$LIBS -liconv"
AC_TRY_LINK([#include <stdlib.h>
#include <iconv.h>],
[iconv_t cd = iconv_open("","");
iconv(cd,NULL,NULL,NULL,NULL);
iconv_close(cd);],
am_cv_lib_iconv=yes
am_cv_func_iconv=yes)
LIBS="$am_save_LIBS"
fi
])
dnl When iconv was found, probe its declaration: a non-const `char **inbuf'
dnl prototype is redeclared next to <iconv.h>.  If that compiles, ICONV_CONST
dnl is empty; if it conflicts with the header, ICONV_CONST becomes `const'.
if test "$am_cv_func_iconv" = yes; then
AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.])
AC_MSG_CHECKING([for iconv declaration])
AC_CACHE_VAL(am_cv_proto_iconv, [
AC_TRY_COMPILE([
#include <stdlib.h>
#include <iconv.h>
extern
#ifdef __cplusplus
"C"
#endif
#if defined(__STDC__) || defined(__cplusplus)
size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);
#else
size_t iconv();
#endif
], [], am_cv_proto_iconv_arg1="", am_cv_proto_iconv_arg1="const")
am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"])
am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'`
AC_MSG_RESULT([$]{ac_t:-
}[$]am_cv_proto_iconv)
AC_DEFINE_UNQUOTED(ICONV_CONST, $am_cv_proto_iconv_arg1,
[Define as const if the declaration of iconv() needs const.])
fi
dnl LIBICONV is "-liconv" only when the link test needed the separate
dnl library; otherwise it is substituted as the empty string.
LIBICONV=
if test "$am_cv_lib_iconv" = yes; then
LIBICONV="-liconv"
fi
AC_SUBST(LIBICONV)
])
# Copyright 1996, 1997, 1999, 2000, 2001 Free Software Foundation, Inc.

View File

@ -49,6 +49,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "bfd.h" /* Required by objfiles.h. */
#include "symfile.h" /* Required by objfiles.h. */
#include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
#include "charset.h"
/* Flag indicating we're dealing with HP-compiled objects */
extern int hp_som_som_object_present;
@ -1314,6 +1315,15 @@ yylex ()
c = parse_escape (&lexptr);
else if (c == '\'')
error ("Empty character constant.");
else if (! host_char_to_target (c, &c))
{
int toklen = lexptr - tokstart + 1;
char *tok = alloca (toklen + 1);
memcpy (tok, tokstart, toklen);
tok[toklen] = '\0';
error ("There is no character corresponding to %s in the target "
"character set `%s'.", tok, target_charset ());
}
yylval.typed_val_int.val = c;
yylval.typed_val_int.type = builtin_type_char;
@ -1464,6 +1474,8 @@ yylex ()
tempbufindex = 0;
do {
char *char_start_pos = tokptr;
/* Grow the static temp buffer if necessary, including allocating
the first one on demand. */
if (tempbufindex + 1 >= tempbufsize)
@ -1486,7 +1498,19 @@ yylex ()
tempbuf[tempbufindex++] = c;
break;
default:
tempbuf[tempbufindex++] = *tokptr++;
c = *tokptr++;
if (! host_char_to_target (c, &c))
{
int len = tokptr - char_start_pos;
char *copy = alloca (len + 1);
memcpy (copy, char_start_pos, len);
copy[len] = '\0';
error ("There is no character corresponding to `%s' "
"in the target character set `%s'.",
copy, target_charset ());
}
tempbuf[tempbufindex++] = c;
break;
}
} while ((*tokptr != '"') && (*tokptr != '\0'));

View File

@ -29,6 +29,7 @@
#include "valprint.h"
#include "macroscope.h"
#include "gdb_assert.h"
#include "charset.h"
extern void _initialize_c_language (void);
static void c_emit_char (int c, struct ui_file * stream, int quoter);
@ -40,55 +41,30 @@ static void c_emit_char (int c, struct ui_file * stream, int quoter);
static void
c_emit_char (register int c, struct ui_file *stream, int quoter)
{
const char *escape;
int host_char;
c &= 0xFF; /* Avoid sign bit follies */
if (PRINT_LITERAL_FORM (c))
escape = c_target_char_has_backslash_escape (c);
if (escape)
{
if (c == '\\' || c == quoter)
{
fputs_filtered ("\\", stream);
}
fprintf_filtered (stream, "%c", c);
if (quoter == '"' && strcmp (escape, "0") == 0)
/* Print nulls embedded in double quoted strings as \000 to
prevent ambiguity. */
fprintf_filtered (stream, "\\000");
else
fprintf_filtered (stream, "\\%s", escape);
}
else if (target_char_to_host (c, &host_char)
&& host_char_print_literally (host_char))
{
if (host_char == '\\' || host_char == quoter)
fputs_filtered ("\\", stream);
fprintf_filtered (stream, "%c", host_char);
}
else
{
switch (c)
{
case '\n':
fputs_filtered ("\\n", stream);
break;
case '\b':
fputs_filtered ("\\b", stream);
break;
case '\t':
fputs_filtered ("\\t", stream);
break;
case '\f':
fputs_filtered ("\\f", stream);
break;
case '\r':
fputs_filtered ("\\r", stream);
break;
case '\013':
fputs_filtered ("\\v", stream);
break;
case '\033':
fputs_filtered ("\\e", stream);
break;
case '\007':
fputs_filtered ("\\a", stream);
break;
case '\0':
if (quoter == '\'')
fputs_filtered ("\\0", stream);
else
fprintf_filtered (stream, "\\%.3o", (unsigned int) c);
break;
default:
fprintf_filtered (stream, "\\%.3o", (unsigned int) c);
break;
}
}
fprintf_filtered (stream, "\\%.3o", (unsigned int) c);
}
void

1274
gdb/charset.c Normal file

File diff suppressed because it is too large Load Diff

120
gdb/charset.h Normal file
View File

@ -0,0 +1,120 @@
/* Character set conversion support for GDB.
Copyright 2001 Free Software Foundation, Inc.
This file is part of GDB.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifndef CHARSET_H
#define CHARSET_H
/* If the target program uses a different character set than the host,
GDB has some support for translating between the two; GDB converts
characters and strings to the host character set before displaying
them, and converts characters and strings appearing in expressions
entered by the user to the target character set.
At the moment, GDB only supports single-byte, stateless character
sets. This includes the ISO-8859 family (ASCII extended with
accented characters, and (I think) Cyrillic, for European
languages), and the EBCDIC family (used on IBM's mainframes).
Unfortunately, it excludes many Asian scripts, the fixed- and
variable-width Unicode encodings, and other desirable things.
Patches are welcome! (For example, it would be nice if the Java
string support could simply get absorbed into some more general
multi-byte encoding support.)
Furthermore, GDB's code pretty much assumes that the host character
set is some superset of ASCII; there are plenty of ('0' + n)
expressions and the like.
When the `iconv' library routine supports a character set meeting
the requirements above, it's easy to plug an entry into GDB's table
that uses iconv to handle the details. */
/* Set the host character set to CHARSET. CHARSET must be a superset
of ASCII, since GDB's code assumes this. */
void set_host_charset (const char *charset);
/* Set the target character set to CHARSET. */
void set_target_charset (const char *charset);
/* Return the name of the current host/target character set. The
result is owned by the charset module; the caller should not free
it. */
const char *host_charset (void);
const char *target_charset (void);
/* In general, the set of C backslash escapes (\n, \f) is specific to
the character set. Not all character sets will have form feed
characters, for example.
The following functions allow GDB to parse and print control
characters in a character-set-independent way. They are both
language-specific (to C and C++) and character-set-specific.
Putting them here is a compromise. */
/* If the target character TARGET_CHAR has a backslash escape in the
C language (i.e., a character like 'n' or 't'), return the host
character string that should follow the backslash. Otherwise,
return zero.
When this function returns non-zero, the string it returns is
statically allocated; the caller is not responsible for freeing it. */
const char *c_target_char_has_backslash_escape (int target_char);
/* If the host character HOST_CHAR is a valid backslash escape in the
C language for the target character set, return non-zero, and set
*TARGET_CHAR to the target character the backslash escape represents.
Otherwise, return zero. */
int c_parse_backslash (int host_char, int *target_char);
/* Return non-zero if the host character HOST_CHAR can be printed
literally --- that is, if it can be readably printed as itself in a
character or string constant. Return zero if it should be printed
using some kind of numeric escape, like '\031' in C, '^(25)' in
Chill, or #25 in Pascal. */
int host_char_print_literally (int host_char);
/* If the host character HOST_CHAR has an equivalent in the target
character set, set *TARGET_CHAR to that equivalent, and return
non-zero. Otherwise, return zero. */
int host_char_to_target (int host_char, int *target_char);
/* If the target character TARGET_CHAR has an equivalent in the host
character set, set *HOST_CHAR to that equivalent, and return
non-zero. Otherwise, return zero. */
int target_char_to_host (int target_char, int *host_char);
/* If the target character TARGET_CHAR has a corresponding control
character (also in the target character set), set *TARGET_CTRL_CHAR
to the control character, and return non-zero. Otherwise, return
zero. */
int target_char_to_control_char (int target_char, int *target_ctrl_char);
#endif /* CHARSET_H */

985
gdb/configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -1362,6 +1362,18 @@ AC_LINK_FILES($files, $links)
dnl Check for exe extension set on certain hosts (e.g. Win32)
AC_EXEEXT
dnl Detect the character set used by this host.
dnl At the moment, we just assume it's ISO-8859-1 (which is a
dnl superset of ASCII containing the characters needed for French,
dnl German, Spanish, Italian, and possibly others), but if we
dnl *were* to support any host character sets other than ISO-8859-1,
dnl here's where we'd detect it.
AC_DEFINE(GDB_DEFAULT_HOST_CHARSET, "ISO-8859-1",
[Define to be a string naming the default host character set.])
AM_ICONV
AC_CONFIG_SUBDIRS($configdirs)
AC_OUTPUT(Makefile .gdbinit:gdbinit.in,
[

View File

@ -60,6 +60,7 @@
#include "demangle.h"
#include "expression.h"
#include "language.h"
#include "charset.h"
#include "annotate.h"
#include "filenames.h"
@ -1362,6 +1363,23 @@ query (const char *ctlstr,...)
}
/* Report, via error (), that a \^mumble sequence in a string or
   character constant names no control character.  START and END
   delimit the offending text inside some larger string; the text
   begins just after the backslash, which the message supplies
   itself.  The current target character set is named in the message.  */
static NORETURN int
no_control_char_error (const char *start, const char *end)
{
  int seq_len = end - start;
  /* Scratch copy so the substring can be NUL-terminated for `%s'.  */
  char *seq = alloca (seq_len + 1);

  memcpy (seq, start, seq_len);
  seq[seq_len] = '\0';

  error ("There is no control character `\\%s' in the `%s' character set.",
         seq, target_charset ());
}
/* Parse a C escape sequence. STRING_PTR points to a variable
containing a pointer to the string to parse. That pointer
should point to the character after the \. That pointer
@ -1380,37 +1398,55 @@ query (const char *ctlstr,...)
int
parse_escape (char **string_ptr)
{
int target_char;
register int c = *(*string_ptr)++;
switch (c)
if (c_parse_backslash (c, &target_char))
return target_char;
else switch (c)
{
case 'a':
return 007; /* Bell (alert) char */
case 'b':
return '\b';
case 'e': /* Escape character */
return 033;
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 't':
return '\t';
case 'v':
return '\v';
case '\n':
return -2;
case 0:
(*string_ptr)--;
return 0;
case '^':
c = *(*string_ptr)++;
if (c == '\\')
c = parse_escape (string_ptr);
if (c == '?')
return 0177;
return (c & 0200) | (c & 037);
{
/* Remember where this escape sequence started, for reporting
errors. */
char *sequence_start_pos = *string_ptr - 1;
c = *(*string_ptr)++;
if (c == '?')
  {
    /* `\^?' denotes Delete.  */
    /* XXXCHARSET: What is `delete' in the host character set? */
    c = 0177;
    /* The conversion that failed is host -> target, and the message
       talks about the target character set, so report
       target_charset () rather than the host one.  */
    if (! host_char_to_target (c, &target_char))
      error ("There is no character corresponding to `Delete' "
             "in the target character set `%s'.",
             target_charset ());
    return target_char;
  }
else if (c == '\\')
target_char = parse_escape (string_ptr);
else
{
if (! host_char_to_target (c, &target_char))
no_control_char_error (sequence_start_pos, *string_ptr);
}
/* Now target_char is something like `c', and we want to find
its control-character equivalent. */
if (! target_char_to_control_char (target_char, &target_char))
no_control_char_error (sequence_start_pos, *string_ptr);
return target_char;
}
/* XXXCHARSET: we need to use isdigit and value-of-digit
methods of the host character set here. */
case '0':
case '1':
@ -1439,7 +1475,12 @@ parse_escape (char **string_ptr)
return i;
}
default:
  /* Any other character after a backslash stands for itself, provided
     it exists in the target character set.  The backslash in the
     message must be written `\\': `\%' is not a defined C escape
     sequence, and the backslash would be lost from the output.  */
  if (! host_char_to_target (c, &target_char))
    error ("The escape sequence `\\%c' is equivalent to plain `%c', which"
           " has no equivalent\n"
           "in the `%s' character set.",
           c, c, target_charset ());
  return target_char;
}
}