cppcharset.c (one_iso88591_to_utf8): New function.

2004-01-16  Eric Christopher  <echristo@redhat.com>
	    Chandrakala Chavva <cchavva@redhat.com>

	* cppcharset.c (one_iso88591_to_utf8): New function.
	(convert_iso88591_utf8): Ditto. Use.
	(conversion_tab): Use.
	(_cpp_input_to_utf8): New function.
	(_cpp_init_iconv_buffer): Ditto.
	(_cpp_close_iconv_buffer): Ditto.
	* cpphash.h: Prototype new functions.
	(cpp_buffer): Add input_cset_desc.
	* cppinit.c: Add input_charset default.
	* cpplib.c (cpp_push_buffer): Support init and
	close of iconv.
	* cpplib.h (cpp_options): Add input_charset.

From-SVN: r76000
This commit is contained in:
Eric Christopher 2004-01-16 22:37:49 +00:00
parent 2f9c39f8fc
commit cf551fbaca
6 changed files with 129 additions and 18 deletions

View File

@ -1,3 +1,19 @@
2004-01-16 Eric Christopher <echristo@redhat.com>
Chandrakala Chavva <cchavva@redhat.com>
* cppcharset.c (one_iso88591_to_utf8): New function.
(convert_iso88591_utf8): Ditto. Use.
(conversion_tab): Use.
(_cpp_input_to_utf8): New function.
(_cpp_init_iconv_buffer): Ditto.
(_cpp_close_iconv_buffer): Ditto.
* cpphash.h: Prototype new functions.
(cpp_buffer): Add input_cset_desc.
* cppinit.c: Add input_charset default.
* cpplib.c (cpp_push_buffer): Support init and
close of iconv.
* cpplib.h (cpp_options): Add input_charset.
2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
* system.h (ASM_OUTPUT_SECTION_NAME): Poison.
@ -14,23 +30,23 @@
* fixinc/tests/base/sys/stat.h: Adapt for new hackname.
* fixinc/inclhack.def (alpha___extern_prefix,
alpha___extern_prefix_standards): New hacks to obey
alpha___extern_prefix_standards): New hacks to obey
__PRAGMA_EXTERN_PREFIX.
* fixinc/tests/base/testing.h [ALPHA___EXTERN_PREFIX_CHECK]: New
test.
* fixinc/tests/base/standards.h: Likewise.
* fixincl/inclhack.def (alpha_pthread): Tweak to match more
variations.
New testcase.
* fixinc/tests/base/pthread.h: Handle it.
* fixincl/inclhack.def (bad_lval): Sort file list.
Add many missing files up to Tru64 UNIX V5.1B.
* gcc/fixinc/tests/base/libgen.h: Renamed to ...
* gcc/fixinc/tests/base/dirent.h: ... this to match new file list
order.
* fixinc/fixincl.x: Regenerate.
2004-01-16 Mark Mitchell <mark@codesourcery.com>

View File

@ -170,7 +170,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
{
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
cppchar_t c;
const uchar *inbuf = *inbufp;
size_t nbytes, i;
@ -274,7 +274,7 @@ one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
The return value is either 0 for success, or an errno value for
failure, which may be E2BIG (need more space), EILSEQ (ill-formed
input sequence), or EINVAL (incomplete input sequence). */
static inline int
one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
uchar **outbufp, size_t *outbytesleftp)
@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0;
}
/* Convert the single ISO 8859-1 character at **INBUFP to UTF-8.

   The first 256 code points of ISO 8859-1 have the same numeric
   values as the first 256 code points of Unicode, therefore the
   incoming ISO 8859-1 character can be passed directly to
   one_cppchar_to_utf8 (which expects a Unicode value).

   BIGEND is unused; it is present only so that this function has the
   signature conversion_loop expects.  OUTBUFP and OUTBYTESLEFTP are
   handed unchanged to one_cppchar_to_utf8.

   Returns 0 on success, in which case *INBUFP has been advanced by
   one byte and *INBYTESLEFTP decremented by one.  Returns EINVAL if
   no input remains, or the nonzero value reported by
   one_cppchar_to_utf8; in either failure case the input pointer and
   count are left untouched.  */
static int
one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
                      size_t *inbytesleftp, uchar **outbufp,
                      size_t *outbytesleftp)
{
  int rval;

  /* Every ISO 8859-1 character is exactly one byte, so the only way
     the input can be incomplete is for it to be empty.  Checking for
     "more than one byte left" here would reject every string longer
     than one character and would read past the end of an empty one.  */
  if (*inbytesleftp < 1)
    return EINVAL;

  rval = one_cppchar_to_utf8 ((cppchar_t) **inbufp, outbufp, outbytesleftp);
  if (rval)
    return rval;

  /* Consume the byte only after it has been converted successfully.  */
  *inbufp += 1;
  *inbytesleftp -= 1;
  return 0;
}
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@ -489,7 +514,7 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
outbuf = to->text + to->asize - outbytesleft;
}
}
/* These functions convert entire strings between character sets.
They all have the signature
@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
/* Convert the FLEN bytes at FROM from ISO 8859-1 to UTF-8 by driving
   one_iso88591_to_utf8 through conversion_loop, with TO as the output
   string buffer.  CD is handed on to conversion_loop; the
   per-character converter ignores it.  The return value is whatever
   conversion_loop returns.  */
static bool
convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
struct _cpp_strbuf *to)
{
return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
}
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
{ "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
@ -619,7 +653,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
struct cset_converter ret;
char *pair;
size_t i;
if (!strcasecmp (to, from))
{
ret.func = convert_no_conversion;
@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
if (ret.cd == (iconv_t) -1)
{
if (errno == EINVAL)
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
}
else
{
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
ret.func = convert_no_conversion;
@ -1270,7 +1304,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
*unsignedp = unsigned_p;
return result;
}
/* Subroutine of cpp_interpret_charconst which performs the conversion
to a number, for wide strings. STR is the string structure returned
by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
return result;
}
/* Convert the LENGTH bytes at INPUT from the input character set to
   the source character set, using the converter stored in the
   input_cset_desc field of PFILE's current buffer.

   On success, returns an xmalloc'd buffer holding the converted text
   followed by a single '\n'.  The buffer is not NUL-terminated and
   its length is not reported; it is not freed here, so the caller is
   responsible for it.  For empty input (LENGTH == 0) the newline is
   stored at offset 0.

   On failure, issues an error, releases the output buffer and
   returns NULL.  */
uchar *
_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
{
  struct _cpp_strbuf tbuf;
  struct cset_converter cvt = pfile->buffer->input_cset_desc;
  size_t end;

  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
  tbuf.text = xmalloc (tbuf.asize);
  tbuf.len = 0;

  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
    {
      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
      /* The partially filled output is of no use to anyone; do not
         leak it.  */
      free (tbuf.text);
      return NULL;
    }

  /* Offset at which the terminating newline goes.  */
  end = length ? tbuf.len : 0;

  /* The converter may have filled the buffer exactly (e.g. pure ASCII
     input of at least OUTBUF_BLOCK_SIZE bytes), leaving no room for
     the newline; grow the buffer by one byte rather than write past
     its end.  */
  if (end >= tbuf.asize)
    {
      tbuf.asize = end + 1;
      tbuf.text = xrealloc (tbuf.text, tbuf.asize);
    }

  tbuf.text[end] = '\n';
  return tbuf.text;
}
/* Set up the input character set converter for PFILE's current
   buffer.  FROM names the character set the input is assumed to be
   in; no inspection of the input itself is done here.  The descriptor
   returned by init_iconv_desc for converting from FROM to the source
   character set (SOURCE_CHARSET) is stored in the buffer's
   input_cset_desc field.  pfile->buffer must already point at the
   buffer being initialized.  */
void
_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
{
pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
from);
}
/* Release the iconv descriptor held by the input converter of PFILE's
   current buffer.  Nothing is done unless iconv is available and the
   converter's function is convert_using_iconv; for any other
   converter the cd field is left alone.  */
void
_cpp_close_iconv_buffer (cpp_reader *pfile)
{
  struct cset_converter *desc;

  if (!HAVE_ICONV)
    return;

  desc = &pfile->buffer->input_cset_desc;
  if (desc->func != convert_using_iconv)
    return;

  iconv_close (desc->cd);
}

View File

@ -270,7 +270,7 @@ struct cpp_buffer
const uchar *cur; /* Current location. */
const uchar *line_base; /* Start of current physical line. */
const uchar *next_line; /* Start of to-be-cleaned logical line. */
const uchar *buf; /* Entire character buffer. */
const uchar *rlimit; /* Writable byte at end of file. */
@ -313,6 +313,10 @@ struct cpp_buffer
/* Used for buffer overlays by cpptrad.c. */
const uchar *saved_cur, *saved_rlimit;
/* Descriptor for converting from the input character set to the
source character set. */
struct cset_converter input_cset_desc;
};
/* A cpp_reader encapsulates the "state" of a pre-processor run.
@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *);
extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);

View File

@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
CPP_OPTION (pfile, narrow_charset) = 0;
CPP_OPTION (pfile, wide_charset) = 0;
/* Default the input character set to iso-8859-1 for now. */
CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
/* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we
don't want to prepend anything at all to filenames using it. All

View File

@ -549,14 +549,14 @@ do_undef (cpp_reader *pfile)
/* Undefine a single macro/assertion/whatever. */
static int
undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
void *data_p ATTRIBUTE_UNUSED)
{
switch (h->type)
{
case NT_VOID:
break;
case NT_MACRO:
if (pfile->cb.undef)
(*pfile->cb.undef) (pfile, pfile->directive_line, h);
@ -855,7 +855,7 @@ do_linemarker (cpp_reader *pfile)
cpp_string s = { 0, 0 };
if (_cpp_interpret_string_notranslate (pfile, &token->val.str, &s))
new_file = (const char *)s.text;
new_sysp = 0;
flag = read_flag (pfile, 0);
if (flag == 1)
@ -1159,7 +1159,7 @@ do_pragma (cpp_reader *pfile)
(*p->u.handler) (pfile);
if (pfile->cb.line_change)
(*pfile->cb.line_change) (pfile, pfile->cur_token, false);
}
else if (pfile->cb.def_pragma)
{
@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
int from_stage3)
{
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer));
@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
new->need_line = true;
pfile->buffer = new;
_cpp_init_iconv_buffer (pfile, input);
return new;
}
@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
/* In case of a missing #endif. */
pfile->state.skipping = 0;
_cpp_close_iconv_buffer (pfile);
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev;

View File

@ -332,6 +332,9 @@ struct cpp_options
/* Holds the name of the target wide character set. */
const char *wide_charset;
/* Holds the name of the input character set. */
const char *input_charset;
/* True to warn about precompiled header files we couldn't use. */
bool warn_invalid_pch;
@ -417,7 +420,7 @@ struct cpp_dir
/* Mapping of file names for this directory for MS-DOS and related
platforms. A NULL-terminated array of (from, to) pairs. */
const char **name_map;
/* The C front end uses these to recognize duplicated
directories in the search path. */
ino_t ino;
@ -481,7 +484,7 @@ struct cpp_hashnode GTY(())
{
struct ht_identifier ident;
unsigned int is_directive : 1;
unsigned int directive_index : 7; /* If is_directive,
unsigned int directive_index : 7; /* If is_directive,
then index into directive table.
Otherwise, a NODE_OPERATOR. */
unsigned char rid_code; /* Rid code - for front ends. */