mirror of
git://gcc.gnu.org/git/gcc.git
synced 2024-12-16 17:39:35 +08:00
cppcharset.c (one_iso88591_to_utf8): New function.
2004-01-16 Eric Christopher <echristo@redhat.com> Chandrakala Chavva <cchavva@redhat.com> * cppcharset.c (one_iso88591_to_utf8): New function. (convert_iso88591_utf8): Ditto. Use. (conversion_tab): Use. (_cpp_input_to_utf8): New function. (_cpp_init_iconv_buffer): Ditto. (_cpp_close_iconv_buffer): Ditto. * cpphash.h: Prototype new functions. (cpp_buffer): Add input_cset_desc. * cppinit.c: Add input_charset default. * cpplib.c (cpp_push_buffer): Support init and close of iconv. * cpplib.h (cpp_options): Add input_charset. From-SVN: r76000
This commit is contained in:
parent
2f9c39f8fc
commit
cf551fbaca
@ -1,3 +1,19 @@
|
||||
2004-01-16 Eric Christopher <echristo@redhat.com>
|
||||
Chandrakala Chavva <cchavva@redhat.com>
|
||||
|
||||
* cppcharset.c (one_iso88591_to_utf8): New function.
|
||||
(convert_iso88591_utf8): Ditto. Use.
|
||||
(conversion_tab): Use.
|
||||
(_cpp_input_to_utf8): New function.
|
||||
(_cpp_init_iconv_buffer): Ditto.
|
||||
(_cpp_close_iconv_buffer): Ditto.
|
||||
* cpphash.h: Prototype new functions.
|
||||
(cpp_buffer): Add input_cset_desc.
|
||||
* cppinit.c: Add input_charset default.
|
||||
* cpplib.c (cpp_push_buffer): Support init and
|
||||
close of iconv.
|
||||
* cpplib.h (cpp_options): Add input_charset.
|
||||
|
||||
2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
|
||||
|
||||
* system.h (ASM_OUTPUT_SECTION_NAME): Poison.
|
||||
@ -14,23 +30,23 @@
|
||||
* fixinc/tests/base/sys/stat.h: Adapt for new hackname.
|
||||
|
||||
* fixinc/inclhack.def (alpha___extern_prefix,
|
||||
alpha___extern_prefix_standards): New hacks to obey
|
||||
alpha___extern_prefix_standards): New hacks to obey
|
||||
__PRAGMA_EXTERN_PREFIX.
|
||||
* fixinc/tests/base/testing.h [ALPHA___EXTERN_PREFIX_CHECK]: New
|
||||
test.
|
||||
* fixinc/tests/base/standards.h: Likewise.
|
||||
|
||||
|
||||
* fixincl/inclhack.def (alpha_pthread): Tweak to match more
|
||||
variations.
|
||||
New testcase.
|
||||
* fixinc/tests/base/pthread.h: Handle it.
|
||||
|
||||
|
||||
* fixincl/inclhack.def (bad_lval): Sort file list.
|
||||
Add many missing files up to Tru64 UNIX V5.1B.
|
||||
* gcc/fixinc/tests/base/libgen.h: Renamed to ...
|
||||
* gcc/fixinc/tests/base/dirent.h: ... this to match new file list
|
||||
order.
|
||||
|
||||
|
||||
* fixinc/fixincl.x: Regenerate.
|
||||
|
||||
2004-01-16 Mark Mitchell <mark@codesourcery.com>
|
||||
|
@ -170,7 +170,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
|
||||
{
|
||||
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
|
||||
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
|
||||
|
||||
cppchar_t c;
|
||||
const uchar *inbuf = *inbufp;
|
||||
size_t nbytes, i;
|
||||
@ -274,7 +274,7 @@ one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
|
||||
The return value is either 0 for success, or an errno value for
|
||||
failure, which may be E2BIG (need more space), EILSEQ (ill-formed
|
||||
input sequence), or EINVAL (incomplete input sequence). */
|
||||
|
||||
|
||||
static inline int
|
||||
one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
|
||||
uchar **outbufp, size_t *outbytesleftp)
|
||||
@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The first 256 code points of ISO 8859.1 have the same numeric
|
||||
values as the first 256 code points of Unicode, therefore the
|
||||
incoming ISO 8859.1 character can be passed directly to
|
||||
one_cppchar_to_utf8 (which expects a Unicode value). */
|
||||
|
||||
static int
|
||||
one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
|
||||
size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
|
||||
{
|
||||
const uchar *inbuf = *inbufp;
|
||||
int rval;
|
||||
|
||||
if (*inbytesleftp > 1)
|
||||
return EINVAL;
|
||||
|
||||
rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
|
||||
if (rval)
|
||||
return rval;
|
||||
|
||||
*inbufp += 1;
|
||||
*inbytesleftp -= 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper routine for the next few functions. The 'const' on
|
||||
one_conversion means that we promise not to modify what function is
|
||||
pointed to, which lets the inliner see through it. */
|
||||
@ -489,7 +514,7 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
|
||||
outbuf = to->text + to->asize - outbytesleft;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* These functions convert entire strings between character sets.
|
||||
They all have the signature
|
||||
@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
|
||||
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
|
||||
}
|
||||
|
||||
/* Convert the FLEN bytes at FROM from ISO 8859-1 to UTF-8 into TO by
   driving one_iso88591_to_utf8 through conversion_loop.  CD is passed
   through to the per-character routine, which ignores it.  Returns
   whatever conversion_loop returns.  */
static bool
|
||||
convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
|
||||
struct _cpp_strbuf *to)
|
||||
{
|
||||
return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
|
||||
}
|
||||
|
||||
|
||||
/* Identity conversion, used when we have no alternative. */
|
||||
static bool
|
||||
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
|
||||
@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
|
||||
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
|
||||
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
|
||||
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
|
||||
{ "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
|
||||
};
|
||||
|
||||
/* Subroutine of cpp_init_iconv: initialize and return a
|
||||
@ -619,7 +653,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
|
||||
struct cset_converter ret;
|
||||
char *pair;
|
||||
size_t i;
|
||||
|
||||
|
||||
if (!strcasecmp (to, from))
|
||||
{
|
||||
ret.func = convert_no_conversion;
|
||||
@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
|
||||
if (ret.cd == (iconv_t) -1)
|
||||
{
|
||||
if (errno == EINVAL)
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
|
||||
"conversion from %s to %s not supported by iconv",
|
||||
from, to);
|
||||
else
|
||||
@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
|
||||
}
|
||||
else
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
|
||||
"no iconv implementation, cannot convert from %s to %s",
|
||||
from, to);
|
||||
ret.func = convert_no_conversion;
|
||||
@ -1270,7 +1304,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
|
||||
*unsignedp = unsigned_p;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* Subroutine of cpp_interpret_charconst which performs the conversion
|
||||
to a number, for wide strings. STR is the string structure returned
|
||||
by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
|
||||
@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
uchar *
|
||||
_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
|
||||
{
|
||||
struct _cpp_strbuf tbuf;
|
||||
struct cset_converter cvt = pfile->buffer->input_cset_desc;
|
||||
|
||||
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
|
||||
tbuf.text = xmalloc (tbuf.asize);
|
||||
tbuf.len = 0;
|
||||
|
||||
if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (length)
|
||||
tbuf.text[tbuf.len] = '\n';
|
||||
else
|
||||
tbuf.text[0] = '\n';
|
||||
|
||||
return tbuf.text;
|
||||
}
|
||||
|
||||
/* Check the input file format. At present assuming the input file
|
||||
is in iso-8859-1 format. Convert this input character set to
|
||||
source character set format (UTF-8). */
|
||||
|
||||
/* Set the current buffer's input_cset_desc to the converter that
   init_iconv_desc builds for converting from the character set named
   FROM to SOURCE_CHARSET.  PFILE->buffer must already point at the
   buffer being initialized.  */
void
|
||||
_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
|
||||
{
|
||||
pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
|
||||
from);
|
||||
}
|
||||
|
||||
/* Close the iconv descriptor held in the current buffer's
   input_cset_desc.  iconv_close is called only when HAVE_ICONV is
   nonzero and the converter's function is convert_using_iconv; in
   every other case this function does nothing.  */
void
|
||||
_cpp_close_iconv_buffer (cpp_reader *pfile)
|
||||
{
|
||||
if (HAVE_ICONV
|
||||
&& pfile->buffer->input_cset_desc.func == convert_using_iconv)
|
||||
iconv_close (pfile->buffer->input_cset_desc.cd);
|
||||
}
|
||||
|
@ -270,7 +270,7 @@ struct cpp_buffer
|
||||
const uchar *cur; /* Current location. */
|
||||
const uchar *line_base; /* Start of current physical line. */
|
||||
const uchar *next_line; /* Start of to-be-cleaned logical line. */
|
||||
|
||||
|
||||
const uchar *buf; /* Entire character buffer. */
|
||||
const uchar *rlimit; /* Writable byte at end of file. */
|
||||
|
||||
@ -313,6 +313,10 @@ struct cpp_buffer
|
||||
|
||||
/* Used for buffer overlays by cpptrad.c. */
|
||||
const uchar *saved_cur, *saved_rlimit;
|
||||
|
||||
/* Descriptor for converting from the input character set to the
|
||||
source character set. */
|
||||
struct cset_converter input_cset_desc;
|
||||
};
|
||||
|
||||
/* A cpp_reader encapsulates the "state" of a pre-processor run.
|
||||
@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
|
||||
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
|
||||
unsigned int, unsigned int);
|
||||
extern void _cpp_pop_buffer (cpp_reader *);
|
||||
extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
|
||||
extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
|
||||
extern void _cpp_close_iconv_buffer (cpp_reader *);
|
||||
|
||||
/* In cpptrad.c. */
|
||||
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
|
||||
|
@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
|
||||
CPP_OPTION (pfile, narrow_charset) = 0;
|
||||
CPP_OPTION (pfile, wide_charset) = 0;
|
||||
|
||||
/* Default the input character set to iso-8859-1 for now. */
|
||||
CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
|
||||
|
||||
/* A fake empty "directory" used as the starting point for files
|
||||
looked up without a search path. Name cannot be '/' because we
|
||||
don't want to prepend anything at all to filenames using it. All
|
||||
|
13
gcc/cpplib.c
13
gcc/cpplib.c
@ -549,14 +549,14 @@ do_undef (cpp_reader *pfile)
|
||||
/* Undefine a single macro/assertion/whatever. */
|
||||
|
||||
static int
|
||||
undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
|
||||
undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
|
||||
void *data_p ATTRIBUTE_UNUSED)
|
||||
{
|
||||
switch (h->type)
|
||||
{
|
||||
case NT_VOID:
|
||||
break;
|
||||
|
||||
|
||||
case NT_MACRO:
|
||||
if (pfile->cb.undef)
|
||||
(*pfile->cb.undef) (pfile, pfile->directive_line, h);
|
||||
@ -855,7 +855,7 @@ do_linemarker (cpp_reader *pfile)
|
||||
cpp_string s = { 0, 0 };
|
||||
if (_cpp_interpret_string_notranslate (pfile, &token->val.str, &s))
|
||||
new_file = (const char *)s.text;
|
||||
|
||||
|
||||
new_sysp = 0;
|
||||
flag = read_flag (pfile, 0);
|
||||
if (flag == 1)
|
||||
@ -1159,7 +1159,7 @@ do_pragma (cpp_reader *pfile)
|
||||
(*p->u.handler) (pfile);
|
||||
if (pfile->cb.line_change)
|
||||
(*pfile->cb.line_change) (pfile, pfile->cur_token, false);
|
||||
|
||||
|
||||
}
|
||||
else if (pfile->cb.def_pragma)
|
||||
{
|
||||
@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
|
||||
int from_stage3)
|
||||
{
|
||||
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
|
||||
const char *input = CPP_OPTION (pfile, input_charset);
|
||||
|
||||
/* Clears, amongst other things, if_stack and mi_cmacro. */
|
||||
memset (new, 0, sizeof (cpp_buffer));
|
||||
@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
|
||||
new->need_line = true;
|
||||
|
||||
pfile->buffer = new;
|
||||
_cpp_init_iconv_buffer (pfile, input);
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
|
||||
/* In case of a missing #endif. */
|
||||
pfile->state.skipping = 0;
|
||||
|
||||
_cpp_close_iconv_buffer (pfile);
|
||||
|
||||
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
|
||||
pfile->buffer = buffer->prev;
|
||||
|
||||
|
@ -332,6 +332,9 @@ struct cpp_options
|
||||
/* Holds the name of the target wide character set. */
|
||||
const char *wide_charset;
|
||||
|
||||
/* Holds the name of the input character set. */
|
||||
const char *input_charset;
|
||||
|
||||
/* True to warn about precompiled header files we couldn't use. */
|
||||
bool warn_invalid_pch;
|
||||
|
||||
@ -417,7 +420,7 @@ struct cpp_dir
|
||||
/* Mapping of file names for this directory for MS-DOS and related
|
||||
platforms. A NULL-terminated array of (from, to) pairs. */
|
||||
const char **name_map;
|
||||
|
||||
|
||||
/* The C front end uses these to recognize duplicated
|
||||
directories in the search path. */
|
||||
ino_t ino;
|
||||
@ -481,7 +484,7 @@ struct cpp_hashnode GTY(())
|
||||
{
|
||||
struct ht_identifier ident;
|
||||
unsigned int is_directive : 1;
|
||||
unsigned int directive_index : 7; /* If is_directive,
|
||||
unsigned int directive_index : 7; /* If is_directive,
|
||||
then index into directive table.
|
||||
Otherwise, a NODE_OPERATOR. */
|
||||
unsigned char rid_code; /* Rid code - for front ends. */
|
||||
|
Loading…
Reference in New Issue
Block a user