From 53e710acd249e1861029b19b7a3d8195e7f28929 Mon Sep 17 00:00:00 2001 From: Pedro Alves Date: Wed, 12 Apr 2017 14:00:49 +0100 Subject: [PATCH] Fix PR c++/21323: GDB thinks char16_t and char32_t are signed in C++ While the C++ standard says that char16_t and char32_t are unsigned types: Types char16_t and char32_t denote distinct types with the same size, signedness, and alignment as uint_least16_t and uint_least32_t, respectively, in <stdint.h>, called the underlying types. ... gdb treats them as signed currently: (gdb) p (char16_t)-1 $1 = -1 u'\xffff' There are actually two places in gdb that hardcode these types: - gdbtypes.c:gdbtypes_post_init, when creating the built-in types, seemingly used by the "x /s" command (judging from commit 9a22f0d0). - dwarf2read.c, when reading base types with DW_ATE_UTF encoding (which is what is used for these types, when compiling for C++11 and up). Despite the comment, the type created does end up used. Both places need fixing. But since I couldn't tell why dwarf2read.c needs to create a new type, I've made it use the per-arch built-in types instead, so that the types are only created once per arch instead of once per objfile. That seems to work fine. While writing the test, I noticed that the C++ language parser isn't actually aware of these built-in types, so if you try to use them without a program that uses them, you get: (gdb) set language c++ (gdb) ptype char16_t No symbol table is loaded. Use the "file" command. (gdb) ptype u"hello" No type named char16_t. (gdb) p u"hello" No type named char16_t. That's fixed by simply adding a couple entries to C++'s built-in types array in c-lang.c. With that, we get the expected: (gdb) ptype char16_t type = char16_t (gdb) ptype u"hello" type = char16_t [6] (gdb) p u"hello" $1 = u"hello" gdb/ChangeLog: 2017-04-12 Pedro Alves PR c++/21323 * c-lang.c (cplus_primitive_types) <cplus_primitive_type_char16_t, cplus_primitive_type_char32_t>: New enum values. (cplus_language_arch_info): Register cplus_primitive_type_char16_t and cplus_primitive_type_char32_t.
* dwarf2read.c (read_base_type) <DW_ATE_UTF>: If bit size is 16 or 32, use the architecture's built-in type for char16_t and char32_t, respectively. Otherwise, fall back to init_integer_type as before, but make the type unsigned, and issue a complaint. * gdbtypes.c (gdbtypes_post_init): Make char16_t and char32_t unsigned. gdb/testsuite/ChangeLog: 2017-04-12 Pedro Alves PR c++/21323 * gdb.cp/wide_char_types.c: New file. * gdb.cp/wide_char_types.exp: New file. --- gdb/ChangeLog | 13 +++ gdb/c-lang.c | 6 + gdb/dwarf2read.c | 19 ++- gdb/gdbtypes.c | 5 +- gdb/testsuite/ChangeLog | 6 + gdb/testsuite/gdb.cp/wide_char_types.c | 28 +++++ gdb/testsuite/gdb.cp/wide_char_types.exp | 143 +++++++++++++++++++++++ 7 files changed, 214 insertions(+), 6 deletions(-) create mode 100644 gdb/testsuite/gdb.cp/wide_char_types.c create mode 100644 gdb/testsuite/gdb.cp/wide_char_types.exp diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 4c03fc0a219..bdcbd1e2f96 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,16 @@ +2017-04-12 Pedro Alves + + PR c++/21323 + * c-lang.c (cplus_primitive_types) <cplus_primitive_type_char16_t, + cplus_primitive_type_char32_t>: New enum values. + (cplus_language_arch_info): Register cplus_primitive_type_char16_t + and cplus_primitive_type_char32_t. + * dwarf2read.c (read_base_type) <DW_ATE_UTF>: If bit size is 16 or + 32, use the architecture's built-in type for char16_t and char32_t, + respectively. Otherwise, fall back to init_integer_type as before, + but make the type unsigned, and issue a complaint. + * gdbtypes.c (gdbtypes_post_init): Make char16_t and char32_t unsigned. + 2017-04-12 Alan Hayward * m32r-tdep.c (M32R_ARG_REGISTER_SIZE): Added.
diff --git a/gdb/c-lang.c b/gdb/c-lang.c index a1001992fcb..616aa267e84 100644 --- a/gdb/c-lang.c +++ b/gdb/c-lang.c @@ -895,6 +895,8 @@ enum cplus_primitive_types { cplus_primitive_type_decfloat, cplus_primitive_type_decdouble, cplus_primitive_type_declong, + cplus_primitive_type_char16_t, + cplus_primitive_type_char32_t, nr_cplus_primitive_types }; @@ -950,6 +952,10 @@ cplus_language_arch_info (struct gdbarch *gdbarch, = builtin->builtin_decdouble; lai->primitive_type_vector [cplus_primitive_type_declong] = builtin->builtin_declong; + lai->primitive_type_vector [cplus_primitive_type_char16_t] + = builtin->builtin_char16; + lai->primitive_type_vector [cplus_primitive_type_char32_t] + = builtin->builtin_char32; lai->bool_type_symbol = "bool"; lai->bool_type_default = builtin->builtin_bool; diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c index 966e1ee81ba..e390b322978 100644 --- a/gdb/dwarf2read.c +++ b/gdb/dwarf2read.c @@ -15125,9 +15125,22 @@ read_base_type (struct die_info *die, struct dwarf2_cu *cu) type = init_integer_type (objfile, bits, 1, name); break; case DW_ATE_UTF: - /* We just treat this as an integer and then recognize the - type by name elsewhere. */ - type = init_integer_type (objfile, bits, 0, name); + { + gdbarch *arch = get_objfile_arch (objfile); + + if (bits == 16) + type = builtin_type (arch)->builtin_char16; + else if (bits == 32) + type = builtin_type (arch)->builtin_char32; + else + { + complaint (&symfile_complaints, + _("unsupported DW_ATE_UTF bit size: '%d'"), + bits); + type = init_integer_type (objfile, bits, 1, name); + } + return set_die_type (die, type, cu); + } break; default: diff --git a/gdb/gdbtypes.c b/gdb/gdbtypes.c index 6f3aeabc173..c1f76fb539e 100644 --- a/gdb/gdbtypes.c +++ b/gdb/gdbtypes.c @@ -5204,10 +5204,9 @@ gdbtypes_post_init (struct gdbarch *gdbarch) /* Wide character types. 
*/ builtin_type->builtin_char16 - = arch_integer_type (gdbarch, 16, 0, "char16_t"); + = arch_integer_type (gdbarch, 16, 1, "char16_t"); builtin_type->builtin_char32 - = arch_integer_type (gdbarch, 32, 0, "char32_t"); - + = arch_integer_type (gdbarch, 32, 1, "char32_t"); /* Default data/code pointer types. */ builtin_type->builtin_data_ptr diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index 83d6018e8ac..e2555e05c37 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-04-12 Pedro Alves + + PR c++/21323 + * gdb.cp/wide_char_types.c: New file. + * gdb.cp/wide_char_types.exp: New file. + 2017-04-05 Sergio Durigan Junior PR gdb/21352 diff --git a/gdb/testsuite/gdb.cp/wide_char_types.c b/gdb/testsuite/gdb.cp/wide_char_types.c new file mode 100644 index 00000000000..8337cd4cbce --- /dev/null +++ b/gdb/testsuite/gdb.cp/wide_char_types.c @@ -0,0 +1,28 @@ +/* This testcase is part of GDB, the GNU debugger. + + Copyright 2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <uchar.h> + +char16_t u16 = -1; +char32_t u32 = -1; + +int +main () +{ + return 0; +} diff --git a/gdb/testsuite/gdb.cp/wide_char_types.exp b/gdb/testsuite/gdb.cp/wide_char_types.exp new file mode 100644 index 00000000000..df5c8a8f1df --- /dev/null +++ b/gdb/testsuite/gdb.cp/wide_char_types.exp @@ -0,0 +1,143 @@ +# This testcase is part of GDB, the GNU debugger.
+ +# Copyright 2017 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Test GDB's awareness of the char16_t, char32_t (C++11+) built-in +# types. We also run most tests here in C mode, and check whether the +# built-ins are disabled (gdb uses the typedefs in the debug info +# instead.) + +standard_testfile + +# Test char16_t/char32_t in language LANG, against symbols in +# a program. LANG can be "c", "c++03" or "c++11". In C++11, +# char16_t/char32_t are built-in types, and the debug information +# reflects that (see +# http://wiki.dwarfstd.org/index.php?title=C%2B%2B0x:_New_string_literals). + +proc wide_char_types_program {lang} { + global srcfile testfile + + set options {debug} + if {$lang == "c++03"} { + lappend options c++ additional_flags=-std=c++03 + set out $testfile-cxx03 + } elseif {$lang == "c++11"} { + lappend options c++ additional_flags=-std=c++11 + set out $testfile-cxx11 + } else { + set out $testfile-c + } + + if { [prepare_for_testing "failed to prepare" \ + ${out} [list $srcfile] $options] } { + return -1 + } + + if ![runto_main] then { + fail "can't run to main" + return 0 + } + do_test_wide_char $lang "u16" "u32" +} + +# Test char16_t/char32_t in language LANG. Use CHAR16_EXP and +# CHAR32_EXP as expression for each of the corresponding types.
+# (E.g., CHAR16_EXP will be u16 when testing against the program, and +# "(char16_t)-1" when testing the built-in types without a program +# loaded.) + +proc do_test_wide_char {lang char16_exp char32_exp} { + global gdb_prompt + + # Check that the fixed-width wide types are distinct built-in + # types in C++11+. In other modes, they're instead typedefs, + # found in the debug info. + if {$lang == "c++11"} { + gdb_test "ptype $char16_exp" "type = char16_t" \ + "char16_t is distinct" + gdb_test "ptype $char32_exp" "type = char32_t" \ + "char32_t is distinct" + } else { + gdb_test "ptype $char16_exp" "type = unsigned (long|int|short)" \ + "char16_t is typedef" + gdb_test "ptype $char32_exp" "type = unsigned (long|int|short)" \ + "char32_t is typedef" + } + + # Check that the fixed-width wide char types are unsigned. + gdb_test "p $char16_exp" " = 65535 u'\\\\xffff'" \ + "char16_t is unsigned" + gdb_test "p $char32_exp" " = 4294967295 U'\\\\xffffffff'" \ + "char32_t is unsigned" + + # Check sizeof. These are fixed-width. + gdb_test "p sizeof($char16_exp)" "= 2" \ + "sizeof($char16_exp) == 2" + gdb_test "p sizeof($char32_exp)" "= 4" \ + "sizeof($char32_exp) == 4" + + # Test printing wide literal strings. Note that when testing with + # no program started, this relies on GDB's awareness of the + # built-in wide char types. + gdb_test {p U"hello"} {= U"hello"} + gdb_test {p u"hello"} {= u"hello"} +} + +# Make sure that the char16_t/char32_t types are recognized as +# distinct built-in types in C++ mode, even with no program loaded. +# Check that in C mode, the types are not recognized. + +proc wide_char_types_no_program {} { + global srcfile testfile + + gdb_exit + gdb_start + + # These types are not built-in in C.
+ with_test_prefix "c" { + gdb_test "set language c" + + gdb_test "p (char16_t) -1" "No symbol table is loaded.*" \ + "char16_t is not built-in" + gdb_test "p (char32_t) -1" "No symbol table is loaded.*" \ + "char32_t is not built-in" + + gdb_test {p U"hello"} "No type named char32_t\\\." + gdb_test {p u"hello"} "No type named char16_t\\\." + } + + # Note GDB does not distinguish C++ dialects, so the fixed-width + # types are always available in C++ mode, even if they were not + # built-in types before C++11. + with_test_prefix "c++" { + gdb_test "set language c++" + + do_test_wide_char "c++11" "(char16_t) -1" "(char32_t) -1" + } +} + +# Check wide char types with no program loaded. +with_test_prefix "no program" { + wide_char_types_no_program +} + +# Check types when a program is loaded. +with_test_prefix "with program" { + foreach_with_prefix lang {"c" "c++03" "c++11"} { + wide_char_types_program $lang + } +}