binutils-gdb/opcodes/riscv-dis.c
Andrew Burgess 884b49e3a9 opcodes/riscv: Hide '.L0 ' fake symbols
The RISC-V assembler generates fake labels with the name '.L0 ' as
part of the debug information (see
gas/config/tc-riscv.h:FAKE_LABEL_NAME).

The problem is that currently, when disassembling an object file, the
output looks like this (this is an example from the GDB testsuite, but
is pretty representative of anything with debug information):

  000000000000001e <main>:
    1e:   7179                    addi    sp,sp,-48
    20:   f406                    sd      ra,40(sp)
    22:   f022                    sd      s0,32(sp)
    24:   1800                    addi    s0,sp,48

  0000000000000026 <.L0 >:
    26:   87aa                    mv      a5,a0
    28:   feb43023                sd      a1,-32(s0)
    2c:   fcc43c23                sd      a2,-40(s0)
    30:   fef42623                sw      a5,-20(s0)

  0000000000000034 <.L0 >:
    34:   fec42783                lw      a5,-20(s0)
    38:   0007871b                sext.w  a4,a5
    3c:   678d                    lui     a5,0x3
    3e:   03978793                addi    a5,a5,57 # 3039 <.LASF30+0x2a9d>
    42:   02f71463                bne     a4,a5,6a <.L0 >

  0000000000000046 <.L0 >:
    46:   000007b7                lui     a5,0x0
    4a:   0007b783                ld      a5,0(a5) # 0 <need_malloc>
    4e:   6f9c                    ld      a5,24(a5)

  0000000000000050 <.L0 >:
    50:   86be                    mv      a3,a5
    52:   466d                    li      a2,27
    54:   4585                    li      a1,1
    56:   000007b7                lui     a5,0x0
    5a:   00078513                mv      a0,a5
    5e:   00000097                auipc   ra,0x0
    62:   000080e7                jalr    ra # 5e <.L0 +0xe>

  0000000000000066 <.L0 >:
    66:   4785                    li      a5,1
    68:   a869                    j       102 <.L0 >

  000000000000006a <.L0 >:
    6a:   000007b7                lui     a5,0x0
    6e:   00078513                mv      a0,a5
    72:   00000097                auipc   ra,0x0
    76:   000080e7                jalr    ra # 72 <.L0 +0x8>

The frequent repeated '.L0 ' labels are pointless, as they are
non-unique there's no way to match a use of '.L0 ' to its appearence
in the output, so we'd be better off just not printing it at all.
That's what this patch does by defining a 'symbol_is_valid' method for
RISC-V.  With this commit, the same disassembly now looks like this:

  000000000000001e <main>:
    1e:   7179                    addi    sp,sp,-48
    20:   f406                    sd      ra,40(sp)
    22:   f022                    sd      s0,32(sp)
    24:   1800                    addi    s0,sp,48
    26:   87aa                    mv      a5,a0
    28:   feb43023                sd      a1,-32(s0)
    2c:   fcc43c23                sd      a2,-40(s0)
    30:   fef42623                sw      a5,-20(s0)
    34:   fec42783                lw      a5,-20(s0)
    38:   0007871b                sext.w  a4,a5
    3c:   678d                    lui     a5,0x3
    3e:   03978793                addi    a5,a5,57 # 3039 <.LASF30+0x2a9d>
    42:   02f71463                bne     a4,a5,6a <.L4>
    46:   000007b7                lui     a5,0x0
    4a:   0007b783                ld      a5,0(a5) # 0 <need_malloc>
    4e:   6f9c                    ld      a5,24(a5)
    50:   86be                    mv      a3,a5
    52:   466d                    li      a2,27
    54:   4585                    li      a1,1
    56:   000007b7                lui     a5,0x0
    5a:   00078513                mv      a0,a5
    5e:   00000097                auipc   ra,0x0
    62:   000080e7                jalr    ra # 5e <main+0x40>
    66:   4785                    li      a5,1
    68:   a869                    j       102 <.L5>

  000000000000006a <.L4>:
    6a:   000007b7                lui     a5,0x0
    6e:   00078513                mv      a0,a5
    72:   00000097                auipc   ra,0x0
    76:   000080e7                jalr    ra # 72 <.L4+0x8>

In order to share the fake label between the assembler and the
libopcodes library, I've added some new defines RISCV_FAKE_LABEL_NAME
and RISCV_FAKE_LABEL_CHAR in include/opcode/riscv.h.  I could have
just moved FAKE_LABEL_NAME to the include file, however, I thnk this
would be confusing, someone working on the assembler would likely not
expect to find FAKE_LABEL_NAME defined outside of the assembler source
tree.  By introducing the RISCV_FAKE_LABEL_* defines I can leave the
assembler standard FAKE_LABEL_ defines in the assembler source, but
still share the RISCV_FAKE_LABEL_* with libopcodes.

gas/ChangeLog:

	* config/tc-riscv.h (FAKE_LABEL_NAME): Define as
	RISCV_FAKE_LABEL_NAME.
	(FAKE_LABEL_CHAR): Define as RISCV_FAKE_LABEL_CHAR.

include/ChangeLog:

	* dis-asm.h (riscv_symbol_is_valid): Declare.
	* opcode/riscv.h (RISCV_FAKE_LABEL_NAME): Define.
	(RISCV_FAKE_LABEL_CHAR): Define.

opcodes/ChangeLog:

        * disassembler.c (disassemble_init_for_target): Add RISC-V
        initialisation.
        * riscv-dis.c (riscv_symbol_is_valid): New function.
2018-12-06 09:40:56 +00:00

554 lines
14 KiB
C

/* RISC-V disassembler
Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Andrew Waterman (andrew@sifive.com).
Based on MIPS target.
This file is part of the GNU opcodes library.
This library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3. If not,
see <http://www.gnu.org/licenses/>. */
#include "sysdep.h"
#include "disassemble.h"
#include "libiberty.h"
#include "opcode/riscv.h"
#include "opintl.h"
#include "elf-bfd.h"
#include "elf/riscv.h"
#include <stdint.h>
#include <ctype.h>
struct riscv_private_data
{
bfd_vma gp;
bfd_vma print_addr;
bfd_vma hi_addr[OP_MASK_RD + 1];
};
static const char * const *riscv_gpr_names;
static const char * const *riscv_fpr_names;
/* Other options. */
static int no_aliases; /* If set disassemble as most general inst. */
static void
set_default_riscv_dis_options (void)
{
riscv_gpr_names = riscv_gpr_names_abi;
riscv_fpr_names = riscv_fpr_names_abi;
no_aliases = 0;
}
static void
parse_riscv_dis_option (const char *option)
{
if (strcmp (option, "no-aliases") == 0)
no_aliases = 1;
else if (strcmp (option, "numeric") == 0)
{
riscv_gpr_names = riscv_gpr_names_numeric;
riscv_fpr_names = riscv_fpr_names_numeric;
}
else
{
/* xgettext:c-format */
opcodes_error_handler (_("unrecognized disassembler option: %s"), option);
}
}
static void
parse_riscv_dis_options (const char *opts_in)
{
char *opts = xstrdup (opts_in), *opt = opts, *opt_end = opts;
set_default_riscv_dis_options ();
for ( ; opt_end != NULL; opt = opt_end + 1)
{
if ((opt_end = strchr (opt, ',')) != NULL)
*opt_end = 0;
parse_riscv_dis_option (opt);
}
free (opts);
}
/* Print one argument from an array. */
static void
arg_print (struct disassemble_info *info, unsigned long val,
const char* const* array, size_t size)
{
const char *s = val >= size || array[val] == NULL ? "unknown" : array[val];
(*info->fprintf_func) (info->stream, "%s", s);
}
static void
maybe_print_address (struct riscv_private_data *pd, int base_reg, int offset)
{
if (pd->hi_addr[base_reg] != (bfd_vma)-1)
{
pd->print_addr = (base_reg != 0 ? pd->hi_addr[base_reg] : 0) + offset;
pd->hi_addr[base_reg] = -1;
}
else if (base_reg == X_GP && pd->gp != (bfd_vma)-1)
pd->print_addr = pd->gp + offset;
else if (base_reg == X_TP || base_reg == 0)
pd->print_addr = offset;
}
/* Print insn arguments for 32/64-bit code. */
static void
print_insn_args (const char *d, insn_t l, bfd_vma pc, disassemble_info *info)
{
struct riscv_private_data *pd = info->private_data;
int rs1 = (l >> OP_SH_RS1) & OP_MASK_RS1;
int rd = (l >> OP_SH_RD) & OP_MASK_RD;
fprintf_ftype print = info->fprintf_func;
if (*d != '\0')
print (info->stream, "\t");
for (; *d != '\0'; d++)
{
switch (*d)
{
case 'C': /* RVC */
switch (*++d)
{
case 's': /* RS1 x8-x15 */
case 'w': /* RS1 x8-x15 */
print (info->stream, "%s",
riscv_gpr_names[EXTRACT_OPERAND (CRS1S, l) + 8]);
break;
case 't': /* RS2 x8-x15 */
case 'x': /* RS2 x8-x15 */
print (info->stream, "%s",
riscv_gpr_names[EXTRACT_OPERAND (CRS2S, l) + 8]);
break;
case 'U': /* RS1, constrained to equal RD */
print (info->stream, "%s", riscv_gpr_names[rd]);
break;
case 'c': /* RS1, constrained to equal sp */
print (info->stream, "%s", riscv_gpr_names[X_SP]);
break;
case 'V': /* RS2 */
print (info->stream, "%s",
riscv_gpr_names[EXTRACT_OPERAND (CRS2, l)]);
break;
case 'i':
print (info->stream, "%d", (int)EXTRACT_RVC_SIMM3 (l));
break;
case 'o':
case 'j':
print (info->stream, "%d", (int)EXTRACT_RVC_IMM (l));
break;
case 'k':
print (info->stream, "%d", (int)EXTRACT_RVC_LW_IMM (l));
break;
case 'l':
print (info->stream, "%d", (int)EXTRACT_RVC_LD_IMM (l));
break;
case 'm':
print (info->stream, "%d", (int)EXTRACT_RVC_LWSP_IMM (l));
break;
case 'n':
print (info->stream, "%d", (int)EXTRACT_RVC_LDSP_IMM (l));
break;
case 'K':
print (info->stream, "%d", (int)EXTRACT_RVC_ADDI4SPN_IMM (l));
break;
case 'L':
print (info->stream, "%d", (int)EXTRACT_RVC_ADDI16SP_IMM (l));
break;
case 'M':
print (info->stream, "%d", (int)EXTRACT_RVC_SWSP_IMM (l));
break;
case 'N':
print (info->stream, "%d", (int)EXTRACT_RVC_SDSP_IMM (l));
break;
case 'p':
info->target = EXTRACT_RVC_B_IMM (l) + pc;
(*info->print_address_func) (info->target, info);
break;
case 'a':
info->target = EXTRACT_RVC_J_IMM (l) + pc;
(*info->print_address_func) (info->target, info);
break;
case 'u':
print (info->stream, "0x%x",
(int)(EXTRACT_RVC_IMM (l) & (RISCV_BIGIMM_REACH-1)));
break;
case '>':
print (info->stream, "0x%x", (int)EXTRACT_RVC_IMM (l) & 0x3f);
break;
case '<':
print (info->stream, "0x%x", (int)EXTRACT_RVC_IMM (l) & 0x1f);
break;
case 'T': /* floating-point RS2 */
print (info->stream, "%s",
riscv_fpr_names[EXTRACT_OPERAND (CRS2, l)]);
break;
case 'D': /* floating-point RS2 x8-x15 */
print (info->stream, "%s",
riscv_fpr_names[EXTRACT_OPERAND (CRS2S, l) + 8]);
break;
}
break;
case ',':
case '(':
case ')':
case '[':
case ']':
print (info->stream, "%c", *d);
break;
case '0':
/* Only print constant 0 if it is the last argument */
if (!d[1])
print (info->stream, "0");
break;
case 'b':
case 's':
if ((l & MASK_JALR) == MATCH_JALR)
maybe_print_address (pd, rs1, 0);
print (info->stream, "%s", riscv_gpr_names[rs1]);
break;
case 't':
print (info->stream, "%s",
riscv_gpr_names[EXTRACT_OPERAND (RS2, l)]);
break;
case 'u':
print (info->stream, "0x%x",
(unsigned)EXTRACT_UTYPE_IMM (l) >> RISCV_IMM_BITS);
break;
case 'm':
arg_print (info, EXTRACT_OPERAND (RM, l),
riscv_rm, ARRAY_SIZE (riscv_rm));
break;
case 'P':
arg_print (info, EXTRACT_OPERAND (PRED, l),
riscv_pred_succ, ARRAY_SIZE (riscv_pred_succ));
break;
case 'Q':
arg_print (info, EXTRACT_OPERAND (SUCC, l),
riscv_pred_succ, ARRAY_SIZE (riscv_pred_succ));
break;
case 'o':
maybe_print_address (pd, rs1, EXTRACT_ITYPE_IMM (l));
/* Fall through. */
case 'j':
if (((l & MASK_ADDI) == MATCH_ADDI && rs1 != 0)
|| (l & MASK_JALR) == MATCH_JALR)
maybe_print_address (pd, rs1, EXTRACT_ITYPE_IMM (l));
print (info->stream, "%d", (int)EXTRACT_ITYPE_IMM (l));
break;
case 'q':
maybe_print_address (pd, rs1, EXTRACT_STYPE_IMM (l));
print (info->stream, "%d", (int)EXTRACT_STYPE_IMM (l));
break;
case 'a':
info->target = EXTRACT_UJTYPE_IMM (l) + pc;
(*info->print_address_func) (info->target, info);
break;
case 'p':
info->target = EXTRACT_SBTYPE_IMM (l) + pc;
(*info->print_address_func) (info->target, info);
break;
case 'd':
if ((l & MASK_AUIPC) == MATCH_AUIPC)
pd->hi_addr[rd] = pc + EXTRACT_UTYPE_IMM (l);
else if ((l & MASK_LUI) == MATCH_LUI)
pd->hi_addr[rd] = EXTRACT_UTYPE_IMM (l);
else if ((l & MASK_C_LUI) == MATCH_C_LUI)
pd->hi_addr[rd] = EXTRACT_RVC_LUI_IMM (l);
print (info->stream, "%s", riscv_gpr_names[rd]);
break;
case 'z':
print (info->stream, "%s", riscv_gpr_names[0]);
break;
case '>':
print (info->stream, "0x%x", (int)EXTRACT_OPERAND (SHAMT, l));
break;
case '<':
print (info->stream, "0x%x", (int)EXTRACT_OPERAND (SHAMTW, l));
break;
case 'S':
case 'U':
print (info->stream, "%s", riscv_fpr_names[rs1]);
break;
case 'T':
print (info->stream, "%s", riscv_fpr_names[EXTRACT_OPERAND (RS2, l)]);
break;
case 'D':
print (info->stream, "%s", riscv_fpr_names[rd]);
break;
case 'R':
print (info->stream, "%s", riscv_fpr_names[EXTRACT_OPERAND (RS3, l)]);
break;
case 'E':
{
const char* csr_name = NULL;
unsigned int csr = EXTRACT_OPERAND (CSR, l);
switch (csr)
{
#define DECLARE_CSR(name, num) case num: csr_name = #name; break;
#include "opcode/riscv-opc.h"
#undef DECLARE_CSR
}
if (csr_name)
print (info->stream, "%s", csr_name);
else
print (info->stream, "0x%x", csr);
break;
}
case 'Z':
print (info->stream, "%d", rs1);
break;
default:
/* xgettext:c-format */
print (info->stream, _("# internal error, undefined modifier (%c)"),
*d);
return;
}
}
}
/* Print the RISC-V instruction at address MEMADDR in debugged memory,
on using INFO. Returns length of the instruction, in bytes.
BIGENDIAN must be 1 if this is big-endian code, 0 if
this is little-endian code. */
static int
riscv_disassemble_insn (bfd_vma memaddr, insn_t word, disassemble_info *info)
{
const struct riscv_opcode *op;
static bfd_boolean init = 0;
static const struct riscv_opcode *riscv_hash[OP_MASK_OP + 1];
struct riscv_private_data *pd;
int insnlen;
#define OP_HASH_IDX(i) ((i) & (riscv_insn_length (i) == 2 ? 0x3 : OP_MASK_OP))
/* Build a hash table to shorten the search time. */
if (! init)
{
for (op = riscv_opcodes; op->name; op++)
if (!riscv_hash[OP_HASH_IDX (op->match)])
riscv_hash[OP_HASH_IDX (op->match)] = op;
init = 1;
}
if (info->private_data == NULL)
{
int i;
pd = info->private_data = xcalloc (1, sizeof (struct riscv_private_data));
pd->gp = -1;
pd->print_addr = -1;
for (i = 0; i < (int)ARRAY_SIZE (pd->hi_addr); i++)
pd->hi_addr[i] = -1;
for (i = 0; i < info->symtab_size; i++)
if (strcmp (bfd_asymbol_name (info->symtab[i]), RISCV_GP_SYMBOL) == 0)
pd->gp = bfd_asymbol_value (info->symtab[i]);
}
else
pd = info->private_data;
insnlen = riscv_insn_length (word);
info->bytes_per_chunk = insnlen % 4 == 0 ? 4 : 2;
info->bytes_per_line = 8;
info->display_endian = info->endian;
info->insn_info_valid = 1;
info->branch_delay_insns = 0;
info->data_size = 0;
info->insn_type = dis_nonbranch;
info->target = 0;
info->target2 = 0;
op = riscv_hash[OP_HASH_IDX (word)];
if (op != NULL)
{
unsigned xlen = 0;
/* If XLEN is not known, get its value from the ELF class. */
if (info->mach == bfd_mach_riscv64)
xlen = 64;
else if (info->mach == bfd_mach_riscv32)
xlen = 32;
else if (info->section != NULL)
{
Elf_Internal_Ehdr *ehdr = elf_elfheader (info->section->owner);
xlen = ehdr->e_ident[EI_CLASS] == ELFCLASS64 ? 64 : 32;
}
for (; op->name; op++)
{
/* Does the opcode match? */
if (! (op->match_func) (op, word))
continue;
/* Is this a pseudo-instruction and may we print it as such? */
if (no_aliases && (op->pinfo & INSN_ALIAS))
continue;
/* Is this instruction restricted to a certain value of XLEN? */
if ((op->xlen_requirement != 0) && (op->xlen_requirement != xlen))
continue;
/* It's a match. */
(*info->fprintf_func) (info->stream, "%s", op->name);
print_insn_args (op->args, word, memaddr, info);
/* Try to disassemble multi-instruction addressing sequences. */
if (pd->print_addr != (bfd_vma)-1)
{
info->target = pd->print_addr;
(*info->fprintf_func) (info->stream, " # ");
(*info->print_address_func) (info->target, info);
pd->print_addr = -1;
}
/* Finish filling out insn_info fields. */
switch (op->pinfo & INSN_TYPE)
{
case INSN_BRANCH:
info->insn_type = dis_branch;
break;
case INSN_CONDBRANCH:
info->insn_type = dis_condbranch;
break;
case INSN_JSR:
info->insn_type = dis_jsr;
break;
case INSN_DREF:
info->insn_type = dis_dref;
break;
default:
break;
}
if (op->pinfo & INSN_DATA_SIZE)
{
int size = ((op->pinfo & INSN_DATA_SIZE)
>> INSN_DATA_SIZE_SHIFT);
info->data_size = 1 << (size - 1);
}
return insnlen;
}
}
/* We did not find a match, so just print the instruction bits. */
info->insn_type = dis_noninsn;
(*info->fprintf_func) (info->stream, "0x%llx", (unsigned long long)word);
return insnlen;
}
int
print_insn_riscv (bfd_vma memaddr, struct disassemble_info *info)
{
bfd_byte packet[2];
insn_t insn = 0;
bfd_vma n;
int status;
if (info->disassembler_options != NULL)
{
parse_riscv_dis_options (info->disassembler_options);
/* Avoid repeatedly parsing the options. */
info->disassembler_options = NULL;
}
else if (riscv_gpr_names == NULL)
set_default_riscv_dis_options ();
/* Instructions are a sequence of 2-byte packets in little-endian order. */
for (n = 0; n < sizeof (insn) && n < riscv_insn_length (insn); n += 2)
{
status = (*info->read_memory_func) (memaddr + n, packet, 2, info);
if (status != 0)
{
/* Don't fail just because we fell off the end. */
if (n > 0)
break;
(*info->memory_error_func) (status, memaddr, info);
return status;
}
insn |= ((insn_t) bfd_getl16 (packet)) << (8 * n);
}
return riscv_disassemble_insn (memaddr, insn, info);
}
/* Prevent use of the fake labels that are generated as part of the DWARF
and for relaxable relocations in the assembler. */
bfd_boolean
riscv_symbol_is_valid (asymbol * sym,
struct disassemble_info * info ATTRIBUTE_UNUSED)
{
const char * name;
if (sym == NULL)
return FALSE;
name = bfd_asymbol_name (sym);
return (strcmp (name, RISCV_FAKE_LABEL_NAME) != 0);
}
void
print_riscv_disassembler_options (FILE *stream)
{
fprintf (stream, _("\n\
The following RISC-V-specific disassembler options are supported for use\n\
with the -M switch (multiple options should be separated by commas):\n"));
fprintf (stream, _("\n\
numeric Print numeric register names, rather than ABI names.\n"));
fprintf (stream, _("\n\
no-aliases Disassemble only into canonical instructions, rather\n\
than into pseudoinstructions.\n"));
fprintf (stream, _("\n"));
}