mirror of
https://sourceware.org/git/binutils-gdb.git
synced 2025-01-12 12:16:04 +08:00
a2c5833233
The result of running etc/update-copyright.py --this-year, fixing all the files whose mode is changed by the script, plus a build with --enable-maintainer-mode --enable-cgen-maint=yes, then checking out */po/*.pot which we don't update frequently. The copy of cgen was with commit d1dd5fcc38ead reverted as that commit breaks building of bfp opcodes files.
472 lines
11 KiB
ArmAsm
472 lines
11 KiB
ArmAsm
/* Overlay manager for SPU.
|
|
|
|
Copyright (C) 2006-2022 Free Software Foundation, Inc.
|
|
|
|
This file is part of the GNU Binutils.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
/* MFC DMA defn's. */
|
|
#define MFC_GET_CMD 0x40
|
|
#define MFC_MAX_DMA_SIZE 0x4000
|
|
#define MFC_TAG_UPDATE_ALL 2
|
|
#define MFC_TAG_ID 0
|
|
|
|
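/* Register usage. $75-$79 (reserved1-reserved5) are used as scratch
without being saved. $71-$74 (save4-save1) are stored below $sp before
the overlay manager overwrites them and are reloaded from there afterwards
(save4 only when OVLY_IRQ_SAVE is defined). Every other name below is an
alias for one of these registers, one alias per role, so two aliases of the
same register can never be live at the same time. */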
|
|
#define reserved1 $75
|
|
#define parm $75
|
|
#define tab1 reserved1
|
|
#define tab2 reserved1
|
|
#define vma reserved1
|
|
#define oldvma reserved1
|
|
#define newmask reserved1
|
|
#define map reserved1
|
|
|
|
#define reserved2 $76
|
|
#define off1 reserved2
|
|
#define off2 reserved2
|
|
#define present1 reserved2
|
|
#define present2 reserved2
|
|
#define sz reserved2
|
|
#define cmp reserved2
|
|
#define add64 reserved2
|
|
#define cgbits reserved2
|
|
#define off3 reserved2
|
|
#define off4 reserved2
|
|
#define addr4 reserved2
|
|
#define off5 reserved2
|
|
#define tagstat reserved2
|
|
|
|
#define reserved3 $77
|
|
#define size1 reserved3
|
|
#define size2 reserved3
|
|
#define rv3 reserved3
|
|
#define ealo reserved3
|
|
#define cmd reserved3
|
|
#define off64 reserved3
|
|
#define tab3 reserved3
|
|
#define tab4 reserved3
|
|
#define tab5 reserved3
|
|
|
|
#define reserved4 $78
|
|
#define ovl reserved4
|
|
#define rv2 reserved4
|
|
#define rv5 reserved4
|
|
#define cgshuf reserved4
|
|
#define newovl reserved4
|
|
#define irqtmp1 reserved4
|
|
#define irqtmp2 reserved4
|
|
|
|
#define reserved5 $79
|
|
#define target reserved5
|
|
|
|
#define save1 $74
|
|
#define rv4 save1
|
|
#define rv7 save1
|
|
#define tagid save1
|
|
#define maxsize save1
|
|
#define pbyte save1
|
|
#define pbit save1
|
|
|
|
#define save2 $73
|
|
#define cur save2
|
|
#define rv6 save2
|
|
#define osize save2
|
|
#define zovl save2
|
|
#define oldovl save2
|
|
#define newvma save2
|
|
|
|
#define save3 $72
|
|
#define rv1 save3
|
|
#define ea64 save3
|
|
#define buf3 save3
|
|
#define genwi save3
|
|
#define newmap save3
|
|
#define oldmask save3
|
|
|
|
#define save4 $71
|
|
#define irq_stat save4
|
|
|
|
/* Constant shuffle patterns and the overlay manager's state.  All three
   objects are 16 bytes, are emitted into .text next to the code, and are
   accessed with lqr/stqr by the routines below. */
.text
|
|
.align 4
|
|
/* shufb control loaded by __ovly_load as rv3.  Per the SPU shufb
   definition, control bytes 0x00-0x03 copy word 0 of the first source,
   0x10-0x13 copy word 0 of the second source, and 0x80 yields a zero byte.
   In "shufb rv4, rv1, cur, rv3" this builds
   { __ovly_return, overlay index current on entry, 0, 0 }. */
.type __rv_pattern, @object
|
|
.size __rv_pattern, 16
|
|
__rv_pattern:
|
|
.word 0x00010203, 0x10111213, 0x80808080, 0x80808080
|
|
|
|
/* shufb control loaded by do_load as cgshuf.  Control bytes 0x04-0x07 copy
   word 1 of the source into word 0; everything else becomes zero.  It is
   applied to the carry vector produced by cg so that the following addx
   adds the carry out of the low word into the high word. */
.type __cg_pattern, @object
|
|
.size __cg_pattern, 16
|
|
__cg_pattern:
|
|
.word 0x04050607, 0x80808080, 0x80808080, 0x80808080
|
|
|
|
/* Manager state: a full quadword written with "stqr ovl, __ovly_current"
   by __ovly_return and __ovly_load.  do_load reads it back and shifts it
   left by 4 to form the _ovly_table offset of the overlay being loaded. */
.type __ovly_current, @object
|
|
.size __ovly_current, 16
|
|
__ovly_current:
|
|
.space 16
|
|
|
|
/*
|
|
* __ovly_return - stub for returning from overlay functions.
|
|
*
|
|
* On entry the four slots of $lr are:
|
|
* __ovly_return, prev ovl index, caller return addr, undefined.
|
|
*
|
|
* Load the previous overlay and jump to the caller return address.
|
|
* Updates __ovly_current.
|
|
*/
|
|
/* __ovly_return: unpack the previous overlay index and the caller's return
   address from $lr, look the overlay up in _ovly_table, and either branch
   straight to the return address (overlay resident) or go to do_load.
   NOTE(review): the trailing "# p,l c" annotations look like pipeline,
   latency and issue cycle, and the commented-out nop/lnop lines look like
   placeholders for unused issue slots -- confirm before rescheduling. */
.align 4
|
|
.global __ovly_return
|
|
.type __ovly_return, @function
|
|
__ovly_return:
|
|
/* Table base biased by one 16-byte entry, so index 1 selects
   _ovly_table[0]. */
ila tab1, _ovly_table - 16 # 0,2 0
|
|
/* Shift $lr left 4 bytes: the previous overlay index (word 1) moves to
   word 0. */
shlqbyi ovl, $lr, 4 # 1,4 0
|
|
#nop
|
|
/* Shift $lr left 8 bytes: the caller return address (word 2) moves to
   word 0. */
shlqbyi target, $lr, 8 # 1,4 1
|
|
#nop; lnop
|
|
#nop; lnop
|
|
/* off1 = ovl * 16, the byte offset of the table entry. */
shli off1, ovl, 4 # 0,4 4
|
|
#lnop
|
|
#nop
|
|
/* Hint that the branch at ovly_ret9 goes to target. */
hbr ovly_ret9, target # 1,15 5
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Fetch the whole 16-byte _ovly_table entry { vma, size, file_offset, buf }. */
lqx vma, tab1, off1 # 1,6 8
|
|
#ifdef OVLY_IRQ_SAVE
|
|
nop
|
|
/* do_load uses save4 as irq_stat in this configuration, so spill it. */
stqd save4, -64($sp) # 1,6 9
|
|
#else
|
|
#nop; lnop
|
|
#endif
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Rotate the entry so that .size is in word 0. */
rotqbyi size1, vma, 4 # 1,4 14
|
|
#nop
|
|
/* Spill save1-save3 to the slots do_load reloads them from.  They are not
   modified on the resident path below, so no reload is needed there. */
stqd save3, -48($sp) # 1,6 15
|
|
#nop
|
|
stqd save2, -32($sp) # 1,6 16
|
|
#nop
|
|
stqd save1, -16($sp) # 1,6 17
|
|
/* The low bit of .size is the "overlay is resident" flag. */
andi present1, size1, 1 # 0,2 18
|
|
/* Record the overlay being returned to as the current one. */
stqr ovl, __ovly_current # 1,6 18
|
|
#nop; lnop
|
|
#nop
|
|
/* Not resident: load it; do_load finishes with its own "bi target". */
brz present1, do_load # 1,4 20
|
|
ovly_ret9:
|
|
#nop
|
|
/* Resident: return to the caller. */
bi target # 1,4 21
|
|
|
|
/*
|
|
* __ovly_load - copy an overlay partition to local store.
|
|
*
|
|
* On entry $75 points to a word consisting of the overlay index in
|
|
* the top 14 bits, and the target address in the bottom 18 bits.
|
|
*
|
|
* Sets up $lr to return via __ovly_return. If $lr is already set
|
|
* to return via __ovly_return, don't change it. In that case we
|
|
* have a tail call from one overlay function to another.
|
|
* Updates __ovly_current.
|
|
*/
|
|
/* __ovly_load: entry point reached from overlay call stubs.  The two
   preprocessor variants differ only in how ovl and target are obtained;
   both finish with tab2, off2, rv1, rv2, rv3, cur and vma set up and
   save1-save3 spilled, then share the common tail after the #endif.
   Register aliases overlap (rv1 is save3, cur and rv6 are save2, rv4 and
   rv7 are save1), so the order of spills, uses and reloads is significant. */
.align 3
|
|
.global __ovly_load
|
|
.type __ovly_load, @function
|
|
__ovly_load:
|
|
#if OVL_STUB_SIZE == 8
|
|
########
|
|
#nop
|
|
/* Load the quadword that contains the packed word parm points at ... */
lqd target, 0(parm) # 1,6 -11
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* ... and rotate it by the address in parm so that word is in word 0. */
rotqby target, target, parm # 1,4 -5
|
|
/* Table base biased by one entry; tab2 aliases parm ($75), which is no
   longer needed. */
ila tab2, _ovly_table - 16 # 0,2 -4
|
|
/* Spill save1-save3 before their aliases are written. */
stqd save3, -48($sp) # 1,6 -4
|
|
#nop
|
|
stqd save2, -32($sp) # 1,6 -3
|
|
#nop
|
|
stqd save1, -16($sp) # 1,6 -2
|
|
/* ovl = packed word shifted right 18 bits: the overlay index from the
   top 14 bits. */
rotmi ovl, target, -18 # 0,4 -1
|
|
/* Hint that the branch at ovly_load9 goes to target. */
hbr ovly_load9, target # 1,15 -1
|
|
/* rv1 = address of __ovly_return, for the $lr comparison and rebuild. */
ila rv1, __ovly_return # 0,2 0
|
|
#lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Read the overlay that is current on entry before overwriting it. */
lqr cur, __ovly_current # 1,6 2
|
|
/* off2 = ovl * 16, the byte offset of the table entry. */
shli off2, ovl, 4 # 0,4 3
|
|
stqr ovl, __ovly_current # 1,6 3
|
|
/* rv2 = per-word compare of $lr against __ovly_return. */
ceq rv2, $lr, rv1 # 0,2 4
|
|
lqr rv3, __rv_pattern # 1,6 4
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Fetch the 16-byte _ovly_table entry for the target overlay. */
lqx vma, tab2, off2 # 1,6 7
|
|
########
|
|
#else /* OVL_STUB_SIZE == 16 */
|
|
########
|
|
/* NOTE(review): this variant never computes ovl or target, so they must be
   set on entry -- presumably by the 16-byte stub; confirm against the stub
   generator. */
ila tab2, _ovly_table - 16 # 0,2 0
|
|
stqd save3, -48($sp) # 1,6 0
|
|
ila rv1, __ovly_return # 0,2 1
|
|
stqd save2, -32($sp) # 1,6 1
|
|
shli off2, ovl, 4 # 0,4 2
|
|
/* Read the overlay that is current on entry before overwriting it. */
lqr cur, __ovly_current # 1,6 2
|
|
nop
|
|
stqr ovl, __ovly_current # 1,6 3
|
|
ceq rv2, $lr, rv1 # 0,2 4
|
|
lqr rv3, __rv_pattern # 1,6 4
|
|
#nop
|
|
hbr ovly_load9, target # 1,15 5
|
|
#nop
|
|
lqx vma, tab2, off2 # 1,6 6
|
|
#nop
|
|
stqd save1, -16($sp) # 1,6 7
|
|
########
|
|
#endif
|
|
|
|
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Common tail.  rv4 = { __ovly_return, overlay index current on entry,
   0, 0 } (see __rv_pattern). */
shufb rv4, rv1, cur, rv3 # 1,4 10
|
|
#nop
|
|
/* Expand the compare result into a byte select mask for the selb below. */
fsmb rv5, rv2 # 1,4 11
|
|
#nop
|
|
/* Shift $lr right 8 bytes: the incoming return address moves from word 0
   to word 2. */
rotqmbyi rv6, $lr, -8 # 1,4 12
|
|
#nop
|
|
/* Rotate the table entry so that .size is in word 0. */
rotqbyi size2, vma, 4 # 1,4 13
|
|
#nop
|
|
/* rv1 (save3) is no longer needed: restore save3. */
lqd save3, -48($sp) # 1,6 14
|
|
#nop; lnop
|
|
/* rv7 = { __ovly_return, previous overlay index, caller return addr, 0 },
   the $lr layout __ovly_return expects. */
or rv7, rv4, rv6 # 0,2 16
|
|
lqd save2, -32($sp) # 1,6 16
|
|
/* The low bit of .size is the "overlay is resident" flag. */
andi present2, size2, 1 # 0,2 17
|
|
#ifdef OVLY_IRQ_SAVE
|
|
/* do_load uses save4 as irq_stat in this configuration, so spill it. */
stqd save4, -64($sp) # 1,6 17
|
|
#else
|
|
lnop # 1,0 17
|
|
#endif
|
|
/* Keep the existing $lr where the mask is set (it already returns via
   __ovly_return, i.e. a tail call); otherwise install rv7. */
selb $lr, rv7, $lr, rv5 # 0,2 18
|
|
lqd save1, -16($sp) # 1,6 18
|
|
#nop
|
|
/* Not resident: load it; do_load finishes with its own "bi target". */
brz present2, do_load # 1,4 19
|
|
ovly_load9:
|
|
#nop
|
|
/* Resident: jump to the overlay function. */
bi target # 1,4 20
|
|
|
|
/* If we get here, we are about to load a new overlay.
|
|
* "vma" contains the relevant entry from _ovly_table[].
|
|
* extern struct {
|
|
* u32 vma;
|
|
* u32 size;
|
|
* u32 file_offset;
|
|
* u32 buf;
|
|
* } _ovly_table[];
|
|
*/
|
|
/* do_load / __ovly_load_event: entered from __ovly_return and __ovly_load
   with the overlay's _ovly_table entry in vma and the final branch address
   in target.  This prologue prepares the operands of the DMA loop. */
.align 3
|
|
.global __ovly_load_event
|
|
.type __ovly_load_event, @function
|
|
__ovly_load_event:
|
|
do_load:
|
|
#ifdef OVLY_IRQ_SAVE
|
|
ila irqtmp1, do_load10 # 0,2 -5
|
|
/* sz = table entry rotated 8 bytes: file_offset is now in word 0. */
rotqbyi sz, vma, 8 # 1,4 -5
|
|
#nop
|
|
/* Remember the machine status so the exit path can test its low bit. */
rdch irq_stat, $SPU_RdMachStat # 1,6 -4
|
|
#nop
|
|
/* Branch to the next instruction using the interrupt-disabling form of
   bi, so the load runs with interrupts off. */
bid irqtmp1 # 1,4 -3
|
|
do_load10:
|
|
nop
|
|
#else
|
|
#nop
|
|
/* sz = table entry rotated 8 bytes: file_offset is now in word 0. */
rotqbyi sz, vma, 8 # 1,4 0
|
|
#endif
|
|
/* osize = table entry rotated 4 bytes: .size (bytes still to transfer)
   is now in word 0. */
rotqbyi osize, vma, 4 # 1,4 1
|
|
#nop
|
|
/* ea64 = quadword at _EAR_; words 0 and 1 are used below as the high and
   low halves of a 64-bit effective address.
   NOTE(review): presumably the main-storage address of the overlay image;
   _EAR_ is defined outside this file. */
lqa ea64, _EAR_ # 1,6 2
|
|
#nop
|
|
/* Shuffle control that moves the low-word carry up to the high word. */
lqr cgshuf, __cg_pattern # 1,6 3
|
|
|
|
/* We could predict the branch at the end of this loop by adding a few
|
|
instructions, and there are plenty of free cycles to do so without
|
|
impacting loop execution time. However, it doesn't make a great
|
|
deal of sense since we need to wait for the dma to complete anyway. */
|
|
/* DMA issue loop: one MFC get per iteration, at most MFC_MAX_DMA_SIZE bytes.
   On entry to each iteration sz holds the amount to add to the effective
   address: the file offset on the first pass, the previous chunk size after
   that.  sz, cgbits, add64 and cmp all alias reserved2, so the instruction
   order here must not be changed. */
__ovly_xfer_loop:
|
|
#nop
|
|
/* off64 = sz shifted right 4 bytes: the increment lands in word 1, lined
   up with the low half of ea64, and word 0 becomes 0. */
rotqmbyi off64, sz, -4 # 1,4 4
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
/* Per-word carry out of ea64 + off64. */
cg cgbits, ea64, off64 # 0,2 8
|
|
#lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Move the carry from the low word (word 1) to the high word (word 0). */
shufb add64, cgbits, cgbits, cgshuf # 1,4 10
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
/* add64 = ea64 + off64 + carry-in: a 64-bit add across words 0 and 1. */
addx add64, ea64, off64 # 0,2 14
|
|
#lnop
|
|
ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
|
|
lnop
|
|
/* Keep the advanced address for the next iteration; word 0 is EAH. */
ori ea64, add64, 0 # 0,2 16
|
|
/* ealo = low half of the address rotated into word 0. */
rotqbyi ealo, add64, 4 # 1,4 16
|
|
cgt cmp, osize, maxsize # 0,2 17
|
|
/* Local-store destination of this chunk. */
wrch $MFC_LSA, vma # 1,6 17
|
|
#nop; lnop
|
|
/* sz = maxsize where osize > maxsize, else osize: the smaller of the two. */
selb sz, osize, maxsize, cmp # 0,2 19
|
|
wrch $MFC_EAH, ea64 # 1,6 19
|
|
ila tagid, MFC_TAG_ID # 0,2 20
|
|
wrch $MFC_EAL, ealo # 1,6 20
|
|
ila cmd, MFC_GET_CMD # 0,2 21
|
|
wrch $MFC_Size, sz # 1,6 21
|
|
/* osize -= sz: bytes remaining after this chunk. */
sf osize, sz, osize # 0,2 22
|
|
wrch $MFC_TagId, tagid # 1,6 22
|
|
/* Advance the local-store address past this chunk. */
a vma, vma, sz # 0,2 23
|
|
/* Writing the command channel is the last of the six channel writes. */
wrch $MFC_Cmd, cmd # 1,6 23
|
|
#nop
|
|
/* Loop while bytes remain. */
brnz osize, __ovly_xfer_loop # 1,4 24
|
|
|
|
/* Now update our data structures while waiting for DMA to complete.
|
|
Low bit of .size needs to be cleared on the _ovly_table entry
|
|
corresponding to the evicted overlay, and set on the entry for the
|
|
newly loaded overlay. Note that no overlay may in fact be evicted
|
|
as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
|
|
for zero index! Also of course update the _ovly_buf_table entry. */
|
|
/* Register aliases overlap heavily in this section (map and newmask alias
   vma; addr4 aliases off4; genwi and newmap alias buf3), so each value is
   consumed before its register is reused.  Keep the instruction order. */
#nop
|
|
/* Index of the overlay being loaded, stored earlier by the caller path. */
lqr newovl, __ovly_current # 1,6 25
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
/* off3 = newovl * 16, the byte offset of its _ovly_table entry. */
shli off3, newovl, 4 # 0,4 31
|
|
#lnop
|
|
ila tab3, _ovly_table - 16 # 0,2 32
|
|
#lnop
|
|
#nop
|
|
/* Mask 0x100 sets byte 7 only, i.e. the low byte of word 1 (.size). */
fsmbi pbyte, 0x100 # 1,4 33
|
|
#nop; lnop
|
|
#nop
|
|
/* Re-read the table entry; vma was advanced by the DMA loop. */
lqx vma, tab3, off3 # 1,6 35
|
|
#nop; lnop
|
|
/* pbit = 1 in word 1, 0 elsewhere: the "resident" flag in .size. */
andi pbit, pbyte, 1 # 0,2 37
|
|
lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
/* Set the resident flag for the new overlay ... */
or newvma, vma, pbit # 0,2 41
|
|
/* ... and rotate the entry 12 bytes so that .buf is in word 0. */
rotqbyi buf3, vma, 12 # 1,4 41
|
|
#nop; lnop
|
|
#nop
|
|
stqx newvma, tab3, off3 # 1,6 43
|
|
#nop; lnop
|
|
/* off4 = buf * 4: _ovly_buf_table holds one word per buffer. */
shli off4, buf3, 2 # 1,4 45
|
|
#lnop
|
|
/* Base biased by one word, so buf 1 selects _ovly_buf_table[0]. */
ila tab4, _ovly_buf_table - 4 # 0,2 46
|
|
#lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Load the quadword that contains this buffer's word. */
lqx map, tab4, off4 # 1,6 49
|
|
#nop
|
|
/* Shuffle control for inserting a word at address tab4 + off4. */
cwx genwi, tab4, off4 # 1,4 50
|
|
a addr4, tab4, off4 # 0,2 51
|
|
#lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* oldovl = the buffer's previous occupant, rotated into word 0. */
rotqby oldovl, map, addr4 # 1,4 55
|
|
#nop
|
|
/* Insert the new overlay index into the buffer's word of the quadword. */
shufb newmap, newovl, map, genwi # 0,4 56
|
|
/* newmask = tag-group mask with only the MFC_TAG_ID bit set.  ilhu loads
   the upper halfword, which covers tag ids 16 and above. */
#if MFC_TAG_ID < 16
|
|
ila newmask, 1 << MFC_TAG_ID # 0,2 57
|
|
#else
|
|
ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
|
|
#endif
|
|
#lnop
|
|
#nop; lnop
|
|
#nop; lnop
|
|
/* Write the updated quadword back to _ovly_buf_table. */
stqd newmap, 0(addr4) # 1,6 60
|
|
|
|
/* Save app's tagmask, wait for DMA complete, restore mask. */
|
|
/* Exit sequence of do_load.  Carried over from the preceding code: newmask
   (tag mask), oldovl (previous occupant of the buffer), pbit (resident flag
   bit) and target (final branch address). */
ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
|
|
rdch oldmask, $MFC_RdTagMask # 1,6 61
|
|
#nop
|
|
/* Select only this manager's tag group ... */
wrch $MFC_WrTagMask, newmask # 1,6 62
|
|
#nop
|
|
/* ... request a status update using MFC_TAG_UPDATE_ALL ... */
wrch $MFC_WrTagUpdate, tagstat # 1,6 63
|
|
#nop
|
|
/* ... and read the tag status; the value read is not used afterwards.
   NOTE(review): presumably this read is what blocks until the DMA is done. */
rdch tagstat, $MFC_RdTagStat # 1,6 64
|
|
#nop
|
|
sync # 1,4 65
|
|
|
|
/* Any hint prior to the sync is lost. A hint here allows the branch
|
|
to complete 15 cycles after the hint. With no hint the branch will
|
|
take 18 or 19 cycles. */
|
|
ila tab5, _ovly_table - 16 # 0,2 66
|
|
hbr do_load99, target # 1,15 66
|
|
/* off5 = oldovl * 16, the table offset of the evicted overlay's entry. */
shli off5, oldovl, 4 # 0,4 67
|
|
/* Give the application its tag mask back. */
wrch $MFC_WrTagMask, oldmask # 1,6 67
|
|
/* zovl = all ones in each word where oldovl is 0 (nothing was evicted). */
ceqi zovl, oldovl, 0 # 0,2 68
|
|
#lnop
|
|
#nop; lnop
|
|
#nop
|
|
/* Spread the word 0 result to every word as a select mask. */
fsm zovl, zovl # 1,4 70
|
|
#nop
|
|
lqx oldvma, tab5, off5 # 1,6 71
|
|
#nop
|
|
/* oldmask (save3) has been written back: restore save3. */
lqd save3, -48($sp) # 1,6 72
|
|
#nop; lnop
|
|
/* Clear pbit when nothing was evicted, so the andc/stqx below rewrite the
   quadword at index 0 unchanged. */
andc pbit, pbit, zovl # 0,2 74
|
|
lqd save2, -32($sp) # 1,6 74
|
|
#ifdef OVLY_IRQ_SAVE
|
|
ila irqtmp2, do_load90 # 0,2 75
|
|
#lnop
|
|
/* Keep only the low bit of the machine status saved on entry.
   NOTE(review): presumably the interrupt-enable state. */
andi irq_stat, irq_stat, 1 # 0,2 76
|
|
#lnop
|
|
#else
|
|
#nop; lnop
|
|
#nop; lnop
|
|
#endif
|
|
/* Clear the resident flag of the evicted overlay. */
andc oldvma, oldvma, pbit # 0,2 77
|
|
/* pbit (save1) has been consumed: restore save1. */
lqd save1, -16($sp) # 1,6 77
|
|
nop # 0,0 78
|
|
#lnop
|
|
#nop
|
|
stqx oldvma, tab5, off5 # 1,6 79
|
|
#nop
|
|
#ifdef OVLY_IRQ_SAVE
|
|
/* If the saved bit was set, branch to do_load90 with the interrupt-enabling
   form of binz; otherwise fall through to the same label with interrupts
   left as they are. */
binze irq_stat, irqtmp2 # 1,4 80
|
|
do_load90:
|
|
#nop
|
|
lqd save4, -64($sp) # 1,6 84
|
|
#else
|
|
#nop; lnop
|
|
#endif
|
|
|
|
|
|
/* Global label on a nop, reached once the tables are updated and the saved
   registers are restored.
   NOTE(review): presumably a hook where a debugger can place a breakpoint. */
.global _ovly_debug_event
|
|
.type _ovly_debug_event, @function
|
|
_ovly_debug_event:
|
|
nop
|
|
/* Branch to target address. */
|
|
do_load99:
|
|
bi target # 1,4 81/85
|
|
|
|
|
|
/* The recorded size of __ovly_load runs from its label to here, so it
   includes do_load. */
.size __ovly_load, . - __ovly_load
|