diff --git a/libffi/ChangeLog b/libffi/ChangeLog index ebefffa6f5f..17ed41118d4 100644 --- a/libffi/ChangeLog +++ b/libffi/ChangeLog @@ -1,3 +1,10 @@ +2003-01-28 Andrew Haley + + * include/ffi.h.in: Enable FFI_CLOSURES for x86_64. + * src/x86/ffi64.c (ffi_prep_closure): New. + (ffi_closure_UNIX64_inner): New. + * src/x86/unix64.S (ffi_closure_UNIX64): New. + 2003-01-27 Alexandre Oliva * configure.in (toolexecdir, toolexeclibdir): Set and AC_SUBST. diff --git a/libffi/include/ffi.h.in b/libffi/include/ffi.h.in index bc15daace04..02853846c9f 100644 --- a/libffi/include/ffi.h.in +++ b/libffi/include/ffi.h.in @@ -471,6 +471,12 @@ struct ffi_ia64_trampoline_struct { #define FFI_TRAMPOLINE_SIZE 16 #define FFI_NATIVE_RAW_API 0 +#elif defined(__x86_64__) + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 + #else #define FFI_CLOSURES 0 diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c index ed6e89d7729..9427a37c8b5 100644 --- a/libffi/src/x86/ffi64.c +++ b/libffi/src/x86/ffi64.c @@ -27,6 +27,7 @@ #include #include +#include /* ffi_prep_args is called by the assembly routine once stack space has been allocated for the function's arguments */ @@ -571,4 +572,135 @@ void ffi_call(/*@dependent@*/ ffi_cif *cif, } } +extern void ffi_closure_UNIX64(void); + +ffi_status +ffi_prep_closure (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data) +{ + volatile unsigned short *tramp; + + /* FFI_ASSERT (cif->abi == FFI_OSF); */ + + tramp = (volatile unsigned short *) &closure->tramp[0]; + tramp[0] = 0xbb49; /* mov , %r11 */ + tramp[5] = 0xba49; /* mov , %r10 */ + tramp[10] = 0xff49; /* jmp *%r11 */ + tramp[11] = 0x00e3; + *(void * volatile *) &tramp[1] = ffi_closure_UNIX64; + *(void * volatile *) &tramp[6] = closure; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +int +ffi_closure_UNIX64_inner(ffi_closure *closure, va_list l, void *rp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn, argn; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + + argn = 0; + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. */ + while (i < avn) + { + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + { + if (l->gp_offset > 48-8) + { + avalue[i] = l->overflow_arg_area; + l->overflow_arg_area = (char *)l->overflow_arg_area + 8; + } + else + { + avalue[i] = (char *)l->reg_save_area + l->gp_offset; + l->gp_offset += 8; + } + } + break; + + case FFI_TYPE_STRUCT: + /* FIXME */ + FFI_ASSERT(0); + break; + + case FFI_TYPE_DOUBLE: + { + if (l->fp_offset > 176-16) + { + avalue[i] = l->overflow_arg_area; + l->overflow_arg_area = (char *)l->overflow_arg_area + 8; + } + else + { + avalue[i] = (char *)l->reg_save_area + l->fp_offset; + l->fp_offset += 16; + } + } +#if DEBUG_FFI + fprintf (stderr, "double arg %d = %g\n", i, *(double *)avalue[i]); +#endif + break; + + case FFI_TYPE_FLOAT: + { + if (l->fp_offset > 176-16) + { + avalue[i] = l->overflow_arg_area; + l->overflow_arg_area = (char *)l->overflow_arg_area + 8; + } + else + { + avalue[i] = (char *)l->reg_save_area + l->fp_offset; + l->fp_offset += 16; + } + } +#if DEBUG_FFI + fprintf (stderr, "float arg %d = %g\n", i, *(float *)avalue[i]); +#endif + break; + + default: + FFI_ASSERT(0); + } + + argn += ALIGN(arg_types[i]->size, SIZEOF_ARG) / SIZEOF_ARG; + i++; + } + + /* Invoke the closure. */ + (closure->fun) (cif, rp, avalue, closure->user_data); + + /* FIXME: Structs not supported. */ + FFI_ASSERT(cif->rtype->type != FFI_TYPE_STRUCT); + + /* Tell ffi_closure_UNIX64 how to perform return type promotions. */ + + return cif->rtype->type; +} #endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S index 8ac1145f7e9..f0cd3c9c0c9 100644 --- a/libffi/src/x86/unix64.S +++ b/libffi/src/x86/unix64.S @@ -162,7 +162,81 @@ sse2floatfloat: movaps (%rdi), %xmm0 movq %xmm0, (%rsi) ret - + + .align 2 +.globl ffi_closure_UNIX64 + .type ffi_closure_UNIX64,@function + +ffi_closure_UNIX64: +.LFB2: + pushq %rbp +.LCFI10: + movq %rsp, %rbp +.LCFI11: + subq $240, %rsp +.LCFI12: + movq %rdi, -176(%rbp) + movq %rsi, -168(%rbp) + movq %rdx, -160(%rbp) + movq %rcx, -152(%rbp) + movq %r8, -144(%rbp) + movq %r9, -136(%rbp) + /* FIXME: We can avoid all this stashing of XMM registers by + (in ffi_prep_closure) computing the number of + floating-point args and moving it into %rax before calling + this function. Once this is done, uncomment the next few + lines and only the essential XMM registers will be written + to memory. This is a significant saving. */ +/* movzbl %al, %eax */ +/* movq %rax, %rdx */ +/* leaq 0(,%rdx,4), %rax */ +/* leaq 2f(%rip), %rdx */ +/* subq %rax, %rdx */ + leaq -1(%rbp), %rax +/* jmp *%rdx */ + movaps %xmm7, -15(%rax) + movaps %xmm6, -31(%rax) + movaps %xmm5, -47(%rax) + movaps %xmm4, -63(%rax) + movaps %xmm3, -79(%rax) + movaps %xmm2, -95(%rax) + movaps %xmm1, -111(%rax) + movaps %xmm0, -127(%rax) +2: + movl %edi, -180(%rbp) + movl $0, -224(%rbp) + movl $48, -220(%rbp) + leaq 16(%rbp), %rax + movq %rax, -216(%rbp) + leaq -176(%rbp), %rdx + movq %rdx, -208(%rbp) + leaq -224(%rbp), %rsi + movq %r10, %rdi + movq %rsp, %rdx + call ffi_closure_UNIX64_inner@PLT + + cmpl $FFI_TYPE_FLOAT, %eax + je 1f + cmpl $FFI_TYPE_DOUBLE, %eax + je 2f + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je 3f + cmpl $FFI_TYPE_STRUCT, %eax + je 4f + popq %rax + leave + ret +1: +2: +3: + movaps -240(%rbp), %xmm0 + leave + ret +4: + leave + ret +.LFE2: + .section .eh_frame,"a",@progbits .Lframe0: .long .LECIE1-.LSCIE1 @@ -204,5 +278,25 @@ sse2floatfloat: .uleb128 0x6 .align 8 .LEFDE1: +.LSFDE3: + .long .LEFDE3-.LASFDE3 # FDE Length +.LASFDE3: + .long .LASFDE3-.Lframe0 # FDE CIE offset + + .long .LFB2-. # FDE initial location + .long .LFE2-.LFB2 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .long .LCFI10-.LFB2 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0x10 + .byte 0x86 # DW_CFA_offset, column 0x6 + .uleb128 0x2 + .byte 0x4 # DW_CFA_advance_loc4 + .long .LCFI11-.LCFI10 + .byte 0xd # DW_CFA_def_cfa_register + .uleb128 0x6 + .align 8 +.LEFDE3: #endif /* __x86_64__ */