ffi.h.in: Enable FFI_CLOSURES for x86_64.

2003-01-28  Andrew Haley  <aph@redhat.com>

        * include/ffi.h.in: Enable FFI_CLOSURES for x86_64.
        * src/x86/ffi64.c (ffi_prep_closure): New.
        (ffi_closure_UNIX64_inner): New.
        * src/x86/unix64.S (ffi_closure_UNIX64): New.

From-SVN: r61978
Andrew Haley, 2003-01-28 15:54:28 +00:00, committed by Andrew Haley
commit fd2e49b2ac, parent 447ff38fba
4 changed files with 240 additions and 1 deletion

ChangeLog

@@ -1,3 +1,10 @@
2003-01-28  Andrew Haley  <aph@redhat.com>

        * include/ffi.h.in: Enable FFI_CLOSURES for x86_64.
        * src/x86/ffi64.c (ffi_prep_closure): New.
        (ffi_closure_UNIX64_inner): New.
        * src/x86/unix64.S (ffi_closure_UNIX64): New.

2003-01-27  Alexandre Oliva  <aoliva@redhat.com>

        * configure.in (toolexecdir, toolexeclibdir): Set and AC_SUBST.

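For context, a minimal sketch (not part of this patch) of how client code would use the closure support enabled here: build a call interface with ffi_prep_cif, attach a handler with the ffi_prep_closure interface added below, then call through the closure object itself. It assumes the ffi_closure object lives in executable memory, and the handler and variable names are invented for illustration.

    #include <ffi.h>
    #include <stdio.h>

    /* Handler with the signature ffi_prep_closure expects: the cif, a pointer
       to the return value, the argument pointers, and the registered user_data.  */
    static void add_handler (ffi_cif *cif, void *ret, void **args, void *user_data)
    {
      /* Integer results are written as a full word, per libffi convention.  */
      *(ffi_arg *) ret = *(int *) args[0] + *(int *) args[1];
    }

    int main (void)
    {
      static ffi_closure closure;   /* must live in executable memory */
      ffi_cif cif;
      ffi_type *arg_types[2] = { &ffi_type_sint32, &ffi_type_sint32 };
      int (*fn) (int, int);

      if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint32, arg_types) != FFI_OK
          || ffi_prep_closure (&closure, &cif, add_handler, NULL) != FFI_OK)
        return 1;

      /* The trampoline written by ffi_prep_closure makes the closure object
         itself callable as an ordinary function pointer.  */
      fn = (int (*)(int, int)) &closure;
      printf ("%d\n", fn (2, 3));   /* prints 5 */
      return 0;
    }
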
include/ffi.h.in

@@ -471,6 +471,12 @@ struct ffi_ia64_trampoline_struct {
#define FFI_TRAMPOLINE_SIZE 16
#define FFI_NATIVE_RAW_API 0
#elif defined(__x86_64__)
#define FFI_CLOSURES 1
#define FFI_TRAMPOLINE_SIZE 24
#define FFI_NATIVE_RAW_API 0
#else
#define FFI_CLOSURES 0

src/x86/ffi64.c

@@ -27,6 +27,7 @@
#include <ffi_common.h>
#include <stdlib.h>
#include <stdarg.h>

/* ffi_prep_args is called by the assembly routine once stack space
   has been allocated for the function's arguments */
@@ -571,4 +572,135 @@ void ffi_call(/*@dependent@*/ ffi_cif *cif,
}
}

extern void ffi_closure_UNIX64(void);

ffi_status
ffi_prep_closure (ffi_closure* closure,
                  ffi_cif* cif,
                  void (*fun)(ffi_cif*, void*, void**, void*),
                  void *user_data)
{
  volatile unsigned short *tramp;

  /* FFI_ASSERT (cif->abi == FFI_OSF); */

  tramp = (volatile unsigned short *) &closure->tramp[0];
  tramp[0] = 0xbb49;            /* mov <code>, %r11 */
  tramp[5] = 0xba49;            /* mov <data>, %r10 */
  tramp[10] = 0xff49;           /* jmp *%r11 */
  tramp[11] = 0x00e3;
  *(void * volatile *) &tramp[1] = ffi_closure_UNIX64;
  *(void * volatile *) &tramp[6] = closure;

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
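
As a reading aid (not part of the patch): the 16-bit stores above lay down a 24-byte trampoline, which is what FFI_TRAMPOLINE_SIZE 24 in the ffi.h.in hunk accounts for. The byte-level equivalent, with an invented helper name:

    #include <string.h>

    /* Byte-for-byte equivalent of the stores in ffi_prep_closure above:
         offset  0: 49 bb <imm64>   movabs $ffi_closure_UNIX64, %r11
         offset 10: 49 ba <imm64>   movabs $closure, %r10
         offset 20: 49 ff e3        jmp    *%r11
         offset 23: 00              padding
       -- 24 bytes in total.  */
    static void
    sketch_build_trampoline (unsigned char *tramp, void *code, void *data)
    {
      tramp[0] = 0x49; tramp[1] = 0xbb;                       /* movabs $code, %r11 */
      memcpy (tramp + 2, &code, 8);
      tramp[10] = 0x49; tramp[11] = 0xba;                     /* movabs $data, %r10 */
      memcpy (tramp + 12, &data, 8);
      tramp[20] = 0x49; tramp[21] = 0xff; tramp[22] = 0xe3;   /* jmp *%r11 */
      tramp[23] = 0x00;                                       /* padding */
    }
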
int
ffi_closure_UNIX64_inner(ffi_closure *closure, va_list l, void *rp)
{
  ffi_cif *cif;
  void **avalue;
  ffi_type **arg_types;
  long i, avn, argn;

  cif = closure->cif;
  avalue = alloca(cif->nargs * sizeof(void *));

  argn = 0;

  i = 0;
  avn = cif->nargs;
  arg_types = cif->arg_types;

  /* Grab the addresses of the arguments from the stack frame. */
  while (i < avn)
    {
      switch (arg_types[i]->type)
        {
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_POINTER:
          {
            if (l->gp_offset > 48-8)
              {
                avalue[i] = l->overflow_arg_area;
                l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
              }
            else
              {
                avalue[i] = (char *)l->reg_save_area + l->gp_offset;
                l->gp_offset += 8;
              }
          }
          break;

        case FFI_TYPE_STRUCT:
          /* FIXME */
          FFI_ASSERT(0);
          break;

        case FFI_TYPE_DOUBLE:
          {
            if (l->fp_offset > 176-16)
              {
                avalue[i] = l->overflow_arg_area;
                l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
              }
            else
              {
                avalue[i] = (char *)l->reg_save_area + l->fp_offset;
                l->fp_offset += 16;
              }
          }
#if DEBUG_FFI
          fprintf (stderr, "double arg %d = %g\n", i, *(double *)avalue[i]);
#endif
          break;

        case FFI_TYPE_FLOAT:
          {
            if (l->fp_offset > 176-16)
              {
                avalue[i] = l->overflow_arg_area;
                l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
              }
            else
              {
                avalue[i] = (char *)l->reg_save_area + l->fp_offset;
                l->fp_offset += 16;
              }
          }
#if DEBUG_FFI
          fprintf (stderr, "float arg %d = %g\n", i, *(float *)avalue[i]);
#endif
          break;

        default:
          FFI_ASSERT(0);
        }

      argn += ALIGN(arg_types[i]->size, SIZEOF_ARG) / SIZEOF_ARG;
      i++;
    }

  /* Invoke the closure. */
  (closure->fun) (cif, rp, avalue, closure->user_data);

  /* FIXME: Structs not supported. */
  FFI_ASSERT(cif->rtype->type != FFI_TYPE_STRUCT);

  /* Tell ffi_closure_UNIX64 how to perform return type promotions. */
  return cif->rtype->type;
}
#endif /* ifndef __x86_64__ */
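
The bounds checked above come straight from the System V x86-64 va_list layout: the register save area holds six 8-byte integer slots (gp_offset running 0 to 48) followed by eight 16-byte SSE slots (fp_offset running 48 to 176), and anything beyond that lives in overflow_arg_area. For reference, a sketch of the structure the loop walks by hand (the type name is invented; GCC's real type is __builtin_va_list):

    /* Mirrors the psABI va_list, shown only to document the bounds used
       above: gp_offset > 48-8 means all six GPR slots are consumed, and
       fp_offset > 176-16 means all eight XMM slots are consumed.  */
    struct sketch_va_list_tag
    {
      unsigned int gp_offset;     /* next free GPR slot: 0, 8, ... 40; 48 = exhausted     */
      unsigned int fp_offset;     /* next free XMM slot: 48, 64, ... 160; 176 = exhausted */
      void *overflow_arg_area;    /* arguments that were passed on the stack              */
      void *reg_save_area;        /* spilled %rdi..%r9 followed by %xmm0..%xmm7           */
    };
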

src/x86/unix64.S

@@ -162,7 +162,81 @@ sse2floatfloat:
        movaps (%rdi), %xmm0
        movq %xmm0, (%rsi)
        ret

        .align 2
        .globl ffi_closure_UNIX64
        .type ffi_closure_UNIX64,@function

ffi_closure_UNIX64:
.LFB2:
        pushq %rbp
.LCFI10:
        movq %rsp, %rbp
.LCFI11:
        subq $240, %rsp
.LCFI12:
        movq %rdi, -176(%rbp)
        movq %rsi, -168(%rbp)
        movq %rdx, -160(%rbp)
        movq %rcx, -152(%rbp)
        movq %r8, -144(%rbp)
        movq %r9, -136(%rbp)
        /* FIXME: We can avoid all this stashing of XMM registers by
           (in ffi_prep_closure) computing the number of
           floating-point args and moving it into %rax before calling
           this function. Once this is done, uncomment the next few
           lines and only the essential XMM registers will be written
           to memory. This is a significant saving. */
        /* movzbl %al, %eax */
        /* movq %rax, %rdx */
        /* leaq 0(,%rdx,4), %rax */
        /* leaq 2f(%rip), %rdx */
        /* subq %rax, %rdx */
        leaq -1(%rbp), %rax
        /* jmp *%rdx */
        movaps %xmm7, -15(%rax)
        movaps %xmm6, -31(%rax)
        movaps %xmm5, -47(%rax)
        movaps %xmm4, -63(%rax)
        movaps %xmm3, -79(%rax)
        movaps %xmm2, -95(%rax)
        movaps %xmm1, -111(%rax)
        movaps %xmm0, -127(%rax)
2:
        movl %edi, -180(%rbp)
        movl $0, -224(%rbp)
        movl $48, -220(%rbp)
        leaq 16(%rbp), %rax
        movq %rax, -216(%rbp)
        leaq -176(%rbp), %rdx
        movq %rdx, -208(%rbp)
        leaq -224(%rbp), %rsi
        movq %r10, %rdi
        movq %rsp, %rdx
        call ffi_closure_UNIX64_inner@PLT

        cmpl $FFI_TYPE_FLOAT, %eax
        je 1f
        cmpl $FFI_TYPE_DOUBLE, %eax
        je 2f
        cmpl $FFI_TYPE_LONGDOUBLE, %eax
        je 3f
        cmpl $FFI_TYPE_STRUCT, %eax
        je 4f
        popq %rax
        leave
        ret
1:
2:
3:
        movaps -240(%rbp), %xmm0
        leave
        ret
4:
        leave
        ret
.LFE2:

        .section .eh_frame,"a",@progbits
.Lframe0:
        .long .LECIE1-.LSCIE1
@@ -204,5 +278,25 @@ sse2floatfloat:
        .uleb128 0x6
        .align 8
.LEFDE1:

.LSFDE3:
        .long .LEFDE3-.LASFDE3 # FDE Length
.LASFDE3:
        .long .LASFDE3-.Lframe0 # FDE CIE offset
        .long .LFB2-. # FDE initial location
        .long .LFE2-.LFB2 # FDE address range
        .uleb128 0x0 # Augmentation size
        .byte 0x4 # DW_CFA_advance_loc4
        .long .LCFI10-.LFB2
        .byte 0xe # DW_CFA_def_cfa_offset
        .uleb128 0x10
        .byte 0x86 # DW_CFA_offset, column 0x6
        .uleb128 0x2
        .byte 0x4 # DW_CFA_advance_loc4
        .long .LCFI11-.LCFI10
        .byte 0xd # DW_CFA_def_cfa_register
        .uleb128 0x6
        .align 8
.LEFDE3:
#endif /* __x86_64__ */
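
The FIXME in ffi_closure_UNIX64 suggests having ffi_prep_closure precompute how many SSE-class arguments the closure takes and hand that count to the stub in %rax, the way callers of variadic functions do in %al, so that only the live XMM registers need to be spilled. A sketch of such a count (hypothetical helper, not in the patch; structs are left out since this patch does not handle them):

    #include <ffi.h>

    /* Hypothetical helper for the FIXME above: how many arguments the
       SysV ABI would pass in XMM registers (FLOAT and DOUBLE only).  */
    static unsigned int
    sketch_count_sse_args (ffi_cif *cif)
    {
      unsigned int i, n = 0;

      for (i = 0; i < cif->nargs; i++)
        if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
            || cif->arg_types[i]->type == FFI_TYPE_DOUBLE)
          n++;

      return n > 8 ? 8 : n;   /* at most eight XMM argument registers */
    }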