diff --git a/libitm/ChangeLog b/libitm/ChangeLog
index 0501d168a23e..b1629b1a89e3 100644
--- a/libitm/ChangeLog
+++ b/libitm/ChangeLog
@@ -1,3 +1,26 @@
+2011-11-09  Richard Henderson  <rth@redhat.com>
+
+	* barrier.tpl, memcpy.cc, memset.cc, method-wbetl.cc: Remove files.
+	* config/alpha/unaligned.h: Remove file.
+	* config/generic/unaligned.h: Remove file.
+	* config/x86/unaligned.h: Remove file.
+	* config/generic/cachepage.h: Remove file.
+	* config/posix/cachepage.cc: Remove file.
+	* config/generic/cacheline.cc: Remove file.
+	* config/x86/cacheline.cc: Remove file.
+	* config/generic/cacheline.h (gtm_cacheline): Remove the
+	store_mask, copy_mask, copy_mask_wb methods.
+	* config/x86/cacheline.h: Likewise.
+	* config/alpha/cacheline.h: Fall back to generic after setting size.
+	* config/generic/tls.cc (gtm_mask_stack): Remove.
+	* config/x86/x86_avx.cc (GTM_vpperm_shift): Remove.
+	(GTM_vpalignr_table): Remove.
+	* config/x86/x86_sse.cc (GTM_palignr_table): Remove.
+	(GTM_pshift_table): Remove.
+	* config/libitm_i.h: Don't include cachepage.h.
+	* Makefile.am (libitm_la_SOURCES): Remove cacheline.cc, cachepage.cc.
+	* Makefile.in, testsuite/Makefile.in: Rebuild.
+
 2011-11-09  Richard Henderson  <rth@redhat.com>
 
 	* config/x86/cacheline.h (gtm_cacheline::store_mask): Use .byte
diff --git a/libitm/Makefile.am b/libitm/Makefile.am
index 69234099b6cf..45789866b4f2 100644
--- a/libitm/Makefile.am
+++ b/libitm/Makefile.am
@@ -41,7 +41,7 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
 
 libitm_la_SOURCES = \
 	aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc barrier.cc beginend.cc \
-	clone.cc cacheline.cc cachepage.cc eh_cpp.cc local.cc \
+	clone.cc eh_cpp.cc local.cc \
 	query.cc retry.cc rwlock.cc useraction.cc util.cc \
 	sjlj.S tls.cc method-serial.cc method-gl.cc
 
diff --git a/libitm/Makefile.in b/libitm/Makefile.in
index 7dc864b3087e..8816580fb65c 100644
--- a/libitm/Makefile.in
+++ b/libitm/Makefile.in
@@ -48,6 +48,7 @@ DIST_COMMON = $(am__configure_deps) $(srcdir)/../config.guess \
 	$(top_srcdir)/configure ChangeLog
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
+	$(top_srcdir)/../config/asmcfi.m4 \
 	$(top_srcdir)/../config/depstand.m4 \
 	$(top_srcdir)/../config/enable.m4 \
 	$(top_srcdir)/../config/futex.m4 \
@@ -94,17 +95,17 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
 LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
 libitm_la_LIBADD =
 am__libitm_la_SOURCES_DIST = aatree.cc alloc.cc alloc_c.cc \
-	alloc_cpp.cc barrier.cc beginend.cc clone.cc cacheline.cc \
-	cachepage.cc eh_cpp.cc local.cc query.cc retry.cc rwlock.cc \
-	useraction.cc util.cc sjlj.S tls.cc method-serial.cc \
-	method-gl.cc x86_sse.cc x86_avx.cc futex.cc
+	alloc_cpp.cc barrier.cc beginend.cc clone.cc eh_cpp.cc \
+	local.cc query.cc retry.cc rwlock.cc useraction.cc util.cc \
+	sjlj.S tls.cc method-serial.cc method-gl.cc x86_sse.cc \
+	x86_avx.cc futex.cc
 @ARCH_X86_TRUE@am__objects_1 = x86_sse.lo x86_avx.lo
 @ARCH_FUTEX_TRUE@am__objects_2 = futex.lo
 am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \
-	barrier.lo beginend.lo clone.lo cacheline.lo cachepage.lo \
-	eh_cpp.lo local.lo query.lo retry.lo rwlock.lo useraction.lo \
-	util.lo sjlj.lo tls.lo method-serial.lo method-gl.lo \
-	$(am__objects_1) $(am__objects_2)
+	barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \
+	retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \
+	method-serial.lo method-gl.lo $(am__objects_1) \
+	$(am__objects_2)
 libitm_la_OBJECTS = $(am_libitm_la_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -234,8 +235,6 @@ ECHO_N = @ECHO_N@
 ECHO_T = @ECHO_T@
 EGREP = @EGREP@
 EXEEXT = @EXEEXT@
-FC = @FC@
-FCFLAGS = @FCFLAGS@
 FGREP = @FGREP@
 GREP = @GREP@
 INSTALL = @INSTALL@
@@ -286,7 +285,6 @@ abs_top_srcdir = @abs_top_srcdir@
 ac_ct_CC = @ac_ct_CC@
 ac_ct_CXX = @ac_ct_CXX@
 ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_ct_FC = @ac_ct_FC@
 am__include = @am__include@
 am__leading_dot = @am__leading_dot@
 am__quote = @am__quote@
@@ -371,10 +369,9 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
         -no-undefined
 
 libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc \
-	barrier.cc beginend.cc clone.cc cacheline.cc cachepage.cc \
-	eh_cpp.cc local.cc query.cc retry.cc rwlock.cc useraction.cc \
-	util.cc sjlj.S tls.cc method-serial.cc method-gl.cc \
-	$(am__append_1) $(am__append_2)
+	barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \
+	retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \
+	method-serial.cc method-gl.cc $(am__append_1) $(am__append_2)
 
 # Automake Documentation:
 # If your package has Texinfo files in many directories, you can use the
@@ -500,8 +497,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc_cpp.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/beginend.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cacheline.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachepage.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clone.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eh_cpp.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/futex.Plo@am__quote@
diff --git a/libitm/barrier.tpl b/libitm/barrier.tpl
deleted file mode 100644
index dcf101356e67..000000000000
--- a/libitm/barrier.tpl
+++ /dev/null
@@ -1,170 +0,0 @@
-/* -*- c++ -*- */
-/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "unaligned.h"
-
-namespace {
-
-using namespace GTM;
-
-template<typename T>
-T do_read (const T *ptr, abi_dispatch::lock_type lock)
-{
-  //
-  // Find the cacheline that holds the current value of *PTR.
-  //
-  abi_dispatch *disp = abi_disp();
-  uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
-  // Normalize PTR by chopping off the bottom bits so we can search
-  // for PTR in the cacheline hash.
-  uintptr_t iline = iptr & -CACHELINE_SIZE;
-  // The position in the resulting cacheline where *PTR is actually stored.
-  uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
-  const gtm_cacheline *pline = reinterpret_cast<const gtm_cacheline *>(iline);
-  // Search for the actual cacheline that holds the current value of *PTR.
-  const gtm_cacheline *line = disp->read_lock(pline, lock);
-
-  // Point to the position in the cacheline where *PTR is stored.
-  ptr = reinterpret_cast<const T *>(&line->b[iofs]);
-
-  // Straight up loads, because we're either aligned, or we don't care
-  // about alignment.
-  //
-  // If we require alignment on type T, do a straight load if we're
-  // aligned.  Otherwise do a straight load IFF the load fits entirely
-  // in this cacheline.  That is, it won't span multiple cachelines.
-  if (__builtin_expect (strict_alignment<T>::value
-			? (iofs & (sizeof (T) - 1)) == 0
-			: iofs + sizeof(T) <= CACHELINE_SIZE, 1))
-    {
-    do_normal_load:
-      return *ptr;
-    }
-  // If alignment on T is necessary, but we're unaligned, yet we fit
-  // entirely in this cacheline... do the unaligned load dance.
-  else if (__builtin_expect (strict_alignment<T>::value
-			     && iofs + sizeof(T) <= CACHELINE_SIZE, 1))
-    {
-    do_unaligned_load:
-      return unaligned_load<T>(ptr);
-    }
-  // Otherwise, this load will span multiple cachelines.
-  else
-    {
-      // Get the following cacheline for the rest of the data.
-      const gtm_cacheline *line2 = disp->read_lock(pline + 1, lock);
-
-      // If the two cachelines are adjacent, just load it all in one
-      // swoop.
-      if (line2 == line + 1)
-	{
-	  if (!strict_alignment<T>::value)
-	    goto do_normal_load;
-	  else
-	    goto do_unaligned_load;
-	}
-      else
-	{
-	  // Otherwise, ask the backend to load from two different
-	  // cachelines.
-	  return unaligned_load2<T>(line, line2, iofs);
-	}
-    }
-}
-
-template<typename T>
-void do_write (T *ptr, T val, abi_dispatch::lock_type lock)
-{
-  // Note: See comments for do_read() above for hints on this
-  // function.  Ideally we should abstract out a lot out of these two
-  // functions, and avoid all this duplication.
-
-  abi_dispatch *disp = abi_disp();
-  uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
-  uintptr_t iline = iptr & -CACHELINE_SIZE;
-  uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
-  gtm_cacheline *pline = reinterpret_cast<gtm_cacheline *>(iline);
-  gtm_cacheline_mask m = ((gtm_cacheline_mask)2 << (sizeof(T) - 1)) - 1;
-  abi_dispatch::mask_pair pair = disp->write_lock(pline, lock);
-
-  ptr = reinterpret_cast<T *>(&pair.line->b[iofs]);
-
-  if (__builtin_expect (strict_alignment<T>::value
-			? (iofs & (sizeof (val) - 1)) == 0
-			: iofs + sizeof(val) <= CACHELINE_SIZE, 1))
-    {
-      *pair.mask |= m << iofs;
-    do_normal_store:
-      *ptr = val;
-    }
-  else if (__builtin_expect (strict_alignment<T>::value
-			     && iofs + sizeof(val) <= CACHELINE_SIZE, 1))
-    {
-      *pair.mask |= m << iofs;
-    do_unaligned_store:
-      unaligned_store<T>(ptr, val);
-    }
-  else
-    {
-      *pair.mask |= m << iofs;
-      abi_dispatch::mask_pair pair2 = disp->write_lock(pline + 1, lock);
-
-      uintptr_t ileft = CACHELINE_SIZE - iofs;
-      *pair2.mask |= m >> ileft;
-
-      if (pair2.line == pair.line + 1)
-	{
-	  if (!strict_alignment<T>::value)
-	    goto do_normal_store;
-	  else
-	    goto do_unaligned_store;
-	}
-      else
-	unaligned_store2<T>(pair.line, pair2.line, iofs, val);
-    }
-}
-
-} /* anonymous namespace */
-
-#define ITM_READ(T, LOCK)						\
-  _ITM_TYPE_##T ITM_REGPARM _ITM_##LOCK##T (const _ITM_TYPE_##T *ptr)	\
-  {									\
-    return do_read (ptr, abi_dispatch::LOCK);				\
-  }
-
-#define ITM_WRITE(T, LOCK)						\
-  void ITM_REGPARM _ITM_##LOCK##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val) \
-  {									\
-    do_write (ptr, val, abi_dispatch::LOCK);				\
-  }
-
-#define ITM_BARRIERS(T)		\
-  ITM_READ(T, R)		\
-  ITM_READ(T, RaR)		\
-  ITM_READ(T, RaW)		\
-  ITM_READ(T, RfW)		\
-  ITM_WRITE(T, W)		\
-  ITM_WRITE(T, WaR)		\
-  ITM_WRITE(T, WaW)
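
The address arithmetic used by the removed do_read/do_write reduces to a few
lines. Here is a minimal standalone sketch (hypothetical, not part of libitm)
of the line/offset split and of the mask construction, which is written so
that it never shifts by the full mask width:

#include <cstdint>
#include <cstdio>

static const uintptr_t CACHELINE_SIZE = 64;   // power of two, as in libitm
typedef uint64_t mask_t;                      // one bit per byte of the line

int main ()
{
  int x = 0;
  uintptr_t iptr  = reinterpret_cast<uintptr_t>(&x);
  uintptr_t iline = iptr & -CACHELINE_SIZE;       // cacheline base address
  uintptr_t iofs  = iptr & (CACHELINE_SIZE - 1);  // byte offset in the line

  // ((mask_t)2 << (n - 1)) - 1 sets the n low bits; phrased this way so
  // that n == 64 does not shift a value by the full width (undefined).
  mask_t m = ((mask_t)2 << (sizeof (uint32_t) - 1)) - 1;

  // The shifted mask marks the bytes of the line the store would touch;
  // when iofs + sizeof(T) exceeds CACHELINE_SIZE the access spans two
  // lines, which is the multi-line case do_read/do_write handled above.
  printf ("line=%#lx ofs=%lu mask=%#lx\n", (unsigned long) iline,
	  (unsigned long) iofs, (unsigned long) (m << iofs));
  return 0;
}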
diff --git a/libitm/config/alpha/cacheline.h b/libitm/config/alpha/cacheline.h
index 5e38486b7134..611a1c9a26e4 100644
--- a/libitm/config/alpha/cacheline.h
+++ b/libitm/config/alpha/cacheline.h
@@ -33,90 +33,6 @@
 // modification mask, below.
 #define CACHELINE_SIZE 64
 
-#ifdef __alpha_bwx__
-# include "config/generic/cacheline.h"
-#else
-// If we don't have byte-word stores, then we'll never be able to
-// adjust *all* of the byte loads/stores to be truly atomic.  So
-// only guarantee 4-byte aligned values atomically stored, exactly
-// like the native system.  Use byte zap instructions to accelerate
-// sub-word masked stores.
+#include "config/generic/cacheline.h"
 
-namespace GTM HIDDEN {
-
-// A gtm_cacheline_mask stores a modified bit for every modified byte
-// in the cacheline with which it is associated.
-typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
-
-union gtm_cacheline
-{
-  // Byte access to the cacheline.
-  unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE)));
-
-  // Larger sized access to the cacheline.
-  uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)];
-  uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
-  uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
-  gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
-
-  // Store S into D, but only the bytes specified by M.
-  static void store_mask(uint32_t *d, uint32_t s, uint8_t m);
-  static void store_mask(uint64_t *d, uint64_t s, uint8_t m);
-
-  // Copy S to D, but only the bytes specified by M.
-  static void copy_mask (gtm_cacheline * __restrict d,
-			 const gtm_cacheline * __restrict s,
-			 gtm_cacheline_mask m);
-
-  // A write barrier to emit after (a series of) copy_mask.
-  static void copy_mask_wb () { atomic_write_barrier(); }
-};
-
-inline void ALWAYS_INLINE
-gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m)
-{
-  const uint8_t tm = (1 << sizeof(uint32_t)) - 1;
-
-  m &= tm;
-  if (__builtin_expect (m, tm))
-    {
-      if (__builtin_expect (m == tm, 1))
-	*d = s;
-      else
-	*d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m);
-    }
-}
-
-inline void ALWAYS_INLINE
-gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m)
-{
-  if (__builtin_expect (m, 0xff))
-    {
-      if (__builtin_expect (m == 0xff, 1))
-	*d = s;
-      else
-	{
-	  typedef uint32_t *p32 __attribute__((may_alias));
-	  p32 d32 = reinterpret_cast<p32>(d);
-
-	  if ((m & 0x0f) == 0x0f)
-	    {
-	      d32[0] = s;
-	      m &= 0xf0;
-	    }
-	  else if ((m & 0xf0) == 0xf0)
-	    {
-	      d32[1] = s >> 32;
-	      m &= 0x0f;
-	    }
-
-	  if (m)
-	    *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m);
-	}
-    }
-}
-
-} // namespace GTM
-
-#endif // __alpha_bwx__
 #endif // LIBITM_ALPHA_CACHELINE_H
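
For reference, the zap/zapnot merge that the deleted non-BWX path performed
is equivalent to the portable helper below (illustrative only; the Alpha
builtins did the same in two instructions):

#include <cstdint>

// Expand one mask bit per byte into a full byte mask: bit i of m set
// means byte i of the result is 0xff.  This is what zapnot's mask selects.
static uint64_t byte_mask (uint8_t m)
{
  uint64_t r = 0;
  for (int i = 0; i < 8; ++i)
    if (m & (1u << i))
      r |= (uint64_t) 0xff << (i * 8);
  return r;
}

// zap(d, m) clears the bytes selected by m; zapnot(s, m) keeps them.
// The OR of the two is the masked store the removed code performed.
static uint64_t store_mask (uint64_t d, uint64_t s, uint8_t m)
{
  uint64_t bm = byte_mask (m);
  return (d & ~bm) | (s & bm);
}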
diff --git a/libitm/config/alpha/unaligned.h b/libitm/config/alpha/unaligned.h
deleted file mode 100644
index 3d091aee2283..000000000000
--- a/libitm/config/alpha/unaligned.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef LIBITM_ALPHA_UNALIGNED_H
-#define LIBITM_ALPHA_UNALIGNED_H 1
-
-#define HAVE_ARCH_UNALIGNED_LOAD2_U2 1
-#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
-#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
-
-#ifndef __alpha_bwx__
-#define HAVE_ARCH_UNALIGNED_STORE2_U2 1
-#endif
-#define HAVE_ARCH_UNALIGNED_STORE2_U4 1
-#define HAVE_ARCH_UNALIGNED_STORE2_U8 1
-
-#include "config/generic/unaligned.h"
-
-namespace GTM HIDDEN {
-
-template<>
-inline uint16_t ALWAYS_INLINE
-unaligned_load2<uint16_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
-  uint64_t v2 = c2->u64[0];
-
-  return __builtin_alpha_extwl (v1, ofs) | __builtin_alpha_extwh (v2, ofs);
-}
-
-template<>
-inline uint32_t ALWAYS_INLINE
-unaligned_load2<uint32_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
-  uint64_t v2 = c2->u64[0];
-
-  return __builtin_alpha_extll (v1, ofs) + __builtin_alpha_extlh (v2, ofs);
-}
-
-template<>
-inline uint64_t ALWAYS_INLINE
-unaligned_load2<uint64_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
-  uint64_t v2 = c2->u64[0];
-
-  return __builtin_alpha_extql (v1, ofs) | __builtin_alpha_extqh (v2, ofs);
-}
-
-#ifndef __alpha_bwx__
-template<>
-inline void
-unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
-			   size_t ofs, uint16_t val)
-{
-  uint32_t vl = (uint32_t)val << 24, vh = val >> 8;
-
-  gtm_cacheline::store_mask (&c1->u32[CACHELINE_SIZE / 4 - 1], vl, 4);
-  gtm_cacheline::store_mask (&c2->u32[0], vh, 1);
-}
-#endif
-
-template<>
-inline void
-unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
-			   size_t ofs, uint32_t val)
-{
-  uint64_t vl = __builtin_alpha_insll (val, ofs);
-  uint64_t ml = __builtin_alpha_insll (~0u, ofs);
-  uint64_t vh = __builtin_alpha_inslh (val, ofs);
-  uint64_t mh = __builtin_alpha_inslh (~0u, ofs);
-
-  gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml);
-  gtm_cacheline::store_mask (&c2->u64[0], vh, mh);
-}
-
-template<>
-inline void
-unaligned_store2<uint64_t>(gtm_cacheline *c1, gtm_cacheline *c2,
-			   size_t ofs, uint64_t val)
-{
-  uint64_t vl = __builtin_alpha_insql (val, ofs);
-  uint64_t ml = __builtin_alpha_insql (~0u, ofs);
-  uint64_t vh = __builtin_alpha_insqh (val, ofs);
-  uint64_t mh = __builtin_alpha_insqh (~0u, ofs);
-
-  gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml);
-  gtm_cacheline::store_mask (&c2->u64[0], vh, mh);
-}
-
-} // namespace GTM
-
-#endif // LIBITM_ALPHA_UNALIGNED_H
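
The extract builtins above implement a two-word funnel shift. A portable
rendering of the 64-bit case, assuming little-endian byte order as on Alpha
(sketch; note the explicit guard against an undefined shift by 64, which the
hardware extqh handles implicitly):

#include <cstdint>

// Extract an unaligned 64-bit value that starts ofs bytes into the word
// v1 and continues into v2, as extql/extqh did above.
static uint64_t load2_u64 (uint64_t v1, uint64_t v2, unsigned ofs)
{
  unsigned s = (ofs & 7) * 8;
  if (s == 0)
    return v1;                       // aligned: extqh contributes zero
  return (v1 >> s) | (v2 << (64 - s));
}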
diff --git a/libitm/config/generic/cacheline.cc b/libitm/config/generic/cacheline.cc
deleted file mode 100644
index 108ffba30378..000000000000
--- a/libitm/config/generic/cacheline.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "libitm_i.h"
-
-
-namespace GTM HIDDEN {
-
-void
-gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
-			  const gtm_cacheline * __restrict s,
-			  gtm_cacheline_mask m)
-{
-  const size_t n = sizeof (gtm_word);
-
-  if (m == (gtm_cacheline_mask) -1)
-    {
-      *d = *s;
-      return;
-    }
-  if (__builtin_expect (m == 0, 0))
-    return;
-
-  for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
-    store_mask (&d->w[i], s->w[i], m);
-}
-
-} // namespace GTM
diff --git a/libitm/config/generic/cacheline.h b/libitm/config/generic/cacheline.h
index 0a5af761d6e6..dd7d877d1d16 100644
--- a/libitm/config/generic/cacheline.h
+++ b/libitm/config/generic/cacheline.h
@@ -51,57 +51,8 @@ union gtm_cacheline
   uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
   uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
   gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
-
-  // Store S into D, but only the bytes specified by M.
-  template<typename T> static void store_mask (T *d, T s, uint8_t m);
-
-  // Copy S to D, but only the bytes specified by M.
-  static void copy_mask (gtm_cacheline * __restrict d,
-			 const gtm_cacheline * __restrict s,
-			 gtm_cacheline_mask m);
-
-  // A write barrier to emit after (a series of) copy_mask.
-  // When we're emitting non-temporal stores, the normal strong
-  // ordering of the machine doesn't apply.
-  static void copy_mask_wb () { atomic_write_barrier(); }
 };
 
-template<typename T>
-inline void
-gtm_cacheline::store_mask (T *d, T s, uint8_t m)
-{
-  const uint8_t tm = (1 << sizeof(T)) - 1;
-
-  if (__builtin_expect (m & tm, tm))
-    {
-      if (__builtin_expect ((m & tm) == tm, 1))
-	*d = s;
-      else
-	{
-	  const int half = sizeof(T) / 2;
-	  typedef typename sized_integral<half>::type half_t;
-	  half_t *dhalf = reinterpret_cast<half_t *>(d);
-	  half_t s1, s2;
-
-	  if (WORDS_BIGENDIAN)
-	    s1 = s >> half*8, s2 = s;
-	  else
-	    s1 = s, s2 = s >> half*8;
-
-	  store_mask (dhalf, s1, m);
-	  store_mask (dhalf + 1, s2, m >> half);
-	}
-    }
-}
-
-template<>
-inline void ALWAYS_INLINE
-gtm_cacheline::store_mask<uint8_t> (uint8_t *d, uint8_t s, uint8_t m)
-{
-  if (m & 1)
-    *d = s;
-}
-
 } // namespace GTM
 
 #endif // LIBITM_CACHELINE_H
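
The removed store_mask template recursed by halving T down to single bytes,
preferring one full-width store whenever every mask bit was set. A byte-loop
equivalent for little-endian targets makes the mask convention explicit
(sketch only; it drops the full-word fast path):

#include <cstdint>
#include <cstring>

// Equivalent of the removed template on a little-endian target:
// bit i of m covers byte i of *d.
template<typename T>
static void store_mask_bytes (T *d, T s, uint8_t m)
{
  unsigned char *db = reinterpret_cast<unsigned char *>(d);
  unsigned char sb[sizeof (T)];
  std::memcpy (sb, &s, sizeof (T));
  for (unsigned i = 0; i < sizeof (T); ++i)
    if (m & (1u << i))
      db[i] = sb[i];
}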
diff --git a/libitm/config/generic/cachepage.h b/libitm/config/generic/cachepage.h
deleted file mode 100644
index a5472f3831b1..000000000000
--- a/libitm/config/generic/cachepage.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef LIBITM_CACHEPAGE_H
-#define LIBITM_CACHEPAGE_H 1
-
-namespace GTM HIDDEN {
-
-// A "page" worth of saved cachelines plus modification masks.  This
-// arrangement is intended to minimize the overhead of alignment.  The
-// PAGE_SIZE defined by the target must be a constant for this to work,
-// which means that this definition may not be the same as the real
-// system page size.  An additional define of FIXED_PAGE_SIZE by the
-// target indicates that PAGE_SIZE exactly matches the system page size.
-
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
-
-struct gtm_cacheline_page
-{
-  static const size_t LINES
-    = ((PAGE_SIZE - sizeof(gtm_cacheline_page *))
-       / (CACHELINE_SIZE + sizeof(gtm_cacheline_mask)));
-
-  gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE)));
-  gtm_cacheline_mask masks[LINES];
-  gtm_cacheline_page *prev;
-
-  static gtm_cacheline_page *
-  page_for_line (gtm_cacheline *c)
-  {
-    return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE);
-  }
-
-  gtm_cacheline_mask *
-  mask_for_line (gtm_cacheline *c)
-  {
-    size_t index = c - &this->lines[0];
-    return &this->masks[index];
-  }
-
-  static gtm_cacheline_mask *
-  mask_for_page_line (gtm_cacheline *c)
-  {
-    gtm_cacheline_page *p = page_for_line (c);
-    return p->mask_for_line (c);
-  }
-
-  static void *operator new (size_t);
-  static void operator delete (void *);
-};
-
-} // namespace GTM
-
-#endif // LIBITM_CACHEPAGE_H
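
With the defaults above (PAGE_SIZE 4096, 64-byte lines, 8-byte masks) a page
held 56 line/mask pairs plus the prev pointer, with 56 bytes to spare. A
compile-time check of that arithmetic, assuming 64-bit pointers:

#include <cstddef>

static const size_t PAGE_SIZE = 4096;
static const size_t CACHELINE_SIZE = 64;
static const size_t MASK_SIZE = 8;   // sizeof(gtm_cacheline_mask)
static const size_t PTR_SIZE = 8;    // sizeof(gtm_cacheline_page *)

static const size_t LINES
  = (PAGE_SIZE - PTR_SIZE) / (CACHELINE_SIZE + MASK_SIZE);

static_assert (LINES == 56, "56 line/mask pairs per 4K page");
static_assert (LINES * CACHELINE_SIZE + LINES * MASK_SIZE + PTR_SIZE
	       <= PAGE_SIZE, "layout fits in one page");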
diff --git a/libitm/config/generic/tls.cc b/libitm/config/generic/tls.cc
index c6421113c86f..e502e50869b4 100644
--- a/libitm/config/generic/tls.cc
+++ b/libitm/config/generic/tls.cc
@@ -30,51 +30,4 @@ namespace GTM HIDDEN {
 __thread gtm_thread_tls _gtm_thr_tls;
 #endif
 
-// Filter out any updates that overlap the libitm stack, as defined by
-// TOP (entry point to library) and BOT (below current function).  This
-// definition should be fine for all stack-grows-down architectures.
-
-gtm_cacheline_mask __attribute__((noinline))
-gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask)
-{
-  void *top = gtm_thr()->jb.cfa;
-  void *bot = __builtin_dwarf_cfa();
-
-  // We must have come through an entry point that set TOP.
-  assert (top != NULL);
-
-  if (line + 1 < bot)
-    {
-      // Since we don't have the REAL stack boundaries for this thread,
-      // we cannot know if this is a dead write to a stack address below
-      // the current function or if it is write to another VMA.  In either
-      // case allowing the write should not affect correctness.
-    }
-  else if (line >= top)
-    {
-      // A valid write to an address in an outer stack frame, or a write
-      // to another VMA.
-    }
-  else
-    {
-      uintptr_t diff = (uintptr_t)top - (uintptr_t)line;
-      if (diff >= CACHELINE_SIZE)
-	{
-	  // The write is either fully within the proscribed area, or the tail
-	  // of the cacheline overlaps the proscribed area.  Assume that all
-	  // stacks are at least cacheline aligned and declare the head of the
-	  // cacheline dead.
-	  mask = 0;
-	}
-      else
-	{
-	  // The head of the cacheline is within the proscribed area, but the
-	  // tail of the cacheline is live.  Eliminate the dead writes.
-	  mask &= (gtm_cacheline_mask)-1 << diff;
-	}
-    }
-
-  return mask;
-}
-
 } // namespace GTM
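
The interesting case in the removed gtm_mask_stack is the partial overlap,
where only the head of the cacheline lies within the library's own stack.
An isolated illustration of the clipping, with hypothetical values:

#include <cstdint>
#include <cassert>

typedef uint64_t mask_t;   // one bit per byte of a 64-byte cacheline

int main ()
{
  // Suppose the libitm entry CFA ("top") lies 10 bytes into the line
  // being written: bytes 0..9 are dead stack, bytes 10..63 are live.
  uintptr_t diff = 10;
  mask_t mask = ~(mask_t) 0;         // all 64 bytes written

  mask &= (mask_t) -1 << diff;       // drop the dead head of the line

  assert (mask == 0xfffffffffffffc00ull);
  return 0;
}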
diff --git a/libitm/config/generic/unaligned.h b/libitm/config/generic/unaligned.h
deleted file mode 100644
index 50cb13bd277b..000000000000
--- a/libitm/config/generic/unaligned.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef LIBITM_UNALIGNED_H
-#define LIBITM_UNALIGNED_H 1
-
-namespace GTM HIDDEN {
-
-#ifndef STRICT_ALIGNMENT
-#define STRICT_ALIGNMENT 1
-#endif
-
-// A type trait for whether type T requires strict alignment.
-// The generic types are assumed to all be the same; specializations
-// for target-specific types should be done in config/cpu/unaligned.h.
-template<typename T>
-  struct strict_alignment
-    : public std::integral_constant<bool, STRICT_ALIGNMENT>
-  { };
-
-// A helper template for accessing an integral type the same size as T
-template<typename T>
-  struct make_integral
-    : public sized_integral<sizeof(T)>
-  { };
-
-// A helper class for accessing T as an unaligned value.
-template<typename T>
-struct __attribute__((packed)) unaligned_helper
-  { T x; };
-
-// A helper class for view-converting T as an integer.
-template<typename T>
-union view_convert_helper
-{
-  typedef T type;
-  typedef make_integral<T> itype;
-
-  type t;
-  itype i;
-};
-
-// Generate an unaligned load sequence.
-// The compiler knows how to do this for any specific type.
-template<typename T>
-inline T ALWAYS_INLINE
-unaligned_load(const void *t)
-{
-  typedef unaligned_helper<T> UT;
-  const UT *ut = reinterpret_cast<const UT *>(t);
-  return ut->x;
-}
-
-// Generate an unaligned store sequence.
-template<typename T>
-inline void ALWAYS_INLINE
-unaligned_store(void *t, T val)
-{
-  typedef unaligned_helper<T> UT;
-  UT *ut = reinterpret_cast<UT *>(t);
-  ut->x = val;
-}
-
-// Generate an unaligned load from two different cachelines.
-// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
-template<typename T>
-inline T ALWAYS_INLINE
-unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs)
-{
-  size_t left = CACHELINE_SIZE - ofs;
-  T ret;
-
-  memcpy (&ret, &c1->b[ofs], left);
-  memcpy ((char *)&ret + left, c2, sizeof(T) - left);
-
-  return ret;
-}
-
-// Generate an unaligned store into two different cachelines.
-// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
-template<typename T>
-inline void ALWAYS_INLINE
-unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val)
-{
-  size_t left = CACHELINE_SIZE - ofs;
-  memcpy (&c1->b[ofs], &val, left);
-  memcpy (c2, (char *)&val + left, sizeof(T) - left);
-}
-
-#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2
-template<>
-inline uint16_t ALWAYS_INLINE
-unaligned_load2<uint16_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint16_t v1 = c1->b[CACHELINE_SIZE - 1];
-  uint16_t v2 = c2->b[0];
-
-  if (WORDS_BIGENDIAN)
-    return v1 << 8 | v2;
-  else
-    return v2 << 8 | v1;
-}
-#endif
-
-#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4
-template<>
-inline uint32_t ALWAYS_INLINE
-unaligned_load2<uint32_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
-  uint32_t v2 = c2->u32[0];
-  int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8;
-  int s1 = sizeof(uint32_t) * 8 - s2;
-
-  if (WORDS_BIGENDIAN)
-    return v1 << s2 | v2 >> s1;
-  else
-    return v2 << s2 | v1 >> s1;
-}
-#endif
-
-#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U8
-template<>
-inline uint64_t ALWAYS_INLINE
-unaligned_load2<uint64_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
-  uint64_t v2 = c2->u64[0];
-  int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8;
-  int s1 = sizeof(uint64_t) * 8 - s2;
-
-  if (WORDS_BIGENDIAN)
-    return v1 << s2 | v2 >> s1;
-  else
-    return v2 << s2 | v1 >> s1;
-}
-#endif
-
-template<>
-inline float ALWAYS_INLINE
-unaligned_load2<float>(const gtm_cacheline *c1,
-		       const gtm_cacheline *c2, size_t ofs)
-{
-  typedef view_convert_helper<float> VC; VC vc;
-  vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
-  return vc.t;
-}
-
-template<>
-inline double ALWAYS_INLINE
-unaligned_load2<double>(const gtm_cacheline *c1,
-			const gtm_cacheline *c2, size_t ofs)
-{
-  typedef view_convert_helper<double> VC; VC vc;
-  vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
-  return vc.t;
-}
-
-#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2
-template<>
-inline void ALWAYS_INLINE
-unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
-			   size_t ofs, uint16_t val)
-{
-  uint8_t vl = val, vh = val >> 8;
-
-  if (WORDS_BIGENDIAN)
-    {
-      c1->b[CACHELINE_SIZE - 1] = vh;
-      c2->b[0] = vl;
-    }
-  else
-    {
-      c1->b[CACHELINE_SIZE - 1] = vl;
-      c2->b[0] = vh;
-    }
-}
-#endif
-
-#if 0
-#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4
-template<>
-inline void ALWAYS_INLINE
-unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
-			   size_t ofs, uint32_t val)
-{
-  // ??? We could reuse the store_mask stuff here.
-}
-#endif
-
-template<>
-inline void ALWAYS_INLINE
-unaligned_store2<float>(gtm_cacheline *c1, gtm_cacheline *c2,
-			size_t ofs, float val)
-{
-  typedef view_convert_helper<float> VC; VC vc;
-  vc.t = val;
-  unaligned_store2(c1, c2, ofs, vc.i);
-}
-#endif
-
-} // namespace GTM
-
-#endif // LIBITM_UNALIGNED_H
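
The generic cross-line load above is a two-part splice. Spelled out on its
own (sketch; the second copy lands at offset left in the result, i.e. after
the bytes already taken from the first line):

#include <cstddef>
#include <cstdint>
#include <cstring>

static const size_t CACHELINE_SIZE = 64;

// Reassemble a T that starts ofs bytes into line1 and spills into line2.
// Precondition, as in the deleted header: ofs + sizeof(T) > CACHELINE_SIZE.
template<typename T>
static T load2 (const unsigned char *line1, const unsigned char *line2,
		size_t ofs)
{
  size_t left = CACHELINE_SIZE - ofs;   // bytes available in the first line
  T ret;
  std::memcpy (&ret, line1 + ofs, left);
  std::memcpy ((char *) &ret + left, line2, sizeof (T) - left);
  return ret;
}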
diff --git a/libitm/config/posix/cachepage.cc b/libitm/config/posix/cachepage.cc
deleted file mode 100644
index 128cd5435aeb..000000000000
--- a/libitm/config/posix/cachepage.cc
+++ /dev/null
@@ -1,183 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "libitm_i.h"
-#include <pthread.h>
-
-//
-// We have three possibilities for allocation: mmap, memalign, posix_memalign
-//
-
-#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
-#include <sys/mman.h>
-#include <fcntl.h>
-#endif
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-
-namespace GTM HIDDEN {
-
-#if defined(HAVE_MMAP_ANON)
-# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#  define MAP_ANONYMOUS MAP_ANON
-# endif
-# define dev_zero -1
-#elif defined(HAVE_MMAP_DEV_ZERO)
-# ifndef MAP_ANONYMOUS
-#  define MAP_ANONYMOUS 0
-# endif
-static int dev_zero = -1;
-#endif
-
-#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
-/* If we get here, we've already opened /dev/zero and verified that
-   PAGE_SIZE is valid for the system.  */
-static gtm_cacheline_page * alloc_mmap (void) UNUSED;
-static gtm_cacheline_page *
-alloc_mmap (void)
-{
-  gtm_cacheline_page *r;
-  r = (gtm_cacheline_page *) mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
-				   MAP_PRIVATE | MAP_ANONYMOUS, dev_zero, 0);
-  if (r == (gtm_cacheline_page *) MAP_FAILED)
-    abort ();
-  return r;
-}
-#endif /* MMAP_ANON | MMAP_DEV_ZERO */
-
-#ifdef HAVE_MEMALIGN
-static gtm_cacheline_page * alloc_memalign (void) UNUSED;
-static gtm_cacheline_page *
-alloc_memalign (void)
-{
-  gtm_cacheline_page *r;
-  r = (gtm_cacheline_page *) memalign (PAGE_SIZE, PAGE_SIZE);
-  if (r == NULL)
-    abort ();
-  return r;
-}
-#endif /* MEMALIGN */
-
-#ifdef HAVE_POSIX_MEMALIGN
-static gtm_cacheline_page *alloc_posix_memalign (void) UNUSED;
-static gtm_cacheline_page *
-alloc_posix_memalign (void)
-{
-  void *r;
-  if (posix_memalign (&r, PAGE_SIZE, PAGE_SIZE))
-    abort ();
-  return (gtm_cacheline_page *) r;
-}
-#endif /* POSIX_MEMALIGN */
-
-#if defined(HAVE_MMAP_ANON) && defined(FIXED_PAGE_SIZE)
-# define alloc_page  alloc_mmap
-#elif defined(HAVE_MMAP_DEV_ZERO) && defined(FIXED_PAGE_SIZE)
-static gtm_cacheline_page *
-alloc_page (void)
-{
-  if (dev_zero < 0)
-    {
-      dev_zero = open ("/dev/zero", O_RDWR);
-      assert (dev_zero >= 0);
-    }
-  return alloc_mmap ();
-}
-#elif defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
-static gtm_cacheline_page * (*alloc_page) (void);
-static void __attribute__((constructor))
-init_alloc_page (void)
-{
-  size_t page_size = getpagesize ();
-  if (page_size <= PAGE_SIZE && PAGE_SIZE % page_size == 0)
-    {
-# ifndef HAVE_MMAP_ANON
-      dev_zero = open ("/dev/zero", O_RDWR);
-      assert (dev_zero >= 0);
-# endif
-      alloc_page = alloc_mmap;
-      return;
-    }
-# ifdef HAVE_MEMALIGN
-  alloc_page = alloc_memalign;
-# elif defined(HAVE_POSIX_MEMALIGN)
-  alloc_page = alloc_posix_memalign;
-# else
-#  error "No fallback aligned memory allocation method"
-# endif
-}
-#elif defined(HAVE_MEMALIGN)
-# define alloc_page  alloc_memalign
-#elif defined(HAVE_POSIX_MEMALIGN)
-# define alloc_page  alloc_posix_memalign
-#else
-# error "No aligned memory allocation method"
-#endif
-
-static gtm_cacheline_page *free_pages;
-static pthread_mutex_t free_page_lock = PTHREAD_MUTEX_INITIALIZER;
-
-void *
-gtm_cacheline_page::operator new (size_t size)
-{
-  assert (size == sizeof (gtm_cacheline_page));
-  assert (size <= PAGE_SIZE);
-
-  pthread_mutex_lock(&free_page_lock);
-
-  gtm_cacheline_page *r = free_pages;
-  free_pages = r ? r->prev : NULL;
-
-  pthread_mutex_unlock(&free_page_lock);
-
-  if (r == NULL)
-    r = alloc_page ();
-
-  return r;
-}
-
-void
-gtm_cacheline_page::operator delete (void *xhead)
-{
-  gtm_cacheline_page *head = static_cast<gtm_cacheline_page *>(xhead);
-  gtm_cacheline_page *tail;
-
-  if (head == 0)
-    return;
-
-  /* ??? We should eventually really free some of these.  */
-
-  for (tail = head; tail->prev != 0; tail = tail->prev)
-    continue;
-
-  pthread_mutex_lock(&free_page_lock);
-
-  tail->prev = free_pages;
-  free_pages = head;
-
-  pthread_mutex_unlock(&free_page_lock);
-}
-
-} // namespace GTM
diff --git a/libitm/config/x86/cacheline.cc b/libitm/config/x86/cacheline.cc
deleted file mode 100644
index 2e49a3559532..000000000000
--- a/libitm/config/x86/cacheline.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "libitm_i.h"
-
-namespace GTM HIDDEN {
-
-uint32_t const gtm_bit_to_byte_mask[16] =
-{
-  0x00000000,
-  0x000000ff,
-  0x0000ff00,
-  0x0000ffff,
-  0x00ff0000,
-  0x00ff00ff,
-  0x00ffff00,
-  0x00ffffff,
-  0xff000000,
-  0xff0000ff,
-  0xff00ff00,
-  0xff00ffff,
-  0xffff0000,
-  0xffff00ff,
-  0xffffff00,
-  0xffffffff
-};
-
-#ifdef __SSE2__
-# define MEMBER	m128i
-#else
-# define MEMBER	w
-#endif
-
-void
-gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
-			  const gtm_cacheline * __restrict s,
-			  gtm_cacheline_mask m)
-{
-  if (m == (gtm_cacheline_mask)-1)
-    {
-      *d = *s;
-      return;
-    }
-  if (__builtin_expect (m == 0, 0))
-    return;
-
-  size_t n = sizeof(d->MEMBER[0]);
-  for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
-    store_mask (&d->MEMBER[i], s->MEMBER[i], m);
-}
-
-} // namespace GTM
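
The 16-entry gtm_bit_to_byte_mask table deleted above is mechanical: entry i
expands the low four bits of i into four 0x00/0xff bytes. A generator that
reproduces it (illustrative only):

#include <cstdint>
#include <cstdio>

int main ()
{
  for (unsigned i = 0; i < 16; ++i)
    {
      uint32_t m = 0;
      for (unsigned b = 0; b < 4; ++b)
	if (i & (1u << b))
	  m |= (uint32_t) 0xff << (b * 8);
      printf ("0x%08x,\n", m);   // matches gtm_bit_to_byte_mask[i]
    }
  return 0;
}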
diff --git a/libitm/config/x86/cacheline.h b/libitm/config/x86/cacheline.h
index f91d7ccb8020..337c9995c178 100644
--- a/libitm/config/x86/cacheline.h
+++ b/libitm/config/x86/cacheline.h
@@ -40,8 +40,6 @@ namespace GTM HIDDEN {
 // in the cacheline with which it is associated.
 typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
 
-extern uint32_t const gtm_bit_to_byte_mask[16];
-
 union gtm_cacheline
 {
   // Byte access to the cacheline.
@@ -67,23 +65,6 @@ union gtm_cacheline
   __m256i m256i[CACHELINE_SIZE / sizeof(__m256i)];
 #endif
 
-  // Store S into D, but only the bytes specified by M.
-  static void store_mask (uint32_t *d, uint32_t s, uint8_t m);
-  static void store_mask (uint64_t *d, uint64_t s, uint8_t m);
-#ifdef __SSE2__
-  static void store_mask (__m128i *d, __m128i s, uint16_t m);
-#endif
-
-  // Copy S to D, but only the bytes specified by M.
-  static void copy_mask (gtm_cacheline * __restrict d,
-			 const gtm_cacheline * __restrict s,
-			 gtm_cacheline_mask m);
-
-  // A write barrier to emit after (a series of) copy_mask.
-  // When we're emitting non-temporal stores, the normal strong
-  // ordering of the machine doesn't apply.
-  static void copy_mask_wb ();
-
 #if defined(__SSE__) || defined(__AVX__)
   // Copy S to D; only bother defining if we can do this more efficiently
   // than the compiler-generated default implementation.
@@ -91,14 +72,6 @@ union gtm_cacheline
 #endif // SSE, AVX
 };
 
-inline void
-gtm_cacheline::copy_mask_wb ()
-{
-#ifdef __SSE2__
-  _mm_sfence ();
-#endif
-}
-
 #if defined(__SSE__) || defined(__AVX__)
 inline gtm_cacheline& ALWAYS_INLINE
 gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
@@ -141,104 +114,12 @@ gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
     }
 
   return *this;
+
+#undef CP
+#undef TYPE
 }
 #endif
 
-// Support masked integer stores more efficiently with an unlocked cmpxchg
-// insn.  My reasoning is that while we write to locations that we do not wish
-// to modify, we do it in an uninterruptable insn, and so we either truely
-// write back the original data or the insn fails -- unlike with a
-// load/and/or/write sequence which can be interrupted either by a kernel
-// task switch or an unlucky cacheline steal by another processor.  Avoiding
-// the LOCK prefix improves performance by a factor of 10, and we don't need
-// the memory barrier semantics implied by that prefix.
-
-inline void ALWAYS_INLINE
-gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m)
-{
-  gtm_cacheline_mask tm = (1 << sizeof (s)) - 1;
-  if (__builtin_expect (m & tm, tm))
-    {
-      if (__builtin_expect ((m & tm) == tm, 1))
-	*d = s;
-      else
-	{
-	  gtm_cacheline_mask bm = gtm_bit_to_byte_mask[m & 15];
-	  gtm_word n, o = *d;
-
-	  __asm("\n0:\t"
-		"mov	%[o], %[n]\n\t"
-		"and	%[m], %[n]\n\t"
-		"or	%[s], %[n]\n\t"
-		"cmpxchg %[n], %[d]\n\t"
-		".byte	0x2e\n\t"	// predict not-taken, aka jnz,pn
-		"jnz	0b"
-		: [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o)
-		: [s] "r" (s & bm), [m] "r" (~bm));
-	}
-    }
-}
-
-inline void ALWAYS_INLINE
-gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m)
-{
-  gtm_cacheline_mask tm = (1 << sizeof (s)) - 1;
-  if (__builtin_expect (m & tm, tm))
-    {
-      if (__builtin_expect ((m & tm) == tm, 1))
-	*d = s;
-      else
-	{
-#ifdef __x86_64__
-	  uint32_t bl = gtm_bit_to_byte_mask[m & 15];
-	  uint32_t bh = gtm_bit_to_byte_mask[(m >> 4) & 15];
-	  gtm_cacheline_mask bm = bl | ((gtm_cacheline_mask)bh << 31 << 1);
-	  uint64_t n, o = *d;
-	  __asm("\n0:\t"
-		"mov	%[o], %[n]\n\t"
-		"and	%[m], %[n]\n\t"
-		"or	%[s], %[n]\n\t"
-		"cmpxchg %[n], %[d]\n\t"
-		".byte	0x2e\n\t"	// predict not-taken, aka jnz,pn
-		"jnz	0b"
-		: [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o)
-		: [s] "r" (s & bm), [m] "r" (~bm));
-#else
-	  /* ??? While it's possible to perform this operation with
-	     cmpxchg8b, the sequence requires all 7 general registers
-	     and thus cannot be performed with -fPIC.  Don't even try.  */
-	  uint32_t *d32 = reinterpret_cast<uint32_t *>(d);
-	  store_mask (d32, s, m);
-	  store_mask (d32 + 1, s >> 32, m >> 4);
-#endif
-	}
-    }
-}
-
-#ifdef __SSE2__
-inline void ALWAYS_INLINE
-gtm_cacheline::store_mask (__m128i *d, __m128i s, uint16_t m)
-{
-  if (__builtin_expect (m == 0, 0))
-    return;
-  if (__builtin_expect (m == 0xffff, 1))
-    *d = s;
-  else
-    {
-      __m128i bm0, bm1, bm2, bm3;
-      bm0 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
-      bm1 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
-      bm2 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
-      bm3 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
-      bm0 = _mm_unpacklo_epi32 (bm0, bm1);
-      bm2 = _mm_unpacklo_epi32 (bm2, bm3);
-      bm0 = _mm_unpacklo_epi64 (bm0, bm2);
-
-      _mm_maskmoveu_si128 (s, bm0, (char *)d);
-    }
-}
-#endif // SSE2
-
 } // namespace GTM
 
 #endif // LIBITM_CACHELINE_H
diff --git a/libitm/config/x86/unaligned.h b/libitm/config/x86/unaligned.h
deleted file mode 100644
index 01abc47dccb3..000000000000
--- a/libitm/config/x86/unaligned.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef LIBITM_X86_UNALIGNED_H
-#define LIBITM_X86_UNALIGNED_H 1
-
-#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
-#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
-
-#include "config/generic/unaligned.h"
-
-namespace GTM HIDDEN {
-
-template<>
-inline uint32_t
-unaligned_load2<uint32_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-  uint32_t r, lo, hi;
-  lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
-  hi = c2->u32[0];
-  asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
-  return r;
-}
-
-template<>
-inline uint64_t
-unaligned_load2<uint64_t>(const gtm_cacheline *c1,
-			  const gtm_cacheline *c2, size_t ofs)
-{
-#ifdef __x86_64__
-  uint64_t r, lo, hi;
-  lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
-  hi = c2->u64[0];
-  asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
-  return r;
-#else
-  uint32_t v0, v1, v2;
-  uint64_t r;
-
-  if (ofs < CACHELINE_SIZE - 4)
-    {
-      v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2];
-      v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
-      v2 = c2->u32[0];
-    }
-  else
-    {
-      v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
-      v1 = c2->u32[0];
-      v2 = c2->u32[1];
-    }
-  ofs = (ofs & 3) * 8;
-  asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]"
-      : "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2));
-
-  return r;
-#endif
-}
-
-#if defined(__SSE2__) || defined(__MMX__)
-template<>
-inline _ITM_TYPE_M64
-unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1,
-			       const gtm_cacheline *c2, size_t ofs)
-{
-# ifdef __x86_64__
-  __m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]);
-  __m128i hi = _mm_movpi64_epi64 (c2->m64[0]);
-
-  ofs = (ofs & 7) * 8;
-  lo = _mm_srli_epi64 (lo, ofs);
-  hi = _mm_slli_epi64 (hi, 64 - ofs);
-  lo = lo | hi;
-  return _mm_movepi64_pi64 (lo);
-# else
-  // On 32-bit we're about to return the result in an MMX register, so go
-  // ahead and do the computation in that unit, even if SSE2 is available.
-  __m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1];
-  __m64 hi = c2->m64[0];
-
-  ofs = (ofs & 7) * 8;
-  lo = _mm_srli_si64 (lo, ofs);
-  hi = _mm_slli_si64 (hi, 64 - ofs);
-  return lo | hi;
-# endif
-}
-#endif // SSE2 or MMX
-
-// The SSE types are strictly aligned.
-#ifdef __SSE__
-template<>
-  struct strict_alignment<_ITM_TYPE_M128>
-    : public std::true_type
-  { };
-
-// Expand the unaligned SSE move instructions.
-template<>
-inline _ITM_TYPE_M128
-unaligned_load<_ITM_TYPE_M128>(const void *t)
-{
-  return _mm_loadu_ps (static_cast<const float *>(t));
-}
-
-template<>
-inline void
-unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val)
-{
-  _mm_storeu_ps (static_cast<float *>(t), val);
-}
-#endif // SSE
-
-#ifdef __AVX__
-// The AVX types are strictly aligned when it comes to vmovaps vs vmovups.
-template<>
-  struct strict_alignment<_ITM_TYPE_M256>
-    : public std::true_type
-  { };
-
-template<>
-inline _ITM_TYPE_M256
-unaligned_load<_ITM_TYPE_M256>(const void *t)
-{
-  return _mm256_loadu_ps (static_cast<const float *>(t));
-}
-
-template<>
-inline void
-unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val)
-{
-  _mm256_storeu_ps (static_cast<float *>(t), val);
-}
-#endif // AVX
-
-#ifdef __XOP__
-# define HAVE_ARCH_REALIGN_M128I 1
-extern const __v16qi GTM_vpperm_shift[16];
-inline __m128i
-realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
-{
-  return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]);
-}
-#elif defined(__AVX__)
-# define HAVE_ARCH_REALIGN_M128I 1
-extern "C" const uint64_t GTM_vpalignr_table[16];
-inline __m128i
-realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
-{
-  register __m128i xmm0 __asm__("xmm0") = hi;
-  register __m128i xmm1 __asm__("xmm1") = lo;
-  __asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
-	"r"(&GTM_vpalignr_table[byte_count]));
-  return xmm0;
-}
-#elif defined(__SSSE3__)
-# define HAVE_ARCH_REALIGN_M128I 1
-extern "C" const uint64_t GTM_palignr_table[16];
-inline __m128i
-realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
-{
-  register __m128i xmm0 __asm__("xmm0") = hi;
-  register __m128i xmm1 __asm__("xmm1") = lo;
-  __asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
-	"r"(&GTM_palignr_table[byte_count]));
-  return xmm0;
-}
-#elif defined(__SSE2__)
-# define HAVE_ARCH_REALIGN_M128I 1
-extern "C" const char GTM_pshift_table[16 * 16];
-inline __m128i
-realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
-{
-  register __m128i xmm0 __asm__("xmm0") = lo;
-  register __m128i xmm1 __asm__("xmm1") = hi;
-  __asm("call *%2" : "+x"(xmm0), "+x"(xmm1)
-	: "r"(GTM_pshift_table + byte_count*16));
-  return xmm0;
-}
-#endif // XOP, AVX, SSSE3, SSE2
-
-#ifdef HAVE_ARCH_REALIGN_M128I
-template<>
-inline _ITM_TYPE_M128
-unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1,
-				const gtm_cacheline *c2, size_t ofs)
-{
-  return (_ITM_TYPE_M128)
-    realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1],
-		   c2->m128i[0], ofs & 15);
-}
-#endif // HAVE_ARCH_REALIGN_M128I
-
-#ifdef __AVX__
-template<>
-inline _ITM_TYPE_M256
-unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1,
-				const gtm_cacheline *c2, size_t ofs)
-{
-  __m128i v0, v1;
-  __m256i r;
-
-  v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs);
-  if (ofs < CACHELINE_SIZE - 16)
-    v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]);
-  else
-    v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]);
-
-  r = _mm256_castsi128_si256 ((__m128i)v0);
-  r = _mm256_insertf128_si256 (r, (__m128i)v1, 1);
-  return (_ITM_TYPE_M256) r;
-}
-#endif // AVX
-
-} // namespace GTM
-
-#endif // LIBITM_X86_UNALIGNED_H
diff --git a/libitm/config/x86/x86_avx.cc b/libitm/config/x86/x86_avx.cc
index 9d1ddfb3ee89..30420aa87f54 100644
--- a/libitm/config/x86/x86_avx.cc
+++ b/libitm/config/x86/x86_avx.cc
@@ -34,62 +34,3 @@ _ITM_LM256 (const _ITM_TYPE_M256 *ptr)
 {
   GTM::GTM_LB (ptr, sizeof (*ptr));
 }
-
-// Helpers for re-aligning two 128-bit values.
-#ifdef __XOP__
-const __v16qi GTM::GTM_vpperm_shift[16] =
-{
-  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
-  {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16 },
-  {  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17 },
-  {  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18 },
-  {  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
-  {  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 },
-  {  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 },
-  {  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 },
-  {  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
-  {  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 },
-  { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 },
-  { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 },
-  { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
-  { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 },
-  { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 },
-  { 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 },
-};
-#else
-# define INSN0		"movdqa  %xmm1, %xmm0"
-# define INSN(N)	"vpalignr $" #N ", %xmm0, %xmm1, %xmm0"
-# define TABLE_ENT_0	INSN0 "\n\tret\n\t"
-# define TABLE_ENT(N)	".balign 8\n\t" INSN(N) "\n\tret\n\t"
-
-asm(".pushsection .text\n\
-	.balign 16\n\
-	.globl	GTM_vpalignr_table\n\
-	.hidden	GTM_vpalignr_table\n\
-	.type	GTM_vpalignr_table, @function\n\
-GTM_vpalignr_table:\n\t"
-	TABLE_ENT_0
-	TABLE_ENT(1)
-	TABLE_ENT(2)
-	TABLE_ENT(3)
-	TABLE_ENT(4)
-	TABLE_ENT(5)
-	TABLE_ENT(6)
-	TABLE_ENT(7)
-	TABLE_ENT(8)
-	TABLE_ENT(9)
-	TABLE_ENT(10)
-	TABLE_ENT(11)
-	TABLE_ENT(12)
-	TABLE_ENT(13)
-	TABLE_ENT(14)
-	TABLE_ENT(15)
-	".balign 8\n\
-	.size	GTM_vpalignr_table, .-GTM_vpalignr_table\n\
-	.popsection");
-
-# undef INSN0
-# undef INSN
-# undef TABLE_ENT_0
-# undef TABLE_ENT
-#endif
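
The deleted call tables exist because palignr/vpalignr take only an
immediate shift count, so a runtime count was dispatched by calling into a
table of 8-byte code stubs. With intrinsics the same dispatch is a switch
over the sixteen immediates (sketch; assumes SSSE3, i.e. compile with
-mssse3):

#include <tmmintrin.h>

// (hi:lo) >> n bytes, as the removed GTM_palignr_table stubs computed.
// _mm_alignr_epi8 requires an immediate count, hence the switch.
static __m128i realign (__m128i lo, __m128i hi, unsigned n)
{
  switch (n & 15)
    {
#define CASE(N) case N: return _mm_alignr_epi8 (hi, lo, N)
      CASE (1); CASE (2); CASE (3); CASE (4); CASE (5);
      CASE (6); CASE (7); CASE (8); CASE (9); CASE (10);
      CASE (11); CASE (12); CASE (13); CASE (14); CASE (15);
#undef CASE
    default:
      return lo;   // n == 0: the value sits entirely in the low line
    }
}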
diff --git a/libitm/config/x86/x86_sse.cc b/libitm/config/x86/x86_sse.cc
index 7440c949cb76..5a1c67ac8b16 100644
--- a/libitm/config/x86/x86_sse.cc
+++ b/libitm/config/x86/x86_sse.cc
@@ -41,82 +41,3 @@ _ITM_LM128 (const _ITM_TYPE_M128 *ptr)
 {
   GTM::GTM_LB (ptr, sizeof (*ptr));
 }
-
-// Helpers for re-aligning two 128-bit values.
-#ifdef __SSSE3__
-# define INSN0		"movdqa	%xmm1, %xmm0"
-# define INSN(N)	"palignr $" #N ", %xmm1, %xmm0"
-# define TABLE_ENT_0	INSN0 "\n\tret\n\t"
-# define TABLE_ENT(N)	".balign 8\n\t" INSN(N) "\n\tret\n\t"
-
-asm(".pushsection .text\n\
-	.balign 16\n\
-	.globl	GTM_palignr_table\n\
-	.hidden	GTM_palignr_table\n\
-	.type	GTM_palignr_table, @function\n\
-GTM_palignr_table:\n\t"
-	TABLE_ENT_0
-	TABLE_ENT(1)
-	TABLE_ENT(2)
-	TABLE_ENT(3)
-	TABLE_ENT(4)
-	TABLE_ENT(5)
-	TABLE_ENT(6)
-	TABLE_ENT(7)
-	TABLE_ENT(8)
-	TABLE_ENT(9)
-	TABLE_ENT(10)
-	TABLE_ENT(11)
-	TABLE_ENT(12)
-	TABLE_ENT(13)
-	TABLE_ENT(14)
-	TABLE_ENT(15)
-	".balign 8\n\
-	.size	GTM_palignr_table, .-GTM_palignr_table\n\
-	.popsection");
-
-# undef INSN0
-# undef INSN
-# undef TABLE_ENT_0
-# undef TABLE_ENT
-#elif defined(__SSE2__)
-# define INSNS_8	"punpcklqdq %xmm1, %xmm0"
-# define INSNS(N)	"psrldq $"#N", %xmm0\n\t" \
-			"pslldq $(16-"#N"), %xmm1\n\t" \
-			"por %xmm1, %xmm0"
-# define TABLE_ENT_0	"ret\n\t"
-# define TABLE_ENT_8	".balign 16\n\t" INSNS_8 "\n\tret\n\t"
-# define TABLE_ENT(N)	".balign 16\n\t" INSNS(N) "\n\tret\n\t"
-
-asm(".pushsection .text\n\
-	.balign 16\n\
-	.globl	GTM_pshift_table\n\
-	.hidden	GTM_pshift_table\n\
-	.type	GTM_pshift_table, @function\n\
-GTM_pshift_table:\n\t"
-	TABLE_ENT_0
-	TABLE_ENT(1)
-	TABLE_ENT(2)
-	TABLE_ENT(3)
-	TABLE_ENT(4)
-	TABLE_ENT(5)
-	TABLE_ENT(6)
-	TABLE_ENT(7)
-	TABLE_ENT_8
-	TABLE_ENT(9)
-	TABLE_ENT(10)
-	TABLE_ENT(11)
-	TABLE_ENT(12)
-	TABLE_ENT(13)
-	TABLE_ENT(14)
-	TABLE_ENT(15)
-	".balign 8\n\
-	.size	GTM_pshift_table, .-GTM_pshift_table\n\
-	.popsection");
-
-# undef INSNS_8
-# undef INSNS
-# undef TABLE_ENT_0
-# undef TABLE_ENT_8
-# undef TABLE_ENT
-#endif
diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h
index c188fa5f43f5..58e43b01281b 100644
--- a/libitm/libitm_i.h
+++ b/libitm/libitm_i.h
@@ -78,7 +78,6 @@ enum gtm_restart_reason
 #include "rwlock.h"
 #include "aatree.h"
 #include "cacheline.h"
-#include "cachepage.h"
 #include "stmlock.h"
 #include "dispatch.h"
 #include "containers.h"
diff --git a/libitm/memcpy.cc b/libitm/memcpy.cc
deleted file mode 100644
index 9919e6a9afac..000000000000
--- a/libitm/memcpy.cc
+++ /dev/null
@@ -1,365 +0,0 @@
-/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "libitm_i.h"
-
-using namespace GTM;
-
-static void
-do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
-	   abi_dispatch::lock_type W, abi_dispatch::lock_type R)
-{
-  abi_dispatch *disp = abi_disp();
-  // The position in the destination cacheline where *IDST starts.
-  uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
-  // The position in the source cacheline where *ISRC starts.
-  uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
-  const gtm_cacheline *src
-    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
-  gtm_cacheline *dst
-    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
-  const gtm_cacheline *sline;
-  abi_dispatch::mask_pair dpair;
-
-  if (size == 0)
-    return;
-
-  // If both SRC and DST data start at the same position in the cachelines,
-  // we can easily copy the data in tandem, cacheline by cacheline...
-  if (dofs == sofs)
-    {
-      // We copy the data in three stages:
-
-      // (a) Copy stray bytes at the beginning that are smaller than a
-      // cacheline.
-      if (sofs != 0)
-	{
-	  size_t sleft = CACHELINE_SIZE - sofs;
-	  size_t min = (size <= sleft ? size : sleft);
-
-	  dpair = disp->write_lock(dst, W);
-	  sline = disp->read_lock(src, R);
-	  *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
-	  memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
-	  dst++;
-	  src++;
-	  size -= min;
-	}
-
-      // (b) Copy subsequent cacheline sized chunks.
-      while (size >= CACHELINE_SIZE)
-	{
-	  dpair = disp->write_lock(dst, W);
-	  sline = disp->read_lock(src, R);
-	  *dpair.mask = -1;
-	  *dpair.line = *sline;
-	  dst++;
-	  src++;
-	  size -= CACHELINE_SIZE;
-	}
-
-      // (c) Copy anything left over.
-      if (size != 0)
-	{
-	  dpair = disp->write_lock(dst, W);
-	  sline = disp->read_lock(src, R);
-	  *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
-	  memcpy (dpair.line, sline, size);
-	}
-    }
-  // ... otherwise, we must copy the data in disparate hunks using
-  // temporary storage.
-  else
-    {
-      gtm_cacheline c;
-      size_t sleft = CACHELINE_SIZE - sofs;
-
-      sline = disp->read_lock(src, R);
-
-      // As above, we copy the data in three stages:
-
-      // (a) Copy stray bytes at the beginning that are smaller than a
-      // cacheline.
-      if (dofs != 0)
-	{
-	  size_t dleft = CACHELINE_SIZE - dofs;
-	  size_t min = (size <= dleft ? size : dleft);
-
-	  dpair = disp->write_lock(dst, W);
-	  *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
-
-	  // If what's left in the source cacheline will fit in the
-	  // rest of the destination cacheline, straight up copy it.
-	  if (min <= sleft)
-	    {
-	      memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
-	      sofs += min;
-	    }
-	  // Otherwise, we need more bits from the source cacheline
-	  // than are available.  Piece together what we need from
-	  // contiguous (source) cachelines, into temp space, and copy
-	  // it over.
-	  else
-	    {
-	      memcpy (&c, &sline->b[sofs], sleft);
-	      sline = disp->read_lock(++src, R);
-	      sofs = min - sleft;
-	      memcpy (&c.b[sleft], sline, sofs);
-	      memcpy (&dpair.line->b[dofs], &c, min);
-	    }
-	  sleft = CACHELINE_SIZE - sofs;
-
-	  dst++;
-	  size -= min;
-	}
-
-      // (b) Copy subsequent cacheline sized chunks.
-      while (size >= CACHELINE_SIZE)
-	{
-	  // We have a full (destination) cacheline where to put the
-	  // data, but to get to the corresponding cacheline sized
-	  // chunk in the source, we have to piece together two
-	  // contiguous source cachelines.
-
-	  memcpy (&c, &sline->b[sofs], sleft);
-	  sline = disp->read_lock(++src, R);
-	  memcpy (&c.b[sleft], sline, sofs);
-
-	  dpair = disp->write_lock(dst, W);
-	  *dpair.mask = -1;
-	  *dpair.line = c;
-
-	  dst++;
-	  size -= CACHELINE_SIZE;
-	}
-
-      // (c) Copy anything left over.
-      if (size != 0)
-	{
-	  dpair = disp->write_lock(dst, W);
-	  *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
-	  // If what's left to copy is entirely in the remaining
-	  // source cacheline, do it.
-	  if (size <= sleft)
-	    memcpy (dpair.line, &sline->b[sofs], size);
-	  // Otherwise, piece together the remaining bits, and copy.
-	  else
-	    {
-	      memcpy (&c, &sline->b[sofs], sleft);
-	      sline = disp->read_lock(++src, R);
-	      memcpy (&c.b[sleft], sline, size - sleft);
-	      memcpy (dpair.line, &c, size);
-	    }
-	}
-    }
-}
-
-static void
-do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
-	    abi_dispatch::lock_type W, abi_dispatch::lock_type R)
-{
-  abi_dispatch *disp = abi_disp();
-  uintptr_t dleft, sleft, sofs, dofs;
-  const gtm_cacheline *sline;
-  abi_dispatch::mask_pair dpair;
-
-  if (size == 0)
-    return;
-
-  /* The co-aligned memmove below doesn't work for DST == SRC, so filter
-     that out.  It's tempting to just return here, as this is a no-op move.
-     However, our caller has the right to expect the locks to be acquired
-     as advertised.  */
-  if (__builtin_expect (idst == isrc, 0))
-    {
-      /* If the write lock is already acquired, nothing to do.  */
-      if (W == abi_dispatch::WaW)
-	return;
-      /* If the destination is protected, acquire a write lock.  */
-      if (W != abi_dispatch::NOLOCK)
-	R = abi_dispatch::RfW;
-      /* Notice serial mode, where we don't acquire locks at all.  */
-      if (R == abi_dispatch::NOLOCK)
-	return;
-
-      idst = isrc + size;
-      for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
-	disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
-      return;
-    }
-
-  /* Fall back to memcpy if the implementation above can handle it.  */
-  if (idst < isrc || isrc + size <= idst)
-    {
-      do_memcpy (idst, isrc, size, W, R);
-      return;
-    }
-
-  /* What remains requires a backward copy from the end of the blocks.  */
-  idst += size;
-  isrc += size;
-  dofs = idst & (CACHELINE_SIZE - 1);
-  sofs = isrc & (CACHELINE_SIZE - 1);
-  dleft = CACHELINE_SIZE - dofs;
-  sleft = CACHELINE_SIZE - sofs;
-
-  gtm_cacheline *dst
-    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
-  const gtm_cacheline *src
-    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
-  if (dofs == 0)
-    dst--;
-  if (sofs == 0)
-    src--;
-
-  if (dofs == sofs)
-    {
-      /* Since DST and SRC are co-aligned, and we didn't use the memcpy
-	 optimization above, that implies that SIZE > CACHELINE_SIZE.  */
-      if (sofs != 0)
-	{
-	  dpair = disp->write_lock(dst, W);
-	  sline = disp->read_lock(src, R);
-	  *dpair.mask |= ((gtm_cacheline_mask)1 << sleft) - 1;
-	  memcpy (dpair.line, sline, sleft);
-	  dst--;
-	  src--;
-	  size -= sleft;
-	}
-
-      while (size >= CACHELINE_SIZE)
-	{
-	  dpair = disp->write_lock(dst, W);
-	  sline = disp->read_lock(src, R);
-	  *dpair.mask = -1;
-	  *dpair.line = *sline;
-	  dst--;
-	  src--;
-	  size -= CACHELINE_SIZE;
-	}
-
-      if (size != 0)
-	{
-	  size_t ofs = CACHELINE_SIZE - size;
-	  dpair = disp->write_lock(dst, W);
-	  sline = disp->read_lock(src, R);
-	  *dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
-	  memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
-	}
-    }
-  else
-    {
-      gtm_cacheline c;
-
-      sline = disp->read_lock(src, R);
-      if (dofs != 0)
-	{
-	  size_t min = (size <= dofs ? size : dofs);
-
-	  if (min <= sofs)
-	    {
-	      sofs -= min;
-	      memcpy (&c, &sline->b[sofs], min);
-	    }
-	  else
-	    {
-	      size_t min_ofs = min - sofs;
-	      memcpy (&c.b[min_ofs], sline, sofs);
-	      sline = disp->read_lock(--src, R);
-	      sofs = CACHELINE_SIZE - min_ofs;
-	      memcpy (&c, &sline->b[sofs], min_ofs);
-	    }
-
-	  dofs = dleft - min;
-	  dpair = disp->write_lock(dst, W);
-	  *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
-	  memcpy (&dpair.line->b[dofs], &c, min);
-
-	  sleft = CACHELINE_SIZE - sofs;
-	  dst--;
-	  size -= min;
-	}
-
-      while (size >= CACHELINE_SIZE)
-	{
-	  memcpy (&c.b[sleft], sline, sofs);
-	  sline = disp->read_lock(--src, R);
-	  memcpy (&c, &sline->b[sofs], sleft);
-
-	  dpair = disp->write_lock(dst, W);
-	  *dpair.mask = -1;
-	  *dpair.line = c;
-
-	  dst--;
-	  size -= CACHELINE_SIZE;
-	}
-
-      if (size != 0)
-	{
-	  dofs = CACHELINE_SIZE - size;
-
-	  memcpy (&c.b[sleft], sline, sofs);
-	  if (sleft > dofs)
-	    {
-	      sline = disp->read_lock(--src, R);
-	      memcpy (&c, &sline->b[sofs], sleft);
-	    }
-
-	  dpair = disp->write_lock(dst, W);
-	  *dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
-	  memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
-	}
-    }
-}
-
-#define ITM_MEM_DEF(NAME, READ, WRITE) \
-void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size)  \
-{									     \
-  do_memcpy ((uintptr_t)dst, (uintptr_t)src, size,			     \
-	     abi_dispatch::WRITE, abi_dispatch::READ);			     \
-}									     \
-void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
-{									     \
-  do_memmove ((uintptr_t)dst, (uintptr_t)src, size,			     \
-	      abi_dispatch::WRITE, abi_dispatch::READ);			     \
-}
-
-ITM_MEM_DEF(RnWt,	NOLOCK,		W)
-ITM_MEM_DEF(RnWtaR,	NOLOCK,		WaR)
-ITM_MEM_DEF(RnWtaW,	NOLOCK,		WaW)
-
-ITM_MEM_DEF(RtWn,	R,		NOLOCK)
-ITM_MEM_DEF(RtWt,	R,		W)
-ITM_MEM_DEF(RtWtaR,	R,		WaR)
-ITM_MEM_DEF(RtWtaW,	R,		WaW)
-
-ITM_MEM_DEF(RtaRWn,	RaR,		NOLOCK)
-ITM_MEM_DEF(RtaRWt,	RaR,		W)
-ITM_MEM_DEF(RtaRWtaR,	RaR,		WaR)
-ITM_MEM_DEF(RtaRWtaW,	RaR,		WaW)
-
-ITM_MEM_DEF(RtaWWn,	RaW,		NOLOCK)
-ITM_MEM_DEF(RtaWWt,	RaW,		W)
-ITM_MEM_DEF(RtaWWtaR,	RaW,		WaR)
-ITM_MEM_DEF(RtaWWtaW,	RaW,		WaW)
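
One idiom recurs throughout the deleted copy routines: a full-cacheline store simply sets the write mask to -1, while the partial head and tail stages mark exactly the bytes they touch with (((gtm_cacheline_mask)1 << len) - 1) << ofs.  A standalone sketch of that arithmetic, under this note's assumption of a 32-byte line with one mask bit per byte:

    #include <cstddef>
    #include <cstdint>

    typedef uint32_t gtm_cacheline_mask;  // assumption: 32-byte cacheline

    // Mask with bits [ofs, ofs + len) set, marking those bytes of the line
    // as transactionally written.  Valid only for 0 < len < 32; the
    // full-line case must store -1 instead, since 1 << 32 would overflow.
    static inline gtm_cacheline_mask
    partial_mask (size_t ofs, size_t len)
    {
      return (((gtm_cacheline_mask) 1 << len) - 1) << ofs;
    }
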
diff --git a/libitm/memset.cc b/libitm/memset.cc
deleted file mode 100644
index 3a627dd6c7d1..000000000000
--- a/libitm/memset.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "libitm_i.h"
-
-using namespace GTM;
-
-static void
-do_memset(uintptr_t idst, int c, size_t size, abi_dispatch::lock_type W)
-{
-  abi_dispatch *disp = abi_disp();
-  uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
-  abi_dispatch::mask_pair dpair;
-  gtm_cacheline *dst
-    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
-
-  if (size == 0)
-    return;
-
-  if (dofs != 0)
-    {
-      size_t dleft = CACHELINE_SIZE - dofs;
-      size_t min = (size <= dleft ? size : dleft);
-
-      dpair = disp->write_lock(dst, W);
-      *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
-      memset (&dpair.line->b[dofs], c, min);
-      dst++;
-      size -= min;
-    }
-
-  while (size >= CACHELINE_SIZE)
-    {
-      dpair = disp->write_lock(dst, W);
-      *dpair.mask = -1;
-      memset (dpair.line, c, CACHELINE_SIZE);
-      dst++;
-      size -= CACHELINE_SIZE;
-    }
-
-  if (size != 0)
-    {
-      dpair = disp->write_lock(dst, W);
-      *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
-      memset (dpair.line, c, size);
-    }
-}
-
-#define ITM_MEM_DEF(WRITE) \
-void ITM_REGPARM _ITM_memset##WRITE(void *dst, int c, size_t size)	\
-{									\
-  do_memset ((uintptr_t)dst, c, size, abi_dispatch::WRITE);		\
-}
-
-ITM_MEM_DEF(W)
-ITM_MEM_DEF(WaR)
-ITM_MEM_DEF(WaW)
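
For reference, the deleted ITM_MEM_DEF macro merely stamped out one ABI entry point per write-lock flavor; expanded by hand, ITM_MEM_DEF(W) amounts to the following (whitespace aside), with ITM_REGPARM, do_memset and abi_dispatch coming from the surrounding file as shown above:

    void ITM_REGPARM _ITM_memsetW (void *dst, int c, size_t size)
    {
      do_memset ((uintptr_t) dst, c, size, abi_dispatch::W);
    }
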
diff --git a/libitm/method-wbetl.cc b/libitm/method-wbetl.cc
deleted file mode 100644
index 093d1c769f19..000000000000
--- a/libitm/method-wbetl.cc
+++ /dev/null
@@ -1,628 +0,0 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Transactional Memory Library (libitm).
-
-   Libitm is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "libitm_i.h"
-
-namespace {
-
-using namespace GTM;
-
-class wbetl_dispatch : public abi_dispatch
-{
- private:
-  static const size_t RW_SET_SIZE = 4096;
-
-  struct r_entry
-  {
-    gtm_version version;
-    gtm_stmlock *lock;
-  };
-
-  r_entry *m_rset_entries;
-  size_t m_rset_nb_entries;
-  size_t m_rset_size;
-
-  struct w_entry
-  {
-    /* There's a hashtable where the locks are held, so multiple
-       cachelines can hash to a given bucket.  This link points to the
-       next cacheline, if any, that also hashes to this bucket.  */
-    struct w_entry *next;
-
-    /* Every entry in this bucket (accessed by NEXT) has the same LOCK
-       address below.  */
-    gtm_stmlock *lock;
-
-    gtm_cacheline *addr;
-    gtm_cacheline *value;
-    gtm_version version;
-  };
-
-  w_entry *m_wset_entries;
-  size_t m_wset_nb_entries;
-  size_t m_wset_size;
-  bool m_wset_reallocate;
-
-  gtm_version m_start;
-  gtm_version m_end;
-
-  gtm_cacheline_page *m_cache_page;
-  unsigned m_n_cache_page;
-
- private:
-  bool local_w_entry_p (w_entry *w);
-  bool has_read (gtm_stmlock *lock);
-  bool validate();
-  bool extend();
-
-  gtm_cacheline *do_write_lock(gtm_cacheline *);
-  gtm_cacheline *do_after_write_lock(gtm_cacheline *);
-  const gtm_cacheline *do_read_lock(const gtm_cacheline *, bool);
-
- public:
-  wbetl_dispatch();
-
-  virtual const gtm_cacheline *read_lock(const gtm_cacheline *, ls_modifier);
-  virtual mask_pair write_lock(gtm_cacheline *, ls_modifier);
-
-  virtual bool trycommit();
-  virtual void rollback();
-  virtual void reinit();
-  virtual void fini();
-  virtual bool trydropreference (void *, size_t);
-};
-
-/* Check if W is one of our write locks.  */
-
-inline bool
-wbetl_dispatch::local_w_entry_p (w_entry *w)
-{
-  return (m_wset_entries <= w && w < m_wset_entries + m_wset_nb_entries);
-}
-
-/* Check if the stripe has been read previously.  */
-
-inline bool
-wbetl_dispatch::has_read (gtm_stmlock *lock)
-{
-  // ??? Consider using an AA tree to look up the r_set entries.
-  size_t n = m_rset_nb_entries;
-  for (size_t i = 0; i < n; ++i)
-    if (m_rset_entries[i].lock == lock)
-      return true;
-
-  return false;
-}
-
-/* Validate the read set, i.e. check that all read addresses are still valid.  */
-
-bool
-wbetl_dispatch::validate ()
-{
-  __sync_synchronize ();
-
-  size_t n = m_rset_nb_entries;
-  for (size_t i = 0; i < n; ++i)
-    {
-      r_entry *r = &m_rset_entries[i];
-      gtm_stmlock l = *r->lock;
-
-      if (gtm_stmlock_owned_p (l))
-	{
-	  w_entry *w = (w_entry *) gtm_stmlock_get_addr (l);
-
-	  // If someone has locked us, it better be by someone in the
-	  // current thread.
-	  if (!local_w_entry_p (w))
-	    return false;
-	}
-      else if (gtm_stmlock_get_version (l) != r->version)
-	return false;
-    }
-
-  return true;
-}
-
-/* Extend the snapshot range.  */
-
-bool
-wbetl_dispatch::extend ()
-{
-  gtm_version now = gtm_get_clock ();
-
-  if (validate ())
-    {
-      m_end = now;
-      return true;
-    }
-  return false;
-}
-
-/* Acquire a write lock on ADDR.  */
-
-gtm_cacheline *
-wbetl_dispatch::do_write_lock(gtm_cacheline *addr)
-{
-  gtm_stmlock *lock;
-  gtm_stmlock l, l2;
-  gtm_version version;
-  w_entry *w, *prev = NULL;
-
-  lock = gtm_get_stmlock (addr);
-  l = *lock;
-
- restart_no_load:
-  if (gtm_stmlock_owned_p (l))
-    {
-      w = (w_entry *) gtm_stmlock_get_addr (l);
-
-      /* Did we previously write the same address?  */
-      if (local_w_entry_p (w))
-	{
-	  prev = w;
-	  while (1)
-	    {
-	      if (addr == prev->addr)
-		return prev->value;
-	      if (prev->next == NULL)
-		break;
-	      prev = prev->next;
-	    }
-
-	  /* Get the version from the previous write set entry.  */
-	  version = prev->version;
-
-	  /* If there are not enough entries, we must reallocate the array.
-	     That invalidates all pointers to write set entries, which
-	     means we have to restart the transaction.  */
-	  if (m_wset_nb_entries == m_wset_size)
-	    {
-	      m_wset_size *= 2;
-	      m_wset_reallocate = true;
-	      gtm_tx()->restart (RESTART_REALLOCATE);
-	    }
-
-	  w = &m_wset_entries[m_wset_nb_entries];
-	  goto do_write;
-	}
-
-      gtm_tx()->restart (RESTART_LOCKED_WRITE);
-    }
-  else
-    {
-      version = gtm_stmlock_get_version (l);
-
-      /* We might have read an older version previously.  */
-      if (version > m_end)
-	{
-	  if (has_read (lock))
-	    gtm_tx()->restart (RESTART_VALIDATE_WRITE);
-	}
-
-      /* Extend write set, aborting to reallocate write set entries.  */
-      if (m_wset_nb_entries == m_wset_size)
-	{
-	  m_wset_size *= 2;
-	  m_wset_reallocate = true;
-	  gtm_tx()->restart (RESTART_REALLOCATE);
-	}
-
-      /* Acquire the lock.  */
-      w = &m_wset_entries[m_wset_nb_entries];
-      l2 = gtm_stmlock_set_owned (w);
-      l = __sync_val_compare_and_swap (lock, l, l2);
-      if (l != l2)
-	goto restart_no_load;
-    }
-
- do_write:
-  m_wset_nb_entries++;
-  if (prev != NULL)
-    prev->next = w;
-  w->next = 0;
-  w->lock = lock;
-  w->addr = addr;
-  w->version = version;
-
-  gtm_cacheline_page *page = m_cache_page;
-  unsigned index = m_n_cache_page;
-
-  if (page == NULL || index == gtm_cacheline_page::LINES)
-    {
-      gtm_cacheline_page *npage = new gtm_cacheline_page;
-      npage->prev = page;
-      m_cache_page = page = npage;
-      m_n_cache_page = 1;
-      index = 0;
-    }
-  else
-    m_n_cache_page = index + 1;
-
-  gtm_cacheline *line = &page->lines[index];
-  w->value = line;
-  page->masks[index] = 0;
-  *line = *addr;
-
-  return line;
-}
-
-gtm_cacheline *
-wbetl_dispatch::do_after_write_lock (gtm_cacheline *addr)
-{
-  gtm_stmlock *lock;
-  gtm_stmlock l;
-  w_entry *w;
-
-  lock = gtm_get_stmlock (addr);
-  l = *lock;
-  assert (gtm_stmlock_owned_p (l));
-
-  w = (w_entry *) gtm_stmlock_get_addr (l);
-  assert (local_w_entry_p (w));
-
-  while (1)
-    {
-      if (addr == w->addr)
-	return w->value;
-      w = w->next;
-    }
-}
-
-/* Acquire a read lock on ADDR.  */
-
-const gtm_cacheline *
-wbetl_dispatch::do_read_lock (const gtm_cacheline *addr, bool after_read)
-{
-  gtm_stmlock *lock;
-  gtm_stmlock l, l2;
-  gtm_version version;
-  w_entry *w;
-
-  lock = gtm_get_stmlock (addr);
-  l = *lock;
-
- restart_no_load:
-  if (gtm_stmlock_owned_p (l))
-    {
-      w = (w_entry *) gtm_stmlock_get_addr (l);
-
-      /* Did we previously write the same address?  */
-      if (local_w_entry_p (w))
-	{
-	  while (1)
-	    {
-	      if (addr == w->addr)
-		return w->value;
-	      if (w->next == NULL)
-		return addr;
-	      w = w->next;
-	    }
-	}
-
-      gtm_tx()->restart (RESTART_LOCKED_READ);
-    }
-
-  version = gtm_stmlock_get_version (l);
-
-  /* If version is no longer valid, re-validate the read set.  */
-  if (version > m_end)
-    {
-      if (!extend ())
-	gtm_tx()->restart (RESTART_VALIDATE_READ);
-
-      if (!after_read)
-	{
-	  // Verify that the version has not yet been overwritten.  The read
-	  // value has not yet been added to the read set and may not have been
-	  // checked during the extend.
-	  //
-	  // ??? This only makes sense if we're actually reading the value
-	  // and returning it now -- which I believe the original TinySTM
-	  // did.  This doesn't make a whole lot of sense when we're
-	  // manipulating cachelines as we are now.  Do we need some other
-	  // form of lock verification here, or is the validate call in
-	  // trycommit sufficient?
-
-	  __sync_synchronize ();
-	  l2 = *lock;
-	  if (l != l2)
-	    {
-	      l = l2;
-	      goto restart_no_load;
-	    }
-	}
-    }
-
-  if (!after_read)
-    {
-      r_entry *r;
-
-      /* Add the address and version to the read set.  */
-      if (m_rset_nb_entries == m_rset_size)
-	{
-	  m_rset_size *= 2;
-
-	  m_rset_entries = (r_entry *)
-	    xrealloc (m_rset_entries, m_rset_size * sizeof(r_entry));
-	}
-      r = &m_rset_entries[m_rset_nb_entries++];
-      r->version = version;
-      r->lock = lock;
-    }
-
-  return addr;
-}
-
-const gtm_cacheline *
-wbetl_dispatch::read_lock (const gtm_cacheline *addr, ls_modifier ltype)
-{
-  switch (ltype)
-    {
-    case NONTXNAL:
-      return addr;
-    case R:
-      return do_read_lock (addr, false);
-    case RaR:
-      return do_read_lock (addr, true);
-    case RaW:
-      return do_after_write_lock (const_cast<gtm_cacheline *>(addr));
-    case RfW:
-      return do_write_lock (const_cast<gtm_cacheline *>(addr));
-    default:
-      abort ();
-    }
-}
-
-abi_dispatch::mask_pair
-wbetl_dispatch::write_lock (gtm_cacheline *addr, ls_modifier ltype)
-{
-  gtm_cacheline *line;
-
-  switch (ltype)
-    {
-    case NONTXNAL:
-      return mask_pair (addr, &mask_sink);
-    case W:
-    case WaR:
-      line = do_write_lock (addr);
-      break;
-    case WaW:
-      line = do_after_write_lock (addr);
-      break;
-    default:
-      abort ();
-    }
-
-  return mask_pair (line, gtm_cacheline_page::mask_for_page_line (line));
-}
-
-/* Commit the transaction.  */
-
-bool
-wbetl_dispatch::trycommit ()
-{
-  const size_t n = m_wset_nb_entries;
-  if (n != 0)
-    {
-      /* Get commit timestamp.  */
-      gtm_version t = gtm_inc_clock ();
-
-      /* Validate only if a concurrent transaction has started since.  */
-      if (m_start != t - 1 && !validate ())
-	return false;
-
-      /* Install new versions.  */
-      for (size_t i = 0; i < n; ++i)
-	{
-	  w_entry *w = &m_wset_entries[i];
-	  gtm_cacheline_mask mask
-	    = *gtm_cacheline_page::mask_for_page_line (w->value);
-
-	  /* Filter out any updates that overlap the libitm stack.  */
-	  mask = gtm_mask_stack (w->addr, mask);
-
-	  gtm_cacheline::copy_mask (w->addr, w->value, mask);
-	}
-
-      /* Only emit barrier after all cachelines are copied.  */
-      gtm_cacheline::copy_mask_wb ();
-
-      /* Drop locks.  */
-      for (size_t i = 0; i < n; ++i)
-	{
-	  w_entry *w = &m_wset_entries[i];
-
-	  /* Every link along the chain has the same lock, but only
-	     bother dropping the lock once per bucket (at the end).  */
-	  if (w->next == NULL)
-	    *w->lock = gtm_stmlock_set_version (t);
-	}
-    }
-
-  __sync_synchronize ();
-  return true;
-}
-
-void
-wbetl_dispatch::rollback ()
-{
-  /* Drop locks.  */
-  const size_t n = m_wset_nb_entries;
-  for (size_t i = 0; i < n; ++i)
-    {
-      w_entry *w = &m_wset_entries[i];
-
-      /* Every link along the chain has the same lock, but only
-	 bother dropping the lock once per bucket (at the end).  */
-      if (w->next == NULL)
-	*w->lock = gtm_stmlock_set_version (w->version);
-    }
-
-  __sync_synchronize ();
-}
-
-void
-wbetl_dispatch::reinit ()
-{
-  gtm_cacheline_page *page;
-
-  m_rset_nb_entries = 0;
-  m_wset_nb_entries = 0;
-
-  if (m_wset_reallocate)
-    {
-      m_wset_reallocate = 0;
-      m_wset_entries = (w_entry *)
-	xrealloc (m_wset_entries, m_wset_size * sizeof(w_entry));
-    }
-
-  page = m_cache_page;
-  if (page)
-    {
-      /* Release all but one of the pages of cachelines.  */
-      gtm_cacheline_page *prev = page->prev;
-      if (prev)
-	{
-	  page->prev = 0;
-	  delete prev;
-	}
-
-      /* Start the next cacheline allocation from the beginning.  */
-      m_n_cache_page = 0;
-    }
-
-  m_start = m_end = gtm_get_clock ();
-}
-
-void
-wbetl_dispatch::fini ()
-{
-  delete m_cache_page;
-  free (m_rset_entries);
-  free (m_wset_entries);
-  delete this;
-}
-
-/* Attempt to drop any internal references to PTR.  Return TRUE if successful.
-
-   This is an adaptation of the transactional memcpy function.
-
-   What we do here is flush out the current transactional content of
-   PTR to real memory, and remove the write mask bits associated with
-   it so future commits will ignore this piece of memory.  */
-
-bool
-wbetl_dispatch::trydropreference (void *ptr, size_t size)
-{
-  if (size == 0)
-    return true;
-
-  if (!validate ())
-    return false;
-
-  uintptr_t isrc = (uintptr_t)ptr;
-  // The position in the source cacheline where *PTR starts.
-  uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
-  gtm_cacheline *src
-    = reinterpret_cast<gtm_cacheline *>(isrc & -CACHELINE_SIZE);
-  unsigned char *dst = (unsigned char *)ptr;
-  abi_dispatch::mask_pair pair;
-
-  // If we're trying to drop a reference, we should already have a
-  // write lock on it.  If we don't have one, there's no work to do.
-  if (!gtm_stmlock_owned_p (*gtm_get_stmlock (src)))
-    return true;
-
-  // We copy the data in three stages:
-
-  // (a) Copy stray bytes at the beginning that are smaller than a
-  // cacheline.
-  if (sofs != 0)
-    {
-      size_t sleft = CACHELINE_SIZE - sofs;
-      size_t min = (size <= sleft ? size : sleft);
-
-      // WaW will give us the current locked entry.
-      pair = this->write_lock (src, WaW);
-
-      // *jedi mind wave*...these aren't the droids you're looking for.
-      *pair.mask &= ~((((gtm_cacheline_mask)1 << min) - 1) << sofs);
-
-      memcpy (dst, &pair.line->b[sofs], min);
-      dst += min;
-      src++;
-      size -= min;
-    }
-
-  // (b) Copy subsequent cacheline sized chunks.
-  while (size >= CACHELINE_SIZE)
-    {
-      pair = this->write_lock(src, WaW);
-      *pair.mask = 0;
-      memcpy (dst, pair.line, CACHELINE_SIZE);
-      dst += CACHELINE_SIZE;
-      src++;
-      size -= CACHELINE_SIZE;
-    }
-
-  // (c) Copy anything left over.
-  if (size != 0)
-    {
-      pair = this->write_lock(src, WaW);
-      *pair.mask &= ~(((gtm_cacheline_mask)1 << size) - 1);
-      memcpy (dst, pair.line, size);
-    }
-
-  // No need to drop locks, since we're going to abort the transaction
-  // anyhow.
-
-  return true;
-}
-
-
-wbetl_dispatch::wbetl_dispatch ()
-  : abi_dispatch (false, false)
-{
-  m_rset_entries = (r_entry *) xmalloc (RW_SET_SIZE * sizeof(r_entry));
-  m_rset_nb_entries = 0;
-  m_rset_size = RW_SET_SIZE;
-
-  m_wset_entries = (w_entry *) xmalloc (RW_SET_SIZE * sizeof(w_entry));
-  m_wset_nb_entries = 0;
-  m_wset_size = RW_SET_SIZE;
-  m_wset_reallocate = false;
-
-  m_start = m_end = gtm_get_clock ();
-
-  m_cache_page = 0;
-  m_n_cache_page = 0;
-}
-
-} // anon namespace
-
-abi_dispatch *
-GTM::dispatch_wbetl ()
-{
-  return new wbetl_dispatch ();
-}
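
The core of the deleted method is the versioned-lock test that validate() applied to every read-set entry: a stripe locked by anyone other than this transaction, or an unlocked stripe whose version no longer matches the one recorded at read time, invalidates the snapshot.  Condensed into a free-standing predicate; the standalone shape, the stand-in typedefs, and the locked_by_us flag (which replaces the original local_w_entry_p pointer-range test) are illustrative only:

    #include <cstdint>

    typedef uintptr_t gtm_stmlock;   // stand-in; the real type lived in stmlock.h
    typedef uint32_t  gtm_version;   // stand-in as well

    // Declared, not defined here: these predicates existed in the removed
    // stmlock.h.
    bool gtm_stmlock_owned_p (gtm_stmlock l);
    gtm_version gtm_stmlock_get_version (gtm_stmlock l);

    static bool
    read_entry_valid (gtm_stmlock l, gtm_version recorded, bool locked_by_us)
    {
      if (gtm_stmlock_owned_p (l))
        // A locked stripe is valid only if this transaction holds the lock.
        return locked_by_us;
      // An unlocked stripe must still carry the version seen at read time.
      return gtm_stmlock_get_version (l) == recorded;
    }
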
diff --git a/libitm/testsuite/Makefile.in b/libitm/testsuite/Makefile.in
index ed1f314e0d61..6990cfeb6812 100644
--- a/libitm/testsuite/Makefile.in
+++ b/libitm/testsuite/Makefile.in
@@ -38,6 +38,7 @@ subdir = testsuite
 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
+	$(top_srcdir)/../config/asmcfi.m4 \
 	$(top_srcdir)/../config/depstand.m4 \
 	$(top_srcdir)/../config/enable.m4 \
 	$(top_srcdir)/../config/futex.m4 \
@@ -90,8 +91,6 @@ ECHO_N = @ECHO_N@
 ECHO_T = @ECHO_T@
 EGREP = @EGREP@
 EXEEXT = @EXEEXT@
-FC = @FC@
-FCFLAGS = @FCFLAGS@
 FGREP = @FGREP@
 GREP = @GREP@
 INSTALL = @INSTALL@
@@ -142,7 +141,6 @@ abs_top_srcdir = @abs_top_srcdir@
 ac_ct_CC = @ac_ct_CC@
 ac_ct_CXX = @ac_ct_CXX@
 ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_ct_FC = @ac_ct_FC@
 am__include = @am__include@
 am__leading_dot = @am__leading_dot@
 am__quote = @am__quote@