diff -r --unified eglibc-2.19.orig/crypt/crypt_util.c eglibc-2.19/crypt/crypt_util.c
--- eglibc-2.19.orig/crypt/crypt_util.c	2014-05-12 14:05:09.559661627 +0200
+++ eglibc-2.19/crypt/crypt_util.c	2014-12-02 16:58:38.744874336 +0100
@@ -360,10 +360,10 @@
   sb[2] = (long64*)__data->sb2; sb[3] = (long64*)__data->sb3;
 #endif
 
-  if(small_tables_initialized == 0) {
+  if(atomic_load(small_tables_initialized) == 0) {
 #ifdef __GNU_LIBRARY__
     __libc_lock_lock (_ufc_tables_lock);
-    if(small_tables_initialized)
+    if(atomic_load(small_tables_initialized))
       goto small_tables_done;
 #endif
 
@@ -467,7 +467,7 @@
       }
     }
     atomic_write_barrier ();
-    small_tables_initialized = 1;
+    atomic_store(small_tables_initialized, 1);
 #ifdef __GNU_LIBRARY__
 small_tables_done:
     __libc_lock_unlock(_ufc_tables_lock);
diff -r --unified eglibc-2.19.orig/csu/libc-start.c eglibc-2.19/csu/libc-start.c
--- eglibc-2.19.orig/csu/libc-start.c	2014-05-12 14:05:10.619661624 +0200
+++ eglibc-2.19/csu/libc-start.c	2014-12-09 15:49:43.178119413 +0100
@@ -104,6 +104,305 @@
 # define MAIN_AUXVEC_PARAM
 #endif
 
+#ifdef USE_MVEE_LIBC
+
+#define MVEE_SLAVE_YIELD
+// #define MVEE_TICKET_LOCKS
+#define MVEE_TOTAL_CLOCK_COUNT   2048
+#define MVEE_CLOCK_GROUP_SIZE    64
+#define MVEE_TOTAL_CLOCK_GROUPS  (MVEE_TOTAL_CLOCK_COUNT / MVEE_CLOCK_GROUP_SIZE)
+
+struct mvee_counter
+{
+  volatile unsigned long lock;    // spinlock word (0 = free, 1 = held); used as the next-ticket counter when MVEE_TICKET_LOCKS is defined
+  volatile unsigned long counter; // logical clock value, incremented once per completed operation on this clock
+  unsigned char padding[64 - 2 * sizeof(unsigned long)]; // pads the struct to 64 bytes; prevents false sharing
+};
+
+struct mvee_op_entry
+{
+  volatile unsigned long  counter_and_idx; // (clock value << 12) | clock index: the value we must see in mvee_counters[idx] before we can replay the operation; 0 means the slot has not been written yet
+};
+
+static unsigned char                  mvee_sync_enabled             = 0; // value returned by mvee_should_sync(); its address is handed to the MVEE_RUNS_UNDER_MVEE_CONTROL syscall
+static unsigned char                  mvee_libc_initialized         = 0; // set to 1 once mvee_should_sync() has issued that syscall
+static unsigned char                  mvee_master_variant           = 0; // nonzero selects the recording (write) path in mvee_atomic_preop_internal(); its address is also handed to that syscall
+static __thread unsigned long         mvee_thread_local_pos         = 0; // our position in the thread local queue
+static __thread  
+    struct mvee_op_entry*             mvee_thread_local_queue       = NULL;
+static __thread unsigned long         mvee_thread_local_queue_size  = 0; // nr of slots in the thread local queue
+static __thread unsigned short        mvee_prev_idx                 = 0;
+
+__attribute__((aligned (64)))
+static struct mvee_counter            mvee_counters[MVEE_TOTAL_CLOCK_COUNT + 1];
+
+#define likely(x)       __builtin_expect((x),1)
+#define unlikely(x)     __builtin_expect((x),0)
+
+/* MVEE PATCH:
+   Checks whether or not all variants got HEAP_MAX_SIZE aligned heaps from
+   the previous mmap request. If some of them have not, ALL variants
+   have to bail out and fall back to another heap allocation method.
+   This ensures that the variants stay in sync with respect to future mm
+   requests.
+*/
+#define HEAP_MIN_SIZE (32 * 1024)
+#ifndef HEAP_MAX_SIZE
+# ifdef DEFAULT_MMAP_THRESHOLD_MAX
+#  define HEAP_MAX_SIZE (2 * DEFAULT_MMAP_THRESHOLD_MAX)
+# else
+#  define HEAP_MAX_SIZE (1024 * 1024) /* must be a power of two */
+# endif
+#endif
+
+int  // 1 if the heap counts as HEAP_MAX_SIZE aligned, 0 if not; under the MVEE the syscall's result is returned as-is
+mvee_all_heaps_aligned(char* heap)
+{
+  // no replay queue attached to this thread => treated as "not running under
+  // MVEE control": just check the alignment of the current heap locally
+  if (!mvee_thread_local_queue)  // NOTE(review): this pointer is thread-local and only set by mvee_check_buffer(); confirm NULL really implies "no MVEE" here
+    {
+      if ((unsigned long)heap & (HEAP_MAX_SIZE-1))
+	return 0;
+      return 1;
+    }
+
+  // We ARE running under MVEE control
+  // => ask the MVEE to check the alignments of ALL heaps;
+  // note that the heap pointer itself is not passed to the syscall
+  return syscall(MVEE_ALL_HEAPS_ALIGNED);
+}
+
+/* 
+ * mvee_infinite_loop:
+ * this function is used for both thread transferring and signal delivery
+ * 
+ * 1) to transfer threads to a new monitor, the original monitor (i.e. the 
+ * monitor that monitors the children that instigated the fork event) needs to
+ * detach from the threads first. While the threads are detached, they can
+ * run freely, without the intervention of a debugger.
+ * As such, we have to move the program counter to an infinite loop while
+ * the threads are detached. This way, the threads will all be in an equivalent
+ * state when the new monitor attaches to them.
+ * Because we're going to replace the registers by their original contents
+ * when the new monitor attaches, we can use sys_pause calls in the infinite
+ * loop.
+ * 
+ * 2) delivering signals through the ptrace API happens asynchronously 
+ * (I found out the hard way). As such, we should wait for the threads to be
+ * in equivalent states (e.g. stopped on the same syscall). Then the registers
+ * should be backed up and the syscall nr should be replaced by a harmless
+ * syscall that doesn't modify the program state. We use sys_getpid for this
+ * purpose. When that replaced syscall returns, we change the pc to this
+ * infinite loop while we wait for async signal delivery.
+ * We probably cannot use syscalls while waiting for signal delivery. 
+ * One possible exception is sys_sched_yield. Our modified MVEE kernel does
+ * not report this syscall to the ptracer
+ * 
+ * the with_syscalls parameter is passed through the ecx register!
+ */
+void mvee_infinite_loop(int with_syscalls, int dummy)  // never returns: both branches loop forever
+{
+  if (with_syscalls)
+    {
+      for (;;)
+	syscall(__NR_pause);  // re-issued every time pause returns
+    }
+  else
+    {
+      for (;;)
+	{
+	  dummy = dummy << 2;  // syscall-free busy loop; the shift result is never used
+	}
+    }
+}
+
+static inline void mvee_check_buffer(void)  // attaches this thread's replay queue on first use; does nothing once mvee_thread_local_queue is non-NULL
+{
+  if (unlikely(!mvee_thread_local_queue))
+    {
+      long mvee_thread_local_queue_id = syscall(MVEE_GET_SHARED_BUFFER, &mvee_counters, MVEE_LIBC_ATOMIC_BUFFER, &mvee_thread_local_queue_size, &mvee_thread_local_pos, NULL);  // result is used as the shmat id below
+      mvee_thread_local_queue_size   /= sizeof(struct mvee_op_entry);  // scale the reported size down to a number of queue slots
+      mvee_thread_local_queue         = (void*)syscall(__NR_shmat, mvee_thread_local_queue_id, NULL, 0);  // NOTE(review): neither syscall result is checked; a failed shmat yields (void*)-1, which is non-NULL and would pass the check above -- confirm failure cannot happen here
+    }
+}
+
+
+// This function is a bit tricky, especially on x86_64!
+// In some contexts, such as syscalls that enable asynchronous cancellation,
+// libc expects none of the code it executes to touch registers other than
+// %rax and %r11. Consequently, we have to make sure that at most 2 registers
+// live at any point during our mvee funcs!
+static inline int mvee_should_sync(void)  // returns mvee_sync_enabled, issuing the MVEE_RUNS_UNDER_MVEE_CONTROL syscall once on the first call
+{
+  if (unlikely(!mvee_libc_initialized))
+  {
+    long res = syscall(MVEE_RUNS_UNDER_MVEE_CONTROL, &mvee_sync_enabled, &mvee_infinite_loop, // res is only referenced by the disabled check below
+		       NULL, NULL, &mvee_master_variant);  // presumably the monitor fills in both flags through these pointers -- TODO confirm
+    //    if (!(res < 0 && res > -4095))
+    //      mvee_check_buffer();
+    mvee_libc_initialized = 1;  // set regardless of res, so the syscall is never retried
+  }
+  return mvee_sync_enabled;
+}
+
+
+int mvee_should_sync_tid(void)  // non-static wrapper that exposes the static inline mvee_should_sync() to other files
+{
+  return mvee_should_sync();
+}
+
+void mvee_invalidate_buffer()  // forgets this thread's queue pointer, so the next mvee_check_buffer() call requests and attaches a buffer again
+{
+  mvee_thread_local_queue = NULL;  // only the pointer is cleared; nothing is detached here
+}
+
+#define cpu_relax() asm volatile("rep; nop" ::: "memory")
+
+#define gcc_barrier() asm volatile("" ::: "memory")
+
+static unsigned short mvee_hash_word_ptr(void* word_ptr)  // maps an address to a clock index in 1..MVEE_TOTAL_CLOCK_COUNT
+{
+  // address bits 24 and up (the 16 MiB region number, not the page number) define the clock group
+  // the 64-byte line index within the 4 KiB page defines the clock within that group
+
+  return 
+    (((((unsigned long)word_ptr >> 24) % MVEE_TOTAL_CLOCK_GROUPS) * (MVEE_CLOCK_GROUP_SIZE) 
+     + ((((unsigned long)word_ptr & 4095) >> 6) % MVEE_CLOCK_GROUP_SIZE))
+     & 0xFFF) + 1;  // + 1 keeps the index nonzero, so a queue entry (pos << 12) | idx is never 0 (which the replay wait loop treats as "not written yet")
+}
+
+#ifdef MVEE_TICKET_LOCKS
+static inline unsigned long mvee_write_lock_result_prepare(unsigned short idx)  // ticket variant: takes a ticket on clock idx and returns it once the clock has reached it
+{
+  unsigned long cntr = __sync_fetch_and_add(&mvee_counters[idx].lock, 1);  // take the next ticket
+
+  while (cntr != mvee_counters[idx].counter)  // spin until the clock value equals our ticket
+    cpu_relax();
+
+  return cntr;
+}
+
+static inline void mvee_write_lock_result_finish(unsigned short idx)  // ticket variant: advancing the clock lets the holder of the next ticket proceed
+{
+  gcc_barrier();  // compiler-only barrier (empty asm with a "memory" clobber)
+  mvee_counters[idx].counter++;
+}
+
+#else
+static inline unsigned long mvee_write_lock_result_prepare(unsigned short idx)  // spinlock variant: acquires clock idx and returns its current value; the lock stays held on return
+{
+  while (!__sync_bool_compare_and_swap(&mvee_counters[idx].lock, 0, 1))  // acquire: 0 -> 1
+    cpu_relax();
+
+  return mvee_counters[idx].counter;    
+}
+
+static inline void mvee_write_lock_result_finish(unsigned short idx)  // spinlock variant: bumps the clock, then releases the lock taken by the prepare step
+{
+  gcc_barrier();  // compiler-only barrier (empty asm with a "memory" clobber)
+  orig_nonatomic_increment(&mvee_counters[idx].counter);  // defined outside this hunk; presumably a plain increment that bypasses the MVEE hooks -- TODO confirm
+  mvee_counters[idx].lock = 0;  // release
+}
+#endif
+
+static inline void mvee_write_lock_result_write(unsigned long pos, unsigned short idx)  // records one entry in this thread's queue and advances the queue position
+{  
+  mvee_thread_local_queue[mvee_thread_local_pos++].counter_and_idx 
+    = (pos << 12) | idx;  // low 12 bits: clock index; remaining bits: the clock value the replaying side must observe
+}
+
+static inline void mvee_read_lock_result_wait(void)  // replay side: waits for the next queue entry to be written, then for its clock to reach the recorded value; leaves the clock index in mvee_prev_idx
+{
+  unsigned long counter_and_idx = 0;
+
+  while (1)  // no branch hint: the loop is always entered and only left through the break below
+    {
+      counter_and_idx = mvee_thread_local_queue[mvee_thread_local_pos].counter_and_idx;
+
+      if (likely(counter_and_idx))  // nonzero => the entry has been written
+	break;
+
+#ifdef MVEE_SLAVE_YIELD
+      syscall(__NR_sched_yield);
+#else
+      cpu_relax();
+#endif
+    }
+
+  mvee_prev_idx = counter_and_idx & 0xFFF;  // low 12 bits: clock index
+  counter_and_idx &= ~0xFFFUL;  // keep only the (clock value << 12) part; UL makes the mask full width explicitly
+
+  while ((mvee_counters[mvee_prev_idx].counter << 12) != counter_and_idx)  // wait until the clock reaches the recorded value
+#ifdef MVEE_SLAVE_YIELD
+    syscall(__NR_sched_yield);
+#else
+    cpu_relax();
+#endif
+}
+
+static inline void mvee_read_lock_result_wake(unsigned short idx)  // replay side: completes an operation on clock idx
+{
+  gcc_barrier();  // compiler-only barrier (empty asm with a "memory" clobber)
+  mvee_counters[idx].counter++;  // advance the clock, so a waiter whose recorded value is the next one can proceed
+  mvee_thread_local_pos++;  // consume the queue entry
+}
+
+static inline void mvee_maybe_flush_queue(void)  // once the queue position has reached the slot count, flushes via the MVEE and restarts at slot 0
+{
+  if (unlikely(mvee_thread_local_pos >= mvee_thread_local_queue_size))
+    {
+      syscall(MVEE_FLUSH_SHARED_BUFFER, MVEE_LIBC_ATOMIC_BUFFER);  // the return value is not checked
+      mvee_thread_local_pos = 0;
+    }
+}
+
+unsigned char mvee_atomic_preop_internal(void* word_ptr)  // returns 0 (no syncing), 1 (recording path) or 2 (replay path); the value is meant to be passed to mvee_atomic_postop_internal()
+{
+  if (unlikely(!mvee_should_sync()))
+    return 0;
+  mvee_check_buffer();
+  mvee_maybe_flush_queue();
+  if (likely(mvee_master_variant))
+    {
+      mvee_prev_idx = mvee_hash_word_ptr(word_ptr);
+      unsigned long pos = mvee_write_lock_result_prepare(mvee_prev_idx);  // the clock acquired here is only released by mvee_write_lock_result_finish() in the postop
+      mvee_write_lock_result_write(pos, mvee_prev_idx);
+      return 1;
+    }
+  else
+    {
+      mvee_read_lock_result_wait();  // takes the clock index from the recorded entry; word_ptr is not used on this path
+      return 2;
+    }
+}
+
+void mvee_atomic_postop_internal(unsigned char preop_result)  // completes the operation begun by mvee_atomic_preop_internal(); preop_result is that call's return value (0 = nothing to do)
+{
+  if (likely(preop_result == 1))  // 1: recording path -- bump the clock and release it; the hint now covers the comparison, not the raw value
+    mvee_write_lock_result_finish(mvee_prev_idx);
+  else if (likely(preop_result == 2))  // 2: replay path -- bump the clock and consume the queue entry
+    mvee_read_lock_result_wake(mvee_prev_idx);
+}
+
+unsigned char mvee_atomic_preop(unsigned short op_type, void* word_ptr)  // op_type is accepted but ignored; forwards to mvee_atomic_preop_internal()
+{
+  return mvee_atomic_preop_internal(word_ptr);
+}
+
+void mvee_atomic_postop(unsigned char preop_result)  // forwards to mvee_atomic_postop_internal() unchanged
+{
+  mvee_atomic_postop_internal(preop_result);
+}
+
+unsigned char mvee_should_futex_unlock(void)  // 1 only when syncing is enabled and this is not the master variant, else 0
+{
+  return (mvee_should_sync() && !mvee_master_variant) ? 1 : 0;  // call mvee_should_sync() first: its first call is what hands &mvee_master_variant to the MVEE, so the flag must not be read before it
+}
+
+#endif
+
+
 STATIC int LIBC_START_MAIN (int (*main) (int, char **, char **
 					 MAIN_AUXVEC_DECL),
 			    int argc,
diff -r --unified eglibc-2.19.orig/csu/Versions eglibc-2.19/csu/Versions
--- eglibc-2.19.orig/csu/Versions	2014-05-12 14:05:10.619661624 +0200
+++ eglibc-2.19/csu/Versions	2014-12-08 13:10:15.078377639 +0100
@@ -4,6 +4,14 @@
   GLIBC_2.0 {
     # helper functions
     __libc_init_first; __libc_start_main;
+    mvee_atomic_preop;
+    mvee_atomic_postop;
+    mvee_atomic_preop_internal;
+    mvee_atomic_postop_internal;
+    mvee_invalidate_buffer;
+    mvee_all_heaps_aligned;
+    mvee_should_sync_tid;
+    mvee_should_futex_unlock;
   }
   GLIBC_2.1 {
     # New special glibc functions.
diff -r --unified eglibc-2.19.orig/debian/rules eglibc-2.19/debian/rules
--- eglibc-2.19.orig/debian/rules	2014-05-12 14:05:12.827661619 +0200
+++ eglibc-2.19/debian/rules	2014-12-09 14:14:38.593956435 +0100
@@ -116,6 +116,8 @@
 
 BUILD_CFLAGS = -O2 -g
 HOST_CFLAGS = -pipe -O2 -g $(call xx,extra_cflags)
+#BUILD_CFLAGS = -O2 -ggdb -fno-omit-frame-pointer
+#HOST_CFLAGS = -pipe -O2 -ggdb -fno-omit-frame-pointer $(call xx,extra_cflags)
 
 # This subst can go away when dpkg-dev starts reporting i586 instead of i486
 configure_target := $(subst i486,i586,$(DEB_HOST_GNU_TYPE))
diff -r --unified eglibc-2.19.orig/inet/getnetgrent_r.c eglibc-2.19/inet/getnetgrent_r.c
--- eglibc-2.19.orig/inet/getnetgrent_r.c	2014-05-12 14:05:12.451661620 +0200
+++ eglibc-2.19/inet/getnetgrent_r.c	2014-12-02 16:58:38.747874336 +0100
@@ -50,7 +50,7 @@
   static service_user *startp;
   int no_more;
 
-  if (!startp_initialized)
+  if (!atomic_load(startp_initialized))
     {
       /* Executing this more than once at the same time must yield the
 	 same result every time.  So we need no locking.  */
@@ -60,7 +60,7 @@
       PTR_MANGLE (startp);
 #endif
       atomic_write_barrier ();
-      startp_initialized = true;
+      atomic_store(startp_initialized, true);
     }
   else
     {
diff -r --unified eglibc-2.19.orig/malloc/arena.c eglibc-2.19/malloc/arena.c
--- eglibc-2.19.orig/malloc/arena.c	2014-05-12 14:05:12.535661620 +0200
+++ eglibc-2.19/malloc/arena.c	2014-12-02 16:58:38.748874336 +0100
@@ -526,7 +526,7 @@
 new_heap (size_t size, size_t top_pad)
 {
   size_t page_mask = GLRO (dl_pagesize) - 1;
-  char *p1, *p2;
+  char *p1, *p2, *prev_heap_area;
   unsigned long ul;
   heap_info *h;
 
@@ -545,12 +545,34 @@
      mapping (on Linux, this is the case for all non-writable mappings
      anyway). */
   p2 = MAP_FAILED;
-  if (aligned_heap_area)
+  prev_heap_area = atomic_load(aligned_heap_area);
+  if (prev_heap_area)
     {
-      p2 = (char *) MMAP (aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE,
+      p2 = (char *) MMAP (prev_heap_area, HEAP_MAX_SIZE, PROT_NONE,
                           MAP_NORESERVE);
-      aligned_heap_area = NULL;
-      if (p2 != MAP_FAILED && ((unsigned long) p2 & (HEAP_MAX_SIZE - 1)))
+      atomic_store(aligned_heap_area, NULL);
+      /* MVEE Patch:
+	 The original code tries to do the following:
+	 1) It tries to allocate a new heap that expands the previously
+	 allocated heap (prev_heap_area) downwards. This way, 
+	 fragmentation can be minimized.
+	 2) It ensures that the newly allocated heap is HEAP_MAX_SIZE
+	 aligned. If it is not, the allocation is considered to have failed
+	 and malloc will allocate a HEAP_MAX_SIZE aligned block elsewhere.
+
+	 Inside the MVEE though, there is a bit of a problem. With ASLR
+	 enabled, it is very much possible that some replicae get a
+	 HEAP_MAX_SIZE aligned block, while others do not. This can happen
+	 because the previous heap cannot be extended downwards and the
+	 kernel picks a new base address randomly.
+
+	 In order to maintain consistent behavior later on, all allocations
+	 must either fail or succeed.
+	 => if we run under the MVEE's control, we check whether all
+	 heaps are aligned. If not, we unmap all of them, even if some
+	 WERE aligned!!!
+       */
+      if (p2 != MAP_FAILED && !mvee_all_heaps_aligned(p2))
         {
           __munmap (p2, HEAP_MAX_SIZE);
           p2 = MAP_FAILED;
@@ -558,6 +580,16 @@
     }
   if (p2 == MAP_FAILED)
     {
+      /* MVEE Patch:
+	 The MVEE will hold its internal MMAN lock across this mmap call
+	 and the subsequent (optional) munmap calls, thus ensuring that
+	 all replicae will have a consistent memory map after this whole
+	 operation.
+	 
+	 Because of this, we have to make sure that the MMAP+munmaps
+	 sequence is atomic and cannot block (=> we should _NOT_ grab
+	 ANY locks until after the last munmap)	
+       */
       p1 = (char *) MMAP (0, HEAP_MAX_SIZE << 1, PROT_NONE, MAP_NORESERVE);
       if (p1 != MAP_FAILED)
         {
@@ -566,9 +598,10 @@
           ul = p2 - p1;
           if (ul)
             __munmap (p1, ul);
-          else
-            aligned_heap_area = p2 + HEAP_MAX_SIZE;
+
           __munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
+
+	  atomic_store(aligned_heap_area, p2 + HEAP_MAX_SIZE);
         }
       else
         {
@@ -578,7 +611,8 @@
           if (p2 == MAP_FAILED)
             return 0;
 
-          if ((unsigned long) p2 & (HEAP_MAX_SIZE - 1))
+	  /* MVEE patch: see comment above */
+          if (!mvee_all_heaps_aligned(p2))
             {
               __munmap (p2, HEAP_MAX_SIZE);
               return 0;
@@ -661,8 +695,8 @@
 
 #define delete_heap(heap) \
   do {									      \
-      if ((char *) (heap) + HEAP_MAX_SIZE == aligned_heap_area)		      \
-        aligned_heap_area = NULL;					      \
+    if ((char *) (heap) + HEAP_MAX_SIZE == atomic_load(aligned_heap_area)) \
+      atomic_store(aligned_heap_area, NULL);				\
       __munmap ((char *) (heap), HEAP_MAX_SIZE);			      \
     } while (0)
 
@@ -786,14 +820,10 @@
 static mstate
 get_free_list (void)
 {
+  (void) mutex_lock (&list_lock);
   mstate result = free_list;
-  if (result != NULL)
-    {
-      (void) mutex_lock (&list_lock);
-      result = free_list;
       if (result != NULL)
         free_list = result->next_free;
-      (void) mutex_unlock (&list_lock);
 
       if (result != NULL)
         {
@@ -802,7 +832,8 @@
           tsd_setspecific (arena_key, (void *) result);
           THREAD_STAT (++(result->stat_lock_loop));
         }
-    }
+
+  (void) mutex_unlock (&list_lock);
 
   return result;
 }
@@ -858,24 +889,24 @@
   if (a == NULL)
     {
       /* Nothing immediately available, so generate a new arena.  */
-      if (narenas_limit == 0)
+      if (atomic_load(narenas_limit) == 0)
         {
           if (mp_.arena_max != 0)
-            narenas_limit = mp_.arena_max;
-          else if (narenas > mp_.arena_test)
+            atomic_store(narenas_limit, mp_.arena_max);
+          else if (atomic_load(narenas) > mp_.arena_test)
             {
               int n = __get_nprocs ();
 
               if (n >= 1)
-                narenas_limit = NARENAS_FROM_NCORES (n);
+                atomic_store(narenas_limit, NARENAS_FROM_NCORES (n));
               else
                 /* We have no information about the system.  Assume two
                    cores.  */
-                narenas_limit = NARENAS_FROM_NCORES (2);
+                atomic_store(narenas_limit, NARENAS_FROM_NCORES (2));
             }
         }
     repeat:;
-      size_t n = narenas;
+      size_t n = atomic_load(narenas);
       /* NB: the following depends on the fact that (size_t)0 - 1 is a
          very large number and that the underflow is OK.  If arena_max
          is set the value of arena_test is irrelevant.  If arena_test
@@ -883,7 +914,7 @@
          narenas_limit is 0.  There is no possibility for narenas to
          be too big for the test to always fail since there is not
          enough address space to create that many arenas.  */
-      if (__builtin_expect (n <= narenas_limit - 1, 0))
+      if (__builtin_expect (n <= atomic_load(narenas_limit) - 1, 0))
         {
           if (catomic_compare_and_exchange_bool_acq (&narenas, n + 1, n))
             goto repeat;
diff -r --unified eglibc-2.19.orig/malloc/malloc.c eglibc-2.19/malloc/malloc.c
--- eglibc-2.19.orig/malloc/malloc.c	2014-05-12 14:05:12.535661620 +0200
+++ eglibc-2.19/malloc/malloc.c	2014-12-02 16:58:38.751874336 +0100
@@ -1705,6 +1705,9 @@
 
 struct malloc_par
 {
+  /* MVEE patch */
+  mutex_t mutex;
+
   /* Tunable parameters */
   unsigned long trim_threshold;
   INTERNAL_SIZE_T top_pad;
@@ -1748,6 +1751,7 @@
 
 static struct malloc_par mp_ =
 {
+  .mutex = MUTEX_INITIALIZER,
   .top_pad = DEFAULT_TOP_PAD,
   .n_mmaps_max = DEFAULT_MMAP_MAX,
   .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
@@ -2278,10 +2282,12 @@
      allocated mmapped regions, try to directly map this request
      rather than expanding top.
    */
-
+  
+  (void) mutex_lock(&mp_.mutex);
   if ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold) &&
       (mp_.n_mmaps < mp_.n_mmaps_max))
     {
+      (void) mutex_unlock(&mp_.mutex);
       char *mm;           /* return value from mmap call*/
 
     try_mmap:
@@ -2352,6 +2358,8 @@
             }
         }
     }
+  else
+    (void) mutex_unlock(&mp_.mutex);
 
   /* Record incoming configuration of top */
 
@@ -2886,13 +2894,17 @@
 
   arena_lock (ar_ptr, bytes);
   if (!ar_ptr)
-    return 0;
+    {
+      MVEE_MALLOC_HOOK(LIBC_MALLOC, 0, bytes, 0, 0);
+      return 0;
+    }
 
   victim = _int_malloc (ar_ptr, bytes);
   if (!victim)
     {
       LIBC_PROBE (memory_malloc_retry, 1, bytes);
       ar_ptr = arena_get_retry (ar_ptr, bytes);
+      MVEE_MALLOC_HOOK(LIBC_MALLOC, 1, bytes, ar_ptr, 0);
       if (__builtin_expect (ar_ptr != NULL, 1))
         {
           victim = _int_malloc (ar_ptr, bytes);
@@ -2900,9 +2912,15 @@
         }
     }
   else
-    (void) mutex_unlock (&ar_ptr->mutex);
+    {
+      MVEE_MALLOC_HOOK(LIBC_MALLOC, 2, bytes, ar_ptr, victim);
+      (void) mutex_unlock (&ar_ptr->mutex);
+    }
+
   assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
           ar_ptr == arena_for_chunk (mem2chunk (victim)));
+
+  MVEE_MALLOC_HOOK(LIBC_MALLOC, 3, bytes, ar_ptr, victim);
   return victim;
 }
 libc_hidden_def (__libc_malloc)
@@ -2928,7 +2946,9 @@
 
   if (chunk_is_mmapped (p))                       /* release mmapped memory. */
     {
+      MVEE_MALLOC_HOOK(LIBC_FREE, 0, chunksize(p), 0, p);
       /* see if the dynamic brk/mmap threshold needs adjusting */
+      (void) mutex_lock(&mp_.mutex);
       if (!mp_.no_dyn_threshold
           && p->size > mp_.mmap_threshold
           && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
@@ -2938,11 +2958,13 @@
           LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                       mp_.mmap_threshold, mp_.trim_threshold);
         }
+      (void) mutex_unlock(&mp_.mutex);
       munmap_chunk (p);
       return;
     }
 
   ar_ptr = arena_for_chunk (p);
+  MVEE_MALLOC_HOOK(LIBC_FREE, 1, chunksize(p), ar_ptr, p);
   _int_free (ar_ptr, p, 0);
 }
 libc_hidden_def (__libc_free)
@@ -3000,15 +3022,22 @@
 #endif
       /* Note the extra SIZE_SZ overhead. */
       if (oldsize - SIZE_SZ >= nb)
-        return oldmem;                         /* do nothing */
+	{
+	  MVEE_MALLOC_HOOK(LIBC_REALLOC, 0, bytes, 0, oldmem);
+	  return oldmem;                         /* do nothing */
+	}
 
       /* Must alloc, copy, free. */
       newmem = __libc_malloc (bytes);
       if (newmem == 0)
-        return 0;              /* propagate failure */
+	{
+	  MVEE_MALLOC_HOOK(LIBC_REALLOC, 1, bytes, 0, oldmem);
+	  return 0;              /* propagate failure */
+	}
 
       memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
       munmap_chunk (oldp);
+      MVEE_MALLOC_HOOK(LIBC_REALLOC, 2, bytes, 0, newmem);
       return newmem;
     }
 
@@ -3026,6 +3055,7 @@
 #endif
 
 
+  MVEE_MALLOC_HOOK(LIBC_REALLOC, 3, bytes, ar_ptr, oldp);
   newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
 
   (void) mutex_unlock (&ar_ptr->mutex);
@@ -3034,6 +3064,7 @@
 
   if (newp == NULL)
     {
+      MVEE_MALLOC_HOOK(LIBC_REALLOC, 4, bytes, 0, oldp);
       /* Try harder to allocate memory in other arenas.  */
       LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
       newp = __libc_malloc (bytes);
@@ -3044,6 +3075,7 @@
         }
     }
 
+  MVEE_MALLOC_HOOK(LIBC_REALLOC, 5, bytes, 0, newp);
   return newp;
 }
 libc_hidden_def (__libc_realloc)
@@ -3052,6 +3084,7 @@
 __libc_memalign (size_t alignment, size_t bytes)
 {
   void *address = RETURN_ADDRESS (0);
+  MVEE_MALLOC_HOOK(LIBC_MEMALIGN, 0, bytes, NULL, (void*)alignment);
   return _mid_memalign (alignment, bytes, address);
 }
 
@@ -3118,6 +3151,7 @@
     (void) mutex_unlock (&ar_ptr->mutex);
   assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
           ar_ptr == arena_for_chunk (mem2chunk (p)));
+  MVEE_MALLOC_HOOK(LIBC_MEMALIGN, 1, bytes, ar_ptr, p);
   return p;
 }
 /* For ISO C11.  */
@@ -3194,9 +3228,13 @@
 
   sz = bytes;
 
+  MVEE_MALLOC_HOOK(LIBC_CALLOC, 0, bytes, 0, 0);
   arena_get (av, sz);
   if (!av)
-    return 0;
+    {
+      MVEE_MALLOC_HOOK(LIBC_CALLOC, 1, bytes, 0, 0);
+      return 0;
+    }
 
   /* Check if we hand out the top chunk, in which case there may be no
      need to clear. */
@@ -3224,6 +3262,7 @@
 
   if (mem == 0)
     {
+      MVEE_MALLOC_HOOK(LIBC_CALLOC, 2, bytes, 0, 0);
       LIBC_PROBE (memory_calloc_retry, 1, sz);
       av = arena_get_retry (av, sz);
       if (__builtin_expect (av != NULL, 1))
@@ -3241,6 +3280,7 @@
   /* Two optional cases in which clearing not necessary */
   if (chunk_is_mmapped (p))
     {
+      MVEE_MALLOC_HOOK(LIBC_CALLOC, 3, bytes, 0, 0);
       if (__builtin_expect (perturb_byte, 0))
         return memset (mem, 0, sz);
 
@@ -3257,6 +3297,8 @@
     }
 #endif
 
+  MVEE_MALLOC_HOOK(LIBC_CALLOC, 4, bytes, 0, 0);
+
   /* Unroll clear of <= 36 bytes (72 if 8byte sizes).  We know that
      contents have an odd number of INTERNAL_SIZE_T-sized words;
      minimally 3.  */
@@ -3331,6 +3373,8 @@
 
   checked_request2size (bytes, nb);
 
+  MVEE_MALLOC_HOOK(_INT_MALLOC, 0, nb, 0, 0);
+
   /*
      If the size qualifies as a fastbin, first check corresponding bin.
      This code is safe to execute even if av is not yet initialized, so we
@@ -3338,8 +3382,9 @@
    */
 
   if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
-    {
+    {      
       idx = fastbin_index (nb);
+      MVEE_MALLOC_HOOK(_INT_MALLOC, 1, nb, av, idx);
       mfastbinptr *fb = &fastbin (av, idx);
       mchunkptr pp = *fb;
       do
@@ -3352,6 +3397,7 @@
              != victim);
       if (victim != 0)
         {
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 2, nb, av, victim);
           if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
             {
               errstr = "malloc(): memory corruption (fast)";
@@ -3364,6 +3410,10 @@
           alloc_perturb (p, bytes);
           return p;
         }
+      else
+	{
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 3, nb, av, 0);
+	}
     }
 
   /*
@@ -3378,13 +3428,18 @@
     {
       idx = smallbin_index (nb);
       bin = bin_at (av, idx);
+      MVEE_MALLOC_HOOK(_INT_MALLOC, 4, nb, av, idx);
 
       if ((victim = last (bin)) != bin)
         {
           if (victim == 0) /* initialization check */
-            malloc_consolidate (av);
+	    {
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 5, nb, av, idx);
+	      malloc_consolidate (av);
+	    }
           else
             {
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 6, nb, av, victim);
               bck = victim->bk;
               if (__builtin_expect (bck->fd != victim, 0))
                 {
@@ -3419,8 +3474,12 @@
   else
     {
       idx = largebin_index (nb);
+      MVEE_MALLOC_HOOK(_INT_MALLOC, 7, nb, av, idx);
       if (have_fastchunks (av))
-        malloc_consolidate (av);
+	{
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 8, nb, av, 0);
+	  malloc_consolidate (av);
+	}
     }
 
   /*
@@ -3463,6 +3522,7 @@
             {
               /* split and reattach remainder */
               remainder_size = size - nb;
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 9, size, av, victim);
               remainder = chunk_at_offset (victim, nb);
               unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
               av->last_remainder = remainder;
@@ -3492,6 +3552,7 @@
 
           if (size == nb)
             {
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 10, size, av, victim);
               set_inuse_bit_at_offset (victim, size);
               if (av != &main_arena)
                 victim->size |= NON_MAIN_ARENA;
@@ -3565,7 +3626,10 @@
 
 #define MAX_ITERS       10000
           if (++iters >= MAX_ITERS)
-            break;
+	    {
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 11, nb, av, 0);
+	      break;
+	    }
         }
 
       /*
@@ -3577,6 +3641,8 @@
         {
           bin = bin_at (av, idx);
 
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 12, nb, av, idx);
+
           /* skip scan if empty or largest chunk is too small */
           if ((victim = first (bin)) != bin &&
               (unsigned long) (victim->size) >= (unsigned long) (nb))
@@ -3591,6 +3657,8 @@
               if (victim != last (bin) && victim->size == victim->fd->size)
                 victim = victim->fd;
 
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 13, nb, av, victim);
+
               remainder_size = size - nb;
               unlink (victim, bck, fwd);
 
@@ -3652,6 +3720,8 @@
       map = av->binmap[block];
       bit = idx2bit (idx);
 
+      MVEE_MALLOC_HOOK(_INT_MALLOC, 14, nb, av, idx);
+
       for (;; )
         {
           /* Skip rest of block if there are no more set bits in this block.  */
@@ -3691,6 +3761,8 @@
             {
               size = chunksize (victim);
 
+	      MVEE_MALLOC_HOOK(_INT_MALLOC, 15, size, av, idx);
+
               /*  We know the first chunk in this bin is big enough to use. */
               assert ((unsigned long) (size) >= (unsigned long) (nb));
 
@@ -3767,6 +3839,7 @@
 
       if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
         {
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 16, size, av, idx);
           remainder_size = size - nb;
           remainder = chunk_at_offset (victim, nb);
           av->top = remainder;
@@ -3790,6 +3863,7 @@
             idx = smallbin_index (nb);
           else
             idx = largebin_index (nb);
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 17, nb, av, idx);
         }
 
       /*
@@ -3797,6 +3871,7 @@
        */
       else
         {
+	  MVEE_MALLOC_HOOK(_INT_MALLOC, 18, nb, av, 0);
           void *p = sysmalloc (nb, av);
           if (p != NULL)
             alloc_perturb (p, bytes);
@@ -3866,6 +3941,8 @@
 #endif
       ) {
 
+    MVEE_MALLOC_HOOK(_INT_FREE, 0, size, av, p);
+
     if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
 	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
 			     >= av->system_mem, 0))
@@ -3893,15 +3970,19 @@
 
     free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
 
+    if (!have_lock)
+      (void) mutex_lock(&av->mutex);
+
     set_fastchunks(av);
     unsigned int idx = fastbin_index(size);
     fb = &fastbin (av, idx);
 
+    MVEE_MALLOC_HOOK(_INT_FREE, 1, size, av, idx);
+
     /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
     mchunkptr old = *fb, old2;
     unsigned int old_idx = ~0u;
-    do
-      {
+
 	/* Check that the top of the bin is not the record we are going to add
 	   (i.e., double free).  */
 	if (__builtin_expect (old == p, 0))
@@ -3916,8 +3997,10 @@
 	if (have_lock && old != NULL)
 	  old_idx = fastbin_index(chunksize(old));
 	p->fd = old2 = old;
-      }
-    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+	*fb = p;
+
+    if (!have_lock)
+      (void) mutex_unlock(&av->mutex);
 
     if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
       {
@@ -3945,6 +4028,8 @@
       locked = 1;
     }
 
+    MVEE_MALLOC_HOOK(_INT_FREE, 2, size, av, 0);
+
     nextchunk = chunk_at_offset(p, size);
 
     /* Lightweight tests: check whether the block is already the
@@ -3982,6 +4067,7 @@
     /* consolidate backward */
     if (!prev_inuse(p)) {
       prevsize = p->prev_size;
+      MVEE_MALLOC_HOOK(_INT_FREE, 3, prevsize, av, 0);
       size += prevsize;
       p = chunk_at_offset(p, -((long) prevsize));
       unlink(p, bck, fwd);
@@ -4033,6 +4119,7 @@
     */
 
     else {
+      MVEE_MALLOC_HOOK(_INT_FREE, 4, size, av, 0);
       size += nextsize;
       set_head(p, size | PREV_INUSE);
       av->top = p;
@@ -4053,14 +4140,22 @@
     */
 
     if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      MVEE_MALLOC_HOOK(_INT_FREE, 5, size, av, 0);
+
       if (have_fastchunks(av))
 	malloc_consolidate(av);
 
       if (av == &main_arena) {
 #ifndef MORECORE_CANNOT_TRIM
+	(void) mutex_lock(&mp_.mutex);
 	if ((unsigned long)(chunksize(av->top)) >=
 	    (unsigned long)(mp_.trim_threshold))
-	  systrim(mp_.top_pad, av);
+	  {
+	    (void) mutex_unlock(&mp_.mutex);
+	    systrim(mp_.top_pad, av);
+	  }
+	else
+	  (void) mutex_unlock(&mp_.mutex);
 #endif
       } else {
 	/* Always try heap_trim(), even if the top chunk is not
@@ -4082,6 +4177,7 @@
   */
 
   else {
+    MVEE_MALLOC_HOOK(_INT_FREE, 6, size, av, p);
     munmap_chunk (p);
   }
 }
@@ -4118,6 +4214,8 @@
   mchunkptr       bck;
   mchunkptr       fwd;
 
+  int cnt = 0;
+
   /*
     If max_fast is 0, we know that av hasn't
     yet been initialized, in which case do so below
@@ -4187,6 +4285,8 @@
 	    av->top = p;
 	  }
 
+	  cnt++;
+
 	} while ( (p = nextp) != 0);
 
       }
@@ -4196,6 +4296,8 @@
     malloc_init_state(av);
     check_malloc_state(av);
   }
+
+  MVEE_MALLOC_HOOK(MALLOC_CONSOLIDATE, 0, cnt, av, 0);
 }
 
 /*
@@ -4263,6 +4365,7 @@
           (unsigned long) (newsize = oldsize + nextsize) >=
           (unsigned long) (nb + MINSIZE))
         {
+	  MVEE_MALLOC_HOOK(_INT_REALLOC, 0, newsize, av, next);
           set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
           av->top = chunk_at_offset (oldp, nb);
           set_head (av->top, (newsize - nb) | PREV_INUSE);
@@ -4276,6 +4379,7 @@
                (unsigned long) (newsize = oldsize + nextsize) >=
                (unsigned long) (nb))
         {
+	  MVEE_MALLOC_HOOK(_INT_REALLOC, 1, newsize, av, next);
           newp = oldp;
           unlink (next, bck, fwd);
         }
@@ -4283,6 +4387,7 @@
       /* allocate, copy, free */
       else
         {
+	  MVEE_MALLOC_HOOK(_INT_REALLOC, 2, newsize, av, 0);
           newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
           if (newmem == 0)
             return 0; /* propagate failure */
@@ -4295,6 +4400,7 @@
            */
           if (newp == next)
             {
+	      MVEE_MALLOC_HOOK(_INT_REALLOC, 3, newsize, av, 0);
               newsize += oldsize;
               newp = oldp;
             }
@@ -4305,6 +4411,7 @@
                  We know that contents have an odd number of
                  INTERNAL_SIZE_T-sized words; minimally 3.
                */
+	      MVEE_MALLOC_HOOK(_INT_REALLOC, 4, newsize, av, newmem);
 
               copysize = oldsize - SIZE_SZ;
               s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
@@ -4352,11 +4459,13 @@
 
   if (remainder_size < MINSIZE)   /* not enough extra to split off */
     {
+      MVEE_MALLOC_HOOK(_INT_REALLOC, 5, newsize, av, newp);
       set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
       set_inuse_bit_at_offset (newp, newsize);
     }
   else   /* split remainder */
     {
+      MVEE_MALLOC_HOOK(_INT_REALLOC, 6, newsize, av, newp);
       remainder = chunk_at_offset (newp, nb);
       set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
       set_head (remainder, remainder_size | PREV_INUSE |
@@ -4535,13 +4644,15 @@
     ptmalloc_init ();
 
   mstate ar_ptr = &main_arena;
+  mstate tmp;
   do
     {
       (void) mutex_lock (&ar_ptr->mutex);
       result |= mtrim (ar_ptr, s);
+      tmp = ar_ptr->next;
       (void) mutex_unlock (&ar_ptr->mutex);
 
-      ar_ptr = ar_ptr->next;
+      ar_ptr = tmp;
     }
   while (ar_ptr != &main_arena);
 
@@ -4642,9 +4753,11 @@
   m->fsmblks += fastavail;
   if (av == &main_arena)
     {
+      (void) mutex_lock(&mp_.mutex);
       m->hblks = mp_.n_mmaps;
       m->hblkhd = mp_.mmapped_mem;
-      m->usmblks = mp_.max_total_mem;
+      m->usmblks = mp_.max_total_mem;      
+      (void) mutex_unlock(&mp_.mutex);
       m->keepcost = chunksize (av->top);
     }
 }
@@ -4655,6 +4768,7 @@
 {
   struct mallinfo m;
   mstate ar_ptr;
+  mstate tmp;
 
   if (__malloc_initialized < 0)
     ptmalloc_init ();
@@ -4665,9 +4779,10 @@
     {
       (void) mutex_lock (&ar_ptr->mutex);
       int_mallinfo (ar_ptr, &m);
+      tmp = ar_ptr->next;
       (void) mutex_unlock (&ar_ptr->mutex);
 
-      ar_ptr = ar_ptr->next;
+      ar_ptr = tmp;
     }
   while (ar_ptr != &main_arena);
 
@@ -4683,7 +4798,10 @@
 {
   int i;
   mstate ar_ptr;
+  mstate tmp;
+  (void) mutex_lock(&mp_.mutex);
   unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+  (void) mutex_unlock(&mp_.mutex);
 #if THREAD_STATS
   long stat_lock_direct = 0, stat_lock_loop = 0, stat_lock_wait = 0;
 #endif
@@ -4714,17 +4832,20 @@
       stat_lock_loop += ar_ptr->stat_lock_loop;
       stat_lock_wait += ar_ptr->stat_lock_wait;
 #endif
+      tmp = ar_ptr->next;
       (void) mutex_unlock (&ar_ptr->mutex);
-      ar_ptr = ar_ptr->next;
+      ar_ptr = tmp;
       if (ar_ptr == &main_arena)
         break;
     }
   fprintf (stderr, "Total (incl. mmap):\n");
   fprintf (stderr, "system bytes     = %10u\n", system_b);
   fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  (void) mutex_lock(&mp_.mutex);
   fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
   fprintf (stderr, "max mmap bytes   = %10lu\n",
            (unsigned long) mp_.max_mmapped_mem);
+  (void) mutex_unlock(&mp_.mutex);
 #if THREAD_STATS
   fprintf (stderr, "heaps created    = %10d\n", stat_n_heaps);
   fprintf (stderr, "locked directly  = %10ld\n", stat_lock_direct);
@@ -4769,20 +4890,25 @@
       break;
 
     case M_TRIM_THRESHOLD:
+      (void) mutex_lock(&mp_.mutex);
       LIBC_PROBE (memory_mallopt_trim_threshold, 3, value,
                   mp_.trim_threshold, mp_.no_dyn_threshold);
       mp_.trim_threshold = value;
       mp_.no_dyn_threshold = 1;
+      (void) mutex_unlock(&mp_.mutex);
       break;
 
     case M_TOP_PAD:
+      (void) mutex_lock(&mp_.mutex);
       LIBC_PROBE (memory_mallopt_top_pad, 3, value,
                   mp_.top_pad, mp_.no_dyn_threshold);
       mp_.top_pad = value;
       mp_.no_dyn_threshold = 1;
+      (void) mutex_unlock(&mp_.mutex);
       break;
 
     case M_MMAP_THRESHOLD:
+      (void) mutex_lock(&mp_.mutex);
       /* Forbid setting the threshold too high. */
       if ((unsigned long) value > HEAP_MAX_SIZE / 2)
         res = 0;
@@ -4793,13 +4919,16 @@
           mp_.mmap_threshold = value;
           mp_.no_dyn_threshold = 1;
         }
+      (void) mutex_unlock(&mp_.mutex);
       break;
 
     case M_MMAP_MAX:
+      (void) mutex_lock(&mp_.mutex);
       LIBC_PROBE (memory_mallopt_mmap_max, 3, value,
                   mp_.n_mmaps_max, mp_.no_dyn_threshold);
       mp_.n_mmaps_max = value;
       mp_.no_dyn_threshold = 1;
+      (void) mutex_unlock(&mp_.mutex);
       break;
 
     case M_CHECK_ACTION:
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-alias.c eglibc-2.19/nis/nss_nisplus/nisplus-alias.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-alias.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-alias.c	2014-12-02 16:58:38.752874336 +0100
@@ -43,7 +43,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -62,7 +62,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
 
   return NSS_STATUS_SUCCESS;
@@ -176,7 +176,7 @@
     return NSS_STATUS_UNAVAIL;
 
   next_entry = 0;
-  result = nis_list (tablename_val, FOLLOW_PATH | FOLLOW_LINKS, NULL, NULL);
+  result = nis_list (atomic_load(tablename_val), FOLLOW_PATH | FOLLOW_LINKS, NULL, NULL);
   if (result == NULL)
     {
       status = NSS_STATUS_TRYAGAIN;
@@ -279,7 +279,7 @@
 {
   int parse_res;
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-ethers.c eglibc-2.19/nis/nss_nisplus/nisplus-ethers.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-ethers.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-ethers.c	2014-12-02 16:58:38.752874336 +0100
@@ -88,7 +88,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -107,7 +107,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
   return NSS_STATUS_SUCCESS;
 }
@@ -157,7 +157,7 @@
 internal_nisplus_getetherent_r (struct etherent *ether, char *buffer,
 				size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       enum nss_status status = _nss_create_tablename (errnop);
 
@@ -236,7 +236,7 @@
 _nss_nisplus_gethostton_r (const char *name, struct etherent *eth,
 			   char *buffer, size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       enum nss_status status = _nss_create_tablename (errnop);
 
@@ -294,7 +294,7 @@
 _nss_nisplus_getntohost_r (const struct ether_addr *addr, struct etherent *eth,
 			   char *buffer, size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-grp.c eglibc-2.19/nis/nss_nisplus/nisplus-grp.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-grp.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-grp.c	2014-12-02 16:58:38.752874336 +0100
@@ -50,7 +50,7 @@
 enum nss_status
 _nss_grp_create_tablename (int *errnop)
 {
-  if (grp_tablename_val == NULL)
+  if (atomic_load(grp_tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -106,7 +106,7 @@
 {
   enum nss_status status = NSS_STATUS_SUCCESS;
 
-  if (grp_tablename_val == NULL)
+  if (atomic_load(grp_tablename_val) == NULL)
     status = _nss_grp_create_tablename (errnop);
 
   if (status == NSS_STATUS_SUCCESS)
@@ -288,7 +288,7 @@
 {
   int parse_res;
 
-  if (grp_tablename_val == NULL)
+  if (atomic_load(grp_tablename_val) == NULL)
     {
       enum nss_status status = _nss_grp_create_tablename (errnop);
 
@@ -347,7 +347,7 @@
 _nss_nisplus_getgrgid_r (const gid_t gid, struct group *gr,
 			 char *buffer, size_t buflen, int *errnop)
 {
-  if (grp_tablename_val == NULL)
+  if (atomic_load(grp_tablename_val) == NULL)
     {
       enum nss_status status = _nss_grp_create_tablename (errnop);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-hosts.c eglibc-2.19/nis/nss_nisplus/nisplus-hosts.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-hosts.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-hosts.c	2014-12-02 16:58:38.753874336 +0100
@@ -191,7 +191,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -210,7 +210,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
 
   return NSS_STATUS_SUCCESS;
@@ -231,7 +231,7 @@
       result = NULL;
     }
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     status = _nss_create_tablename (&err);
 
   __libc_lock_unlock (lock);
@@ -271,7 +271,7 @@
       if (result == NULL)
 	{
 	  saved_res = NULL;
-	  if (tablename_val == NULL)
+	  if (atomic_load(tablename_val) == NULL)
 	    {
 	      enum nss_status status = _nss_create_tablename (errnop);
 
@@ -300,7 +300,7 @@
       else
 	{
 	  saved_res = result;
-	  result = nis_next_entry (tablename_val, &result->cookie);
+	  result = nis_next_entry (atomic_load(tablename_val), &result->cookie);
 	  if (result == NULL)
 	    {
 	      *errnop = errno;
@@ -383,7 +383,7 @@
 			   char *buffer, size_t buflen, int *errnop,
 			   int *herrnop, int flags)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       enum nss_status status = get_tablename (herrnop);
       if (status != NSS_STATUS_SUCCESS)
@@ -518,7 +518,7 @@
 			      struct hostent *host, char *buffer,
 			      size_t buflen, int *errnop, int *herrnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       enum nss_status status = get_tablename (herrnop);
       if (status != NSS_STATUS_SUCCESS)
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-network.c eglibc-2.19/nis/nss_nisplus/nisplus-network.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-network.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-network.c	2014-12-02 16:58:38.753874336 +0100
@@ -142,7 +142,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -161,7 +161,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
 
   return NSS_STATUS_SUCCESS;
@@ -180,7 +180,7 @@
       result = NULL;
     }
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       int err;
       status = _nss_create_tablename (&err);
@@ -222,7 +222,7 @@
 	{
 	  saved_res = NULL;
 
-	  if (tablename_val == NULL)
+	  if (atomic_load(tablename_val) == NULL)
 	    {
 	      enum nss_status status = _nss_create_tablename (errnop);
 
@@ -254,7 +254,7 @@
       else
 	{
 	  saved_res = result;
-	  result = nis_next_entry (tablename_val, &result->cookie);
+	  result = nis_next_entry (atomic_load(tablename_val), &result->cookie);
 	  if (result == NULL)
 	    {
 	      *errnop = errno;
@@ -311,7 +311,7 @@
 {
   int parse_res, retval;
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
@@ -414,7 +414,7 @@
 			     struct netent *network, char *buffer,
 			     size_t buflen, int *errnop, int *herrnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-proto.c eglibc-2.19/nis/nss_nisplus/nisplus-proto.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-proto.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-proto.c	2014-12-02 16:58:38.754874336 +0100
@@ -137,7 +137,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -156,7 +156,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
 
   return NSS_STATUS_SUCCESS;
@@ -175,7 +175,7 @@
       result = NULL;
     }
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       int err;
       status = _nss_create_tablename (&err);
@@ -216,7 +216,7 @@
       if (result == NULL)
 	{
 	  saved_res = NULL;
-	  if (tablename_val == NULL)
+	  if (atomic_load(tablename_val) == NULL)
 	    {
 	      enum nss_status status = _nss_create_tablename (errnop);
 
@@ -236,7 +236,7 @@
       else
 	{
 	  saved_res = result;
-	  result = nis_next_entry (tablename_val, &result->cookie);
+	  result = nis_next_entry (atomic_load(tablename_val), &result->cookie);
 	  if (result == NULL)
 	    {
 	      *errnop = errno;
@@ -290,7 +290,7 @@
 {
   int parse_res;
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
@@ -383,7 +383,7 @@
 _nss_nisplus_getprotobynumber_r (const int number, struct protoent *proto,
 				 char *buffer, size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-pwd.c eglibc-2.19/nis/nss_nisplus/nisplus-pwd.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-pwd.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-pwd.c	2014-12-02 16:58:38.754874336 +0100
@@ -49,7 +49,7 @@
 enum nss_status
 _nss_pwd_create_tablename (int *errnop)
 {
-  if (pwd_tablename_val == NULL)
+  if (atomic_load(pwd_tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -105,7 +105,7 @@
 {
   enum nss_status status = NSS_STATUS_SUCCESS;
 
-  if (pwd_tablename_val == NULL)
+  if (atomic_load(pwd_tablename_val) == NULL)
     status = _nss_pwd_create_tablename (errnop);
 
   if (status == NSS_STATUS_SUCCESS)
@@ -289,7 +289,7 @@
 {
   int parse_res;
 
-  if (pwd_tablename_val == NULL)
+  if (atomic_load(pwd_tablename_val) == NULL)
     {
       enum nss_status status = _nss_pwd_create_tablename (errnop);
 
@@ -352,7 +352,7 @@
 _nss_nisplus_getpwuid_r (const uid_t uid, struct passwd *pw,
 			 char *buffer, size_t buflen, int *errnop)
 {
-  if (pwd_tablename_val == NULL)
+  if (atomic_load(pwd_tablename_val) == NULL)
     {
       enum nss_status status = _nss_pwd_create_tablename (errnop);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-rpc.c eglibc-2.19/nis/nss_nisplus/nisplus-rpc.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-rpc.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-rpc.c	2014-12-02 16:58:38.755874336 +0100
@@ -138,7 +138,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -157,7 +157,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
 
   return NSS_STATUS_SUCCESS;
@@ -177,7 +177,7 @@
       result = NULL;
     }
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       int err;
       status = _nss_create_tablename (&err);
@@ -218,7 +218,7 @@
       if (result == NULL)
 	{
 	  saved_res = NULL;
-          if (tablename_val == NULL)
+          if (atomic_load(tablename_val) == NULL)
 	    {
 	      enum nss_status status =  _nss_create_tablename (errnop);
 
@@ -238,7 +238,7 @@
       else
 	{
 	  saved_res = result;
-	  result = nis_next_entry (tablename_val, &result->cookie);
+	  result = nis_next_entry (atomic_load(tablename_val), &result->cookie);
 	  if (result == NULL)
 	    {
 	      *errnop = errno;
@@ -292,7 +292,7 @@
 {
   int parse_res;
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
@@ -385,7 +385,7 @@
 _nss_nisplus_getrpcbynumber_r (const int number, struct rpcent *rpc,
 			       char *buffer, size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
diff -r --unified eglibc-2.19.orig/nis/nss_nisplus/nisplus-service.c eglibc-2.19/nis/nss_nisplus/nisplus-service.c
--- eglibc-2.19.orig/nis/nss_nisplus/nisplus-service.c	2014-05-12 14:05:10.607661624 +0200
+++ eglibc-2.19/nis/nss_nisplus/nisplus-service.c	2014-12-02 16:58:38.755874336 +0100
@@ -145,7 +145,7 @@
 static enum nss_status
 _nss_create_tablename (int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       const char *local_dir = nis_local_directory ();
       size_t local_dir_len = strlen (local_dir);
@@ -164,7 +164,7 @@
 
       atomic_write_barrier ();
 
-      tablename_val = p;
+      atomic_store(tablename_val, p);
     }
 
   return NSS_STATUS_SUCCESS;
@@ -185,7 +185,7 @@
       result = NULL;
     }
 
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     status = _nss_create_tablename (&err);
 
   __libc_lock_unlock (lock);
@@ -223,7 +223,7 @@
       if (result == NULL)
 	{
 	  saved_res = NULL;
-          if (tablename_val == NULL)
+          if (atomic_load(tablename_val) == NULL)
 	    {
 	      enum nss_status status = _nss_create_tablename (errnop);
 
@@ -243,7 +243,7 @@
       else
 	{
 	  saved_res = result;
-	  result = nis_next_entry (tablename_val, &result->cookie);
+	  result = nis_next_entry (atomic_load(tablename_val), &result->cookie);
 	  if (result == NULL)
 	    {
 	      *errnop = errno;
@@ -294,7 +294,7 @@
 			      struct servent *serv,
 			      char *buffer, size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
@@ -396,7 +396,7 @@
 			      struct servent *serv,
 			      char *buffer, size_t buflen, int *errnop)
 {
-  if (tablename_val == NULL)
+  if (atomic_load(tablename_val) == NULL)
     {
       __libc_lock_lock (lock);
 
diff -r --unified eglibc-2.19.orig/nptl/allocatestack.c eglibc-2.19/nptl/allocatestack.c
--- eglibc-2.19.orig/nptl/allocatestack.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/allocatestack.c	2014-12-02 16:58:38.756874336 +0100
@@ -130,13 +130,13 @@
 
 
 /* Check whether the stack is still used or not.  */
-#define FREE_P(descr) ((descr)->tid <= 0)
+#define FREE_P(descr) (atomic_load((descr)->tid) <= 0)
 
 
 static void
 stack_list_del (list_t *elem)
 {
-  in_flight_stack = (uintptr_t) elem;
+  atomic_store(in_flight_stack, (uintptr_t) elem);
 
   atomic_write_barrier ();
 
@@ -144,14 +144,14 @@
 
   atomic_write_barrier ();
 
-  in_flight_stack = 0;
+  atomic_store(in_flight_stack, 0);
 }
 
 
 static void
 stack_list_add (list_t *elem, list_t *list)
 {
-  in_flight_stack = (uintptr_t) elem | 1;
+  atomic_store(in_flight_stack, (uintptr_t) elem | 1);
 
   atomic_write_barrier ();
 
@@ -159,7 +159,7 @@
 
   atomic_write_barrier ();
 
-  in_flight_stack = 0;
+  atomic_store(in_flight_stack, 0);
 }
 
 
@@ -215,7 +215,7 @@
     }
 
   /* Don't allow setxid until cloned.  */
-  result->setxid_futex = -1;
+  atomic_store(result->setxid_futex, -1);
 
   /* Dequeue the entry.  */
   stack_list_del (&result->list);
@@ -234,7 +234,7 @@
   *memp = result->stackblock;
 
   /* Cancellation handling is back to the default.  */
-  result->cancelhandling = 0;
+  atomic_store(result->cancelhandling, 0);
   result->cleanup = NULL;
 
   /* No pending event.  */
@@ -446,7 +446,7 @@
       pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
 
       /* Don't allow setxid until cloned.  */
-      pd->setxid_futex = -1;
+      atomic_store(pd->setxid_futex, -1);
 
       /* Allocate the DTV for this thread.  */
       if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
@@ -580,7 +580,7 @@
 #endif
 
 	  /* Don't allow setxid until cloned.  */
-	  pd->setxid_futex = -1;
+	  atomic_store(pd->setxid_futex, -1);
 
 	  /* The process ID is also the same as that of the caller.  */
 	  pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
@@ -838,10 +838,10 @@
      we have to be aware that we might have interrupted a list
      operation.  */
 
-  if (in_flight_stack != 0)
+  if (atomic_load(in_flight_stack) != 0)
     {
-      bool add_p = in_flight_stack & 1;
-      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);
+      bool add_p = atomic_load(in_flight_stack) & 1;
+      list_t *elem = (list_t *) (atomic_load(in_flight_stack) & ~(uintptr_t) 1);
 
       if (add_p)
 	{
@@ -849,13 +849,13 @@
 	     case we only need to check the beginning of these lists.  */
 	  int check_list (list_t *l)
 	  {
-	    if (l->next->prev != l)
+	    if (atomic_load(l->next->prev) != l)
 	      {
-		assert (l->next->prev == elem);
+		assert (atomic_load(l->next->prev) == elem);
 
-		elem->next = l->next;
-		elem->prev = l;
-		l->next = elem;
+		atomic_store(elem->next, l->next);
+		atomic_store(elem->prev, l);
+		atomic_store(l->next, elem);
 
 		return 1;
 	      }
@@ -869,8 +869,8 @@
       else
 	{
 	  /* We can simply always replay the delete operation.  */
-	  elem->next->prev = elem->prev;
-	  elem->prev->next = elem->next;
+	  atomic_store(elem->next->prev, elem->prev);
+	  atomic_store(elem->prev->next, elem->next);
 	}
     }
 
@@ -882,7 +882,7 @@
       if (curp != self)
 	{
 	  /* This marks the stack as free.  */
-	  curp->tid = 0;
+	  atomic_store(curp->tid, 0);
 
 	  /* The PID field must be initialized for the new process.  */
 	  curp->pid = self->pid;
@@ -939,7 +939,7 @@
   /* There is one thread running.  */
   __nptl_nthreads = 1;
 
-  in_flight_stack = 0;
+  atomic_store(in_flight_stack, 0);
 
   /* Initialize locks.  */
   stack_cache_lock = LLL_LOCK_INITIALIZER;
@@ -966,7 +966,7 @@
 
       curp = list_entry (runp, struct pthread, list);
 
-      if (curp->tid == tid)
+      if (atomic_load(curp->tid) == tid)
 	{
 	  result = curp;
 	  goto out;
@@ -980,7 +980,7 @@
 
       curp = list_entry (runp, struct pthread, list);
 
-      if (curp->tid == tid)
+      if (atomic_load(curp->tid) == tid)
 	{
 	  result = curp;
 	  goto out;
@@ -1002,18 +1002,18 @@
   int ch;
 
   /* Wait until this thread is cloned.  */
-  if (t->setxid_futex == -1
+  if (atomic_load(t->setxid_futex) == -1
       && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
     do
       lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
-    while (t->setxid_futex == -2);
+    while (atomic_load(t->setxid_futex) == -2);
 
   /* Don't let the thread exit before the setxid handler runs.  */
-  t->setxid_futex = 0;
+  atomic_store(t->setxid_futex, 0);
 
   do
     {
-      ch = t->cancelhandling;
+      ch = atomic_load(t->cancelhandling);
 
       /* If the thread is exiting right now, ignore it.  */
       if ((ch & EXITING_BITMASK) != 0)
@@ -1022,7 +1022,7 @@
 	     progress.  */
 	  if ((ch & SETXID_BITMASK) == 0)
 	    {
-	      t->setxid_futex = 1;
+	      atomic_store(t->setxid_futex, 1);
 	      lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
 	    }
 	  return;
@@ -1041,7 +1041,7 @@
 
   do
     {
-      ch = t->cancelhandling;
+      ch = atomic_load(t->cancelhandling);
       if ((ch & SETXID_BITMASK) == 0)
 	return;
     }
@@ -1049,7 +1049,7 @@
 					       ch & ~SETXID_BITMASK, ch));
 
   /* Release the futex just in case.  */
-  t->setxid_futex = 1;
+  atomic_store(t->setxid_futex, 1);
   lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
 }
 
@@ -1058,13 +1058,13 @@
 internal_function
 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
 {
-  if ((t->cancelhandling & SETXID_BITMASK) == 0)
+  if ((atomic_load(t->cancelhandling) & SETXID_BITMASK) == 0)
     return 0;
 
   int val;
   INTERNAL_SYSCALL_DECL (err);
   val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
-			  t->tid, SIGSETXID);
+			  atomic_load(t->tid), SIGSETXID);
 
   /* If this failed, it must have had not started yet or else exited.  */
   if (!INTERNAL_SYSCALL_ERROR_P (val, err))
diff -r --unified eglibc-2.19.orig/nptl/descr.h eglibc-2.19/nptl/descr.h
--- eglibc-2.19.orig/nptl/descr.h	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/descr.h	2014-12-02 16:58:38.757874337 +0100
@@ -342,7 +342,7 @@
      in normal operation.  */
   struct pthread *joinid;
   /* Check whether a thread is detached.  */
-#define IS_DETACHED(pd) ((pd)->joinid == (pd))
+#define IS_DETACHED(pd) (atomic_load((pd)->joinid) == (pd))
 
   /* The result of the thread function.  */
   void *result;
diff -r --unified eglibc-2.19.orig/nptl/nptl-init.c eglibc-2.19/nptl/nptl-init.c
--- eglibc-2.19.orig/nptl/nptl-init.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/nptl-init.c	2014-12-02 16:58:38.757874337 +0100
@@ -41,14 +41,14 @@
 size_t __static_tls_size;
 size_t __static_tls_align_m1;
 
-#ifndef __ASSUME_SET_ROBUST_LIST
-/* Negative if we do not have the system call and we can use it.  */
-int __set_robust_list_avail;
-# define set_robust_list_not_avail() \
-  __set_robust_list_avail = -1
-#else
+//#ifndef __ASSUME_SET_ROBUST_LIST
+///* Negative if we do not have the system call and we can use it.  */
+//int __set_robust_list_avail;
+//# define set_robust_list_not_avail()		\
+//  __set_robust_list_avail = -1
+//#else
 # define set_robust_list_not_avail() do { } while (0)
-#endif
+//#endif
 
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 /* Nonzero if we do not have FUTEX_CLOCK_REALTIME.  */
@@ -260,7 +260,7 @@
   while (flags != newval);
 
   /* And release the futex.  */
-  self->setxid_futex = 1;
+  atomic_store(self->setxid_futex, 1);
   lll_futex_wake (&self->setxid_futex, 1, LLL_PRIVATE);
 
   if (atomic_decrement_val (&__xidcmd->cntr) == 0)
diff -r --unified eglibc-2.19.orig/nptl/pthread_barrier_wait.c eglibc-2.19/nptl/pthread_barrier_wait.c
--- eglibc-2.19.orig/nptl/pthread_barrier_wait.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_barrier_wait.c	2014-12-02 16:58:38.757874337 +0100
@@ -41,7 +41,10 @@
     {
       /* Yes. Increment the event counter to avoid invalid wake-ups and
 	 tell the current waiters that it is their turn.  */
-      ++ibarrier->curr_event;
+
+      // MVEE patch: with partial lock ordering, the original (non-atomic)
+      // increment raced with the curr_event re-read in the futex_wait loop below.
+      atomic_increment(&ibarrier->curr_event);
 
       /* Wake up everybody.  */
       lll_futex_wake (&ibarrier->curr_event, INT_MAX,
@@ -54,7 +57,9 @@
     {
       /* The number of the event we are waiting for.  The barrier's event
 	 number must be bumped before we continue.  */
-      unsigned int event = ibarrier->curr_event;
+
+      // MVEE patch: a plain read of curr_event is racy with partial lock ordering
+      unsigned int event = atomic_load(ibarrier->curr_event);
 
       /* Before suspending, make the barrier available to others.  */
       lll_unlock (ibarrier->lock, ibarrier->private ^ FUTEX_PRIVATE_FLAG);
@@ -63,11 +68,11 @@
       do
 	lll_futex_wait (&ibarrier->curr_event, event,
 			ibarrier->private ^ FUTEX_PRIVATE_FLAG);
-      while (event == *(volatile unsigned int *)&ibarrier->curr_event);
+      while (event == atomic_load(*(volatile unsigned int *)&ibarrier->curr_event));
     }
 
   /* Make sure the init_count is stored locally or in a register.  */
-  unsigned int init_count = ibarrier->init_count;
+  unsigned int init_count = atomic_load(ibarrier->init_count);
 
   /* If this was the last woken thread, unlock.  */
   if (atomic_increment_val (&ibarrier->left) == init_count)
diff -r --unified eglibc-2.19.orig/nptl/pthread_cancel.c eglibc-2.19/nptl/pthread_cancel.c
--- eglibc-2.19.orig/nptl/pthread_cancel.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_cancel.c	2014-12-02 16:58:38.758874337 +0100
@@ -44,7 +44,7 @@
   do
     {
     again:
-      oldval = pd->cancelhandling;
+      oldval = atomic_load(pd->cancelhandling);
       newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
 
       /* Avoid doing unnecessary work.  The atomic operation can
@@ -76,7 +76,7 @@
 	     is not guaranteed to be async-safe.  */
 	  int val;
 	  val = INTERNAL_SYSCALL (tgkill, err, 3,
-				  THREAD_GETMEM (THREAD_SELF, pid), pd->tid,
+				  THREAD_GETMEM (THREAD_SELF, pid), atomic_load(pd->tid),
 				  SIGCANCEL);
 
 	  if (INTERNAL_SYSCALL_ERROR_P (val, err))
diff -r --unified eglibc-2.19.orig/nptl/pthread_cond_signal.c eglibc-2.19/nptl/pthread_cond_signal.c
--- eglibc-2.19.orig/nptl/pthread_cond_signal.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_cond_signal.c	2014-12-08 19:10:59.493996012 +0100
@@ -63,12 +63,18 @@
 	}
       else
 #endif
+
+	/* MVEE patch: since this syscall also unlocks the cond_lock, we would
+	   have to wrap it in a (VERY long) critical section.  It is actually
+	   faster to skip it and just use the plain futex wake below.  */
+#if 0
 	/* Wake one.  */
 	if (! __builtin_expect (lll_futex_wake_unlock (&cond->__data.__futex,
 						       1, 1,
 						       &cond->__data.__lock,
 						       pshared), 0))
 	  return 0;
+#endif
 
       /* Fallback if neither of them work.  */
       lll_futex_wake (&cond->__data.__futex, 1, pshared);
diff -r --unified eglibc-2.19.orig/nptl/pthread_create.c eglibc-2.19/nptl/pthread_create.c
--- eglibc-2.19.orig/nptl/pthread_create.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_create.c	2014-12-02 16:58:38.758874337 +0100
@@ -268,7 +268,7 @@
   /* If the parent was running cancellation handlers while creating
      the thread the new thread inherited the signal mask.  Reset the
      cancellation signal mask.  */
-  if (__builtin_expect (pd->parent_cancelhandling & CANCELING_BITMASK, 0))
+  if (__builtin_expect (atomic_load(pd->parent_cancelhandling) & CANCELING_BITMASK, 0))
     {
       INTERNAL_SYSCALL_DECL (err);
       sigset_t mask;
@@ -354,7 +354,7 @@
 	      pd->eventbuf.eventdata = pd;
 
 	      do
-		pd->nextevent = __nptl_last_event;
+		pd->nextevent = atomic_load(__nptl_last_event);
 	      while (atomic_compare_and_exchange_bool_acq (&__nptl_last_event,
 							   pd, pd->nextevent));
 	    }
@@ -430,16 +430,16 @@
   if (IS_DETACHED (pd))
     /* Free the TCB.  */
     __free_tcb (pd);
-  else if (__builtin_expect (pd->cancelhandling & SETXID_BITMASK, 0))
+  else if (__builtin_expect (atomic_load(pd->cancelhandling) & SETXID_BITMASK, 0))
     {
       /* Some other thread might call any of the setXid functions and expect
 	 us to reply.  In this case wait until we did that.  */
       do
 	lll_futex_wait (&pd->setxid_futex, 0, LLL_PRIVATE);
-      while (pd->cancelhandling & SETXID_BITMASK);
+      while (atomic_load(pd->cancelhandling) & SETXID_BITMASK);
 
       /* Reset the value so that the stack can be reused.  */
-      pd->setxid_futex = 0;
+      atomic_store(pd->setxid_futex, 0);
     }
 
   /* We cannot call '_exit' here.  '_exit' will terminate the process.
diff -r --unified eglibc-2.19.orig/nptl/pthread_detach.c eglibc-2.19/nptl/pthread_detach.c
--- eglibc-2.19.orig/nptl/pthread_detach.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_detach.c	2014-12-02 16:58:38.759874337 +0100
@@ -47,7 +47,7 @@
   else
     /* Check whether the thread terminated meanwhile.  In this case we
        will just free the TCB.  */
-    if ((pd->cancelhandling & EXITING_BITMASK) != 0)
+    if ((atomic_load(pd->cancelhandling) & EXITING_BITMASK) != 0)
       /* Note that the code in __free_tcb makes sure each thread
 	 control block is freed only once.  */
       __free_tcb (pd);
diff -r --unified eglibc-2.19.orig/nptl/pthread_getattr_np.c eglibc-2.19/nptl/pthread_getattr_np.c
--- eglibc-2.19.orig/nptl/pthread_getattr_np.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_getattr_np.c	2014-12-02 16:58:38.759874337 +0100
@@ -70,7 +70,7 @@
     {
       /* No stack information available.  This must be for the initial
 	 thread.  Get the info in some magical way.  */
-      assert (abs (thread->pid) == thread->tid);
+      assert (abs (thread->pid) == atomic_load(thread->tid));
 
       /* Stack size limit.  */
       struct rlimit rl;
diff -r --unified eglibc-2.19.orig/nptl/pthread_getcpuclockid.c eglibc-2.19/nptl/pthread_getcpuclockid.c
--- eglibc-2.19.orig/nptl/pthread_getcpuclockid.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/pthread_getcpuclockid.c	2014-12-02 16:58:38.759874337 +0100
@@ -42,11 +42,11 @@
 
      If some day more clock IDs are needed the ID part can be
      enlarged.  The IDs are entirely internal.  */
-  if (pd->tid >= 1 << (8 * sizeof (*clockid) - CLOCK_IDFIELD_SIZE))
+  if (atomic_load(pd->tid) >= 1 << (8 * sizeof (*clockid) - CLOCK_IDFIELD_SIZE))
     return ERANGE;
 
   /* Store the number.  */
-  *clockid = CLOCK_THREAD_CPUTIME_ID | (pd->tid << CLOCK_IDFIELD_SIZE);
+  *clockid = CLOCK_THREAD_CPUTIME_ID | (atomic_load(pd->tid) << CLOCK_IDFIELD_SIZE);
 
   return 0;
 #else
diff -r --unified eglibc-2.19.orig/nptl/pthread_getschedparam.c eglibc-2.19/nptl/pthread_getschedparam.c
--- eglibc-2.19.orig/nptl/pthread_getschedparam.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_getschedparam.c	2014-12-02 16:58:38.760874337 +0100
@@ -46,7 +46,7 @@
      not yet been retrieved do it now.  */
   if ((pd->flags & ATTR_FLAG_SCHED_SET) == 0)
     {
-      if (__sched_getparam (pd->tid, &pd->schedparam) != 0)
+      if (__sched_getparam (atomic_load(pd->tid), &pd->schedparam) != 0)
 	result = 1;
       else
 	pd->flags |= ATTR_FLAG_SCHED_SET;
@@ -54,7 +54,7 @@
 
   if ((pd->flags & ATTR_FLAG_POLICY_SET) == 0)
     {
-      pd->schedpolicy = __sched_getscheduler (pd->tid);
+      pd->schedpolicy = __sched_getscheduler (atomic_load(pd->tid));
       if (pd->schedpolicy == -1)
 	result = 1;
       else
diff -r --unified eglibc-2.19.orig/nptl/pthread_join.c eglibc-2.19/nptl/pthread_join.c
--- eglibc-2.19.orig/nptl/pthread_join.c	2014-05-12 14:05:12.419661620 +0200
+++ eglibc-2.19/nptl/pthread_join.c	2014-12-02 16:58:38.760874337 +0100
@@ -67,11 +67,11 @@
   int oldtype = CANCEL_ASYNC ();
 
   if ((pd == self
-       || (self->joinid == pd
-	   && (pd->cancelhandling
+       || (atomic_load(self->joinid) == pd
+	   && (atomic_load(pd->cancelhandling)
 	       & (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK
 		  | TERMINATED_BITMASK)) == 0))
-      && !CANCEL_ENABLED_AND_CANCELED (self->cancelhandling))
+      && !CANCEL_ENABLED_AND_CANCELED (atomic_load(self->cancelhandling)))
     /* This is a deadlock situation.  The threads are waiting for each
        other to finish.  Note that this is a "may" error.  To be 100%
        sure we catch this error we would have to lock the data
@@ -102,7 +102,7 @@
   if (__builtin_expect (result == 0, 1))
     {
       /* We mark the thread as terminated and as joined.  */
-      pd->tid = -1;
+      atomic_store(pd->tid, -1);
 
       /* Store the return value if the caller is interested.  */
       if (thread_return != NULL)
diff -r --unified eglibc-2.19.orig/nptl/pthread_key_create.c eglibc-2.19/nptl/pthread_key_create.c
--- eglibc-2.19.orig/nptl/pthread_key_create.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/pthread_key_create.c	2014-12-02 16:58:38.760874337 +0100
@@ -29,7 +29,7 @@
   /* Find a slot in __pthread_kyes which is unused.  */
   for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; ++cnt)
     {
-      uintptr_t seq = __pthread_keys[cnt].seq;
+      uintptr_t seq = atomic_load(__pthread_keys[cnt].seq);
 
       if (KEY_UNUSED (seq) && KEY_USABLE (seq)
 	  /* We found an unused slot.  Try to allocate it.  */
diff -r --unified eglibc-2.19.orig/nptl/pthread_key_delete.c eglibc-2.19/nptl/pthread_key_delete.c
--- eglibc-2.19.orig/nptl/pthread_key_delete.c	2014-05-12 14:05:12.447661620 +0200
+++ eglibc-2.19/nptl/pthread_key_delete.c	2014-12-02 16:58:38.760874337 +0100
@@ -29,7 +29,7 @@
 
   if (__builtin_expect (key < PTHREAD_KEYS_MAX, 1))
     {
-      unsigned int seq = __pthread_keys[key].seq;
+      unsigned int seq = atomic_load(__pthread_keys[key].seq);
 
       if (__builtin_expect (! KEY_UNUSED (seq), 1)
 	  && ! atomic_compare_and_exchange_bool_acq (&__pthread_keys[key].seq,
diff -r --unified eglibc-2.19.orig/nptl/pthread_mutex_lock.c eglibc-2.19/nptl/pthread_mutex_lock.c
--- eglibc-2.19.orig/nptl/pthread_mutex_lock.c	2014-05-12 14:05:12.447661620 +0200
+++ eglibc-2.19/nptl/pthread_mutex_lock.c	2014-12-02 16:58:38.761874337 +0100
@@ -183,7 +183,7 @@
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
 
-      oldval = mutex->__data.__lock;
+      oldval = atomic_load(mutex->__data.__lock);
       do
 	{
 	again:
@@ -293,7 +293,7 @@
 			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
 				   | 1));
 
-	oldval = mutex->__data.__lock;
+	oldval = atomic_load(mutex->__data.__lock);
 
 	/* Check whether we already hold the mutex.  */
 	if (__builtin_expect ((oldval & FUTEX_TID_MASK) == id, 0))
@@ -354,7 +354,7 @@
 		  pause_not_cancel ();
 	      }
 
-	    oldval = mutex->__data.__lock;
+	    oldval = atomic_load(mutex->__data.__lock);
 
 	    assert (robust || (oldval & FUTEX_OWNER_DIED) == 0);
 	  }
@@ -416,7 +416,7 @@
       {
 	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
 
-	oldval = mutex->__data.__lock;
+	oldval = atomic_load(mutex->__data.__lock);
 
 	/* Check whether we already hold the mutex.  */
 	if (mutex->__data.__owner == id)
diff -r --unified eglibc-2.19.orig/nptl/pthread_mutex_setprioceiling.c eglibc-2.19/nptl/pthread_mutex_setprioceiling.c
--- eglibc-2.19.orig/nptl/pthread_mutex_setprioceiling.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_mutex_setprioceiling.c	2014-12-02 16:58:38.761874337 +0100
@@ -33,7 +33,7 @@
   if ((mutex->__data.__kind & PTHREAD_MUTEX_PRIO_PROTECT_NP) == 0)
     return EINVAL;
 
-  if (__sched_fifo_min_prio == -1)
+  if (atomic_load(__sched_fifo_min_prio) == -1)
     __init_sched_fifo_prio ();
 
   if (__builtin_expect (prioceiling < __sched_fifo_min_prio, 0)
@@ -56,7 +56,7 @@
 	locked = true;
     }
 
-  int oldval = mutex->__data.__lock;
+  int oldval = atomic_load(mutex->__data.__lock);
   if (! locked)
     do
       {
@@ -106,9 +106,9 @@
 
   int newlock = 0;
   if (locked)
-    newlock = (mutex->__data.__lock & ~PTHREAD_MUTEX_PRIO_CEILING_MASK);
-  mutex->__data.__lock = newlock
-			 | (prioceiling << PTHREAD_MUTEX_PRIO_CEILING_SHIFT);
+    newlock = (atomic_load(mutex->__data.__lock) & ~PTHREAD_MUTEX_PRIO_CEILING_MASK);
+  atomic_store(mutex->__data.__lock, newlock
+	       | (prioceiling << PTHREAD_MUTEX_PRIO_CEILING_SHIFT));
   atomic_full_barrier ();
 
   lll_futex_wake (&mutex->__data.__lock, INT_MAX,
diff -r --unified eglibc-2.19.orig/nptl/pthread_mutex_timedlock.c eglibc-2.19/nptl/pthread_mutex_timedlock.c
--- eglibc-2.19.orig/nptl/pthread_mutex_timedlock.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_mutex_timedlock.c	2014-12-02 16:58:38.762874337 +0100
@@ -141,7 +141,7 @@
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
 
-      oldval = mutex->__data.__lock;
+      oldval = atomic_load(mutex->__data.__lock);
       do
 	{
 	again:
@@ -247,7 +247,7 @@
 			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
 				   | 1));
 
-	oldval = mutex->__data.__lock;
+	oldval = atomic_load(mutex->__data.__lock);
 
 	/* Check whether we already hold the mutex.  */
 	if (__builtin_expect ((oldval & FUTEX_TID_MASK) == id, 0))
@@ -332,7 +332,7 @@
 		return INTERNAL_SYSCALL_ERRNO (e, __err);
 	      }
 
-	    oldval = mutex->__data.__lock;
+	    oldval = atomic_load(mutex->__data.__lock);
 
 	    assert (robust || (oldval & FUTEX_OWNER_DIED) == 0);
 	  }
@@ -389,7 +389,7 @@
       {
 	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
 
-	oldval = mutex->__data.__lock;
+	oldval = atomic_load(mutex->__data.__lock);
 
 	/* Check whether we already hold the mutex.  */
 	if (mutex->__data.__owner == id)
diff -r --unified eglibc-2.19.orig/nptl/pthread_mutex_trylock.c eglibc-2.19/nptl/pthread_mutex_trylock.c
--- eglibc-2.19.orig/nptl/pthread_mutex_trylock.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/pthread_mutex_trylock.c	2014-12-02 16:58:38.762874337 +0100
@@ -98,7 +98,7 @@
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
 
-      oldval = mutex->__data.__lock;
+      oldval = atomic_load(mutex->__data.__lock);
       do
 	{
 	again:
@@ -208,7 +208,7 @@
 			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
 				   | 1));
 
-	oldval = mutex->__data.__lock;
+	oldval = atomic_load(mutex->__data.__lock);
 
 	/* Check whether we already hold the mutex.  */
 	if (__builtin_expect ((oldval & FUTEX_TID_MASK) == id, 0))
@@ -268,7 +268,7 @@
 		return EBUSY;
 	      }
 
-	    oldval = mutex->__data.__lock;
+	    oldval = atomic_load(mutex->__data.__lock);
 	  }
 
 	if (__builtin_expect (oldval & FUTEX_OWNER_DIED, 0))
@@ -327,7 +327,7 @@
       {
 	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
 
-	oldval = mutex->__data.__lock;
+	oldval = atomic_load(mutex->__data.__lock);
 
 	/* Check whether we already hold the mutex.  */
 	if (mutex->__data.__owner == id)
diff -r --unified eglibc-2.19.orig/nptl/pthread_mutex_unlock.c eglibc-2.19/nptl/pthread_mutex_unlock.c
--- eglibc-2.19.orig/nptl/pthread_mutex_unlock.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/pthread_mutex_unlock.c	2014-12-02 16:58:38.762874337 +0100
@@ -229,7 +229,7 @@
 	--mutex->__data.__nusers;
 
       /* Unlock.  */
-      if ((mutex->__data.__lock & FUTEX_WAITERS) != 0
+      if ((atomic_load(mutex->__data.__lock) & FUTEX_WAITERS) != 0
 	  || atomic_compare_and_exchange_bool_rel (&mutex->__data.__lock, 0,
 						   THREAD_GETMEM (THREAD_SELF,
 								  tid)))
@@ -277,7 +277,7 @@
       int newval, oldval;
       do
 	{
-	  oldval = mutex->__data.__lock;
+	  oldval = atomic_load(mutex->__data.__lock);
 	  newval = oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK;
 	}
       while (atomic_compare_and_exchange_bool_rel (&mutex->__data.__lock,
diff -r --unified eglibc-2.19.orig/nptl/pthread_once.c eglibc-2.19/nptl/pthread_once.c
--- eglibc-2.19.orig/nptl/pthread_once.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_once.c	2014-12-02 16:58:38.763874337 +0100
@@ -1,6 +1,5 @@
-/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+/* Copyright (C) 2004-2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -9,46 +8,82 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
+   License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 #include "pthreadP.h"
 #include <lowlevellock.h>
 
+unsigned long int __fork_generation attribute_hidden;
 
+static void
+clear_once_control (void *arg)
+{
+  pthread_once_t *once_control = (pthread_once_t *) arg;
 
-static int once_lock = LLL_LOCK_INITIALIZER;
-
+  atomic_store(*once_control, 0);
+  lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
+}
 
 int
-__pthread_once (once_control, init_routine)
-     pthread_once_t *once_control;
-     void (*init_routine) (void);
+__pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
 {
-  /* XXX Depending on whether the LOCK_IN_ONCE_T is defined use a
-     global lock variable or one which is part of the pthread_once_t
-     object.  */
-  if (*once_control == PTHREAD_ONCE_INIT)
+  for (;;)
     {
-      lll_lock (once_lock, LLL_PRIVATE);
+      int oldval;
+      int newval;
 
-      /* XXX This implementation is not complete.  It doesn't take
-	 cancelation and fork into account.  */
-      if (*once_control == PTHREAD_ONCE_INIT)
+      /* Pseudo code:
+	 newval = __fork_generation | 1;
+	 oldval = *once_control;
+	 if ((oldval & 2) == 0)
+	   *once_control = newval;
+	 Do this atomically.
+      */
+      do
 	{
-	  init_routine ();
+	  newval = atomic_load(__fork_generation) | 1;
+	  oldval = atomic_load(*once_control);
+	  if (oldval & 2)
+	    break;
+	} while (atomic_compare_and_exchange_val_acq (once_control, newval, oldval) != oldval);
+
+      /* Bit 1 set: the initializer has already completed.  */
+      if ((oldval & 2) != 0)
+	return 0;
+
+      /* Bit 0 was clear: nobody ran the initializer and we just claimed it.  */
+      if ((oldval & 1) == 0)
+	break;
+
+      /* Claimed under another fork generation: take the stale claim over.  */
+      if (oldval != newval)
+	break;
 
-	  *once_control = !PTHREAD_ONCE_INIT;
-	}
-
-      lll_unlock (once_lock, LLL_PRIVATE);
+      /* Same generation, some other thread was faster. Wait.  */
+      lll_futex_wait (once_control, oldval, LLL_PRIVATE);
     }
 
+  /* This thread is the first here.  Do the initialization.
+     Register a cleanup handler so that in case the thread gets
+     interrupted the initialization can be restarted.  */
+  pthread_cleanup_push (clear_once_control, once_control);
+
+  init_routine ();
+
+  pthread_cleanup_pop (0);
+
+  /* Say that the initialisation is done.  */
+  atomic_store(*once_control, __fork_generation | 2);
+
+  /* Wake up all other threads.  */
+  lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
+
   return 0;
 }
-strong_alias (__pthread_once, pthread_once)
+weak_alias (__pthread_once, pthread_once)
 hidden_def (__pthread_once)
diff -r --unified eglibc-2.19.orig/nptl/pthread_setschedparam.c eglibc-2.19/nptl/pthread_setschedparam.c
--- eglibc-2.19.orig/nptl/pthread_setschedparam.c	2014-05-12 14:05:12.419661620 +0200
+++ eglibc-2.19/nptl/pthread_setschedparam.c	2014-12-02 16:58:38.763874337 +0100
@@ -54,7 +54,7 @@
     }
 
   /* Try to set the scheduler information.  */
-  if (__builtin_expect (__sched_setscheduler (pd->tid, policy,
+  if (__builtin_expect (__sched_setscheduler (atomic_load(pd->tid), policy,
 					      param) == -1, 0))
     result = errno;
   else
diff -r --unified eglibc-2.19.orig/nptl/pthread_setschedprio.c eglibc-2.19/nptl/pthread_setschedprio.c
--- eglibc-2.19.orig/nptl/pthread_setschedprio.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_setschedprio.c	2014-12-02 16:58:38.763874337 +0100
@@ -48,7 +48,7 @@
     param.sched_priority = pd->tpp->priomax;
 
   /* Try to set the scheduler information.  */
-  if (__builtin_expect (sched_setparam (pd->tid, &param) == -1, 0))
+  if (__builtin_expect (sched_setparam (atomic_load(pd->tid), &param) == -1, 0))
     result = errno;
   else
     {
diff -r --unified eglibc-2.19.orig/nptl/pthread_spin_lock.c eglibc-2.19/nptl/pthread_spin_lock.c
--- eglibc-2.19.orig/nptl/pthread_spin_lock.c	2014-05-12 14:05:12.423661620 +0200
+++ eglibc-2.19/nptl/pthread_spin_lock.c	2014-12-02 16:58:38.764874337 +0100
@@ -54,12 +54,12 @@
 	{
 	  int wait = SPIN_LOCK_READS_BETWEEN_CMPXCHG;
 
-	  while (*lock != 0 && wait > 0)
+	  while (atomic_load(*lock) != 0 && wait > 0)
 	    --wait;
 	}
       else
 	{
-	  while (*lock != 0)
+	  while (atomic_load(*lock) != 0)
 	    ;
 	}
     }
diff -r --unified eglibc-2.19.orig/nptl/pthread_spin_unlock.c eglibc-2.19/nptl/pthread_spin_unlock.c
--- eglibc-2.19.orig/nptl/pthread_spin_unlock.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_spin_unlock.c	2014-12-02 16:58:38.764874337 +0100
@@ -24,6 +24,6 @@
 pthread_spin_unlock (pthread_spinlock_t *lock)
 {
   atomic_full_barrier ();
-  *lock = 0;
+  atomic_store(*lock, 0);
   return 0;
 }
diff -r --unified eglibc-2.19.orig/nptl/pthread_timedjoin.c eglibc-2.19/nptl/pthread_timedjoin.c
--- eglibc-2.19.orig/nptl/pthread_timedjoin.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/pthread_timedjoin.c	2014-12-02 16:58:38.764874337 +0100
@@ -50,7 +50,7 @@
     return EINVAL;
 
   self = THREAD_SELF;
-  if (pd == self || self->joinid == pd)
+  if (pd == self || atomic_load(self->joinid) == pd)
     /* This is a deadlock situation.  The threads are waiting for each
        other to finish.  Note that this is a "may" error.  To be 100%
        sure we catch this error we would have to lock the data
@@ -100,7 +100,7 @@
       __free_tcb (pd);
     }
   else
-    pd->joinid = NULL;
+    atomic_store(pd->joinid, NULL);
 
   return result;
 }
diff -r --unified eglibc-2.19.orig/nptl/pthread_tryjoin.c eglibc-2.19/nptl/pthread_tryjoin.c
--- eglibc-2.19.orig/nptl/pthread_tryjoin.c	2014-05-12 14:05:12.443661620 +0200
+++ eglibc-2.19/nptl/pthread_tryjoin.c	2014-12-02 16:58:38.764874337 +0100
@@ -42,7 +42,7 @@
     return EINVAL;
 
   self = THREAD_SELF;
-  if (pd == self || self->joinid == pd)
+  if (pd == self || atomic_load(self->joinid) == pd)
     /* This is a deadlock situation.  The threads are waiting for each
        other to finish.  Note that this is a "may" error.  To be 100%
        sure we catch this error we would have to lock the data
@@ -53,7 +53,7 @@
     return EDEADLK;
 
   /* Return right away if the thread hasn't terminated yet.  */
-  if (pd->tid != 0)
+  if (atomic_load(pd->tid) != 0)
     return EBUSY;
 
   /* Wait for the thread to finish.  If it is already locked something
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/pthread/createthread.c eglibc-2.19/nptl/sysdeps/pthread/createthread.c
--- eglibc-2.19.orig/nptl/sysdeps/pthread/createthread.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/sysdeps/pthread/createthread.c	2014-12-02 16:58:38.765874337 +0100
@@ -211,7 +211,7 @@
 
 	      /* Enqueue the descriptor.  */
 	      do
-		pd->nextevent = __nptl_last_event;
+		pd->nextevent = atomic_load(__nptl_last_event);
 	      while (atomic_compare_and_exchange_bool_acq (&__nptl_last_event,
 							   pd, pd->nextevent)
 		     != 0);
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/pthread/list.h eglibc-2.19/nptl/sysdeps/pthread/list.h
--- eglibc-2.19.orig/nptl/sysdeps/pthread/list.h	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/pthread/list.h	2014-12-02 16:58:38.765874337 +0100
@@ -57,11 +57,11 @@
 static inline void
 list_add (list_t *newp, list_t *head)
 {
-  newp->next = head->next;
+  newp->next = atomic_load(head->next);
   newp->prev = head;
-  head->next->prev = newp;
+  atomic_store(head->next->prev, newp);
   atomic_write_barrier ();
-  head->next = newp;
+  atomic_store(head->next, newp);
 }
 
 
@@ -69,8 +69,8 @@
 static inline void
 list_del (list_t *elem)
 {
-  elem->next->prev = elem->prev;
-  elem->prev->next = elem->next;
+  atomic_store(elem->next->prev, elem->prev);
+  atomic_store(elem->prev->next, elem->next);
 }
 
 
@@ -79,12 +79,12 @@
 list_splice (list_t *add, list_t *head)
 {
   /* Do nothing if the list which gets added is empty.  */
-  if (add != add->next)
+  if (add != atomic_load(add->next))
     {
-      add->next->prev = head;
-      add->prev->next = head->next;
-      head->next->prev = add->prev;
-      head->next = add->next;
+      atomic_store(add->next->prev, head);
+      atomic_store(add->prev->next, head->next);
+      atomic_store(head->next->prev, add->prev);
+      atomic_store(head->next, add->next);
     }
 }
 
@@ -97,20 +97,20 @@
 
 /* Iterate forward over the elements of the list.  */
 # define list_for_each(pos, head) \
-  for (pos = (head)->next; pos != (head); pos = pos->next)
+  for (pos = atomic_load((head)->next); pos != (head); pos = atomic_load(pos->next))
 
 
 /* Iterate forward over the elements of the list.  */
 # define list_for_each_prev(pos, head) \
-  for (pos = (head)->prev; pos != (head); pos = pos->prev)
+  for (pos = atomic_load((head)->prev); pos != (head); pos = atomic_load(pos->prev))
 
 
 /* Iterate backwards over the elements list.  The list elements can be
    removed from the list while doing this.  */
 # define list_for_each_prev_safe(pos, p, head) \
-  for (pos = (head)->prev, p = pos->prev; \
+  for (pos = atomic_load((head)->prev), p = atomic_load(pos->prev);	\
        pos != (head); \
-       pos = p, p = pos->prev)
+       pos = p, p = atomic_load(pos->prev))
 
 #endif /* _LIST_H */
 
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/pthread/unwind-forcedunwind.c eglibc-2.19/nptl/sysdeps/pthread/unwind-forcedunwind.c
--- eglibc-2.19.orig/nptl/sysdeps/pthread/unwind-forcedunwind.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/pthread/unwind-forcedunwind.c	2014-12-02 16:58:38.766874337 +0100
@@ -42,7 +42,7 @@
   void *getcfa;
   void *handle;
 
-  if (__builtin_expect (libgcc_s_handle != NULL, 1))
+  if (__builtin_expect (atomic_load(libgcc_s_handle) != NULL, 1))
     {
       /* Force gcc to reload all values.  */
       asm volatile ("" ::: "memory");
@@ -75,17 +75,17 @@
      pthread_cancel_init might return early even when the pointer the
      caller is interested in is not initialized yet.  */
   atomic_write_barrier ();
-  libgcc_s_handle = handle;
+  atomic_store(libgcc_s_handle, handle);
 }
 
 void
 __libc_freeres_fn_section
 __unwind_freeres (void)
 {
-  void *handle = libgcc_s_handle;
+  void *handle = atomic_load(libgcc_s_handle);
   if (handle != NULL)
     {
-      libgcc_s_handle = NULL;
+      atomic_store(libgcc_s_handle, NULL);
       __libc_dlclose (handle);
     }
 }
@@ -93,7 +93,7 @@
 void
 _Unwind_Resume (struct _Unwind_Exception *exc)
 {
-  if (__builtin_expect (libgcc_s_handle == NULL, 0))
+  if (__builtin_expect (atomic_load(libgcc_s_handle) == NULL, 0))
     pthread_cancel_init ();
   else
     atomic_read_barrier ();
@@ -109,7 +109,7 @@
 		      struct _Unwind_Exception *ue_header,
 		      struct _Unwind_Context *context)
 {
-  if (__builtin_expect (libgcc_s_handle == NULL, 0))
+  if (__builtin_expect (atomic_load(libgcc_s_handle) == NULL, 0))
     pthread_cancel_init ();
   else
     atomic_read_barrier ();
@@ -125,7 +125,7 @@
 _Unwind_ForcedUnwind (struct _Unwind_Exception *exc, _Unwind_Stop_Fn stop,
 		      void *stop_argument)
 {
-  if (__builtin_expect (libgcc_s_handle == NULL, 0))
+  if (__builtin_expect (atomic_load(libgcc_s_handle) == NULL, 0))
     pthread_cancel_init ();
   else
     atomic_read_barrier ();
@@ -140,7 +140,7 @@
 _Unwind_Word
 _Unwind_GetCFA (struct _Unwind_Context *context)
 {
-  if (__builtin_expect (libgcc_s_handle == NULL, 0))
+  if (__builtin_expect (atomic_load(libgcc_s_handle) == NULL, 0))
     pthread_cancel_init ();
   else
     atomic_read_barrier ();
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/fork.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/fork.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/fork.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/fork.c	2014-12-02 16:58:38.766874337 +0100
@@ -62,12 +62,12 @@
   /* Run all the registered preparation handlers.  In reverse order.
      While doing this we build up a list of all the entries.  */
   struct fork_handler *runp;
-  while ((runp = __fork_handlers) != NULL)
+  while ((runp = atomic_load(__fork_handlers)) != NULL)
     {
       /* Make sure we read from the current RUNP pointer.  */
       atomic_full_barrier ();
 
-      unsigned int oldval = runp->refcntr;
+      unsigned int oldval = atomic_load(runp->refcntr);
 
       if (oldval == 0)
 	/* This means some other thread removed the list just after
@@ -102,7 +102,7 @@
 	  allp = newp;
 
 	  /* Advance to the next handler.  */
-	  runp = runp->next;
+	  runp = atomic_load(runp->next);
 	  if (runp == NULL)
 	    break;
 
@@ -138,6 +138,8 @@
     {
       struct pthread *self = THREAD_SELF;
 
+      mvee_invalidate_buffer();
+
       assert (THREAD_GETMEM (self, tid) != ppid);
 
       if (__fork_generation_pointer != NULL)
@@ -223,10 +225,10 @@
 	    allp->handler->parent_handler ();
 
 	  if (atomic_decrement_and_test (&allp->handler->refcntr)
-	      && allp->handler->need_signal)
+	      && atomic_load(allp->handler->need_signal))
 	    lll_futex_wake (allp->handler->refcntr, 1, LLL_PRIVATE);
 
-	  allp = allp->next;
+	  allp = atomic_load(allp->next);
 	}
     }
 
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/lowlevellock.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/lowlevellock.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/lowlevellock.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/lowlevellock.c	2014-12-02 16:58:38.766874337 +0100
@@ -26,7 +26,7 @@
 void
 __lll_lock_wait_private (int *futex)
 {
-  if (*futex == 2)
+  if (atomic_load(*futex) == 2)
     lll_futex_wait (futex, 2, LLL_PRIVATE);
 
   while (atomic_exchange_acq (futex, 2) != 0)
@@ -39,7 +39,7 @@
 void
 __lll_lock_wait (int *futex, int private)
 {
-  if (*futex == 2)
+  if (atomic_load(*futex) == 2)
     lll_futex_wait (futex, 2, private);
 
   while (atomic_exchange_acq (futex, 2) != 0)
@@ -92,7 +92,7 @@
     return EINVAL;
 
   /* Repeat until thread terminated.  */
-  while ((tid = *tidp) != 0)
+  while ((tid = atomic_load(*tidp)) != 0)
     {
       struct timeval tv;
       struct timespec rt;
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c	2014-05-12 14:05:12.439661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c	2014-12-02 16:58:38.767874337 +0100
@@ -26,7 +26,7 @@
 int
 __lll_robust_lock_wait (int *futex, int private)
 {
-  int oldval = *futex;
+  int oldval = atomic_load(*futex);
   int tid = THREAD_GETMEM (THREAD_SELF, tid);
 
   /* If the futex changed meanwhile try locking again.  */
@@ -64,7 +64,7 @@
     return EINVAL;
 
   int tid = THREAD_GETMEM (THREAD_SELF, tid);
-  int oldval = *futex;
+  int oldval = atomic_load(*futex);
 
   /* If the futex changed meanwhile try locking again.  */
   if (oldval == 0)
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_getaffinity.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_getaffinity.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_getaffinity.c	2014-05-12 14:05:12.435661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_getaffinity.c	2014-12-02 16:58:38.767874337 +0100
@@ -32,7 +32,7 @@
   const struct pthread *pd = (const struct pthread *) th;
 
   INTERNAL_SYSCALL_DECL (err);
-  int res = INTERNAL_SYSCALL (sched_getaffinity, err, 3, pd->tid,
+  int res = INTERNAL_SYSCALL (sched_getaffinity, err, 3, atomic_load(pd->tid),
 			      MIN (INT_MAX, cpusetsize), cpuset);
   if (INTERNAL_SYSCALL_ERROR_P (res, err))
     return INTERNAL_SYSCALL_ERRNO (res, err);
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_getcpuclockid.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_getcpuclockid.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_getcpuclockid.c	2014-05-12 14:05:12.435661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_getcpuclockid.c	2014-12-02 16:58:38.767874337 +0100
@@ -38,7 +38,7 @@
 
   /* The clockid_t value is a simple computation from the TID.  */
 
-  const clockid_t tidclock = MAKE_THREAD_CPUCLOCK (pd->tid, CPUCLOCK_SCHED);
+  const clockid_t tidclock = MAKE_THREAD_CPUCLOCK (atomic_load(pd->tid), CPUCLOCK_SCHED);
 
   *clockid = tidclock;
   return 0;
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_getname.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_getname.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_getname.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_getname.c	2014-12-02 16:58:38.767874337 +0100
@@ -46,7 +46,7 @@
 
 #define FMT "/proc/self/task/%u/comm"
   char fname[sizeof (FMT) + 8];
-  sprintf (fname, FMT, (unsigned int) pd->tid);
+  sprintf (fname, FMT, (unsigned int) atomic_load(pd->tid));
 
   int fd = open_not_cancel_2 (fname, O_RDONLY);
   if (fd == -1)
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_setaffinity.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_setaffinity.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_setaffinity.c	2014-05-12 14:05:12.439661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_setaffinity.c	2014-12-02 16:58:38.768874337 +0100
@@ -63,7 +63,7 @@
 
   if (__builtin_expect (__kernel_cpumask_size == 0, 0))
     {
-      res = __determine_cpumask_size (pd->tid);
+      res = __determine_cpumask_size (atomic_load(pd->tid));
       if (res != 0)
 	return res;
     }
@@ -76,7 +76,7 @@
 	 fulfilled.  */
       return EINVAL;
 
-  res = INTERNAL_SYSCALL (sched_setaffinity, err, 3, pd->tid, cpusetsize,
+  res = INTERNAL_SYSCALL (sched_setaffinity, err, 3, atomic_load(pd->tid), cpusetsize,
 			  cpuset);
 
 #ifdef RESET_VGETCPU_CACHE
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_setname.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_setname.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/pthread_setname.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/pthread_setname.c	2014-12-02 16:58:38.768874337 +0100
@@ -46,7 +46,7 @@
 
 #define FMT "/proc/self/task/%u/comm"
   char fname[sizeof (FMT) + 8];
-  sprintf (fname, FMT, (unsigned int) pd->tid);
+  sprintf (fname, FMT, (unsigned int) atomic_load(pd->tid));
 
   int fd = open_not_cancel_2 (fname, O_RDWR);
   if (fd == -1)
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/register-atfork.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/register-atfork.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/register-atfork.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/register-atfork.c	2014-12-02 16:58:38.768874337 +0100
@@ -49,18 +49,18 @@
     {
       /* Search for an empty entry.  */
       for (i = 0; i < NHANDLER; ++i)
-	if (runp->mem[i].refcntr == 0)
+	if (atomic_load(runp->mem[i].refcntr) == 0)
 	  goto found;
     }
-  while ((runp = runp->next) != NULL);
+  while ((runp = atomic_load(runp->next)) != NULL);
 
   /* We have to allocate a new entry.  */
   runp = (struct fork_handler_pool *) calloc (1, sizeof (*runp));
   if (runp != NULL)
     {
       /* Enqueue the new memory pool into the list.  */
-      runp->next = fork_handler_pool.next;
-      fork_handler_pool.next = runp;
+      runp->next = atomic_load(fork_handler_pool.next);
+      atomic_store(fork_handler_pool.next, runp);
 
       /* We use the last entry on the page.  This means when we start
 	 searching from the front the next time we will find the first
@@ -69,8 +69,8 @@
 
     found:
       result = &runp->mem[i];
-      result->refcntr = 1;
-      result->need_signal = 0;
+      atomic_store(result->refcntr, 1);
+      atomic_store(result->need_signal, 0);
     }
 
   return result;
@@ -113,7 +113,7 @@
 __linkin_atfork (struct fork_handler *newp)
 {
   do
-    newp->next = __fork_handlers;
+    newp->next = atomic_load(__fork_handlers);
   while (catomic_compare_and_exchange_bool_acq (&__fork_handlers,
 						newp, newp->next) != 0);
 }
@@ -125,7 +125,7 @@
   lll_lock (__fork_lock, LLL_PRIVATE);
 
   /* No more fork handlers.  */
-  __fork_handlers = NULL;
+  atomic_store(__fork_handlers, NULL);
 
   /* Free eventually allocated memory blocks for the object pool.  */
   struct fork_handler_pool *runp = fork_handler_pool.next;
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/sem_post.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/sem_post.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/sem_post.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/sem_post.c	2014-12-02 16:58:38.769874337 +0100
@@ -34,8 +34,8 @@
   __typeof (isem->value) cur;
   do
     {
-      cur = isem->value;
-      if (isem->value == SEM_VALUE_MAX)
+      cur = atomic_load(isem->value);
+      if (cur == SEM_VALUE_MAX)
 	{
 	  __set_errno (EOVERFLOW);
 	  return -1;
@@ -44,7 +44,7 @@
   while (atomic_compare_and_exchange_bool_rel (&isem->value, cur + 1, cur));
 
   atomic_full_barrier ();
-  if (isem->nwaiters > 0)
+  if (atomic_load(isem->nwaiters) > 0)
     {
       int err = lll_futex_wake (&isem->value, 1,
 				isem->private ^ FUTEX_PRIVATE_FLAG);
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/sem_trywait.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/sem_trywait.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/sem_trywait.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/sem_trywait.c	2014-12-02 16:58:38.769874337 +0100
@@ -32,7 +32,7 @@
   int *futex = (int *) sem;
   int val;
 
-  if (*futex > 0)
+  if (atomic_load(*futex) > 0)
     {
       val = atomic_decrement_if_positive (futex);
       if (val > 0)
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/timer_routines.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/timer_routines.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/timer_routines.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/timer_routines.c	2014-12-02 16:58:38.769874337 +0100
@@ -182,7 +182,7 @@
   int res = pthread_create (&th, &attr, timer_helper_thread, NULL);
   if (res == 0)
     /* We managed to start the helper thread.  */
-    __helper_tid = ((struct pthread *) th)->tid;
+    __helper_tid = atomic_load(((struct pthread *) th)->tid);
 
   /* Restore the signal mask.  */
   INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &oss, NULL,
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c	2014-05-12 14:05:12.439661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/unregister-atfork.c	2014-12-02 16:58:38.770874337 +0100
@@ -33,16 +33,16 @@
      We do not worry about other threads adding entries for this DSO
      right this moment.  If this happens this is a race and we can do
      whatever we please.  The program will crash anyway seen.  */
-  struct fork_handler *runp = __fork_handlers;
+  struct fork_handler *runp = atomic_load(__fork_handlers);
   struct fork_handler *lastp = NULL;
 
   while (runp != NULL)
-    if (runp->dso_handle == dso_handle)
+    if (atomic_load(runp->dso_handle) == dso_handle)
       break;
     else
       {
 	lastp = runp;
-	runp = runp->next;
+	runp = atomic_load(runp->next);
       }
 
   if (runp == NULL)
@@ -67,7 +67,7 @@
   do
     {
     again:
-      if (runp->dso_handle == dso_handle)
+      if (atomic_load(runp->dso_handle) == dso_handle)
 	{
 	  if (lastp == NULL)
 	    {
@@ -77,12 +77,12 @@
 							 runp->next, runp)
 		  != 0)
 		{
-		  runp = __fork_handlers;
+		  runp = atomic_load(__fork_handlers);
 		  goto again;
 		}
 	    }
 	  else
-	    lastp->next = runp->next;
+	    atomic_store(lastp->next, runp->next);
 
 	  /* We cannot overwrite the ->next element now.  Put the deleted
 	     entries in a separate list.  */
@@ -94,7 +94,7 @@
       else
 	lastp = runp;
 
-      runp = runp->next;
+      runp = atomic_load(runp->next);
     }
   while (runp != NULL);
 
@@ -105,7 +105,7 @@
   while (deleted != NULL)
     {
       /* We need to be informed by possible current users.  */
-      deleted->handler->need_signal = 1;
+      atomic_store(deleted->handler->need_signal, 1);
       /* Make sure this gets written out first.  */
       atomic_write_barrier ();
 
@@ -113,7 +113,7 @@
 	 wait for the last user.  */
       atomic_decrement (&deleted->handler->refcntr);
       unsigned int val;
-      while ((val = deleted->handler->refcntr) != 0)
+      while ((val = atomic_load(deleted->handler->refcntr)) != 0)
 	lll_futex_wait (&deleted->handler->refcntr, val, LLL_PRIVATE);
 
       deleted = deleted->next;
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: clone.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: libc-lowlevellock.S
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h eglibc-2.19/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h	2014-12-04 18:05:32.771479413 +0100
@@ -1,6 +1,5 @@
-/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+/* Copyright (C) 2005-2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -9,61 +8,42 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
+   License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 #ifndef _LOWLEVELLOCK_H
 #define _LOWLEVELLOCK_H	1
 
-#include <stap-probe.h>
-
 #ifndef __ASSEMBLER__
-# include <time.h>
-# include <sys/param.h>
-# include <bits/pthreadtypes.h>
-# include <kernel-features.h>
-# include <tcb-offsets.h>
-
-# ifndef LOCK_INSTR
-#  ifdef UP
-#   define LOCK_INSTR	/* nothing */
-#  else
-#   define LOCK_INSTR "lock;"
-#  endif
-# endif
-#else
-# ifndef LOCK
-#  ifdef UP
-#   define LOCK
-#  else
-#   define LOCK lock
-#  endif
-# endif
-#endif
+#include <time.h>
+#include <sys/param.h>
+#include <bits/pthreadtypes.h>
+#include <atomic.h>
+#include <kernel-features.h>
+#endif /* !__ASSEMBLER__ */
 
-#define SYS_futex		__NR_futex
 #define FUTEX_WAIT		0
 #define FUTEX_WAKE		1
+#define FUTEX_REQUEUE		3
 #define FUTEX_CMP_REQUEUE	4
 #define FUTEX_WAKE_OP		5
+#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE	((4 << 24) | 1)
 #define FUTEX_LOCK_PI		6
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_WAIT_BITSET	9
 #define FUTEX_WAKE_BITSET	10
-#define FUTEX_WAIT_REQUEUE_PI	11
-#define FUTEX_CMP_REQUEUE_PI	12
+#define FUTEX_WAIT_REQUEUE_PI   11
+#define FUTEX_CMP_REQUEUE_PI    12
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
 
 #define FUTEX_BITSET_MATCH_ANY	0xffffffff
 
-#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE	((4 << 24) | 1)
-
 /* Values for 'private' parameter of locking macros.  Yes, the
    definition seems to be backwards.  But it is not.  The bit will be
    reversed before passing to the system call.  */
@@ -71,7 +51,6 @@
 #define LLL_SHARED	FUTEX_PRIVATE_FLAG
 
 #ifndef __ASSEMBLER__
-
 #if !defined NOT_IN_libc || defined IS_IN_rtld
 /* In libc.so or ld.so all futexes are private.  */
 # ifdef __ASSUME_PRIVATE_FUTEX
@@ -91,535 +70,253 @@
    ? ((private) == 0							      \
       ? ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex))	      \
       : (fl))								      \
-   : ({ unsigned int __fl = ((private) ^ FUTEX_PRIVATE_FLAG);		      \
-	asm ("andl %%fs:%P1, %0" : "+r" (__fl)				      \
-	     : "i" (offsetof (struct pthread, header.private_futex)));	      \
-	__fl | (fl); }))
+   : ((fl) | (((private) ^ FUTEX_PRIVATE_FLAG)				      \
+	      & THREAD_GETMEM (THREAD_SELF, header.private_futex))))
 # endif
 #endif
+#endif
 
-/* Initializer for lock.  */
-#define LLL_LOCK_INITIALIZER		(0)
-#define LLL_LOCK_INITIALIZER_LOCKED	(1)
-#define LLL_LOCK_INITIALIZER_WAITERS	(2)
-
-/* Delay in spinlock loop.  */
-#define BUSY_WAIT_NOP	  asm ("rep; nop")
 
+#define lll_futex_wait(futexp, val, private) \
+  lll_futex_timed_wait(futexp, val, NULL, private)
 
-#define LLL_STUB_UNWIND_INFO_START \
-	".section	.eh_frame,\"a\",@progbits\n" 		\
-"7:\t"	".long	9f-8f	# Length of Common Information Entry\n" \
-"8:\t"	".long	0x0	# CIE Identifier Tag\n\t" 		\
-	".byte	0x1	# CIE Version\n\t" 			\
-	".ascii \"zR\\0\"	# CIE Augmentation\n\t" 	\
-	".uleb128 0x1	# CIE Code Alignment Factor\n\t" 	\
-	".sleb128 -8	# CIE Data Alignment Factor\n\t" 	\
-	".byte	0x10	# CIE RA Column\n\t" 			\
-	".uleb128 0x1	# Augmentation size\n\t" 		\
-	".byte	0x1b	# FDE Encoding (pcrel sdata4)\n\t" 	\
-	".byte	0x12	# DW_CFA_def_cfa_sf\n\t" 		\
-	".uleb128 0x7\n\t" 					\
-	".sleb128 16\n\t" 					\
-	".align " LP_SIZE "\n" 					\
-"9:\t"	".long	23f-10f	# FDE Length\n" 			\
-"10:\t"	".long	10b-7b	# FDE CIE offset\n\t" 			\
-	".long	1b-.	# FDE initial location\n\t" 		\
-	".long	6b-1b	# FDE address range\n\t" 		\
-	".uleb128 0x0	# Augmentation size\n\t" 		\
-	".byte	0x16	# DW_CFA_val_expression\n\t" 		\
-	".uleb128 0x10\n\t" 					\
-	".uleb128 12f-11f\n" 					\
-"11:\t"	".byte	0x80	# DW_OP_breg16\n\t" 			\
-	".sleb128 4b-1b\n"
-#define LLL_STUB_UNWIND_INFO_END \
-	".byte	0x16	# DW_CFA_val_expression\n\t" 		\
-	".uleb128 0x10\n\t" 					\
-	".uleb128 14f-13f\n" 					\
-"13:\t"	".byte	0x80	# DW_OP_breg16\n\t" 			\
-	".sleb128 4b-2b\n" 					\
-"14:\t"	".byte	0x40 + (3b-2b) # DW_CFA_advance_loc\n\t" 	\
-	".byte	0x0e	# DW_CFA_def_cfa_offset\n\t" 		\
-	".uleb128 0\n\t" 					\
-	".byte	0x16	# DW_CFA_val_expression\n\t" 		\
-	".uleb128 0x10\n\t" 					\
-	".uleb128 16f-15f\n" 					\
-"15:\t"	".byte	0x80	# DW_OP_breg16\n\t" 			\
-	".sleb128 4b-3b\n" 					\
-"16:\t"	".byte	0x40 + (4b-3b-1) # DW_CFA_advance_loc\n\t" 	\
-	".byte	0x0e	# DW_CFA_def_cfa_offset\n\t" 		\
-	".uleb128 128\n\t" 					\
-	".byte	0x16	# DW_CFA_val_expression\n\t" 		\
-	".uleb128 0x10\n\t" 					\
-	".uleb128 20f-17f\n" 					\
-"17:\t"	".byte	0x80	# DW_OP_breg16\n\t" 			\
-	".sleb128 19f-18f\n\t" 					\
-	".byte	0x0d	# DW_OP_const4s\n" 			\
-"18:\t"	".4byte	4b-.\n\t" 					\
-	".byte	0x1c	# DW_OP_minus\n\t" 			\
-	".byte	0x0d	# DW_OP_const4s\n" 			\
-"19:\t"	".4byte	24f-.\n\t" 					\
-	".byte	0x22	# DW_OP_plus\n" 			\
-"20:\t"	".byte	0x40 + (5b-4b+1) # DW_CFA_advance_loc\n\t" 	\
-	".byte	0x13	# DW_CFA_def_cfa_offset_sf\n\t" 	\
-	".sleb128 16\n\t" 					\
-	".byte	0x16	# DW_CFA_val_expression\n\t" 		\
-	".uleb128 0x10\n\t" 					\
-	".uleb128 22f-21f\n" 					\
-"21:\t"	".byte	0x80	# DW_OP_breg16\n\t" 			\
-	".sleb128 4b-5b\n" 					\
-"22:\t"	".align " LP_SIZE "\n" 					\
-"23:\t"	".previous\n"
-
-/* Unwind info for
-   1: leaq ..., %rdi
-   2: subq $128, %rsp
-   3: callq ...
-   4: addq $128, %rsp
-   5: jmp 24f
-   6:
-   snippet.  */
-#define LLL_STUB_UNWIND_INFO_5 \
-LLL_STUB_UNWIND_INFO_START					\
-"12:\t"	".byte	0x40 + (2b-1b) # DW_CFA_advance_loc\n\t" 	\
-LLL_STUB_UNWIND_INFO_END
-
-/* Unwind info for
-   1: leaq ..., %rdi
-   0: movq ..., %rdx
-   2: subq $128, %rsp
-   3: callq ...
-   4: addq $128, %rsp
-   5: jmp 24f
-   6:
-   snippet.  */
-#define LLL_STUB_UNWIND_INFO_6 \
-LLL_STUB_UNWIND_INFO_START					\
-"12:\t"	".byte	0x40 + (0b-1b) # DW_CFA_advance_loc\n\t" 	\
-	".byte	0x16	# DW_CFA_val_expression\n\t" 		\
-	".uleb128 0x10\n\t" 					\
-	".uleb128 26f-25f\n" 					\
-"25:\t"	".byte	0x80	# DW_OP_breg16\n\t" 			\
-	".sleb128 4b-0b\n" 					\
-"26:\t"	".byte	0x40 + (2b-0b) # DW_CFA_advance_loc\n\t" 	\
-LLL_STUB_UNWIND_INFO_END
+#define lll_futex_timed_wait(futexp, val, timespec, private) \
+  ({									      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    long int __ret;							      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 4, (futexp),		      \
+			      __lll_private_flag (FUTEX_WAIT, private),	      \
+			      (val), (timespec));			      \
+    __ret;								      \
+  })
 
+#define lll_futex_timed_wait_bitset(futexp, val, timespec, clockbit, private) \
+  ({									\
+    INTERNAL_SYSCALL_DECL (__err);					\
+    long int __ret;							\
+    int __op = FUTEX_WAIT_BITSET | clockbit;				\
+    __ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp),		\
+			      __lll_private_flag (__op, private),	\
+			      (val), (timespec), NULL /* Unused.  */,	\
+			      FUTEX_BITSET_MATCH_ANY);			\
+    __ret;								\
+  })
 
-#define lll_futex_wait(futex, val, private) \
-  lll_futex_timed_wait(futex, val, NULL, private)
+#define lll_futex_wake(futexp, nr, private) \
+  ({									      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    long int __ret;							      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 4, (futexp),		      \
+			      __lll_private_flag (FUTEX_WAKE, private),	      \
+			      (nr), 0);					      \
+    __ret;								      \
+  })
 
+#define lll_robust_dead(futexv, private) \
+  do									      \
+    {									      \
+      int *__futexp = &(futexv);					      \
+      atomic_or (__futexp, FUTEX_OWNER_DIED);				      \
+      lll_futex_wake (__futexp, 1, private);				      \
+    }									      \
+  while (0)
 
-#define lll_futex_timed_wait(futex, val, timeout, private) \
+/* Returns non-zero if error happened, zero if success.  */
+#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex, val, private) \
   ({									      \
-    register const struct timespec *__to __asm ("r10") = timeout;	      \
-    int __status;							      \
-    register __typeof (val) _val __asm ("edx") = (val);			      \
-    __asm __volatile ("syscall"						      \
-		      : "=a" (__status)					      \
-		      : "0" (SYS_futex), "D" (futex),			      \
-			"S" (__lll_private_flag (FUTEX_WAIT, private)),	      \
-			"d" (_val), "r" (__to)				      \
-		      : "memory", "cc", "r11", "cx");			      \
-    __status;								      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    long int __ret;							      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp),		      \
+			      __lll_private_flag (FUTEX_CMP_REQUEUE, private),\
+			      (nr_wake), (nr_move), (mutex), (val));	      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err);				      \
   })
 
 
-#define lll_futex_wake(futex, nr, private) \
+/* lll_futex_wake_unlock (returns non-zero on error, zero on success)
+   is no longer defined here; it has moved to atomic.h.  */
+
+/* Priority Inheritance support.  */
+#define lll_futex_wait_requeue_pi(futexp, val, mutex, private) \
+  lll_futex_timed_wait_requeue_pi (futexp, val, NULL, 0, mutex, private)
+
+#define lll_futex_timed_wait_requeue_pi(futexp, val, timespec, clockbit,      \
+					mutex, private)			      \
   ({									      \
-    int __status;							      \
-    register __typeof (nr) _nr __asm ("edx") = (nr);			      \
-    LIBC_PROBE (lll_futex_wake, 3, futex, nr, private);                       \
-    __asm __volatile ("syscall"						      \
-		      : "=a" (__status)					      \
-		      : "0" (SYS_futex), "D" (futex),			      \
-			"S" (__lll_private_flag (FUTEX_WAKE, private)),	      \
-			"d" (_nr)					      \
-		      : "memory", "cc", "r10", "r11", "cx");		      \
-    __status;								      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    int __op = FUTEX_WAIT_REQUEUE_PI | clockbit;			      \
+									      \
+    INTERNAL_SYSCALL (futex, __err, 5, (futexp),			      \
+		      __lll_private_flag (__op, private),		      \
+		      (val), (timespec), mutex); 			      \
   })
 
+#define lll_futex_cmp_requeue_pi(futexp, nr_wake, nr_move, mutex, val, priv)  \
+  ({									      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    long int __ret;							      \
+									      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp),		      \
+			      __lll_private_flag (FUTEX_CMP_REQUEUE_PI, priv),\
+			      (nr_wake), (nr_move), (mutex), (val));	      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err);				      \
+  })
 
-/* NB: in the lll_trylock macro we simply return the value in %eax
-   after the cmpxchg instruction.  In case the operation succeded this
-   value is zero.  In case the operation failed, the cmpxchg instruction
-   has loaded the current value of the memory work which is guaranteed
-   to be nonzero.  */
-#if defined NOT_IN_libc || defined UP
-# define __lll_trylock_asm LOCK_INSTR "cmpxchgl %2, %1"
-#else
-# define __lll_trylock_asm "cmpl $0, __libc_multiple_threads(%%rip)\n\t"      \
-			   "je 0f\n\t"					      \
-			   "lock; cmpxchgl %2, %1\n\t"			      \
-			   "jmp 1f\n\t"					      \
-			   "0:\tcmpxchgl %2, %1\n\t"			      \
-			   "1:"
-#endif
 
-#define lll_trylock(futex) \
-  ({ int ret;								      \
-     __asm __volatile (__lll_trylock_asm				      \
-		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_LOCK_INITIALIZER_LOCKED), "m" (futex),      \
-			 "0" (LLL_LOCK_INITIALIZER)			      \
-		       : "memory");					      \
-     ret; })
-
-#define lll_robust_trylock(futex, id) \
-  ({ int ret;								      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
-		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (id), "m" (futex),	"0" (LLL_LOCK_INITIALIZER)    \
-		       : "memory");					      \
-     ret; })
-
-#define lll_cond_trylock(futex) \
-  ({ int ret;								      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %2, %1"			      \
-		       : "=a" (ret), "=m" (futex)			      \
-		       : "r" (LLL_LOCK_INITIALIZER_WAITERS),		      \
-			 "m" (futex), "0" (LLL_LOCK_INITIALIZER)	      \
-		       : "memory");					      \
-     ret; })
-
-#if defined NOT_IN_libc || defined UP
-# define __lll_lock_asm_start LOCK_INSTR "cmpxchgl %4, %2\n\t"		      \
-			      "jnz 1f\n\t"
-#else
-# define __lll_lock_asm_start "cmpl $0, __libc_multiple_threads(%%rip)\n\t"   \
-			      "je 0f\n\t"				      \
-			      "lock; cmpxchgl %4, %2\n\t"		      \
-			      "jnz 1f\n\t"				      \
-			      "jmp 24f\n"				      \
-			      "0:\tcmpxchgl %4, %2\n\t"			      \
-			      "jnz 1f\n\t"
+#define lll_trylock(lock)	\
+  atomic_compare_and_exchange_val_acq(&(lock), 1, 0)
+
+#define lll_cond_trylock(lock)	\
+  atomic_compare_and_exchange_val_acq(&(lock), 2, 0)
+
+#define __lll_robust_trylock(futex, id) \
+  (atomic_compare_and_exchange_val_acq (futex, id, 0) != 0)
+#define lll_robust_trylock(lock, id) \
+  __lll_robust_trylock (&(lock), id)
+
+#ifndef __ASSEMBLER__
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
 #endif
 
-#define lll_lock(futex, private) \
-  (void)								      \
-    ({ int ignore1, ignore2, ignore3;					      \
-       if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
-	 __asm __volatile (__lll_lock_asm_start				      \
-			   ".subsection 1\n\t"				      \
-			   ".type _L_lock_%=, @function\n"		      \
-			   "_L_lock_%=:\n"				      \
-			   "1:\tlea %2, %%" RDI_LP "\n"			      \
-			   "2:\tsub $128, %%" RSP_LP "\n"		      \
-			   "3:\tcallq __lll_lock_wait_private\n"	      \
-			   "4:\tadd $128, %%" RSP_LP "\n"		      \
-			   "5:\tjmp 24f\n"				      \
-			   "6:\t.size _L_lock_%=, 6b-1b\n\t"		      \
-			   ".previous\n"				      \
-			   LLL_STUB_UNWIND_INFO_5			      \
-			   "24:"					      \
-			   : "=S" (ignore1), "=&D" (ignore2), "=m" (futex),   \
-			     "=a" (ignore3)				      \
-			   : "0" (1), "m" (futex), "3" (0)		      \
-			   : "cx", "r11", "cc", "memory");		      \
-       else								      \
-	 __asm __volatile (__lll_lock_asm_start				      \
-			   ".subsection 1\n\t"				      \
-			   ".type _L_lock_%=, @function\n"		      \
-			   "_L_lock_%=:\n"				      \
-			   "1:\tlea %2, %%" RDI_LP "\n"			      \
-			   "2:\tsub $128, %%" RSP_LP "\n"		      \
-			   "3:\tcallq __lll_lock_wait\n"		      \
-			   "4:\tadd $128, %%" RSP_LP "\n"		      \
-			   "5:\tjmp 24f\n"				      \
-			   "6:\t.size _L_lock_%=, 6b-1b\n\t"		      \
-			   ".previous\n"				      \
-			   LLL_STUB_UNWIND_INFO_5			      \
-			   "24:"					      \
-			   : "=S" (ignore1), "=D" (ignore2), "=m" (futex),    \
-			     "=a" (ignore3)				      \
-			   : "1" (1), "m" (futex), "3" (0), "0" (private)     \
-			   : "cx", "r11", "cc", "memory");		      \
-    })									      \
+#define __lll_lock(futex, private)					      \
+  ((void) ({								      \
+    int *__futex = (futex);						      \
+    if (__builtin_expect (atomic_compare_and_exchange_val_acq (__futex,       \
+								1, 0), 0))    \
+      {									      \
+	if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
+	  __lll_lock_wait_private (__futex);				      \
+	else								      \
+	  __lll_lock_wait (__futex, private);				      \
+      }									      \
+  }))
+#define lll_lock(futex, private) __lll_lock (&(futex), private)
+
 
+#define __lll_robust_lock(futex, id, private)				      \
+  ({									      \
+    int *__futex = (futex);						      \
+    int __val = 0;							      \
+									      \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id,  \
+								0), 0))	      \
+      __val = __lll_robust_lock_wait (__futex, private);		      \
+    __val;								      \
+  })
 #define lll_robust_lock(futex, id, private) \
-  ({ int result, ignore1, ignore2;					      \
-    __asm __volatile (LOCK_INSTR "cmpxchgl %4, %2\n\t"			      \
-		      "jnz 1f\n\t"					      \
-		      ".subsection 1\n\t"				      \
-		      ".type _L_robust_lock_%=, @function\n"		      \
-		      "_L_robust_lock_%=:\n"				      \
-		      "1:\tlea %2, %%" RDI_LP "\n"			      \
-		      "2:\tsub $128, %%" RSP_LP "\n"			      \
-		      "3:\tcallq __lll_robust_lock_wait\n"		      \
-		      "4:\tadd $128, %%" RSP_LP "\n"			      \
-		      "5:\tjmp 24f\n"					      \
-		      "6:\t.size _L_robust_lock_%=, 6b-1b\n\t"		      \
-		      ".previous\n"					      \
-		      LLL_STUB_UNWIND_INFO_5				      \
-		      "24:"						      \
-		      : "=S" (ignore1), "=D" (ignore2), "=m" (futex),	      \
-			"=a" (result)					      \
-		      : "1" (id), "m" (futex), "3" (0), "0" (private)	      \
-		      : "cx", "r11", "cc", "memory");			      \
-    result; })
-
-#define lll_cond_lock(futex, private) \
-  (void)								      \
-    ({ int ignore1, ignore2, ignore3;					      \
-       __asm __volatile (LOCK_INSTR "cmpxchgl %4, %2\n\t"		      \
-			 "jnz 1f\n\t"					      \
-			 ".subsection 1\n\t"				      \
-			 ".type _L_cond_lock_%=, @function\n"		      \
-			 "_L_cond_lock_%=:\n"				      \
-			 "1:\tlea %2, %%" RDI_LP "\n"			      \
-			 "2:\tsub $128, %%" RSP_LP "\n"			      \
-			 "3:\tcallq __lll_lock_wait\n"			      \
-			 "4:\tadd $128, %%" RSP_LP "\n"			      \
-			 "5:\tjmp 24f\n"				      \
-			 "6:\t.size _L_cond_lock_%=, 6b-1b\n\t"		      \
-			 ".previous\n"					      \
-			 LLL_STUB_UNWIND_INFO_5				      \
-			 "24:"						      \
-			 : "=S" (ignore1), "=D" (ignore2), "=m" (futex),      \
-			   "=a" (ignore3)				      \
-			 : "1" (2), "m" (futex), "3" (0), "0" (private)	      \
-			 : "cx", "r11", "cc", "memory");		      \
-    })
+  __lll_robust_lock (&(futex), id, private)
+
+
+#define __lll_cond_lock(futex, private)					      \
+  ((void) ({								      \
+    int *__futex = (futex);						      \
+    if (__builtin_expect (atomic_exchange_acq (__futex, 2), 0))		      \
+      __lll_lock_wait (__futex, private);				      \
+  }))
+#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
+
 
 #define lll_robust_cond_lock(futex, id, private) \
-  ({ int result, ignore1, ignore2;					      \
-    __asm __volatile (LOCK_INSTR "cmpxchgl %4, %2\n\t"			      \
-		      "jnz 1f\n\t"					      \
-		      ".subsection 1\n\t"				      \
-		      ".type _L_robust_cond_lock_%=, @function\n"	      \
-		      "_L_robust_cond_lock_%=:\n"			      \
-		      "1:\tlea %2, %%" RDI_LP "\n"			      \
-		      "2:\tsub $128, %%" RSP_LP "\n"			      \
-		      "3:\tcallq __lll_robust_lock_wait\n"		      \
-		      "4:\tadd $128, %%" RSP_LP "\n"			      \
-		      "5:\tjmp 24f\n"					      \
-		      "6:\t.size _L_robust_cond_lock_%=, 6b-1b\n\t"	      \
-		      ".previous\n"					      \
-		      LLL_STUB_UNWIND_INFO_5				      \
-		      "24:"						      \
-		      : "=S" (ignore1), "=D" (ignore2), "=m" (futex),	      \
-			"=a" (result)					      \
-		      : "1" (id | FUTEX_WAITERS), "m" (futex), "3" (0),	      \
-			"0" (private)					      \
-		      : "cx", "r11", "cc", "memory");			      \
-    result; })
-
-#define lll_timedlock(futex, timeout, private) \
-  ({ int result, ignore1, ignore2, ignore3;				      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %1, %4\n\t"			      \
-		       "jnz 1f\n\t"					      \
-		       ".subsection 1\n\t"				      \
-		       ".type _L_timedlock_%=, @function\n"		      \
-		       "_L_timedlock_%=:\n"				      \
-		       "1:\tlea %4, %%" RDI_LP "\n"			      \
-		       "0:\tmov %8, %%" RDX_LP "\n"			      \
-		       "2:\tsub $128, %%" RSP_LP "\n"			      \
-		       "3:\tcallq __lll_timedlock_wait\n"		      \
-		       "4:\tadd $128, %%" RSP_LP "\n"			      \
-		       "5:\tjmp 24f\n"					      \
-		       "6:\t.size _L_timedlock_%=, 6b-1b\n\t"		      \
-		       ".previous\n"					      \
-		       LLL_STUB_UNWIND_INFO_6				      \
-		       "24:"						      \
-		       : "=a" (result), "=D" (ignore1), "=S" (ignore2),	      \
-			 "=&d" (ignore3), "=m" (futex)			      \
-		       : "0" (0), "1" (1), "m" (futex), "m" (timeout),	      \
-			 "2" (private)					      \
-		       : "memory", "cx", "cc", "r10", "r11");		      \
-     result; })
-
-extern int __lll_timedlock_elision (int *futex, short *adapt_count,
-					 const struct timespec *timeout,
-					 int private) attribute_hidden;
-
-#define lll_timedlock_elision(futex, adapt_count, timeout, private)	\
-  __lll_timedlock_elision(&(futex), &(adapt_count), timeout, private)
-
-#define lll_robust_timedlock(futex, timeout, id, private) \
-  ({ int result, ignore1, ignore2, ignore3;				      \
-     __asm __volatile (LOCK_INSTR "cmpxchgl %1, %4\n\t"			      \
-		       "jnz 1f\n\t"					      \
-		       ".subsection 1\n\t"				      \
-		       ".type _L_robust_timedlock_%=, @function\n"	      \
-		       "_L_robust_timedlock_%=:\n"			      \
-		       "1:\tlea %4, %%" RDI_LP "\n"			      \
-		       "0:\tmov %8, %%" RDX_LP "\n"			      \
-		       "2:\tsub $128, %%" RSP_LP "\n"			      \
-		       "3:\tcallq __lll_robust_timedlock_wait\n"	      \
-		       "4:\tadd $128, %%" RSP_LP "\n"			      \
-		       "5:\tjmp 24f\n"					      \
-		       "6:\t.size _L_robust_timedlock_%=, 6b-1b\n\t"	      \
-		       ".previous\n"					      \
-		       LLL_STUB_UNWIND_INFO_6				      \
-		       "24:"						      \
-		       : "=a" (result), "=D" (ignore1), "=S" (ignore2),       \
-			 "=&d" (ignore3), "=m" (futex)			      \
-		       : "0" (0), "1" (id), "m" (futex), "m" (timeout),	      \
-			 "2" (private)					      \
-		       : "memory", "cx", "cc", "r10", "r11");		      \
-     result; })
-
-#if defined NOT_IN_libc || defined UP
-# define __lll_unlock_asm_start LOCK_INSTR "decl %0\n\t"		      \
-				"jne 1f\n\t"
-#else
-# define __lll_unlock_asm_start "cmpl $0, __libc_multiple_threads(%%rip)\n\t" \
-				"je 0f\n\t"				      \
-				"lock; decl %0\n\t"			      \
-				"jne 1f\n\t"				      \
-				"jmp 24f\n\t"				      \
-				"0:\tdecl %0\n\t"			      \
-				"jne 1f\n\t"
+  __lll_robust_lock (&(futex), (id) | FUTEX_WAITERS, private)
+
+
+#ifndef __ASSEMBLER__
+extern int __lll_timedlock_wait (int *futex, const struct timespec *,
+				 int private) attribute_hidden;
+extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *,
+					int private) attribute_hidden;
 #endif
 
-#define lll_unlock(futex, private) \
-  (void)								      \
-    ({ int ignore;							      \
-       if (__builtin_constant_p (private) && (private) == LLL_PRIVATE)	      \
-	 __asm __volatile (__lll_unlock_asm_start			      \
-			   ".subsection 1\n\t"				      \
-			   ".type _L_unlock_%=, @function\n"		      \
-			   "_L_unlock_%=:\n"				      \
-			   "1:\tlea %0, %%" RDI_LP "\n"			      \
-			   "2:\tsub $128, %%" RSP_LP "\n"		      \
-			   "3:\tcallq __lll_unlock_wake_private\n"	      \
-			   "4:\tadd $128, %%" RSP_LP "\n"		      \
-			   "5:\tjmp 24f\n"				      \
-			   "6:\t.size _L_unlock_%=, 6b-1b\n\t"		      \
-			   ".previous\n"				      \
-			   LLL_STUB_UNWIND_INFO_5			      \
-			   "24:"					      \
-			   : "=m" (futex), "=&D" (ignore)		      \
-			   : "m" (futex)				      \
-			   : "ax", "cx", "r11", "cc", "memory");	      \
-       else								      \
-	 __asm __volatile (__lll_unlock_asm_start			      \
-			   ".subsection 1\n\t"				      \
-			   ".type _L_unlock_%=, @function\n"		      \
-			   "_L_unlock_%=:\n"				      \
-			   "1:\tlea %0, %%" RDI_LP "\n"			      \
-			   "2:\tsub $128, %%" RSP_LP "\n"		      \
-			   "3:\tcallq __lll_unlock_wake\n"		      \
-			   "4:\tadd $128, %%" RSP_LP "\n"		      \
-			   "5:\tjmp 24f\n"				      \
-			   "6:\t.size _L_unlock_%=, 6b-1b\n\t"		      \
-			   ".previous\n"				      \
-			   LLL_STUB_UNWIND_INFO_5			      \
-			   "24:"					      \
-			   : "=m" (futex), "=&D" (ignore)		      \
-			   : "m" (futex), "S" (private)			      \
-			   : "ax", "cx", "r11", "cc", "memory");	      \
+#define __lll_timedlock(futex, abstime, private)			      \
+  ({									      \
+     int *__futex = (futex);						      \
+     int __val = 0;							      \
+     /* CAS 0->1 as in the old asm; xchg would turn 2 (waiters) into 1.  */ \
+     if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, 1, 0), 0)) \
+       __val = __lll_timedlock_wait (__futex, abstime, private);	      \
+     __val;								      \
+  })
+#define lll_timedlock(futex, abstime, private) \
+  __lll_timedlock (&(futex), abstime, private)
+
+
+#define __lll_robust_timedlock(futex, abstime, id, private)		      \
+  ({ /* 0 if taken at once, else __lll_robust_timedlock_wait's result.  */  \
+    int *__futex = (futex);						      \
+    int __val = 0;							      \
+    /* Try 0 -> id; a nonzero CAS result means the lock word was not 0.  */ \
+    if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id,  \
+								0), 0))	      \
+      __val = __lll_robust_timedlock_wait (__futex, abstime, private);	      \
+    __val;								      \
+  })
+#define lll_robust_timedlock(futex, abstime, id, private) \
+  __lll_robust_timedlock (&(futex), abstime, id, private)
+
+
+#define __lll_unlock(futex, private) \
+  (void) /* Store 0; if the old value was >1, call lll_futex_wake (count 1). */ \
+    ({ int *__futex = (futex);					\
+       int __oldval = atomic_exchange_rel (__futex, 0);		\
+       if (__builtin_expect (__oldval > 1, 0))			\
+	 lll_futex_wake (__futex, 1, private);			\
     })
+#define lll_unlock(futex, private) __lll_unlock(&(futex), private)
 
-#define lll_robust_unlock(futex, private) \
-  do									      \
-    {									      \
-      int ignore;							      \
-      __asm __volatile (LOCK_INSTR "andl %2, %0\n\t"			      \
-			"jne 1f\n\t"					      \
-			".subsection 1\n\t"				      \
-			".type _L_robust_unlock_%=, @function\n"	      \
-			"_L_robust_unlock_%=:\n"			      \
-			"1:\tlea %0, %%" RDI_LP "\n"			      \
-			"2:\tsub $128, %%" RSP_LP "\n"			      \
-			"3:\tcallq __lll_unlock_wake\n"			      \
-			"4:\tadd $128, %%" RSP_LP "\n"			      \
-			"5:\tjmp 24f\n"					      \
-			"6:\t.size _L_robust_unlock_%=, 6b-1b\n\t"	      \
-			".previous\n"					      \
-			LLL_STUB_UNWIND_INFO_5				      \
-			"24:"						      \
-			: "=m" (futex), "=&D" (ignore)			      \
-			: "i" (FUTEX_WAITERS), "m" (futex),		      \
-			  "S" (private)					      \
-			: "ax", "cx", "r11", "cc", "memory");		      \
-    }									      \
-  while (0)
 
-#define lll_robust_dead(futex, private) \
-  do									      \
-    {									      \
-      int ignore;							      \
-      __asm __volatile (LOCK_INSTR "orl %3, (%2)\n\t"			      \
-			"syscall"					      \
-			: "=m" (futex), "=a" (ignore)			      \
-			: "D" (&(futex)), "i" (FUTEX_OWNER_DIED),	      \
-			  "S" (__lll_private_flag (FUTEX_WAKE, private)),     \
-			  "1" (__NR_futex), "d" (1)			      \
-			: "cx", "r11", "cc", "memory");			      \
-    }									      \
-  while (0)
+#define __lll_robust_unlock(futex, private) \
+  (void) /* Store 0; if FUTEX_WAITERS was set, call lll_futex_wake (count 1). */ \
+    ({ int *__futex = (futex);					\
+       int __oldval = atomic_exchange_rel (__futex, 0);		\
+       if (__builtin_expect (__oldval & FUTEX_WAITERS, 0))	\
+	 lll_futex_wake (__futex, 1, private);			\
+    })
+#define lll_robust_unlock(futex, private) \
+  __lll_robust_unlock(&(futex), private)
 
-/* Returns non-zero if error happened, zero if success.  */
-#define lll_futex_requeue(ftx, nr_wake, nr_move, mutex, val, private) \
-  ({ int __res;								      \
-     register int __nr_move __asm ("r10") = nr_move;			      \
-     register void *__mutex __asm ("r8") = mutex;			      \
-     register int __val __asm ("r9") = val;				      \
-     __asm __volatile ("syscall"					      \
-		       : "=a" (__res)					      \
-		       : "0" (__NR_futex), "D" ((void *) ftx),		      \
-			 "S" (__lll_private_flag (FUTEX_CMP_REQUEUE,	      \
-						  private)), "d" (nr_wake),   \
-			 "r" (__nr_move), "r" (__mutex), "r" (__val)	      \
-		       : "cx", "r11", "cc", "memory");			      \
-     __res < 0; })
 
 #define lll_islocked(futex) \
-  (futex != LLL_LOCK_INITIALIZER)
+  ((futex) != 0) /* nonzero iff the lock word is not 0 (untaken) */
 
 
+/* Our internal lock implementation is identical to the binary-compatible
+   mutex implementation. */
+
+/* Initializers for lock.  */
+#define LLL_LOCK_INITIALIZER		(0)
+#define LLL_LOCK_INITIALIZER_LOCKED	(1)
+
+/* The states of a lock are:
+    0  -  untaken
+    1  -  taken by one user
+   >1  -  taken by more users */
+
 /* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
    wakeup when the clone terminates.  The memory location contains the
    thread ID while the clone is running and is reset to zero
-   afterwards.
-
-   The macro parameter must not have any side effect.  */
-#define lll_wait_tid(tid) \
-  do {									      \
-    int __ignore;							      \
-    register __typeof (tid) _tid asm ("edx") = (tid);			      \
-    if (_tid != 0)							      \
-      __asm __volatile ("xorq %%r10, %%r10\n\t"				      \
-			"1:\tmovq %2, %%rax\n\t"			      \
-			"syscall\n\t"					      \
-			"cmpl $0, (%%rdi)\n\t"				      \
-			"jne 1b"					      \
-			: "=&a" (__ignore)				      \
-			: "S" (FUTEX_WAIT), "i" (SYS_futex), "D" (&tid),      \
-			  "d" (_tid)					      \
-			: "memory", "cc", "r10", "r11", "cx");		      \
+   afterwards.	*/
+#define lll_wait_tid(tid)						\
+  do { /* NOTE(review): waits at most once and never re-checks tid afterwards -- confirm an early wakeup is harmless */ \
+    __typeof (tid) __tid;						\
+    /* When mvee_should_sync_tid() holds, the call is made even if tid */ \
+    /* already reads 0: the kernel clears the tid, which is a data race. */ \
+    if ((__tid = atomic_load(tid)) != 0 || mvee_should_sync_tid())	\
+      syscall(__NR_futex, &(tid), __lll_private_flag(mvee_should_sync_tid() ? MVEE_FUTEX_WAIT_TID : FUTEX_WAIT, LLL_SHARED), __tid, NULL); \
   } while (0)
 
-extern int __lll_timedwait_tid (int *tid, const struct timespec *abstime)
+#ifndef __ASSEMBLER__
+extern int __lll_timedwait_tid (int *, const struct timespec *)
      attribute_hidden;
-#define lll_timedwait_tid(tid, abstime) \
-  ({									      \
-    int __result = 0;							      \
-    if (tid != 0)							      \
-      {									      \
-	if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)	      \
-	  __result = EINVAL;						      \
-	else								      \
-	  __result = __lll_timedwait_tid (&tid, abstime);		      \
-      }									      \
-    __result; })
-
-extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
-  attribute_hidden;
-
-extern int __lll_unlock_elision (int *lock, int private)
-  attribute_hidden;
-
-extern int __lll_trylock_elision (int *lock, short *adapt_count)
-  attribute_hidden;
-
-#define lll_lock_elision(futex, adapt_count, private) \
-  __lll_lock_elision (&(futex), &(adapt_count), private)
-#define lll_unlock_elision(futex, private) \
-  __lll_unlock_elision (&(futex), private)
-#define lll_trylock_elision(futex, adapt_count) \
-  __lll_trylock_elision (&(futex), &(adapt_count))
+#endif
 
-#endif  /* !__ASSEMBLER__ */
+#define lll_timedwait_tid(tid, abstime) \
+  ({ /* 0 when no wait is needed, else __lll_timedwait_tid's result.  */ \
+    int __res = 0;					\
+    if (atomic_load(tid) != 0 || mvee_should_sync_tid())	\
+      __res = __lll_timedwait_tid (&(tid), (abstime));	\
+    __res; /* NOTE(review): no tv_nsec range check here; presumably done in __lll_timedwait_tid -- confirm */ \
+  })
 
 #endif	/* lowlevellock.h */
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: lowlevellock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: lowlevelrobustlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_barrier_wait.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_cond_broadcast.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_cond_signal.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_cond_timedwait.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_cond_wait.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_once.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_rwlock_rdlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_rwlock_timedrdlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_rwlock_timedwrlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_rwlock_unlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_rwlock_wrlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: pthread_spin_unlock.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: sem_post.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: sem_timedwait.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: sem_trywait.S
Only in eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64: sem_wait.S
diff -r --unified eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64/timer_create.c eglibc-2.19/nptl/sysdeps/unix/sysv/linux/x86_64/timer_create.c
--- eglibc-2.19.orig/nptl/sysdeps/unix/sysv/linux/x86_64/timer_create.c	2014-05-12 14:05:12.431661620 +0200
+++ eglibc-2.19/nptl/sysdeps/unix/sysv/linux/x86_64/timer_create.c	2014-12-02 16:58:38.772874337 +0100
@@ -42,7 +42,7 @@
     {
       int i;
       for (i = 0; i < OLD_TIMER_MAX; ++i)
-	if (__compat_timer_list[i] == NULL
+	if (atomic_load(__compat_timer_list[i]) == NULL
 	    && ! atomic_compare_and_exchange_bool_acq (&__compat_timer_list[i],
 						       newp, NULL))
 	  {
diff -r --unified eglibc-2.19.orig/nptl/tpp.c eglibc-2.19/nptl/tpp.c
--- eglibc-2.19.orig/nptl/tpp.c	2014-05-12 14:05:12.427661620 +0200
+++ eglibc-2.19/nptl/tpp.c	2014-12-02 16:58:38.772874337 +0100
@@ -33,7 +33,7 @@
 {
   __sched_fifo_max_prio = sched_get_priority_max (SCHED_FIFO);
   atomic_write_barrier ();
-  __sched_fifo_min_prio = sched_get_priority_min (SCHED_FIFO);
+  atomic_store(__sched_fifo_min_prio, sched_get_priority_min (SCHED_FIFO));
 }
 
 int
@@ -44,7 +44,7 @@
 
   if (tpp == NULL)
     {
-      if (__sched_fifo_min_prio == -1)
+      if (atomic_load(__sched_fifo_min_prio) == -1)
 	__init_sched_fifo_prio ();
 
       size_t size = sizeof *tpp;
@@ -100,7 +100,7 @@
 
   if ((self->flags & ATTR_FLAG_SCHED_SET) == 0)
     {
-      if (__sched_getparam (self->tid, &self->schedparam) != 0)
+      if (__sched_getparam (atomic_load(self->tid), &self->schedparam) != 0)
 	result = errno;
       else
 	self->flags |= ATTR_FLAG_SCHED_SET;
@@ -108,7 +108,7 @@
 
   if ((self->flags & ATTR_FLAG_POLICY_SET) == 0)
     {
-      self->schedpolicy = __sched_getscheduler (self->tid);
+      self->schedpolicy = __sched_getscheduler (atomic_load(self->tid));
       if (self->schedpolicy == -1)
 	result = errno;
       else
@@ -123,7 +123,7 @@
 	  if (sp.sched_priority < newpriomax)
 	    sp.sched_priority = newpriomax;
 
-	  if (__sched_setscheduler (self->tid, self->schedpolicy, &sp) < 0)
+	  if (__sched_setscheduler (atomic_load(self->tid), self->schedpolicy, &sp) < 0)
 	    result = errno;
 	}
     }
@@ -147,7 +147,7 @@
 
   if ((self->flags & ATTR_FLAG_SCHED_SET) == 0)
     {
-      if (__sched_getparam (self->tid, &self->schedparam) != 0)
+      if (__sched_getparam (atomic_load(self->tid), &self->schedparam) != 0)
 	result = -1;
       else
 	self->flags |= ATTR_FLAG_SCHED_SET;
@@ -155,7 +155,7 @@
 
   if ((self->flags & ATTR_FLAG_POLICY_SET) == 0)
     {
-      self->schedpolicy = __sched_getscheduler (self->tid);
+      self->schedpolicy = __sched_getscheduler (atomic_load(self->tid));
       if (self->schedpolicy == -1)
 	result = -1;
       else
diff -r --unified eglibc-2.19.orig/nss/getXXbyYY_r.c eglibc-2.19/nss/getXXbyYY_r.c
--- eglibc-2.19.orig/nss/getXXbyYY_r.c	2014-05-12 14:05:10.599661625 +0200
+++ eglibc-2.19/nss/getXXbyYY_r.c	2014-12-02 16:58:38.773874337 +0100
@@ -200,7 +200,7 @@
     }
 #endif
 
-  if (! startp_initialized)
+  if (! atomic_load(startp_initialized))
     {
       no_more = DB_LOOKUP_FCT (&nip, REENTRANT_NAME_STRING,
 			       REENTRANT2_NAME_STRING, &fct.ptr);
@@ -244,7 +244,7 @@
       /* Make sure start_fct and startp are written before
 	 startp_initialized.  */
       atomic_write_barrier ();
-      startp_initialized = true;
+      atomic_store(startp_initialized, true);
     }
   else
     {
Only in eglibc-2.19: screenlog.0
diff -r --unified eglibc-2.19.orig/stdlib/cxa_atexit.c eglibc-2.19/stdlib/cxa_atexit.c
--- eglibc-2.19.orig/stdlib/cxa_atexit.c	2014-05-12 14:05:12.247661620 +0200
+++ eglibc-2.19/stdlib/cxa_atexit.c	2014-12-02 16:58:38.773874337 +0100
@@ -44,7 +44,7 @@
   new->func.cxa.arg = arg;
   new->func.cxa.dso_handle = d;
   atomic_write_barrier ();
-  new->flavor = ef_cxa;
+  atomic_store(new->flavor, ef_cxa);
   return 0;
 }
 
diff -r --unified eglibc-2.19.orig/stdlib/cxa_finalize.c eglibc-2.19/stdlib/cxa_finalize.c
--- eglibc-2.19.orig/stdlib/cxa_finalize.c	2014-05-12 14:05:12.255661620 +0200
+++ eglibc-2.19/stdlib/cxa_finalize.c	2014-12-02 16:58:38.773874337 +0100
@@ -32,7 +32,7 @@
   struct exit_function_list *funcs;
 
  restart:
-  for (funcs = __exit_funcs; funcs; funcs = funcs->next)
+  for (funcs = atomic_load(__exit_funcs); funcs; funcs = atomic_load(funcs->next))
     {
       struct exit_function *f;
 
@@ -41,10 +41,10 @@
 	  void (*cxafn) (void *arg, int status);
 	  void *cxaarg;
 
-	  if ((d == NULL || d == f->func.cxa.dso_handle)
+	  if ((d == NULL || d == atomic_load(f->func.cxa.dso_handle))
 	      /* We don't want to run this cleanup more than once.  */
-	      && (cxafn = f->func.cxa.fn,
-		  cxaarg = f->func.cxa.arg,
+	      && (cxafn = atomic_load(f->func.cxa.fn),
+		  cxaarg = atomic_load(f->func.cxa.arg),
 		  ! catomic_compare_and_exchange_bool_acq (&f->flavor, ef_free,
 							   ef_cxa)))
 	    {
@@ -64,13 +64,13 @@
     }
 
   /* Also remove the quick_exit handlers, but do not call them.  */
-  for (funcs = __quick_exit_funcs; funcs; funcs = funcs->next)
+  for (funcs = atomic_load(__quick_exit_funcs); funcs; funcs = atomic_load(funcs->next))
     {
       struct exit_function *f;
 
       for (f = &funcs->fns[funcs->idx - 1]; f >= &funcs->fns[0]; --f)
-	if (d == NULL || d == f->func.cxa.dso_handle)
-	  f->flavor = ef_free;
+	if (d == NULL || d == atomic_load(f->func.cxa.dso_handle))
+	  atomic_store(f->flavor, ef_free);
     }
 
   /* Remove the registered fork handlers.  We do not have to
diff -r --unified eglibc-2.19.orig/stdlib/msort.c eglibc-2.19/stdlib/msort.c
--- eglibc-2.19.orig/stdlib/msort.c	2014-05-12 14:05:12.251661620 +0200
+++ eglibc-2.19/stdlib/msort.c	2014-12-02 16:58:38.774874337 +0100
@@ -182,7 +182,7 @@
       static long int phys_pages;
       static int pagesize;
 
-      if (pagesize == 0)
+      if (atomic_load(pagesize) == 0)
 	{
 	  phys_pages = __sysconf (_SC_PHYS_PAGES);
 
@@ -200,7 +200,7 @@
 	  /* Make sure phys_pages is written to memory.  */
 	  atomic_write_barrier ();
 
-	  pagesize = __sysconf (_SC_PAGESIZE);
+	  atomic_store(pagesize, __sysconf (_SC_PAGESIZE));
 	}
 
       /* Just a comment here.  We cannot compute
diff -r --unified eglibc-2.19.orig/stdlib/on_exit.c eglibc-2.19/stdlib/on_exit.c
--- eglibc-2.19.orig/stdlib/on_exit.c	2014-05-12 14:05:12.251661620 +0200
+++ eglibc-2.19/stdlib/on_exit.c	2014-12-02 16:58:38.774874337 +0100
@@ -35,7 +35,7 @@
   new->func.on.fn = func;
   new->func.on.arg = arg;
   atomic_write_barrier ();
-  new->flavor = ef_on;
+  atomic_store(new->flavor, ef_on);
   return 0;
 }
 weak_alias (__on_exit, on_exit)
diff -r --unified eglibc-2.19.orig/stijn-build-libc-fast.sh eglibc-2.19/stijn-build-libc-fast.sh
--- eglibc-2.19.orig/stijn-build-libc-fast.sh	2014-05-12 14:05:12.255661620 +0200
+++ eglibc-2.19/stijn-build-libc-fast.sh	2014-12-03 12:07:34.036176788 +0100
@@ -1,2 +1,2 @@
 #debuild --preserve-envvar=CCACHE* --preserve-envvar=DISTCC* --prepend-path=/usr/local/bin/ -j8
-DEB_BUILD_OPTIONS=nocheck debuild -j8 -us -uc -b
+DEB_BUILD_OPTIONS=nocheck debuild -j16 -us -uc -b
diff -r --unified eglibc-2.19.orig/sysdeps/unix/sysv/linux/check_pf.c eglibc-2.19/sysdeps/unix/sysv/linux/check_pf.c
--- eglibc-2.19.orig/sysdeps/unix/sysv/linux/check_pf.c	2014-05-12 14:05:12.347661620 +0200
+++ eglibc-2.19/sysdeps/unix/sysv/linux/check_pf.c	2014-12-02 16:58:38.774874337 +0100
@@ -77,7 +77,7 @@
   if (atomic_increment_val (&nl_timestamp) == 0)
     atomic_increment (&nl_timestamp);
 
-  return nl_timestamp;
+  return atomic_load(nl_timestamp);
 }
 #endif
 
@@ -360,7 +360,7 @@
       *in6ailen = data->in6ailen;
       *in6ai = data->in6ai;
 
-      if (olddata != NULL && olddata->usecnt > 0
+      if (atomic_load(olddata) != NULL && atomic_load(olddata->usecnt) > 0
 	  && atomic_add_zero (&olddata->usecnt, -1))
 	free (olddata);
 
diff -r --unified eglibc-2.19.orig/sysdeps/unix/sysv/linux/getsysstats.c eglibc-2.19/sysdeps/unix/sysv/linux/getsysstats.c
--- eglibc-2.19.orig/sysdeps/unix/sysv/linux/getsysstats.c	2014-05-12 14:05:12.327661620 +0200
+++ eglibc-2.19/sysdeps/unix/sysv/linux/getsysstats.c	2014-12-02 16:58:38.775874337 +0100
@@ -130,7 +130,7 @@
   static time_t timestamp;
 
   time_t now = time (NULL);
-  time_t prev = timestamp;
+  time_t prev = atomic_load(timestamp);
   atomic_read_barrier ();
   if (now == prev)
     return cached_result;
@@ -224,7 +224,7 @@
  out:
   cached_result = result;
   atomic_write_barrier ();
-  timestamp = now;
+  atomic_store(timestamp, now);
 
   return result;
 }
diff -r --unified eglibc-2.19.orig/sysdeps/unix/sysv/linux/malloc-sysdep.h eglibc-2.19/sysdeps/unix/sysv/linux/malloc-sysdep.h
--- eglibc-2.19.orig/sysdeps/unix/sysv/linux/malloc-sysdep.h	2014-05-12 14:05:12.327661620 +0200
+++ eglibc-2.19/sysdeps/unix/sysv/linux/malloc-sysdep.h	2014-12-02 16:58:38.775874337 +0100
@@ -34,13 +34,14 @@
 check_may_shrink_heap (void)
 {
   static int may_shrink_heap = -1;
+  int tmp_shrink_heap = atomic_load(may_shrink_heap);
 
-  if (__builtin_expect (may_shrink_heap >= 0, 1))
-    return may_shrink_heap;
+  if (__builtin_expect (tmp_shrink_heap >= 0, 1))
+    return tmp_shrink_heap;
 
-  may_shrink_heap = __libc_enable_secure;
+  atomic_store(may_shrink_heap, __libc_enable_secure);
 
-  if (__builtin_expect (may_shrink_heap == 0, 1))
+  if (__builtin_expect (atomic_load(may_shrink_heap) == 0, 1))
     {
       int fd = open_not_cancel_2 ("/proc/sys/vm/overcommit_memory",
 				  O_RDONLY | O_CLOEXEC);
@@ -48,12 +49,12 @@
 	{
 	  char val;
 	  ssize_t n = read_not_cancel (fd, &val, 1);
-	  may_shrink_heap = n > 0 && val == '2';
+	  atomic_store(may_shrink_heap, n > 0 && val == '2');
 	  close_not_cancel_no_status (fd);
 	}
     }
 
-  return may_shrink_heap;
+  return atomic_load(may_shrink_heap);
 }
 
 #define HAVE_MREMAP 1
diff -r --unified eglibc-2.19.orig/sysdeps/x86_64/bits/atomic.h eglibc-2.19/sysdeps/x86_64/bits/atomic.h
--- eglibc-2.19.orig/sysdeps/x86_64/bits/atomic.h	2014-05-12 14:05:12.275661620 +0200
+++ eglibc-2.19/sysdeps/x86_64/bits/atomic.h	2014-12-08 13:10:14.570377625 +0100
@@ -55,10 +55,71 @@
 # endif
 #endif
 
+#define orig_catomic_max(mem, value)					\
+  do { /* Raise *mem to value unless it is already >= value.  */	\
+    __typeof (*(mem)) __atg9_oldv;					\
+    __typeof (mem) __atg9_memp = (mem);					\
+    __typeof (*(mem)) __atg9_value = (value);				\
+    do { /* re-read and retry while the compare-and-exchange fails */	\
+      __atg9_oldv = *(volatile __typeof (mem))__atg9_memp;		\
+      if (__atg9_oldv >= __atg9_value)					\
+	break;								\
+    } while (__builtin_expect						\
+	     (orig_catomic_compare_and_exchange_bool_acq (__atg9_memp,	\
+							  __atg9_value,	\
+							  __atg9_oldv), 0)); \
+  } while (0)
+
+// overrides from include/atomic.h
+#define orig_catomic_compare_and_exchange_val_acq(mem, newval, oldval)	\
+  __atomic_val_bysize (__arch_c_compare_and_exchange_val,acq,		\
+		       mem, newval, oldval)
+
+#define orig_catomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
+  ({ __typeof (oldval) __atg3_old = (oldval);				\
+    orig_catomic_compare_and_exchange_val_acq (mem, newval, __atg3_old) \
+      != __atg3_old;							\
+  })
+
+#define orig_atomic_forced_read(x) \
+  ({ __typeof (x) __x; __asm ("" : "=r" (__x) : "0" (x)); __x; })
+
+// MVEE additions
+#define orig_atomic_max(mem, value)					\
+  do { /* Raise *mem to value unless it is already >= value.  */	\
+    __typeof (*(mem)) __atg8_oldval;					\
+    __typeof (mem) __atg8_memp = (mem);					\
+    __typeof (*(mem)) __atg8_value = (value);				\
+    do { /* re-read and retry while the compare-and-exchange fails */	\
+      __atg8_oldval = *(volatile __typeof (mem))__atg8_memp;		\
+      if (__atg8_oldval >= __atg8_value)				\
+	break;								\
+    } while (__builtin_expect						\
+	     (orig_atomic_compare_and_exchange_bool_acq (__atg8_memp, __atg8_value, \
+							 __atg8_oldval), 0)); \
+  } while (0)
+
+#define orig_atomic_decrement_if_positive(mem)				\
+  ({ __typeof (*(mem)) __atg11_oldval;					\
+    __typeof (mem) __atg11_memp = (mem);				\
+    /* Decrement *mem only if it is > 0; yields the value read before.  */ \
+    do									\
+      {									\
+	__atg11_oldval = *(volatile __typeof (mem))__atg11_memp;	\
+	if (__builtin_expect (__atg11_oldval <= 0, 0))			\
+	  break;							\
+      }									\
+    while (__builtin_expect						\
+	   (orig_atomic_compare_and_exchange_bool_acq (__atg11_memp,	\
+						       __atg11_oldval - 1, \
+						       __atg11_oldval), 0)); \
+    __atg11_oldval; })
+
 
-#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
+// original sysdeps atomics
+#define orig_atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
   __sync_val_compare_and_swap (mem, oldval, newval)
-#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
+#define orig_atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
   (! __sync_bool_compare_and_swap (mem, oldval, newval))
 
 
@@ -110,7 +171,7 @@
 
 
 /* Note that we need no lock prefix.  */
-#define atomic_exchange_acq(mem, newvalue) \
+#define orig_atomic_exchange_acq(mem, newvalue) \
   ({ __typeof (*mem) result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile ("xchgb %b0, %1"				      \
@@ -132,7 +193,7 @@
      result; })
 
 
-#define __arch_exchange_and_add_body(lock, mem, value)			      \
+#define __arch_exchange_and_add_body(lock, mem, value)			\
   ({ __typeof (*mem) result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (lock "xaddb %b0, %1"				      \
@@ -157,13 +218,13 @@
 			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
      result; })
 
-#define atomic_exchange_and_add(mem, value) \
+#define orig_atomic_exchange_and_add(mem, value) \
   __sync_fetch_and_add (mem, value)
 
 #define __arch_exchange_and_add_cprefix \
   "cmpl $0, %%fs:%P4\n\tje 0f\n\tlock\n0:\t"
 
-#define catomic_exchange_and_add(mem, value) \
+#define orig_catomic_exchange_and_add(mem, value) \
   __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, mem, value)
 
 
@@ -196,17 +257,17 @@
 			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
   } while (0)
 
-#define atomic_add(mem, value) \
-  __arch_add_body (LOCK_PREFIX, atomic, mem, value)
+#define orig_atomic_add(mem, value) \
+  ({__arch_add_body (LOCK_PREFIX, atomic, mem, value);})
 
 #define __arch_add_cprefix \
   "cmpl $0, %%fs:%P3\n\tje 0f\n\tlock\n0:\t"
 
-#define catomic_add(mem, value) \
-  __arch_add_body (__arch_add_cprefix, catomic, mem, value)
+#define orig_catomic_add(mem, value) \
+  ({__arch_add_body (__arch_add_cprefix, catomic, mem, value);})
 
 
-#define atomic_add_negative(mem, value) \
+#define orig_atomic_add_negative(mem, value) \
   ({ unsigned char __result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1"		      \
@@ -228,7 +289,7 @@
      __result; })
 
 
-#define atomic_add_zero(mem, value) \
+#define orig_atomic_add_zero(mem, value) \
   ({ unsigned char __result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1"		      \
@@ -274,16 +335,17 @@
 			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
   } while (0)
 
-#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, mem)
+#define orig_atomic_increment(mem) ({__arch_increment_body (LOCK_PREFIX, mem);})
+#define orig_nonatomic_increment(mem) ({__arch_increment_body ("", mem);})
 
 #define __arch_increment_cprefix \
   "cmpl $0, %%fs:%P2\n\tje 0f\n\tlock\n0:\t"
 
-#define catomic_increment(mem) \
-  __arch_increment_body (__arch_increment_cprefix, mem)
+#define orig_catomic_increment(mem) \
+  ({__arch_increment_body (__arch_increment_cprefix, mem);})
 
 
-#define atomic_increment_and_test(mem) \
+#define orig_atomic_increment_and_test(mem) \
   ({ unsigned char __result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (LOCK_PREFIX "incb %b0; sete %1"		      \
@@ -328,16 +390,16 @@
 			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
   } while (0)
 
-#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, mem)
+#define orig_atomic_decrement(mem) ({__arch_decrement_body (LOCK_PREFIX, mem);})
 
 #define __arch_decrement_cprefix \
   "cmpl $0, %%fs:%P2\n\tje 0f\n\tlock\n0:\t"
 
-#define catomic_decrement(mem) \
-  __arch_decrement_body (__arch_decrement_cprefix, mem)
+#define orig_catomic_decrement(mem) \
+    ({__arch_decrement_body (__arch_decrement_cprefix, mem);})
 
 
-#define atomic_decrement_and_test(mem) \
+#define orig_atomic_decrement_and_test(mem) \
   ({ unsigned char __result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (LOCK_PREFIX "decb %b0; sete %1"		      \
@@ -358,7 +420,7 @@
      __result; })
 
 
-#define atomic_bit_set(mem, bit) \
+#define orig_atomic_bit_set(mem, bit) \
   do {									      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "orb %b2, %0"			      \
@@ -383,7 +445,7 @@
   } while (0)
 
 
-#define atomic_bit_test_set(mem, bit) \
+#define orig_atomic_bit_test_set(mem, bit) \
   ({ unsigned char __result;						      \
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"		      \
@@ -434,9 +496,9 @@
 #define __arch_cprefix \
   "cmpl $0, %%fs:%P3\n\tje 0f\n\tlock\n0:\t"
 
-#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)
+#define orig_atomic_and(mem, mask) ({__arch_and_body (LOCK_PREFIX, mem, mask);})
 
-#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)
+#define orig_catomic_and(mem, mask) ({__arch_and_body (__arch_cprefix, mem, mask);})
 
 
 #define __arch_or_body(lock, mem, mask)					      \
@@ -463,6 +525,479 @@
 			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
   } while (0)
 
-#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)
+#define orig_atomic_or(mem, mask) ({__arch_or_body (LOCK_PREFIX, mem, mask);})
 
-#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)
+#define orig_catomic_or(mem, mask) ({__arch_or_body (__arch_cprefix, mem, mask);})
+
+/*--------------------------------------------------------------------------------
+                                  MVEE PATCHES
+--------------------------------------------------------------------------------*/
+#define USE_MVEE_LIBC
+
+#define MVEE_MAX_COUNTERS 65536
+
+#ifdef USE_MVEE_LIBC
+#define MVEE_FAKE_SYSCALL_BASE          0x6FFFFFFF /* the numbers below are offsets from this base */
+#define MVEE_GET_MASTERTHREAD_ID        (MVEE_FAKE_SYSCALL_BASE + 3)
+#define MVEE_GET_SHARED_BUFFER          (MVEE_FAKE_SYSCALL_BASE + 4)
+#define MVEE_FLUSH_SHARED_BUFFER        (MVEE_FAKE_SYSCALL_BASE + 5)
+#define MVEE_SET_INFINITE_LOOP_PTR      (MVEE_FAKE_SYSCALL_BASE + 6)
+#define MVEE_TOGGLESYNC                 (MVEE_FAKE_SYSCALL_BASE + 7)
+#define MVEE_SET_SHARED_BUFFER_POS_PTR  (MVEE_FAKE_SYSCALL_BASE + 8)
+#define MVEE_RUNS_UNDER_MVEE_CONTROL    (MVEE_FAKE_SYSCALL_BASE + 9)
+#define MVEE_GET_THREAD_NUM             (MVEE_FAKE_SYSCALL_BASE + 10)
+#define MVEE_SET_SYNC_PRIMITIVES_PTR    (MVEE_FAKE_SYSCALL_BASE + 12)
+#define MVEE_ALL_HEAPS_ALIGNED          (MVEE_FAKE_SYSCALL_BASE + 13)
+#define MVEE_LIBC_LOCK_BUFFER           3
+#define MVEE_LIBC_MALLOC_DEBUG_BUFFER   11
+#define MVEE_LIBC_ATOMIC_BUFFER         13
+#define MVEE_FUTEX_WAIT_TID             30
+
+enum mvee_alloc_types
+  {
+  LIBC_MALLOC,
+  LIBC_FREE,
+  LIBC_REALLOC,
+  LIBC_MEMALIGN,
+  LIBC_CALLOC,
+  MALLOC_TRIM,
+  HEAP_TRIM,
+  MALLOC_CONSOLIDATE,
+  ARENA_GET2,
+  _INT_MALLOC,
+  _INT_FREE,
+  _INT_REALLOC
+  };
+
+enum mvee_base_atomics /* operation tags; the atomic wrapper macros in this header pass them as MVEE_PREOP's op_type argument */
+  {
+    // LOAD OPERATIONS FIRST!!! DO NOT CHANGE THIS CONVENTION
+    ATOMIC_FORCED_READ,
+    ATOMIC_LOAD,
+    // THE FOLLOWING IS NOT AN ACTUAL ATOMIC OPERATION, IT JUST DENOTES THE END OF THE LOAD-ONLY ATOMICS!!!
+    ATOMIC_LOAD_MAX,
+    // STORES AFTER LOADS
+    CATOMIC_COMPARE_AND_EXCHANGE_VAL_ACQ,
+    CATOMIC_COMPARE_AND_EXCHANGE_BOOL_ACQ,
+    CATOMIC_AND,
+    CATOMIC_OR,
+    CATOMIC_EXCHANGE_AND_ADD,
+    CATOMIC_ADD,
+    CATOMIC_INCREMENT,
+    CATOMIC_DECREMENT,
+    CATOMIC_MAX,
+    ATOMIC_COMPARE_AND_EXCHANGE_VAL_ACQ,
+    ATOMIC_COMPARE_AND_EXCHANGE_BOOL_ACQ,
+    ATOMIC_EXCHANGE_ACQ,
+    ATOMIC_EXCHANGE_AND_ADD,
+    ATOMIC_INCREMENT_AND_TEST,
+    ATOMIC_DECREMENT_AND_TEST,
+    ATOMIC_ADD_ZERO,
+    ATOMIC_ADD,
+    ATOMIC_INCREMENT,
+    ATOMIC_DECREMENT,
+    ATOMIC_BIT_TEST_SET,
+    ATOMIC_BIT_SET,
+    ATOMIC_AND,
+    ATOMIC_STORE,
+    ATOMIC_MAX,
+    ATOMIC_DECREMENT_IF_POSITIVE,
+    __THREAD_ATOMIC_CMPXCHG_VAL,
+    __THREAD_ATOMIC_AND,
+    __THREAD_ATOMIC_BIT_SET,
+    ___UNKNOWN_LOCK_TYPE___,
+    __MVEE_BASE_ATOMICS_MAX__ /* last enumerator, so its value equals the number of tags above */
+  };
+
+enum mvee_extended_atomics {
+  mvee_atomic_load_n,
+  mvee_atomic_load,
+  mvee_atomic_store_n,
+  mvee_atomic_store,
+  mvee_atomic_exchange_n,
+  mvee_atomic_exchange,
+  mvee_atomic_compare_exchange_n,
+  mvee_atomic_compare_exchange,
+  mvee_atomic_add_fetch,
+  mvee_atomic_sub_fetch,
+  mvee_atomic_and_fetch,
+  mvee_atomic_xor_fetch,
+  mvee_atomic_or_fetch,
+  mvee_atomic_nand_fetch,
+  mvee_atomic_fetch_add,
+  mvee_atomic_fetch_sub,
+  mvee_atomic_fetch_and,
+  mvee_atomic_fetch_xor,
+  mvee_atomic_fetch_or,
+  mvee_atomic_fetch_nand,
+  mvee_atomic_test_and_set,
+  mvee_atomic_clear,
+  mvee_atomic_always_lock_free,
+  mvee_atomic_is_lock_free,
+  mvee_sync_fetch_and_add,
+  mvee_sync_fetch_and_sub,
+  mvee_sync_fetch_and_or,
+  mvee_sync_fetch_and_and,
+  mvee_sync_fetch_and_xor,
+  mvee_sync_fetch_and_nand,
+  mvee_sync_add_and_fetch,
+  mvee_sync_sub_and_fetch,
+  mvee_sync_or_and_fetch,
+  mvee_sync_and_and_fetch,
+  mvee_sync_xor_and_fetch,
+  mvee_sync_nand_and_fetch,
+  mvee_sync_bool_compare_and_swap,
+  mvee_sync_val_compare_and_swap,
+  mvee_sync_lock_test_and_set,
+  mvee_sync_lock_release,
+  mvee_atomic_ops_max
+};
+
+#define MVEE_ROUND_UP(x, multiple)	/* rounds x up by masking, so only correct when multiple is a power of two; multiple is evaluated twice */	\
+  (((x) + ((multiple) - 1)) & ~((multiple) - 1))
+#define MVEE_MIN(a, b) (((a) > (b)) ? (b) : (a)) /* smaller of a and b; the selected argument is evaluated twice */
+#define MVEE_MALLOC_HOOK(type, msg, sz, ar_ptr, chunk_ptr)
+
+extern void          mvee_atomic_postop_internal (unsigned char preop_result);
+extern unsigned char mvee_atomic_preop_internal  (void* word_ptr);
+extern int           mvee_should_sync_tid        (void);
+extern int           mvee_all_heaps_aligned      (char* heap); 
+extern void          mvee_invalidate_buffer      (void);
+
+#define MVEE_POSTOP()	/* hands __tmp_mvee_preop (declared by MVEE_PREOP in the same scope) to the postop hook; the expansion already ends in ';' */	\
+  mvee_atomic_postop_internal(__tmp_mvee_preop);
+
+#define MVEE_PREOP(op_type, mem, is_store)	/* declares __tmp_mvee_preop holding the preop hook's result for mem; op_type and is_store are not used by this definition; the expansion already ends in ';' */	\
+   register unsigned char  __tmp_mvee_preop = mvee_atomic_preop_internal(mem);
+
+#endif
+
+#ifdef IS_IN_rtld
+
+// sysdeps/atomic.h patches
+#define catomic_and(mem, mask) orig_catomic_and(mem, mask)
+#define catomic_or(mem, mask) orig_catomic_or(mem, mask)
+#define catomic_exchange_and_add(mem, value) orig_catomic_exchange_and_add(mem, value)
+#define catomic_add(mem, value) orig_catomic_add(mem, value)
+#define catomic_increment(mem) orig_catomic_increment(mem)
+#define catomic_decrement(mem) orig_catomic_decrement(mem)
+#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) orig_atomic_compare_and_exchange_val_acq(mem, newval, oldval)
+#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) orig_atomic_compare_and_exchange_bool_acq(mem, newval, oldval)
+#define atomic_exchange_acq(mem, newvalue) orig_atomic_exchange_acq(mem, newvalue)
+#define atomic_exchange_and_add(mem, value) orig_atomic_exchange_and_add(mem, value)
+#define atomic_increment_and_test(mem) orig_atomic_increment_and_test(mem)
+#define atomic_decrement_and_test(mem) orig_atomic_decrement_and_test(mem)
+#define atomic_add_zero(mem, value) orig_atomic_add_zero(mem, value)
+#define atomic_add(mem, value) orig_atomic_add(mem, value)
+#define atomic_increment(mem) orig_atomic_increment(mem)
+#define atomic_decrement(mem) orig_atomic_decrement(mem)
+#define atomic_bit_test_set(mem, bit) orig_atomic_bit_test_set(mem, bit)
+#define atomic_bit_set(mem, bit) orig_atomic_bit_set(mem, bit)
+#define atomic_and(mem, mask) orig_atomic_and(mem, mask)
+
+// include/atomic.h patches
+// #define catomic_compare_and_exchange_val_acq(mem, newval, oldval) orig_catomic_compare_and_exchange_val_acq(mem, newval, oldval)
+// #define catomic_compare_and_exchange_bool_acq(mem, newval, oldval) orig_catomic_compare_and_exchange_bool_acq(mem, newval, oldval)
+// #define atomic_forced_read(x) orig_atomic_forced_read(x)
+
+// nptl/sysdeps/tls.h patches
+// #define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) orig_THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval)
+// #define THREAD_ATOMIC_AND(descr, member, val) orig_THREAD_ATOMIC_AND(descr, member, val)
+// #define THREAD_ATOMIC_BIT_SET(descr, member, bit) orig_THREAD_ATOMIC_BIT_SET(descr, member, bit)
+
+// MVEE additions
+#define atomic_load(var) ({ var; })
+#define atomic_store(var, val) ({ var = val; })
+
+#else // !IS_IN_rtld
+
+// sysdeps/atomic.h patches
+#define catomic_and(mem, mask)	/* orig_catomic_and bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(CATOMIC_AND, mem, 1);		\
+    orig_catomic_and(mem, mask);		\
+    MVEE_POSTOP();				\
+  })
+
+#define catomic_or(mem, mask)	/* orig_catomic_or bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(CATOMIC_OR, mem, 1);		\
+    orig_catomic_or(mem, mask);			\
+    MVEE_POSTOP();				\
+  })
+
+
+#define catomic_exchange_and_add(mem, value)	/* evaluates to orig_catomic_exchange_and_add's result, captured before the postop hook runs */	\
+  ({								\
+    typeof(*mem) ____result;					\
+    MVEE_PREOP(CATOMIC_EXCHANGE_AND_ADD, mem, 1);		\
+    ____result = orig_catomic_exchange_and_add(mem, value);	\
+    MVEE_POSTOP();						\
+    ____result;							\
+  })
+
+
+#define catomic_add(mem, value)	/* orig_catomic_add bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(CATOMIC_ADD, mem, 1);		\
+    orig_catomic_add(mem, value);		\
+    MVEE_POSTOP();				\
+  })
+
+
+#define catomic_increment(mem)	/* orig_catomic_increment bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(CATOMIC_INCREMENT, mem, 1);	\
+    orig_catomic_increment(mem);		\
+    MVEE_POSTOP();				\
+  })
+
+
+#define catomic_decrement(mem)	/* orig_catomic_decrement bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(CATOMIC_DECREMENT, mem, 1);	\
+    orig_catomic_decrement(mem);		\
+    MVEE_POSTOP();				\
+  })
+
+#define catomic_max(mem, value)	/* orig_catomic_max (not defined in this part of the header) bracketed by the MVEE hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(CATOMIC_MAX, mem, 1);		\
+    orig_catomic_max(mem, value);		\
+    MVEE_POSTOP();				\
+  })
+
+
+#define atomic_compare_and_exchange_val_acq(mem, newval, oldval)	/* evaluates to the orig_ macro's result, captured before the postop hook runs */	\
+  ({									\
+    typeof(*mem) ____result;						\
+    MVEE_PREOP(ATOMIC_COMPARE_AND_EXCHANGE_VAL_ACQ, mem, 1);		\
+    ____result = orig_atomic_compare_and_exchange_val_acq(mem, newval, oldval); \
+    MVEE_POSTOP();							\
+    ____result;								\
+  })
+
+
+#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval)	/* evaluates to the orig_ macro's result converted to bool, captured before the postop hook runs */	\
+  ({									\
+    bool ____result;							\
+    MVEE_PREOP(ATOMIC_COMPARE_AND_EXCHANGE_BOOL_ACQ, mem, 1);		\
+    ____result = orig_atomic_compare_and_exchange_bool_acq(mem, newval, oldval); \
+    MVEE_POSTOP();							\
+    ____result;								\
+  })
+
+
+#define atomic_exchange_acq(mem, newvalue)	/* evaluates to orig_atomic_exchange_acq's result, captured before the postop hook runs */	\
+  ({								\
+    typeof(*mem) ____result;					\
+    MVEE_PREOP(ATOMIC_EXCHANGE_ACQ, mem, 1);			\
+    ____result = orig_atomic_exchange_acq(mem, newvalue);	\
+    MVEE_POSTOP();						\
+    ____result;							\
+  })
+
+
+#define atomic_exchange_and_add(mem, value)	/* evaluates to orig_atomic_exchange_and_add's result, captured before the postop hook runs */	\
+  ({								\
+    typeof(*mem) ____result;					\
+    MVEE_PREOP(ATOMIC_EXCHANGE_AND_ADD, mem, 1);		\
+    ____result = orig_atomic_exchange_and_add(mem, value);	\
+    MVEE_POSTOP();						\
+    ____result;							\
+  })
+
+
+#define atomic_increment_and_test(mem)	/* evaluates to orig_atomic_increment_and_test's result (unsigned char), captured before the postop hook runs */	\
+  ({							\
+    unsigned char ____result;				\
+    MVEE_PREOP(ATOMIC_INCREMENT_AND_TEST, mem, 1);	\
+    ____result = orig_atomic_increment_and_test(mem);	\
+    MVEE_POSTOP();					\
+    ____result;						\
+  })
+
+
+#define atomic_decrement_and_test(mem)	/* evaluates to orig_atomic_decrement_and_test's result (unsigned char), captured before the postop hook runs */	\
+  ({							\
+    unsigned char ____result;				\
+    MVEE_PREOP(ATOMIC_DECREMENT_AND_TEST, mem, 1);	\
+    ____result = orig_atomic_decrement_and_test(mem);	\
+    MVEE_POSTOP();					\
+    ____result;						\
+  })
+
+
+#define atomic_add_zero(mem, value)	/* evaluates to orig_atomic_add_zero's result (unsigned char), captured before the postop hook runs */	\
+  ({							\
+    unsigned char ____result;				\
+    MVEE_PREOP(ATOMIC_ADD_ZERO, mem, 1);		\
+    ____result = orig_atomic_add_zero(mem, value);	\
+    MVEE_POSTOP();					\
+    ____result;						\
+  })
+
+
+#define atomic_add(mem, value)	/* orig_atomic_add bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_ADD, mem, 1);		\
+    orig_atomic_add(mem, value);		\
+    MVEE_POSTOP();				\
+  })
+
+
+#define atomic_increment(mem)	/* orig_atomic_increment bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_INCREMENT, mem, 1);	\
+    orig_atomic_increment(mem);			\
+    MVEE_POSTOP();				\
+  })
+
+
+#define atomic_decrement(mem)	/* orig_atomic_decrement bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_DECREMENT, mem, 1);	\
+    orig_atomic_decrement(mem);			\
+    MVEE_POSTOP();				\
+  })
+
+
+#define atomic_bit_test_set(mem, bit)	/* evaluates to orig_atomic_bit_test_set's result (unsigned char), captured before the postop hook runs */	\
+  ({							\
+    unsigned char ____result;				\
+    MVEE_PREOP(ATOMIC_BIT_TEST_SET, mem, 1);		\
+    ____result = orig_atomic_bit_test_set(mem, bit);	\
+    MVEE_POSTOP();					\
+    ____result;						\
+  })
+
+
+#define atomic_bit_set(mem, bit)	/* orig_atomic_bit_set bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_BIT_SET, mem, 1);		\
+    orig_atomic_bit_set(mem, bit);		\
+    MVEE_POSTOP();				\
+  })
+
+
+#define atomic_and(mem, mask)	/* orig_atomic_and bracketed by the MVEE preop/postop hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_AND, mem, 1);		\
+    orig_atomic_and(mem, mask);			\
+    MVEE_POSTOP();				\
+  })
+
+
+
+// include/atomic.h patches
+#define catomic_compare_and_exchange_val_acq(mem, newval, oldval)	/* evaluates to the orig_ macro's result, captured before the postop hook runs */	\
+  ({									\
+    typeof(*mem) ____result;						\
+    MVEE_PREOP(CATOMIC_COMPARE_AND_EXCHANGE_VAL_ACQ, mem, 1);		\
+    ____result = orig_catomic_compare_and_exchange_val_acq(mem, newval, oldval); \
+    MVEE_POSTOP();							\
+    ____result;								\
+  })
+
+#define catomic_compare_and_exchange_bool_acq(mem, newval, oldval)	/* evaluates to the orig_ macro's result converted to bool, captured before the postop hook runs */	\
+  ({									\
+    bool ____result;							\
+    MVEE_PREOP(CATOMIC_COMPARE_AND_EXCHANGE_BOOL_ACQ, mem, 1);		\
+    ____result = orig_catomic_compare_and_exchange_bool_acq(mem, newval, oldval); \
+    MVEE_POSTOP();							\
+    ____result;								\
+  })
+
+#define atomic_forced_read(x)	/* orig_atomic_forced_read(x) bracketed by the MVEE hooks; takes &x, so x must be an lvalue; evaluates to the value read */	\
+  ({						\
+    typeof(x) ____result;			\
+    MVEE_PREOP(ATOMIC_FORCED_READ, &x, 0);	\
+    ____result = orig_atomic_forced_read(x);	\
+    MVEE_POSTOP();				\
+    ____result;					\
+  })
+
+
+
+// nptl/sysdeps/tls.h patches
+#if 0
+#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval)	\
+  ({									\
+    __typeof(descr->member) ____result;					\
+    MVEE_PREOP(__THREAD_ATOMIC_CMPXCHG_VAL, &descr->member, 1);		\
+    ____result = orig_THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval); \
+    MVEE_POSTOP();							\
+    ____result;								\
+  })
+
+
+#define THREAD_ATOMIC_AND(descr, member, val)			\
+  (void)({							\
+      MVEE_PREOP(__THREAD_ATOMIC_AND, &descr->member, 1);	\
+      orig_THREAD_ATOMIC_AND(descr, member, val);		\
+      MVEE_POSTOP();						\
+    })
+
+
+#define THREAD_ATOMIC_BIT_SET(descr, member, bit)		\
+  (void)({							\
+      MVEE_PREOP(__THREAD_ATOMIC_BIT_SET, &descr->member, 1);	\
+      orig_THREAD_ATOMIC_BIT_SET(descr, member, bit);		\
+      MVEE_POSTOP();						\
+    })
+#endif
+
+
+// MVEE additions
+#define atomic_load(var)	/* plain read of var bracketed by the MVEE hooks (the macro itself adds no barrier); takes &var, so var must be an lvalue */	\
+  ({						\
+    __typeof(var+0) ____result;	/* result has the type of the expression var+0 */	\
+    MVEE_PREOP(ATOMIC_LOAD, &var, 0);		\
+    ____result = var;				\
+    MVEE_POSTOP();				\
+    ____result;					\
+  })
+
+#define atomic_store(var, val)	/* plain assignment var = val bracketed by the MVEE hooks (the macro itself adds no barrier); yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_STORE, &var, 1);		\
+    var = val;					\
+    MVEE_POSTOP();				\
+  })
+
+#define atomic_max(mem, value)	/* orig_atomic_max (not defined in this part of the header) bracketed by the MVEE hooks; yields no value */	\
+  ({						\
+    MVEE_PREOP(ATOMIC_MAX, mem, 1);		\
+    orig_atomic_max(mem, value);		\
+    MVEE_POSTOP();				\
+  })
+
+#define atomic_decrement_if_positive(mem)	/* evaluates to orig_atomic_decrement_if_positive's result, captured before the postop hook runs */	\
+  ({							\
+    __typeof(*mem) __result;	/* note: the sibling wrappers name this local ____result */	\
+    MVEE_PREOP(ATOMIC_DECREMENT_IF_POSITIVE, mem, 1);	\
+    __result = orig_atomic_decrement_if_positive(mem);	\
+    MVEE_POSTOP();					\
+    __result;						\
+  })
+
+#define lll_futex_wake_unlock(futexp, nr_wake, nr_wake2, futexp2, private) \
+  ({ /* FUTEX_WAKE_OP syscall bracketed by the MVEE hooks on futexp2 */	      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    long int __ret;							      \
+    MVEE_PREOP(___UNKNOWN_LOCK_TYPE___, futexp2, 1);			      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp),		      \
+			      __lll_private_flag (FUTEX_WAKE_OP, private),    \
+			      (nr_wake), (nr_wake2), (futexp2),		      \
+			      FUTEX_OP_CLEAR_WAKE_IF_GT_ONE);		      \
+    if (mvee_should_futex_unlock())	/* hook decides whether this thread zeroes the word itself */ \
+      {									      \
+	*(futexp2) = 0;	/* parenthesized so any pointer expression is dereferenced as a whole */ \
+      }									\
+    MVEE_POSTOP();							      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err);	/* value of the whole macro */ \
+  })
+
+
+
+#endif
