--- a/l11/handout/c11-handout.txt
+++ b/l11/handout/c11-handout.txt
@@ -190,6 +190,11 @@ abstractly, to refer to execution contexts that share memory.)
         }
     }
 
+    void release(Lock *lock){
+        xchg_val(&lock->locked, 0);
+        popcli(); /* what does this do? */
+    }
+
 5b. Test-and-test-and-set lock
 
     /* optimization in acquire; call xchg_val() less frequently */
@@ -200,16 +205,13 @@ abstractly, to refer to execution contexts that share memory.)
         }
     }
 
-    void release(Lock *lock){
-        xchg_val(&lock->locked, 0);
-        popcli(); /* what does this do? */
-    }
+
 
 6. Ticket locks
 
     The spinlocks presented above have fairness issues on NUMA machines
-    (cores closer to the memory containing the 'locked' variable) are
-    more likely to succeed in acquiring the lock.
+    (cores closer to the memory containing the 'locked' variable are
+    more likely to succeed in acquiring the lock).
 
     Ticket locks address that issue.
 
@@ -218,17 +220,25 @@ abstractly, to refer to execution contexts that share memory.)
     instruction, but note that this instruction is not atomic by
     default, so we need the LOCK prefix.
 
-    /*
-     * see
-     * https://en.wikipedia.org/wiki/Fetch-and-add#x86_implementation
-     */
-    inline int fetch_and_add( int * variable, int value ) {
-        asm volatile("lock; xaddl %%eax, %2;"
-                     :"=a" (value)                  //Output
-                     :"a" (value), "m" (*variable)  //Input
-                     :"memory");
-        return value;
-    }
+    Here's pseudocode:
+
+    int fetch_and_increment(int* addr) {
+        LOCK: // remember, this is pseudocode
+            int was = *addr;
+            *addr = was + 1;
+            return was;
+    }
+
+    Here's inline assembly:
+
+    inline int fetch_and_increment(int *addr) {
+        int was = 1;
+        asm volatile("lock xaddl %1, %0"
+                     : "+m" (*addr), "=r" (was)  // Output
+                     : "1" (was), "m" (*addr)    // Input
+                     );
+        return was;
+    }
 
     struct Lock {
         int current_ticket;
         int next_ticket;
     };
@@ -237,15 +247,14 @@ abstractly, to refer to execution contexts that share memory.)
 
     void acquire (Lock *lock) {
-        int t = atomic_fetch_and_increment (&lock->next_ticket);
+        int t = fetch_and_increment (&lock->next_ticket);
        while (t != lock->current_ticket)
            ;
     }
 
     void release (Lock *lock) {
        lock->current_ticket++;
     }
-
-
+
 7. MCS locks (a kind of queue lock)
 
     Ticket locks are fair, as noted above, but they (and baseline
@@ -274,7 +283,7 @@ abstractly, to refer to execution contexts that share memory.)
     On the X86, we implement CAS with the CMPXCHG instruction, but
     note that this instruction is not atomic by default, so we need
     the LOCK prefix.
-    
+
     Here's pseudocode:
 
     int cmpxchg_val(int* addr, int oldval, int newval) {
@@ -291,7 +300,7 @@ abstractly, to refer to execution contexts that share memory.)
         uint32_t was;
         asm volatile("lock cmpxchg %3, %0"
                      : "+m" (*addr), "=a" (was)
-                     : "a" (oldval), "r" (newval)
+                     : "a" (oldval), "r" (newval), "m" (*addr)
                      : "cc");
         return was;
     }
@@ -313,8 +322,9 @@ abstractly, to refer to execution contexts that share memory.)
     --The lock itself is literally the *tail* of the list of CPUs
     holding or waiting for the lock.
 
-    --While waiting, a CPU spins on its local "locked" flag. Here's the
-    code for acquire:
+    --While waiting, a CPU spins on its local "locked" flag.
+
+    Here's the code for acquire:
 
     // lockp is a qnode**. I points to our local qnode.
     void acquire(lock* lockp, qnode* I) {
@@ -345,7 +355,7 @@ abstractly, to refer to execution contexts that share memory.)
     --If the lock is locked, and there are waiters, then *lockp
     points to the qnode at the tail of the waiter list.
-    
+
     --Here's the code for release:
 
     void release(lock* lockp, qnode* I) {
@@ -381,11 +391,12 @@ abstractly, to refer to execution contexts that share memory.)
     --If I->next != NULL, then we know that there is a spinning
     waiter (the oldest one). Hand it the lock by setting its flag
     to false.
-    
+
 9. Mutexes
 
     Motivation: all of the aforementioned locks were called spinlocks
-    because acquire() spins.
+    because acquire() spins. A mutex avoids busy waiting. Usually, in
+    user space code, you want to be using mutexes, not spinlocks.
 
     Spinlocks are good for some things, not so great for others. The
     main problem is that it *busy waits*: it spins, chewing up CPU
@@ -397,15 +408,9 @@ abstractly, to refer to execution contexts that share memory.)
     lock would waste cycles spinning instead of running some other
     thread or process).
 
-    This motivates mutexes. Usually, in user space code, you want to be
-    using mutexes, not spinlocks.
-
-    ---
-
-    The intent of a mutex is to avoid busy waiting: if the lock is not
-    available, the locking thread is put to sleep, and tracked by a
-    queue in the mutex.
-    
+    With a mutex, if the lock is not available, the locking thread is
+    put to sleep, and tracked by a queue in the mutex.
+
     struct Mutex {
        bool is_held;            /* true if mutex held */
       thread_id owner;          /* thread holding mutex, if locked */
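
    To make the sleep/wakeup idea concrete, here is a minimal sketch of
    mutex acquire/release. This is not the handout's implementation: it
    assumes the Mutex also carries a hypothetical guard spinlock and a
    FIFO queue of waiters, plus placeholder primitives -- spin_acquire()/
    spin_release() on the guard (any spinlock above would do), self(),
    queue_push()/queue_pop()/queue_empty(), thread_sleep() (assumed to
    atomically release the guard and block), and thread_wakeup().

    void mutex_acquire(Mutex *m) {
        spin_acquire(&m->guard);               /* protects the fields below */
        while (m->is_held) {
            queue_push(&m->waiters, self());   /* record that we are waiting */
            thread_sleep(&m->guard);           /* block; assumed to atomically
                                                  release the guard and
                                                  reacquire it on wakeup */
        }
        m->is_held = true;
        m->owner = self();
        spin_release(&m->guard);
    }

    void mutex_release(Mutex *m) {
        spin_acquire(&m->guard);
        m->is_held = false;
        if (!queue_empty(&m->waiters))
            thread_wakeup(queue_pop(&m->waiters)); /* wake the oldest waiter */
        spin_release(&m->guard);
    }

    The while (rather than if) matters: being woken means only that the
    mutex was released at some point, not that this thread now holds it,
    so the waiter re-checks is_held before claiming the mutex.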