WaitGroup Implementation

WaitGroup Implementation

Data Structure

// WaitGroup packs a 64-bit state word (counter in the high 32 bits, waiter
// count in the low 32 bits) and a 32-bit semaphore into the 12-byte state1
// array. The state() method decides which bytes serve which role.
type WaitGroup struct {
	// NOTE(review): noCopy is declared elsewhere; presumably it lets vet flag
	// copies of a WaitGroup — confirm.
	noCopy noCopy
	// 64-bit atomic operations need 64-bit (8-byte) memory alignment, and
	// 32-bit compilers can't guarantee this alignment for the array.
	// So 12 bytes are reserved: whichever 8 of them are 8-byte aligned hold
	// the state, and the remaining 4 bytes hold the semaphore.
	//   - state1 8-byte aligned:      state1[0..1] = state, state1[2] = semaphore
	//   - state1 only 4-byte aligned: state1[0] = semaphore, state1[1..2] = state
	state1 [3]uint32
}
// state returns pointers to the 64-bit state word and to the 32-bit semaphore,
// both carved out of wg.state1. The state word is always placed on the 8 bytes
// of the array that are 8-byte aligned so that 64-bit atomics can be used on it.
func (wg *WaitGroup) state() (statep *uint64, semap *uint32) {
	base := unsafe.Pointer(&wg.state1)
	if uintptr(base)%8 != 0 {
		// The array starts on a 4-byte boundary only, so its second element
		// is the 8-byte-aligned one: semaphore first, state after it.
		return (*uint64)(unsafe.Pointer(&wg.state1[1])), &wg.state1[0]
	}
	// The array itself is 8-byte aligned: state first, semaphore last.
	return (*uint64)(base), &wg.state1[2]
}
| state 64 (counter: high 32 bits, waiter: low 32 bits) | sema 32 |  -> state1 is 8-byte aligned (the usual case on 64-bit)

| sema 32 | state 64 (counter: high 32 bits, waiter: low 32 bits) |  -> state1 is only 4-byte aligned (can happen on 32-bit)

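CPUs read memory in aligned, word-sized blocks, so a 64-bit atomic operation is only guaranteed to work on an 8-byte-aligned address. That is why state() picks whichever 8 bytes of state1 are aligned.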

API

The source shows that Add must be called before the goroutine it accounts for is started (and before Wait is called): the counter has to be non-zero by the time Wait checks it, otherwise Wait may see zero and return early. Done just calls Add(-1).

// Add adds delta, which may be negative, to the WaitGroup counter stored in
// the high 32 bits of the state word.
//
// It panics if the counter becomes negative, or if a positive delta raises the
// counter from zero while waiters are already registered. When the counter
// reaches zero and waiters are present, it clears the state and releases the
// semaphore once per waiter.
func (wg *WaitGroup) Add(delta int) {
	statep, semap := wg.state()

	// Shifting delta into the high 32 bits changes only the counter; a
	// negative delta wraps around in uint64 arithmetic and so subtracts.
	state := atomic.AddUint64(statep, uint64(delta)<<32)
	v := int32(state >> 32)	// counter after this Add (high 32 bits)
	w := uint32(state) // number of registered waiters (low 32 bits)

	if v < 0 {
		panic("sync: negative WaitGroup counter")
	}
	// v == delta means the counter was zero before this call. Raising it from
	// zero while goroutines are already registered as waiters is misuse.
	if w != 0 && delta > 0 && v == int32(delta) {
		panic("sync: WaitGroup misuse: Add called concurrently with Wait")
	}
	// Nothing to wake: either work is still outstanding or nobody is waiting.
	if v > 0 || w == 0 {
		return
	}
	// Here the counter is 0 and w waiters are registered: reset counter and
	// waiter count together.
	// NOTE(review): this is a plain, non-atomic store; it is only safe if no
	// Add or Wait can modify the state concurrently at this point — confirm
	// against the usage contract.
	*statep = 0
	// Release the semaphore once per registered waiter (each call presumably
	// wakes one goroutine blocked in Wait).
	for ; w != 0; w-- {
		runtime_Semrelease(semap, false, 0)
	}
}

Wait blocks the calling goroutine until the counter reaches zero. If the counter (v) is already zero when Wait is called, it returns immediately without blocking.

// Wait returns once the WaitGroup counter is zero. If the counter is non-zero
// it registers the caller as a waiter (low 32 bits of the state word) and then
// blocks on the semaphore.
func (wg *WaitGroup) Wait() {
	statep, semap := wg.state()
	for {
		state := atomic.LoadUint64(statep)
		v := int32(state >> 32) // counter (high 32 bits)
		w := uint32(state)      // waiter count (low 32 bits)
		if v == 0 {
			// Counter is already zero: no need to wait.
			return
		}
		// Register as a waiter by incrementing the low 32 bits with a CAS.
		// The CAS fails if the state word changed since the load above
		// (for example a concurrent Add or another Wait), in which case
		// the loop reloads the state and tries again.
		if atomic.CompareAndSwapUint64(statep, state, state+1) {
			// Block on the semaphore; Add releases it once per registered
			// waiter when the counter drops to zero.
			// NOTE(review): runtime_Semacquire is defined elsewhere;
			// presumably it parks the goroutine until the semaphore is
			// released — confirm.
			runtime_Semacquire(semap)
			return
		}
	}
}

Use Cases

Commonly used to wait for multiple goroutines to finish.