The Gossiping Problem

The Gossiping Problem is a classic problem in distributed systems and graph theory. N people each know a unique secret, and they share information through phone calls. On each call, both parties share all secrets they know. The goal: find the minimum number of calls needed for everyone to know all secrets.

The Scenario

N people, N secrets:

  • Person i knows secret Si initially
  • When persons i and j call each other, they share ALL secrets they know
  • Goal: Everyone knows all N secrets
  • Question: What’s the minimum number of calls?

The Mathematical Result

For n ≥ 4 people:

  • Minimum calls = 2n - 4

Why 2n-4?

  • First n-2 calls: Create a chain
  • Last n-2 calls: Broadcast to everyone

For n < 4:

  • n=2: 1 call
  • n=3: 3 calls

The Strategy

graph TD subgraph "Phase 1: Chain Building (n-2 calls)" A[Person 0
knows S0] B[Person 1
knows S1] C[Person 2
knows S2] D[Person 3
knows S3] A -->|Call 1| B B -->|Call 2| C C -->|Call 3| D end subgraph "Phase 2: Broadcasting (n-2 calls)" D2[Person 3
knows all] A2[Person 0] B2[Person 1] C2[Person 2] D2 -->|Call 4| A2 D2 -->|Call 5| B2 end subgraph "Result" All[Everyone knows
all 4 secrets] end

Real-World Applications

Gossip protocols appear in many distributed systems:

  • Bitcoin/Blockchain: Transaction propagation
  • Cassandra/DynamoDB: Anti-entropy repairs
  • Consul/etcd: Cluster membership
  • Epidemic Algorithms: Information dissemination
  • COVID contact tracing: Exposure notification
  • Rumor spreading: Social networks

Implementation in Go

package main

import (
	"fmt"
	"sort"
	"sync"
)

// Secret represents a piece of information
type Secret struct {
	ID    int
	Value string
}

// Person represents a participant in gossip
type Person struct {
	ID      int
	Secrets map[int]Secret
	mu      sync.Mutex
}

// GossipSystem coordinates the gossip protocol
type GossipSystem struct {
	People    []*Person
	CallCount int
	CallLog   []Call
	mu        sync.Mutex
}

// Call represents a phone call between two people
type Call struct {
	From    int
	To      int
	CallNum int
	Secrets []int // Secrets shared in this call
}

func NewGossipSystem(n int) *GossipSystem {
	people := make([]*Person, n)

	// Each person starts with one unique secret
	for i := 0; i < n; i++ {
		people[i] = &Person{
			ID:      i,
			Secrets: make(map[int]Secret),
		}
		people[i].Secrets[i] = Secret{
			ID:    i,
			Value: fmt.Sprintf("Secret_%d", i),
		}
	}

	return &GossipSystem{
		People: people,
	}
}

// Call simulates a phone call where both parties share all secrets
func (gs *GossipSystem) Call(fromID, toID int) {
	gs.mu.Lock()
	gs.CallCount++
	callNum := gs.CallCount
	gs.mu.Unlock()

	from := gs.People[fromID]
	to := gs.People[toID]

	// Lock both people (in order to prevent deadlock)
	if fromID < toID {
		from.mu.Lock()
		to.mu.Lock()
	} else {
		to.mu.Lock()
		from.mu.Lock()
	}
	defer from.mu.Unlock()
	defer to.mu.Unlock()

	fmt.Printf("\n📞 Call #%d: Person %d ↔ Person %d\n", callNum, fromID, toID)

	// Collect all secrets before the call
	fromBefore := len(from.Secrets)
	toBefore := len(to.Secrets)

	// Share all secrets
	allSecrets := make(map[int]Secret)

	// Combine secrets from both parties
	for id, secret := range from.Secrets {
		allSecrets[id] = secret
	}
	for id, secret := range to.Secrets {
		allSecrets[id] = secret
	}

	// Both now know all combined secrets
	from.Secrets = make(map[int]Secret)
	to.Secrets = make(map[int]Secret)

	for id, secret := range allSecrets {
		from.Secrets[id] = secret
		to.Secrets[id] = secret
	}

	// Log the call
	secretIDs := make([]int, 0, len(allSecrets))
	for id := range allSecrets {
		secretIDs = append(secretIDs, id)
	}
	sort.Ints(secretIDs)

	gs.mu.Lock()
	gs.CallLog = append(gs.CallLog, Call{
		From:    fromID,
		To:      toID,
		CallNum: callNum,
		Secrets: secretIDs,
	})
	gs.mu.Unlock()

	// Report what was learned
	fromLearned := len(from.Secrets) - fromBefore
	toLearned := len(to.Secrets) - toBefore

	fmt.Printf("  Person %d: %d → %d secrets (+%d)\n",
		fromID, fromBefore, len(from.Secrets), fromLearned)
	fmt.Printf("  Person %d: %d → %d secrets (+%d)\n",
		toID, toBefore, len(to.Secrets), toLearned)
	fmt.Printf("  Secrets shared: %v\n", secretIDs)
}

// IsComplete checks if everyone knows all secrets
func (gs *GossipSystem) IsComplete() bool {
	n := len(gs.People)

	for _, person := range gs.People {
		person.mu.Lock()
		count := len(person.Secrets)
		person.mu.Unlock()

		if count != n {
			return false
		}
	}
	return true
}

// OptimalGossip executes the optimal 2n-4 algorithm
func (gs *GossipSystem) OptimalGossip() {
	n := len(gs.People)

	if n < 2 {
		return
	}

	if n == 2 {
		gs.Call(0, 1)
		return
	}

	if n == 3 {
		// Special case: 3 calls needed
		gs.Call(0, 1)
		gs.Call(1, 2)
		gs.Call(0, 2)
		return
	}

	fmt.Println("=== Phase 1: Building Chain (n-2 calls) ===")

	// Phase 1: Build a chain (n-2 calls)
	// Person 0 calls 1, 1 calls 2, ..., (n-3) calls (n-2)
	for i := 0; i < n-2; i++ {
		gs.Call(i, i+1)
	}

	fmt.Println("\n=== Phase 2: Broadcasting (n-2 calls) ===")

	// Phase 2: Last person in chain broadcasts to everyone except adjacent
	// Person (n-2) now knows all secrets except the last person's
	// Last person needs to call someone to share their secret
	gs.Call(n-2, n-1)

	// Now person n-1 knows all secrets, broadcast to others
	for i := 0; i < n-2; i++ {
		gs.Call(n-1, i)
	}
}

// PrintStatus shows current state
func (gs *GossipSystem) PrintStatus() {
	fmt.Println("\n📊 Current Status:")
	fmt.Println("   Person | Secrets Known | Has All?")
	fmt.Println("   -------|---------------|----------")

	n := len(gs.People)
	for _, person := range gs.People {
		person.mu.Lock()
		secretCount := len(person.Secrets)
		hasAll := secretCount == n

		secretIDs := make([]int, 0, secretCount)
		for id := range person.Secrets {
			secretIDs = append(secretIDs, id)
		}
		sort.Ints(secretIDs)

		fmt.Printf("   %6d | %13d | %-8v %v\n",
			person.ID, secretCount, hasAll, secretIDs)
		person.mu.Unlock()
	}

	fmt.Printf("\n   Total calls: %d\n", gs.CallCount)
	if gs.IsComplete() {
		fmt.Println("   ✅ Everyone knows all secrets!")
	}
}

func main() {
	fmt.Println("=== The Gossiping Problem ===\n")

	testCases := []int{2, 3, 4, 5, 6}

	for _, n := range testCases {
		fmt.Printf("\n%s\n", "="*60)
		fmt.Printf("Scenario: %d people, %d secrets\n", n, n)
		expectedCalls := 1
		if n == 3 {
			expectedCalls = 3
		} else if n >= 4 {
			expectedCalls = 2*n - 4
		}
		fmt.Printf("Expected optimal calls: %d\n", expectedCalls)
		fmt.Printf("%s\n\n", "="*60)

		system := NewGossipSystem(n)
		system.OptimalGossip()
		system.PrintStatus()

		if system.CallCount == expectedCalls {
			fmt.Printf("\n✅ Achieved optimal: %d calls\n", system.CallCount)
		} else {
			fmt.Printf("\n❌ Not optimal: used %d calls, expected %d\n",
				system.CallCount, expectedCalls)
		}
	}
}

How It Works

1. Chain Building Phase

// Person 0 → 1 → 2 → 3 → ... → (n-2)
// After n-2 calls, person (n-2) knows all but one secret
for i := 0; i < n-2; i++ {
    call(i, i+1)
}

2. Broadcasting Phase

// Person (n-1) calls (n-2) to get all secrets
call(n-2, n-1)

// Now person (n-1) knows everything, broadcast back
for i := 0; i < n-2; i++ {
    call(n-1, i)
}

3. Why 2n-4 is Optimal

Lower bound proof:

  • After k calls, at most k+1 people can know all secrets
  • To get n people informed: k+1 ≥ n → k ≥ n-1
  • But we also need to collect all secrets to one person first
  • Minimum: (n-2) to collect + (n-2) to broadcast = 2n-4

Advanced: Parallel Gossiping

In real systems, calls can happen in parallel:

package main

import (
	"fmt"
	"sync"
	"time"
)

// ParallelGossipSystem allows concurrent calls
type ParallelGossipSystem struct {
	*GossipSystem
	CallChannel chan CallRequest
	wg          sync.WaitGroup
}

type CallRequest struct {
	From int
	To   int
}

func NewParallelGossipSystem(n int) *ParallelGossipSystem {
	return &ParallelGossipSystem{
		GossipSystem: NewGossipSystem(n),
		CallChannel:  make(chan CallRequest, 100),
	}
}

// Start begins parallel call processing
func (pgs *ParallelGossipSystem) Start() {
	pgs.wg.Add(1)
	go pgs.processCallsparallel()
}

// Stop gracefully stops the system
func (pgs *ParallelGossipSystem) Stop() {
	close(pgs.CallChannel)
	pgs.wg.Wait()
}

// processCalls handles calls with parallelism constraints
func (pgs *ParallelGossipSystem) processCalls() {
	defer pgs.wg.Done()

	// Track who is currently on a call
	busy := make(map[int]bool)
	var mu sync.Mutex

	for req := range pgs.CallChannel {
		mu.Lock()
		// Check if either person is busy
		if busy[req.From] || busy[req.To] {
			// Can't make call, person is busy
			mu.Unlock()
			// Requeue the call
			go func(r CallRequest) {
				time.Sleep(100 * time.Millisecond)
				pgs.CallChannel <- r
			}(req)
			continue
		}

		// Mark as busy
		busy[req.From] = true
		busy[req.To] = true
		mu.Unlock()

		// Make the call
		pgs.Call(req.From, req.To)

		// Mark as free
		mu.Lock()
		delete(busy, req.From)
		delete(busy, req.To)
		mu.Unlock()

		time.Sleep(50 * time.Millisecond) // Simulate call duration
	}
}

// OptimalParallelGossip finds calls that can be done in parallel
func (pgs *ParallelGossipSystem) OptimalParallelGossip() {
	n := len(pgs.People)

	if n < 4 {
		// Use sequential for small n
		pgs.OptimalGossip()
		return
	}

	fmt.Println("=== Parallel Gossip Protocol ===\n")

	// Round 1: Pair up people for simultaneous calls
	fmt.Println("Round 1: Parallel chain building")
	for i := 0; i < n-1; i += 2 {
		if i+1 < n {
			pgs.CallChannel <- CallRequest{From: i, To: i + 1}
		}
	}

	time.Sleep(200 * time.Millisecond)

	// Continue with remaining calls
	// ... (simplified for demonstration)
}

func RunParallelGossip() {
	fmt.Println("=== Parallel Gossiping ===\n")

	n := 6
	system := NewParallelGossipSystem(n)

	system.Start()
	system.OptimalParallelGossip()
	time.Sleep(2 * time.Second)
	system.Stop()

	system.PrintStatus()
}

func main() {
	RunParallelGossip()
}

Real-World Example: Distributed Key-Value Store Replication

package main

import (
	"fmt"
	"sync"
	"time"
)

// Update represents a change to replicate
type Update struct {
	Key       string
	Value     string
	Version   int
	Timestamp time.Time
}

// Replica represents a database replica
type Replica struct {
	ID      int
	Data    map[string]Update
	Peers   []*Replica
	mu      sync.Mutex
	Updates chan Update
}

func NewReplica(id int) *Replica {
	return &Replica{
		ID:      id,
		Data:    make(map[string]Update),
		Updates: make(chan Update, 100),
	}
}

// GossipUpdates periodically shares updates with random peer
func (r *Replica) GossipUpdates() {
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	for range ticker.C {
		// Pick random peer
		if len(r.Peers) == 0 {
			continue
		}

		peer := r.Peers[time.Now().Unix()%int64(len(r.Peers))]

		// Share all updates
		r.mu.Lock()
		updates := make([]Update, 0, len(r.Data))
		for _, update := range r.Data {
			updates = append(updates, update)
		}
		r.mu.Unlock()

		fmt.Printf("[Replica %d] Gossiping %d updates to Replica %d\n",
			r.ID, len(updates), peer.ID)

		// Send to peer
		for _, update := range updates {
			peer.ReceiveUpdate(update)
		}
	}
}

// ReceiveUpdate merges incoming update
func (r *Replica) ReceiveUpdate(update Update) {
	r.mu.Lock()
	defer r.mu.Unlock()

	existing, exists := r.Data[update.Key]

	// Last-write-wins conflict resolution
	if !exists || update.Version > existing.Version {
		r.Data[update.Key] = update
		fmt.Printf("[Replica %d] Applied update: %s = %s (v%d)\n",
			r.ID, update.Key, update.Value, update.Version)
	}
}

// Write creates a new update
func (r *Replica) Write(key, value string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	version := 1
	if existing, exists := r.Data[key]; exists {
		version = existing.Version + 1
	}

	update := Update{
		Key:       key,
		Value:     value,
		Version:   version,
		Timestamp: time.Now(),
	}

	r.Data[key] = update
	fmt.Printf("[Replica %d] Wrote: %s = %s (v%d)\n",
		r.ID, key, value, version)
}

func RunGossipReplication() {
	fmt.Println("=== Gossip-Based Replication ===\n")

	// Create 3 replicas
	replicas := make([]*Replica, 3)
	for i := 0; i < 3; i++ {
		replicas[i] = NewReplica(i)
	}

	// Set up peer connections
	for _, replica := range replicas {
		for _, peer := range replicas {
			if peer.ID != replica.ID {
				replica.Peers = append(replica.Peers, peer)
			}
		}
	}

	// Start gossip protocols
	for _, replica := range replicas {
		go replica.GossipUpdates()
	}

	// Perform writes on different replicas
	replicas[0].Write("user:1", "Alice")
	time.Sleep(500 * time.Millisecond)

	replicas[1].Write("user:2", "Bob")
	time.Sleep(500 * time.Millisecond)

	replicas[2].Write("user:3", "Charlie")

	// Wait for gossip to propagate
	time.Sleep(3 * time.Second)

	// Check convergence
	fmt.Println("\n📊 Final State:")
	for _, replica := range replicas {
		replica.mu.Lock()
		fmt.Printf("Replica %d: %d keys\n", replica.ID, len(replica.Data))
		for key, update := range replica.Data {
			fmt.Printf("  %s = %s (v%d)\n", key, update.Value, update.Version)
		}
		replica.mu.Unlock()
	}
}

func main() {
	RunGossipReplication()
}

Performance Characteristics

Metric Value
Minimum calls 2n - 4 (n ≥ 4)
Parallel rounds O(log n) possible
Message complexity O(n log n) in practice
Convergence time O(log n) rounds
Fault tolerance High
Scalability Excellent

Advantages of Gossip Protocols

Scalable: Works with thousands of nodes ✓ Fault tolerant: No single point of failure ✓ Eventually consistent: Guaranteed convergence ✓ Simple: Easy to implement and understand ✓ Robust: Tolerates node failures and network partitions

When to Use

Use when:

  • Need high availability
  • Can tolerate eventual consistency
  • Large number of nodes
  • Network unreliable
  • No central coordinator

Avoid when:

  • Strong consistency required
  • Low latency critical
  • Small cluster (simpler protocols exist)
  • Deterministic ordering needed

Try It Yourself

  1. Measure parallel speedup - Compare sequential vs parallel
  2. Add failures - Handle nodes dropping calls
  3. Implement anti-entropy - Periodic full reconciliation
  4. Compare strategies - Random vs structured gossip
  5. Add vector clocks - Track causality

This is part 21 of “Golang Experiments: Classic Concurrency Problems”

Further Reading

  • Original Problem: Graph theory textbooks
  • Epidemic Algorithms: Demers et al. (1987)
  • Cassandra: Anti-entropy and gossip
  • SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol