The Token Ring Problem

The Token Ring is a classic distributed mutual exclusion algorithm where nodes are arranged in a logical ring, and a single token circulates. Only the node holding the token can access the shared resource. It’s simple, fair, and starvation-free.

The Scenario

Nodes arranged in a ring:

  • N nodes form a logical ring
  • A single token passes around the ring
  • Only token holder can enter critical section
  • After using resource, pass token to next node
  • Properties: Fair (round-robin ring order), starvation-free, deadlock-free

The challenge:

  • What if the token is lost?
  • What if a node crashes while holding the token?
  • What if a node crashes and breaks the ring?
  • How do you detect and recover from failures?

The Protocol

graph LR
    subgraph "Token Ring"
        N0["Node 0<br/>πŸͺ™ Has Token"]
        N1["Node 1<br/>Waiting"]
        N2["Node 2<br/>Waiting"]
        N3["Node 3<br/>Waiting"]
        N4["Node 4<br/>Waiting"]
        N0 -->|Pass token| N1
        N1 -->|Pass token| N2
        N2 -->|Pass token| N3
        N3 -->|Pass token| N4
        N4 -->|Pass token| N0
    end
    style N0 fill:#90EE90

Real-World Applications

This pattern appears in various systems:

  • Token Ring Network: Classic LAN protocol (IEEE 802.5)
  • FDDI: Fiber Distributed Data Interface
  • Token Bus: IEEE 802.4, token passing over a bus topology
  • Industrial Control: PLCs and SCADA systems
  • Distributed Databases: Write token passing
  • Process Scheduling: Round-robin with privileges

Implementation in Go

Part 1: Basic Token Ring

package main

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// Token represents the permission to access shared resource
type Token struct {
	ID        int
	HolderID  int
	Timestamp time.Time
}

// Node in the token ring
type Node struct {
	ID            int
	Next          *Node
	HasToken      bool
	WantsResource bool
	TokenChan     chan Token
	IsAlive       bool
	mu            sync.Mutex
	ctx           context.Context
	cancel        context.CancelFunc
	wg            sync.WaitGroup

	// Statistics
	AccessCount   int
	TokenReceived int
}

// TokenRing manages the ring of nodes
type TokenRing struct {
	Nodes    []*Node
	TokenLost bool
	mu       sync.Mutex
}

func NewTokenRing(numNodes int) *TokenRing {
	nodes := make([]*Node, numNodes)

	// Create nodes
	for i := 0; i < numNodes; i++ {
		ctx, cancel := context.WithCancel(context.Background())
		nodes[i] = &Node{
			ID:        i,
			IsAlive:   true,
			TokenChan: make(chan Token, 1),
			ctx:       ctx,
			cancel:    cancel,
		}
	}

	// Create ring topology
	for i := 0; i < numNodes; i++ {
		nodes[i].Next = nodes[(i+1)%numNodes]
	}

	// Give token to first node
	nodes[0].HasToken = true
	fmt.Printf("πŸͺ™ Initial token holder: Node 0\n")

	return &TokenRing{
		Nodes: nodes,
	}
}

// Start begins node operation
func (n *Node) Start() {
	n.wg.Add(1)
	go n.run()
}

// Stop gracefully stops the node
func (n *Node) Stop() {
	n.cancel()
	n.wg.Wait()
}

// run is the main event loop
func (n *Node) run() {
	defer n.wg.Done()

	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-n.ctx.Done():
			return

		case token := <-n.TokenChan:
			n.handleToken(token)

		case <-ticker.C:
			n.checkToken()
		}
	}
}

// handleToken processes a received token: use the resource if this
// node wants it, then forward the token to the next node.
func (n *Node) handleToken(token Token) {
	n.mu.Lock()
	if !n.IsAlive {
		// A crashed node swallows the token; this is how token
		// loss is simulated.
		n.mu.Unlock()
		return
	}

	n.HasToken = true
	n.TokenReceived++
	wants := n.WantsResource
	n.mu.Unlock()

	fmt.Printf("[Node %d] πŸͺ™ Received token\n", n.ID)

	// If this node wants the resource, use it before forwarding.
	if wants {
		n.accessResource()
		n.mu.Lock()
		n.WantsResource = false
		n.mu.Unlock()
	}

	// Pass token to next node
	time.Sleep(100 * time.Millisecond) // Simulate some work
	n.passToken()
}

// accessResource simulates critical section access
func (n *Node) accessResource() {
	n.mu.Lock()
	n.AccessCount++
	count := n.AccessCount
	n.mu.Unlock()

	fmt.Printf("[Node %d] ⚑ Accessing resource (access #%d)\n", n.ID, count)
	time.Sleep(time.Duration(100+rand.Intn(200)) * time.Millisecond)
	fmt.Printf("[Node %d] βœ“ Done with resource\n", n.ID)
}

// passToken sends token to next node
func (n *Node) passToken() {
	n.mu.Lock()
	defer n.mu.Unlock()

	if !n.HasToken || !n.IsAlive {
		return
	}

	// Find the next alive node, skipping crashed ones.
	// (Assumes the ring has at least two nodes; with a single node,
	// n.Next == n and locking it below would self-deadlock.)
	next := n.Next
	for {
		next.mu.Lock()
		if next.IsAlive {
			next.mu.Unlock()
			break
		}
		next.mu.Unlock()
		next = next.Next

		// If we've gone full circle, we're the only node alive;
		// keep the token rather than dropping it.
		if next.ID == n.ID {
			return
		}
	}

	token := Token{
		ID:        0,
		HolderID:  next.ID,
		Timestamp: time.Now(),
	}

	n.HasToken = false

	select {
	case next.TokenChan <- token:
		fmt.Printf("[Node %d] β†’ [Node %d] Token passed\n", n.ID, next.ID)
	case <-time.After(1 * time.Second):
		fmt.Printf("[Node %d] ⚠️  Failed to pass token to Node %d\n", n.ID, next.ID)
		// Token might be lost!
	}
}

// checkToken periodically re-forwards the token if this node still
// holds it (for example, after a failed pass).
func (n *Node) checkToken() {
	n.mu.Lock()
	hasToken := n.IsAlive && n.HasToken
	n.mu.Unlock()

	if hasToken {
		n.passToken()
	}
}

// RequestResource marks node as wanting resource
func (n *Node) RequestResource() {
	n.mu.Lock()
	defer n.mu.Unlock()

	if !n.IsAlive {
		return
	}

	n.WantsResource = true
	fmt.Printf("[Node %d] πŸ™‹ Requesting resource\n", n.ID)
}

// Crash simulates node failure
func (n *Node) Crash() {
	n.mu.Lock()
	defer n.mu.Unlock()

	if !n.IsAlive {
		return
	}

	fmt.Printf("\n[Node %d] πŸ’₯ CRASHED", n.ID)
	if n.HasToken {
		fmt.Printf(" (was holding token!)")
	}
	fmt.Println()

	n.IsAlive = false
	n.HasToken = false
	n.WantsResource = false
}

// Recover simulates node recovery
func (n *Node) Recover() {
	n.mu.Lock()
	defer n.mu.Unlock()

	if n.IsAlive {
		return
	}

	fmt.Printf("\n[Node %d] πŸ”„ RECOVERED\n", n.ID)
	n.IsAlive = true
}

// PrintStatus displays ring status
func (tr *TokenRing) PrintStatus() {
	fmt.Println("\nπŸ“Š Ring Status:")
	fmt.Println("   Node | Has Token | Wants | Alive | Accesses | Tokens Received")
	fmt.Println("   -----|-----------|-------|-------|----------|----------------")

	for _, node := range tr.Nodes {
		node.mu.Lock()
		fmt.Printf("   %4d | %-9v | %-5v | %-5v | %8d | %15d\n",
			node.ID, node.HasToken, node.WantsResource,
			node.IsAlive, node.AccessCount, node.TokenReceived)
		node.mu.Unlock()
	}
}

func main() {
	rand.Seed(42)

	fmt.Println("=== Token Ring: Fair Resource Access ===\n")

	const numNodes = 5
	ring := NewTokenRing(numNodes)

	// Start all nodes
	fmt.Println("πŸš€ Starting all nodes...")
	for _, node := range ring.Nodes {
		node.Start()
	}

	// Scenario 1: Normal operation
	fmt.Println("\n" + "=== Scenario 1: Normal Operation ===" + "\n")
	time.Sleep(500 * time.Millisecond)

	// Multiple nodes request resource
	ring.Nodes[2].RequestResource()
	ring.Nodes[4].RequestResource()
	ring.Nodes[1].RequestResource()

	time.Sleep(5 * time.Second)
	ring.PrintStatus()

	// Scenario 2: Node crashes while holding token
	fmt.Println("\n\n" + "=== Scenario 2: Token Holder Crashes ===" + "\n")
	time.Sleep(1 * time.Second)

	// Find current token holder
	var tokenHolder *Node
	for _, node := range ring.Nodes {
		node.mu.Lock()
		if node.HasToken {
			tokenHolder = node
			node.mu.Unlock()
			break
		}
		node.mu.Unlock()
	}

	if tokenHolder != nil {
		tokenHolder.Crash()
	}

	time.Sleep(3 * time.Second)
	ring.PrintStatus()

	// Scenario 3: Token regeneration
	fmt.Println("\n\n" + "=== Scenario 3: Token Regeneration ===" + "\n")
	fmt.Println("⚠️  Token appears to be lost, regenerating...")

	// Give token to first alive node
	for _, node := range ring.Nodes {
		node.mu.Lock()
		if node.IsAlive {
			node.HasToken = true
			fmt.Printf("πŸͺ™ New token given to Node %d\n", node.ID)
			node.mu.Unlock()
			break
		}
		node.mu.Unlock()
	}

	time.Sleep(3 * time.Second)
	ring.PrintStatus()

	// Cleanup
	fmt.Println("\n\nπŸ›‘ Shutting down...")
	for _, node := range ring.Nodes {
		node.Stop()
	}

	fmt.Println("\nβœ“ Simulation complete!")
}

How It Works

1. Token Circulation

sequenceDiagram
    participant N0 as Node 0
    participant N1 as Node 1
    participant N2 as Node 2
    participant N3 as Node 3
    N0->>N0: Has token, use resource
    N0->>N1: Pass token
    N1->>N1: Has token, use resource
    N1->>N2: Pass token
    N2->>N2: Has token, no request
    N2->>N3: Pass token immediately
    N3->>N3: Has token, use resource
    N3->>N0: Pass token
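
To make the circulation concrete, here is a minimal self-contained program in the same spirit: one token travels around a ring of buffered channels, and each goroutine prints when it holds it. The names inbox and rounds are illustrative, not taken from the implementation above.

package main

import (
	"fmt"
	"sync"
)

func main() {
	const n, rounds = 4, 3

	// One inbox per node; capacity 1 so the final pass never blocks.
	inbox := make([]chan struct{}, n)
	for i := range inbox {
		inbox[i] = make(chan struct{}, 1)
	}

	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for r := 0; r < rounds; r++ {
				<-inbox[id] // wait until the token arrives
				fmt.Printf("node %d holds the token (round %d)\n", id, r)
				inbox[(id+1)%n] <- struct{}{} // pass it to the next node
			}
		}(i)
	}

	inbox[0] <- struct{}{} // inject the single token at node 0
	wg.Wait()
}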

2. Fairness Guarantee

Every node gets the token in turn:

  • Round-robin (ring) order; no node is ever bypassed twice in a row
  • No starvation possible
  • Bounded waiting: O(n) token passes, since at worst the token has just left and must visit every other node before returning

3. Token Loss Detection

// Each node tracks the last token sighting
type TokenMonitor struct {
    lastSeen    time.Time
    ringSize    int           // number of nodes in the ring
    passTimeout time.Duration // expected time for one hop
    mu          sync.Mutex
}

// CheckLost reports whether the token should be presumed lost:
// no sighting within a full circulation's worth of hop timeouts.
func (tm *TokenMonitor) CheckLost() bool {
    tm.mu.Lock()
    defer tm.mu.Unlock()

    return time.Since(tm.lastSeen) > time.Duration(tm.ringSize)*tm.passTimeout
}
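
To feed the monitor, every sighting must be stamped; a minimal sketch, assuming a Saw method alongside CheckLost (the method name and its call sites are illustrative, not part of the listings above):

// Saw records a token sighting; call it whenever the node receives
// or forwards the token (e.g. from handleToken and passToken).
func (tm *TokenMonitor) Saw() {
    tm.mu.Lock()
    tm.lastSeen = time.Now()
    tm.mu.Unlock()
}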

Advanced: Token Ring with Fault Tolerance

Token Regeneration Protocol

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// FaultTolerantNode extends Node with failure-detection state.
// (Shown for completeness; the regeneration logic below works at the
// ring level and does not use it.)
type FaultTolerantNode struct {
	*Node
	LastTokenSeen time.Time
	IsSuspicious  bool
	ElectionTimer *time.Timer
}

// TokenRegenerationRing handles token loss
type TokenRegenerationRing struct {
	*TokenRing
	TokenTimeout  time.Duration
	MonitorActive atomic.Bool // shared with the monitor goroutine; set false to stop it
}

func NewFaultTolerantRing(numNodes int) *TokenRegenerationRing {
	ring := NewTokenRing(numNodes)

	ftr := &TokenRegenerationRing{
		TokenRing:    ring,
		TokenTimeout: time.Duration(numNodes) * 2 * time.Second,
	}
	ftr.MonitorActive.Store(true)

	// Start token monitor
	go ftr.monitorToken()

	return ftr
}

// monitorToken detects token loss
func (ftr *TokenRegenerationRing) monitorToken() {
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	lastActivity := time.Now()

	for ftr.MonitorActive.Load() {
		<-ticker.C

		// Check if any node has token
		ftr.mu.Lock()
		hasToken := false
		for _, node := range ftr.Nodes {
			node.mu.Lock()
			if node.HasToken && node.IsAlive {
				hasToken = true
				lastActivity = time.Now()
			}
			node.mu.Unlock()
		}

		// Token loss detection
		if !hasToken && time.Since(lastActivity) > ftr.TokenTimeout {
			fmt.Println("\n⚠️  TOKEN LOST! Initiating regeneration...")
			ftr.regenerateToken()
			lastActivity = time.Now()
		}
		ftr.mu.Unlock()
	}
}

// regenerateToken creates new token
func (ftr *TokenRegenerationRing) regenerateToken() {
	// Election: highest ID alive node generates token
	var highest *Node
	for _, node := range ftr.Nodes {
		node.mu.Lock()
		if node.IsAlive {
			if highest == nil || node.ID > highest.ID {
				highest = node
			}
		}
		node.mu.Unlock()
	}

	if highest != nil {
		highest.mu.Lock()
		highest.HasToken = true
		highest.mu.Unlock()

		fmt.Printf("πŸͺ™ Token regenerated at Node %d\n", highest.ID)
	}
}

// RepairRing fixes ring after node failure
func (ftr *TokenRegenerationRing) RepairRing() {
	fmt.Println("\nπŸ”§ Repairing ring...")

	for _, node := range ftr.Nodes {
		node.mu.Lock()
		if !node.IsAlive {
			node.mu.Unlock()
			continue
		}

		// Find the next alive node, stopping if we wrap back to
		// ourselves (otherwise a lone survivor would try to lock
		// its own mutex and self-deadlock).
		next := node.Next
		for next != node {
			next.mu.Lock()
			alive := next.IsAlive
			next.mu.Unlock()
			if alive {
				break
			}
			next = next.Next
		}

		node.Next = next
		fmt.Printf("  Node %d β†’ Node %d\n", node.ID, next.ID)
		node.mu.Unlock()
	}

	fmt.Println("βœ“ Ring repaired")
}

func RunFaultTolerantRing() {
	fmt.Println("=== Fault-Tolerant Token Ring ===\n")

	ring := NewFaultTolerantRing(5)

	// Start nodes
	for _, node := range ring.Nodes {
		node.Start()
	}

	// Normal operation
	time.Sleep(2 * time.Second)

	// Node 2 crashes with token
	fmt.Println("\n=== Simulating Token Holder Failure ===")
	ring.Nodes[2].Crash()

	// Monitor will detect and regenerate
	time.Sleep(5 * time.Second)

	// Repair ring
	ring.RepairRing()

	time.Sleep(3 * time.Second)
	ring.PrintStatus()

	// Cleanup
	ring.MonitorActive.Store(false)
	for _, node := range ring.Nodes {
		node.Stop()
	}

	fmt.Println("\nβœ“ Fault-tolerant simulation complete!")
}

func main() {
	RunFaultTolerantRing()
}
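
Note that this listing reuses the Node, TokenRing, and NewTokenRing definitions from Part 1, and each part declares its own main, so treat the listings as separate programs rather than one combined file. The regeneration rule, where the highest alive ID creates the new token, is the same "highest ID wins" idea as the Bully election algorithm.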

Key Concepts

1. Fairness

The token ring provides strong, predictable fairness:

  • Every node gets an equal opportunity
  • Requests are served in round-robin ring order
  • No priority inversion

2. Performance

// Worst-case waiting time when all n nodes want access:
// every other node's critical section plus a full circulation
waitTime = (n - 1) * criticalSectionTime + n * passTime
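
For example, with n = 5 nodes, a 200 ms critical section, and a 10 ms pass time, the worst case is 4 Γ— 200 + 5 Γ— 10 = 850 ms.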

3. Fault Scenarios

Failure Type    | Detection             | Recovery
----------------|-----------------------|----------------------------------
Token loss      | Timeout               | Regenerate at highest ID
Node crash      | Heartbeat miss        | Repair ring topology
Ring break      | Token not circulating | Reconstruct ring
Duplicate token | Multiple holders      | Highest ID keeps, others discard
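
The last row's rule can be written as a small pure function. A sketch, assuming Token.ID is used as a regeneration sequence number (the Part 1 code always sets it to 0, so this is an extension):

// resolveDuplicate picks the surviving token when a holder discovers
// a second one: the higher regeneration ID wins, with ties broken by
// the higher holder ID. The losing copy is simply discarded.
func resolveDuplicate(a, b Token) Token {
	if b.ID > a.ID || (b.ID == a.ID && b.HolderID > a.HolderID) {
		return b
	}
	return a
}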

Real-World Example: Distributed Lock Manager

package main

import (
	"fmt"
	"sync"
	"time"
)

// DistributedLock using token ring
type DistributedLock struct {
	Holders  []*LockHolder
	Token    *LockToken
	LockChan chan *LockToken
}

type LockHolder struct {
	ID        int
	Resources []string // Resources it wants to lock
	Granted   bool
	Next      *LockHolder
}

type LockToken struct {
	HolderID  int
	Resources map[string]bool // Currently locked resources
}

func NewDistributedLock(numHolders int) *DistributedLock {
	holders := make([]*LockHolder, numHolders)

	for i := 0; i < numHolders; i++ {
		holders[i] = &LockHolder{
			ID: i,
		}
	}

	// Create ring
	for i := 0; i < numHolders; i++ {
		holders[i].Next = holders[(i+1)%numHolders]
	}

	return &DistributedLock{
		Holders:  holders,
		Token:    &LockToken{Resources: make(map[string]bool)},
		LockChan: make(chan *LockToken, 1),
	}
}

func (dl *DistributedLock) RequestLock(holderID int, resources []string) {
	holder := dl.Holders[holderID]
	holder.Resources = resources

	fmt.Printf("[Holder %d] πŸ”’ Requesting lock on %v\n", holderID, resources)

	// Wait for token
	token := <-dl.LockChan

	// Check if resources are available
	canGrant := true
	for _, res := range resources {
		if token.Resources[res] {
			canGrant = false
			break
		}
	}

	if canGrant {
		// Grant lock, then hand the token on immediately so other
		// holders can acquire non-conflicting resources while we
		// work. (Holding the token for the whole critical section
		// would serialize everything, and no conflict could ever
		// be observed.)
		for _, res := range resources {
			token.Resources[res] = true
		}
		holder.Granted = true
		fmt.Printf("[Holder %d] βœ… Lock granted on %v\n", holderID, resources)
		dl.LockChan <- token

		// Use resources
		time.Sleep(500 * time.Millisecond)

		// Take the token back to release our locks
		token = <-dl.LockChan
		for _, res := range resources {
			delete(token.Resources, res)
		}
		fmt.Printf("[Holder %d] πŸ”“ Lock released on %v\n", holderID, resources)
	} else {
		fmt.Printf("[Holder %d] ❌ Lock denied (conflict)\n", holderID)
	}

	// Pass token
	dl.LockChan <- token
}

func RunDistributedLock() {
	fmt.Println("=== Distributed Lock Manager ===\n")

	dl := NewDistributedLock(3)

	// Give initial token
	dl.LockChan <- dl.Token

	var wg sync.WaitGroup

	// Holder 0 wants resources A, B
	wg.Add(1)
	go func() {
		defer wg.Done()
		dl.RequestLock(0, []string{"A", "B"})
	}()

	time.Sleep(100 * time.Millisecond)

	// Holder 1 wants resources B, C (conflict on B)
	wg.Add(1)
	go func() {
		defer wg.Done()
		dl.RequestLock(1, []string{"B", "C"})
	}()

	time.Sleep(100 * time.Millisecond)

	// Holder 2 wants resource C (no conflict; should be granted)
	wg.Add(1)
	go func() {
		defer wg.Done()
		dl.RequestLock(2, []string{"C"})
	}()

	wg.Wait()
	fmt.Println("\nβœ“ All lock requests completed")
}

func main() {
	RunDistributedLock()
}
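
Two simplifications are worth noting: a denied request is not retried (a real lock manager would queue it and re-check on the token's next visit), and although the holders are linked in a ring, the buffered channel hands the token to whichever goroutine happens to be waiting, so strict ring order is not enforced in this sketch.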

Performance Characteristics

Metric       | Value
-------------|------------------------------
Messages     | 1 per token pass
Waiting time | O(n) worst case
Fairness     | Round-robin, bounded bypass
Starvation   | Impossible
Throughput   | Limited by token circulation
Scalability  | Poor for large n
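
For comparison, a permission-based algorithm such as Ricart–Agrawala costs 2(n - 1) messages per critical-section entry, while the token ring pays one message per hop but keeps the token circulating even when no one wants the resource.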

Advantages

  βœ“ Fair: Round-robin ordering, no starvation
  βœ“ Simple: Easy to understand and implement
  βœ“ Deterministic: Predictable behavior
  βœ“ Deadlock-free: Only one token exists

Disadvantages

  βœ— Single point of failure: Token loss halts the system until it is regenerated
  βœ— Sequential: Only one access at a time
  βœ— High latency: Must wait for the token to circulate
  βœ— Poor scalability: O(n) wait time

When to Use

βœ“ Use when:

  • Fairness is critical
  • Small number of nodes
  • Predictable access pattern
  • Simple coordination needed

βœ— Avoid when:

  • High performance required
  • Large number of nodes
  • Frequent failures occur
  • Need concurrent access

Try It Yourself

  1. Measure fairness - Track wait times for each node (a starter sketch follows this list)
  2. Implement token regeneration - Detect and recover from loss
  3. Add priority levels - Some nodes get token more often
  4. Compare with mutex - Performance differences
  5. Simulate network delays - See impact on throughput
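
As a starting point for exercise 1, a minimal sketch of per-node wait tracking; WaitStats and its call sites are hypothetical additions to the Part 1 code (stamp in RequestResource, record in accessResource):

// WaitStats measures how long a node waits between requesting the
// resource and entering its critical section. (Hypothetical helper,
// not part of the listings above.)
type WaitStats struct {
	mu          sync.Mutex
	requestedAt time.Time
	waits       []time.Duration
}

// Requested stamps the request time; call it from RequestResource.
func (ws *WaitStats) Requested() {
	ws.mu.Lock()
	ws.requestedAt = time.Now()
	ws.mu.Unlock()
}

// Entered records the elapsed wait; call it from accessResource.
func (ws *WaitStats) Entered() {
	ws.mu.Lock()
	ws.waits = append(ws.waits, time.Since(ws.requestedAt))
	ws.mu.Unlock()
}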

This is part 19 of “Golang Experiments: Classic Concurrency Problems”

Further Reading

  • IEEE 802.5: Token Ring network standard
  • Token-based mutual exclusion: Distributed systems textbooks
  • FDDI: Fiber Distributed Data Interface
  • Comparison with Lamport’s algorithm: Alternative distributed mutex