ABA Problem

ABA Problem

Dr. C.V. Suresh Babu

Introduction
• Queues are everywhere in parallel applications
and operating systems
• Many researchers have proposed queues
– Hwang and Briggs
– Gottlieb
– Massalin
– Et al. etc…

• Queue performance can be critical to operating
system performance
– Scheduling Queues
– Free memory lists
– Many other critical kernel operations

Concurrent FIFO Queue algorithms
• Blocking algorithms risk performance
degradation
– A process can be delayed or halted at inopportune
moments
• Scheduling preemption
• Page faults
• Cache misses

– Slow processes can prevent faster ones from
completing indefinitely

• Non-Blocking algorithms must solve the ABA
problem
– During contention, some process will complete
within a given number of operations

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data
CAS(&SM, value, newVal)

v1=Pop()
value = 5

Push (d) {
loop
value = SM
newVal = value +1
Stack Data = d
break

Stack

SM
5

…
x

time

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {
loop

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X
CAS(&SM,value,newVal)

value = SM
newVal = value +1
Stack Data = d
break

Stack

SM
4

…
x

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {
loop

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

value = SM

v2 = x

newVal = value +1
Stack Data = d
break

Stack

SM
4

…
x

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {
loop

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

value = SM

v2 = x

newVal = value +1

Push(z)

Stack Data = d
break

Stack

SM
4

…
x

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {
loop

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)


value = 4

break

Stack

SM
4

…
x

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {
loop

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)


value = 4

break

newVal=5

Stack

SM
4

…
x

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {
loop

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)


value = 4

break

newVal=5

Stack


SM
5

…
z

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

loop


value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)


value = 4

break

newVal=5

Stack


SM
5

…
z

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

loop


value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)


value = 4

break

newVal=5

Stack

v1=x

SM
4

…
z

time

ABA problem

Pop () {
loop
value = SM
newVal = value -1

THREAD1

THREAD2

data = Stack Data

v1=Pop()
value = 5

Push (d) {

newVal = 4

data=X

return data

value = 5

newVal = 4

break

v2=Pop()

data=X

loop


value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)


value = 4

break

newVal=5

Stack

v1=x

SM
4

…
z

CAS should fail but it succeeds
time

Thread1 has Thread2’s data

Solutions for ABA problem
Cache Kernel
• Add version # to data structures
• Increment # during every CAS instruction
LL/SC
• Fail if Cache Line has been written to

Solution for ABA problem

Pop () {
loop
value = SM

newVal = value -1

THREAD1

THREAD2

data = Stack Data
DCAS(&SM, value,

v1=Pop()
value = 5

return data
Push (d) {

newVal = 4

data=X

break

value = 5

newVal = 4

<ver++,newVal>)

v2=Pop()

data=X

loop

DCAS(&SM,value,ver,newVal)

value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)

DCAS(&SM, value,

value = 4

<ver++,newVal>)

newVal=5

Stack

break

Will not incorrectly succeed

SM
5

…
z

(ver != ver+2)

time

Solution for ABA problem

Pop () {
loop
value = SM

newVal = value -1

THREAD1

THREAD2

data = Stack Data
DCAS(&SM, value,

v1=Pop()
value = 5

return data
Push (d) {

newVal = 4

data=Z

break

value = 5

newVal = 4

<ver++,newVal>)

v2=Pop()

data=X

loop


value = SM

v2 = x

newVal = value +1
Stack Data = d

Push(z)

DCAS(&SM, value,

value = 4

<ver++,newVal>)

newVal=5

Stack

break

Will not incorrectly succeed

SM
4

(ver != ver+2)

…
time

V1 = Z

Correctness Properties
1. The linked list is always connected
2. Nodes are only inserted after the last node
3. Nodes are only deleted from the beginning
4. Head always points to the first node
5. Tail always points to a node in the list

Queue # 1
• Non-Blocking Concurrent Queue
– enqueue()

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

enqueue(Q: pointer to queue_t, value: data_type)
node = new node()
node–>value = value
node–>next.ptr = NULL
loop
tail = Q–>Tail
next = tail.ptr–>next
if tail == Q–>Tail
if next.ptr == NULL
if CAS(&tail.ptr–>next,
next, <node, next.count+1>)
break
endif
else
CAS(&Q–>Tail, tail, <next.ptr,
tail.count+1>)
endif
endif
endloop
CAS(&Q–>Tail, tail, <node, tail.count+1>)

struct queue_t {
pointer_t Head
pointer_t Tail
}

initialize(Q: pointer to queue_t)
node = new node()
Q–>Head = Q–>Tail = node

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

node = new node()

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

node = new node()

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

“enqueue(myQueue, D1)”
node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

enqueue(myQueue, D1)
node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1
[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1
[NULL]

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1
[NULL]

tail

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1
[NULL]

tail

next

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
if CAS( &tail.ptr–>next, next,
<node, next.count+1>)
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1
[NULL]

tail

next

[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1

+1
[NULL]

tail

next

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

node = new node()
Head
loop
tail = Q–>Tail
Tail
+1
if next.ptr == NULL
myQueue
break
endif
else
tail.count+1>)
endif
endif
endloop

D1

+1
[NULL]

Concurrent enqueues
• Suppose two processes call enqueue() at
the same time

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
if CAS( &tail.ptr–>next,
myQueue
break
Process 1
endif
Enqueue(myQueue, ABC)
else
tail.count+1>)
ABC
endif
endif
[NULL]
endloop

tail

next

node = new node()

D1
[NULL]

Process 2
Enqueue(myQueue, XYZ)

XYZ
[NULL]

tail

next

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
Process 1
endif
else
tail.count+1>)
ABC
endif
endif
[NULL]
endloop

tail

next

node = new node()

D1

+1
XYZ
[NULL]

Process 2

tail

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()
Head
loop
tail = Q–>Tail
Tail
if next.ptr == NULL
myQueue
break
Process 1
endif
else
CAS( &Q–>Tail, tail,
<next.ptr, tail.count+1>)
ABC
endif
endif
[NULL]
endloop

tail

next

node = new node()

D1

+1
XYZ
[NULL]

Process 2

tail

Queue #1 (cont.)
• Non-Blocking Concurrent Queue
– dequeue()

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

“dequeue(myQueue, pvalue)”

dequeue(Q: ptr to queue_t, pvalue: ptr to data_type): bool
loop
Head
head = Q–>Head
tail = Q–>Tail
next = head.ptr–>next
if head == Q–>Head
Tail
if head.ptr == tail.ptr
if next.ptr == NULL
return FALSE
endif
myQueue
CAS(&Q–>Tail, tail, <next.ptr, tail.count+1>)
else
# Read value before CAS, otherwise another
# dequeue might free the next node
*pvalue = next.ptr–>value
if CAS ( &Q–>Head,
head, <next.ptr, head.count+1>)
break
endif
endif
endif
endloop
free(head.ptr)

D1
[NULL]

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()

dequeue(myQueue, pvalue)

type):bool
loop
Head
head = Q–>Head
tail = Q–>Tail
next = head.ptr–>next
Tail
if next.ptr == NULL
return FALSE
endif
myQueue
else
if CAS ( &Q–>Head,
head, <next.ptr, head.count+1>)
break
endif
endif
endif
endloop
free(head.ptr)

D1
[NULL]

head

tail
next

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()


type):bool
loop
Head
head = Q–>Head
tail = Q–>Tail
next = head.ptr–>next
Tail
if next.ptr == NULL
return FALSE
endif
myQueue
else
if CAS ( &Q–>Head, head,
<next.ptr, head.count+1>)
break
endif
endif
endif
endloop
free(head.ptr)

D1
[NULL]

head

tail
next

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()


type):bool
loop
Head
head = Q–>Head
+1
tail = Q–>Tail
next = head.ptr–>next
Tail
if next.ptr == NULL
return FALSE
endif
myQueue
else
break
endif
endif
endif
endloop
free(head.ptr)

D1
[NULL]

head

tail
next

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

node = new node()


type):bool
loop
Head
head = Q–>Head
+1
tail = Q–>Tail
next = head.ptr–>next
Tail
if next.ptr == NULL
return FALSE
endif
myQueue
else
break
endif
endif
endif
endloop
free(head.ptr)

D1
[NULL]

Concurrent dequeues
• Suppose two processes call dequeue() at
the same time

struct pointer_t {
node_t * ptr
uint count
}

struct node_t {
data_type value
pointer_t next
}

struct queue_t {
pointer_t Head
pointer_t Tail
}

dequeue(Q: ptr to queue_t, pvalue: ptr to data_type): bool
loop
head = Q–>Head
Head
tail = Q–>Tail
next = head.ptr–>next
Tail
if next.ptr == NULL
return FALSE
endif
else
if CAS ( &Q–>Head,head, <next.ptr, head.count+1>)
break
endif
“dequeue(myQueue,
endif
endif
endloop
free(head.ptr)
return TRUE

D1
[NULL]

pvalue)”

Queue #2
• Two-lock Concurrent Queue

struct node_t {
data_type value
node_t * next
}

struct queue_t {
pointer_t Head
pointer_t Tail
lock_type H_lock
lock_type T_lock
}

node = new node()
Q–>H_lock = Q–>T_lock = FREE

dequeue(Q: pointer to queue_t, pvalue: pointer to data_type): boolean
lock(&Q–>H_lock)
node = Q–>Head
new_head = node–>next
if new_head == NULL
unlock(&Q–>H_lock)
return FALSE
endif
*pvalue = new_head–>value
Q–>Head = new_head
unlock(&Q–>H_lock)
free(node)
return TRUE

enqueue(Q: pointer to queue_t, value: data_type)
node = new node()
node–>value = value
node–>next = NULL
lock(&Q–>T_lock)
Q–>Tail–>next = node
Q–>Tail = node
unlock(&Q–>T_lock)

• Algorithms have same general structure only different
data types
• No loops, ‘busy waiting’ instead
• Only dequeues access Head Lock
• Only enqueues access Tail Lock

Performance Parameters
• Net execution time for one million
enqueue/dequeue pairs
• 12-processor Silicon Graphics Challenge
multiprocessor
• Algorithms compiled using the highest
optimization level
• Including many hand optimizations

Dedicated multiprocessor

Multiprogrammed system with 3
processes per processor

Multiprogrammed system with 2
processes per processor

Conclusion
• NBS clear winner for multiprocessor
multiprogrammed systems
• Above 5 processors, use the new nonblocking queue
• If hardware only supports test-and-set use
two lock queue
• For two or fewer processors use a single
lock algorithm for queues

ABA Problem

Recomendados

Recomendados

Más contenido relacionado

La actualidad más candente

La actualidad más candente (20)

Similar a ABA Problem

Similar a ABA Problem (20)

Más de Dr. C.V. Suresh Babu

Más de Dr. C.V. Suresh Babu (20)

Último

Último (20)

ABA Problem