diff --git a/README.md b/README.md index f65c45c..ba28933 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,74 @@ -[English](http://github.com/funny/sync/blob/master/README_EN.md) ---------- +介绍 +==== -[中文说明](http://github.com/funny/sync/blob/master/README_CN.md) ---------- \ No newline at end of file +这个包用来在开发调试期,帮助排查程序中的死锁情况。 + +用法 +==== + +通常我们项目中引用到原生`sync`包的代码会像这样: + +```go +package myapp + +import "sync" + +var MyLock sync.Mutex + +func MyFunc() { + MyLock.Lock() + defer MyLock.Unlock() + + // ....... +} +``` + +只需要将原来引用`sync`的代码改为引用`github.com/funny/sync`包,不需要修改别的代码: + + +```go +package myapp + +import "github.com/funny/sync" + +var MyLock sync.Mutex + +func MyFunc() { + MyLock.Lock() + defer MyLock.Unlock() + + // ....... +} +``` + +这时候死锁诊断还没有被启用,因为做了条件编译,所以锁的开销跟原生`sync`包是一样的。 + +当需要编译一个带死锁诊断的版本的时候,在`go build --tags`列表中加入`deadlock`标签。 + +例如这样: + +``` +go build -tags deadlock myproject +``` + +同样这个标签也用于单元测试,否则默认的单元测试会死锁: + +``` +go test -tags deadlock -v +``` + + +原理 +==== + +在开启死锁检查的时候,系统会维护一份全局的锁等待列表,其次每个锁都会有当前使用者的信息。 + +当一个goroutine要等待一个锁的时候,系统会到全局的等待列表里面查找当前这个锁的使用者,是否间接或直接的正在等待当前请求锁的这个goroutine。 + +死锁不一定只发生在两个goroutine之间,极端情况也可能是一个链条状的依赖关系,又或者可能出现自身重复加锁的死锁情况。 + +当出现死锁的时候,系统将提取死锁链上的所有goroutine的堆栈跟踪信息,方便排查故障原因。 + +因为需要维护一份全局的锁等待列表,所以这里会出现额外并且集中的一个全局锁开销,会导致明显的程序的并发性能下降。 + +全局锁的问题还会再继续研究和加以改进,但是目前这个包是不能用于生产环境的,只能用在开发和调试期作为死锁诊断的辅助工具。 diff --git a/README_CN.md b/README_CN.md deleted file mode 100644 index ba28933..0000000 --- a/README_CN.md +++ /dev/null @@ -1,74 +0,0 @@ -介绍 -==== - -这个包用来在开发调试期,帮助排查程序中的死锁情况。 - -用法 -==== - -通常我们项目中引用到原生`sync`包的代码会像这样: - -```go -package myapp - -import "sync" - -var MyLock sync.Mutex - -func MyFunc() { - MyLock.Lock() - defer MyLock.Unlock() - - // ....... -} -``` - -只需要将原来引用`sync`的代码改为引用`github.com/funny/sync`包,不需要修改别的代码: - - -```go -package myapp - -import "github.com/funny/sync" - -var MyLock sync.Mutex - -func MyFunc() { - MyLock.Lock() - defer MyLock.Unlock() - - // ....... -} -``` - -这时候死锁诊断还没有被启用,因为做了条件编译,所以锁的开销跟原生`sync`包是一样的。 - -当需要编译一个带死锁诊断的版本的时候,在`go build --tags`列表中加入`deadlock`标签。 - -例如这样: - -``` -go build -tags deadlock myproject -``` - -同样这个标签也用于单元测试,否则默认的单元测试会死锁: - -``` -go test -tags deadlock -v -``` - - -原理 -==== - -在开启死锁检查的时候,系统会维护一份全局的锁等待列表,其次每个锁都会有当前使用者的信息。 - -当一个goroutine要等待一个锁的时候,系统会到全局的等待列表里面查找当前这个锁的使用者,是否间接或直接的正在等待当前请求锁的这个goroutine。 - -死锁不一定只发生在两个goroutine之间,极端情况也可能是一个链条状的依赖关系,又或者可能出现自身重复加锁的死锁情况。 - -当出现死锁的时候,系统将提取死锁链上的所有goroutine的堆栈跟踪信息,方便排查故障原因。 - -因为需要维护一份全局的锁等待列表,所以这里会出现额外并且集中的一个全局锁开销,会导致明显的程序的并发性能下降。 - -全局锁的问题还会再继续研究和加以改进,但是目前这个包是不能用于生产环境的,只能用在开发和调试期作为死锁诊断的辅助工具。 diff --git a/README_EN.md b/README_EN.md deleted file mode 100644 index 3cf8a1c..0000000 --- a/README_EN.md +++ /dev/null @@ -1,73 +0,0 @@ -Introduction -============ - -This package is used to detect deadlock in Go program. - -Usage -===== - -Normally, we import default `sync` package in our project like this: - -```go -package myapp - -import "sync" - -var MyLock sync.Mutex - -func MyFunc() { - MyLock.Lock() - defer MyLock.Unlock() - - // ....... -} -``` - -Just replace the default `sync` to `github.com/funny/sync`, no need to change others: - - -```go -package myapp - -import "github.com/funny/sync" - -var MyLock sync.Mutex - -func MyFunc() { - MyLock.Lock() - defer MyLock.Unlock() - - // ....... -} -``` - -Currently, deadlock detection not yet enabled, the performance of `Mutext` and `RWMutex` just like default. - -When you need to compile a deadlock detection enabled version. Just add `deadlock` tag into `go build --tags` command. - -For example: - -``` -go build -tags deadlock myproject -``` - -This tag used for the unit test too. Otherwise the default unit test will deadlock: - -``` -go test -tags deadlock -v -``` - -How it works -============ - -When deadlock detection enabled, system will maintain a global lock waiting list, and each `Mutex` and `RWMutex` will keep owner goroutine's information. - -When a goroutine will waiting a lock, system will lookup the lock owner goroutine is whether waiting for the requester goroutine in directly or indirectly. - -Deadlock not only happens between two goroutines, sometimes the deadlock is a link, and deadlock happens when a goroutine repeat lock a `Mutext` too. - -When deadlock happens, system will dump stack trace of the gorotuines in the deadlock link. - -Because we need a global lock waiting list, so the deadlock detection will drop the performance. - -So, please don't use deadlock detection in production environment. \ No newline at end of file diff --git a/base_test.go b/base_test.go index da2316a..6fc8580 100644 --- a/base_test.go +++ b/base_test.go @@ -22,7 +22,15 @@ func Benchmark_Lock2(b *testing.B) { } } -func Test_NoDeadlock(t *testing.T) { +func Benchmark_Lock3(b *testing.B) { + var mutex RWMutex + for i := 0; i < b.N; i++ { + mutex.Lock() + mutex.Unlock() + } +} + +func Test_NoDeadlock1(t *testing.T) { var ( mutex Mutex wait WaitGroup @@ -50,3 +58,42 @@ func Test_NoDeadlock(t *testing.T) { wait.Wait() } + +func Test_NoDeadlock2(t *testing.T) { + var ( + mutex RWMutex + wait WaitGroup + ) + + wait.Add(1) + go func() { + for i := 0; i < 10000; i++ { + mutex.Lock() + strconv.Itoa(i) + mutex.Unlock() + } + wait.Done() + }() + + wait.Add(1) + go func() { + for i := 0; i < 10000; i++ { + mutex.Lock() + strconv.Itoa(i) + mutex.Unlock() + } + wait.Done() + }() + + wait.Add(1) + go func() { + for i := 0; i < 10000; i++ { + mutex.RLock() + strconv.Itoa(i) + mutex.RUnlock() + } + wait.Done() + }() + + wait.Wait() +} diff --git a/deadlock.go b/deadlock.go index 6ed69b2..3394081 100644 --- a/deadlock.go +++ b/deadlock.go @@ -4,9 +4,8 @@ package sync import ( "bytes" + "container/list" "github.com/funny/debug" - "github.com/funny/goid" - "strconv" "sync" ) @@ -16,107 +15,154 @@ type Mutex struct { } func (m *Mutex) Lock() { - waitInfo := m.monitor.wait() + waitInfo := m.monitor.wait('w') m.Mutex.Lock() m.monitor.using(waitInfo) } func (m *Mutex) Unlock() { - m.monitor.release() + m.monitor.release('w') m.Mutex.Unlock() } type RWMutex struct { - Mutex + monitor + sync.RWMutex } -func (rw *RWMutex) Lock() { - rw.Lock() +func (m *RWMutex) Lock() { + waitInfo := m.monitor.wait('w') + m.RWMutex.Lock() + m.monitor.using(waitInfo) } -func (rw *RWMutex) Unlock() { - rw.Unlock() +func (m *RWMutex) Unlock() { + m.monitor.release('w') + m.RWMutex.Unlock() } -func (rw *RWMutex) RLock() { - rw.Lock() +func (m *RWMutex) RLock() { + waitInfo := m.monitor.wait('r') + m.RWMutex.RLock() + m.monitor.using(waitInfo) } -func (rw *RWMutex) RUnlock() { - rw.Unlock() +func (m *RWMutex) RUnlock() { + m.monitor.release('r') + m.RWMutex.RUnlock() } var ( globalMutex = new(sync.Mutex) - waitingList = make(map[int32]*waiting) + waitingList = make(map[string]*lockUsage) titleStr = []byte("[DEAD LOCK]\n") goStr = []byte("goroutine ") waitStr = []byte(" wait") holdStr = []byte(" hold") + readStr = []byte(" read") + writeStr = []byte(" write") lineStr = []byte{'\n'} ) -type monitor struct { - holder int32 - holderStack debug.StackInfo +type lockUsage struct { + monitor *monitor + mode byte + goid string + stack debug.StackInfo } -type waiting struct { - monitor *monitor - holder int32 - holderStack debug.StackInfo +type monitor struct { + holders *list.List } -func (m *monitor) wait() *waiting { +func (m *monitor) wait(mode byte) *lockUsage { globalMutex.Lock() defer globalMutex.Unlock() - waitInfo := &waiting{m, goid.Get(), debug.StackTrace(3, 0)} - waitingList[waitInfo.holder] = waitInfo + waitInfo := &lockUsage{m, mode, debug.GoroutineID(), debug.StackTrace(3)} + waitingList[waitInfo.goid] = waitInfo + + if m.holders == nil { + m.holders = list.New() + } - m.verify([]*waiting{waitInfo}) + m.diagnose(mode, []*lockUsage{waitInfo}) return waitInfo } -func (m *monitor) verify(waitLink []*waiting) { - if m.holder != 0 { - // deadlock detected - if m.holder == waitLink[0].holder { - buf := new(bytes.Buffer) - buf.Write(titleStr) - for i := 0; i < len(waitLink); i++ { - buf.Write(goStr) - buf.WriteString(strconv.Itoa(int(waitLink[i].holder))) - buf.Write(waitStr) - buf.Write(lineStr) - buf.Write(waitLink[i].holderStack.Bytes(" ")) - - buf.Write(goStr) - buf.WriteString(strconv.Itoa(int(waitLink[i].monitor.holder))) - buf.Write(holdStr) - buf.Write(lineStr) - buf.Write(waitLink[i].monitor.holderStack.Bytes(" ")) +func (m *monitor) diagnose(mode byte, waitLink []*lockUsage) { + for i := m.holders.Front(); i != nil; i = i.Next() { + holder := i.Value.(*lockUsage) + if mode != 'r' || holder.mode != 'r' { + // deadlock detected + if holder.goid == waitLink[0].goid { + deadlockPanic(waitLink) + } + // the lock holder is waiting for another lock + if waitInfo, exists := waitingList[holder.goid]; exists { + waitInfo.monitor.diagnose(waitInfo.mode, append(waitLink, waitInfo)) } - panic(DeadlockError(buf.String())) - } - // the lock holder is waiting for another lock - if waitInfo, exists := waitingList[m.holder]; exists { - waitInfo.monitor.verify(append(waitLink, waitInfo)) } } } -func (m *monitor) using(waitInfo *waiting) { +func (m *monitor) using(waitInfo *lockUsage) { globalMutex.Lock() defer globalMutex.Unlock() - delete(waitingList, waitInfo.holder) - m.holder = waitInfo.holder - m.holderStack = waitInfo.holderStack + delete(waitingList, waitInfo.goid) + m.holders.PushBack(waitInfo) } -func (m *monitor) release() { - m.holder = 0 - m.holderStack = nil +func (m *monitor) release(mode byte) { + id := debug.GoroutineID() + for i := m.holders.Back(); i != nil; i = i.Prev() { + if info := i.Value.(*lockUsage); info.goid == id && info.mode == mode { + m.holders.Remove(i) + break + } + } +} + +func deadlockPanic(waitLink []*lockUsage) { + buf := new(bytes.Buffer) + buf.Write(titleStr) + for i := 0; i < len(waitLink); i++ { + buf.Write(goStr) + buf.WriteString(waitLink[i].goid) + buf.Write(waitStr) + if waitLink[i].mode == 'w' { + buf.Write(writeStr) + } else { + buf.Write(readStr) + } + buf.Write(lineStr) + buf.Write(waitLink[i].stack.Bytes(" ")) + + // lookup waiting for who + n := i + 1 + if n == len(waitLink) { + n = 0 + } + waitWho := waitLink[n] + + for j := waitLink[i].monitor.holders.Front(); j != nil; j = j.Next() { + waitHolder := j.Value.(*lockUsage) + if waitHolder.goid == waitWho.goid { + buf.Write(goStr) + buf.WriteString(waitHolder.goid) + buf.Write(holdStr) + if waitHolder.mode == 'w' { + buf.Write(writeStr) + } else { + buf.Write(readStr) + } + buf.Write(lineStr) + buf.Write(waitHolder.stack.Bytes(" ")) + break + } + } + } + panic(DeadlockError(buf.String())) } diff --git a/deadlock_test.go b/deadlock_test.go index 55b6c50..4b30c4d 100644 --- a/deadlock_test.go +++ b/deadlock_test.go @@ -106,3 +106,36 @@ func Test_DeadLock3(t *testing.T) { mutex3.Lock() }) } + +func Test_DeadLock4(t *testing.T) { + deadlockTest(t, func() { + var ( + mutex1 RWMutex + mutex2 RWMutex + mutex3 RWMutex + ) + + mutex1.Lock() + + var wait1 WaitGroup + wait1.Add(1) + go func() { + mutex2.RLock() + + var wait2 WaitGroup + wait2.Add(1) + go func() { + mutex3.Lock() + wait2.Done() + mutex2.Lock() + }() + wait2.Wait() + + wait1.Done() + mutex1.RLock() + }() + wait1.Wait() + + mutex3.Lock() + }) +}