Skip to content

runtime: segfaults in runtime.(*unwinder).next #73259

Open
@nsrip-dd

Description

@nsrip-dd

At Datadog, we've seen segfaults during runtime.(*unwinder).next. The programs are on Linux, running arm64 (in all the examples I've seen), on Go 1.24.1 and Go 1.23.6.

Here is the first example, on Go 1.24.1:

SIGSEGV: segmentation violation
PC=0x468da4 m=13 sigcode=1 addr=0x118
goroutine 0 [idle]:
runtime.(*unwinder).next(0xfc510200e438)
	/usr/local/go/src/runtime/traceback.go:458 +0x184
runtime.traceback2(0xfc510200e438, 0x1, 0x0, 0x2e)
	/usr/local/go/src/runtime/traceback.go:967 +0xcc
runtime.traceback1.func1(0x1)
	/usr/local/go/src/runtime/traceback.go:903 +0x54
runtime.traceback1(0x400a702540?, 0x417800?, 0x3?, 0x400a702540, 0x68?)
	/usr/local/go/src/runtime/traceback.go:927 +0x19c
runtime.traceback(...)
	/usr/local/go/src/runtime/traceback.go:803
runtime.tracebackothers.func1(0x400a702540)
	/usr/local/go/src/runtime/traceback.go:1279 +0x104
runtime.forEachGRace(0xfc510200e6c8)
	/usr/local/go/src/runtime/proc.go:720 +0x68
runtime.tracebackothers(0x40074efdc0?)
	/usr/local/go/src/runtime/traceback.go:1265 +0xcc
runtime.Stack.func1()
	/usr/local/go/src/runtime/mprof.go:1717 +0xb4
runtime.systemstack(0x0)
	/usr/local/go/src/runtime/asm_arm64.s:244 +0x6c

goroutine 989290 gp=0x40074efdc0 m=13 mp=0x400078e008 [running]:
runtime.systemstack_switch()
	/usr/local/go/src/runtime/asm_arm64.s:201 +0x8 fp=0x4007d22960 sp=0x4007d22950 pc=0x481048
runtime.Stack({0x40107d6000?, 0x100000?, 0x100000?}, 0x1)
	/usr/local/go/src/runtime/mprof.go:1707 +0xe0 fp=0x4007d22a00 sp=0x4007d22960 pc=0x43ab40
runtime/pprof.writeGoroutineStacks({0x27ac520, 0x4009abfd40})
	/usr/local/go/src/runtime/pprof/pprof.go:764 +0x6c fp=0x4007d22a40 sp=0x4007d22a00 pc=0x8f890c
runtime/pprof.writeGoroutine({0x27ac520?, 0x4009abfd40?}, 0x0?)
	/usr/local/go/src/runtime/pprof/pprof.go:753 +0x2c fp=0x4007d22a80 sp=0x4007d22a40 pc=0x8f884c
runtime/pprof.(*Profile).WriteTo(0x23cae63?, {0x27ac520?, 0x4009abfd40?}, 0x206e4c0?)
	/usr/local/go/src/runtime/pprof/pprof.go:377 +0x14c fp=0x4007d22b90 sp=0x4007d22a80 pc=0x8f5f5c
gopkg.in/DataDog/dd-trace-go.v1/profiler.(*profiler).lookupProfile(0x40000cac08?, {0x23cae63?, 0x780a278d9052?}, {0x27ac520, 0x4009abfd40}, 0x2)
	/go/pkg/mod/gopkg.in/!data!dog/dd-trace-go.v1@v1.72.2/profiler/profiler.go:136 +0x58 fp=0x4007d22bd0 sp=0x4007d22b90 pc=0x1681538
gopkg.in/DataDog/dd-trace-go.v1/profiler.init.func2(0x4008cfc0a0)
	/go/pkg/mod/gopkg.in/!data!dog/dd-trace-go.v1@v1.72.2/profiler/profile.go:168 +0xf4 fp=0x4007d22c60 sp=0x4007d22bd0 pc=0x167c414
gopkg.in/DataDog/dd-trace-go.v1/profiler.(*profiler).runProfile(0x4008cfc0a0, 0x5)
	/go/pkg/mod/gopkg.in/!data!dog/dd-trace-go.v1@v1.72.2/profiler/profile.go:348 +0x17c fp=0x4007d22e50 sp=0x4007d22c60 pc=0x167fabc
gopkg.in/DataDog/dd-trace-go.v1/profiler.(*profiler).collect.func2(0x5)
	/go/pkg/mod/gopkg.in/!data!dog/dd-trace-go.v1@v1.72.2/profiler/profiler.go:355 +0xb8 fp=0x4007d22fb0 sp=0x4007d22e50 pc=0x1682da8
gopkg.in/DataDog/dd-trace-go.v1/profiler.(*profiler).collect.gowrap2()
	/go/pkg/mod/gopkg.in/!data!dog/dd-trace-go.v1@v1.72.2/profiler/profiler.go:367 +0x30 fp=0x4007d22fd0 sp=0x4007d22fb0 pc=0x1682cb0
runtime.goexit({})

[ ... elided ... ]

r0      0xfc510200e438
r1      0x0
r2      0x1
r3      0x1
r4      0x400a702540
r5      0x0
r6      0x1
r7      0x0
r8      0x3627cb0
r9      0x1
r10     0x279e7e8
r11     0x6372732f6f672f6c
r12     0x656d69746e75722f
r13     0x6f672e636f72702f
r14     0x30372e3176406370
r15     0x7265746e692f302e
r16     0xfc510180ef10
r17     0xfc510200e0c0
r18     0x0
r19     0x0
r20     0xfc510200e0b4
r21     0xfc510200e498
r22     0x1
r23     0x400ba15108
r24     0x202a2e0
r25     0x0
r26     0xffffffffffffffff
r27     0x4058000
r28     0x40000fe380
r29     0xfc510200e038
lr      0x468c68
sp      0xfc510200e040
pc      0x468da4
fault   0x118

The crash happens on this line (runtime/traceback.go:458, the top frame in the trace above), during a call to runtime.Stack triggered by calling pprof.Lookup("goroutine").WriteTo(w, 2). Unfortunately, there are no goroutine addresses in this output (not sure why), so it's hard to tell which goroutine's stack was being unwound in this case.

The other occurrence is in a different program, built with Go 1.23.6. It's segfaulting on the same line in runtime.(*unwinder).next, during garbage collection:

SIGSEGV: segmentation violation
PC=0x488148 m=21 sigcode=1 addr=0x118
goroutine 0 gp=0x4003a88380 m=21 mp=0x40085fa008 [idle]:
runtime.(*unwinder).next(0xe5b01f40e280)
        /usr/local/go/src/runtime/traceback.go:458 +0x188 fp=0xe5b01f40e230 sp=0xe5b01f40e1a0 pc=0x488148
runtime.scanstack(0x4002a8ea80, 0x400007b250)
        /usr/local/go/src/runtime/mgcmark.go:887 +0x290 fp=0xe5b01f40e370 sp=0xe5b01f40e230 pc=0x4460a0
runtime.markroot.func1()
        /usr/local/go/src/runtime/mgcmark.go:238 +0xa8 fp=0xe5b01f40e3c0 sp=0xe5b01f40e370 pc=0x444b78
runtime.markroot(0x400007b250, 0x234, 0x1)
        /usr/local/go/src/runtime/mgcmark.go:212 +0x1c8 fp=0xe5b01f40e470 sp=0xe5b01f40e3c0 pc=0x444848
runtime.gcDrain(0x400007b250, 0xb)
        /usr/local/go/src/runtime/mgcmark.go:1188 +0x434 fp=0xe5b01f40e4e0 sp=0xe5b01f40e470 pc=0x446b14
runtime.gcDrainMarkWorkerFractional(...)
        /usr/local/go/src/runtime/mgcmark.go:1118
runtime.gcBgMarkWorker.func2()
        /usr/local/go/src/runtime/mgc.go:1506 +0x7c fp=0xe5b01f40e530 sp=0xe5b01f40e4e0 pc=0x442a1c
runtime.systemstack(0x0)
        /usr/local/go/src/runtime/asm_arm64.s:244 +0x6c fp=0xe5b01f40e540 sp=0xe5b01f40e530 pc=0x4a3a3c

goroutine 9 gp=0x4000254a80 m=21 mp=0x40085fa008 [GC worker (active)]:
runtime.systemstack_switch()
        /usr/local/go/src/runtime/asm_arm64.s:201 +0x8 fp=0x4000cc4f10 sp=0x4000cc4f00 pc=0x4a39b8
runtime.gcBgMarkWorker(0x4000066690)
        /usr/local/go/src/runtime/mgc.go:1472 +0x200 fp=0x4000cc4fb0 sp=0x4000cc4f10 pc=0x4426d0
runtime.gcBgMarkStartWorkers.gowrap1()
        /usr/local/go/src/runtime/mgc.go:1328 +0x28 fp=0x4000cc4fd0 sp=0x4000cc4fb0 pc=0x442498
runtime.goexit({})
        /usr/local/go/src/runtime/asm_arm64.s:1223 +0x4 fp=0x4000cc4fd0 sp=0x4000cc4fd0 pc=0x4a5ee4

[ ... ]

goroutine 1267 gp=0x4002a8ea80 m=nil [runnable (scan)]:
runtime.asyncPreempt2()
        /usr/local/go/src/runtime/preempt.go:308 +0x3c fp=0x4004cec4c0 sp=0x4004cec4a0 pc=0x46353c
runtime.asyncPreempt()
        /usr/local/go/src/runtime/preempt_arm64.s:47 +0x9c fp=0x4004cec6b0 sp=0x4004cec4c0 pc=0x4a6a8c
github.com/vishvananda/netlink/nl.(*NetlinkSocket).Receive(0x14360300000000?)
        /go/pkg/mod/github.com/!data!dog/netlink@v1.0.1-0.20240223195320-c7a4f832a3d1/nl/nl_linux.go:803 +0x130 fp=0x4004cfc710 sp=0x4004cec6c0 pc=0xf95de0

The last goroutine appears to be the goroutine that was being scanned. The crash output ends there.

Metadata

Metadata

Labels

BugReport: Issues describing a possible bug in the Go implementation.
NeedsInvestigation: Someone must examine and confirm this is a valid issue and not a duplicate of an existing one.
compiler/runtime: Issues related to the Go compiler and/or runtime.

Type

No type

Projects

Status

Todo

Relationships

None yet

Development

No branches or pull requests

Issue actions