Skip to content

Commit 638b55c

Browse files
robbat2 and ImSingee authored
feat: expose MD raid component devices (#674)
* Support parse raid type for mdstat Note: rebased on top of master for reformatting Signed-off-by: Singee <git@singee.me> Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com> * support linear type in mdstat Note: rebased on top of master for reformatting Signed-off-by: Singee <git@singee.me> Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com> * change default type to unknown Note: rebased on top of master for reformatting Signed-off-by: Singee <git@singee.me> Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com> * optimize raid type check Signed-off-by: Singee <git@singee.me> * feat: expose MD raid component devices Expose what component devices are part of a MD raid device, as well as the most common flags per-component. This will enable a future node_exporter metric showing which component of a RAID had failed. Signed-off-by: Robin H. Johnson <robbat2@orbis-terrarum.net> Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com> * fix: update mdstat_test for reshaping testcase Signed-off-by: Robin H. Johnson <robbat2@gentoo.org> * doc: lint fix Signed-off-by: Robin H. Johnson <robbat2@gentoo.org> --------- Signed-off-by: Singee <git@singee.me> Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com> Signed-off-by: Robin H. Johnson <robbat2@orbis-terrarum.net> Signed-off-by: Robin H. Johnson <robbat2@gentoo.org> Co-authored-by: Singee <git@singee.me>
1 parent b2bde72 commit 638b55c

File tree

2 files changed

+147
-37
lines changed

2 files changed

+147
-37
lines changed

mdstat.go

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,34 @@ var (
2727
recoveryLinePctRE = regexp.MustCompile(`= (.+)%`)
2828
recoveryLineFinishRE = regexp.MustCompile(`finish=(.+)min`)
2929
recoveryLineSpeedRE = regexp.MustCompile(`speed=(.+)[A-Z]`)
30-
componentDeviceRE = regexp.MustCompile(`(.*)\[\d+\]`)
30+
componentDeviceRE = regexp.MustCompile(`(.*)\[(\d+)\](\([SF]+\))?`)
31+
personalitiesPrefix = "Personalities : "
3132
)
3233

34+
// MDStatComponent describes a single component device of an MD raid device,
// together with the per-component flags that /proc/mdstat exposes.
type MDStatComponent struct {
	// Name is the name of the component device.
	Name string
	// DescriptorIndex is the number of the component device, e.g. its order
	// in the superblock.
	DescriptorIndex int32

	// The flags below are named per Linux drivers/md/md.[ch] as of v6.12-rc1.
	// Only the subset that is exposed in mdstat is represented.
	WriteMostly bool
	Journal     bool
	Faulty      bool // "Faulty" is what the kernel source uses for "(F)".
	Spare       bool
	Replacement bool

	// Some additional flags are NOT exposed in procfs today; they may be
	// available via sysfs:
	// In_sync, Bitmap_sync, Blocked, WriteErrorSeen, FaultRecorded,
	// BlockedBadBlocks, WantReplacement, Candidate, ...
}
51+
3352
// MDStat holds info parsed from /proc/mdstat.
3453
type MDStat struct {
3554
// Name of the device.
3655
Name string
56+
// raid type of the device.
57+
Type string
3758
// activity-state of the device.
3859
ActivityState string
3960
// Number of active disks.
@@ -58,8 +79,8 @@ type MDStat struct {
5879
BlocksSyncedFinishTime float64
5980
// current sync speed (in Kilobytes/sec)
6081
BlocksSyncedSpeed float64
61-
// Name of md component devices
62-
Devices []string
82+
// component devices
83+
Devices []MDStatComponent
6384
}
6485

6586
// MDStat parses an mdstat-file (/proc/mdstat) and returns a slice of
@@ -80,28 +101,52 @@ func (fs FS) MDStat() ([]MDStat, error) {
80101
// parseMDStat parses data from mdstat file (/proc/mdstat) and returns a slice of
81102
// structs containing the relevant info.
82103
func parseMDStat(mdStatData []byte) ([]MDStat, error) {
104+
// TODO:
105+
// - parse global hotspares from the "unused devices" line.
83106
mdStats := []MDStat{}
84107
lines := strings.Split(string(mdStatData), "\n")
108+
knownRaidTypes := make(map[string]bool)
85109

86110
for i, line := range lines {
87111
if strings.TrimSpace(line) == "" || line[0] == ' ' ||
88-
strings.HasPrefix(line, "Personalities") ||
89112
strings.HasPrefix(line, "unused") {
90113
continue
91114
}
115+
// Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
116+
if len(knownRaidTypes) == 0 && strings.HasPrefix(line, personalitiesPrefix) {
117+
personalities := strings.Fields(line[len(personalitiesPrefix):])
118+
for _, word := range personalities {
119+
word := word[1 : len(word)-1]
120+
knownRaidTypes[word] = true
121+
}
122+
continue
123+
}
92124

93125
deviceFields := strings.Fields(line)
94126
if len(deviceFields) < 3 {
95127
return nil, fmt.Errorf("%w: Expected 3+ lines, got %q", ErrFileParse, line)
96128
}
97129
mdName := deviceFields[0] // mdx
98-
state := deviceFields[2] // active or inactive
130+
state := deviceFields[2] // active, inactive, broken
131+
132+
mdType := "unknown" // raid1, raid5, etc.
133+
var deviceStartIndex int
134+
if len(deviceFields) > 3 { // mdType may be in the 3rd or 4th field
135+
if isRaidType(deviceFields[3], knownRaidTypes) {
136+
mdType = deviceFields[3]
137+
deviceStartIndex = 4
138+
} else if len(deviceFields) > 4 && isRaidType(deviceFields[4], knownRaidTypes) {
139+
// if the 3rd field is (...), the 4th field is the mdType
140+
mdType = deviceFields[4]
141+
deviceStartIndex = 5
142+
}
143+
}
99144

100145
if len(lines) <= i+3 {
101146
return nil, fmt.Errorf("%w: Too few lines for md device: %q", ErrFileParse, mdName)
102147
}
103148

104-
// Failed disks have the suffix (F) & Spare disks have the suffix (S).
149+
// Failed (Faulty) disks have the suffix (F) & Spare disks have the suffix (S).
105150
fail := int64(strings.Count(line, "(F)"))
106151
spare := int64(strings.Count(line, "(S)"))
107152
active, total, down, size, err := evalStatusLine(lines[i], lines[i+1])
@@ -152,8 +197,14 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
152197
}
153198
}
154199

200+
devices, err := evalComponentDevices(deviceFields[deviceStartIndex:])
201+
if err != nil {
202+
return nil, fmt.Errorf("error parsing components in md device %q: %w", mdName, err)
203+
}
204+
155205
mdStats = append(mdStats, MDStat{
156206
Name: mdName,
207+
Type: mdType,
157208
ActivityState: state,
158209
DisksActive: active,
159210
DisksFailed: fail,
@@ -166,14 +217,24 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
166217
BlocksSyncedPct: pct,
167218
BlocksSyncedFinishTime: finish,
168219
BlocksSyncedSpeed: speed,
169-
Devices: evalComponentDevices(deviceFields),
220+
Devices: devices,
170221
})
171222
}
172223

173224
return mdStats, nil
174225
}
175226

227+
// isRaidType reports whether mdType names one of the raid personalities
// recorded in knownRaidTypes.
//
// Two shapes are rejected before the lookup:
//   - Rule 1: anything that looks like "(...)".
//   - Rule 2: anything that looks like a component device such as "sda[0]".
//
// Only the presence of the key in knownRaidTypes matters; the stored value is
// not consulted.
func isRaidType(mdType string, knownRaidTypes map[string]bool) bool {
	if strings.ContainsAny(mdType, "([") {
		return false
	}
	_, known := knownRaidTypes[mdType]
	return known
}
235+
176236
func evalStatusLine(deviceLine, statusLine string) (active, total, down, size int64, err error) {
237+
// e.g. 523968 blocks super 1.2 [4/4] [UUUU]
177238
statusFields := strings.Fields(statusLine)
178239
if len(statusFields) < 1 {
179240
return 0, 0, 0, 0, fmt.Errorf("%w: Unexpected statusline %q: %w", ErrFileParse, statusLine, err)
@@ -264,17 +325,29 @@ func evalRecoveryLine(recoveryLine string) (blocksSynced int64, blocksToBeSynced
264325
return blocksSynced, blocksToBeSynced, pct, finish, speed, nil
265326
}
266327

267-
func evalComponentDevices(deviceFields []string) []string {
268-
mdComponentDevices := make([]string, 0)
269-
if len(deviceFields) > 3 {
270-
for _, field := range deviceFields[4:] {
271-
match := componentDeviceRE.FindStringSubmatch(field)
272-
if match == nil {
273-
continue
274-
}
275-
mdComponentDevices = append(mdComponentDevices, match[1])
328+
func evalComponentDevices(deviceFields []string) ([]MDStatComponent, error) {
329+
mdComponentDevices := make([]MDStatComponent, 0)
330+
for _, field := range deviceFields {
331+
match := componentDeviceRE.FindStringSubmatch(field)
332+
if match == nil {
333+
continue
334+
}
335+
descriptorIndex, err := strconv.ParseInt(match[2], 10, 32)
336+
if err != nil {
337+
return mdComponentDevices, fmt.Errorf("error parsing int from device %q: %w", match[2], err)
276338
}
339+
mdComponentDevices = append(mdComponentDevices, MDStatComponent{
340+
Name: match[1],
341+
DescriptorIndex: int32(descriptorIndex),
342+
// match may contain one or more of these
343+
// https://github.com/torvalds/linux/blob/7ec462100ef9142344ddbf86f2c3008b97acddbe/drivers/md/md.c#L8376-L8392
344+
Faulty: strings.Contains(match[3], "(F)"),
345+
Spare: strings.Contains(match[3], "(S)"),
346+
Journal: strings.Contains(match[3], "(J)"),
347+
Replacement: strings.Contains(match[3], "(R)"),
348+
WriteMostly: strings.Contains(match[3], "(W)"),
349+
})
277350
}
278351

279-
return mdComponentDevices
352+
return mdComponentDevices, nil
280353
}

0 commit comments

Comments
 (0)