Skip to content

Commit 8021a29

Browse files
neild authored and gopherbot committed
publicsuffix: change node table entry size to 40 bits
The latest public suffix list overflows the 15 bits of text offset, and we're out of bits in the node entry. Bump the node size to 40 bits.

Fixes golang/go#55849

Change-Id: Ia4b1b23799d569e4fbcfe38b4d5d9761b76f2d73
Reviewed-on: https://go-review.googlesource.com/c/net/+/434695
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Auto-Submit: Damien Neil <dneil@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Nigel Tao (INACTIVE; USE @golang.org INSTEAD) <nigeltao@google.com>
Run-TryBot: Damien Neil <dneil@google.com>
1 parent bcab684 commit 8021a29

File tree

3 files changed

+9397
-9371
lines changed

3 files changed

+9397
-9371
lines changed

‎publicsuffix/gen.go

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,14 @@ import (
3737
)
3838

3939
const (
40-
// These sum of these four values must be no greater than 32.
40+
// This must be a multiple of 8 and no greater than 64.
41+
// Update nodeValue in list.go if this changes.
42+
nodesBits = 40
43+
44+
// The sum of these four values must be no greater than nodesBits.
4145
nodesBitsChildren = 10
4246
nodesBitsICANN = 1
43-
nodesBitsTextOffset = 15
47+
nodesBitsTextOffset = 16
4448
nodesBitsTextLength = 6
4549

4650
// The sum of these four values must be no greater than 32.
@@ -97,7 +101,7 @@ const (
97101
)
98102

99103
var (
100-
labelEncoding = map[string]uint32{}
104+
labelEncoding = map[string]uint64{}
101105
labelsList = []string{}
102106
labelsMap = map[string]bool{}
103107
rules = []string{}
@@ -127,7 +131,13 @@ func main() {
127131

128132
func main1() error {
129133
flag.Parse()
130-
if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
134+
if nodesBits > 64 {
135+
return fmt.Errorf("nodesBits is too large")
136+
}
137+
if nodesBits%8 != 0 {
138+
return fmt.Errorf("nodesBits must be a multiple of 8")
139+
}
140+
if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > nodesBits {
131141
return fmt.Errorf("not enough bits to encode the nodes table")
132142
}
133143
if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
@@ -312,6 +322,7 @@ package publicsuffix
312322
const version = %q
313323
314324
const (
325+
nodesBits = %d
315326
nodesBitsChildren = %d
316327
nodesBitsICANN = %d
317328
nodesBitsTextOffset = %d
@@ -334,6 +345,7 @@ const numTLD = %d
334345
335346
`
336347
fmt.Fprintf(w, header, *version,
348+
nodesBits,
337349
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
338350
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
339351
nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
@@ -354,7 +366,7 @@ const numTLD = %d
354366
if length >= 1<<nodesBitsTextLength {
355367
return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
356368
}
357-
labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
369+
labelEncoding[label] = uint64(offset)<<nodesBitsTextLength | uint64(length)
358370
}
359371
fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
360372
for len(text) > 0 {
@@ -372,9 +384,9 @@ const numTLD = %d
372384

373385
fmt.Fprintf(w, `
374386
375-
// nodes is the list of nodes. Each node is represented as a uint32, which
376-
// encodes the node's children, wildcard bit and node type (as an index into
377-
// the children array), ICANN bit and text.
387+
// nodes is the list of nodes. Each node is represented as a %v-bit integer,
388+
// which encodes the node's children, wildcard bit and node type (as an index
389+
// into the children array), ICANN bit and text.
378390
//
379391
// If the table was generated with the -comments flag, there is a //-comment
380392
// after each node's data. In it is the nodes-array indexes of the children,
@@ -383,15 +395,16 @@ const numTLD = %d
383395
// nodes that have children but don't match a domain label in their own right.
384396
// An I denotes an ICANN domain.
385397
//
386-
// The layout within the uint32, from MSB to LSB, is:
398+
// The layout within the node, from MSB to LSB, is:
387399
// [%2d bits] unused
388400
// [%2d bits] children index
389401
// [%2d bits] ICANN bit
390402
// [%2d bits] text index
391403
// [%2d bits] text length
392-
var nodes = [...]uint32{
404+
var nodes = [...]uint8{
393405
`,
394-
32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
406+
nodesBits,
407+
nodesBits-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
395408
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
396409
if err := n.walk(w, printNode); err != nil {
397410
return err
@@ -558,14 +571,17 @@ func printNode(w io.Writer, n *node) error {
558571
if c.icann {
559572
encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
560573
}
561-
encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
574+
encoding |= uint64(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
575+
for i := nodesBits - 8; i >= 0; i -= 8 {
576+
fmt.Fprintf(w, "0x%02x, ", (encoding>>i)&0xff)
577+
}
562578
if *comments {
563-
fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
564-
encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
579+
fmt.Fprintf(w, "// n0x%04x c0x%04x (%s)%s %s %s %s\n",
580+
c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
565581
nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
566582
)
567583
} else {
568-
fmt.Fprintf(w, "0x%x,\n", encoding)
584+
fmt.Fprintf(w, "\n")
569585
}
570586
}
571587
return nil

‎publicsuffix/list.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ loop:
101101
break
102102
}
103103

104-
u := nodes[f] >> (nodesBitsTextOffset + nodesBitsTextLength)
104+
u := uint32(nodeValue(f) >> (nodesBitsTextOffset + nodesBitsTextLength))
105105
icannNode = u&(1<<nodesBitsICANN-1) != 0
106106
u >>= nodesBitsICANN
107107
u = children[u&(1<<nodesBitsChildren-1)]
@@ -154,9 +154,18 @@ func find(label string, lo, hi uint32) uint32 {
154154
return notFound
155155
}
156156

157+
func nodeValue(i uint32) uint64 {
158+
off := uint64(i * (nodesBits / 8))
159+
return uint64(nodes[off])<<32 |
160+
uint64(nodes[off+1])<<24 |
161+
uint64(nodes[off+2])<<16 |
162+
uint64(nodes[off+3])<<8 |
163+
uint64(nodes[off+4])
164+
}
165+
157166
// nodeLabel returns the label for the i'th node.
158167
func nodeLabel(i uint32) string {
159-
x := nodes[i]
168+
x := nodeValue(i)
160169
length := x & (1<<nodesBitsTextLength - 1)
161170
x >>= nodesBitsTextLength
162171
offset := x & (1<<nodesBitsTextOffset - 1)

0 commit comments

Comments
 (0)