Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pkg/planner/core/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func TestMain(m *testing.M) {
testDataMap.LoadTestSuiteData("testdata", "index_merge_suite", true)
testDataMap.LoadTestSuiteData("testdata", "runtime_filter_generator_suite")
testDataMap.LoadTestSuiteData("testdata", "plan_cache_suite")
testDataMap.LoadTestSuiteData("testdata", "decorrelate_limit_suite", true)

indexMergeSuiteData = testDataMap["index_merge_suite"]
planSuiteUnexportedData = testDataMap["plan_suite_unexported"]
Expand Down Expand Up @@ -72,3 +73,11 @@ func GetIndexMergeSuiteData() testdata.TestData {
// GetRuntimeFilterGeneratorData returns the entry stored in testDataMap under
// the "runtime_filter_generator_suite" key.
func GetRuntimeFilterGeneratorData() testdata.TestData {
	const suiteName = "runtime_filter_generator_suite"
	return testDataMap[suiteName]
}

// GetDecorrelateLimitSuiteData returns the entry stored in testDataMap under
// the "decorrelate_limit_suite" key, so that tests outside this package can
// load the decorrelate-limit cases.
func GetDecorrelateLimitSuiteData() testdata.TestData {
	suiteData := testDataMap["decorrelate_limit_suite"]
	return suiteData
}

// GetCascadesSuiteData returns the entry stored in testDataMap under the
// "cascades_suite" key.
//
// NOTE(review): the visible part of TestMain does not show a
// LoadTestSuiteData call for "cascades_suite" — confirm it is loaded
// somewhere before this accessor is used.
func GetCascadesSuiteData() testdata.TestData {
	const suiteName = "cascades_suite"
	suiteData := testDataMap[suiteName]
	return suiteData
}
24 changes: 24 additions & 0 deletions pkg/planner/core/plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/pingcap/tidb/pkg/planner/util/coretestsdk"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/testkit/testdata"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/dbterror/plannererrors"
"github.com/pingcap/tidb/pkg/util/plancodec"
Expand Down Expand Up @@ -732,3 +733,26 @@ func TestImportIntoBuildPlan(t *testing.T) {
require.ErrorIs(t, tk.ExecToErr("IMPORT INTO t3 FROM select * from t2"),
infoschema.ErrTableNotExists)
}

// TestDecorrelateLimitOptimization creates the employees / employee_notes
// fixture tables, runs every statement of the decorrelate-limit suite, and
// checks the returned rows against the plan stored in the suite output.
// The body is driven by testkit.RunTestUnderCascadesWithDomain, which supplies
// the cascades and caller values forwarded to LoadTestCases.
func TestDecorrelateLimitOptimization(t *testing.T) {
	runSuite := func(t *testing.T, testKit *testkit.TestKit, _ *domain.Domain, cascades, caller string) {
		testKit.MustExec("use test")
		// Fixture tables referenced by the suite's statements.
		testKit.MustExec("CREATE TABLE IF NOT EXISTS employees (\n id INT PRIMARY KEY,\n name VARCHAR(50),\n dept_id INT,\n salary DECIMAL(10, 2),\n alias VARCHAR(50)\n)")
		testKit.MustExec("CREATE TABLE IF NOT EXISTS employee_notes (\n id INT PRIMARY KEY,\n employee_id INT,\n note TEXT,\n created_at TIMESTAMP,\n INDEX idx_employee_id (employee_id)\n)")

		var (
			queries  []string
			expected []struct {
				SQL  string
				Plan []string
			}
		)
		suite := core.GetDecorrelateLimitSuiteData()
		suite.LoadTestCases(t, &queries, &expected, cascades, caller)

		for idx := range queries {
			query := queries[idx]
			result := testKit.MustQuery(query)
			// The callback handed to testdata.OnRecord overwrites the stored
			// SQL and plan rows for this case with the current result.
			testdata.OnRecord(func() {
				expected[idx].SQL = query
				expected[idx].Plan = testdata.ConvertRowsToStrings(result.Rows())
			})
			result.Check(testkit.Rows(expected[idx].Plan...))
		}
	}
	testkit.RunTestUnderCascadesWithDomain(t, runSuite)
}
167 changes: 167 additions & 0 deletions pkg/planner/core/rule_decorrelate.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,46 @@ func (s *DecorrelateSolver) optimize(ctx context.Context, p base.LogicalPlan, gr
apply.SetChildren(outerPlan, innerPlan)
return s.optimize(ctx, p, groupByColumn)
} else if m, ok := innerPlan.(*logicalop.LogicalMaxOneRow); ok {
// Check if MaxOneRow's child is Limit or TopN, and if we can remove it for LeftOuterJoin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems this PR doesn't handle the TopN case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this stage, topn is still just a LIMIT, so it doesn’t matter.

// Also handle the case where there's a Projection between MaxOneRow and Limit: MaxOneRow -> Projection -> Limit
if apply.JoinType == base.LeftOuterJoin {
mChild := m.Children()[0]
var removePlan base.LogicalPlan
var canRemove bool

if li, ok := mChild.(*logicalop.LogicalLimit); ok {
// Limit with non-0 offset cannot be removed, but we still check for redundant MaxOneRow
if li.Offset != 0 {
canRemove = false
} else {
// Check if join key is unique key
removePlan = li.Children()[0]
if isJoinKeyUniqueKey(apply, removePlan) {
canRemove = true
}
}
} else if proj, ok := mChild.(*logicalop.LogicalProjection); ok {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will this happen? Can you provide some cases for this situation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// Check if Projection's child is Limit: MaxOneRow -> Projection -> Limit
if li, ok := proj.Children()[0].(*logicalop.LogicalLimit); ok {
// Limit with non-0 offset cannot be removed, but we still check for redundant MaxOneRow
if li.Offset != 0 {
canRemove = false
} else {
// Check if join key is unique key
removePlan = li.Children()[0]
if isJoinKeyUniqueKey(apply, removePlan) {
canRemove = true
}
}
}
}
// If LIMIT can be removed (join key is unique key), remove it and re-enter decorrelate solver
if canRemove {
apply.SetChildren(outerPlan, removePlan)
return s.optimize(ctx, p, groupByColumn)
}
}
// If child is already MaxOneRow, remove redundant wrapper
if m.Children()[0].MaxOneRow() {
innerPlan = m.Children()[0]
apply.SetChildren(outerPlan, innerPlan)
Expand Down Expand Up @@ -470,6 +510,133 @@ func (*DecorrelateSolver) Name() string {
return "decorrelate"
}

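// isJoinKeyUniqueKey reports whether the correlated equality conditions found in plan
// match a primary key or unique key (PKOrUK) of the first DataSource found under plan.
// It returns false when plan contains a LogicalJoin or LogicalUnionAll, when no correlated
// equality condition on an inner column is found, or when that DataSource has no PKOrUK.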
func isJoinKeyUniqueKey(apply *logicalop.LogicalApply, plan base.LogicalPlan) bool {
var hasMultiRowOperator func(base.LogicalPlan) bool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function needs to guarantee that it covers all the cases that can generate more rows. If some cases are missing, it will produce the wrong answer. For example, please add some cases related to the unnest function — will it generate more rows? This should be considered more carefully.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there may be mis-deletions down the road or in certain cases. But the NoDecorrelate hint lets us sidestep the issue, even if we miss maintaining the list when new funcs are introduced.

hasMultiRowOperator = func(p base.LogicalPlan) bool {
// Check if current node is a JOIN (excluding the outer Apply which is already a Join)
// TODO: IN/EXISTS can also be evaluated to one row, but we don't handle it yet.
if _, ok := p.(*logicalop.LogicalJoin); ok {
return true
}
// Check if current node is UNION ALL
if _, ok := p.(*logicalop.LogicalUnionAll); ok {
return true
}
// Recursively check children
for _, child := range p.Children() {
if hasMultiRowOperator(child) {
return true
}
}
return false
}
if hasMultiRowOperator(plan) {
return false
}

// Extract join keys from Selection conditions and their children recursively
// Join conditions may be pushed down to DataSource or nested in child Selection nodes
innerJoinKeys := make([]*expression.Column, 0)

// Recursively extract all conditions from Selection nodes and their children
var extractConditions func(base.LogicalPlan)
extractConditions = func(p base.LogicalPlan) {
if sel, ok := p.(*logicalop.LogicalSelection); ok {
// Check conditions directly on Selection
for _, cond := range sel.Conditions {
if decExpr := apply.DeCorColFromEqExpr(cond); decExpr != nil {
if sf, ok := decExpr.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.EQ {
args := sf.GetArgs()
if len(args) == 2 {
if innerCol, ok := args[1].(*expression.Column); ok {
Copy link
Contributor

@Reminiscent Reminiscent Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we guarantee that args[1], rather than args[0], is the column from the inner side?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DeCorColFromEqExpr preserves the argument order, so args[1] is the column on the inner side.

if sel.Schema().Contains(innerCol) {
innerJoinKeys = append(innerJoinKeys, innerCol)
}
}
}
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this a function? It has been used several times

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}
// Continue to check children recursively
} else if ds, ok := p.(*logicalop.DataSource); ok {
// Check conditions in DataSource (PushedDownConds may contain join key conditions)
for _, cond := range ds.PushedDownConds {
if decExpr := apply.DeCorColFromEqExpr(cond); decExpr != nil {
if sf, ok := decExpr.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.EQ {
args := sf.GetArgs()
if len(args) == 2 {
if innerCol, ok := args[1].(*expression.Column); ok {
if ds.Schema().Contains(innerCol) {
innerJoinKeys = append(innerJoinKeys, innerCol)
}
}
}
}
}
}
// Stop recursion at DataSource
return
}
// Continue recursion for other nodes
for _, child := range p.Children() {
extractConditions(child)
}
}

extractConditions(plan)
if len(innerJoinKeys) == 0 {
return false
}

// Find the underlying DataSource to get PKOrUK
var findDataSource func(base.LogicalPlan) *logicalop.DataSource
findDataSource = func(p base.LogicalPlan) *logicalop.DataSource {
if ds, ok := p.(*logicalop.DataSource); ok {
return ds
}
for _, child := range p.Children() {
if ds := findDataSource(child); ds != nil {
return ds
}
}
return nil
}
ds := findDataSource(plan)
if ds == nil {
return false
}

// Use PKOrUK from DataSource Schema directly
if len(ds.Schema().PKOrUK) == 0 {
return false
}

// Check if join keys form a unique key
for _, keyInfo := range ds.Schema().PKOrUK {
allMatch := true
for _, keyCol := range keyInfo {
found := false
for _, joinKey := range innerJoinKeys {
if keyCol.ID == joinKey.ID && keyCol.ID != 0 {
found = true
break
}
}
if !found {
allMatch = false
break
}
}
if allMatch && len(keyInfo) == len(innerJoinKeys) && len(keyInfo) > 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems there is no need to check len(keyInfo) == len(innerJoinKeys). For example, if the unique key is (a, b) and the filter columns contain (a, b, c), then (a, b) alone already guarantees uniqueness.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, good idea~

return true
}
}

return false
}

// Return true if we should skip decorrelation for LeftOuterApply + Projection.
func skipDecorrelateProjectionForLeftOuterApply(apply *logicalop.LogicalApply, proj *logicalop.LogicalProjection) bool {
allConst := len(proj.Exprs) > 0
Expand Down
22 changes: 22 additions & 0 deletions pkg/planner/core/testdata/decorrelate_limit_suite_in.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"Name": "TestDecorrelateLimitOptimization",
"Cases": [
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 0 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 1 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, ( SELECT e2.salary FROM employees e2 WHERE e2.dept_id = e.dept_id LIMIT 1 OFFSET 0) AS avg_dept_salary FROM employees e WHERE e.dept_id = 1",
"EXPLAIN format = 'plan_tree' SELECT e.id, e.name, e.salary, (SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id ORDER BY en.created_at DESC LIMIT 1) AS latest_note FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id LEFT JOIN employees e3 ON e3.id = e2.dept_id WHERE e2.id = e.id LIMIT 1) AS note_multi_join FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 INNER JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id LIMIT 1) AS salary_inner_join FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT AVG(e2.salary) FROM employees e2 WHERE e2.id = e.id GROUP BY e2.dept_id HAVING AVG(e2.salary) > 1000 LIMIT 1) AS avg_salary_having FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT count(e2.dept_id) FROM employees e2 WHERE e2.id = e.id limit 1) AS distinct_dept_id FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND e2.dept_id IN ( SELECT dept_id FROM employees e3 WHERE e3.id = e.id LIMIT 1 ) LIMIT 1) AS salary_nested FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND EXISTS ( SELECT 1 FROM employee_notes en WHERE en.employee_id = e2.id ) LIMIT 1) AS salary_exists FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT e2.salary FROM employees e2 WHERE e2.id = e.id ORDER BY e2.dept_id, e2.salary DESC LIMIT 1) AS salary_order_multi FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT ROW_NUMBER() OVER (ORDER BY e2.salary DESC) FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS row_num FROM employees e",
"EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e"
]
}
]

Loading