From 3aabb2ae9de3b85e4ff8341f163f7e73cc5a0a7c Mon Sep 17 00:00:00 2001 From: Sophie Alpert Date: Sat, 6 Sep 2025 00:30:34 -0700 Subject: [PATCH 1/2] Fix missing EvalPlanQual recheck for TID scans --- src/backend/executor/nodeTidrangescan.c | 10 ++ src/backend/executor/nodeTidscan.c | 19 +++- .../isolation/expected/eval-plan-qual.out | 94 +++++++++++++++++++ src/test/isolation/specs/eval-plan-qual.spec | 14 +++ 4 files changed, 133 insertions(+), 4 deletions(-) diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 26f7420b64b0..1bce8d6cbfe6 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -274,6 +274,16 @@ TidRangeNext(TidRangeScanState *node) static bool TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot) { + if (!TidRangeEval(node)) + return false; + + Assert(ItemPointerIsValid(&slot->tts_tid)); + + /* Recheck the ctid is still within range */ + if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 || + ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0) + return false; + return true; } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 5e56e29a15fc..d50c6600358c 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -402,12 +402,23 @@ TidNext(TidScanState *node) static bool TidRecheck(TidScanState *node, TupleTableSlot *slot) { + ItemPointer match; + + /* WHERE CURRENT OF always intends to resolve to the latest tuple */ + if (node->tss_isCurrentOf) + return true; + + if (node->tss_TidList == NULL) + TidListEval(node); + /* - * XXX shouldn't we check here to make sure tuple matches TID list? In - * runtime-key case this is not certain, is it? However, in the WHERE - * CURRENT OF case it might not match anyway ... + * Binary search the TidList to see if this ctid is mentioned and return + * true if it is. */ - return true; + match = (ItemPointer) bsearch(&slot->tts_tid, node->tss_TidList, + node->tss_NumTids, sizeof(ItemPointerData), + itemptr_comparator); + return match != NULL; } diff --git a/src/test/isolation/expected/eval-plan-qual.out b/src/test/isolation/expected/eval-plan-qual.out index 032d4208d51f..3d31d0f84e59 100644 --- a/src/test/isolation/expected/eval-plan-qual.out +++ b/src/test/isolation/expected/eval-plan-qual.out @@ -1218,6 +1218,100 @@ subid|id (1 row) +starting permutation: tid1 tid2 c1 c2 read +step tid1: UPDATE accounts SET balance = balance + 100 WHERE ctid = '(0,1)' RETURNING accountid, balance; +accountid|balance +---------+------- +checking | 700 +(1 row) + +step tid2: UPDATE accounts SET balance = balance + 200 WHERE ctid = '(0,1)' RETURNING accountid, balance; +step c1: COMMIT; +step tid2: <... completed> +accountid|balance +---------+------- +(0 rows) + +step c2: COMMIT; +step read: SELECT * FROM accounts ORDER BY accountid; +accountid|balance|balance2 +---------+-------+-------- +checking | 700| 1400 +savings | 600| 1200 +(2 rows) + + +starting permutation: tid1 tidsucceed2 c1 c2 read +step tid1: UPDATE accounts SET balance = balance + 100 WHERE ctid = '(0,1)' RETURNING accountid, balance; +accountid|balance +---------+------- +checking | 700 +(1 row) + +step tidsucceed2: UPDATE accounts SET balance = balance + 200 WHERE ctid = '(0,1)' OR ctid = '(0,3)' RETURNING accountid, balance; +step c1: COMMIT; +step tidsucceed2: <... completed> +accountid|balance +---------+------- +checking | 900 +(1 row) + +step c2: COMMIT; +step read: SELECT * FROM accounts ORDER BY accountid; +accountid|balance|balance2 +---------+-------+-------- +checking | 900| 1800 +savings | 600| 1200 +(2 rows) + + +starting permutation: tidrange1 tidrange2 c1 c2 read +step tidrange1: UPDATE accounts SET balance = balance + 100 WHERE ctid BETWEEN '(0,1)' AND '(0,1)' RETURNING accountid, balance; +accountid|balance +---------+------- +checking | 700 +(1 row) + +step tidrange2: UPDATE accounts SET balance = balance + 200 WHERE ctid BETWEEN '(0,1)' AND '(0,1)' RETURNING accountid, balance; +step c1: COMMIT; +step tidrange2: <... completed> +accountid|balance +---------+------- +(0 rows) + +step c2: COMMIT; +step read: SELECT * FROM accounts ORDER BY accountid; +accountid|balance|balance2 +---------+-------+-------- +checking | 700| 1400 +savings | 600| 1200 +(2 rows) + + +starting permutation: tid1 tid2 r1 c2 read +step tid1: UPDATE accounts SET balance = balance + 100 WHERE ctid = '(0,1)' RETURNING accountid, balance; +accountid|balance +---------+------- +checking | 700 +(1 row) + +step tid2: UPDATE accounts SET balance = balance + 200 WHERE ctid = '(0,1)' RETURNING accountid, balance; +step r1: ROLLBACK; +step tid2: <... completed> +accountid|balance +---------+------- +checking | 800 +(1 row) + +step c2: COMMIT; +step read: SELECT * FROM accounts ORDER BY accountid; +accountid|balance|balance2 +---------+-------+-------- +checking | 800| 1600 +savings | 600| 1200 +(2 rows) + + starting permutation: simplepartupdate conditionalpartupdate c1 c2 read_part step simplepartupdate: update parttbl set b = b + 10; diff --git a/src/test/isolation/specs/eval-plan-qual.spec b/src/test/isolation/specs/eval-plan-qual.spec index 07307e623e47..c6eee6855868 100644 --- a/src/test/isolation/specs/eval-plan-qual.spec +++ b/src/test/isolation/specs/eval-plan-qual.spec @@ -99,6 +99,10 @@ step upsert1 { WHERE NOT EXISTS (SELECT 1 FROM upsert); } +# Tests for Tid / Tid Range Scan +step tid1 { UPDATE accounts SET balance = balance + 100 WHERE ctid = '(0,1)' RETURNING accountid, balance; } +step tidrange1 { UPDATE accounts SET balance = balance + 100 WHERE ctid BETWEEN '(0,1)' AND '(0,1)' RETURNING accountid, balance; } + # tests with table p check inheritance cases: # readp1/writep1/readp2 tests a bug where nodeLockRows did the wrong thing # when the first updated tuple was in a non-first child table. @@ -241,6 +245,11 @@ step updateforcip3 { step wrtwcte { UPDATE table_a SET value = 'tableAValue2' WHERE id = 1; } step wrjt { UPDATE jointest SET data = 42 WHERE id = 7; } +step tid2 { UPDATE accounts SET balance = balance + 200 WHERE ctid = '(0,1)' RETURNING accountid, balance; } +step tidrange2 { UPDATE accounts SET balance = balance + 200 WHERE ctid BETWEEN '(0,1)' AND '(0,1)' RETURNING accountid, balance; } +# here, recheck succeeds; (0,3) is the id that step tid1 will assign +step tidsucceed2 { UPDATE accounts SET balance = balance + 200 WHERE ctid = '(0,1)' OR ctid = '(0,3)' RETURNING accountid, balance; } + step conditionalpartupdate { update parttbl set c = -c where b < 10; } @@ -392,6 +401,11 @@ permutation wrtwcte readwcte c1 c2 permutation wrjt selectjoinforupdate c2 c1 permutation wrjt selectresultforupdate c2 c1 permutation wrtwcte multireadwcte c1 c2 +permutation tid1 tid2 c1 c2 read +permutation tid1 tidsucceed2 c1 c2 read +permutation tidrange1 tidrange2 c1 c2 read +# test that a rollback on s1 has s2 perform the update on the original row +permutation tid1 tid2 r1 c2 read permutation simplepartupdate conditionalpartupdate c1 c2 read_part permutation simplepartupdate complexpartupdate c1 c2 read_part From 8e84c81d4dca0f01d76fb79ba80af6bcfa30cde4 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 15 Sep 2025 21:59:36 +1200 Subject: [PATCH 2/2] Reduce rescan overheads in TID [Range] Scan Until how the TIDs to scan have been recalculated after every rescan of both TID Scan and TID Range Scan. Here we adjust things so that the calculated list of TIDs or calculated range of TIDs is only marked as needing recalculated whenever a parameter has changed that might require a recalculation. --- src/backend/executor/nodeTidrangescan.c | 59 ++++++++++++++++++------- src/backend/executor/nodeTidscan.c | 17 +++++-- src/backend/optimizer/plan/createplan.c | 25 ++++++++--- src/include/nodes/execnodes.h | 4 ++ src/include/nodes/plannodes.h | 4 ++ 5 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 1bce8d6cbfe6..39239c6692b1 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -128,14 +128,17 @@ TidExprListCreate(TidRangeScanState *tidrangestate) * TidRangeEval * * Compute and set node's block and offset range to scan by evaluating - * node->trss_tidexprs. Returns false if we detect the range cannot - * contain any tuples. Returns true if it's possible for the range to - * contain tuples. We don't bother validating that trss_mintid is less - * than or equal to trss_maxtid, as the scan_set_tidrange() table AM - * function will handle that. + * node->trss_tidexprs. Sets node's trss_rangeIsEmpty to true if the + * calculated range must be empty of any tuples, otherwise sets + * trss_rangeIsEmpty to false and sets trss_mintid and trss_maxtid to the + * calculated range. + * + * We don't bother validating that trss_mintid is less than or equal to + * trss_maxtid, as the scan_set_tidrange() table AM function will handle + * that. * ---------------------------------------------------------------- */ -static bool +static void TidRangeEval(TidRangeScanState *node) { ExprContext *econtext = node->ss.ps.ps_ExprContext; @@ -165,7 +168,10 @@ TidRangeEval(TidRangeScanState *node) /* If the bound is NULL, *nothing* matches the qual. */ if (isNull) - return false; + { + node->trss_rangeIsEmpty = true; + return; + } if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND) { @@ -207,7 +213,7 @@ TidRangeEval(TidRangeScanState *node) ItemPointerCopy(&lowerBound, &node->trss_mintid); ItemPointerCopy(&upperBound, &node->trss_maxtid); - return true; + node->trss_rangeIsEmpty = false; } /* ---------------------------------------------------------------- @@ -234,12 +240,19 @@ TidRangeNext(TidRangeScanState *node) slot = node->ss.ss_ScanTupleSlot; direction = estate->es_direction; - if (!node->trss_inScan) + /* First time through, compute TID range to scan */ + if (!node->trss_rangeCalcDone) { - /* First time through, compute TID range to scan */ - if (!TidRangeEval(node)) - return NULL; + TidRangeEval(node); + node->trss_rangeCalcDone = true; + } + + /* Check if the range was detected not to contain any tuples */ + if (node->trss_rangeIsEmpty) + return NULL; + if (!node->trss_inScan) + { if (scandesc == NULL) { scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation, @@ -274,13 +287,18 @@ TidRangeNext(TidRangeScanState *node) static bool TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot) { - if (!TidRangeEval(node)) - return false; + /* First call? Compute the TID Range */ + if (!node->trss_rangeCalcDone) + { + TidRangeEval(node); + node->trss_rangeCalcDone = true; + } Assert(ItemPointerIsValid(&slot->tts_tid)); /* Recheck the ctid is still within range */ - if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 || + if (node->trss_rangeIsEmpty || + ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 || ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0) return false; @@ -322,6 +340,13 @@ ExecReScanTidRangeScan(TidRangeScanState *node) /* mark scan as not in progress, and tid range list as not computed yet */ node->trss_inScan = false; + /* + * Set the Tid Range to be recalculated when any parameters that might + * effect the calculated range have changed. + */ + if (bms_overlap(node->ss.ps.chgParam, ((TidRangeScan *) node->ss.ps.plan)->tidparamids)) + node->trss_rangeCalcDone = false; + /* * We must wait until TidRangeNext before calling table_rescan_tidrange. */ @@ -380,6 +405,10 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags) * mark scan as not in progress, and TID range as not computed yet */ tidrangestate->trss_inScan = false; + tidrangestate->trss_rangeCalcDone = false; + + /* This will be calculated correctly in TidRangeEval() */ + tidrangestate->trss_rangeIsEmpty = true; /* * open the scan relation diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index d50c6600358c..b40a31e0d2fa 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -457,10 +457,19 @@ ExecTidScan(PlanState *pstate) void ExecReScanTidScan(TidScanState *node) { - if (node->tss_TidList) - pfree(node->tss_TidList); - node->tss_TidList = NULL; - node->tss_NumTids = 0; + /* + * Set the TidList to be recalculated when any parameters that might + * effect the computed list has changed. + */ + if (bms_overlap(node->ss.ps.chgParam, + ((TidScan *) node->ss.ps.plan)->tidparamids)) + { + if (node->tss_TidList) + pfree(node->tss_TidList); + node->tss_TidList = NULL; + node->tss_NumTids = 0; + } + node->tss_TidPtr = -1; /* not really necessary, but seems good form */ diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 6791cbeb416e..87e2597ee8c4 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -201,9 +201,10 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist, List *bitmapqualorig, Index scanrelid); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, - List *tidquals); + List *tidquals, Bitmapset *paramids); static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual, - Index scanrelid, List *tidrangequals); + Index scanrelid, List *tidrangequals, + Bitmapset *paramids); static SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, Index scanrelid, @@ -3384,6 +3385,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, TidScan *scan_plan; Index scan_relid = best_path->path.parent->relid; List *tidquals = best_path->tidquals; + Bitmapset *paramids; /* it should be a base rel... */ Assert(scan_relid > 0); @@ -3459,10 +3461,13 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, replace_nestloop_params(root, (Node *) scan_clauses); } + paramids = pull_paramids((Expr *) tidquals); + scan_plan = make_tidscan(tlist, scan_clauses, scan_relid, - tidquals); + tidquals, + paramids); copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); @@ -3481,6 +3486,7 @@ create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path, TidRangeScan *scan_plan; Index scan_relid = best_path->path.parent->relid; List *tidrangequals = best_path->tidrangequals; + Bitmapset *paramids; /* it should be a base rel... */ Assert(scan_relid > 0); @@ -3524,10 +3530,13 @@ create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path, replace_nestloop_params(root, (Node *) scan_clauses); } + paramids = pull_paramids((Expr *) tidrangequals); + scan_plan = make_tidrangescan(tlist, scan_clauses, scan_relid, - tidrangequals); + tidrangequals, + paramids); copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); @@ -5622,7 +5631,8 @@ static TidScan * make_tidscan(List *qptlist, List *qpqual, Index scanrelid, - List *tidquals) + List *tidquals, + Bitmapset *paramids) { TidScan *node = makeNode(TidScan); Plan *plan = &node->scan.plan; @@ -5633,6 +5643,7 @@ make_tidscan(List *qptlist, plan->righttree = NULL; node->scan.scanrelid = scanrelid; node->tidquals = tidquals; + node->tidparamids = paramids; return node; } @@ -5641,7 +5652,8 @@ static TidRangeScan * make_tidrangescan(List *qptlist, List *qpqual, Index scanrelid, - List *tidrangequals) + List *tidrangequals, + Bitmapset *paramids) { TidRangeScan *node = makeNode(TidRangeScan); Plan *plan = &node->scan.plan; @@ -5652,6 +5664,7 @@ make_tidrangescan(List *qptlist, plan->righttree = NULL; node->scan.scanrelid = scanrelid; node->tidrangequals = tidrangequals; + node->tidparamids = paramids; return node; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 71857feae482..e625d04363ef 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1926,6 +1926,8 @@ typedef struct TidScanState * trss_mintid the lowest TID in the scan range * trss_maxtid the highest TID in the scan range * trss_inScan is a scan currently in progress? + * trss_rangeCalcDone has the TID range been calculated yet? + * trss_rangeIsEmpty true if the TID range is certainly empty * ---------------- */ typedef struct TidRangeScanState @@ -1935,6 +1937,8 @@ typedef struct TidRangeScanState ItemPointerData trss_mintid; ItemPointerData trss_maxtid; bool trss_inScan; + bool trss_rangeCalcDone; + bool trss_rangeIsEmpty; } TidRangeScanState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 29d7732d6a03..a3d220e9d5e6 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -677,6 +677,8 @@ typedef struct TidScan Scan scan; /* qual(s) involving CTID = something */ List *tidquals; + /* Set of ParamIds mentioned in tidquals */ + Bitmapset *tidparamids; } TidScan; /* ---------------- @@ -691,6 +693,8 @@ typedef struct TidRangeScan Scan scan; /* qual(s) involving CTID op something */ List *tidrangequals; + /* Set of ParamIds mentioned in tidrangequals */ + Bitmapset *tidparamids; } TidRangeScan; /* ----------------