这篇文章主要讲解了“postgresql中hash_inner_and_outer函数分析”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“Postgresql
这篇文章主要讲解了“postgresql中hash_inner_and_outer函数分析”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“Postgresql中hash_inner_and_outer函数分析”吧!
Cost相关
注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!
typedef double Cost;
#define DEFAULT_SEQ_PAGE_COST 1.0 //顺序扫描page的成本
#define DEFAULT_RANDOM_PAGE_COST 4.0 //随机扫描page的成本
#define DEFAULT_CPU_TUPLE_COST 0.01 //处理一个元组的CPU成本
#define DEFAULT_CPU_INDEX_TUPLE_COST 0.005 //处理一个索引元组的CPU成本
#define DEFAULT_CPU_OPERATOR_COST 0.0025 //执行一次操作或函数的CPU成本
#define DEFAULT_PARALLEL_TUPLE_COST 0.1 //并行执行,从一个worker传输一个元组到另一个worker的成本
#define DEFAULT_PARALLEL_SETUP_COST 1000.0 //构建并行执行环境的成本
#define DEFAULT_EFFECTIVE_CACHE_SIZE 524288
double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
Cost disable_cost = 1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径
int max_parallel_workers_per_gather = 2;//每次gather使用的worker数
hash join的算法实现伪代码如下:
Step 1
FOR small_table_row IN (SELECT * FROM small_table)
LOOP
slot := HASH(small_table_row.join_key);
INSERT_HASH_TABLE(slot,small_table_row);
END LOOP;
Step 2
FOR large_table_row IN (SELECT * FROM large_table) LOOP
slot := HASH(large_table_row.join_key);
small_table_row = LOOKUP_HASH_TABLE(slot,large_table_row.join_key);
IF small_table_row FOUND THEN
output small_table_row + large_table_row;
END IF;
END LOOP;
hash_inner_and_outer
该函数创建hash join访问路径。
//------------------------------------------------ hash_inner_and_outer
static void
hash_inner_and_outer(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
JoinPathExtraData *extra)
{
JoinType save_jointype = jointype;
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
ListCell *l;
hashclauses = NIL;
foreach(l, extra->restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
continue;
if (!restrictinfo->can_join ||
restrictinfo->hashjoinoperator == InvalidOid)
continue;
if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
continue;
hashclauses = lappend(hashclauses, restrictinfo);//加入到hash条件中
}
//如发现可用于hash连接的条件,则构建hash连接访问路径,如无则无法构建
if (hashclauses)
{
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
return;//直接退出
//如果需要保证唯一性,丢弃参数化
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel,
cheapest_total_outer, extra->sjinfo);
Assert(cheapest_total_outer);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
cheapest_total_outer,
cheapest_total_inner,
hashclauses,
jointype,
extra);
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner = (Path *)
create_unique_path(root, innerrel,
cheapest_total_inner, extra->sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
cheapest_total_outer,
cheapest_total_inner,
hashclauses,
jointype,
extra);
if (cheapest_startup_outer != NULL &&
cheapest_startup_outer != cheapest_total_outer)
try_hashjoin_path(root,
joinrel,
cheapest_startup_outer,
cheapest_total_inner,
hashclauses,
jointype,
extra);
}
else//其他连接类型
{
ListCell *lc1;
ListCell *lc2;
if (cheapest_startup_outer != NULL)//启动成本最低的外表访问路径
try_hashjoin_path(root,
joinrel,
cheapest_startup_outer,
cheapest_total_inner,
hashclauses,
jointype,
extra);//构建hash join访问路径
foreach(lc1, outerrel->cheapest_parameterized_paths)//遍历外表参数化路径
{
Path *outerpath = (Path *) lfirst(lc1);
if (PATH_PARAM_BY_REL(outerpath, innerrel))
continue;
foreach(lc2, innerrel->cheapest_parameterized_paths)//遍历内表参数化路径
{
Path *innerpath = (Path *) lfirst(lc2);
if (PATH_PARAM_BY_REL(innerpath, outerrel))
continue;
if (outerpath == cheapest_startup_outer &&
innerpath == cheapest_total_inner)
continue;
try_hashjoin_path(root,
joinrel,
outerpath,
innerpath,
hashclauses,
jointype,
extra);//构建hash连接访问路径
}
}
}
if (joinrel->consider_parallel &&
save_jointype != JOIN_UNIQUE_OUTER &&
save_jointype != JOIN_FULL &&
save_jointype != JOIN_RIGHT &&
outerrel->partial_pathlist != NIL &&
bms_is_empty(joinrel->lateral_relids))
{
Path *cheapest_partial_outer;
Path *cheapest_partial_inner = NULL;
Path *cheapest_safe_inner = NULL;
cheapest_partial_outer =
(Path *) linitial(outerrel->partial_pathlist);
if (innerrel->partial_pathlist != NIL && enable_parallel_hash)
{
cheapest_partial_inner =
(Path *) linitial(innerrel->partial_pathlist);
try_partial_hashjoin_path(root, joinrel,
cheapest_partial_outer,
cheapest_partial_inner,
hashclauses, jointype, extra,
true );
}
if (cheapest_total_inner->parallel_safe)
cheapest_safe_inner = cheapest_total_inner;
else if (save_jointype != JOIN_UNIQUE_INNER)
cheapest_safe_inner =
get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
if (cheapest_safe_inner != NULL)
try_partial_hashjoin_path(root, joinrel,
cheapest_partial_outer,
cheapest_safe_inner,
hashclauses, jointype, extra,
false );
}
}
}
//----------------------------- try_hashjoin_path
static void
try_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *hashclauses,
JoinType jointype,
JoinPathExtraData *extra)
{
Relids required_outer;
JoinCostWorkspace workspace;
required_outer = calc_non_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
!bms_overlap(required_outer, extra->param_source_rels))
{
bms_free(required_outer);
return;
}
initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path, extra, false);//初步估算成本
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
NIL, required_outer))//初始判断
{
add_path(joinrel, (Path *)
create_hashjoin_path(root,
joinrel,
jointype,
&workspace,
extra,
outer_path,
inner_path,
false,
extra->restrictlist,
required_outer,
hashclauses));//创建hash join访问路径,并添加
}
else
{
bms_free(required_outer);
}
}
//------------------ create_hashjoin_path
HashPath *
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
bool parallel_hash,
List *restrict_clauses,
Relids required_outer,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.path.pathtarget = joinrel->reltarget;
pathnode->jpath.path.param_info =
get_joinrel_parampathinfo(root,
joinrel,
outer_path,
inner_path,
extra->sjinfo,
required_outer,
&restrict_clauses);
pathnode->jpath.path.parallel_aware =
joinrel->consider_parallel && parallel_hash;
pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
outer_path->parallel_safe && inner_path->parallel_safe;
pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
pathnode->jpath.path.pathkeys = NIL;
pathnode->jpath.jointype = jointype;
pathnode->jpath.inner_unique = extra->inner_unique;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_hashclauses = hashclauses;
final_cost_hashjoin(root, pathnode, workspace, extra);//最终的成本估算
return pathnode;
}
测试脚本如下
testdb=# explain verbose select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je
testdb(# from t_grxx gr inner join t_jfxx jf
testdb(# on gr.dwbh = dw.dwbh
testdb(# and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort (cost=20070.93..20320.93 rows=100000 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Sort Key: dw.dwbh
-> Hash Join (cost=3754.00..8689.61 rows=100000 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Inner Unique: true
Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
-> Hash Join (cost=3465.00..8138.00 rows=100000 width=31)
Output: gr.grbh, gr.xm, gr.dwbh, jf.ny, jf.je
Hash Cond: ((jf.grbh)::text = (gr.grbh)::text)
-> Seq Scan on public.t_jfxx jf (cost=0.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
-> Hash (cost=1726.00..1726.00 rows=100000 width=16)
Output: gr.grbh, gr.xm, gr.dwbh
-> Seq Scan on public.t_grxx gr (cost=0.00..1726.00 rows=100000 width=16)
Output: gr.grbh, gr.xm, gr.dwbh
-> Hash (cost=164.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Seq Scan on public.t_dwxx dw (cost=0.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
(20 rows)
启动gdb,设置断点跟踪
(gdb) b hash_inner_and_outer
Breakpoint 1 at 0x7b066b: file joinpath.c, line 1684.
(gdb) c
Continuing.
Breakpoint 1, hash_inner_and_outer (root=0x2676078, joinrel=0x26d2bc0, outerrel=0x26814e0, innerrel=0x2682a10,
jointype=JOIN_INNER, extra=0x7ffd6ea6b9d0) at joinpath.c:1684
1684 JoinType save_jointype = jointype;
连接类型为JOIN_INNER
(gdb) p jointype
$1 = JOIN_INNER
1号和3号RTE的连接(即t_dwxx和t_grxx)
(gdb) p *joinrel->relids->Words
$3 = 10
开始遍历连接条件,获取hash连接条件
1697 foreach(l, extra->restrictlist)
(gdb)
1699 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
成功获取,t_dwxx.dwbh = t_grxx.dwbh
(gdb)
1697 foreach(l, extra->restrictlist)
(gdb)
1722 if (hashclauses)
(gdb) p *hashclauses
$4 = {type = T_List, length = 1, head = 0x26d4068, tail = 0x26d4068}
获取成本最低的外表启动路径/成本最低的外表访问路径/成本最低的内部访问路径
分别是外表顺序扫描/外表顺序扫描/内部顺序扫描
(gdb) n
1729 Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
(gdb)
1730 Path *cheapest_total_outer = outerrel->cheapest_total_path;
(gdb)
1731 Path *cheapest_total_inner = innerrel->cheapest_total_path;
(gdb) p *cheapest_startup_outer
$5 = {type = T_Path, pathtype = T_SeqScan, parent = 0x26814e0, pathtarget = 0x2681718, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10000, startup_cost = 0, total_cost = 164,
pathkeys = 0x0}
(gdb) p *cheapest_total_outer
$6 = {type = T_Path, pathtype = T_SeqScan, parent = 0x26814e0, pathtarget = 0x2681718, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10000, startup_cost = 0, total_cost = 164,
pathkeys = 0x0}
(gdb) p *cheapest_total_inner
$7 = {type = T_Path, pathtype = T_SeqScan, parent = 0x2682a10, pathtarget = 0x2682c48, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 100000, startup_cost = 0, total_cost = 1726,
pathkeys = 0x0}
如外表成本最低的启动路径不为NULL,则尝试hash连接
(gdb) n
1740 PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
(gdb)
1739 if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
(gdb)
1744 if (jointype == JOIN_UNIQUE_OUTER)
(gdb)
1760 else if (jointype == JOIN_UNIQUE_INNER)
(gdb)
1796 if (cheapest_startup_outer != NULL)
(gdb)
1797 try_hashjoin_path(root,
进入try_hashjoin_path
(gdb) step
try_hashjoin_path (root=0x2676078, joinrel=0x26d2bc0, outer_path=0x26853b8, inner_path=0x26cf610, hashclauses=0x26d4090,
jointype=JOIN_INNER, extra=0x7ffd6ea6b9d0) at joinpath.c:737
737 required_outer = calc_non_nestloop_required_outer(outer_path,
try_hashjoin_path->初步估算成本
...
751 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
(gdb) p workspace
$9 = {startup_cost = 3465, total_cost = 4261, run_cost = 796, inner_run_cost = 0,
inner_rescan_run_cost = 6.9528109284473596e-310, outer_rows = 3.7882102964330281e-317,
inner_rows = 2.0115578425988515e-316, outer_skip_rows = 2.0115578425988515e-316,
inner_skip_rows = 6.9528109284331305e-310, numbuckets = 131072, numbatches = 2, inner_rows_total = 100000}
try_hashjoin_path->进入函数create_hashjoin_path
(gdb) n
759 create_hashjoin_path(root,
(gdb) step
create_hashjoin_path (root=0x2676078, joinrel=0x26d2bc0, jointype=JOIN_INNER, workspace=0x7ffd6ea6b850,
extra=0x7ffd6ea6b9d0, outer_path=0x26853b8, inner_path=0x26cf610, parallel_hash=false, restrict_clauses=0x26d3098,
required_outer=0x0, hashclauses=0x26d4090) at pathnode.c:2330
2330 HashPath *pathnode = makeNode(HashPath);
try_hashjoin_path->create_hashjoin_path->计算成本并返回
(gdb)
2370 final_cost_hashjoin(root, pathnode, workspace, extra);
(gdb)
2372 return pathnode;
(gdb)
2373 }
(gdb) p *pathnode
$10 = {jpath = {path = {type = T_HashPath, pathtype = T_HashJoin, parent = 0x26d2bc0, pathtarget = 0x26d2df8,
param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 100000,
startup_cost = 3465, total_cost = 5386, pathkeys = 0x0}, jointype = JOIN_INNER, inner_unique = false,
outerjoinpath = 0x26853b8, innerjoinpath = 0x26cf610, joinrestrictinfo = 0x26d3098}, path_hashclauses = 0x26d4090,
num_batches = 2, inner_rows_total = 100000}
try_hashjoin_path->添加路径
(gdb) n
try_hashjoin_path (root=0x2676078, joinrel=0x26d2bc0, outer_path=0x26853b8, inner_path=0x26cf610, hashclauses=0x26d4090,
jointype=JOIN_INNER, extra=0x7ffd6ea6b9d0) at joinpath.c:758
758 add_path(joinrel, (Path *)
(gdb)
776 }
(gdb)
回到hash_inner_and_outer,继续循环
(gdb)
hash_inner_and_outer (root=0x2676078, joinrel=0x26d2bc0, outerrel=0x26814e0, innerrel=0x2682a10, jointype=JOIN_INNER,
extra=0x7ffd6ea6b9d0) at joinpath.c:1805
1805 foreach(lc1, outerrel->cheapest_parameterized_paths)
结束函数调用
1904 }
(gdb)
add_paths_to_joinrel (root=0x2676078, joinrel=0x26d2bc0, outerrel=0x26814e0, innerrel=0x2682a10, jointype=JOIN_INNER,
sjinfo=0x7ffd6ea6bac0, restrictlist=0x26d3098) at joinpath.c:315
315 if (joinrel->fdwroutine &&
(gdb) p *joinrel->pathlist
$11 = {type = T_List, length = 2, head = 0x26d4160, tail = 0x26d3e30}
查看joinrel的路径链表
(gdb) p *(Node *)joinrel->pathlist->head->data.ptr_value
$12 = {type = T_HashPath}
(gdb) p *(Node *)joinrel->pathlist->head->next->data.ptr_value
$13 = {type = T_MergePath}
(gdb) p *(HashPath *)joinrel->pathlist->head->data.ptr_value
$14 = {jpath = {path = {type = T_HashPath, pathtype = T_HashJoin, parent = 0x26d2bc0, pathtarget = 0x26d2df8,
param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 100000,
startup_cost = 3465, total_cost = 5386, pathkeys = 0x0}, jointype = JOIN_INNER, inner_unique = false,
outerjoinpath = 0x26853b8, innerjoinpath = 0x26cf610, joinrestrictinfo = 0x26d3098}, path_hashclauses = 0x26d4090,
num_batches = 2, inner_rows_total = 100000}
(gdb) p *(MergePath *)joinrel->pathlist->head->next->data.ptr_value
$15 = {jpath = {path = {type = T_MergePath, pathtype = T_MergeJoin, parent = 0x26d2bc0, pathtarget = 0x26d2df8,
param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 100000,
startup_cost = 10035.66023721841, total_cost = 11955.396048959938, pathkeys = 0x2685928}, jointype = JOIN_INNER,
inner_unique = false, outerjoinpath = 0x26ce070, innerjoinpath = 0x26cf610, joinrestrictinfo = 0x26d3098},
path_mergeclauses = 0x26d3eb8, outersorTKEys = 0x0, innersortkeys = 0x26d3f18, skip_mark_restore = false,
materialize_inner = false}
感谢各位的阅读,以上就是“PostgreSQL中hash_inner_and_outer函数分析”的内容了,经过本文的学习后,相信大家对PostgreSQL中hash_inner_and_outer函数分析这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是编程网,小编将为大家推送更多相关知识点的文章,欢迎关注!
--结束END--
本文标题: PostgreSQL中hash_inner_and_outer函数分析
本文链接: https://lsjlt.com/news/64795.html(转载时请注明来源链接)
有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
2024-10-23
2024-10-22
2024-10-22
2024-10-22
2024-10-22
2024-10-22
2024-10-22
2024-10-22
2024-10-22
2024-10-22
回答
回答
回答
回答
回答
回答
回答
回答
回答
回答
0