diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index 2a25f21eb4b8d..1f5efd9e6d9fc 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -151,9 +151,10 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
         The query writes any data or locks any database rows. If a query
         contains a data-modifying operation either at the top level or within
-        a CTE, no parallel plans for that query will be generated. This is a
-        limitation of the current implementation which could be lifted in a
-        future release.
+        a CTE, no parallel plans for that query will be generated. As an
+        exception, the commands CREATE TABLE ... AS, SELECT INTO, and
+        CREATE MATERIALIZED VIEW, which create a new table and populate
+        it, can use a parallel plan.
@@ -241,15 +242,6 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
-
-      A prepared statement is executed using a CREATE TABLE .. AS
-      EXECUTE .. statement. This construct converts what otherwise
-      would have been a read-only operation into a read-write operation,
-      making it ineligible for parallel query.
-
-
-
         The transaction isolation level is serializable. This situation
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 2d321171fad54..f73894676ed27 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2581,15 +2581,17 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
                     CommandId cid, int options)
 {
     /*
-     * For now, parallel operations are required to be strictly read-only.
-     * Unlike heap_update() and heap_delete(), an insert should never create a
-     * combo CID, so it might be possible to relax this restriction, but not
-     * without more thought and testing.
-     */
-    if (IsInParallelMode())
+     * Parallel operations are required to be strictly read-only in a parallel
+     * worker. Parallel inserts are not safe even in the leader in the
+     * general case, because group locking means that heavyweight locks for
+     * relation extension or GIN page locks will not conflict between members
+     * of a lock group, but we don't prohibit that case here because there are
+     * useful special cases that we can safely allow, such as CREATE TABLE AS.
+     */
+    if (IsParallelWorker())
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
-                 errmsg("cannot insert tuples during a parallel operation")));
+                 errmsg("cannot insert tuples in a parallel worker")));
 
     if (relation->rd_rel->relhasoids)
     {
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 97f9c55d6e764..17cbce0bdbdd2 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -326,8 +326,8 @@ ExecCreateTableAs(CreateTableAsStmt *stmt, const char *queryString,
     query = linitial_node(Query, rewritten);
     Assert(query->commandType == CMD_SELECT);
 
-    /* plan the query --- note we disallow parallelism */
-    plan = pg_plan_query(query, 0, params);
+    /* plan the query */
+    plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, params);
 
     /*
      * Use a snapshot with an updated command ID to ensure this query sees
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 953e74d73cae3..fa1b716530791 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -400,8 +400,6 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es,
          * We have to rewrite the contained SELECT and then pass it back to
          * ExplainOneQuery. It's probably not really necessary to copy the
          * contained parsetree another time, but let's be safe.
-         *
-         * Like ExecCreateTableAs, disallow parallelism in the plan.
          */
         CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt;
         List       *rewritten;
@@ -409,7 +407,7 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es,
         rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query)));
         Assert(list_length(rewritten) == 1);
         ExplainOneQuery(linitial_node(Query, rewritten),
-                        0, ctas->into, es,
+                        CURSOR_OPT_PARALLEL_OK, ctas->into, es,
                         queryString, params, queryEnv);
     }
     else if (IsA(utilityStmt, DeclareCursorStmt))
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 53dd2f0fc0cb0..76147d9d65b82 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1697,11 +1697,9 @@ ExecutePlan(EState *estate,
 
     /*
      * If the plan might potentially be executed multiple times, we must force
-     * it to run without parallelism, because we might exit early. Also
-     * disable parallelism when writing into a relation, because no database
-     * changes are allowed in parallel mode.
+     * it to run without parallelism, because we might exit early.
      */
-    if (!execute_once || dest->mydest == DestIntoRel)
+    if (!execute_once)
         use_parallel_mode = false;
 
     estate->es_use_parallel_mode = use_parallel_mode;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ea37ba887ffb6..a6730dbce4dbe 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -257,6 +257,16 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
      * to values that don't permit parallelism, or if parallel-unsafe
      * functions are present in the query tree.
      *
+     * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
+     * MATERIALIZED VIEW to use parallel plans, but this is safe only because
+     * the command is writing into a completely new table which workers won't
+     * be able to see. If the workers could see the table, the fact that
+     * group locking would cause them to ignore the leader's heavyweight
+     * relation extension lock and GIN page locks would make this unsafe.
+     * We'll have to fix that somehow if we want to allow parallel inserts in
+     * general; updates and deletes have additional problems especially around
+     * combo CIDs.)
+     *
      * For now, we don't try to use parallel mode if we're running inside a
      * parallel worker. We might eventually be able to relax this
      * restriction, but for now it seems best not to have parallel workers
diff --git a/src/test/regress/expected/write_parallel.out b/src/test/regress/expected/write_parallel.out
new file mode 100644
index 0000000000000..0c4da2591a0fc
--- /dev/null
+++ b/src/test/regress/expected/write_parallel.out
@@ -0,0 +1,79 @@
+--
+-- PARALLEL
+--
+-- Serializable isolation would disable parallel query, so explicitly use an
+-- arbitrary other level.
+begin isolation level repeatable read;
+-- encourage use of parallel plans
+set parallel_setup_cost=0;
+set parallel_tuple_cost=0;
+set min_parallel_table_scan_size=0;
+set max_parallel_workers_per_gather=4;
+--
+-- Test write operations that have an underlying query that is eligible
+-- for parallel plans
+--
+explain (costs off) create table parallel_write as
+    select length(stringu1) from tenk1 group by length(stringu1);
+                     QUERY PLAN
+---------------------------------------------------
+ Finalize HashAggregate
+   Group Key: (length((stringu1)::text))
+   ->  Gather
+         Workers Planned: 4
+         ->  Partial HashAggregate
+               Group Key: length((stringu1)::text)
+               ->  Parallel Seq Scan on tenk1
+(7 rows)
+
+create table parallel_write as
+    select length(stringu1) from tenk1 group by length(stringu1);
+drop table parallel_write;
+explain (costs off) select length(stringu1) into parallel_write
+    from tenk1 group by length(stringu1);
+                     QUERY PLAN
+---------------------------------------------------
+ Finalize HashAggregate
+   Group Key: (length((stringu1)::text))
+   ->  Gather
+         Workers Planned: 4
+         ->  Partial HashAggregate
+               Group Key: length((stringu1)::text)
+               ->  Parallel Seq Scan on tenk1
+(7 rows)
+
+select length(stringu1) into parallel_write
+    from tenk1 group by length(stringu1);
+drop table parallel_write;
+explain (costs off) create materialized view parallel_mat_view as
+    select length(stringu1) from tenk1 group by length(stringu1);
+                     QUERY PLAN
+---------------------------------------------------
+ Finalize HashAggregate
+   Group Key: (length((stringu1)::text))
+   ->  Gather
+         Workers Planned: 4
+         ->  Partial HashAggregate
+               Group Key: length((stringu1)::text)
+               ->  Parallel Seq Scan on tenk1
+(7 rows)
+
+create materialized view parallel_mat_view as
+    select length(stringu1) from tenk1 group by length(stringu1);
+drop materialized view parallel_mat_view;
+prepare prep_stmt as select length(stringu1) from tenk1 group by length(stringu1);
+explain (costs off) create table parallel_write as execute prep_stmt;
+                     QUERY PLAN
+---------------------------------------------------
+ Finalize HashAggregate
+   Group Key: (length((stringu1)::text))
+   ->  Gather
+         Workers Planned: 4
+         ->  Partial HashAggregate
+               Group Key: length((stringu1)::text)
+               ->  Parallel Seq Scan on tenk1
+(7 rows)
+
+create table parallel_write as execute prep_stmt;
+drop table parallel_write;
+rollback;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index eefdeeacaef6f..a114ac92c17ad 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -96,6 +96,7 @@ test: rules psql_crosstab amutils
 
 # run by itself so it can run parallel workers
 test: select_parallel
+test: write_parallel
 
 # no relation related tests can be put in this group
 test: publication subscription
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 76b0de30a7a51..ef275d0d9a2b5 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -134,6 +134,7 @@ test: stats_ext
 test: rules
 test: psql_crosstab
 test: select_parallel
+test: write_parallel
 test: publication
 test: subscription
 test: amutils
diff --git a/src/test/regress/sql/write_parallel.sql b/src/test/regress/sql/write_parallel.sql
new file mode 100644
index 0000000000000..78b479cedf4b6
--- /dev/null
+++ b/src/test/regress/sql/write_parallel.sql
@@ -0,0 +1,42 @@
+--
+-- PARALLEL
+--
+
+-- Serializable isolation would disable parallel query, so explicitly use an
+-- arbitrary other level.
+begin isolation level repeatable read;
+
+-- encourage use of parallel plans
+set parallel_setup_cost=0;
+set parallel_tuple_cost=0;
+set min_parallel_table_scan_size=0;
+set max_parallel_workers_per_gather=4;
+
+--
+-- Test write operations that have an underlying query that is eligible
+-- for parallel plans
+--
+explain (costs off) create table parallel_write as
+    select length(stringu1) from tenk1 group by length(stringu1);
+create table parallel_write as
+    select length(stringu1) from tenk1 group by length(stringu1);
+drop table parallel_write;
+
+explain (costs off) select length(stringu1) into parallel_write
+    from tenk1 group by length(stringu1);
+select length(stringu1) into parallel_write
+    from tenk1 group by length(stringu1);
+drop table parallel_write;
+
+explain (costs off) create materialized view parallel_mat_view as
+    select length(stringu1) from tenk1 group by length(stringu1);
+create materialized view parallel_mat_view as
+    select length(stringu1) from tenk1 group by length(stringu1);
+drop materialized view parallel_mat_view;
+
+prepare prep_stmt as select length(stringu1) from tenk1 group by length(stringu1);
+explain (costs off) create table parallel_write as execute prep_stmt;
+create table parallel_write as execute prep_stmt;
+drop table parallel_write;
+
+rollback;
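For reviewers who want to see the new behavior interactively rather than through the regression test, the snippet below is a rough, standalone illustration and not part of the patch: the table name parallel_write_demo is made up, the cost settings simply mirror the ones used in write_parallel.sql, and it assumes a database where the regression table tenk1 exists. With the patch applied, EXPLAIN ANALYZE of a CREATE TABLE AS should show a Gather node with workers actually launched; without it, no parallel plan is generated for the command at all.

    -- encourage parallel plans, as the regression test does
    set parallel_setup_cost = 0;
    set parallel_tuple_cost = 0;
    set min_parallel_table_scan_size = 0;
    set max_parallel_workers_per_gather = 4;

    -- the underlying SELECT of CREATE TABLE AS may now run in parallel;
    -- look for "Workers Planned" and "Workers Launched" under the Gather node
    explain (analyze, costs off)
    create table parallel_write_demo as
        select length(stringu1) from tenk1 group by length(stringu1);

    drop table parallel_write_demo;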