From 27dc1241ba8fd9fcededb1d3832a823abe7c3ba6 Mon Sep 17 00:00:00 2001
From: Xiaoran Wang <wxiaoran@vmware.com>
Date: Mon, 13 May 2024 10:15:20 +0800
Subject: [PATCH 01/48] Add function cbdb_relation_size (#428)

Add function cbdb_relation_size

It can be used to fetch the size of a batch of relations as below

SELECT * FROM
cbdb_relation_size((SELECT array_agg(oid) FROM pg_class));

It performs better than pg_relation_size in such cases; for more details,
see the comment on the function.

Co-authored-by: Xiaoran Wang <wangxiaoran@hashdata.cn>
---
 src/backend/catalog/system_functions.sql      |   9 +
 src/backend/utils/adt/dbsize.c                | 274 ++++++++++++++++++
 src/include/catalog/pg_proc.dat               |  15 +
 .../expected/cbdb_db_size_functions.out       |  44 +++
 src/test/regress/greenplum_schedule           |   2 +-
 .../regress/sql/cbdb_db_size_functions.sql    |  29 ++
 6 files changed, 372 insertions(+), 1 deletion(-)
 create mode 100644 src/test/regress/expected/cbdb_db_size_functions.out
 create mode 100644 src/test/regress/sql/cbdb_db_size_functions.sql

diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 51cd0e41939..d191f44005a 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -281,6 +281,15 @@ CREATE OR REPLACE FUNCTION pg_relation_size(regclass)
  PARALLEL UNSAFE STRICT COST 1
 RETURN pg_relation_size($1, 'main');
 
+CREATE OR REPLACE FUNCTION
+ cbdb_relation_size(in reloids oid[], out reloid oid, out size int8)
+ RETURNS SETOF record
+ LANGUAGE sql
+ PARALLEL UNSAFE STRICT COST 1
+BEGIN ATOMIC
+select * from cbdb_relation_size($1, 'main'); -- one-argument form: delegates to the two-argument form with the 'main' fork
+END;
+
 CREATE OR REPLACE FUNCTION obj_description(oid, name)
  RETURNS text
  LANGUAGE sql
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index ceec9a4afe3..6210966e1e7 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -25,6 +25,7 @@
 #include "commands/tablespace.h"
 #include "common/relpath.h"
 #include "executor/spi.h"
+#include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/fd.h"
 #include "utils/acl.h"
@@ -54,6 +55,7 @@
 #define half_rounded(x)   (((x) + ((x) < 0 ? -1 : 1)) / 2)
 
 static int64 calculate_total_relation_size(Relation rel);
+static HTAB *cbdb_get_size_from_segDBs(const char *cmd, int32 relnum);
 
 /* Hook for plugins to calculate relation size */
 relation_size_hook_type relation_size_hook = NULL;
@@ -1325,3 +1327,275 @@ pg_relation_filepath(PG_FUNCTION_ARGS)
 
 	PG_RETURN_TEXT_P(cstring_to_text(path));
 }
+
+/**
+ * cbdb_relation_size accepts a group of relation
+ * oids and returns their sizes.
+ * arg0: oid array
+ * arg1: fork name
+ *
+ * cbdb_relation_size is similar to pg_relation_size,
+ * but when getting the sizes of multiple relations it
+ * performs better. On each segment, it computes the sizes
+ * of the whole group of relations at once, and the
+ * dispatcher sums them up. Compared with pg_relation_size,
+ * which only computes one relation's size at a time
+ * and dispatches a separate sql command for each relation,
+ * it saves a lot of work.
+ *
+ * If there are duplicated oids in the oid array,
+ * cbdb_relation_size doesn't deal with that now.
+ */
+typedef struct
+{
+	Oid 	reloid;	/* relation oid; also the hash key in cbdb_get_size_from_segDBs */
+	int64 	size;	/* size of the relation; segment sizes are added into it with += */
+} RelSize;
+
+typedef struct
+{
+	int32	index;	/* position in relsize of the next row to return */
+	int32 	num_entries;	/* number of elements in relsize (length of the oid array) */
+	RelSize *relsize;	/* one element per input oid, in input order */
+} get_relsize_cxt;
+
+Datum
+cbdb_relation_size(PG_FUNCTION_ARGS)
+{
+	FuncCallContext	*funcctx;
+	get_relsize_cxt	*cxt;
+	int32			len = 0; /* the length of oid array */
+	Relation		rel;
+	StringInfoData	oidInfo;	/* comma-separated oids to dispatch; only initialized on the dispatcher once relnum reaches 1 */
+	RelSize			*result;
+	/* arguments: arg0 is the oid array, arg1 is the fork name */
+	ForkNumber		forkNumber;
+	ArrayType  		*array = PG_GETARG_ARRAYTYPE_P(0);
+	text	   		*forkName = PG_GETARG_TEXT_PP(1);
+	Oid 			*oidArray =  (Oid *) ARR_DATA_PTR(array);
+
+	/* reject arrays with NULL elements up front */
+	if (array_contains_nulls(array))
+		ereport(ERROR, (errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
+			errmsg("cannot work with arrays containing NULLs")));
+
+	/* calculate all the relation sizes on the first call */
+	if (SRF_IS_FIRSTCALL())
+	{
+#define RELSIZE_NATTS 2	/* result columns: reloid, size */
+		MemoryContext oldcontext;
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+		len = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array));
+		forkNumber = forkname_to_number(text_to_cstring(forkName));
+		/* Switch to memory context appropriate for multiple function calls */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		TupleDesc tupdesc = CreateTemplateTupleDesc(RELSIZE_NATTS);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "reloid", OIDOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size", INT8OID, -1, 0);
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		result = (RelSize*) palloc0(sizeof(RelSize) * len);	/* zeroed, so every size starts at 0 */
+
+		ERROR_ON_ENTRY_DB();
+
+		int relnum = 0; /* relations sized locally; on the dispatcher, also the number of oids in oidInfo */
+		for (int i = 0; i< len; i++)
+		{
+			result[i].reloid = oidArray[i];
+
+			rel = try_relation_open(oidArray[i], AccessShareLock, false);
+
+			/*
+			 * Before 9.2, pg_relation_size used to throw an error if the relation
+			 * didn't exist, but that makes queries like
+			 * "SELECT pg_relation_size(oid) FROM pg_class" less robust: while
+			 * pg_class is scanned with an MVCC snapshot, someone else might drop
+			 * the table. Tolerating already-dropped tables beats aborting the query.
+			 *
+			 * For cbdb_relation_size, a relation that does not exist keeps size 0.
+			 */
+			if (rel == NULL)
+			{
+				continue;
+			}
+
+			/* a foreign table is sized here only: its oid is never added to oidInfo */
+			if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+			{
+				FdwRoutine *fdwroutine;
+				bool        ok = false;
+
+				fdwroutine = GetFdwRoutineForRelation(rel, false);
+
+				if (fdwroutine->GetRelationSizeOnSegment != NULL)
+					ok = fdwroutine->GetRelationSizeOnSegment(rel, &result[i].size);
+
+				if (!ok)
+					ereport(WARNING,
+							(errmsg("skipping \"%s\" --- cannot calculate this foreign table size",
+									RelationGetRelationName(rel))));
+				relation_close(rel, AccessShareLock);
+				continue;
+			}
+
+			result[i].size = calculate_relation_size(rel, forkNumber);
+			relation_close(rel, AccessShareLock);
+
+			relnum ++;
+			if (Gp_role == GP_ROLE_DISPATCH)
+			{
+				if (relnum == 1)
+				{
+					initStringInfo(&oidInfo);
+					appendStringInfo(&oidInfo, "%u", oidArray[i]);
+				}
+				else
+					appendStringInfo(&oidInfo, ",%u", oidArray[i]);
+			}
+		}
+		/* on the dispatcher, run the same function on the segments and add their sizes */
+		if (Gp_role == GP_ROLE_DISPATCH && relnum > 0)
+		{
+			char	*sql;
+			HTAB	*segsize;
+			sql = psprintf("select * from pg_catalog.cbdb_relation_size(array[%s]::oid[], '%s')", oidInfo.data,
+					forkNames[forkNumber]);
+			segsize = cbdb_get_size_from_segDBs(sql, relnum);
+			pfree(oidInfo.data);
+			pfree(sql);
+
+			for (int i = 0; i< len; i++)
+			{
+				bool 	found;
+				RelSize *entry;
+				Oid oid = result[i].reloid;
+				entry = hash_search(segsize, &oid, HASH_FIND, &found);
+				/* no entry for oids that were never dispatched (missing or foreign relations) */
+				if (found)
+				{
+					result[i].size += entry->size;
+				}
+			}
+		}
+
+		cxt = (get_relsize_cxt *) palloc(sizeof(get_relsize_cxt));
+		cxt->num_entries = len;
+		cxt->index = 0;
+		cxt->relsize = result;
+
+		funcctx->user_fctx = cxt;
+		MemoryContextSwitchTo(oldcontext);
+	}
+	/* every call: return the next (reloid, size) row, or finish when none remain */
+	funcctx = SRF_PERCALL_SETUP();
+	cxt = (get_relsize_cxt *) funcctx->user_fctx;
+
+	while (cxt->index < cxt->num_entries)
+	{
+		RelSize 	*relsize = &cxt->relsize[cxt->index];
+		Datum       values[RELSIZE_NATTS];
+		bool        nulls[RELSIZE_NATTS];
+		HeapTuple	tuple;
+		Datum		res;
+
+		MemSet(nulls, 0, sizeof(nulls));
+		values[0] = ObjectIdGetDatum(relsize->reloid);
+		values[1] = Int64GetDatum(relsize->size);
+		cxt->index++;	/* advance before returning so the next call emits the following row */
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+		res = HeapTupleGetDatum(tuple);
+
+		SRF_RETURN_NEXT(funcctx, res);
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Helper function to dispatch a relation-size query to the segments.
+ *
+ * Dispatches the given SQL query and returns a hash table, keyed by relation
+ * oid, whose RelSize entries hold each relation's size summed over all
+ * segments.  Every segment must return exactly relnum rows of RELSIZE_NATTS
+ * columns; anything else raises an ERROR.
+ */
+static HTAB*
+cbdb_get_size_from_segDBs(const char *cmd, int32 relnum)
+{
+	CdbPgResults cdb_pgresults = {NULL, 0};
+	int			i;
+	HTAB       *res_htab;
+	HASHCTL     hctl;
+
+	Assert(Gp_role == GP_ROLE_DISPATCH);
+
+	memset(&hctl, 0, sizeof(HASHCTL));
+	hctl.keysize = sizeof(Oid);
+	hctl.entrysize = sizeof(RelSize);
+	hctl.hcxt = CurrentMemoryContext;
+	res_htab = hash_create("cbdb_get_size_from_segDBs", relnum, &hctl,
+			HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+	/* nothing to ask the segments for: hand back the empty table */
+	if (relnum == 0)
+		return res_htab;
+
+	CdbDispatchCommand(cmd, DF_WITH_SNAPSHOT, &cdb_pgresults);
+
+	for (i = 0; i < cdb_pgresults.numResults; i++)
+	{
+		ExecStatusType status;
+		int ntuples;
+		int nfields;
+
+		struct pg_result *pgresult = cdb_pgresults.pg_results[i];
+
+		status = PQresultStatus(pgresult);
+		if (status != PGRES_TUPLES_OK)
+		{
+			cdbdisp_clearCdbPgResults(&cdb_pgresults);
+			ereport(ERROR,
+					(errmsg("unexpected result from segment: %d",
+							status)));
+		}
+
+		ntuples = PQntuples(pgresult);
+		nfields = PQnfields(pgresult);
+
+		if (ntuples != relnum || nfields != RELSIZE_NATTS)
+		{
+			cdbdisp_clearCdbPgResults(&cdb_pgresults);
+			ereport(ERROR,
+					(errmsg("unexpected shape of result from segment (%d rows, %d cols)",
+							ntuples, nfields)));
+		}
+
+		for ( int j = 0; j < ntuples; j++)
+		{
+			bool		found;
+			RelSize		*entry;
+			int64		size;
+			if (PQgetisnull(pgresult, j, 0) || PQgetisnull(pgresult, j, 1))
+				continue;
+
+			Oid oid = DatumGetObjectId(DirectFunctionCall1(oidin,
+						CStringGetDatum(PQgetvalue(pgresult, j, 0))));
+			size = DatumGetInt64(DirectFunctionCall1(int8in,
+						CStringGetDatum(PQgetvalue(pgresult, j, 1))));
+			entry = hash_search(res_htab, &oid, HASH_ENTER, &found);
+			/* the first row seen for an oid starts its sum, later rows add to it */
+			if (!found)
+			{
+				entry->reloid = oid;
+				entry->size = size;
+			}
+			else
+				entry->size += size;
+		}
+	}
+
+	/* the sizes are now in res_htab, so release the dispatch results as the error paths do */
+	cdbdisp_clearCdbPgResults(&cdb_pgresults);
+	return res_htab;
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 67c37b223fd..5baebc4094a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12518,3 +12518,18 @@
   prosrc => 'pg_export_snapshot_def', proexeclocation => 's' },
 ]
 
+# Cloudberry-specific functions
+{ oid => '8960',
+  descr => 'disk space usage for the main fork of a group of tables or indexes',
+  proname => 'cbdb_relation_size', prolang => 'sql', provolatile => 'v', proparallel => 'u',
+  prorettype => 'record', prorows => '100', proretset => 't',
+  proargtypes => '_oid', proallargtypes => '{_oid,oid,int8}',
+  proargmodes => '{i,o,o}', proargnames => '{reloids,reloid,size}',
+  prosrc => 'see system_functions.sql' },
+{ oid => '8961',
+  descr => 'disk space usage for the specified fork of a group of tables or indexes',
+  proname => 'cbdb_relation_size', provolatile => 'v', proparallel => 'u',
+  prorettype => 'record', prorows => '100', proretset => 't',
+  proargtypes => '_oid text', proallargtypes => '{_oid,text,oid,int8}',
+  proargmodes => '{i,i,o,o}', proargnames => '{reloids,forkname,reloid,size}',
+  prosrc => 'cbdb_relation_size' },
diff --git a/src/test/regress/expected/cbdb_db_size_functions.out b/src/test/regress/expected/cbdb_db_size_functions.out
new file mode 100644
index 00000000000..ebe28016515
--- /dev/null
+++ b/src/test/regress/expected/cbdb_db_size_functions.out
@@ -0,0 +1,44 @@
+-- start_ignore
+DROP TABLE IF EXISTS cbdbheapsizetest;
+DROP TABLE IF EXISTS cbdbaosizetest;
+DROP EXTERNAL TABLE IF EXISTS cbdbsize_t_ext;
+-- end_ignore
+-- create heap table
+CREATE TABLE cbdbheapsizetest(a int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO cbdbheapsizetest select generate_series(1, 1000);
+-- create ao table
+CREATE TABLE cbdbaosizetest (a int) WITH (appendonly=true, orientation=row);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into cbdbaosizetest select generate_series(1, 100000);
+-- create EXTERNAL table
+CREATE EXTERNAL TABLE cbdbsize_t_ext (a integer) LOCATION ('file://127.0.0.1/tmp/foo') FORMAT 'text';
+WITH cbdbrelsize AS (
+	SELECT *
+	FROM cbdb_relation_size((SELECT array['cbdbsize_t_ext'::regclass,'cbdbheapsizetest'::regclass, 'cbdbaosizetest'::regclass]))
+), pgrelsize AS (
+	SELECT pg_relation_size(oid) as size, relname, oid FROM pg_class where oid in ('cbdbsize_t_ext'::regclass,'cbdbheapsizetest'::regclass, 'cbdbaosizetest'::regclass)
+)
+SELECT pgrelsize.relname, pgrelsize.size, cbdbrelsize.size
+FROM pgrelsize FULL JOIN cbdbrelsize
+ON pgrelsize.oid = cbdbrelsize.reloid
+WHERE pgrelsize.size != cbdbrelsize.size;
+WARNING:  skipping "cbdbsize_t_ext" --- cannot calculate this foreign table size
+WARNING:  skipping "cbdbsize_t_ext" --- cannot calculate this foreign table size
+ relname | size | size 
+---------+------+------
+(0 rows)
+
+SELECT * FROM cbdb_relation_size(array[]::oid[], 'main');
+ reloid | size 
+--------+------
+(0 rows)
+
+SELECT size FROM cbdb_relation_size(array['cbdbheapsizetest'::regclass], 'fsm');
+ size 
+------
+    0
+(1 row)
+
diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule
index 38f7e0c7fa7..71d0e97eaa1 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -127,7 +127,7 @@ test: gp_runtime_filter
 #test: olap_window
 #test: tpch500GB
 
-test: db_size_functions
+test: db_size_functions cbdb_db_size_functions
 
 # FIXME: These tests no longer work, because they try to set
 # gp_interconnect_type, which doesn't work:
diff --git a/src/test/regress/sql/cbdb_db_size_functions.sql b/src/test/regress/sql/cbdb_db_size_functions.sql
new file mode 100644
index 00000000000..95e86fb4e3c
--- /dev/null
+++ b/src/test/regress/sql/cbdb_db_size_functions.sql
@@ -0,0 +1,29 @@
+-- start_ignore
+DROP TABLE IF EXISTS cbdbheapsizetest;
+DROP TABLE IF EXISTS cbdbaosizetest;
+DROP EXTERNAL TABLE IF EXISTS cbdbsize_t_ext;
+-- end_ignore
+-- create heap table
+CREATE TABLE cbdbheapsizetest(a int);
+INSERT INTO cbdbheapsizetest select generate_series(1, 1000);
+
+-- create ao table
+CREATE TABLE cbdbaosizetest (a int) WITH (appendonly=true, orientation=row);
+insert into cbdbaosizetest select generate_series(1, 100000);
+
+-- create EXTERNAL table
+CREATE EXTERNAL TABLE cbdbsize_t_ext (a integer) LOCATION ('file://127.0.0.1/tmp/foo') FORMAT 'text';
+
+WITH cbdbrelsize AS (
+	SELECT *
+	FROM cbdb_relation_size((SELECT array['cbdbsize_t_ext'::regclass,'cbdbheapsizetest'::regclass, 'cbdbaosizetest'::regclass]))
+), pgrelsize AS (
+	SELECT pg_relation_size(oid) as size, relname, oid FROM pg_class where oid in ('cbdbsize_t_ext'::regclass,'cbdbheapsizetest'::regclass, 'cbdbaosizetest'::regclass)
+)
+SELECT pgrelsize.relname, pgrelsize.size, cbdbrelsize.size
+FROM pgrelsize FULL JOIN cbdbrelsize
+ON pgrelsize.oid = cbdbrelsize.reloid
+WHERE pgrelsize.size != cbdbrelsize.size;
+
+SELECT * FROM cbdb_relation_size(array[]::oid[], 'main');
+SELECT size FROM cbdb_relation_size(array['cbdbheapsizetest'::regclass], 'fsm');

From 9053b816ffc6be3f30d3fe2471d96c8d0b04971f Mon Sep 17 00:00:00 2001
From: zhangwenchao <656540940@qq.com>
Date: Sat, 11 May 2024 11:29:03 +0800
Subject: [PATCH 02/48] Remove cbload-related code.

As cbload is implemented in Go, which makes it unfriendly to compile,
we remove its code. We will reimplement cbload in Python or another
language.
---
 gpMgmt/bin/Makefile                |   1 -
 gpMgmt/bin/cbload/Makefile         |  25 ---
 gpMgmt/bin/cbload/go.mod           |  25 ---
 gpMgmt/bin/cbload/go.sum           | 173 -----------------
 gpMgmt/bin/cbload/loader/loader.go | 261 --------------------------
 gpMgmt/bin/cbload/log/log.go       |  59 ------
 gpMgmt/bin/cbload/main.go          |  20 --
 gpMgmt/bin/cbload/option/option.go | 286 -----------------------------
 gpMgmt/bin/cbload/worker/worker.go | 168 -----------------
 9 files changed, 1018 deletions(-)
 delete mode 100644 gpMgmt/bin/cbload/Makefile
 delete mode 100644 gpMgmt/bin/cbload/go.mod
 delete mode 100644 gpMgmt/bin/cbload/go.sum
 delete mode 100644 gpMgmt/bin/cbload/loader/loader.go
 delete mode 100644 gpMgmt/bin/cbload/log/log.go
 delete mode 100644 gpMgmt/bin/cbload/main.go
 delete mode 100644 gpMgmt/bin/cbload/option/option.go
 delete mode 100644 gpMgmt/bin/cbload/worker/worker.go

diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile
index 7cab2313cca..2b4c7483e2a 100644
--- a/gpMgmt/bin/Makefile
+++ b/gpMgmt/bin/Makefile
@@ -9,7 +9,6 @@ endif
 
 SUBDIRS = stream gpcheckcat_modules gpconfig_modules gpssh_modules gppylib lib
 SUBDIRS += ifaddrs
-SUBDIRS += cbload
 
 $(recurse)
 
diff --git a/gpMgmt/bin/cbload/Makefile b/gpMgmt/bin/cbload/Makefile
deleted file mode 100644
index d6226382ad3..00000000000
--- a/gpMgmt/bin/cbload/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-subdir = gpMgmt/bin/cbload
-top_builddir = ../../..
-include $(top_builddir)/src/Makefile.global
-
-.DEFAULT_GOAL := all
-
-export GOPROXY ?= https://proxy.golang.org
-
-all: build
-
-build :
-	go mod download
-	go build -o cbload github.com/cloudberrydb/cbload
-
-clean :
-	rm -f cbload
-
-install: all
-	$(INSTALL_PROGRAM) 'cbload' $(bindir)
-
-uninstall:
-	rm -f $(bindir)/cbload
-
-distclean:
-	rm -f cbload
diff --git a/gpMgmt/bin/cbload/go.mod b/gpMgmt/bin/cbload/go.mod
deleted file mode 100644
index 86c8b57390c..00000000000
--- a/gpMgmt/bin/cbload/go.mod
+++ /dev/null
@@ -1,25 +0,0 @@
-module github.com/cloudberrydb/cbload
-
-go 1.19
-
-require (
-	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	github.com/jackc/chunkreader/v2 v2.0.1 // indirect
-	github.com/jackc/pgconn v1.14.1 // indirect
-	github.com/jackc/pgio v1.0.0 // indirect
-	github.com/jackc/pgpassfile v1.0.0 // indirect
-	github.com/jackc/pgproto3/v2 v2.3.2 // indirect
-	github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
-	github.com/mattn/go-colorable v0.1.1 // indirect
-	github.com/mattn/go-isatty v0.0.7 // indirect
-	github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect
-	github.com/pkg/errors v0.9.1 // indirect
-	github.com/sirupsen/logrus v1.9.3 // indirect
-	github.com/spf13/cobra v1.8.0 // indirect
-	github.com/spf13/pflag v1.0.5 // indirect
-	github.com/x-cray/logrus-prefixed-formatter v0.5.2 // indirect
-	golang.org/x/crypto v0.6.0 // indirect
-	golang.org/x/sys v0.16.0 // indirect
-	golang.org/x/term v0.16.0 // indirect
-	golang.org/x/text v0.7.0 // indirect
-)
diff --git a/gpMgmt/bin/cbload/go.sum b/gpMgmt/bin/cbload/go.sum
deleted file mode 100644
index 70bdf7b843c..00000000000
--- a/gpMgmt/bin/cbload/go.sum
+++ /dev/null
@@ -1,173 +0,0 @@
-github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ=
-github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
-github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
-github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
-github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
-github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
-github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
-github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
-github.com/jackc/chunkreader v1.0.0 h1:4s39bBR8ByfqH+DKm8rQA3E1LHZWB9XWcrz8fqaZbe0=
-github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
-github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
-github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8=
-github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
-github.com/jackc/pgconn v0.0.0-20190420214824-7e0022ef6ba3/go.mod h1:jkELnwuX+w9qN5YIfX0fl88Ehu4XC3keFuOJJk9pcnA=
-github.com/jackc/pgconn v0.0.0-20190824142844-760dd75542eb/go.mod h1:lLjNuW/+OfW9/pnVKPazfWOgNfH2aPem8YQ7ilXGvJE=
-github.com/jackc/pgconn v0.0.0-20190831204454-2fabfa3c18b7/go.mod h1:ZJKsE/KZfsUgOEh9hBm+xYTstcNHg7UPMVJqRfQxq4s=
-github.com/jackc/pgconn v1.8.0/go.mod h1:1C2Pb36bGIP9QHGBYCjnyhqu7Rv3sGshaQUvmfGIB/o=
-github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8/2JY=
-github.com/jackc/pgconn v1.14.1 h1:smbxIaZA08n6YuxEX1sDyjV/qkbtUtkH20qLkR9MUR4=
-github.com/jackc/pgconn v1.14.1/go.mod h1:9mBNlny0UvkgJdCDvdVHYSjI+8tD2rnKK69Wz8ti++E=
-github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE=
-github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8=
-github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE=
-github.com/jackc/pgmock v0.0.0-20201204152224-4fe30f7445fd/go.mod h1:hrBW0Enj2AZTNpt/7Y5rr2xe/9Mn757Wtb2xeBzPv2c=
-github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak=
-github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
-github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
-github.com/jackc/pgproto3 v1.1.0 h1:FYYE4yRw+AgI8wXIinMlNjBbp/UitDJwfj5LqqewP1A=
-github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78=
-github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA=
-github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg=
-github.com/jackc/pgproto3/v2 v2.0.0-rc3/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM=
-github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM=
-github.com/jackc/pgproto3/v2 v2.0.6/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA=
-github.com/jackc/pgproto3/v2 v2.1.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA=
-github.com/jackc/pgproto3/v2 v2.3.2 h1:7eY55bdBeCz1F2fTzSz69QC+pG46jYq9/jtSPiJ5nn0=
-github.com/jackc/pgproto3/v2 v2.3.2/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA=
-github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E=
-github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
-github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
-github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg=
-github.com/jackc/pgtype v0.0.0-20190824184912-ab885b375b90/go.mod h1:KcahbBH1nCMSo2DXpzsoWOAfFkdEtEJpPbVLq8eE+mc=
-github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrUS8lot6TQqcg7mtthZ9T0EoIBFiJcmcyw=
-github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y=
-github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM=
-github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc=
-github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
-github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
-github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
-github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
-github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
-github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw=
-github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
-github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
-github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
-github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
-github.com/mattn/go-colorable v0.1.1 h1:G1f5SKeVxmagw/IyvzvtZE4Gybcc4Tr1tf7I8z0XgOg=
-github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
-github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
-github.com/mattn/go-isatty v0.0.7 h1:UvyT9uN+3r7yLEYSlJsbQGdsaB/a0DlgWP3pql6iwOc=
-github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
-github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI=
-github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
-github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
-github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
-github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
-github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
-github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
-github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
-github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
-github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
-github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
-github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
-github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
-github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
-github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
-github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
-github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
-github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
-github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg=
-github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
-github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
-go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
-go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
-go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
-go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
-go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE=
-golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
-golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.6.0 h1:qfktjS5LUO+fFKeJXZ+ikTRijMmljikvG68fpMMruSc=
-golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
-golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
-golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
-golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
-golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
-golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
-golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
-golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s=
-gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/gpMgmt/bin/cbload/loader/loader.go b/gpMgmt/bin/cbload/loader/loader.go
deleted file mode 100644
index 05fa252bc69..00000000000
--- a/gpMgmt/bin/cbload/loader/loader.go
+++ /dev/null
@@ -1,261 +0,0 @@
-package loader
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"os/signal"
-	"path/filepath"
-	"time"
-
-	"github.com/cloudberrydb/cbload/log"
-	"github.com/cloudberrydb/cbload/option"
-	"github.com/cloudberrydb/cbload/worker"
-	"github.com/jackc/pgconn"
-	"github.com/pkg/errors"
-	"github.com/spf13/cobra"
-)
-
-type Loader struct {
-	option  *option.Option
-	conns   []*pgconn.PgConn
-	workMgr *worker.WorkManager
-}
-
-func NewLoader() *Loader {
-	l := &Loader{}
-
-	l.option = option.NewOption()
-
-	return l
-}
-
-func (l *Loader) makeConnection() (*pgconn.PgConn, error) {
-	url := fmt.Sprintf("application_name=cbload user=%v password='%v' host=%v port=%v dbname=%v",
-		l.option.User, l.option.Password, l.option.Host, l.option.Port, l.option.Database)
-
-	config, err := pgconn.ParseConfig(url)
-	if err != nil {
-		return nil, err
-	}
-
-	conn, err := pgconn.ConnectConfig(context.Background(), config)
-	if err != nil {
-		return nil, err
-	}
-
-	return conn, nil
-}
-
-func (l *Loader) listFiles(prefix string, patterns []string) ([]string, []string, error) {
-	files := make(map[string]struct{})
-
-	for _, p := range patterns {
-		matches, err := filepath.Glob(p)
-		if err != nil {
-			return nil, nil, err
-		}
-
-		for _, f := range matches {
-			files[f] = struct{}{}
-		}
-	}
-
-	localFiles := make([]string, 0)
-	destFiles := make([]string, 0)
-	for k, _ := range files {
-		localFiles = append(localFiles, k)
-		destFiles = append(destFiles, filepath.Join(prefix, k))
-	}
-
-	return localFiles, destFiles, nil
-}
-
-func (l *Loader) populateFiles(prefix string, files []string) ([]string, []string, error) {
-	isDirMode, err := l.option.IsDirectoryMode()
-	if err != nil {
-		return nil, nil, err
-	}
-
-	if !isDirMode {
-		return l.listFiles(prefix, files)
-	}
-
-	localFiles := make([]string, 0)
-	destFiles := make([]string, 0)
-	err = filepath.Walk(files[0], func(path string, info os.FileInfo, err error) error {
-		if err != nil {
-			return err
-		}
-
-		if info.IsDir() {
-			return nil
-		}
-
-		if info.Size() == 0 {
-			log.Logger.Infof("Ignore empty file: \"%v\"", path)
-			return nil
-		}
-
-		localFiles = append(localFiles, path)
-
-		relPath, _ := filepath.Rel(files[0], path)
-		destFiles = append(destFiles, filepath.Join(prefix, relPath))
-		return nil
-	})
-
-	if err != nil {
-		return nil, nil, err
-	}
-	return localFiles, destFiles, nil
-}
-
-func (l *Loader) dispatchFiles(localFiles, destFiles []string) ([][]string, [][]string) {
-	localTaskFiles := make([][]string, l.option.NumTasks)
-	destTaskFiles := make([][]string, l.option.NumTasks)
-	for i := 0; i < l.option.NumTasks; i++ {
-		localTaskFiles[i] = make([]string, 0)
-		destTaskFiles[i] = make([]string, 0)
-	}
-
-	for i, file := range localFiles {
-		j := i % l.option.NumTasks
-		localTaskFiles[j] = append(localTaskFiles[j], file)
-		destTaskFiles[j] = append(destTaskFiles[j], destFiles[i])
-	}
-
-	return localTaskFiles, destTaskFiles
-}
-
-func (l *Loader) start() (chan struct{}, chan struct{}, error) {
-	// step 0 retrieve files
-	lTotalFiles, rTotalFiles, err := l.populateFiles(l.option.DestPath, l.option.InputFile)
-	if err != nil {
-		return nil, nil, err
-	}
-
-	if len(lTotalFiles) == 0 {
-		return nil, nil, nil
-	}
-
-	// step 1 dispatch files
-	if len(lTotalFiles) < l.option.NumTasks {
-		l.option.NumTasks = len(lTotalFiles)
-	}
-	localTaskFiles, destTaskFiles := l.dispatchFiles(lTotalFiles, rTotalFiles)
-
-	// step 2 making connection
-	l.conns = make([]*pgconn.PgConn, l.option.NumTasks)
-	log.Logger.Debug("Making database connection...")
-	for i := 0; i < len(l.conns); i++ {
-		conn, err := l.makeConnection()
-		if err != nil {
-			return nil, nil, err
-		}
-
-		l.conns[i] = conn
-	}
-	log.Logger.Debug("Successfully connected to database")
-
-	// step 3 create worker
-	log.Logger.Debug("Creating worker instance...")
-	l.workMgr = worker.NewWorkManager(l.option.Table, l.option.Tag, l.conns, localTaskFiles, destTaskFiles, l.option.StopOnError)
-	doneCh, quickDieCh := l.workMgr.Start()
-	log.Logger.Debug("Worker instances created successfully")
-
-	return doneCh, quickDieCh, nil
-}
-
-func (l *Loader) stop() {
-	// stop worker
-	log.Logger.Debug("Stopping worker instance...")
-	if l.workMgr != nil {
-		l.workMgr.Stop()
-	}
-	log.Logger.Debug("Worker instances have stopped")
-
-	// closing connection
-	log.Logger.Debug("Closing database connection...")
-	for _, conn := range l.conns {
-		if conn != nil {
-			conn.Close(context.Background())
-		}
-	}
-	log.Logger.Debug("Database connections have closed")
-}
-
-func (l *Loader) Initialize(cmd *cobra.Command) {
-	flagSet := cmd.Flags()
-
-	flagSet.StringVar(&l.option.Host, option.HOST, "", "Host to connect to (default localhost)")
-	flagSet.IntVar(&l.option.Port, option.PORT, 0, "Port to connect to (default 5432)")
-	flagSet.StringVar(&l.option.User, option.USER, "", "User to connect as (default gpadmin)")
-	flagSet.StringVar(&l.option.Database, option.DATABASE, "", "Database to connect to (default gpadmin)")
-	flagSet.BoolVar(&l.option.ForcePasswordAuth, option.FORCE_PASSWORD_AUTH, false, "Force a password prompt (default false)")
-	flagSet.StringVar(&l.option.Table, option.TABLE, "", "Table to load to")
-	flagSet.StringVar(&l.option.Tag, option.TAG, "", "File tag")
-	flagSet.StringVar(&l.option.DestPath, option.DEST_PATH, "", "Path relative to the table root directory (default: root directory of the table)")
-	flagSet.StringSliceVar(&l.option.InputFile, option.INPUT_FILE, []string{}, "Input files or directory")
-	flagSet.IntVar(&l.option.NumTasks, option.TASKS, 1, "The maximum number of files that concurrently loads")
-	flagSet.StringVar(&l.option.LogFile, option.LOGFILE, "", "Log output to logfile (default none)")
-	flagSet.BoolVar(&l.option.StopOnError, option.STOP_ON_ERROR, false, "Stop loading files when an error occurs (default false)")
-	flagSet.BoolVar(&l.option.Verbose, option.VERBOSE, false, "Indicates that the tool should generate verbose output (default false)")
-	flagSet.Bool("help", false, "Print help info and exit")
-	flagSet.Bool("version", false, "Print version info and exit")
-
-	cmd.MarkFlagRequired(option.TABLE)
-	cmd.MarkFlagRequired(option.INPUT_FILE)
-}
-
-func (l *Loader) GetVersion() string {
-	return "1.0.0"
-}
-
-func (l *Loader) Run(cmd *cobra.Command, args []string) {
-	// initialize log
-	log.Initialize(l.option.LogFile, l.option.Verbose)
-	defer log.ShutDown()
-
-	// parse options
-	err := l.option.Parse()
-	if err != nil {
-		log.Logger.Fatalf("%v", err)
-	}
-
-	interrupt := make(chan os.Signal, 1)
-	signal.Notify(interrupt, os.Interrupt)
-
-	// start loader
-	doneCh, quickDieCh, err := l.start()
-	if err != nil {
-		log.Logger.Fatalf("Failed to start loader: %v", err)
-	}
-
-	if doneCh != nil {
-	loop:
-		for {
-			select {
-			case <-interrupt:
-				err = errors.New("Caught interrupt signal, exitting...")
-				log.Logger.Errorf("%v", err)
-				break loop
-			case <-quickDieCh:
-				err = errors.New("Caught internal error, exitting...")
-				log.Logger.Errorf("%v", err)
-				break loop
-			case <-doneCh:
-				log.Logger.Infof("successfully loaded %v files, failed %v files",
-					l.workMgr.GetNumSucceedFiles(), l.workMgr.GetNumFailedFiles())
-				break loop
-			case <-time.After(25 * time.Millisecond):
-				log.Logger.Debug("Event loop")
-			}
-		}
-	}
-
-	l.stop()
-
-	if err != nil {
-		os.Exit(1)
-	}
-}
diff --git a/gpMgmt/bin/cbload/log/log.go b/gpMgmt/bin/cbload/log/log.go
deleted file mode 100644
index 4a0a45405be..00000000000
--- a/gpMgmt/bin/cbload/log/log.go
+++ /dev/null
@@ -1,59 +0,0 @@
-package log
-
-import (
-	"io"
-	"os"
-	"strings"
-
-	"github.com/sirupsen/logrus"
-	prefixed "github.com/x-cray/logrus-prefixed-formatter"
-)
-
-var Logger = logrus.New()
-var logFileHandle *os.File
-
-func Initialize(file string, verbose bool) (err error) {
-	formater := new(prefixed.TextFormatter)
-	formater.TimestampFormat = "2006-01-02 15:04:05"
-	formater.FullTimestamp = true
-
-	Logger.Formatter = formater
-
-	if len(file) > 0 {
-		logFileHandle, err = os.OpenFile(file, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
-		if err != nil {
-			return err
-		}
-
-		Logger.Out = io.MultiWriter(logFileHandle, os.Stderr)
-	}
-
-	if verbose {
-		setLogLevel("debug")
-	} else {
-		setLogLevel("info")
-	}
-
-	return nil
-}
-
-func setLogLevel(logLevelString string) {
-	switch strings.ToLower(logLevelString) {
-	case "debug":
-		Logger.SetLevel(logrus.DebugLevel)
-	case "info":
-		Logger.SetLevel(logrus.InfoLevel)
-	case "warning":
-		Logger.SetLevel(logrus.WarnLevel)
-	case "error":
-		Logger.SetLevel(logrus.ErrorLevel)
-	case "fatal":
-		Logger.SetLevel(logrus.FatalLevel)
-	default:
-		Logger.Warn("Unknown log level \"" + logLevelString + "\". Falling back to INFO.")
-	}
-}
-
-func ShutDown() {
-	logFileHandle.Close()
-}
diff --git a/gpMgmt/bin/cbload/main.go b/gpMgmt/bin/cbload/main.go
deleted file mode 100644
index 12e4bbbdac5..00000000000
--- a/gpMgmt/bin/cbload/main.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package main
-
-import (
-	"github.com/cloudberrydb/cbload/loader"
-	"github.com/spf13/cobra"
-)
-
-func main() {
-	l := loader.NewLoader()
-
-	var rootCmd = &cobra.Command{
-		Use:     "cbload",
-		Short:   "load file(s) into Cloudberry Database",
-		Version: l.GetVersion(),
-		Run:     l.Run,
-	}
-
-	l.Initialize(rootCmd)
-	rootCmd.Execute()
-}
diff --git a/gpMgmt/bin/cbload/option/option.go b/gpMgmt/bin/cbload/option/option.go
deleted file mode 100644
index a8f0720b76c..00000000000
--- a/gpMgmt/bin/cbload/option/option.go
+++ /dev/null
@@ -1,286 +0,0 @@
-package option
-
-import (
-	"bufio"
-	"fmt"
-	"os"
-	"strconv"
-	"strings"
-	"syscall"
-
-	"github.com/cloudberrydb/cbload/log"
-	"github.com/pkg/errors"
-	"golang.org/x/term"
-)
-
-const (
-	HOST                = "host"
-	PORT                = "port"
-	USER                = "user"
-	DATABASE            = "database"
-	FORCE_PASSWORD_AUTH = "force-password-auth"
-	TABLE               = "table"
-	TAG                 = "tag"
-	DEST_PATH           = "dest-path"
-	INPUT_FILE          = "input-file"
-	TASKS               = "tasks"
-	LOGFILE             = "logfile"
-	STOP_ON_ERROR       = "stop-on-error"
-	VERBOSE             = "verbose"
-)
-
-type Option struct {
-	Host              string
-	Port              int
-	User              string
-	Database          string
-	Password          string
-	ForcePasswordAuth bool
-	Table             string
-	Tag               string
-	DestPath          string
-	InputFile         []string
-	LogFile           string
-	NumTasks          int
-	StopOnError       bool
-	Verbose           bool
-}
-
-func NewOption() *Option {
-	o := &Option{}
-
-	return o
-}
-
-func (o *Option) Parse() error {
-	var err error
-
-	o.Host = o.getHostOrDefault(o.Host)
-	o.Port = o.getPortOrDefault(o.Port)
-	o.User = o.getUserOrDefault(o.User)
-	o.Database = o.getDatabaseOrDefault(o.Database)
-	o.Password, err = o.getPasswordOrDefault(o.ForcePasswordAuth)
-	if err != nil {
-		return err
-	}
-
-	if o.NumTasks > 256 || o.NumTasks < 1 {
-		return errors.Errorf("Invalid value: \"%v\" for --tasks option, the number of tasks must be in the range of 1 to 256", o.NumTasks)
-	}
-
-	return nil
-}
-
-func (o *Option) getHostOrDefault(host string) string {
-	result := host
-
-	if len(result) == 0 {
-		result = os.Getenv("PGHOST")
-	}
-
-	if len(result) == 0 {
-		result = "localhost"
-	}
-
-	return result
-}
-
-func (o *Option) getPortOrDefault(port int) int {
-	result := port
-
-	if result == 0 {
-		p, err := strconv.Atoi(os.Getenv("PGPORT"))
-		if err == nil {
-			result = p
-		}
-	}
-
-	if result == 0 {
-		result = 5432
-	}
-
-	return result
-}
-
-func (o *Option) getUserOrDefault(user string) string {
-	result := user
-
-	if len(result) == 0 {
-		result = os.Getenv("PGUSER")
-	}
-
-	if len(result) == 0 {
-		result = os.Getenv("USER")
-		if len(o.User) == 0 {
-			result = os.Getenv("LOGNAME")
-		}
-		if len(o.User) == 0 {
-			result = os.Getenv("USERNAME")
-		}
-	}
-
-	if len(result) == 0 {
-		result = "gpadmin"
-	}
-
-	return result
-}
-
-func (o *Option) getDatabaseOrDefault(database string) string {
-	result := database
-
-	if len(result) == 0 {
-		result = os.Getenv("PGDATABASE")
-	}
-
-	if len(result) == 0 {
-		result = o.User
-	}
-
-	return result
-}
-
-func (o *Option) getPasswordOrDefault(forcePasswordAuth bool) (string, error) {
-	result := ""
-	var err error
-
-	if forcePasswordAuth {
-		fmt.Printf("Password: ")
-		bp, err := term.ReadPassword(int(syscall.Stdin))
-		fmt.Printf("\n")
-		if err != nil {
-			return result, err
-		}
-		result = string(bp)
-	}
-
-	if len(result) == 0 {
-		result = os.Getenv("PGPASSWORD")
-	}
-
-	if len(result) == 0 {
-		passFile := os.Getenv("PGPASSFILE")
-		if len(passFile) == 0 {
-			dir := os.Getenv("HOME")
-			if len(dir) == 0 {
-				dir = "."
-			}
-			passFile = dir + "/.pgpass"
-		}
-
-		result, err = o.readPGPass(passFile)
-		if err != nil {
-			return result, err
-		}
-	}
-
-	return result, nil
-}
-
-func (o *Option) readPGPass(file string) (string, error) {
-	password := ""
-
-	f, err := os.Open(file)
-	if err != nil {
-		log.Logger.Debugf("%v", err)
-		return password, nil
-	}
-
-	defer f.Close()
-	sc := bufio.NewScanner(f)
-
-	for sc.Scan() {
-		elems := o.splitPGPassLine(sc.Text())
-		if len(elems) != 5 {
-			return password, errors.Errorf("pgpass file: invalid line \"%v\":there should be 5 fields in a line, seperated by colon", sc.Text())
-		}
-
-		if elems[0] != "*" && strings.ToLower(elems[0]) != strings.ToLower(o.Host) {
-			continue
-		}
-
-		if elems[1] != "*" {
-			p, err := strconv.Atoi(elems[1])
-
-			if err != nil {
-				return password, errors.Errorf("pgpass file: invalid line \"%v\":port number should be integer", sc.Text())
-			}
-
-			if p != o.Port {
-				continue
-			}
-		}
-
-		if elems[2] != "*" && elems[2] != o.Database {
-			continue
-		}
-
-		if elems[3] != "*" && elems[3] != o.User {
-			continue
-		}
-
-		password = elems[4]
-		break
-	}
-
-	return password, nil
-}
-
-func (o *Option) splitPGPassLine(line string) []string {
-	escape := false
-	results := make([]string, 0)
-	elem := make([]byte, 0)
-	bline := []byte(line)
-
-	for _, c := range bline {
-		if !escape && c == '\\' {
-			escape = true
-		} else if !escape && c == ':' {
-			results = append(results, string(elem))
-			elem = make([]byte, 0)
-		} else {
-			elem = append(elem, c)
-			escape = false
-		}
-	}
-
-	if escape {
-		elem = append(elem, '\\')
-	}
-
-	results = append(results, string(elem))
-
-	return results
-}
-
-func (o *Option) IsDirectoryMode() (bool, error) {
-	numDirs := 0
-	for _, file := range o.InputFile {
-		stat, err := os.Stat(file)
-		if err != nil {
-			return false, err
-		}
-
-		if stat.IsDir() {
-			numDirs++
-		}
-	}
-
-	if len(o.InputFile) == 1 {
-		if numDirs == 1 {
-			return true, nil
-		}
-
-		return false, nil
-	}
-
-	if numDirs == len(o.InputFile) {
-		return false, errors.Errorf("Only one directory can be specified")
-	}
-
-	if numDirs > 0 {
-		return false, errors.Errorf("File and directory cannot be specified at the same time")
-	}
-
-	return false, nil
-}
diff --git a/gpMgmt/bin/cbload/worker/worker.go b/gpMgmt/bin/cbload/worker/worker.go
deleted file mode 100644
index 0d0460b7af3..00000000000
--- a/gpMgmt/bin/cbload/worker/worker.go
+++ /dev/null
@@ -1,168 +0,0 @@
-package worker
-
-import (
-	"bufio"
-	"context"
-	"fmt"
-	"os"
-	"sync"
-	"sync/atomic"
-
-	"github.com/cloudberrydb/cbload/log"
-	"github.com/jackc/pgconn"
-)
-
-type Worker struct {
-	context           context.Context
-	cancelContextFunc context.CancelFunc
-	conn              *pgconn.PgConn
-	id                int
-	localFiles        []string
-	destFiles         []string
-	manager           *WorkManager
-}
-
-type WorkManager struct {
-	sync.Mutex
-	waitGroup      sync.WaitGroup
-	table          string
-	tag            string
-	conns          []*pgconn.PgConn
-	localFiles     [][]string
-	destFiles      [][]string
-	workers        []*Worker
-	doneCh         chan struct{}
-	quickDieCh     chan struct{}
-	syncCh         chan struct{}
-	numFileSucceed atomic.Uint32
-	numFileFailed  atomic.Uint32
-	stopOnError    bool
-}
-
-func NewWorkManager(table, tag string, conns []*pgconn.PgConn, localFiles, destFiles [][]string, stopOnError bool) *WorkManager {
-	wm := &WorkManager{
-		table:       table,
-		tag:         tag,
-		conns:       conns,
-		localFiles:  localFiles,
-		destFiles:   destFiles,
-		stopOnError: stopOnError,
-		workers:     make([]*Worker, len(conns)),
-		doneCh:      make(chan struct{}, 1),
-		quickDieCh:  make(chan struct{}, 1),
-		syncCh:      make(chan struct{}, 1),
-	}
-	return wm
-}
-
-func (wm *WorkManager) Start() (chan struct{}, chan struct{}) {
-	log.Logger.Debug("WorkManager starting...")
-	numWorkers := len(wm.conns)
-
-	wm.waitGroup.Add(numWorkers)
-	for i := 0; i < numWorkers; i++ {
-		wm.workers[i] = newWorker(wm, i, wm.conns[i], wm.localFiles[i], wm.destFiles[i])
-		go wm.workers[i].Run()
-	}
-
-	go func() {
-		wm.waitGroup.Wait()
-		log.Logger.Debug("All workers have completed")
-		wm.syncCh <- struct{}{}
-		wm.doneCh <- struct{}{}
-		log.Logger.Debug("The main thread has already been notified")
-	}()
-
-	return wm.doneCh, wm.quickDieCh
-}
-
-func (wm *WorkManager) GetNumSucceedFiles() uint32 {
-	return wm.numFileSucceed.Load()
-}
-
-func (wm *WorkManager) GetNumFailedFiles() uint32 {
-	return wm.numFileFailed.Load()
-}
-
-func (wm *WorkManager) Stop() {
-	log.Logger.Debug("WorkManager stopping...")
-
-	numWorkers := len(wm.conns)
-	for i := 0; i < numWorkers; i++ {
-		wm.workers[i].Stop()
-	}
-
-	<-wm.syncCh
-	log.Logger.Debug("WorkManager stopped")
-}
-
-func newWorker(manager *WorkManager, id int, conn *pgconn.PgConn, localFiles, destFiles []string) *Worker {
-	ctx, cancelFunc := context.WithCancel(context.Background())
-	worker := &Worker{ctx, cancelFunc, conn, id, localFiles, destFiles, manager}
-	return worker
-}
-
-func (w *Worker) Run() {
-	defer w.manager.waitGroup.Done()
-
-	log.Logger.Debugf("Worker [%v]: started", w.id)
-
-loop:
-	for i, file := range w.localFiles {
-		fh, err := os.Open(file)
-		if err != nil {
-			if w.handleError("unable to open file", file, err) {
-				break
-			}
-			continue
-		}
-
-		log.Logger.Infof("Worker [%v]: loading file \"%v\" into \"%v:%v\"...", w.id, file, w.manager.table, w.destFiles[i])
-
-		_, err = w.conn.CopyFrom(w.context,
-			bufio.NewReader(fh),
-			w.formCopyStatement(w.destFiles[i]))
-		if err != nil {
-			if w.handleError("unable to upload file", file, err) {
-				break
-			}
-			continue
-		}
-
-		w.manager.numFileSucceed.Add(1)
-		fh.Close()
-		log.Logger.Infof("Worker [%v]: successfully loaded", w.id)
-
-		select {
-		case <-w.context.Done():
-			break loop
-		default:
-		}
-	}
-
-	log.Logger.Debugf("Worker [%v]: stopped", w.id)
-}
-
-func (w *Worker) handleError(title, file string, err error) bool {
-	w.manager.numFileFailed.Add(1)
-	log.Logger.Errorf("Worker [%v]: %v \"%v\": %v", w.id, title, file, err)
-	if w.manager.stopOnError {
-		w.manager.quickDieCh <- struct{}{}
-		return true
-	}
-
-	return false
-}
-
-func (w *Worker) formCopyStatement(file string) string {
-	if len(w.manager.tag) == 0 {
-		return fmt.Sprintf("copy binary %s from stdin '%s';", w.manager.table, file)
-	}
-
-	return fmt.Sprintf("copy binary %s from stdin '%s' with tag '%s';", w.manager.table, file, w.manager.tag)
-}
-
-func (w *Worker) Stop() {
-	log.Logger.Debugf("Worker [%v]: stopping...", w.id)
-	w.cancelContextFunc()
-}

From f7b5540ed9326687fde886ab05e9348419b871e1 Mon Sep 17 00:00:00 2001
From: Noah Misch <noah@leadboat.com>
Date: Wed, 9 Feb 2022 18:16:56 -0800
Subject: [PATCH 03/48] Fix back-patch of "Avoid race in RelationBuildDesc()
 ..."

The back-patch of commit fdd965d074d46765c295223b119ca437dbcac973 broke
CLOBBER_CACHE_ALWAYS for v9.6 through v13.  It updated the
InvalidateSystemCaches() call for CLOBBER_CACHE_RECURSIVELY, neglecting
the one for CLOBBER_CACHE_ALWAYS.  Back-patch to v13, v12, v11, and v10.

Reviewed by Tomas Vondra.  Reported by Tomas Vondra.

Discussion: https://postgr.es/m/df7b4c0b-7d92-f03f-75c4-9e08b269a716@enterprisedb.com
---
 src/backend/utils/cache/inval.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 5c41466a4ff..5a986349e2a 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -785,7 +785,18 @@ AcceptInvalidationMessages(void)
 	 * recursive reloads it's unlikely you'll learn more.
 	 *----------
 	 */
-#ifdef DISCARD_CACHES_ENABLED
+#if defined(CLOBBER_CACHE_ALWAYS)
+	{
+		static bool in_recursion = false;
+
+		if (!in_recursion)
+		{
+			in_recursion = true;
+			InvalidateSystemCachesExtended(true);
+			in_recursion = false;
+		}
+	}
+#elif defined(CLOBBER_CACHE_RECURSIVELY)
 	{
 		static int	recursion_depth = 0;
 

From 5a433c8f84fd8fb2c3d0f8b2deef4507217e54f1 Mon Sep 17 00:00:00 2001
From: Zhenghua Lyu <kainwen@gmail.com>
Date: Thu, 10 Feb 2022 13:24:03 +0800
Subject: [PATCH 04/48] Fix getResUsage integer overflow.

The function getResUsage parses the group id (type Oid) from a
string. The previous code used pg_atoi for the parse, which is not
correct because Oid is an unsigned integer; we should use atooid
instead. In addition, the SQL built with the group id used "%d";
this commit fixes that by using "%u".
---
 src/backend/access/transam/varsup.c           | 21 +++++++++++++
 src/backend/utils/resgroup/resgroup_helper.c  |  5 ++-
 .../resgroup/resgroup_large_group_id.out      | 31 +++++++++++++++++++
 .../isolation2/isolation2_resgroup_schedule   |  3 ++
 .../sql/resgroup/resgroup_large_group_id.sql  | 13 ++++++++
 5 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 src/test/isolation2/expected/resgroup/resgroup_large_group_id.out
 create mode 100644 src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql

diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index fc866ba9d97..5ca18d1b77d 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -26,6 +26,7 @@
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "cdb/cdbutil.h"
+#include "utils/faultinjector.h"
 #include "utils/guc.h"
 #include "utils/syscache.h"
 
@@ -624,6 +625,26 @@ GetNewObjectIdUnderLock(void)
 	(ShmemVariableCache->nextOid)++;
 	(ShmemVariableCache->oidCount)--;
 
+#ifdef FAULT_INJECTOR
+	if (SIMPLE_FAULT_INJECTOR("bump_oid") == FaultInjectorTypeSkip)
+	{
+		/*
+		 * CDB: we encounter high oid issues several times, we should
+		 * have some test-utils to verify logic under larger oid.
+		 *
+		 * NOTE: we do not have undo-bump, so take care when you decide to
+		 * use this fault inject. Currently, only resgroup test job uses it,
+		 * that is safe, because resgroup job is an independent pipeline job.
+		 */
+		Oid large_oid = (1U<<31)+5; /* this value will overflow if taken as int32 */
+		if (ShmemVariableCache->nextOid < large_oid)
+		{
+			ShmemVariableCache->nextOid = large_oid + 1;
+			result = large_oid;
+		}
+	}
+#endif
+
 	return result;
 }
 
diff --git a/src/backend/utils/resgroup/resgroup_helper.c b/src/backend/utils/resgroup/resgroup_helper.c
index 68941a639c8..2148ffe3e34 100644
--- a/src/backend/utils/resgroup/resgroup_helper.c
+++ b/src/backend/utils/resgroup/resgroup_helper.c
@@ -105,7 +105,7 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId)
 		initStringInfo(&buffer);
 		appendStringInfo(&buffer,
 						 "SELECT groupid, cpu_usage, memory_usage "
-						 "FROM pg_resgroup_get_status(%d)",
+						 "FROM pg_resgroup_get_status(%u)",
 						 inGroupId);
 
 		CdbDispatchCommand(buffer.data, DF_WITH_SNAPSHOT, &cdb_pgresults);
@@ -133,8 +133,7 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId)
 			{
 				const char *result;
 				ResGroupStat *row = &ctx->groups[j];
-				Oid groupId = pg_atoi(PQgetvalue(pg_result, j, 0),
-									  sizeof(Oid), 0);
+				Oid groupId = atooid(PQgetvalue(pg_result, j, 0));
 
 				Assert(groupId == row->groupId);
 
diff --git a/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out b/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out
new file mode 100644
index 00000000000..558cfad58a3
--- /dev/null
+++ b/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out
@@ -0,0 +1,31 @@
+-- Test resgroup oid larger than int32.
+select gp_inject_fault('bump_oid', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1;
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+
+create resource group rg_large_oid with (cpu_rate_limit=20, memory_limit=10);
+CREATE
+
+select gp_inject_fault('bump_oid', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1;
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+
+select max(oid)::bigint > (power(2,31) + 1)::bigint from pg_resgroup;
+ ?column? 
+----------
+ t        
+(1 row)
+
+-- count(*) > 0 to run the SQL but do not display the result
+select count(*) > 0 from pg_resgroup_get_status(NULL);
+ ?column? 
+----------
+ t        
+(1 row)
+
+drop resource group rg_large_oid;
+DROP
diff --git a/src/test/isolation2/isolation2_resgroup_schedule b/src/test/isolation2/isolation2_resgroup_schedule
index e94ee880a4b..c6aad426821 100644
--- a/src/test/isolation2/isolation2_resgroup_schedule
+++ b/src/test/isolation2/isolation2_resgroup_schedule
@@ -51,4 +51,7 @@ test: resgroup/resgroup_functions
 # dump info
 test: resgroup/resgroup_dumpinfo
 
+# test large group id
+test: resgroup/resgroup_large_group_id
+
 test: resgroup/disable_resgroup
diff --git a/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql b/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql
new file mode 100644
index 00000000000..10ec72fa8b6
--- /dev/null
+++ b/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql
@@ -0,0 +1,13 @@
+-- Test resgroup oid larger than int32.
+select gp_inject_fault('bump_oid', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1;
+
+create resource group rg_large_oid with (cpu_rate_limit=20, memory_limit=10);
+
+select gp_inject_fault('bump_oid', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1;
+
+select max(oid)::bigint > (power(2,31) + 1)::bigint from pg_resgroup;
+
+-- count(*) > 0 to run the SQL but do not display the result
+select count(*) > 0 from pg_resgroup_get_status(NULL);
+
+drop resource group rg_large_oid;

From da172f24752e810c4cb8d7da89f7da4c43d63cf3 Mon Sep 17 00:00:00 2001
From: FairyFar <fairyfar@msn.com>
Date: Fri, 11 Feb 2022 23:23:04 +0800
Subject: [PATCH 05/48] removed redefined PG_AUTOCONF_FILENAME in guc.h
 (#13081)

---
 src/include/utils/guc.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index d49f00a5bab..9d7f5f8909e 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -29,14 +29,6 @@
  */
 #define RESERVED_FTS_CONNECTIONS (1)
 
-
-/*
- * Automatic configuration file name for ALTER SYSTEM.
- * This file will be used to store values of configuration parameters
- * set by ALTER SYSTEM command.
- */
-#define PG_AUTOCONF_FILENAME		"postgresql.auto.conf"
-
 /* upper limit for GUC variables measured in kilobytes of memory */
 /* note that various places assume the byte size fits in a "long" variable */
 #if SIZEOF_SIZE_T > 4 && SIZEOF_LONG > 4

From 92d66aa334568d5d315bf2fac467bc41c423260a Mon Sep 17 00:00:00 2001
From: David Kimura <dkimura@vmware.com>
Date: Mon, 31 Jan 2022 17:55:03 +0000
Subject: [PATCH 06/48] Pass subquery context through callstack

ESubqueryCtxt indicates whether a subquery appears in the project list
(EsqctxtValue) or comparison predicate (EsqctxtFilter).  Currently we
can incorrectly recalculate the context inside PexprSubqueryPred().

For the following query, the subquery was incorrectly determined to be
in the comparison predicate:
    ```
    SELECT (SELECT 1)=ALL(SELECT generate_series(1,2));
    ```

Using the incorrect subquery context can cause ORCA to create a plan
that incorrectly creates an inner (instead of a left outer) correlated
apply operator. Ultimately, that mistake can lead to wrong results.

The fix is to pass the subquery context when available rather than
recalculating it.
---
 .../libgpopt/include/gpopt/xforms/CSubqueryHandler.h |  3 ++-
 .../gporca/libgpopt/src/xforms/CSubqueryHandler.cpp  | 12 ++++++------
 .../gporca/libgpopt/src/xforms/CXformUtils.cpp       |  3 ++-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h b/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h
index bfe1d0987d7..b4dd303abf1 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h
@@ -272,7 +272,8 @@ class CSubqueryHandler
 	// build an expression for the quantified comparison of the subquery
 	CExpression *PexprSubqueryPred(CExpression *pexprOuter,
 								   CExpression *pexprSubquery,
-								   CExpression **ppexprResult);
+								   CExpression **ppexprResult,
+								   CSubqueryHandler::ESubqueryCtxt esqctxt);
 
 	// main driver
 	BOOL FProcess(
diff --git a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp
index 5ab73c4ffd8..970de9e728d 100644
--- a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp
+++ b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp
@@ -179,7 +179,8 @@ CSubqueryHandler::PexprReplace(CMemoryPool *mp, CExpression *pexprInput,
 CExpression *
 CSubqueryHandler::PexprSubqueryPred(CExpression *pexprOuter,
 									CExpression *pexprSubquery,
-									CExpression **ppexprResult)
+									CExpression **ppexprResult,
+									CSubqueryHandler::ESubqueryCtxt esqctxt)
 {
 	GPOS_ASSERT(CUtils::FQuantifiedSubquery(pexprSubquery->Pop()));
 
@@ -187,7 +188,6 @@ CSubqueryHandler::PexprSubqueryPred(CExpression *pexprOuter,
 	CExpression *pexprNewLogical = nullptr;
 
 	CExpression *pexprScalarChild = (*pexprSubquery)[1];
-	CSubqueryHandler::ESubqueryCtxt esqctxt = CSubqueryHandler::EsqctxtFilter;
 
 	// If pexprScalarChild is a non-scalar subquery such as follows,
 	// EXPLAIN SELECT * FROM t3 WHERE (c = ANY(SELECT c FROM t2)) IN (SELECT b from t1);
@@ -1227,7 +1227,7 @@ CSubqueryHandler::FCreateCorrelatedApplyForQuantifiedSubquery(
 	CExpression *pexprResult = nullptr;
 	CSubqueryHandler sh(mp, true /* fEnforceCorrelatedApply */);
 	CExpression *pexprPredicate =
-		sh.PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult);
+		sh.PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult, esqctxt);
 
 	pexprInner->AddRef();
 	if (EsqctxtFilter == esqctxt)
@@ -1452,7 +1452,7 @@ CSubqueryHandler::FRemoveAnySubquery(CExpression *pexprOuter,
 	// build subquery quantified comparison
 	CExpression *pexprResult = nullptr;
 	CExpression *pexprPredicate =
-		PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult);
+		PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult, esqctxt);
 
 	// generate a select for the quantified predicate
 	pexprInner->AddRef();
@@ -1623,8 +1623,8 @@ CSubqueryHandler::FRemoveAllSubquery(CExpression *pexprOuter,
 		{
 			// build subquery quantified comparison
 			CExpression *pexprResult = nullptr;
-			CExpression *pexprPredicate =
-				PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult);
+			CExpression *pexprPredicate = PexprSubqueryPred(
+				pexprInner, pexprSubquery, &pexprResult, esqctxt);
 
 			*ppexprResidualScalar =
 				CUtils::PexprScalarConstBool(mp, true /*value*/);
diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
index 9472b5b9524..d206be48c80 100644
--- a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
+++ b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
@@ -847,7 +847,8 @@ CXformUtils::SubqueryAnyToAgg(
 	CExpression *pexprResult = nullptr;
 	CSubqueryHandler sh(mp, false /* fEnforceCorrelatedApply */);
 	CExpression *pexprSubqPred =
-		sh.PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult);
+		sh.PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult,
+							 CSubqueryHandler::EsqctxtFilter);
 	CScalarCmp *scalarCmp = CScalarCmp::PopConvert(pexprSubqPred->Pop());
 
 	GPOS_ASSERT(nullptr != scalarCmp);

From 984f94c2b0baa78638ba5deac8f9e1238e89bee5 Mon Sep 17 00:00:00 2001
From: David Kimura <dkimura@vmware.com>
Date: Mon, 31 Jan 2022 18:51:31 +0000
Subject: [PATCH 07/48] Restrict predicate push down on left outer correlated
 apply

Given the following query:
```
SELECT 3 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
```

Without predicate push down:
```
Physical plan:
+--CPhysicalComputeScalar
   |--CPhysicalCorrelatedLeftOuterNLJoin
   |  |--CPhysicalConstTableGet Columns: ["column1" (0)] Values: [(1); (2)]
   |  |--CPhysicalComputeScalar
   |  |  |--CPhysicalConstTableGet
   |  |  +--CScalarProjectList
   |  |     |--CScalarProjectElement "ColRef_0004" (4)
   |  |     |  +--CScalarConst (1)
   |  |     +--CScalarProjectElement "generate_series" (2)
   |  |        +--CScalarFunc (generate_series)
   |  |           |--CScalarConst (2)
   |  |           +--CScalarConst (3)
   |  +--CScalarCmp (=)
   |     |--CScalarConst (3)
   |     +--CScalarIdent "generate_series" (2)
   +--CScalarProjectList
      +--CScalarProjectElement "?column?" (3)
         +--CScalarIdent "ColRef_0004" (4)
```

With predicate push down:
```
Physical plan:
+--CPhysicalComputeScalar
   |--CPhysicalCorrelatedLeftOuterNLJoin
   |  |--CPhysicalConstTableGet Columns: ["column1" (0)] Values: [(1); (2)]
   |  |--CPhysicalComputeScalar
   |  |  |--CPhysicalFilter
   |  |  |  |--CPhysicalComputeScalar
   |  |  |  |  |--CPhysicalConstTableGet
   |  |  |  |  +--CScalarProjectList
   |  |  |  |     +--CScalarProjectElement "generate_series" (2)
   |  |  |  |        +--CScalarFunc (generate_series)
   |  |  |  |           |--CScalarConst (2)
   |  |  |  |           +--CScalarConst (3)
   |  |  |  +--CScalarCmp (=)
   |  |  |     |--CScalarConst (3)
   |  |  |     +--CScalarIdent "generate_series" (2)
   |  |  +--CScalarProjectList
   |  |     +--CScalarProjectElement "ColRef_0004" (4)
   |  |        +--CScalarConst (1)
   |  +--CScalarConst (1)
   +--CScalarProjectList
      +--CScalarProjectElement "?column?" (3)
         +--CScalarIdent "ColRef_0004" (4)
```

Both of these plans result in a SUBPLAN node of type ALL_SUBLINK. Inside
ExecScanSubPlan() the executor will check that all evaluated expressions
are true. The issue is that if we push the predicate down and filter out
all the expressions that evaluate to false, then in the example query we
would incorrectly return true from ExecSubPlan().

Fix is to avoid pushing down the predicate in the case of SUBPLAN of
type ALL_SUBLINK.
---
 .../CLogicalLeftOuterCorrelatedApply.h        |  8 ++++
 .../CLogicalLeftOuterCorrelatedApply.cpp      | 44 ++++++++++++++++++-
 .../libgpopt/src/operators/CNormalizer.cpp    | 14 +++++-
 3 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalLeftOuterCorrelatedApply.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalLeftOuterCorrelatedApply.h
index 7f3ed6b959f..63a3c0bc497 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalLeftOuterCorrelatedApply.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalLeftOuterCorrelatedApply.h
@@ -30,6 +30,8 @@ namespace gpopt
 class CLogicalLeftOuterCorrelatedApply : public CLogicalLeftOuterApply
 {
 private:
+	BOOL m_allow_predicate_pushdown{true};
+
 public:
 	CLogicalLeftOuterCorrelatedApply(const CLogicalLeftOuterCorrelatedApply &) =
 		delete;
@@ -77,6 +79,12 @@ class CLogicalLeftOuterCorrelatedApply : public CLogicalLeftOuterApply
 		return true;
 	}
 
+	BOOL
+	IsPredicatePushDownAllowed() const
+	{
+		return m_allow_predicate_pushdown;
+	}
+
 	// conversion function
 	static CLogicalLeftOuterCorrelatedApply *
 	PopConvert(COperator *pop)
diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalLeftOuterCorrelatedApply.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalLeftOuterCorrelatedApply.cpp
index 400442e8b01..7f9d1429c62 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalLeftOuterCorrelatedApply.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalLeftOuterCorrelatedApply.cpp
@@ -40,7 +40,49 @@ CLogicalLeftOuterCorrelatedApply::CLogicalLeftOuterCorrelatedApply(
 //---------------------------------------------------------------------------
 CLogicalLeftOuterCorrelatedApply::CLogicalLeftOuterCorrelatedApply(
 	CMemoryPool *mp, CColRefArray *pdrgpcrInner, EOperatorId eopidOriginSubq)
-	: CLogicalLeftOuterApply(mp, pdrgpcrInner, eopidOriginSubq)
+	: CLogicalLeftOuterApply(mp, pdrgpcrInner, eopidOriginSubq),
+	  // In the case of subquery all, we cannot push down the predicate.
+	  // Example query:
+	  //
+	  //   SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2));
+	  //
+	  // Physical plan:
+	  // ```
+	  // +--CPhysicalComputeScalar
+	  //    |--CPhysicalCorrelatedLeftOuterNLJoin
+	  //    |  |--CPhysicalConstTableGet Columns: ["" (0)]
+	  //    |  |--CPhysicalComputeScalar
+	  //    |  |  |--CPhysicalCorrelatedLeftOuterNLJoin
+	  //    |  |  |  |--CPhysicalComputeScalar
+	  //    |  |  |  |  |--CPhysicalConstTableGet Columns: ["" (1)] Values: [(1)]
+	  //    |  |  |  |  +--CScalarProjectList
+	  //    |  |  |  |     +--CScalarProjectElement "generate_series" (2)
+	  //    |  |  |  |        +--CScalarFunc (generate_series)
+	  //    |  |  |  |           |--CScalarConst (1)
+	  //    |  |  |  |           +--CScalarConst (2)
+	  //    |  |  |  |--CPhysicalComputeScalar
+	  //    |  |  |  |  |--CPhysicalConstTableGet Columns: ["" (3)] Values: [(1)]
+	  //    |  |  |  |  +--CScalarProjectList
+	  //    |  |  |  |     +--CScalarProjectElement "?column?" (4)
+	  //    |  |  |  |        +--CScalarConst (1)
+	  //    |  |  |  +--CScalarConst (1)
+	  //    |  |  +--CScalarProjectList
+	  //    |  |     +--CScalarProjectElement "ColRef_0006" (6)
+	  //    |  |        +--CScalarConst (1)
+	  //    |  +--CScalarCmp (=)
+	  //    |     |--CScalarIdent "?column?" (4)
+	  //    |     +--CScalarIdent "generate_series" (2)
+	  //    +--CScalarProjectList
+	  //       +--CScalarProjectElement "?column?" (5)
+	  //          +--CScalarIdent "ColRef_0006" (6)
+	  // ```
+	  //
+	  // If we push down CScalarCmp as a filter then we would incorrectly
+	  // discard the tuple 2 output from generate_series. Instead we want to
+	  // preserve it for a NULL match in the LOJ so that we correctly evaluate
+	  // subplan ALL_SUBLINK.
+	  m_allow_predicate_pushdown(COperator::EopScalarSubqueryAll !=
+								 eopidOriginSubq)
 {
 }
 
diff --git a/src/backend/gporca/libgpopt/src/operators/CNormalizer.cpp b/src/backend/gporca/libgpopt/src/operators/CNormalizer.cpp
index 9739ab89857..4b179a90ba3 100644
--- a/src/backend/gporca/libgpopt/src/operators/CNormalizer.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CNormalizer.cpp
@@ -18,6 +18,7 @@
 #include "gpopt/base/CUtils.h"
 #include "gpopt/operators/CLogical.h"
 #include "gpopt/operators/CLogicalInnerJoin.h"
+#include "gpopt/operators/CLogicalLeftOuterCorrelatedApply.h"
 #include "gpopt/operators/CLogicalLeftOuterJoin.h"
 #include "gpopt/operators/CLogicalNAryJoin.h"
 #include "gpopt/operators/CLogicalProject.h"
@@ -1078,7 +1079,6 @@ CNormalizer::PushThru(CMemoryPool *mp, CExpression *pexprLogical,
 		case COperator::EopLogicalInnerCorrelatedApply:
 		case COperator::EopLogicalLeftOuterJoin:
 		case COperator::EopLogicalLeftOuterApply:
-		case COperator::EopLogicalLeftOuterCorrelatedApply:
 		case COperator::EopLogicalLeftSemiApply:
 		case COperator::EopLogicalLeftSemiApplyIn:
 		case COperator::EopLogicalLeftSemiCorrelatedApplyIn:
@@ -1088,9 +1088,19 @@ CNormalizer::PushThru(CMemoryPool *mp, CExpression *pexprLogical,
 		case COperator::EopLogicalLeftSemiJoin:
 			PushThruJoin(mp, pexprLogical, pexprConj, ppexprResult);
 			break;
-
+		case COperator::EopLogicalLeftOuterCorrelatedApply:
 		default:
 		{
+			if (COperator::EopLogicalLeftOuterCorrelatedApply ==
+					pexprLogical->Pop()->Eopid() &&
+				CLogicalLeftOuterCorrelatedApply::PopConvert(
+					pexprLogical->Pop())
+					->IsPredicatePushDownAllowed())
+			{
+				PushThruJoin(mp, pexprLogical, pexprConj, ppexprResult);
+				break;
+			}
+
 			// can't push predicates through, start a new normalization path
 			CExpression *pexprNormalized =
 				PexprRecursiveNormalize(mp, pexprLogical);

From 03c8baef2ec402d32a4c2d875a276a3bed91b147 Mon Sep 17 00:00:00 2001
From: David Kimura <dkimura@vmware.com>
Date: Mon, 31 Jan 2022 20:00:01 +0000
Subject: [PATCH 08/48] Allow subplan test to contain scalar ident as left
 expression

In order to support subplan test expressions with scalar ident params on
the left side of the test expression, we must allow more than one column
to be projected from the restricted result node beneath.
---
 .../translate/CTranslatorDXLToScalar.cpp      | 43 +++++++++---
 .../libgpopt/include/gpopt/base/CColRefSet.h  |  3 +
 .../gpopt/translate/CTranslatorExprToDXL.h    |  5 +-
 .../gporca/libgpopt/src/base/CColRefSet.cpp   | 24 ++++++-
 .../src/translate/CTranslatorExprToDXL.cpp    | 69 +++++++++++++++----
 .../dxl/operators/CDXLScalarSubPlan.h         | 11 ++-
 .../src/operators/CDXLScalarSubPlan.cpp       |  6 +-
 .../gpopt/translate/CTranslatorDXLToScalar.h  |  1 +
 8 files changed, 137 insertions(+), 25 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorDXLToScalar.cpp b/src/backend/gpopt/translate/CTranslatorDXLToScalar.cpp
index 6d87816dca4..331650e65bc 100644
--- a/src/backend/gpopt/translate/CTranslatorDXLToScalar.cpp
+++ b/src/backend/gpopt/translate/CTranslatorDXLToScalar.cpp
@@ -806,7 +806,8 @@ CTranslatorDXLToScalar::TranslateDXLScalarSubplanToScalar(
 	SubLinkType slink = CTranslatorUtils::MapDXLSubplanToSublinkType(
 		dxlop->GetDxlSubplanType());
 	Expr *test_expr = TranslateDXLSubplanTestExprToScalar(
-		dxlop->GetDxlTestExpr(), slink, colid_var, &param_ids);
+		dxlop->GetDxlTestExpr(), slink, colid_var, dxlop->FOuterParam(),
+		&param_ids);
 
 	const CDXLColRefArray *outer_refs = dxlop->GetDxlOuterColRefsArray();
 
@@ -890,7 +891,7 @@ CTranslatorDXLToScalar::TranslateDXLScalarSubplanToScalar(
 Expr *
 CTranslatorDXLToScalar::TranslateDXLSubplanTestExprToScalar(
 	CDXLNode *test_expr_node, SubLinkType slink, CMappingColIdVar *colid_var,
-	List **param_ids)
+	BOOL has_outer_refs, List **param_ids)
 {
 	if (EXPR_SUBLINK == slink || EXISTS_SUBLINK == slink ||
 		NOT_EXISTS_SUBLINK == slink)
@@ -923,19 +924,45 @@ CTranslatorDXLToScalar::TranslateDXLSubplanTestExprToScalar(
 	CDXLNode *outer_child_node = (*test_expr_node)[0];
 	CDXLNode *inner_child_node = (*test_expr_node)[1];
 
+	CContextDXLToPlStmt *dxl_to_plstmt_ctxt =
+		(dynamic_cast<CMappingColIdVarPlStmt *>(colid_var))
+			->GetDXLToPlStmtContext();
+
 	// translate outer expression (can be a deep scalar tree)
-	Expr *outer_arg_expr = TranslateDXLToScalar(outer_child_node, colid_var);
-	args = gpdb::LAppend(args, outer_arg_expr);
+	Expr *outer_arg_expr = nullptr;
+	if (has_outer_refs)
+	{
+		Param *param1 = MakeNode(Param);
+		param1->paramkind = PARAM_EXEC;
+
+		// Ident
+		CDXLScalarIdent *outer_ident =
+			CDXLScalarIdent::Cast(outer_child_node->GetOperator());
+		Expr *outer_expr = (Expr *) param1;
+
+		// finalize outer expression
+		param1->paramtype = CMDIdGPDB::CastMdid(outer_ident->MdidType())->Oid();
+		param1->paramtypmod = outer_ident->TypeModifier();
+		param1->paramid = dxl_to_plstmt_ctxt->GetNextParamId(param1->paramtype);
+
+		// test expression is used for non-scalar subplan,
+		// first arg of test expression must be an EXEC param1 referring to subplan output
+		args = gpdb::LAppend(args, outer_expr);
+
+		// also, add this param1 to subplan param1 ids before translating other params
+		*param_ids = gpdb::LAppendInt(*param_ids, param1->paramid);
+	}
+	else
+	{
+		outer_arg_expr = TranslateDXLToScalar(outer_child_node, colid_var);
+		args = gpdb::LAppend(args, outer_arg_expr);
+	}
 
 	// translate inner expression (only certain forms supported)
 	// second arg must be an EXEC param which is replaced during query execution with subplan output
 	Param *param = MakeNode(Param);
 	param->paramkind = PARAM_EXEC;
 
-	CContextDXLToPlStmt *dxl_to_plstmt_ctxt =
-		(dynamic_cast<CMappingColIdVarPlStmt *>(colid_var))
-			->GetDXLToPlStmtContext();
-
 	CDXLScalarIdent *inner_ident = nullptr;
 	Expr *inner_expr = nullptr;
 	if (EdxlopScalarIdent == inner_child_node->GetOperator()->GetDXLOperator())
diff --git a/src/backend/gporca/libgpopt/include/gpopt/base/CColRefSet.h b/src/backend/gporca/libgpopt/include/gpopt/base/CColRefSet.h
index 3a38ec04f17..007d6671cff 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/base/CColRefSet.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/base/CColRefSet.h
@@ -120,6 +120,9 @@ class CColRefSet : public CBitSet, public DbgPrintMixin<CColRefSet>
 	// convert to array
 	CColRefArray *Pdrgpcr(CMemoryPool *mp) const;
 
+	// convert to id colref map
+	IntToColRefMap *Phmicr(CMemoryPool *mp) const;
+
 	// hash function
 	ULONG HashValue();
 
diff --git a/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXL.h b/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXL.h
index 269e4ec2fa6..aeea672aa96 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXL.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXL.h
@@ -625,7 +625,10 @@ class CTranslatorExprToDXL
 	CDXLNode *PdxlnProjectBoolConst(CDXLNode *dxlnode, BOOL value);
 
 	// helper to build a Result expression with project list restricted to required column
-	CDXLNode *PdxlnRestrictResult(CDXLNode *dxlnode, CColRef *colref);
+	CDXLNode *PdxlnRestrictResult(CDXLNode *dxlnode, const CColRef *colref);
+
+	// helper to build a Result expression with project list restricted to required columns
+	CDXLNode *PdxlnRestrictResult(CDXLNode *dxlnode, const CColRefSet *colrefs);
 
 	//	helper to build subplans from correlated LOJ
 	void BuildSubplansForCorrelatedLOJ(
diff --git a/src/backend/gporca/libgpopt/src/base/CColRefSet.cpp b/src/backend/gporca/libgpopt/src/base/CColRefSet.cpp
index a3ba3ee37bc..716e071958b 100644
--- a/src/backend/gporca/libgpopt/src/base/CColRefSet.cpp
+++ b/src/backend/gporca/libgpopt/src/base/CColRefSet.cpp
@@ -88,7 +88,6 @@ CColRefSet::FMember(const CColRef *colref) const
 	return CBitSet::Get(colref->Id());
 }
 
-
 //---------------------------------------------------------------------------
 //	@function:
 //		CColRefSet::PcrAny
@@ -294,6 +293,29 @@ CColRefSet::Pdrgpcr(CMemoryPool *mp) const
 }
 
 
+//---------------------------------------------------------------------------
+//	@function:
+//		CColRefSet::Phmicr
+//
+//	@doc:
+//		Convert set into map
+//
+//---------------------------------------------------------------------------
+IntToColRefMap *
+CColRefSet::Phmicr(CMemoryPool *mp) const
+{
+	IntToColRefMap *phmicr = GPOS_NEW(mp) IntToColRefMap(mp);
+
+	CColRefSetIter crsi(*this);
+	while (crsi.Advance())
+	{
+		phmicr->Insert(GPOS_NEW(mp) INT(crsi.Pcr()->Id()), crsi.Pcr());
+	}
+
+	return phmicr;
+}
+
+
 //---------------------------------------------------------------------------
 //	@function:
 //		CColRefSet::HashValue
diff --git a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp
index ae35120faaf..a3d421b8cd8 100644
--- a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp
+++ b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp
@@ -3317,10 +3317,35 @@ CTranslatorExprToDXL::BuildSubplans(
 //
 //---------------------------------------------------------------------------
 CDXLNode *
-CTranslatorExprToDXL::PdxlnRestrictResult(CDXLNode *dxlnode, CColRef *colref)
+CTranslatorExprToDXL::PdxlnRestrictResult(CDXLNode *dxlnode,
+										  const CColRef *colref)
+{
+	CDXLNode *dxlresult = nullptr;
+	CColRefSet *pcrInner = GPOS_NEW(m_mp) CColRefSet(m_mp);
+
+	pcrInner->Include(colref);
+	dxlresult = PdxlnRestrictResult(dxlnode, pcrInner);
+	pcrInner->Release();
+
+	return dxlresult;
+}
+
+
+//---------------------------------------------------------------------------
+//	@function:
+//		CTranslatorExprToDXL::PdxlnRestrictResult
+//
+//	@doc:
+//		Helper to build a Result expression with project list
+//		restricted to required columns
+//
+//---------------------------------------------------------------------------
+CDXLNode *
+CTranslatorExprToDXL::PdxlnRestrictResult(CDXLNode *dxlnode,
+										  const CColRefSet *colrefs)
 {
 	GPOS_ASSERT(nullptr != dxlnode);
-	GPOS_ASSERT(nullptr != colref);
+	GPOS_ASSERT(nullptr != colrefs);
 
 	CDXLNode *pdxlnProjListOld = (*dxlnode)[0];
 	const ULONG ulPrjElems = pdxlnProjListOld->Arity();
@@ -3339,12 +3364,17 @@ CTranslatorExprToDXL::PdxlnRestrictResult(CDXLNode *dxlnode, CColRef *colref)
 		CDXLScalarProjList *pdxlopPrL = GPOS_NEW(m_mp) CDXLScalarProjList(m_mp);
 		CDXLNode *pdxlnProjListNew = GPOS_NEW(m_mp) CDXLNode(m_mp, pdxlopPrL);
 
+		IntToColRefMap *phmicr = colrefs->Phmicr(m_mp);
+
 		for (ULONG ul = 0; ul < ulPrjElems; ul++)
 		{
 			CDXLNode *child_dxlnode = (*pdxlnProjListOld)[ul];
 			CDXLScalarProjElem *pdxlPrjElem =
 				CDXLScalarProjElem::Cast(child_dxlnode->GetOperator());
-			if (pdxlPrjElem->Id() == colref->Id())
+
+			const INT colid = pdxlPrjElem->Id();
+			CColRef *colref = phmicr->Find(&colid);
+			if (colref)
 			{
 				// create a new project element that simply points to required column,
 				// we cannot re-use child_dxlnode here since it may have a deep expression with columns inaccessible
@@ -3354,7 +3384,10 @@ CTranslatorExprToDXL::PdxlnRestrictResult(CDXLNode *dxlnode, CColRef *colref)
 				pdxlnProjListNew->AddChild(pdxlnPrEl);
 			}
 		}
-		GPOS_ASSERT(1 == pdxlnProjListNew->Arity());
+
+		phmicr->Release();
+
+		GPOS_ASSERT(colrefs->Size() == pdxlnProjListNew->Arity());
 
 		pdxlnResult = GPOS_NEW(m_mp)
 			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLPhysicalResult(m_mp));
@@ -3409,8 +3442,10 @@ CTranslatorExprToDXL::PdxlnQuantifiedSubplan(
 		pulNonGatherMotions, pfDML, false /*fRemap*/, false /*fRoot*/);
 
 	// find required column from inner child
-	CColRef *pcrInner = (*pdrgpcrInner)[0];
+	CColRefSet *pcrInner = GPOS_NEW(m_mp) CColRefSet(m_mp);
+	pcrInner->Include((*pdrgpcrInner)[0]);
 
+	BOOL outerParam = false;
 	if (fCorrelatedLOJ)
 	{
 		// overwrite required inner column based on scalar expression
@@ -3421,14 +3456,24 @@ CTranslatorExprToDXL::PdxlnQuantifiedSubplan(
 		pcrsUsed->Intersection(pcrsInner);
 		if (0 < pcrsUsed->Size())
 		{
-			GPOS_ASSERT(1 == pcrsUsed->Size());
+			GPOS_ASSERT(1 == pcrsUsed->Size() || 2 == pcrsUsed->Size());
+
+			// Both sides of the SubPlan test expression can come from the
+			// inner side. So we need to pass pcrsUsed instead of pcrInner into
+			// PdxlnRestrictResult()
+			outerParam = pcrsUsed->Size() > 1;
 
-			pcrInner = pcrsUsed->PcrFirst();
+			pcrInner->Release();
+			pcrInner = pcrsUsed;
+		}
+		else
+		{
+			pcrsUsed->Release();
 		}
-		pcrsUsed->Release();
 	}
 
 	CDXLNode *inner_dxlnode = PdxlnRestrictResult(pdxlnInnerChild, pcrInner);
+	pcrInner->Release();
 	if (nullptr == inner_dxlnode)
 	{
 		GPOS_RAISE(
@@ -3445,10 +3490,10 @@ CTranslatorExprToDXL::PdxlnQuantifiedSubplan(
 	mdid->AddRef();
 
 	// construct a subplan node, with the inner child under it
-	CDXLNode *pdxlnSubPlan = GPOS_NEW(m_mp) CDXLNode(
-		m_mp,
-		GPOS_NEW(m_mp) CDXLScalarSubPlan(m_mp, mdid, dxl_colref_array,
-										 dxl_subplan_type, dxlnode_test_expr));
+	CDXLNode *pdxlnSubPlan = GPOS_NEW(m_mp)
+		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarSubPlan(
+						   m_mp, mdid, dxl_colref_array, dxl_subplan_type,
+						   dxlnode_test_expr, outerParam));
 	pdxlnSubPlan->AddChild(inner_dxlnode);
 
 	// add to hashmap
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLScalarSubPlan.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLScalarSubPlan.h
index 54855430eec..00518f7b955 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLScalarSubPlan.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLScalarSubPlan.h
@@ -63,6 +63,9 @@ class CDXLScalarSubPlan : public CDXLScalar
 	// test expression -- not null if quantified/existential subplan
 	CDXLNode *m_dxlnode_test_expr;
 
+	// does test expression contain outer param
+	BOOL m_outer_param;
+
 public:
 	CDXLScalarSubPlan(CDXLScalarSubPlan &) = delete;
 
@@ -70,7 +73,7 @@ class CDXLScalarSubPlan : public CDXLScalar
 	CDXLScalarSubPlan(CMemoryPool *mp, IMDId *first_col_type_mdid,
 					  CDXLColRefArray *dxl_colref_array,
 					  EdxlSubPlanType dxl_subplan_type,
-					  CDXLNode *dxlnode_test_expr);
+					  CDXLNode *dxlnode_test_expr, BOOL outer_param = false);
 
 	~CDXLScalarSubPlan() override;
 
@@ -108,6 +111,12 @@ class CDXLScalarSubPlan : public CDXLScalar
 		return m_dxlnode_test_expr;
 	}
 
+	BOOL
+	FOuterParam() const
+	{
+		return m_outer_param;
+	}
+
 	// serialize operator in DXL format
 	void SerializeToDXL(CXMLSerializer *xml_serializer,
 						const CDXLNode *dxlnode) const override;
diff --git a/src/backend/gporca/libnaucrates/src/operators/CDXLScalarSubPlan.cpp b/src/backend/gporca/libnaucrates/src/operators/CDXLScalarSubPlan.cpp
index 42c4bb4fe37..6274d9236a4 100644
--- a/src/backend/gporca/libnaucrates/src/operators/CDXLScalarSubPlan.cpp
+++ b/src/backend/gporca/libnaucrates/src/operators/CDXLScalarSubPlan.cpp
@@ -33,12 +33,14 @@ CDXLScalarSubPlan::CDXLScalarSubPlan(CMemoryPool *mp,
 									 IMDId *first_col_type_mdid,
 									 CDXLColRefArray *dxl_colref_array,
 									 EdxlSubPlanType dxl_subplan_type,
-									 CDXLNode *dxlnode_test_expr)
+									 CDXLNode *dxlnode_test_expr,
+									 BOOL outer_param)
 	: CDXLScalar(mp),
 	  m_first_col_type_mdid(first_col_type_mdid),
 	  m_dxl_colref_array(dxl_colref_array),
 	  m_dxl_subplan_type(dxl_subplan_type),
-	  m_dxlnode_test_expr(dxlnode_test_expr)
+	  m_dxlnode_test_expr(dxlnode_test_expr),
+	  m_outer_param(outer_param)
 {
 	GPOS_ASSERT(EdxlSubPlanTypeSentinel > dxl_subplan_type);
 	GPOS_ASSERT_IMP(EdxlSubPlanTypeAny == dxl_subplan_type ||
diff --git a/src/include/gpopt/translate/CTranslatorDXLToScalar.h b/src/include/gpopt/translate/CTranslatorDXLToScalar.h
index e4fa180b69f..bd5672ccaea 100644
--- a/src/include/gpopt/translate/CTranslatorDXLToScalar.h
+++ b/src/include/gpopt/translate/CTranslatorDXLToScalar.h
@@ -150,6 +150,7 @@ class CTranslatorDXLToScalar
 	Expr *TranslateDXLSubplanTestExprToScalar(CDXLNode *test_expr_node,
 											  SubLinkType slink,
 											  CMappingColIdVar *colid_var,
+											  BOOL has_outer_refs,
 											  List **param_ids_list);
 
 	// translate subplan parameters

From 4a70411fbf760b06b22962d8bca9decca221185b Mon Sep 17 00:00:00 2001
From: David Kimura <dkimura@vmware.com>
Date: Thu, 27 Jan 2022 23:40:31 +0000
Subject: [PATCH 09/48] Add correlated subquery testcases

---
 .../AnyPredicate-Over-UnionOfConsts.mdp       |   4 +
 .../dxl/minidump/ScalarSubq-Eq-SubqAll-1.mdp  | 332 ++++++++++++++++
 .../dxl/minidump/ScalarSubq-Eq-SubqAll-2.mdp  | 360 ++++++++++++++++++
 .../gpopt/xforms/CSubqueryHandlerTest.cpp     |   4 +-
 .../regress/expected/correlated_subquery.out  | 128 +++++++
 src/test/regress/greenplum_schedule           |   2 +-
 src/test/regress/sql/correlated_subquery.sql  |  21 +
 7 files changed, 849 insertions(+), 2 deletions(-)
 create mode 100644 src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-1.mdp
 create mode 100644 src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-2.mdp
 create mode 100644 src/test/regress/expected/correlated_subquery.out
 create mode 100644 src/test/regress/sql/correlated_subquery.sql

diff --git a/src/backend/gporca/data/dxl/minidump/AnyPredicate-Over-UnionOfConsts.mdp b/src/backend/gporca/data/dxl/minidump/AnyPredicate-Over-UnionOfConsts.mdp
index cb496402c58..66786c38994 100644
--- a/src/backend/gporca/data/dxl/minidump/AnyPredicate-Over-UnionOfConsts.mdp
+++ b/src/backend/gporca/data/dxl/minidump/AnyPredicate-Over-UnionOfConsts.mdp
@@ -1,5 +1,9 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/">
+  <dxl:Comment><![CDATA[
+    EXPLAIN SELECT 1 = ANY(SELECT 3 WHERE false UNION SELECT 2);
+  ]]>
+  </dxl:Comment>
   <dxl:Thread Id="0">
     <dxl:OptimizerConfig>
       <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
diff --git a/src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-1.mdp b/src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-1.mdp
new file mode 100644
index 00000000000..2fbe8cfa699
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-1.mdp
@@ -0,0 +1,332 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/">
+  <dxl:Comment><![CDATA[
+    SET optimizer_enforce_subplans = 1;
+    SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2));
+  ]]>
+  </dxl:Comment>
+  <dxl:Thread Id="0">
+    <dxl:OptimizerConfig>
+      <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
+      <dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" MaxStatsBuckets="100"/>
+      <dxl:CTEConfig CTEInliningCutoff="0"/>
+      <dxl:WindowOids RowNumber="3100" Rank="3101"/>
+      <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
+        <dxl:CostParams>
+          <dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/>
+        </dxl:CostParams>
+      </dxl:CostModelConfig>
+      <dxl:Hint MinNumOfPartsToRequireSortOnInsert="2147483647" JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" XformBindThreshold="0"/>
+      <dxl:TraceFlags Value="102001,102002,102003,102043,102074,102120,102144,103001,103003,103014,103021,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,105000,106000"/>
+    </dxl:OptimizerConfig>
+    <dxl:Metadata SystemIds="0.GPDB">
+      <dxl:GPDBScalarOp Mdid="0.518.1.0" Name="&lt;&gt;" ComparisonType="NEq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.144.1.0"/>
+        <dxl:Commutator Mdid="0.518.1.0"/>
+        <dxl:InverseOp Mdid="0.96.1.0"/>
+      </dxl:GPDBScalarOp>
+      <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/>
+        <dxl:EqualityOp Mdid="0.91.1.0"/>
+        <dxl:InequalityOp Mdid="0.85.1.0"/>
+        <dxl:LessThanOp Mdid="0.58.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.59.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
+        <dxl:ComparisonOp Mdid="0.1693.1.0"/>
+        <dxl:ArrayType Mdid="0.1000.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.20.1.0" Name="Int8" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="8" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:EqualityOp Mdid="0.410.1.0"/>
+        <dxl:InequalityOp Mdid="0.411.1.0"/>
+        <dxl:LessThanOp Mdid="0.412.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.414.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.413.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.415.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1016.1.0"/>
+        <dxl:MinAgg Mdid="0.2131.1.0"/>
+        <dxl:MaxAgg Mdid="0.2115.1.0"/>
+        <dxl:AvgAgg Mdid="0.2100.1.0"/>
+        <dxl:SumAgg Mdid="0.2107.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:EqualityOp Mdid="0.96.1.0"/>
+        <dxl:InequalityOp Mdid="0.518.1.0"/>
+        <dxl:LessThanOp Mdid="0.97.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.521.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1007.1.0"/>
+        <dxl:MinAgg Mdid="0.2132.1.0"/>
+        <dxl:MaxAgg Mdid="0.2116.1.0"/>
+        <dxl:AvgAgg Mdid="0.2101.1.0"/>
+        <dxl:SumAgg Mdid="0.2108.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:GPDBScalarOp Mdid="0.410.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.467.1.0"/>
+        <dxl:Commutator Mdid="0.410.1.0"/>
+        <dxl:InverseOp Mdid="0.411.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.1977.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.7100.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBScalarOp Mdid="0.413.1.0" Name="&gt;" ComparisonType="GT" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.470.1.0"/>
+        <dxl:Commutator Mdid="0.412.1.0"/>
+        <dxl:InverseOp Mdid="0.414.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBFunc Mdid="0.1067.1.0" Name="generate_series" ReturnsSet="true" Stability="Immutable" DataAccess="NoSQL" IsStrict="true" IsNDVPreserving="false" IsAllowedForPS="false">
+        <dxl:ResultType Mdid="0.23.1.0"/>
+      </dxl:GPDBFunc>
+      <dxl:GPDBAgg Mdid="0.2108.1.0" Name="sum" IsSplittable="true" HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+      <dxl:GPDBScalarOp Mdid="0.91.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.16.1.0"/>
+        <dxl:RightType Mdid="0.16.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.60.1.0"/>
+        <dxl:Commutator Mdid="0.91.1.0"/>
+        <dxl:InverseOp Mdid="0.85.1.0"/>
+        <dxl:HashOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7124.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.424.1.0"/>
+          <dxl:Opfamily Mdid="0.2222.1.0"/>
+          <dxl:Opfamily Mdid="0.7124.1.0"/>
+          <dxl:Opfamily Mdid="0.10002.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBScalarOp Mdid="0.96.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.65.1.0"/>
+        <dxl:Commutator Mdid="0.96.1.0"/>
+        <dxl:InverseOp Mdid="0.518.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.1977.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.7100.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+    </dxl:Metadata>
+    <dxl:Query>
+      <dxl:OutputColumns>
+        <dxl:Ident ColId="6" ColName="?column?" TypeMdid="0.16.1.0"/>
+      </dxl:OutputColumns>
+      <dxl:CTEList/>
+      <dxl:LogicalProject>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="6" Alias="?column?">
+            <dxl:SubqueryAll OperatorName="=" OperatorMdid="0.96.1.0" ColId="3">
+              <dxl:ScalarSubquery ColId="5">
+                <dxl:LogicalProject>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="5" Alias="?column?">
+                      <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:LogicalConstTable>
+                    <dxl:Columns>
+                      <dxl:Column ColId="4" Attno="1" ColName="" TypeMdid="0.16.1.0"/>
+                    </dxl:Columns>
+                    <dxl:ConstTuple>
+                      <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+                    </dxl:ConstTuple>
+                  </dxl:LogicalConstTable>
+                </dxl:LogicalProject>
+              </dxl:ScalarSubquery>
+              <dxl:LogicalProject>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="3" Alias="generate_series">
+                    <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" TypeMdid="0.23.1.0">
+                      <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                      <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+                    </dxl:FuncExpr>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:LogicalConstTable>
+                  <dxl:Columns>
+                    <dxl:Column ColId="2" Attno="1" ColName="" TypeMdid="0.16.1.0"/>
+                  </dxl:Columns>
+                  <dxl:ConstTuple>
+                    <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+                  </dxl:ConstTuple>
+                </dxl:LogicalConstTable>
+              </dxl:LogicalProject>
+            </dxl:SubqueryAll>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:LogicalConstTable>
+          <dxl:Columns>
+            <dxl:Column ColId="1" Attno="1" ColName="" TypeMdid="0.16.1.0"/>
+          </dxl:Columns>
+          <dxl:ConstTuple>
+            <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+          </dxl:ConstTuple>
+        </dxl:LogicalConstTable>
+      </dxl:LogicalProject>
+    </dxl:Query>
+    <dxl:Plan Id="0" SpaceSize="0">
+      <dxl:Result>
+        <dxl:Properties>
+          <dxl:Cost StartupCost="0" TotalCost="452414879.449778" Rows="1.000000" Width="1"/>
+        </dxl:Properties>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="5" Alias="?column?">
+            <dxl:SubPlan TypeMdid="0.16.1.0" SubPlanType="AllSubPlan">
+              <dxl:TestExpr>
+                <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
+                  <dxl:Ident ColId="4" ColName="?column?" TypeMdid="0.23.1.0"/>
+                  <dxl:Ident ColId="2" ColName="generate_series" TypeMdid="0.23.1.0"/>
+                </dxl:Comparison>
+              </dxl:TestExpr>
+              <dxl:ParamList/>
+              <dxl:Result>
+                <dxl:Properties>
+                  <dxl:Cost StartupCost="0" TotalCost="441380.314547" Rows="1000.000000" Width="9"/>
+                </dxl:Properties>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="2" Alias="generate_series">
+                    <dxl:Ident ColId="2" ColName="generate_series" TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                  <dxl:ProjElem ColId="4" Alias="?column?">
+                    <dxl:Ident ColId="4" ColName="?column?" TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:Filter/>
+                <dxl:OneTimeFilter/>
+                <dxl:Result>
+                  <dxl:Properties>
+                    <dxl:Cost StartupCost="0" TotalCost="441380.314547" Rows="1000.000000" Width="9"/>
+                  </dxl:Properties>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="6" Alias="ColRef_0006">
+                      <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                    </dxl:ProjElem>
+                    <dxl:ProjElem ColId="2" Alias="generate_series">
+                      <dxl:Ident ColId="2" ColName="generate_series" TypeMdid="0.23.1.0"/>
+                    </dxl:ProjElem>
+                    <dxl:ProjElem ColId="4" Alias="?column?">
+                      <dxl:SubPlan TypeMdid="0.23.1.0" SubPlanType="ScalarSubPlan">
+                        <dxl:TestExpr/>
+                        <dxl:ParamList/>
+                        <dxl:Result>
+                          <dxl:Properties>
+                            <dxl:Cost StartupCost="0" TotalCost="0.000005" Rows="1.000000" Width="4"/>
+                          </dxl:Properties>
+                          <dxl:ProjList>
+                            <dxl:ProjElem ColId="4" Alias="?column?">
+                              <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                            </dxl:ProjElem>
+                          </dxl:ProjList>
+                          <dxl:Filter/>
+                          <dxl:OneTimeFilter/>
+                          <dxl:Result>
+                            <dxl:Properties>
+                              <dxl:Cost StartupCost="0" TotalCost="0.000001" Rows="1.000000" Width="1"/>
+                            </dxl:Properties>
+                            <dxl:ProjList>
+                              <dxl:ProjElem ColId="3" Alias="">
+                                <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                              </dxl:ProjElem>
+                            </dxl:ProjList>
+                            <dxl:Filter/>
+                            <dxl:OneTimeFilter/>
+                          </dxl:Result>
+                        </dxl:Result>
+                      </dxl:SubPlan>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:Filter/>
+                  <dxl:OneTimeFilter/>
+                  <dxl:Result>
+                    <dxl:Properties>
+                      <dxl:Cost StartupCost="0" TotalCost="0.000005" Rows="1.000000" Width="4"/>
+                    </dxl:Properties>
+                    <dxl:ProjList>
+                      <dxl:ProjElem ColId="2" Alias="generate_series">
+                        <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" TypeMdid="0.23.1.0">
+                          <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                          <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+                        </dxl:FuncExpr>
+                      </dxl:ProjElem>
+                    </dxl:ProjList>
+                    <dxl:Filter/>
+                    <dxl:OneTimeFilter/>
+                    <dxl:Result>
+                      <dxl:Properties>
+                        <dxl:Cost StartupCost="0" TotalCost="0.000001" Rows="1.000000" Width="1"/>
+                      </dxl:Properties>
+                      <dxl:ProjList>
+                        <dxl:ProjElem ColId="1" Alias="">
+                          <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                        </dxl:ProjElem>
+                      </dxl:ProjList>
+                      <dxl:Filter/>
+                      <dxl:OneTimeFilter/>
+                    </dxl:Result>
+                  </dxl:Result>
+                </dxl:Result>
+              </dxl:Result>
+            </dxl:SubPlan>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:Filter/>
+        <dxl:OneTimeFilter/>
+        <dxl:Result>
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="0.000001" Rows="1.000000" Width="1"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="0" Alias="">
+              <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:Filter/>
+          <dxl:OneTimeFilter/>
+        </dxl:Result>
+      </dxl:Result>
+    </dxl:Plan>
+  </dxl:Thread>
+</dxl:DXLMessage>
diff --git a/src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-2.mdp b/src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-2.mdp
new file mode 100644
index 00000000000..6a9b0fdb942
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/ScalarSubq-Eq-SubqAll-2.mdp
@@ -0,0 +1,360 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/">
+  <dxl:Comment><![CDATA[
+    SET optimizer_enforce_subplans = 1;
+    SELECT (SELECT a+1) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+  ]]>
+  </dxl:Comment>
+  <dxl:Thread Id="0">
+    <dxl:OptimizerConfig>
+      <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
+      <dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" MaxStatsBuckets="100"/>
+      <dxl:CTEConfig CTEInliningCutoff="0"/>
+      <dxl:WindowOids RowNumber="3100" Rank="3101"/>
+      <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
+        <dxl:CostParams>
+          <dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/>
+        </dxl:CostParams>
+      </dxl:CostModelConfig>
+      <dxl:Hint MinNumOfPartsToRequireSortOnInsert="2147483647" JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" XformBindThreshold="0"/>
+      <dxl:TraceFlags Value="102001,102002,102003,102043,102074,102120,102144,103001,103003,103014,103021,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,105000,106000"/>
+    </dxl:OptimizerConfig>
+    <dxl:Metadata SystemIds="0.GPDB">
+      <dxl:GPDBScalarOp Mdid="0.518.1.0" Name="&lt;&gt;" ComparisonType="NEq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.144.1.0"/>
+        <dxl:Commutator Mdid="0.518.1.0"/>
+        <dxl:InverseOp Mdid="0.96.1.0"/>
+      </dxl:GPDBScalarOp>
+      <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/>
+        <dxl:EqualityOp Mdid="0.91.1.0"/>
+        <dxl:InequalityOp Mdid="0.85.1.0"/>
+        <dxl:LessThanOp Mdid="0.58.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.59.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
+        <dxl:ComparisonOp Mdid="0.1693.1.0"/>
+        <dxl:ArrayType Mdid="0.1000.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.20.1.0" Name="Int8" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="8" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:EqualityOp Mdid="0.410.1.0"/>
+        <dxl:InequalityOp Mdid="0.411.1.0"/>
+        <dxl:LessThanOp Mdid="0.412.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.414.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.413.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.415.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1016.1.0"/>
+        <dxl:MinAgg Mdid="0.2131.1.0"/>
+        <dxl:MaxAgg Mdid="0.2115.1.0"/>
+        <dxl:AvgAgg Mdid="0.2100.1.0"/>
+        <dxl:SumAgg Mdid="0.2107.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:EqualityOp Mdid="0.96.1.0"/>
+        <dxl:InequalityOp Mdid="0.518.1.0"/>
+        <dxl:LessThanOp Mdid="0.97.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.521.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1007.1.0"/>
+        <dxl:MinAgg Mdid="0.2132.1.0"/>
+        <dxl:MaxAgg Mdid="0.2116.1.0"/>
+        <dxl:AvgAgg Mdid="0.2101.1.0"/>
+        <dxl:SumAgg Mdid="0.2108.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:GPDBScalarOp Mdid="0.410.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.467.1.0"/>
+        <dxl:Commutator Mdid="0.410.1.0"/>
+        <dxl:InverseOp Mdid="0.411.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.1977.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.7100.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBScalarOp Mdid="0.413.1.0" Name="&gt;" ComparisonType="GT" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.470.1.0"/>
+        <dxl:Commutator Mdid="0.412.1.0"/>
+        <dxl:InverseOp Mdid="0.414.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBScalarOp Mdid="0.551.1.0" Name="+" ComparisonType="Other" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.23.1.0"/>
+        <dxl:OpFunc Mdid="0.177.1.0"/>
+        <dxl:Commutator Mdid="0.551.1.0"/>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBFunc Mdid="0.1067.1.0" Name="generate_series" ReturnsSet="true" Stability="Immutable" DataAccess="NoSQL" IsStrict="true" IsNDVPreserving="false" IsAllowedForPS="false">
+        <dxl:ResultType Mdid="0.23.1.0"/>
+      </dxl:GPDBFunc>
+      <dxl:GPDBAgg Mdid="0.2108.1.0" Name="sum" IsSplittable="true" HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+      <dxl:GPDBScalarOp Mdid="0.91.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.16.1.0"/>
+        <dxl:RightType Mdid="0.16.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.60.1.0"/>
+        <dxl:Commutator Mdid="0.91.1.0"/>
+        <dxl:InverseOp Mdid="0.85.1.0"/>
+        <dxl:HashOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7124.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.424.1.0"/>
+          <dxl:Opfamily Mdid="0.2222.1.0"/>
+          <dxl:Opfamily Mdid="0.7124.1.0"/>
+          <dxl:Opfamily Mdid="0.10002.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBScalarOp Mdid="0.96.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.65.1.0"/>
+        <dxl:Commutator Mdid="0.96.1.0"/>
+        <dxl:InverseOp Mdid="0.518.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.1977.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.7100.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+    </dxl:Metadata>
+    <dxl:Query>
+      <dxl:OutputColumns>
+        <dxl:Ident ColId="7" ColName="?column?" TypeMdid="0.16.1.0"/>
+        <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/>
+      </dxl:OutputColumns>
+      <dxl:CTEList/>
+      <dxl:LogicalProject>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="7" Alias="?column?">
+            <dxl:SubqueryAll OperatorName="=" OperatorMdid="0.96.1.0" ColId="4">
+              <dxl:ScalarSubquery ColId="6">
+                <dxl:LogicalProject>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="6" Alias="?column?">
+                      <dxl:OpExpr OperatorName="+" OperatorMdid="0.551.1.0" OperatorType="0.23.1.0">
+                        <dxl:Ident ColId="1" ColName="column1" TypeMdid="0.23.1.0"/>
+                        <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                      </dxl:OpExpr>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:LogicalConstTable>
+                    <dxl:Columns>
+                      <dxl:Column ColId="5" Attno="1" ColName="" TypeMdid="0.16.1.0"/>
+                    </dxl:Columns>
+                    <dxl:ConstTuple>
+                      <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+                    </dxl:ConstTuple>
+                  </dxl:LogicalConstTable>
+                </dxl:LogicalProject>
+              </dxl:ScalarSubquery>
+              <dxl:LogicalProject>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="4" Alias="generate_series">
+                    <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" TypeMdid="0.23.1.0">
+                      <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+                      <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+                    </dxl:FuncExpr>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:LogicalConstTable>
+                  <dxl:Columns>
+                    <dxl:Column ColId="3" Attno="1" ColName="" TypeMdid="0.16.1.0"/>
+                  </dxl:Columns>
+                  <dxl:ConstTuple>
+                    <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+                  </dxl:ConstTuple>
+                </dxl:LogicalConstTable>
+              </dxl:LogicalProject>
+            </dxl:SubqueryAll>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:LogicalConstTable>
+          <dxl:Columns>
+            <dxl:Column ColId="1" Attno="1" ColName="column1" TypeMdid="0.23.1.0"/>
+          </dxl:Columns>
+          <dxl:ConstTuple>
+            <dxl:Datum TypeMdid="0.23.1.0" Value="1"/>
+          </dxl:ConstTuple>
+          <dxl:ConstTuple>
+            <dxl:Datum TypeMdid="0.23.1.0" Value="2"/>
+          </dxl:ConstTuple>
+        </dxl:LogicalConstTable>
+      </dxl:LogicalProject>
+    </dxl:Query>
+    <dxl:Plan Id="0" SpaceSize="0">
+      <dxl:Result>
+        <dxl:Properties>
+          <dxl:Cost StartupCost="0" TotalCost="452414966.423292" Rows="2.000000" Width="5"/>
+        </dxl:Properties>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="5" Alias="?column?">
+            <dxl:SubPlan TypeMdid="0.16.1.0" SubPlanType="AllSubPlan">
+              <dxl:TestExpr>
+                <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
+                  <dxl:Ident ColId="4" ColName="?column?" TypeMdid="0.23.1.0"/>
+                  <dxl:Ident ColId="2" ColName="generate_series" TypeMdid="0.23.1.0"/>
+                </dxl:Comparison>
+              </dxl:TestExpr>
+              <dxl:ParamList>
+                <dxl:Param ColId="0" ColName="column1" TypeMdid="0.23.1.0"/>
+              </dxl:ParamList>
+              <dxl:Result>
+                <dxl:Properties>
+                  <dxl:Cost StartupCost="0" TotalCost="441380.314547" Rows="1000.000000" Width="9"/>
+                </dxl:Properties>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="2" Alias="generate_series">
+                    <dxl:Ident ColId="2" ColName="generate_series" TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                  <dxl:ProjElem ColId="4" Alias="?column?">
+                    <dxl:Ident ColId="4" ColName="?column?" TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:Filter/>
+                <dxl:OneTimeFilter/>
+                <dxl:Result>
+                  <dxl:Properties>
+                    <dxl:Cost StartupCost="0" TotalCost="441380.314547" Rows="1000.000000" Width="9"/>
+                  </dxl:Properties>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="6" Alias="ColRef_0006">
+                      <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                    </dxl:ProjElem>
+                    <dxl:ProjElem ColId="2" Alias="generate_series">
+                      <dxl:Ident ColId="2" ColName="generate_series" TypeMdid="0.23.1.0"/>
+                    </dxl:ProjElem>
+                    <dxl:ProjElem ColId="4" Alias="?column?">
+                      <dxl:SubPlan TypeMdid="0.23.1.0" SubPlanType="ScalarSubPlan">
+                        <dxl:TestExpr/>
+                        <dxl:ParamList>
+                          <dxl:Param ColId="0" ColName="column1" TypeMdid="0.23.1.0"/>
+                        </dxl:ParamList>
+                        <dxl:Result>
+                          <dxl:Properties>
+                            <dxl:Cost StartupCost="0" TotalCost="0.000005" Rows="1.000000" Width="4"/>
+                          </dxl:Properties>
+                          <dxl:ProjList>
+                            <dxl:ProjElem ColId="4" Alias="?column?">
+                              <dxl:OpExpr OperatorName="+" OperatorMdid="0.551.1.0" OperatorType="0.23.1.0">
+                                <dxl:Ident ColId="0" ColName="column1" TypeMdid="0.23.1.0"/>
+                                <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                              </dxl:OpExpr>
+                            </dxl:ProjElem>
+                          </dxl:ProjList>
+                          <dxl:Filter/>
+                          <dxl:OneTimeFilter/>
+                          <dxl:Result>
+                            <dxl:Properties>
+                              <dxl:Cost StartupCost="0" TotalCost="0.000001" Rows="1.000000" Width="1"/>
+                            </dxl:Properties>
+                            <dxl:ProjList>
+                              <dxl:ProjElem ColId="3" Alias="">
+                                <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                              </dxl:ProjElem>
+                            </dxl:ProjList>
+                            <dxl:Filter/>
+                            <dxl:OneTimeFilter/>
+                          </dxl:Result>
+                        </dxl:Result>
+                      </dxl:SubPlan>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:Filter/>
+                  <dxl:OneTimeFilter/>
+                  <dxl:Result>
+                    <dxl:Properties>
+                      <dxl:Cost StartupCost="0" TotalCost="0.000005" Rows="1.000000" Width="4"/>
+                    </dxl:Properties>
+                    <dxl:ProjList>
+                      <dxl:ProjElem ColId="2" Alias="generate_series">
+                        <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" TypeMdid="0.23.1.0">
+                          <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+                          <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+                        </dxl:FuncExpr>
+                      </dxl:ProjElem>
+                    </dxl:ProjList>
+                    <dxl:Filter/>
+                    <dxl:OneTimeFilter/>
+                    <dxl:Result>
+                      <dxl:Properties>
+                        <dxl:Cost StartupCost="0" TotalCost="0.000001" Rows="1.000000" Width="1"/>
+                      </dxl:Properties>
+                      <dxl:ProjList>
+                        <dxl:ProjElem ColId="1" Alias="">
+                          <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                        </dxl:ProjElem>
+                      </dxl:ProjList>
+                      <dxl:Filter/>
+                      <dxl:OneTimeFilter/>
+                    </dxl:Result>
+                  </dxl:Result>
+                </dxl:Result>
+              </dxl:Result>
+            </dxl:SubPlan>
+          </dxl:ProjElem>
+          <dxl:ProjElem ColId="0" Alias="a">
+            <dxl:Ident ColId="0" ColName="column1" TypeMdid="0.23.1.0"/>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:Filter/>
+        <dxl:OneTimeFilter/>
+        <dxl:Values>
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="0.000008" Rows="2.000000" Width="4"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="0" Alias="column1">
+              <dxl:Ident ColId="0" ColName="column1" TypeMdid="0.23.1.0"/>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:ValuesList>
+            <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+          </dxl:ValuesList>
+          <dxl:ValuesList>
+            <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/>
+          </dxl:ValuesList>
+        </dxl:Values>
+      </dxl:Result>
+    </dxl:Plan>
+  </dxl:Thread>
+</dxl:DXLMessage>
diff --git a/src/backend/gporca/server/src/unittest/gpopt/xforms/CSubqueryHandlerTest.cpp b/src/backend/gporca/server/src/unittest/gpopt/xforms/CSubqueryHandlerTest.cpp
index 0c730984807..8b87f813205 100644
--- a/src/backend/gporca/server/src/unittest/gpopt/xforms/CSubqueryHandlerTest.cpp
+++ b/src/backend/gporca/server/src/unittest/gpopt/xforms/CSubqueryHandlerTest.cpp
@@ -34,7 +34,9 @@ const CHAR *rgszSubqueryHandlerMinidumpFileNames[] = {
 	"../data/dxl/minidump/CorrelatedSubqueryWithAggWindowFunc.mdp",
 	"../data/dxl/minidump/AllSubqueryWithSubqueryInScalar.mdp",
 	"../data/dxl/minidump/AnySubqueryWithAllSubqueryInScalar.mdp",
-	"../data/dxl/minidump/AnySubqueryWithSubqueryInScalar.mdp"};
+	"../data/dxl/minidump/AnySubqueryWithSubqueryInScalar.mdp",
+	"../data/dxl/minidump/ScalarSubq-Eq-SubqAll-1.mdp",
+	"../data/dxl/minidump/ScalarSubq-Eq-SubqAll-2.mdp"};
 
 //---------------------------------------------------------------------------
 //	@function:
diff --git a/src/test/regress/expected/correlated_subquery.out b/src/test/regress/expected/correlated_subquery.out
new file mode 100644
index 00000000000..99625097746
--- /dev/null
+++ b/src/test/regress/expected/correlated_subquery.out
@@ -0,0 +1,128 @@
+SET optimizer_enforce_subplans = 1;
+SET optimizer_trace_fallback=on;
+SET client_min_messages=log;
+SELECT a = ALL (SELECT generate_series(1, 2)), a FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT a = ALL (SELECT generate_series(1, 2)), a FROM (values (1),(2)) v(a);
+ ?column? | a 
+----------+---
+ f        | 1
+ f        | 2
+(2 rows)
+
+SELECT a = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT a = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+ ?column? | a 
+----------+---
+ f        | 1
+ t        | 2
+(2 rows)
+
+SELECT 1 = ALL (SELECT generate_series(1, 2)) FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT 1 = ALL (SELECT generate_series(1, 2)) FROM (values (1),(2)) v(a);
+ ?column? 
+----------
+ f
+ f
+(2 rows)
+
+SELECT 2 = ALL (SELECT generate_series(2, 2)) FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT 2 = ALL (SELECT generate_series(2, 2)) FROM (values (1),(2)) v(a);
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT 2 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT 2 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
+ ?column? 
+----------
+ f
+ f
+(2 rows)
+
+SELECT 2+1 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT 2+1 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
+ ?column? 
+----------
+ f
+ f
+(2 rows)
+
+SELECT 2+1 = ALL (SELECT generate_series(3, 3)) FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT 2+1 = ALL (SELECT generate_series(3, 3)) FROM (values (1),(2)) v(a);
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT (SELECT a) = ALL (SELECT generate_series(1, 2)), a FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT (SELECT a) = ALL (SELECT generate_series(1, 2)), a FROM (values (1),(2)) v(a);
+ ?column? | a 
+----------+---
+ f        | 1
+ f        | 2
+(2 rows)
+
+SELECT (SELECT a) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT (SELECT a) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+ ?column? | a 
+----------+---
+ f        | 1
+ t        | 2
+(2 rows)
+
+SELECT (SELECT a+1) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT (SELECT a+1) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+ ?column? | a 
+----------+---
+ t        | 1
+ f        | 2
+(2 rows)
+
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 1)) FROM (values (1),(2)) v(a);
+LOG:  statement: SELECT (SELECT 1) = ALL (SELECT generate_series(1, 1)) FROM (values (1),(2)) v(a);
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2)) FROM  (values (1),(2)) v(a);
+LOG:  statement: SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2)) FROM  (values (1),(2)) v(a);
+ ?column? 
+----------
+ f
+ f
+(2 rows)
+
+SELECT (SELECT 3) = ALL (SELECT generate_series(3, 3)) FROM  (values (1),(2)) v(a);
+LOG:  statement: SELECT (SELECT 3) = ALL (SELECT generate_series(3, 3)) FROM  (values (1),(2)) v(a);
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 1));
+LOG:  statement: SELECT (SELECT 1) = ALL (SELECT generate_series(1, 1));
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2));
+LOG:  statement: SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2));
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT (SELECT 3) = ALL (SELECT generate_series(3, 3));
+LOG:  statement: SELECT (SELECT 3) = ALL (SELECT generate_series(3, 3));
+ ?column? 
+----------
+ t
+(1 row)
+
diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule
index 71d0e97eaa1..c12d6ad33f5 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -191,7 +191,7 @@ test: qp_misc_jiras qp_with_clause qp_executor qp_olap_windowerr qp_olap_window
 test: qp_with_functional_inlining qp_with_functional_noinlining
 test: qp_functions_in_contexts_setup
 # test: qp_misc_rio_join_small qp_misc_rio qp_correlated_query qp_targeted_dispatch qp_gist_indexes2 qp_gist_indexes3 qp_gist_indexes4 qp_query_execution qp_functions_in_from qp_functions_in_select qp_functions_in_subquery qp_functions_in_subquery_column qp_functions_in_subquery_constant qp_functions_in_with
-test: qp_misc_rio_join_small qp_correlated_query qp_targeted_dispatch qp_gist_indexes2 qp_gist_indexes3 qp_gist_indexes4 qp_query_execution qp_functions_in_from qp_functions_in_select qp_functions_in_subquery qp_functions_in_subquery_column qp_functions_in_with
+test: qp_misc_rio_join_small qp_correlated_query qp_targeted_dispatch qp_gist_indexes2 qp_gist_indexes3 qp_gist_indexes4 qp_query_execution qp_functions_in_from qp_functions_in_select qp_functions_in_subquery qp_functions_in_subquery_column qp_functions_in_with correlated_subquery
 
 test: dpe qp_dpe qp_subquery qp_left_anti_semi_join qp_union_intersect qp_functions qp_functions_idf qp_regexp qp_resource_queue qp_orca_fallback
 
diff --git a/src/test/regress/sql/correlated_subquery.sql b/src/test/regress/sql/correlated_subquery.sql
new file mode 100644
index 00000000000..b4a6d8283c3
--- /dev/null
+++ b/src/test/regress/sql/correlated_subquery.sql
@@ -0,0 +1,21 @@
+SET optimizer_enforce_subplans = 1;
+SET optimizer_trace_fallback=on;
+SET client_min_messages=log;
+
+SELECT a = ALL (SELECT generate_series(1, 2)), a FROM (values (1),(2)) v(a);
+SELECT a = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+SELECT 1 = ALL (SELECT generate_series(1, 2)) FROM (values (1),(2)) v(a);
+SELECT 2 = ALL (SELECT generate_series(2, 2)) FROM (values (1),(2)) v(a);
+SELECT 2 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
+SELECT 2+1 = ALL (SELECT generate_series(2, 3)) FROM (values (1),(2)) v(a);
+SELECT 2+1 = ALL (SELECT generate_series(3, 3)) FROM (values (1),(2)) v(a);
+SELECT (SELECT a) = ALL (SELECT generate_series(1, 2)), a FROM (values (1),(2)) v(a);
+SELECT (SELECT a) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+SELECT (SELECT a+1) = ALL (SELECT generate_series(2, 2)), a FROM (values (1),(2)) v(a);
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 1)) FROM (values (1),(2)) v(a);
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2)) FROM  (values (1),(2)) v(a);
+SELECT (SELECT 3) = ALL (SELECT generate_series(3, 3)) FROM  (values (1),(2)) v(a);
+
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 1));
+SELECT (SELECT 1) = ALL (SELECT generate_series(1, 2));
+SELECT (SELECT 3) = ALL (SELECT generate_series(3, 3));

From 3e7efe16570b1c56f71e0d674e16c9a68d04cf98 Mon Sep 17 00:00:00 2001
From: Alexandra Wang <alexandra.wanglei@gmail.com>
Date: Tue, 8 Feb 2022 16:43:09 +0800
Subject: [PATCH 10/48] GPORCA CI: add workload6 to explain pipeline

---
 .../concourse/test_explain_pipeline.yml       | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/src/backend/gporca/concourse/test_explain_pipeline.yml b/src/backend/gporca/concourse/test_explain_pipeline.yml
index 6d8a31da844..81759a2b82c 100644
--- a/src/backend/gporca/concourse/test_explain_pipeline.yml
+++ b/src/backend/gporca/concourse/test_explain_pipeline.yml
@@ -171,6 +171,28 @@ resources:
     json_key: ((concourse-gcs-resources-orca-service-account-key))
     versioned_file: ((pipeline-name))/explain_results/explain_((workload4))_results.tar.gz
 
+## resources for workload 6
+- name: explain_output_((workload6))_branch
+  type: gcs
+  source:
+    bucket: ((gcs-bucket-orca))
+    json_key: ((concourse-gcs-resources-orca-service-account-key))
+    versioned_file: ((pipeline-name))/explain_intermediate/explain_((workload6))_branch.tar.gz
+
+- name: explain_output_((workload6))_baseline
+  type: gcs
+  source:
+    bucket: ((gcs-bucket-orca))
+    json_key: ((concourse-gcs-resources-orca-service-account-key))
+    versioned_file: ((pipeline-name))/explain_intermediate/explain_((workload6))_baseline.tar.gz
+
+- name: explain_((workload6))_results
+  type: gcs
+  source:
+    bucket: ((gcs-bucket-orca))
+    json_key: ((concourse-gcs-resources-orca-service-account-key))
+    versioned_file: ((pipeline-name))/explain_results/explain_((workload6))_results.tar.gz
+
 ## ======================================================================
 ## jobs
 ## ======================================================================
@@ -631,3 +653,94 @@ jobs:
       put: explain_((workload4))_results
       params:
         file: diffs/explain_test_results.tar.gz
+
+## ======================================================================
+## workload6
+## ======================================================================
+
+- name: run_explain_suite_((workload6))
+  max_in_flight: 1
+  plan:
+  - in_parallel:
+    - get: gporca-commits-to-test
+      passed:
+      - compile_branch
+    - get: bin_gpdb
+      passed:
+      - compile_branch
+      resource: bin_gpdb_centos7_branch
+      trigger: true
+    - get: gpdb_src
+      resource: gpdb_main_src
+    - get: gp_workloads
+      params:
+        disable_git_lfs: true
+        depth: 1
+  - task: run_explain_suite
+    file: gpdb_src/concourse/tasks/run_explain_suite.yml
+    params:
+      MODE: orca
+      WORKLOAD: ((workload6))
+    on_success:
+      try:
+        put: explain_output_((workload6))_branch
+        params:
+          file: output/explain_ouput.tar.gz
+
+- name: run_explain_suite_((workload6))_baseline
+  max_in_flight: 1
+  plan:
+  - in_parallel:
+    - get: gporca-commits-to-test
+      passed:
+      - compile_baseline
+    - get: bin_gpdb
+      resource: bin_gpdb_centos7_baseline
+      passed:
+      - compile_baseline
+      trigger: true
+    - get: gpdb_src
+      resource: gpdb_main_src
+      params:
+        submodules: none
+    - get: gp_workloads
+      params:
+        disable_git_lfs: true
+        depth: 1
+  - task: run_explain_suite_baseline
+    file: gpdb_src/concourse/tasks/run_explain_suite.yml
+    params:
+      MODE: orca
+      WORKLOAD: ((workload6))
+    on_success:
+      try:
+        put: explain_output_((workload6))_baseline
+        params:
+          file: output/explain_ouput.tar.gz
+
+- name: diff_explain_results_((workload6))
+  plan:
+  - in_parallel:
+    - get: gporca-commits-to-test
+      passed:
+      - run_explain_suite_((workload6))
+      - run_explain_suite_((workload6))_baseline
+      trigger: true
+    - get: gpdb_src
+      resource: gpdb_main_src
+      params:
+        submodules: none
+    - get: explain_output
+      resource: explain_output_((workload6))_branch
+      passed:
+      - run_explain_suite_((workload6))
+    - get: explain_output_baseline
+      resource: explain_output_((workload6))_baseline
+      passed:
+      - run_explain_suite_((workload6))_baseline
+  - task: diff_explain_results_((workload6))
+    file: gpdb_src/concourse/tasks/diff_explain_results_with_baseline.yml
+    ensure:
+      put: explain_((workload6))_results
+      params:
+        file: diffs/explain_test_results.tar.gz

From cc1850d85321312ec2a4a8db9c4ee66c9df09372 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 15 Feb 2022 12:21:28 -0500
Subject: [PATCH 11/48] Improve subscriber's error message for wrong
 publication relkind.

Pre-v13 versions only support logical replication from plain tables,
while v13 and later also allow partitioned tables to be published.
If you tried to subscribe an older server to such a publication,
you got "table XXX not found on publisher", which is pretty
unhelpful/confusing.  Arrange to deliver a more on-point error
message.  As commit c314c147c did in v13, remove the relkind check
from the query WHERE clause altogether, so that "not there"
is distinguishable from "wrong relkind".

Per report from Radoslav Nedyalkov.  Patch v10-v12.

Discussion: https://postgr.es/m/2952568.1644876730@sss.pgh.pa.us
---
 src/backend/replication/logical/tablesync.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index 4c6f8646cb8..d104ccf86e1 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -700,9 +700,10 @@ fetch_remote_table_info(char *nspname, char *relname,
 	WalRcvExecResult *res;
 	StringInfoData cmd;
 	TupleTableSlot *slot;
-	Oid			tableRow[] = {OIDOID, CHAROID, CHAROID};
-	Oid			attrRow[] = {TEXTOID, OIDOID, BOOLOID};
+	Oid			tableRow[3] = {OIDOID, CHAROID, CHAROID};
+	Oid			attrRow[4] = {TEXTOID, OIDOID, INT4OID, BOOLOID};
 	bool		isnull;
+	char		relkind;
 	int			natt;
 
 	lrel->nspname = nspname;
@@ -738,9 +739,20 @@ fetch_remote_table_info(char *nspname, char *relname,
 	Assert(!isnull);
 	lrel->replident = DatumGetChar(slot_getattr(slot, 2, &isnull));
 	Assert(!isnull);
-	lrel->relkind = DatumGetChar(slot_getattr(slot, 3, &isnull));
+	relkind = DatumGetChar(slot_getattr(slot, 3, &isnull));
 	Assert(!isnull);
 
+	/*
+	 * Newer PG versions allow things that aren't plain tables to appear in
+	 * publications.  We don't handle that in this version, but try to provide
+	 * a useful error message.
+	 */
+	if (relkind != RELKIND_RELATION)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("logical replication source relation \"%s.%s\" is not a table",
+						nspname, relname)));
+
 	ExecDropSingleTupleTableSlot(slot);
 	walrcv_clear_result(res);
 

From 2d733aff2e6c851315ab6a0e89eedb4cc97bdf8d Mon Sep 17 00:00:00 2001
From: Chris Hajas <chajas@vmware.com>
Date: Tue, 8 Feb 2022 15:33:38 -0800
Subject: [PATCH 12/48] Fix flaky aggregates test

Sometimes these prepared statements would be replanned and log ORCA
fallbacks, which are expected and OK. However, to make the test output
more deterministic, reset the plan cache in the test.
---
 src/test/regress/expected/aggregates.out      |  2 ++
 .../regress/expected/aggregates_optimizer.out | 22 +++++++++++++++++--
 src/test/regress/sql/aggregates.sql           |  2 ++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index c47e03dfd88..62548a4c69c 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2773,6 +2773,8 @@ create aggregate my_avg_init2(int4)
    finalfunc = avg_finalfn,
    initcond = '(4,0)'
 );
+-- reset the plan cache, sometimes it would re-plan these prepared statements and log ORCA fallbacks
+discard plans;
 -- state should be shared if INITCONDs are matching
 select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);
 NOTICE:  avg_transfn called with 1
diff --git a/src/test/regress/expected/aggregates_optimizer.out b/src/test/regress/expected/aggregates_optimizer.out
index 33b127d375e..634744680d9 100644
--- a/src/test/regress/expected/aggregates_optimizer.out
+++ b/src/test/regress/expected/aggregates_optimizer.out
@@ -2929,12 +2929,30 @@ create aggregate my_avg_init2(int4)
    finalfunc = avg_finalfn,
    initcond = '(4,0)'
 );
+-- reset the plan cache, sometimes it would re-plan these prepared statements and log ORCA fallbacks
+discard plans;
 -- state should be shared if INITCONDs are matching
 select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
 NOTICE:  avg_transfn called with 1
-NOTICE:  avg_transfn called with 1
-NOTICE:  avg_transfn called with 3
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
 NOTICE:  avg_transfn called with 3
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
  my_sum_init | my_avg_init 
 -------------+-------------
           14 |           7
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 9268a11b3df..16b811d9bbe 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1103,6 +1103,8 @@ create aggregate my_avg_init2(int4)
    initcond = '(4,0)'
 );
 
+-- reset the plan cache, sometimes it would re-plan these prepared statements and log ORCA fallbacks
+discard plans;
 -- state should be shared if INITCONDs are matching
 select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);
 

From d9b99c1bbd240170dc16ec9fa1d599deecb3e716 Mon Sep 17 00:00:00 2001
From: Chris Hajas <chajas@vmware.com>
Date: Tue, 8 Feb 2022 15:57:51 -0800
Subject: [PATCH 13/48] Enable optimizer_trace_fallback in gp_dqa ICW test

We saw a flaky failure here, but haven't been able to reproduce it. It was
a fallback to planner, but we're not exactly sure why. Hopefully enabling
optimizer_trace_fallback will give us a hint if it happens again, and it's
good to enable it anyway so we can more easily tell when and why Orca
falls back.
---
 src/test/regress/expected/gp_dqa.out          |   1 +
 .../regress/expected/gp_dqa_optimizer.out     | 149 ++++++++++++++++++
 src/test/regress/sql/gp_dqa.sql               |   2 +
 3 files changed, 152 insertions(+)

diff --git a/src/test/regress/expected/gp_dqa.out b/src/test/regress/expected/gp_dqa.out
index 36643b10471..110611f01c5 100644
--- a/src/test/regress/expected/gp_dqa.out
+++ b/src/test/regress/expected/gp_dqa.out
@@ -4,6 +4,7 @@
 -- differences by setting 'extra_float_digits'. This isn't enough for all of
 -- the queries, so a few also use TO_CHAR() to truncate the results further.
 set extra_float_digits=0;
+SET optimizer_trace_fallback to on;
 drop table if exists dqa_t1;
 NOTICE:  table "dqa_t1" does not exist, skipping
 drop table if exists dqa_t2;
diff --git a/src/test/regress/expected/gp_dqa_optimizer.out b/src/test/regress/expected/gp_dqa_optimizer.out
index 3897ae46f1b..bcc734b3978 100644
--- a/src/test/regress/expected/gp_dqa_optimizer.out
+++ b/src/test/regress/expected/gp_dqa_optimizer.out
@@ -4,6 +4,7 @@
 -- differences by setting 'extra_float_digits'. This isn't enough for all of
 -- the queries, so a few also use TO_CHAR() to truncate the results further.
 set extra_float_digits=0;
+SET optimizer_trace_fallback to on;
 drop table if exists dqa_t1;
 NOTICE:  table "dqa_t1" does not exist, skipping
 drop table if exists dqa_t2;
@@ -114,12 +115,16 @@ explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1 group
 (11 rows)
 
 select count(distinct d), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count 
 -------+-------
     23 |    34
 (1 row)
 
 explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                             QUERY PLAN                            
 ------------------------------------------------------------------
  Finalize Aggregate
@@ -136,12 +141,16 @@ explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1;
 (11 rows)
 
 select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count | count 
 -------+-------+-------
     23 |    10 |    34
 (1 row)
 
 explain (costs off) select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                             QUERY PLAN                            
 ------------------------------------------------------------------
  Finalize Aggregate
@@ -158,6 +167,8 @@ explain (costs off) select count(distinct d), count(distinct c), count(distinct
 (11 rows)
 
 select count(distinct d), count(distinct dt) from dqa_t1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count 
 -------+-------
     10 |    10
@@ -173,6 +184,8 @@ select count(distinct d), count(distinct dt) from dqa_t1 group by c;
 (10 rows)
 
 explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                QUERY PLAN                               
 ------------------------------------------------------------------------
  Finalize HashAggregate
@@ -194,6 +207,8 @@ explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 gro
 (16 rows)
 
 select count(distinct d), count(distinct dt) from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count 
 -------+-------
      1 |     5
@@ -222,6 +237,8 @@ select count(distinct d), count(distinct dt) from dqa_t1 group by d;
 (23 rows)
 
 explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                QUERY PLAN                               
 ------------------------------------------------------------------------
  Finalize HashAggregate
@@ -483,12 +500,16 @@ explain (costs off) select count(distinct i), sum(distinct i) from dqa_t1 group
 (9 rows)
 
 select count(distinct c), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count 
 -------+-------
     10 |    34
 (1 row)
 
 explain (costs off) select count(distinct c), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                             QUERY PLAN                            
 ------------------------------------------------------------------
  Finalize Aggregate
@@ -505,6 +526,8 @@ explain (costs off) select count(distinct c), count(distinct dt) from dqa_t1;
 (11 rows)
 
 select count(distinct c), count(distinct dt), i from dqa_t1 group by i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count | i  
 -------+-------+----
      5 |     9 |  3
@@ -522,6 +545,8 @@ select count(distinct c), count(distinct dt), i from dqa_t1 group by i;
 (12 rows)
 
 explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1 group by i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                QUERY PLAN                               
 ------------------------------------------------------------------------
  Finalize HashAggregate
@@ -543,6 +568,8 @@ explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1
 (16 rows)
 
 select count(distinct i), count(distinct c), d from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count | d  
 -------+-------+----
      5 |     5 |  3
@@ -571,6 +598,8 @@ select count(distinct i), count(distinct c), d from dqa_t1 group by d;
 (23 rows)
 
 explain (costs off) select count(distinct i), count(distinct c), d from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                QUERY PLAN                               
 ------------------------------------------------------------------------
  Finalize HashAggregate
@@ -702,6 +731,8 @@ explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where d
 
 -- multidqa with groupby and order by
 select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  sum | count | count | i  | c 
 -----+-------+-------+----+---
   14 |     1 |     1 |  0 | 0
@@ -767,6 +798,8 @@ select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 gro
 (60 rows)
 
 explain (costs off) select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                      QUERY PLAN                                     
 ------------------------------------------------------------------------------------
  Sort
@@ -877,12 +910,16 @@ explain (costs off) select to_char(corr(distinct d, i), '9.99999999999999') from
 
 -- multi args multidqa
 select count(distinct c), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count |        corr        
 -------+--------------------
     10 | 0.0824013341460019
 (1 row)
 
 explain (costs off) select count(distinct c), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                              QUERY PLAN                                              
 -----------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -899,12 +936,16 @@ explain (costs off) select count(distinct c), corr(distinct d, i) from dqa_t1;
 (11 rows)
 
 select count(distinct d), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count |        corr        
 -------+--------------------
     23 | 0.0824013341460019
 (1 row)
 
 explain (costs off) select count(distinct d), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                              QUERY PLAN                                              
 -----------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -921,12 +962,16 @@ explain (costs off) select count(distinct d), corr(distinct d, i) from dqa_t1;
 (11 rows)
 
 select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count |        corr        
 -------+-------+--------------------
     23 |    12 | 0.0824013341460019
 (1 row)
 
 explain (costs off) select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                QUERY PLAN                                               
 --------------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -943,12 +988,16 @@ explain (costs off) select count(distinct d), count(distinct i), corr(distinct d
 (11 rows)
 
 select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count | count |        corr        
 -------+-------+-------+--------------------
     10 |    23 |    12 | 0.0824013341460019
 (1 row)
 
 explain (costs off) select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                  QUERY PLAN                                                 
 ------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -966,6 +1015,8 @@ explain (costs off) select count(distinct c), count(distinct d), count(distinct
 
 -- multi args multidqa with group by
 select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | corr | d  
 -------+------+----
      5 |      |  0
@@ -994,6 +1045,8 @@ select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d;
 (23 rows)
 
 explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                   QUERY PLAN                                                  
 --------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate
@@ -1015,6 +1068,8 @@ explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1
 (16 rows)
 
 select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | corr | d  | i  
 -------+------+----+----
      1 |      |  0 |  0
@@ -1120,6 +1175,8 @@ select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i;
 (100 rows)
 
 explain (costs off) select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                    QUERY PLAN                                                    
 -----------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate
@@ -1141,6 +1198,8 @@ explain (costs off) select count(distinct c), corr(distinct d, i), d, i from dqa
 (16 rows)
 
 select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count |        corr        |     dt     
 -------+--------------------+------------
      3 |   0.59603956067927 | 06-25-2009
@@ -1180,6 +1239,8 @@ select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt;
 (34 rows)
 
 explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                   QUERY PLAN                                                   
 ---------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate
@@ -1201,6 +1262,8 @@ explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t
 (16 rows)
 
 select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | corr | i  
 -------+------+----
      9 |      |  0
@@ -1218,6 +1281,8 @@ select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i;
 (12 rows)
 
 explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                   QUERY PLAN                                                  
 --------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate
@@ -1239,6 +1304,8 @@ explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1
 (16 rows)
 
 select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | corr | d  
 -------+------+----
      1 |      |  0
@@ -1267,6 +1334,8 @@ select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d;
 (23 rows)
 
 explain (costs off) select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                 QUERY PLAN                                                 
 -----------------------------------------------------------------------------------------------------------
  Finalize HashAggregate
@@ -1288,6 +1357,8 @@ explain (costs off) select count(distinct d), corr(distinct d, i), d from dqa_t1
 (16 rows)
 
 select count(distinct d),  to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count |      to_char      | c 
 -------+-------------------+---
     10 |   .13670602618479 | 0
@@ -1303,6 +1374,8 @@ select count(distinct d),  to_char(corr(distinct d, i), '9.99999999999999'), c f
 (10 rows)
 
 explain (costs off) select count(distinct d),  to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                   QUERY PLAN                                                  
 --------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate
@@ -1359,6 +1432,8 @@ from
      fact_route_aggregation T218094
 where  ( T43883.device_id = T218094.device_id ) 
 group by T43883.platform;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 
 ----+----+----+----+----+----+----+----+----
 (0 rows)
@@ -1384,6 +1459,8 @@ insert into t2_mdqa select i % 10 , i % 5, i || 'value' from generate_series(1,
 insert into t2_mdqa select i % 10 , i % 5, i || 'value' from generate_series(1, 20) i;
 -- simple mdqa
 select count(distinct t1.a), count(distinct t2.b), t1.c, t2.c from t1_mdqa t1, t2_mdqa t2 where t1.c = t2.c group by t1.c, t2.c order by t1.c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count |    c    |    c    
 -------+-------+---------+---------
      1 |     1 | 10value | 10value
@@ -1410,6 +1487,8 @@ select count(distinct t1.a), count(distinct t2.b), t1.c, t2.c from t1_mdqa t1, t
 
 -- distinct on top of some mdqas
 select distinct sum(distinct t1.a), avg(t2.a), sum(distinct t2.b), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  sum |          avg           | sum | a | b 
 -----+------------------------+-----+---+---
    0 | 0.00000000000000000000 |   0 | 0 | 0
@@ -1420,6 +1499,8 @@ select distinct sum(distinct t1.a), avg(t2.a), sum(distinct t2.b), t1.a, t2.b fr
 (5 rows)
 
 select distinct sum (distinct t1.a), avg(distinct t2.a), sum(distinct t2.b), t1.c from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.c order by t1.c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  sum |          avg           | sum |    c    
 -----+------------------------+-----+---------
    0 | 0.00000000000000000000 |   0 | 10value
@@ -1446,6 +1527,8 @@ select distinct sum (distinct t1.a), avg(distinct t2.a), sum(distinct t2.b), t1.
 
 -- distinct on group by fields
 select distinct t1.c , sum(distinct t1.a), count(t2.b), sum(distinct t2.b) from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.c order by t1.c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
     c    | sum | count | sum 
 ---------+-----+-------+-----
  10value |   0 |     8 |   0
@@ -1472,6 +1555,8 @@ select distinct t1.c , sum(distinct t1.a), count(t2.b), sum(distinct t2.b) from
 
 -- distinct on normal aggregates
 select distinct sum(t1.a), avg(distinct t2.a), sum(distinct (t1.a + t2.a)), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  sum |          avg           | sum | a | b 
 -----+------------------------+-----+---+---
    0 | 0.00000000000000000000 |   0 | 0 | 0
@@ -1482,6 +1567,8 @@ select distinct sum(t1.a), avg(distinct t2.a), sum(distinct (t1.a + t2.a)), t1.a
 (5 rows)
 
 select distinct avg(t1.a + t2.b), count(distinct t1.c), count(distinct char_length(t1.c)), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
           avg           | count | count | a | b 
 ------------------------+-------+-------+---+---
  0.00000000000000000000 |     4 |     2 | 0 | 0
@@ -1508,6 +1595,8 @@ HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
 insert into gp_dqa_r  select i , i %10, i%5 from generate_series(1,20) i;
 insert into gp_dqa_s select i, i %15, i%10 from generate_series(1,30) i;
 select a, d, count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r, gp_dqa_s where ( e = a ) group by d, a order by a,d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  a  | d  | c1 | c2 
 ----+----+----+----
   1 |  1 |  1 |  1
@@ -1549,6 +1638,8 @@ d as c9
 from gp_dqa_r, gp_dqa_s
 where ( e = a ) 
 group by d order by c9;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | c3 | c2 | c9 
 ----+----+----+----+----
   1 |  2 |  1 |  1 |  1
@@ -1588,6 +1679,8 @@ d as c9
 from gp_dqa_r, gp_dqa_s
 where ( e = a ) 
 group by d order by c9;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | c9 
 ----+----+----
   1 |  1 |  1
@@ -1624,6 +1717,8 @@ select distinct count(distinct b) as c1, count(distinct c) as c2, d as c9
 from gp_dqa_r, gp_dqa_s
 where ( e = a ) 
 group by d order by c9;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | c9 
 ----+----+----
   1 |  1 |  1
@@ -1657,6 +1752,8 @@ group by d order by c9;
 (28 rows)
 
 select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s group by d order by c9;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  d  | c1 | c2 | c9 
 ----+----+----+----
   1 | 10 |  5 |  1
@@ -1692,6 +1789,8 @@ select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 fro
 (30 rows)
 
 select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s group by d, a order by c9;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  d  | c1 | c2 | c9 
 ----+----+----+----
   1 |  1 |  1 |  1
@@ -1727,18 +1826,24 @@ select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 fro
 (30 rows)
 
 select distinct count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r, gp_dqa_s;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 
 ----+----
  10 |  5
 (1 row)
 
 select distinct count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 
 ----+----
  10 |  5
 (1 row)
 
 select distinct count(distinct b) as c1, count(distinct c) as c2, d, a from gp_dqa_r, gp_dqa_s where ( e = a)group by d, a order by a,d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | d  | a  
 ----+----+----+----
   1 |  1 |  1 |  1
@@ -1776,6 +1881,8 @@ ERROR:  for SELECT DISTINCT, ORDER BY expressions must appear in select list
 LINE 1: ...as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d,a;
                                                                      ^
 select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | d  
 ----+----+----
   1 |  1 |  1
@@ -1811,6 +1918,8 @@ select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_
 (30 rows)
 
 select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d order by d;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  c1 | c2 | d  
 ----+----+----
  10 |  5 |  1
@@ -1858,6 +1967,8 @@ create table gp_dqa_t2 (a int, c int) distributed by (a);
 insert into gp_dqa_t1 select i , i %5 from generate_series(1,10) i;
 insert into gp_dqa_t2 select i , i %4 from generate_series(1,10) i;
 select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A left join gp_dqa_t2 B on (A.a = B.a) group by A.a order by A.a;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  a  | sum | count 
 ----+-----+-------
   1 |   1 |     1
@@ -1873,6 +1984,8 @@ select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A lef
 (10 rows)
 
 select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A right join gp_dqa_t2 B on (A.a = B.a) group by A.a order by A.a;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  a  | sum | count 
 ----+-----+-------
   1 |   1 |     1
@@ -1925,12 +2038,16 @@ explain (costs off) select count(distinct d) from dqa_t1 group by i;
 (11 rows)
 
 select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count | count 
 -------+-------+-------
     23 |    10 |    34
 (1 row)
 
 select count(distinct c), count(distinct dt), i from dqa_t1 group by i;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  count | count | i  
 -------+-------+----
      5 |     9 |  3
@@ -1954,11 +2071,15 @@ create table foo_mdqa(x int, y int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 SELECT distinct C.z, count(distinct FS.x), count(distinct FS.y) FROM (SELECT 1 AS z FROM generate_series(1,10)) C, foo_mdqa FS GROUP BY z;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  z | count | count 
 ---+-------+-------
 (0 rows)
 
 SELECT distinct C.z, count(distinct FS.x), count(distinct FS.y) FROM (SELECT i AS z FROM generate_series(1,10) i) C, foo_mdqa FS GROUP BY z;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  z | count | count 
 ---+-------+-------
 (0 rows)
@@ -1977,6 +2098,10 @@ HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
 insert into nonullstab select 1, 1 from generate_series(1, 100);
 -- This returns wrong result. countall(distinct a) should return 1.
 select countall(distinct a), count(distinct b) from nonullstab;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
  countall | count 
 ----------+-------
         1 |     1
@@ -1990,12 +2115,16 @@ create table dqa_f2(x int, y int, z int) distributed by (x);
 insert into dqa_f1 select i%17, i%5 , i%3 from generate_series(1,1000) i;
 insert into dqa_f2 select i % 13, i % 5 , i % 11 from generate_series(1,1000) i;
 select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
  sum | sum 
 -----+-----
  136 |  10
 (1 row)
 
 select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
  sum | sum 
 -----+-----
  136 |   0
@@ -2006,6 +2135,8 @@ select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0
 (5 rows)
 
 select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
  sum | sum 
 -----+-----
  136 |  10
@@ -2014,12 +2145,16 @@ select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0
 (3 rows)
 
 select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
  sum | sum 
 -----+-----
   78 |  10
 (1 row)
 
 select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
  sum | sum 
 -----+-----
   78 |  10
@@ -2028,12 +2163,16 @@ select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), s
 (3 rows)
 
 select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
  count | count | sum 
 -------+-------+-----
     13 |     5 |  10
 (1 row)
 
 explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
                                               QUERY PLAN                                              
 ------------------------------------------------------------------------------------------------------
  Finalize Aggregate  (cost=20.66..20.67 rows=1 width=16)
@@ -2050,6 +2189,8 @@ explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (whe
 (11 rows)
 
 explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
                                                  QUERY PLAN                                                 
 ------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate  (cost=21.62..21.67 rows=5 width=20)
@@ -2071,6 +2212,8 @@ explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (whe
 (16 rows)
 
 explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
                                                  QUERY PLAN                                                  
 -------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate  (cost=21.20..21.23 rows=3 width=20)
@@ -2092,6 +2235,8 @@ explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (whe
 (16 rows)
 
 explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
                                                              QUERY PLAN                                                             
 ------------------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate  (cost=96.41..96.42 rows=1 width=16)
@@ -2114,6 +2259,8 @@ explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x
 (17 rows)
 
 explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
                                                                 QUERY PLAN                                                                
 ------------------------------------------------------------------------------------------------------------------------------------------
  Finalize HashAggregate  (cost=181.11..181.14 rows=3 width=20)
@@ -2141,6 +2288,8 @@ explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x
 (22 rows)
 
 explain select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Aggregate functions with FILTER
                                               QUERY PLAN                                              
 ------------------------------------------------------------------------------------------------------
  Finalize Aggregate  (cost=20.67..20.68 rows=1 width=24)
diff --git a/src/test/regress/sql/gp_dqa.sql b/src/test/regress/sql/gp_dqa.sql
index ab8c0d91aac..808cee12c7e 100644
--- a/src/test/regress/sql/gp_dqa.sql
+++ b/src/test/regress/sql/gp_dqa.sql
@@ -5,6 +5,8 @@
 -- the queries, so a few also use TO_CHAR() to truncate the results further.
 set extra_float_digits=0;
 
+SET optimizer_trace_fallback to on;
+
 drop table if exists dqa_t1;
 drop table if exists dqa_t2;
 

From 00f312f5f2c445af1b079d5777db85a23c2c14af Mon Sep 17 00:00:00 2001
From: Huansong Fu <fuhuansong@gmail.com>
Date: Fri, 11 Feb 2022 12:11:26 -0800
Subject: [PATCH 14/48] Existing reloption shouldn't affect redistribution in
 ALTER TABLE SET DISTRIBUTE

This fixes an issue where existing reloptions would cause a reorganize in
situations where it should not. We used to allow the ALTER TABLE SET
DISTRIBUTED WITH clause to have additional storage options, and when they
were present, we would redistribute data over the segments.

However, since 50f2e3bbb88, we don't allow new reloptions to be supplied in
the ALTER TABLE SET DISTRIBUTED WITH clause. So this commit is really a
follow-up, removing the redistribution decision-making logic based on
reloptions. Also renamed new_rel_opts() to get_rel_opts() accordingly.
---
 src/backend/commands/tablecmds.c              | 85 +++----------------
 .../expected/alter_distribution_policy.out    | 50 +++++++++++
 .../regress/sql/alter_distribution_policy.sql | 30 +++++++
 3 files changed, 93 insertions(+), 72 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 3cc182ee5d3..982d4603aed 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -17581,8 +17581,11 @@ build_ctas_with_dist(Relation rel, DistributedBy *dist_clause,
 	return queryDesc;
 }
 
+/*
+ * GPDB: Convenience function to get reloptions for a given relation.
+ */
 static Datum
-new_rel_opts(Relation rel)
+get_rel_opts(Relation rel)
 {
 	Datum newOptions = PointerGetDatum(NULL);
 
@@ -18127,7 +18130,6 @@ static void
 ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd, int numsegments)
 {
 	RangeVar			*tmprv;
-	Datum				newOptions;
 	Oid					tmprelid;
 	Oid					relid = RelationGetRelid(rel);
 	ReindexParams		params = {0};
@@ -18167,10 +18169,8 @@ ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd,
 		distby = make_distributedby_for_rel(rel);
 		distby->numsegments = numsegments;
 
-		newOptions = new_rel_opts(rel);
-
 		queryDesc = build_ctas_with_dist(rel, distby,
-						untransformRelOptions(newOptions),
+						untransformRelOptions(get_rel_opts(rel)),
 						&tmprv,
 						true);
 
@@ -18300,7 +18300,6 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 	bool        rand_pol = false;
 	bool        rep_pol = false;
 	bool        force_reorg = false;
-	Datum		newOptions = PointerGetDatum(NULL);
 	bool		need_reorg;
 	bool		change_policy = false;
 	int			numsegments;
@@ -18401,8 +18400,6 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 			lwith = nlist;
 		}
 
-		newOptions = new_rel_opts(rel);
-
 		if (ldistro)
 			change_policy = true;
 
@@ -18445,8 +18442,8 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 
 				cmd->policy = policy;
 
-				/* only need to rebuild if have new storage options */
-				if (!(DatumGetPointer(newOptions) || force_reorg))
+				/* no need to rebuild if REORGANIZE=false */
+				if (!force_reorg)
 					goto l_distro_fini;
 			}
 		}
@@ -18465,12 +18462,9 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 
 			policy = createReplicatedGpPolicy(ldistro->numsegments);
 
-			/* rebuild if have new storage options or policy changed */
-			if (!DatumGetPointer(newOptions) &&
-				GpPolicyIsReplicated(rel->rd_cdbpolicy))
-			{
+			/* rebuild only if policy changed */
+			if (GpPolicyIsReplicated(rel->rd_cdbpolicy))
 				goto l_distro_fini;
-			}
 
 			/*
 			 * system columns is not visiable to users for replicated table,
@@ -18573,11 +18567,9 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 													 ldistro->numsegments);
 
 				/*
-				 * See if the old policy is the same as the new one but
-				 * remember, we still might have to rebuild if there are new
-				 * storage options.
+				 * See if the old policy is the same as the new one.
 				 */
-				if (!DatumGetPointer(newOptions) && !force_reorg &&
+				if (!force_reorg &&
 					(policy->nattrs == rel->rd_cdbpolicy->nattrs))
 				{
 					int i;
@@ -18704,7 +18696,7 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 
 			/* Step (b) - build CTAS */
 			queryDesc = build_ctas_with_dist(rel, ldistro,
-											 untransformRelOptions(newOptions),
+											 untransformRelOptions(get_rel_opts(rel)),
 											 &tmprv,
 											 true);
 
@@ -18796,7 +18788,6 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 		backend_id = cmd->backendId - 1;
 		tmprv = make_temp_table_name(rel, backend_id);
 
-		newOptions = new_rel_opts(rel);
 		need_reorg = true;
 	}
 
@@ -18837,59 +18828,9 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd)
 							ReadNextMultiXactId(),
 							NULL);
 
-		/*
-		 * Make changes from swapping relation files visible before updating
-		 * options below or else we get an already updated tuple error.
-		 */
+		/* Make changes from swapping relation files visible. */
 		CommandCounterIncrement();
 
-		if (DatumGetPointer(newOptions))
-		{
-			Datum		repl_val[Natts_pg_class];
-			bool		repl_null[Natts_pg_class];
-			bool		repl_repl[Natts_pg_class];
-			HeapTuple	newOptsTuple;
-			HeapTuple	tuple;
-			Relation	relationRelation;
-
-			/*
-			 * All we need do here is update the pg_class row; the new
-			 * options will be propagated into relcaches during
-			 * post-commit cache inval.
-			 */
-			MemSet(repl_val, 0, sizeof(repl_val));
-			MemSet(repl_null, false, sizeof(repl_null));
-			MemSet(repl_repl, false, sizeof(repl_repl));
-
-			if (newOptions != (Datum) 0)
-				repl_val[Anum_pg_class_reloptions - 1] = newOptions;
-			else
-				repl_null[Anum_pg_class_reloptions - 1] = true;
-
-			repl_repl[Anum_pg_class_reloptions - 1] = true;
-
-			relationRelation = table_open(RelationRelationId, RowExclusiveLock);
-			tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(tarrelid));
-
-			Assert(HeapTupleIsValid(tuple));
-			newOptsTuple = heap_modify_tuple(tuple, RelationGetDescr(relationRelation),
-											 repl_val, repl_null, repl_repl);
-
-			CatalogTupleUpdate(relationRelation, &tuple->t_self, newOptsTuple);
-
-			heap_freetuple(newOptsTuple);
-
-			ReleaseSysCache(tuple);
-
-			table_close(relationRelation, RowExclusiveLock);
-
-			/*
-			 * Increment cmd counter to make updates visible; this is
-			 * needed because the same tuple has to be updated again
-			 */
-			CommandCounterIncrement();
-		}
-
 		/* now, reindex */
 		reindex_relation(tarrelid, 0, &params);
 	}
diff --git a/src/test/regress/expected/alter_distribution_policy.out b/src/test/regress/expected/alter_distribution_policy.out
index a5e3875ddad..1d7e50df447 100644
--- a/src/test/regress/expected/alter_distribution_policy.out
+++ b/src/test/regress/expected/alter_distribution_policy.out
@@ -1550,3 +1550,53 @@ select check_redistributed('insert into t_distbya select * from t_reorganize', '
 
 reset optimizer;
 reset gp_force_random_redistribution;
+-- When reorganize=false, we won't reorganize and this shouldn't be affected by the existing reloptions.
+CREATE TABLE public.t_reorganize_false (
+a integer,
+b integer
+) with (appendonly=false, autovacuum_enabled=false) DISTRIBUTED BY (a);
+-- Insert values which will all be on one segment
+INSERT INTO t_reorganize_false VALUES (0, generate_series(1,100));
+SELECT gp_segment_id,count(*)￼ from t_reorganize_false GROUP BY 1;
+ gp_segment_id |  ￼  
+---------------+-----
+             1 | 100
+(1 row)
+
+-- Change the distribution policy but because REORGANIZE=false, it should NOT be re-distributed 
+ALTER TABLE t_reorganize_false SET WITH (REORGANIZE=false) DISTRIBUTED RANDOMLY;
+SELECT gp_segment_id,count(*)￼ from t_reorganize_false GROUP BY 1;
+ gp_segment_id |  ￼  
+---------------+-----
+             1 | 100
+(1 row)
+
+DROP TABLE t_reorganize_false;
+-- Same rule should apply to partitioned table too
+CREATE TABLE public.t_reorganize_false (
+a integer,
+b integer
+)
+DISTRIBUTED BY (a) PARTITION BY RANGE(b)
+(
+PARTITION "00" START (0) END (1000) WITH (tablename='t_reorganize_false_0', appendonly='false', autovacuum_enabled=false),
+PARTITION "01" START (1000) END (2000) WITH (tablename='t_reorganize_false_1', appendonly='false', autovacuum_enabled=false),
+DEFAULT PARTITION def WITH (tablename='t_reorganize_false_def', appendonly='false', autovacuum_enabled=false)
+);
+-- Insert values which will all be on one segment
+INSERT INTO t_reorganize_false VALUES (0, generate_series(1,100));
+SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
+ gp_segment_id | count 
+---------------+-------
+             1 |   100
+(1 row)
+
+-- Should NOT be re-distributed
+ALTER TABLE t_reorganize_false SET WITH (REORGANIZE=false) DISTRIBUTED RANDOMLY;
+SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
+ gp_segment_id | count 
+---------------+-------
+             1 |   100
+(1 row)
+
+DROP TABLE t_reorganize_false;
diff --git a/src/test/regress/sql/alter_distribution_policy.sql b/src/test/regress/sql/alter_distribution_policy.sql
index f6479f3a974..71d9490872e 100644
--- a/src/test/regress/sql/alter_distribution_policy.sql
+++ b/src/test/regress/sql/alter_distribution_policy.sql
@@ -531,3 +531,33 @@ select check_redistributed('insert into t_distbya select * from t_reorganize', '
 
 reset optimizer;
 reset gp_force_random_redistribution;
+-- When reorganize=false, we won't reorganize and this shouldn't be affected by the existing reloptions.
+CREATE TABLE public.t_reorganize_false (
+a integer,
+b integer
+) with (appendonly=false, autovacuum_enabled=false) DISTRIBUTED BY (a);
+-- Insert values which will all be on one segment
+INSERT INTO t_reorganize_false VALUES (0, generate_series(1,100));
+SELECT gp_segment_id,count(*)￼ from t_reorganize_false GROUP BY 1;
+-- Change the distribution policy but because REORGANIZE=false, it should NOT be re-distributed 
+ALTER TABLE t_reorganize_false SET WITH (REORGANIZE=false) DISTRIBUTED RANDOMLY;
+SELECT gp_segment_id,count(*)￼ from t_reorganize_false GROUP BY 1;
+DROP TABLE t_reorganize_false;
+-- Same rule should apply to partitioned table too
+CREATE TABLE public.t_reorganize_false (
+a integer,
+b integer
+)
+DISTRIBUTED BY (a) PARTITION BY RANGE(b)
+(
+PARTITION "00" START (0) END (1000) WITH (tablename='t_reorganize_false_0', appendonly='false', autovacuum_enabled=false),
+PARTITION "01" START (1000) END (2000) WITH (tablename='t_reorganize_false_1', appendonly='false', autovacuum_enabled=false),
+DEFAULT PARTITION def WITH (tablename='t_reorganize_false_def', appendonly='false', autovacuum_enabled=false)
+);
+-- Insert values which will all be on one segment
+INSERT INTO t_reorganize_false VALUES (0, generate_series(1,100));
+SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
+-- Should NOT be re-distributed
+ALTER TABLE t_reorganize_false SET WITH (REORGANIZE=false) DISTRIBUTED RANDOMLY;
+SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
+DROP TABLE t_reorganize_false;

From 22fc48b08eaf48d93e90b9c21462d1604cf4f621 Mon Sep 17 00:00:00 2001
From: Alexey Gordeev <goa@arenadata.io>
Date: Wed, 9 Jun 2021 13:47:37 +0500
Subject: [PATCH 15/48] Fix segfault on execution of multilevel correlated
 queries.

Execution of multilevel correlated queries with a high level of nesting
can cause a segfault (when using array_agg, json_agg) or can produce wrong
results (when using classic aggregates like sum()). Due to some GP
limitations, correlated subqueries with skip-level correlations are not
supported. An additional check condition is provided to prevent such
queries from being planned. The QueryHasDistributedRelation function, used
by this check, doesn't recurse over subplans and may return wrong results
for distributed RTE_RELATION entries hidden by RTE_SUBQUERY entries.

This commit fixes that behavior by adding optional recursion to the
QueryHasDistributedRelation function. An additional regression test is
included. More information can be found in issue #12054.
---
 src/backend/optimizer/plan/subselect.c                   | 9 +++++++--
 src/include/optimizer/subselect.h                        | 2 +-
 src/test/regress/expected/qp_correlated_query.out        | 6 ++++++
 .../regress/expected/qp_correlated_query_optimizer.out   | 6 ++++++
 src/test/regress/sql/qp_correlated_query.sql             | 4 ++++
 5 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index ebb5bbb10a0..aaed8b99c1f 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -156,7 +156,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
 /**
  * Returns true if query refers to a distributed table.
  */
-bool QueryHasDistributedRelation(Query *q)
+bool QueryHasDistributedRelation(Query *q, bool recursive)
 {
 	ListCell   *rt = NULL;
 
@@ -164,6 +164,11 @@ bool QueryHasDistributedRelation(Query *q)
 	{
 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
 
+		if (rte->rtekind == RTE_SUBQUERY
+				&& recursive
+				&& QueryHasDistributedRelation(rte->subquery, true))
+			return true;
+
 		if (rte->relid != InvalidOid
 				&& rte->rtekind == RTE_RELATION)
 		{
@@ -325,7 +330,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 
 	if ((Gp_role == GP_ROLE_DISPATCH)
 			&& IsSubqueryMultiLevelCorrelated(subquery)
-			&& QueryHasDistributedRelation(subquery))
+			&& QueryHasDistributedRelation(subquery, root->is_correlated_subplan))
 	{
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 08dfb95364e..7f4c9afa950 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -53,6 +53,6 @@ extern bool IsSubqueryMultiLevelCorrelated(Query *sq);
 
 extern List *generate_subquery_vars(PlannerInfo *root, List *tlist,
 					   Index varno);
-extern bool QueryHasDistributedRelation(Query *q);
+extern bool QueryHasDistributedRelation(Query *q, bool recursive);
 
 #endif							/* SUBSELECT_H */
diff --git a/src/test/regress/expected/qp_correlated_query.out b/src/test/regress/expected/qp_correlated_query.out
index 07a3ab10a1c..d35e89ea6e0 100644
--- a/src/test/regress/expected/qp_correlated_query.out
+++ b/src/test/regress/expected/qp_correlated_query.out
@@ -1662,6 +1662,12 @@ select (select avg(x) from qp_csq_t1, qp_csq_t2 where qp_csq_t1.a = any (select
  4.0000000000000000
 (4 rows)
 
+-- Planner should fail due to skip-level correlation not supported. Query should not cause segfault like in issue #12054.
+select A.j, (select array_agg(a_B) from (select B.j, (select array_agg(a_C) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A;
+ERROR:  correlated subquery with skip-level correlations is not supported
+-- Planner should fail due to skip-level correlation not supported. Query should not return wrong results like in issue #12054.
+select A.j, (select array_agg(a_B) from (select B.j, (select sum(a_C.j) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A;
+ERROR:  correlated subquery with skip-level correlations is not supported
 -- ----------------------------------------------------------------------
 -- Test: Correlated Subquery: CSQ with multiple columns (Heap)
 -- ----------------------------------------------------------------------
diff --git a/src/test/regress/expected/qp_correlated_query_optimizer.out b/src/test/regress/expected/qp_correlated_query_optimizer.out
index c238b0633a4..a604629ae1e 100644
--- a/src/test/regress/expected/qp_correlated_query_optimizer.out
+++ b/src/test/regress/expected/qp_correlated_query_optimizer.out
@@ -1775,6 +1775,12 @@ select (select avg(x) from qp_csq_t1, qp_csq_t2 where qp_csq_t1.a = any (select
  4.0000000000000000
 (4 rows)
 
+-- Planner should fail due to skip-level correlation not supported. Query should not cause segfault like in issue #12054.
+select A.j, (select array_agg(a_B) from (select B.j, (select array_agg(a_C) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A;
+ERROR:  correlated subquery with skip-level correlations is not supported
+-- Planner should fail due to skip-level correlation not supported. Query should not return wrong results like in issue #12054.
+select A.j, (select array_agg(a_B) from (select B.j, (select sum(a_C.j) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A;
+ERROR:  correlated subquery with skip-level correlations is not supported
 -- ----------------------------------------------------------------------
 -- Test: Correlated Subquery: CSQ with multiple columns (Heap)
 -- ----------------------------------------------------------------------
diff --git a/src/test/regress/sql/qp_correlated_query.sql b/src/test/regress/sql/qp_correlated_query.sql
index 46048efc3b7..6c30e018391 100644
--- a/src/test/regress/sql/qp_correlated_query.sql
+++ b/src/test/regress/sql/qp_correlated_query.sql
@@ -342,6 +342,10 @@ SELECT a, (SELECT (SELECT d FROM qp_csq_t3 WHERE a=c)) FROM qp_csq_t1 GROUP BY a
 select A.i, (select C.j from C group by C.j having max(C.j) = any (select min(B.j) from B)) as C_j from A,B,C where A.i = 99 order by A.i, C_j limit 10;
 select (select avg(x) from qp_csq_t1, qp_csq_t2 where qp_csq_t1.a = any (select x)) as avg_x from qp_csq_t1 order by 1;
 
+-- Planner should fail due to skip-level correlation not supported. Query should not cause segfault like in issue #12054.
+select A.j, (select array_agg(a_B) from (select B.j, (select array_agg(a_C) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A;
+-- Planner should fail due to skip-level correlation not supported. Query should not return wrong results like in issue #12054.
+select A.j, (select array_agg(a_B) from (select B.j, (select sum(a_C.j) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A;
 
 -- ----------------------------------------------------------------------
 -- Test: Correlated Subquery: CSQ with multiple columns (Heap)

From 69ae47e3f65d887c940ea535510baf64703bfa9b Mon Sep 17 00:00:00 2001
From: Jasper Li <lij55@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:34:25 +0800
Subject: [PATCH 16/48] =?UTF-8?q?set=20memory=20allocated=20by=20malloc=20?=
 =?UTF-8?q?to=200=20to=20fix=20'error:=20=E2=80=98data=E2=80=99=20may=20be?=
 =?UTF-8?q?=20used=20uninitialized'=20(#13103)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* set to 0 with bzero to fix 'error: ‘data’ may be used uninitialized'

* change to memset for compatibility

Co-authored-by: jasper li <ljasper@vmware.com>
---
 src/backend/utils/hyperloglog/gp_hyperloglog.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/backend/utils/hyperloglog/gp_hyperloglog.c b/src/backend/utils/hyperloglog/gp_hyperloglog.c
index c093111fe85..f9ce8e33245 100644
--- a/src/backend/utils/hyperloglog/gp_hyperloglog.c
+++ b/src/backend/utils/hyperloglog/gp_hyperloglog.c
@@ -573,6 +573,7 @@ gp_hll_compress_dense(GpHLLCounter hloglog)
                  errmsg("out of memory"),
                  errdetail("Failed on request of size %zu.", data_rawsize)));
     }
+    memset(data, 0, data_rawsize);
 
     /* put all registers in a normal array  i.e. remove dense packing so
      * lz compression can work optimally */

From 23f9d061ea3871487a5c13649dce5524cf4116ac Mon Sep 17 00:00:00 2001
From: "Kevin.wyh" <wuyuhao28@gmail.com>
Date: Tue, 22 Feb 2022 18:50:51 +0800
Subject: [PATCH 17/48] ORCA: avoid returning output of a CTE producer (#12776)

When dealing with queries which have CTE and result set being used
more than once, ORCA produces a plan with Sequence Node and
ShareInputScan Node. Here is an example:

explain (costs off) with tmp_t as (select id from test) select a.id from tmp_t a join tmp_t b on a.id = b.id;
                         QUERY PLAN
------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   ->  Sequence
         ->  Shared Scan (share slice:id 1:0)
               ->  Seq Scan on test
         ->  Hash Join
               Hash Cond: (share0_ref3.id = share0_ref2.id)
               ->  Shared Scan (share slice:id 1:0)
               ->  Hash
                     ->  Shared Scan (share slice:id 1:0)
 Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)

This plan has three ShareInputScan nodes: one producer and two
consumers, all of them read from the tuplestore and return tuples to
upper node.  However, the CTE producer does not need to do so, because
the Sequence Node only receives the last subplan's output tuples.

When tuplestore is small, the extra read from the CTE producer is at
low cost, but when tuplestore is huge, this read is heavy and would
cause bad performance of the entire query.

This commit adds a flag discard_output in ShareInputScan plan node to
indicate when a ShareInputScan can avoid reading from tuplestore and
return early. The flag is set to true by ORCA when translating CTE producer,
and set to false elsewhere.

Fixes #12710

Co-authored-by: wuyuhao28 <wuyuhao28@github.com>
Co-authored-by: Alexandra Wang <walexandra@vmware.com>
Reviewed-by: Zhenghua Lyu <kainwen@gmail.com>
Reviewed-by: Shreedhar Hardikar <shardikar@vmware.com>
---
 src/backend/executor/nodeShareInputScan.c              | 9 +++++++++
 src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp | 2 ++
 src/backend/optimizer/plan/planshare.c                 | 1 +
 src/include/nodes/plannodes.h                          | 3 +++
 4 files changed, 15 insertions(+)

diff --git a/src/backend/executor/nodeShareInputScan.c b/src/backend/executor/nodeShareInputScan.c
index 1bb05edf674..4886e5d377c 100644
--- a/src/backend/executor/nodeShareInputScan.c
+++ b/src/backend/executor/nodeShareInputScan.c
@@ -366,6 +366,15 @@ ExecShareInputScan(PlanState *pstate)
 	/* if first time call, need to initialize the tuplestore state.  */
 	if (!node->isready)
 		init_tuplestore_state(node);
+	
+	/*
+	 * Return NULL when necessary.
+	 * This could help improve performance, especially when tuplestore is huge, because ShareInputScan 
+	 * do not need to read tuple from tuplestore when discard_output is true, which means current 
+	 * ShareInputScan is one but not the last one of Sequence's subplans.
+	 */
+	if (sisc->discard_output)
+	  return NULL;
 
 	slot = node->ss.ps.ps_ResultTupleSlot;
 
diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
index 0a75541c1a6..65894b7d779 100644
--- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
+++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
@@ -3738,6 +3738,7 @@ CTranslatorDXLToPlStmt::TranslateDXLCTEProducerToSharedScan(
 	// create the shared input scan representing the CTE Producer
 	ShareInputScan *shared_input_scan = MakeNode(ShareInputScan);
 	shared_input_scan->share_id = cte_id;
+	shared_input_scan->discard_output = true;
 	Plan *plan = &(shared_input_scan->scan.plan);
 	plan->plan_node_id = m_dxl_to_plstmt_context->GetNextPlanId();
 
@@ -3795,6 +3796,7 @@ CTranslatorDXLToPlStmt::TranslateDXLCTEConsumerToSharedScan(
 
 	ShareInputScan *share_input_scan_cte_consumer = MakeNode(ShareInputScan);
 	share_input_scan_cte_consumer->share_id = cte_id;
+	share_input_scan_cte_consumer->discard_output = false;
 
 	Plan *plan = &(share_input_scan_cte_consumer->scan.plan);
 	plan->plan_node_id = m_dxl_to_plstmt_context->GetNextPlanId();
diff --git a/src/backend/optimizer/plan/planshare.c b/src/backend/optimizer/plan/planshare.c
index 1b5c53f549e..75983b8ffe3 100644
--- a/src/backend/optimizer/plan/planshare.c
+++ b/src/backend/optimizer/plan/planshare.c
@@ -39,6 +39,7 @@ make_shareinputscan(PlannerInfo *root, Plan *inputplan)
 	sisc->producer_slice_id = -1;
 	sisc->this_slice_id = -1;
 	sisc->nconsumers = 0;
+	sisc->discard_output = false;
 
 	sisc->scan.plan.qual = NIL;
 	sisc->scan.plan.righttree = NULL;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 64679cbe527..fbac2ca02b9 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1099,6 +1099,9 @@ typedef struct ShareInputScan
 
 	/* Number of consumer slices participating, not including the producer. */
 	int			nconsumers;
+
+	/* Discard the scan output? True for ORCA CTE producer, false otherwise. */
+	bool        discard_output;
 } ShareInputScan;
 
 /* ----------------

From bdfd62ffb4a875e08cb61c5c88d4d46ecb28ba79 Mon Sep 17 00:00:00 2001
From: Alexey Gordeev <42867699+InnerLife0@users.noreply.github.com>
Date: Wed, 23 Feb 2022 10:51:55 +0500
Subject: [PATCH 18/48] Fix index corruption when invalid snapshot used with AO
 tables. (#12863)

Using a regular MVCC snapshot to access AO table metadata may make an index build (performed globally) inconsistent, because tuples inserted (likewise globally) by other open transactions are not visible.

This commit fixes that behavior by using SnapshotSelf for metadata access. An additional isolation test shows how to reproduce the bug before the fix.
---
 src/backend/access/aocs/aocsam.c              |  6 +-
 src/backend/access/appendonly/appendonlyam.c  |  6 +-
 .../uao/snapshot_index_corruption.source      | 35 ++++++++
 src/test/isolation2/isolation2_schedule       |  2 +
 .../uao/snapshot_index_corruption.source      | 83 +++++++++++++++++++
 5 files changed, 128 insertions(+), 4 deletions(-)
 create mode 100644 src/test/isolation2/input/uao/snapshot_index_corruption.source
 create mode 100644 src/test/isolation2/output/uao/snapshot_index_corruption.source

diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c
index 0ae994659b1..bbc42dc81c3 100644
--- a/src/backend/access/aocs/aocsam.c
+++ b/src/backend/access/aocs/aocsam.c
@@ -507,13 +507,15 @@ aocs_beginscan(Relation relation,
 	RelationIncrementReferenceCount(relation);
 
 	/*
-	 * the append-only meta data should never be fetched with
+	 * The append-only meta data should never be fetched with
 	 * SnapshotAny as bogus results are returned.
+	 * We use SnapshotSelf for metadata, as regular MVCC snapshot can hide newly
+	 * globally inserted tuples from global index build process.
 	 */
 	if (snapshot != SnapshotAny)
 		aocsMetaDataSnapshot = snapshot;
 	else
-		aocsMetaDataSnapshot = GetTransactionSnapshot();
+		aocsMetaDataSnapshot = SnapshotSelf;
 
 	seginfo = GetAllAOCSFileSegInfo(relation, aocsMetaDataSnapshot, &total_seg, NULL);
 	return aocs_beginscan_internal(relation,
diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c
index d4681bd4aa3..32a306f3ae0 100755
--- a/src/backend/access/appendonly/appendonlyam.c
+++ b/src/backend/access/appendonly/appendonlyam.c
@@ -1638,10 +1638,12 @@ appendonly_beginscan(Relation relation,
 	if (appendOnlyMetaDataSnapshot == SnapshotAny)
 	{
 		/*
-		 * the append-only meta data should never be fetched with
+		 * The append-only meta data should never be fetched with
 		 * SnapshotAny as bogus results are returned.
+		 * We use SnapshotSelf for metadata, as regular MVCC snapshot can hide
+		 * newly globally inserted tuples from global index build process.
 		 */
-		appendOnlyMetaDataSnapshot = GetTransactionSnapshot();
+		appendOnlyMetaDataSnapshot = SnapshotSelf;
 	}
 
 	/*
diff --git a/src/test/isolation2/input/uao/snapshot_index_corruption.source b/src/test/isolation2/input/uao/snapshot_index_corruption.source
new file mode 100644
index 00000000000..48c989b1af7
--- /dev/null
+++ b/src/test/isolation2/input/uao/snapshot_index_corruption.source
@@ -0,0 +1,35 @@
+-- @Description Test index corruption when invalid snapshot used.
+--
+-- Create AO table, insert few rows on it.
+drop table if exists test_ao;
+create table test_ao(i bigint) using @amname@ distributed by (i);
+insert into test_ao select generate_series(1,100);
+-- Test 1
+-- Begin single-insert transaction.
+1: begin;
+1: insert into test_ao values(101);
+-- Try to create index, it should hold on lock before commit below.
+2&: create index test_ao_idx on test_ao(i);
+-- Commit single-insert transaction, so index continues creation.
+1: commit;
+-- Force index usage and check row is here (false before fix).
+2<:
+2: set optimizer=off;
+2: set enable_seqscan=off;
+2: explain (costs off) select i from test_ao where i = 101;
+2: select i from test_ao where i = 101;
+
+-- Test 2
+-- Drop incomplete index
+1: drop index test_ao_idx;
+-- Check row is here and start repeatable read transaction.
+2: select i from test_ao where i = 100;
+2: begin;
+2: set transaction isolation level repeatable read;
+2: select 1;
+-- Update row selected above and create new index
+1: update test_ao set i = 200 where i = 100;
+1: create index test_ao_idx on test_ao(i);
+-- For the repeatable read isolation level row still there.
+2: explain (costs off) select i from test_ao where i = 100;
+2: select i from test_ao where i = 100;
diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule
index 806c62a4d7f..03f9f7c467d 100644
--- a/src/test/isolation2/isolation2_schedule
+++ b/src/test/isolation2/isolation2_schedule
@@ -144,6 +144,7 @@ test: uao/select_while_vacuum_serializable2_row
 test: uao/selectinsert_while_vacuum_row
 test: uao/selectinsertupdate_while_vacuum_row
 test: uao/selectupdate_while_vacuum_row
+test: uao/snapshot_index_corruption_row
 test: uao/update_while_vacuum_row
 test: uao/vacuum_self_serializable_row
 test: uao/vacuum_self_serializable2_row
@@ -200,6 +201,7 @@ test: uao/select_while_vacuum_serializable2_column
 test: uao/selectinsert_while_vacuum_column
 test: uao/selectinsertupdate_while_vacuum_column
 test: uao/selectupdate_while_vacuum_column
+test: uao/snapshot_index_corruption_column
 test: uao/update_while_vacuum_column
 test: uao/vacuum_self_serializable_column
 test: uao/vacuum_self_serializable2_column
diff --git a/src/test/isolation2/output/uao/snapshot_index_corruption.source b/src/test/isolation2/output/uao/snapshot_index_corruption.source
new file mode 100644
index 00000000000..59b00e9f10c
--- /dev/null
+++ b/src/test/isolation2/output/uao/snapshot_index_corruption.source
@@ -0,0 +1,83 @@
+-- @Description Test index corruption when invalid snapshot used.
+--
+-- Create AO table, insert few rows on it.
+drop table if exists test_ao;
+DROP
+create table test_ao(i bigint) using @amname@ distributed by (i);
+CREATE
+insert into test_ao select generate_series(1,100);
+INSERT 100
+-- Test 1
+-- Begin single-insert transaction.
+1: begin;
+BEGIN
+1: insert into test_ao values(101);
+INSERT 1
+-- Try to create index, it should hold on lock before commit below.
+2&: create index test_ao_idx on test_ao(i);  <waiting ...>
+-- Commit single-insert transaction, so index continues creation.
+1: commit;
+COMMIT
+-- Force index usage and check row is here (false before fix).
+2<:  <... completed>
+CREATE
+2: set optimizer=off;
+SET
+2: set enable_seqscan=off;
+SET
+2: explain (costs off) select i from test_ao where i = 101;
+ QUERY PLAN                                   
+----------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)     
+   ->  Bitmap Heap Scan on test_ao            
+         Recheck Cond: (i = 101)              
+         ->  Bitmap Index Scan on test_ao_idx 
+               Index Cond: (i = 101)          
+ Optimizer: Postgres query optimizer          
+(6 rows)
+2: select i from test_ao where i = 101;
+ i   
+-----
+ 101 
+(1 row)
+
+-- Test 2
+-- Drop incomplete index
+1: drop index test_ao_idx;
+DROP
+-- Check row is here and start repeatable read transaction.
+2: select i from test_ao where i = 100;
+ i   
+-----
+ 100 
+(1 row)
+2: begin;
+BEGIN
+2: set transaction isolation level repeatable read;
+SET
+2: select 1;
+ ?column? 
+----------
+ 1        
+(1 row)
+-- Update row selected above and create new index
+1: update test_ao set i = 200 where i = 100;
+UPDATE 1
+1: create index test_ao_idx on test_ao(i);
+CREATE
+-- For the repeatable read isolation level row still there.
+2: explain (costs off) select i from test_ao where i = 100;
+ QUERY PLAN                                   
+----------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)     
+   ->  Bitmap Heap Scan on test_ao            
+         Recheck Cond: (i = 100)              
+         ->  Bitmap Index Scan on test_ao_idx 
+               Index Cond: (i = 100)          
+ Optimizer: Postgres query optimizer          
+(6 rows)
+2: select i from test_ao where i = 100;
+ i   
+-----
+ 100 
+(1 row)

From 9e115fbfb3798644b3b58ba459d6f55e43797038 Mon Sep 17 00:00:00 2001
From: xiaoxiao <53000479+xiaoxiaoHe-E@users.noreply.github.com>
Date: Thu, 24 Feb 2022 10:44:44 +0800
Subject: [PATCH 19/48] Set staging table schema to gpload:external:schema in
 yaml file (#13110)

---
 gpMgmt/bin/gpload.py                       | 24 ++++++++++++++--------
 gpMgmt/bin/gpload_test/gpload2/query33.ans |  4 ++--
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/gpMgmt/bin/gpload.py b/gpMgmt/bin/gpload.py
index a6ce8f471dc..2d7c0a1eb7f 100755
--- a/gpMgmt/bin/gpload.py
+++ b/gpMgmt/bin/gpload.py
@@ -2254,9 +2254,9 @@ def get_reuse_staging_table_query(self, encoding_conditions):
         return:
             sql(string)
         '''
-        sql = """SELECT oid::regclass
-                 FROM pg_class
-                 WHERE relname = 'staging_gpload_reusable_%s';""" % (encoding_conditions)
+        sql = """SELECT oid::regclass \
+FROM pg_class \
+WHERE relname = 'staging_gpload_reusable_%s';""" % (encoding_conditions)
 
         self.log(self.DEBUG, "query used to identify reusable temporary relations: %s" % sql)
         return sql
@@ -2273,7 +2273,8 @@ def get_table_oid(self, tableName):
                 pass
         return None
 
-    def get_ext_schematable(self, schemaName, tableName):
+
+    def get_schematable(self, schemaName, tableName):
         '''
         return formated table name
         '''
@@ -2448,7 +2449,7 @@ def create_external_table(self):
                               AND n.nspname !~ '^pg_toast'"""
                 result = self.db.query(sql).getresult()
                 if len(result) > 0:
-                    self.extSchemaTable = self.get_ext_schematable(quote_unident(self.extSchemaName), self.extTableName)
+                    self.extSchemaTable = self.get_schematable(quote_unident(self.extSchemaName), self.extTableName)
                     self.log(self.INFO, "reusing external staging table %s" % self.extSchemaTable)
                     return
             # staging table is not specified, we need to find it manually
@@ -2468,7 +2469,7 @@ def create_external_table(self):
                     self.extTableName = (resultList[0])[0]
                     # fast match result is only table name, so we need add schema info
                     if self.fast_match:
-                        self.extSchemaTable = self.get_ext_schematable(quote_unident(self.extSchemaName), self.extTableName)
+                        self.extSchemaTable = self.get_schematable(quote_unident(self.extSchemaName), self.extTableName)
                     else:
                         self.extSchemaTable = self.extTableName
                     self.log(self.INFO, "reusing external table %s" % self.extSchemaTable)
@@ -2479,13 +2480,13 @@ def create_external_table(self):
                 # around
 
                 self.extTableName = "ext_gpload_reusable_%s" % self.unique_suffix
-                self.log(self.INFO, "did not find an external table to reuse. creating %s" % self.get_ext_schematable(self.extSchemaName, self.extTableName))
+                self.log(self.INFO, "did not find an external table to reuse. creating %s" % self.get_schematable(self.extSchemaName, self.extTableName))
 
         # process the single quotes in order to successfully create an external table.
         self.formatOpts = self.formatOpts.replace("'\''","E'\\''")
 
         # construct a CREATE EXTERNAL TABLE statement and execute it
-        self.extSchemaTable = self.get_ext_schematable(self.extSchemaName, self.extTableName)
+        self.extSchemaTable = self.get_schematable(self.extSchemaName, self.extTableName)
         sql = "create external table %s" % self.extSchemaTable
         sql += "(%s)" % ','.join(['%s %s' % (a[0], a[1]) for a in from_cols])
 
@@ -2566,10 +2567,12 @@ def create_staging_table(self):
             # we no longer need the timestamp, since we will never want to create few
             # tables with same encoding_conditions
             self.staging_table_name = "staging_gpload_reusable_%s" % (encoding_conditions)
+            self.staging_table_name = self.get_schematable(self.extSchemaName, self.staging_table_name)
             self.log(self.INFO, "did not find a staging table to reuse. creating %s" % self.staging_table_name)
 
         # MPP-14667 - self.reuse_tables should change one, and only one, aspect of how we build the following table,
         # and that is, whether it's a temp table or not. In other words, is_temp_table = '' iff self.reuse_tables == True.
+
         sql = 'CREATE %sTABLE %s ' % (is_temp_table, self.staging_table_name)
         cols = ['"%s" %s' % (a[0], a[1]) for a in target_columns]
         sql += "(%s)" % ','.join(cols)
@@ -2882,6 +2885,7 @@ def do_method(self):
         # Is the table to be truncated before the load?
         preload = self.getconfig('gpload:preload', list, default=None)
         method = self.getconfig('gpload:output:mode', str, 'insert').lower()
+        external = self.getconfig('gpload:external', list, default=None)
         self.log_errors = self.getconfig('gpload:input:log_errors', bool, False)
         truncate = False
         self.reuse_tables = False
@@ -2889,6 +2893,10 @@ def do_method(self):
         if not self.options.no_auto_trans and not method=='insert':
             self.db.query("BEGIN")
 
+        self.extSchemaName = self.getconfig('gpload:external:schema', str, None)
+        if self.extSchemaName == '%':
+            self.extSchemaName = self.schema
+
         if preload:
             truncate = self.getconfig('gpload:preload:truncate',bool,False)
             self.reuse_tables = self.getconfig('gpload:preload:reuse_tables',bool,False)
diff --git a/gpMgmt/bin/gpload_test/gpload2/query33.ans b/gpMgmt/bin/gpload_test/gpload2/query33.ans
index b69631bff50..93756fe0acf 100644
--- a/gpMgmt/bin/gpload_test/gpload2/query33.ans
+++ b/gpMgmt/bin/gpload_test/gpload2/query33.ans
@@ -1,7 +1,7 @@
 2018-07-24 06:14:29|INFO|gpload session started 2018-07-24 06:14:29
 2018-07-24 06:14:29|INFO|setting schema 'public' for table 'texttable'
 2018-07-24 06:14:29|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30
-2018-07-24 06:14:29|INFO|did not find a staging table to reuse. creating staging_gpload_reusable_afbaac0da7ced19791c9ab9c537f41d3
+2018-07-24 06:14:29|INFO|did not find a staging table to reuse. creating test.staging_gpload_reusable_afbaac0da7ced19791c9ab9c537f41d3
 2018-07-24 06:14:29|INFO|did not find an external table to reuse. creating test.ext_gpload_reusable_d2e95f76_8f08_11e8_8c76_0242ac110002
 2018-07-24 06:14:29|INFO|running time: 0.40 seconds
 2018-07-24 06:14:29|INFO|rows Inserted          = 16
@@ -11,7 +11,7 @@
 2018-07-24 06:14:30|INFO|gpload session started 2018-07-24 06:14:30
 2018-07-24 06:14:30|INFO|setting schema 'public' for table 'texttable'
 2018-07-24 06:14:30|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30
-2018-07-24 06:14:30|INFO|reusing staging table staging_gpload_reusable_afbaac0da7ced19791c9ab9c537f41d3
+2018-07-24 06:14:30|INFO|reusing staging table test.staging_gpload_reusable_afbaac0da7ced19791c9ab9c537f41d3
 2018-07-24 06:14:30|INFO|reusing external table test.ext_gpload_reusable_d2e95f76_8f08_11e8_8c76_0242ac110002
 2018-07-24 06:14:30|INFO|running time: 0.31 seconds
 2018-07-24 06:14:30|INFO|rows Inserted          = 0

From 0f7dd394d53e9601c4fb7eaae038c3dded479b26 Mon Sep 17 00:00:00 2001
From: Xing Guo <higuoxing+github@gmail.com>
Date: Fri, 25 Feb 2022 09:29:34 +0800
Subject: [PATCH 20/48] Fix incorrect amount of memory allocated for WindowAgg.
 (#13124)

This patch fixes the incorrect amount of memory allocated for WindowAgg.
Greenplum uses statement_mem rather than work_mem to control the memory
allocated to queries. Test cases are included with this patch.
---
 src/backend/executor/nodeWindowAgg.c          |  54 +++++-
 src/test/regress/expected/misc_jiras.out      |  34 +++-
 .../expected/statement_mem_for_windowagg.out  | 176 ++++++++++++++++++
 src/test/regress/greenplum_schedule           |   4 +-
 src/test/regress/sql/misc_jiras.sql           |  16 +-
 .../sql/statement_mem_for_windowagg.sql       |  57 ++++++
 6 files changed, 328 insertions(+), 13 deletions(-)
 create mode 100644 src/test/regress/expected/statement_mem_for_windowagg.out
 create mode 100644 src/test/regress/sql/statement_mem_for_windowagg.sql

diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 75933ca5488..6a626f1de77 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -40,6 +40,7 @@
 #include "executor/executor.h"
 #include "executor/nodeWindowAgg.h"
 #include "miscadmin.h"
+#include "nodes/execnodes.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "parser/parse_agg.h"
@@ -49,6 +50,7 @@
 #include "utils/builtins.h"
 #include "utils/datum.h"
 #include "utils/expandeddatum.h"
+#include "utils/faultinjector.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/regproc.h"
@@ -269,7 +271,7 @@ initialize_windowaggregate(WindowAggState *winstate,
 								  peraggstate->distinctLtOper,
 								  peraggstate->distinctColl,
 								  false, /* nullsFirstFlag */
-								  work_mem,
+								  PlanStateOperatorMemKB((PlanState *) winstate),
 								  NULL, /* coordinate */
 								  false);
 	}
@@ -686,6 +688,24 @@ perform_distinct_windowaggregate(WindowAggState *winstate,
 
 	tuplesort_performsort(peraggstate->distinctSortState);
 
+#ifdef FAULT_INJECTOR
+	/*
+	 * This routine is used for tracing whether the sort operation of DISTINCT-qualified
+	 * WindowAgg spills to disk.
+	 */
+	if (SIMPLE_FAULT_INJECTOR("distinct_winagg_perform_sort") == FaultInjectorTypeSkip)
+	{
+		TuplesortInstrumentation sortstats;
+		tuplesort_get_stats(peraggstate->distinctSortState, &sortstats);
+		if (sortstats.spaceType == SORT_SPACE_TYPE_MEMORY)
+			ereport(NOTICE,
+					(errmsg("distinct winagg sortstats: sort operation fitted in memory")));
+		else
+			ereport(NOTICE,
+					(errmsg("distinct winagg sortstats: sort operation spilled to disk")));
+	}
+#endif
+
 	/* load the first tuple from spool */
 	if (tuplesort_getdatum(peraggstate->distinctSortState, true,
 						   &fcinfo->args[1].value, &fcinfo->args[1].isnull, NULL))
@@ -1360,7 +1380,9 @@ begin_partition(WindowAggState *winstate)
 	}
 
 	/* Create new tuplestore for this partition */
-	winstate->buffer = tuplestore_begin_heap(false, false, work_mem);
+	winstate->buffer =
+		tuplestore_begin_heap(false, false,
+							  PlanStateOperatorMemKB((PlanState *) winstate));
 
 	/*
 	 * Set up read pointers for the tuplestore.  The current pointer doesn't
@@ -2426,6 +2448,34 @@ ExecWindowAgg(PlanState *pstate)
 	 */
 	spool_tuples(winstate, winstate->currentpos);
 
+#ifdef FAULT_INJECTOR
+	/*
+	 * This routine is used for testing if we have allocated enough memory
+	 * for the tuplestore (winstate->buffer) in begin_partition(). If all
+	 * tuples of the current partition can be fitted in the memory, we
+	 * emit a notice saying 'fitted in memory'. If they cannot be fitted in
+	 * the memory, we emit a notice saying 'spilled to disk'. If there're
+	 * no input rows, we emit a notice saying 'no input rows'.
+	 *
+	 * NOTE: The fault-injector only triggers once, so we emit the notice
+	 * when we finish spooling all the tuples of the first partition.
+	 */
+	if (winstate->partition_spooled &&
+		winstate->currentpos >= winstate->spooled_rows &&
+		SIMPLE_FAULT_INJECTOR("winagg_after_spool_tuples") == FaultInjectorTypeSkip)
+	{
+		if (winstate->buffer)
+		{
+			if (tuplestore_in_memory(winstate->buffer))
+				ereport(NOTICE, (errmsg("winagg: tuplestore fitted in memory")));
+			else
+				ereport(NOTICE, (errmsg("winagg: tuplestore spilled to disk")));
+		}
+		else
+			ereport(NOTICE, (errmsg("winagg: no input rows")));
+	}
+#endif
+
 	/* Move to the next partition if we reached the end of this partition */
 	if (winstate->partition_spooled &&
 		winstate->currentpos >= winstate->spooled_rows)
diff --git a/src/test/regress/expected/misc_jiras.out b/src/test/regress/expected/misc_jiras.out
index 2c9e78fab5a..ebe7446972e 100644
--- a/src/test/regress/expected/misc_jiras.out
+++ b/src/test/regress/expected/misc_jiras.out
@@ -1,4 +1,5 @@
 drop schema if exists misc_jiras;
+NOTICE:  schema "misc_jiras" does not exist, skipping
 create schema misc_jiras;
 --
 -- Test backward scanning of tuplestore spill files.
@@ -12,11 +13,21 @@ create schema misc_jiras;
 create table misc_jiras.t1 (c1 int, c2 text, c3 smallint) distributed by (c1);
 insert into misc_jiras.t1 select i % 13, md5(i::text), i % 3
   from generate_series(1, 20000) i;
--- tuplestore uses work_mem to control the in-memory data size, set a small
--- value to trigger the spilling.
-set work_mem to '64kB';
-WARNING:  "work_mem": setting is deprecated, and may be removed in a future release.
+-- tuplestore in windowagg uses statement_mem to control the in-memory data size,
+-- set a small value to trigger the spilling.
+set statement_mem to '512kB';
 set extra_float_digits=0; -- the last decimal digits are somewhat random
+-- Inject fault at 'winagg_after_spool_tuples' to show that the tuplestore spills
+-- to disk.
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault 
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
 select sum(cc) from (
     select c1
          , c2
@@ -28,13 +39,24 @@ select sum(cc) from (
       from misc_jiras.t1
      group by 1, 2
 ) tt;
+NOTICE:  winagg: tuplestore spilled to disk  (seg0 slice1 127.0.0.1:7002 pid=54719)
+NOTICE:  winagg: tuplestore spilled to disk  (seg1 slice1 127.0.0.1:7003 pid=54720)
+NOTICE:  winagg: tuplestore spilled to disk  (seg2 slice1 127.0.0.1:7004 pid=54721)
    sum   
 ---------
  10006.5
 (1 row)
 
-reset work_mem;
-WARNING:  "work_mem": setting is deprecated, and may be removed in a future release.
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault 
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+reset statement_mem;
 -- non-ASCII multibyte character should show up correctly in error messages.
 select '溋' || (B'1');
 ERROR:  "溋" is not a valid binary digit
diff --git a/src/test/regress/expected/statement_mem_for_windowagg.out b/src/test/regress/expected/statement_mem_for_windowagg.out
new file mode 100644
index 00000000000..a2e44474ec3
--- /dev/null
+++ b/src/test/regress/expected/statement_mem_for_windowagg.out
@@ -0,0 +1,176 @@
+CREATE TABLE dummy_table(x int, y int) DISTRIBUTED BY (y);
+INSERT INTO dummy_table SELECT generate_series(0, 10000), 0;
+INSERT INTO dummy_table SELECT generate_series(0, 10000), 3;
+INSERT INTO dummy_table SELECT generate_series(0, 10000), 10;
+-- 1. Test that if we set statement_mem to a larger value, the tuplestore
+-- for caching the tuples in partition used in WindowAgg is able to be fitted
+-- in memory.
+SET statement_mem TO '2048kB';
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault 
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+EXPLAIN ANALYZE SELECT AVG(x) OVER (PARTITION BY y) FROM dummy_table;
+NOTICE:  winagg: tuplestore fitted in memory  (seg1 slice1 127.0.0.1:7003 pid=43473)
+NOTICE:  winagg: tuplestore fitted in memory  (seg0 slice1 127.0.0.1:7002 pid=43472)
+NOTICE:  winagg: tuplestore fitted in memory  (seg2 slice1 127.0.0.1:7004 pid=43474)
+                                                         QUERY PLAN                                                          
+-----------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=8) (actual time=6.520..15.872 rows=30003 loops=1)
+   ->  WindowAgg  (cost=0.00..431.00 rows=1 width=8) (actual time=10.607..13.115 rows=10001 loops=1)
+         Partition By: y
+         ->  Sort  (cost=0.00..431.00 rows=1 width=8) (actual time=5.298..6.324 rows=10001 loops=1)
+               Sort Key: y
+               Sort Method:  quicksort  Memory: 5040kB
+               ->  Seq Scan on dummy_table  (cost=0.00..431.00 rows=1 width=8) (actual time=0.036..3.299 rows=10001 loops=1)
+ Planning Time: 5.241 ms
+   (slice0)    Executor memory: 48K bytes.
+   (slice1)    Executor memory: 606K bytes avg x 3 workers, 606K bytes max (seg0).  Work_mem: 606K bytes max.
+ Memory used:  2048kB
+ Optimizer: Pivotal Optimizer (GPORCA)
+ Execution Time: 17.225 ms
+(13 rows)
+
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault 
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+-- 2. Test that if we set statement_mem to a smaller value, the tuplestore
+-- for caching the tuples in partition used in WindowAgg will be spilled to disk.
+SET statement_mem TO '1024kB';
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault 
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+EXPLAIN ANALYZE SELECT AVG(x) OVER (PARTITION BY y) FROM dummy_table;
+NOTICE:  winagg: tuplestore spilled to disk  (seg0 slice1 127.0.0.1:7002 pid=43472)
+NOTICE:  winagg: tuplestore spilled to disk  (seg1 slice1 127.0.0.1:7003 pid=43473)
+NOTICE:  winagg: tuplestore spilled to disk  (seg2 slice1 127.0.0.1:7004 pid=43474)
+                                                         QUERY PLAN                                                          
+-----------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=8) (actual time=8.784..13.923 rows=30003 loops=1)
+   ->  WindowAgg  (cost=0.00..431.00 rows=1 width=8) (actual time=8.390..9.720 rows=10001 loops=1)
+         Partition By: y
+         ->  Sort  (cost=0.00..431.00 rows=1 width=8) (actual time=3.125..4.135 rows=10001 loops=1)
+               Sort Key: y
+               Sort Method:  external merge  Disk: 6144kB
+               ->  Seq Scan on dummy_table  (cost=0.00..431.00 rows=1 width=8) (actual time=0.032..1.589 rows=10001 loops=1)
+ Planning Time: 3.174 ms
+   (slice0)    Executor memory: 42K bytes.
+   (slice1)    Executor memory: 391K bytes avg x 3 workers, 391K bytes max (seg0).  Work_mem: 391K bytes max.
+ Memory used:  1024kB
+ Optimizer: Pivotal Optimizer (GPORCA)
+ Execution Time: 15.235 ms
+(13 rows)
+
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault 
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+-- 3. Test that if we set statement_mem to a larger value, the tuplesort
+-- operation in DISTINCT-qualified WindowAgg is able to be fitted in memory.
+SET statement_mem TO '1024kB';
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault_infinite 
+--------------------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+EXPLAIN ANALYZE SELECT AVG(DISTINCT x) OVER (PARTITION BY y) FROM dummy_table;
+NOTICE:  distinct winagg sortstats: sort operation fitted in memory  (seg0 slice1 127.0.0.1:7002 pid=43472)
+NOTICE:  distinct winagg sortstats: sort operation fitted in memory  (seg1 slice1 127.0.0.1:7003 pid=43473)
+NOTICE:  distinct winagg sortstats: sort operation fitted in memory  (seg2 slice1 127.0.0.1:7004 pid=43474)
+                                                              QUERY PLAN                                                               
+---------------------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=2446.06..4096.31 rows=86100 width=36) (actual time=10.348..16.027 rows=30003 loops=1)
+   ->  WindowAgg  (cost=2446.06..2948.31 rows=28700 width=36) (actual time=10.186..11.795 rows=10001 loops=1)
+         Partition By: y
+         ->  Sort  (cost=2446.06..2517.81 rows=28700 width=8) (actual time=3.678..4.683 rows=10001 loops=1)
+               Sort Key: y
+               Sort Method:  external merge  Disk: 6144kB
+               ->  Seq Scan on dummy_table  (cost=0.00..321.00 rows=28700 width=8) (actual time=0.030..1.919 rows=10001 loops=1)
+ Planning Time: 1.695 ms
+   (slice0)    Executor memory: 50K bytes.
+   (slice1)    Executor memory: 391K bytes avg x 3 workers, 391K bytes max (seg0).  Work_mem: 391K bytes max.
+ Memory used:  1024kB
+ Optimizer: Postgres query optimizer
+ Execution Time: 18.747 ms
+(13 rows)
+
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault_infinite 
+--------------------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+-- 4. Test that if we set statement_mem to a smaller value, the tuplesort
+-- operation in DISTINCT-qualified WindowAgg will be spilled to disk.
+SET statement_mem TO '128kB';
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault_infinite 
+--------------------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+EXPLAIN ANALYZE SELECT AVG(DISTINCT x) OVER (PARTITION BY y) FROM dummy_table;
+NOTICE:  distinct winagg sortstats: sort operation spilled to disk  (seg0 slice1 127.0.0.1:7002 pid=43472)
+NOTICE:  distinct winagg sortstats: sort operation spilled to disk  (seg1 slice1 127.0.0.1:7003 pid=43473)
+NOTICE:  distinct winagg sortstats: sort operation spilled to disk  (seg2 slice1 127.0.0.1:7004 pid=43474)
+                                                              QUERY PLAN                                                               
+---------------------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=2446.06..4096.31 rows=86100 width=36) (actual time=13.040..19.232 rows=30003 loops=1)
+   ->  WindowAgg  (cost=2446.06..2948.31 rows=28700 width=36) (actual time=12.768..14.527 rows=10001 loops=1)
+         Partition By: y
+         ->  Sort  (cost=2446.06..2517.81 rows=28700 width=8) (actual time=4.278..5.449 rows=10001 loops=1)
+               Sort Key: y
+               Sort Method:  external merge  Disk: 9216kB
+               ->  Seq Scan on dummy_table  (cost=0.00..321.00 rows=28700 width=8) (actual time=0.029..1.746 rows=10001 loops=1)
+ Planning Time: 1.509 ms
+   (slice0)    Executor memory: 39K bytes.
+   (slice1)    Executor memory: 275K bytes avg x 3 workers, 275K bytes max (seg0).  Work_mem: 275K bytes max.
+ Memory used:  128kB
+ Optimizer: Postgres query optimizer
+ Execution Time: 22.056 ms
+(13 rows)
+
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+ gp_inject_fault_infinite 
+--------------------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+
+-- Do some clean-ups.
+DROP TABLE dummy_table;
+RESET statement_mem;
diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule
index c12d6ad33f5..66c5129c356 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -54,10 +54,12 @@ test: leastsquares opr_sanity_gp decode_expr bitmapscan bitmapscan_ao case_gp li
 test: gpcopy
 
 test: orca_static_pruning orca_groupingsets_fallbacks
-test: filter gpctas gpdist gpdist_opclasses gpdist_legacy_opclasses matrix sublink table_functions olap_setup complex opclass_ddl information_schema guc_env_var gp_explain distributed_transactions explain_format olap_plans misc_jiras gp_copy_dtx
+test: filter gpctas gpdist gpdist_opclasses gpdist_legacy_opclasses matrix sublink table_functions olap_setup complex opclass_ddl information_schema guc_env_var gp_explain distributed_transactions explain_format olap_plans gp_copy_dtx
 # below test(s) inject faults so each of them need to be in a separate group
 test: guc_gp
 test: toast
+test: misc_jiras
+test: statement_mem_for_windowagg
 
 # namespace_gp test will show diff if concurrent tests use temporary tables.
 # So run it separately.
diff --git a/src/test/regress/sql/misc_jiras.sql b/src/test/regress/sql/misc_jiras.sql
index b90e908f37c..1536496d505 100644
--- a/src/test/regress/sql/misc_jiras.sql
+++ b/src/test/regress/sql/misc_jiras.sql
@@ -15,12 +15,17 @@ create table misc_jiras.t1 (c1 int, c2 text, c3 smallint) distributed by (c1);
 insert into misc_jiras.t1 select i % 13, md5(i::text), i % 3
   from generate_series(1, 20000) i;
 
--- tuplestore uses work_mem to control the in-memory data size, set a small
--- value to trigger the spilling.
-set work_mem to '64kB';
+-- tuplestore in windowagg uses statement_mem to control the in-memory data size,
+-- set a small value to trigger the spilling.
+set statement_mem to '512kB';
 
 set extra_float_digits=0; -- the last decimal digits are somewhat random
 
+-- Inject fault at 'winagg_after_spool_tuples' to show that the tuplestore spills
+-- to disk.
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
 select sum(cc) from (
     select c1
          , c2
@@ -33,7 +38,10 @@ select sum(cc) from (
      group by 1, 2
 ) tt;
 
-reset work_mem;
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+reset statement_mem;
 
 -- non-ASCII multibyte character should show up correctly in error messages.
 select '溋' || (B'1');
diff --git a/src/test/regress/sql/statement_mem_for_windowagg.sql b/src/test/regress/sql/statement_mem_for_windowagg.sql
new file mode 100644
index 00000000000..28339d55ba7
--- /dev/null
+++ b/src/test/regress/sql/statement_mem_for_windowagg.sql
@@ -0,0 +1,57 @@
+CREATE TABLE dummy_table(x int, y int) DISTRIBUTED BY (y);
+INSERT INTO dummy_table SELECT generate_series(0, 10000), 0;
+INSERT INTO dummy_table SELECT generate_series(0, 10000), 3;
+INSERT INTO dummy_table SELECT generate_series(0, 10000), 10;
+
+-- 1. Test that if we set statement_mem to a larger value, the tuplestore
+-- for caching the tuples in partition used in WindowAgg is able to be fitted
+-- in memory.
+SET statement_mem TO '2048kB';
+
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+EXPLAIN ANALYZE SELECT AVG(x) OVER (PARTITION BY y) FROM dummy_table;
+
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+-- 2. Test that if we set statement_mem to a smaller value, the tuplestore
+-- for caching the tuples in partition used in WindowAgg will be spilled to disk.
+SET statement_mem TO '1024kB';
+
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+EXPLAIN ANALYZE SELECT AVG(x) OVER (PARTITION BY y) FROM dummy_table;
+
+SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+-- 3. Test that if we set statement_mem to a larger value, the tuplesort
+-- operation in DISTINCT-qualified WindowAgg is able to be fitted in memory.
+SET statement_mem TO '1024kB';
+
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+EXPLAIN ANALYZE SELECT AVG(DISTINCT x) OVER (PARTITION BY y) FROM dummy_table;
+
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+-- 4. Test that if we set statement_mem to a smaller value, the tuplesort
+-- operation in DISTINCT-qualified WindowAgg will be spilled to disk.
+SET statement_mem TO '128kB';
+
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+EXPLAIN ANALYZE SELECT AVG(DISTINCT x) OVER (PARTITION BY y) FROM dummy_table;
+
+SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid)
+  FROM gp_segment_configuration WHERE role='p' AND content>=0;
+
+-- Do some clean-ups.
+DROP TABLE dummy_table;
+RESET statement_mem;

From afbeb68a2cf173b05d7ef1a3eab1b16fc3b2ea94 Mon Sep 17 00:00:00 2001
From: Adam Lee <adlee@vmware.com>
Date: Thu, 24 Feb 2022 06:59:28 +0000
Subject: [PATCH 21/48] Remove num_segments option from the foreign table layer

aa7c74cfa47 - Support num_segments option for foreign servers and tables

The commit above introduced support for the num_segments option; however,
in practice the option on foreign tables confuses users.

This commit removes it from the foreign table layer, so that only foreign
servers support it.
---
 src/backend/commands/foreigncmds.c            |  1 -
 src/backend/foreign/foreign.c                 |  9 +--------
 src/backend/optimizer/util/pathnode.c         | 14 +++++++++++--
 src/backend/optimizer/util/plancat.c          |  3 ---
 src/include/foreign/foreign.h                 |  3 +--
 src/include/nodes/pathnodes.h                 |  3 +--
 src/test/regress/expected/gp_foreign_data.out | 20 ++++++++-----------
 src/test/regress/sql/gp_foreign_data.sql      | 19 +++++++++---------
 8 files changed, 33 insertions(+), 39 deletions(-)

diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
index b39b2c69b43..a61b0b71d35 100644
--- a/src/backend/commands/foreigncmds.c
+++ b/src/backend/commands/foreigncmds.c
@@ -191,7 +191,6 @@ transformGenericOptions(Oid catalogId,
 	if (catalogId != UserMappingRelationId)
 	{
 		SeparateOutMppExecute(&resultOptions);
-		SeparateOutNumSegments(&resultOptions);
 	}
 
 	if (OidIsValid(fdwvalidator))
diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c
index ed619d47348..4e55bf5d606 100644
--- a/src/backend/foreign/foreign.c
+++ b/src/backend/foreign/foreign.c
@@ -366,20 +366,13 @@ GetForeignTable(Oid relid)
 	else
 		ft->options = untransformRelOptions(datum);
 
-	ForeignServer *server = GetForeignServer(ft->serverid);
-
 	ft->exec_location = SeparateOutMppExecute(&ft->options);
 	if (ft->exec_location == FTEXECLOCATION_NOT_DEFINED)
 	{
+		ForeignServer *server = GetForeignServer(ft->serverid);
 		ft->exec_location = server->exec_location;
 	}
 
-	ft->num_segments = SeparateOutNumSegments(&ft->options);
-	if (ft->num_segments <= 0)
-	{
-		ft->num_segments = server->num_segments;
-	}
-
 	ReleaseSysCache(tp);
 
 	return ft;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 71b8ba267e1..540d27abc48 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3639,13 +3639,18 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.total_cost = total_cost;
 	pathnode->path.pathkeys = pathkeys;
 
+	ForeignServer *server = NULL;
 	switch (rel->exec_location)
 	{
 		case FTEXECLOCATION_ANY:
 			CdbPathLocus_MakeGeneral(&(pathnode->path.locus));
 			break;
 		case FTEXECLOCATION_ALL_SEGMENTS:
-			CdbPathLocus_MakeStrewn(&(pathnode->path.locus), rel->num_segments, 0);
+			server = GetForeignServer(rel->serverid);
+			if (server)
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), server->num_segments);
+			else
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), getgpsegmentCount());
 			break;
 		case FTEXECLOCATION_COORDINATOR:
 			CdbPathLocus_MakeEntry(&(pathnode->path.locus));
@@ -3704,13 +3709,18 @@ create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.total_cost = total_cost;
 	pathnode->path.pathkeys = pathkeys;
 
+	ForeignServer *server = NULL;
 	switch (rel->exec_location)
 	{
 		case FTEXECLOCATION_ANY:
 			CdbPathLocus_MakeGeneral(&(pathnode->path.locus));
 			break;
 		case FTEXECLOCATION_ALL_SEGMENTS:
-			CdbPathLocus_MakeStrewn(&(pathnode->path.locus), rel->num_segments, 0);
+			server = GetForeignServer(rel->serverid);
+			if (server)
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), server->num_segments);
+			else
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), getgpsegmentCount());
 			break;
 		case FTEXECLOCATION_COORDINATOR:
 			CdbPathLocus_MakeEntry(&(pathnode->path.locus));
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 1e64cfb06ee..95662fd05f8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -60,7 +60,6 @@
 
 #include "cdb/cdbappendonlyam.h"
 #include "cdb/cdbrelsize.h"
-#include "cdb/cdbutil.h"
 #include "catalog/pg_appendonly.h"
 #include "catalog/pg_foreign_server.h"
 #include "catalog/pg_inherits.h"
@@ -472,14 +471,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 		rel->serverid = GetForeignServerIdByRelId(RelationGetRelid(relation));
 		rel->fdwroutine = GetFdwRoutineForRelation(relation, true);
 		rel->exec_location = GetForeignTable(RelationGetRelid(relation))->exec_location;
-		rel->num_segments = GetForeignTable(RelationGetRelid(relation))->num_segments;
 	}
 	else
 	{
 		rel->serverid = InvalidOid;
 		rel->fdwroutine = NULL;
 		rel->exec_location = FTEXECLOCATION_NOT_DEFINED;
-		rel->num_segments = getgpsegmentCount();
 	}
 
 	/* Collect info about relation's foreign keys, if relevant */
diff --git a/src/include/foreign/foreign.h b/src/include/foreign/foreign.h
index c8226060067..04405241b0c 100644
--- a/src/include/foreign/foreign.h
+++ b/src/include/foreign/foreign.h
@@ -59,8 +59,7 @@ typedef struct ForeignTable
 	Oid			relid;			/* relation Oid */
 	Oid			serverid;		/* server Oid */
 	List	   *options;		/* ftoptions as DefElem list */
-	char		exec_location;  /* execute on COORDINATOR, ANY or ALL SEGMENTS, Cloudberry MPP specific */
-	int32		num_segments;	/* the number of segments of the foreign table */
+	char		exec_location;  /* execute on COORDINATOR, ANY or ALL SEGMENTS, Greenplum MPP specific */
 } ForeignTable;
 
 /* Flags for GetForeignServerExtended */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 695b4dbc5e3..6cb36977a2b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -924,8 +924,7 @@ typedef struct RelOptInfo
 	Oid			serverid;		/* identifies server for the table or join */
 	Oid			userid;			/* identifies user to check access as */
 	bool		useridiscurrent;	/* join is only valid for current user */
-	char		exec_location;  /* execute on MASTER, ANY or ALL SEGMENTS, Cloudberry MPP specific */
-	int32		num_segments;  /* number of segments, Cloudberry MPP specific */
+	char		exec_location;  /* execute on MASTER, ANY or ALL SEGMENTS, Greenplum MPP specific */
 	/* use "struct FdwRoutine" to avoid including fdwapi.h here */
 	struct FdwRoutine *fdwroutine;
 	void	   *fdw_private;
diff --git a/src/test/regress/expected/gp_foreign_data.out b/src/test/regress/expected/gp_foreign_data.out
index c4dbd5bea93..500d395404d 100644
--- a/src/test/regress/expected/gp_foreign_data.out
+++ b/src/test/regress/expected/gp_foreign_data.out
@@ -26,18 +26,14 @@ CREATE FOREIGN TABLE ft3 (
 CREATE FOREIGN TABLE ft4 (
 	c1 int
 ) SERVER s0 OPTIONS (delimiter ',', mpp_execute 'all segments');
--- Test num_segments option
-CREATE SERVER s1 FOREIGN DATA WRAPPER dummy OPTIONS (num_segments '3');
-CREATE FOREIGN TABLE ft5 (
-       c1 int
-) SERVER s1 OPTIONS (delimiter ',', mpp_execute 'all segments', num_segments '5');
-\d+ ft5
-                                       Foreign table "public.ft5"
- Column |  Type   | Collation | Nullable | Default | FDW options | Storage | Stats target | Description 
---------+---------+-----------+----------+---------+-------------+---------+--------------+-------------
- c1     | integer |           |          |         |             | plain   |              | 
-Server: s1
-FDW options: (delimiter ',', mpp_execute 'all segments', num_segments '5')
+-- CREATE FOREIGN SERVER WITH num_segments
+CREATE SERVER s1 FOREIGN DATA WRAPPER dummy OPTIONS (num_segments '5');
+-- CHECK FOREIGN SERVER's OPTIONS
+SELECT srvoptions FROM pg_foreign_server WHERE srvname = 's1';
+    srvoptions    
+------------------
+ {num_segments=5}
+(1 row)
 
 --start_ignore
 DROP FOREIGN DATA WRAPPER dummy CASCADE;
diff --git a/src/test/regress/sql/gp_foreign_data.sql b/src/test/regress/sql/gp_foreign_data.sql
index a26f96f9d50..4db395743ad 100644
--- a/src/test/regress/sql/gp_foreign_data.sql
+++ b/src/test/regress/sql/gp_foreign_data.sql
@@ -2,6 +2,12 @@
 -- Test foreign-data wrapper and server management. Cloudberry MPP specific
 --
 
+-- start_ignore
+DROP SERVER s0 CASCADE;
+DROP SERVER s1 CASCADE;
+DROP FOREIGN DATA WRAPPER dummy CASCADE;
+-- end_ignore
+
 CREATE FOREIGN DATA WRAPPER dummy;
 COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';
 
@@ -21,13 +27,8 @@ CREATE FOREIGN TABLE ft4 (
 	c1 int
 ) SERVER s0 OPTIONS (delimiter ',', mpp_execute 'all segments');
 
--- Test num_segments option
-CREATE SERVER s1 FOREIGN DATA WRAPPER dummy OPTIONS (num_segments '3');
-CREATE FOREIGN TABLE ft5 (
-       c1 int
-) SERVER s1 OPTIONS (delimiter ',', mpp_execute 'all segments', num_segments '5');
-\d+ ft5
+-- CREATE FOREIGN SERVER WITH num_segments
+CREATE SERVER s1 FOREIGN DATA WRAPPER dummy OPTIONS (num_segments '5');
 
---start_ignore
-DROP FOREIGN DATA WRAPPER dummy CASCADE;
---end_ignore
+-- CHECK FOREIGN SERVER's OPTIONS
+SELECT srvoptions FROM pg_foreign_server WHERE srvname = 's1';

From 668e8bb89b65d97391f73e417f2bd61b48c8f92f Mon Sep 17 00:00:00 2001
From: Adam Lee <adlee@vmware.com>
Date: Wed, 23 Feb 2022 13:07:28 +0800
Subject: [PATCH 22/48] Remove coverity pipeline

The pipeline has been out of date for quite a while, and I don't know
whether anyone still uses it for Greenplum development.
---
 concourse/pipelines/pipeline_coverity.yml |  61 -------------
 concourse/scripts/scan_with_coverity.bash | 101 ----------------------
 concourse/tasks/scan_with_coverity.yml    |  17 ----
 3 files changed, 179 deletions(-)
 delete mode 100644 concourse/pipelines/pipeline_coverity.yml
 delete mode 100755 concourse/scripts/scan_with_coverity.bash
 delete mode 100644 concourse/tasks/scan_with_coverity.yml

diff --git a/concourse/pipelines/pipeline_coverity.yml b/concourse/pipelines/pipeline_coverity.yml
deleted file mode 100644
index c055129a906..00000000000
--- a/concourse/pipelines/pipeline_coverity.yml
+++ /dev/null
@@ -1,61 +0,0 @@
-resource_types:
-- name: gcs
-  type: registry-image
-  source:
-    repository: frodenas/gcs-resource
-
-resources:
-- name: gpdb_src
-  type: git
-  source:
-    branch: ((gpdb-git-branch))
-    uri: ((gpdb-git-remote))
-
-- name: centos-coverity
-  type: registry-image
-  source:
-    repository: pivotaldata/centos-coverity
-    username: ((docker_username))
-    password: ((docker_password))
-
-- name: coverity_daily
-  type: time
-  source:
-    location: America/Los_Angeles
-    days: [Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday]
-    start: 4:00 AM
-    stop: 5:00 AM
-
-- name: libquicklz-centos7
-  type: gcs
-  source:
-    bucket: ((gcs-bucket))
-    json_key: ((concourse-gcs-resources-service-account-key)))
-    regexp: gp-internal-artifacts/centos7/libquicklz-(1\.5\.0-.*)-1.el7.x86_64.rpm
-
-- name: libquicklz-devel-centos7
-  type: gcs
-  source:
-    bucket: ((gcs-bucket))
-    json_key: ((concourse-gcs-resources-service-account-key))
-    regexp: gp-internal-artifacts/centos7/libquicklz-devel-(1\.5\.0-.*)-1.el7.x86_64.rpm
-
-jobs:
-
-- name: coverity_scan
-  plan:
-  - get: coverity_daily
-    trigger: true
-  - aggregate:
-    - get: gpdb_src
-    - get: centos-coverity
-    - get: libquicklz-installer
-      resource: libquicklz-centos7
-    - get: libquicklz-devel-installer
-      resource: libquicklz-devel-centos7
-  - task: scan_with_coverity
-    file: gpdb_src/concourse/tasks/scan_with_coverity.yml
-    image: centos-coverity
-    params:
-      COVERITY_TOKEN: ((coverity_token))
-      COVERITY_EMAIL: ((coverity_email))
diff --git a/concourse/scripts/scan_with_coverity.bash b/concourse/scripts/scan_with_coverity.bash
deleted file mode 100755
index 5fab1bb37f2..00000000000
--- a/concourse/scripts/scan_with_coverity.bash
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/bash -l
-set -exo pipefail
-
-BASE_DIR=$(pwd)
-export GPDB_ARTIFACTS_DIR
-GPDB_ARTIFACTS_DIR=$BASE_DIR/$OUTPUT_ARTIFACT_DIR
-
-function prep_env_for_centos() {
-  BLD_ARCH=rhel7_x86_64
-  echo "Detecting java7 path ..."
-  java7_packages=($(rpm -qa | grep -F java-1.7))
-  java7_bin="$(rpm -ql "${java7_packages[@]}" | grep /jre/bin/java$)"
-  alternatives --set java "$java7_bin"
-  export JAVA_HOME="${java7_bin/jre\/bin\/java/}"
-  ln -sf /usr/bin/xsubpp /usr/share/perl5/ExtUtils/xsubpp
-
-  export PATH=${JAVA_HOME}/bin:${PATH}
-}
-
-function generate_build_number() {
-  pushd gpdb_src
-    #Only if its git repro, add commit SHA as build number
-    # BUILD_NUMBER file is used by getversion file in GPDB to append to version
-    if [ -d .git ] ; then
-      echo "commit:$(git rev-parse HEAD)" > BUILD_NUMBER
-    fi
-  popd
-}
-
-function make_sync_tools() {
-  pushd gpdb_src/gpAux
-    make sync_tools
-    # We have compiled LLVM with native zlib on CentOS6 and not from
-    # the zlib downloaded from artifacts.  Therefore, remove the zlib
-    # downloaded from artifacts in order to use the native zlib.
-    find ext -name 'libz.*' -exec rm -f {} \;
-  popd
-}
-
-function build_gpdb_and_scan_with_coverity() {
-  local cov_int_dir="$1"
-
-  pushd gpdb_src/gpAux
-    cov-build --dir "$cov_int_dir" make BLD_TARGETS="gpdb" GPROOT=/usr/local
-  popd
-}
-
-function upload_to_coverity() {
-  (
-    set +x
-    local cov_int_base="$1"
-    local sha="$2"
-    local cov_int_tar="$cov_int_base"/cov-int.tgz
-
-    tar czfp "$cov_int_tar" -C "$cov_int_base" cov-int
-
-    response=$(curl --verbose \
-    --progress-bar \
-    --form token="$COVERITY_TOKEN" \
-    --form email="$COVERITY_EMAIL" \
-    --form file=@"$cov_int_tar" \
-    --form version="$sha" \
-    --form description="Generated by Concourse on https://gpdb.data.pivotal.ci/" \
-    https://scan.coverity.com/builds?project=greenplum-db%2Fgpdb)
-
-    ERROR_STRINGS=(
-    "quota for this project has been reached"
-    )
-
-    for ERR in "${ERROR_STRINGS[@]}"; do
-      if echo "$response" | grep -q "$ERR"; then
-	echo "Coverty returned: \"$response\""
-	echo "Response matches following know error: \"$ERR\""
-        exit 1
-      fi
-    done
-  )
-}
-
-function install_deps_for_centos() {
-  # quicklz is proprietary code that we cannot put in our public Docker images.
-  rpm -i libquicklz-installer/libquicklz-*.rpm
-  rpm -i libquicklz-devel-installer/libquicklz-*.rpm
-}
-
-function _main() {
-  install_deps_for_centos
-  prep_env_for_centos
-  generate_build_number
-  make_sync_tools
-
-  /opt/prepare-coverity.bash
-
-  mkdir -p "$GPDB_ARTIFACTS_DIR"/cov-int
-  build_gpdb_and_scan_with_coverity "$GPDB_ARTIFACTS_DIR"/cov-int
-
-  sha=$(cd gpdb_src && git rev-parse HEAD)
-  upload_to_coverity "$GPDB_ARTIFACTS_DIR" "$sha"
-}
-
-_main "$@"
diff --git a/concourse/tasks/scan_with_coverity.yml b/concourse/tasks/scan_with_coverity.yml
deleted file mode 100644
index b550e825d37..00000000000
--- a/concourse/tasks/scan_with_coverity.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-platform: linux
-image_resource:
-  type: registry-image
-  source:
-    repository: pivotaldata/centos-coverity
-inputs:
-  - name: gpdb_src
-  - name: libquicklz-installer
-  - name: libquicklz-devel-installer
-outputs:
-  - name: gpdb_coverity_artifacts
-run:
-  path: gpdb_src/concourse/scripts/scan_with_coverity.bash
-params:
-  OUTPUT_ARTIFACT_DIR: gpdb_coverity_artifacts
-  COVERITY_TOKEN:
-  COVERITY_EMAIL:

From fd47be99d7e7f8225078f70e00646cdfa59df3c9 Mon Sep 17 00:00:00 2001
From: Annpurna Shahani <30636132+Annu149@users.noreply.github.com>
Date: Fri, 25 Feb 2022 09:25:57 +0530
Subject: [PATCH 23/48] Changes to avoid gpstop errors when standby is not
 reachable. (#13062)

* Changes to avoid gpstop errors when standby is not reachable.

Added check for standby reachability before stopping the standby to avoid gpstop failures when standby is unreachable.
Added behave test case for gpstop when standby is not reachable.
Addressed review comments (Updated warning message)
---
 gpMgmt/bin/gpstop                             |  5 ++
 gpMgmt/test/behave/mgmt_utils/gpstop.feature  | 10 ++++
 .../test/behave/mgmt_utils/steps/gpstart.py   | 50 +++++++++++--------
 3 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/gpMgmt/bin/gpstop b/gpMgmt/bin/gpstop
index fb852d94f2e..6862e42ba90 100755
--- a/gpMgmt/bin/gpstop
+++ b/gpMgmt/bin/gpstop
@@ -34,6 +34,7 @@ try:
     from gppylib.gp_era import GpEraFile
     from gppylib.operations.utils import ParallelOperation, RemoteOperation
     from gppylib.operations.rebalanceSegments import ReconfigDetectionSQLQueryCommand
+    from gppylib.operations.detect_unreachable_hosts import get_unreachable_segment_hosts
 except ImportError as e:
     sys.exit('ERROR: Cannot import modules.  Please check that you have sourced greenplum_path.sh.  Detail: ' + str(e))
 
@@ -514,6 +515,10 @@ class GpStop:
         if self.gparray.standbyCoordinator:
             standby = self.gparray.standbyCoordinator
 
+            if get_unreachable_segment_hosts([standby.hostname], 1):
+                logger.warning("Standby is unreachable, skipping shutdown on standby")
+                return True
+
             logger.info("Stopping coordinator standby host %s mode=%s" % (standby.hostname, self.mode))
             try:
                 cmd = SegmentStop("stopping coordinator standby",
diff --git a/gpMgmt/test/behave/mgmt_utils/gpstop.feature b/gpMgmt/test/behave/mgmt_utils/gpstop.feature
index e5ea0c41b3e..7ff5b86ae7c 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpstop.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpstop.feature
@@ -27,3 +27,13 @@ Feature: gpstop behave tests
           And gpstop should print "There were 1 user connections at the start of the shutdown" to stdout
           And gpstop should print "'\(s\)mart_mode', '\(f\)ast_mode', '\(i\)mmediate_mode'" to stdout
          Then gpstop should return a return code of 0
+
+    @demo_cluster
+    Scenario: gpstop succeeds even if the standby host is unreachable
+        Given the database is running
+          And the catalog has a standby coordinator entry
+         When the standby host is made unreachable
+          And the user runs "gpstop -a"
+         Then gpstop should print "Standby is unreachable, skipping shutdown on standby" to stdout
+          And gpstop should return a return code of 0
+          And the standby host is made reachable
diff --git a/gpMgmt/test/behave/mgmt_utils/steps/gpstart.py b/gpMgmt/test/behave/mgmt_utils/steps/gpstart.py
index cfb23d95fa8..2046dbcae4f 100644
--- a/gpMgmt/test/behave/mgmt_utils/steps/gpstart.py
+++ b/gpMgmt/test/behave/mgmt_utils/steps/gpstart.py
@@ -37,30 +37,36 @@ def change_hostname(content, preferred_role, hostname):
 def impl(context):
     change_hostname(-1, 'm', 'invalid_host')
 
-    def cleanup(context):
-        """
-        Reverses the above SQL by starting up in coordinator-only utility mode. Since
-        the standby host is incorrect, a regular gpstart call won't work.
-        """
-        utils.stop_database_if_started(context)
-
-        subprocess.check_call(['gpstart', '-am'])
-        _run_sql("""
-            SET allow_system_table_mods='true';
-            UPDATE gp_segment_configuration
-               SET hostname = coordinator.hostname,
-                    address = coordinator.address
-              FROM (
-                     SELECT hostname, address
-                       FROM gp_segment_configuration
-                      WHERE content = -1 and role = 'p'
-                   ) coordinator
-             WHERE content = -1 AND role = 'm'
-        """, {'gp_role': 'utility'})
-        subprocess.check_call(['gpstop', '-am'])
-
     context.add_cleanup(cleanup, context)
 
+@when('the standby host is made reachable')
+@then('the standby host is made reachable')
+def impl(context):
+    cleanup(context)
+
+"""
+Reverses the changes made by change_hostname() by starting the cluster in coordinator-only utility mode.
+Since the standby host is incorrect, a regular gpstart call won't work.
+"""
+def cleanup(context):
+
+    utils.stop_database_if_started(context)
+
+    subprocess.check_call(['gpstart', '-am'])
+    _run_sql("""
+        SET allow_system_table_mods='true';
+        UPDATE gp_segment_configuration
+           SET hostname = coordinator.hostname,
+                address = coordinator.address
+          FROM (
+                 SELECT hostname, address
+                   FROM gp_segment_configuration
+                  WHERE content = -1 and role = 'p'
+               ) coordinator
+         WHERE content = -1 AND role = 'm'
+    """, {'gp_role': 'utility'})
+    subprocess.check_call(['gpstop', '-am'])
+
 def _handle_sigpipe():
     """
     Work around https://bugs.python.org/issue1615376, which is not fixed until

From 67db502f295d2946845dc210d08a7f8916aad8b3 Mon Sep 17 00:00:00 2001
From: Xing Guo <higuoxing@gmail.com>
Date: Mon, 28 Feb 2022 15:08:30 +0800
Subject: [PATCH 24/48] Set statement_mem to a larger value in tests to make
 pipelines happy. (#13144)

We observed that Greenplum built without cassert could not pass these
tests. When gpdb is configured with --enable-cassert, the minimum
statement_mem it accepts is 50kB, whereas without --enable-cassert the
minimum is 1000kB (see backend/utils/misc/guc_gp.c). This patch raises
the statement_mem values (and the test data sizes) used by the tests so
that the pipelines pass with both kinds of build.
---
 src/test/regress/expected/misc_jiras.out             |  6 +++---
 .../regress/expected/statement_mem_for_windowagg.out | 12 ++++++------
 src/test/regress/sql/misc_jiras.sql                  |  4 ++--
 src/test/regress/sql/statement_mem_for_windowagg.sql | 12 ++++++------
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/test/regress/expected/misc_jiras.out b/src/test/regress/expected/misc_jiras.out
index ebe7446972e..ab418abb849 100644
--- a/src/test/regress/expected/misc_jiras.out
+++ b/src/test/regress/expected/misc_jiras.out
@@ -12,10 +12,10 @@ create schema misc_jiras;
 --
 create table misc_jiras.t1 (c1 int, c2 text, c3 smallint) distributed by (c1);
 insert into misc_jiras.t1 select i % 13, md5(i::text), i % 3
-  from generate_series(1, 20000) i;
+  from generate_series(1, 40000) i;
 -- tuplestore in windowagg uses statement_mem to control the in-memory data size,
 -- set a small value to trigger the spilling.
-set statement_mem to '512kB';
+set statement_mem to '1024kB';
 set extra_float_digits=0; -- the last decimal digits are somewhat random
 -- Inject fault at 'winagg_after_spool_tuples' to show that the tuplestore spills
 -- to disk.
@@ -44,7 +44,7 @@ NOTICE:  winagg: tuplestore spilled to disk  (seg1 slice1 127.0.0.1:7003 pid=547
 NOTICE:  winagg: tuplestore spilled to disk  (seg2 slice1 127.0.0.1:7004 pid=54721)
    sum   
 ---------
- 10006.5
+ 20006.5
 (1 row)
 
 SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
diff --git a/src/test/regress/expected/statement_mem_for_windowagg.out b/src/test/regress/expected/statement_mem_for_windowagg.out
index a2e44474ec3..d41610f6a30 100644
--- a/src/test/regress/expected/statement_mem_for_windowagg.out
+++ b/src/test/regress/expected/statement_mem_for_windowagg.out
@@ -1,11 +1,11 @@
 CREATE TABLE dummy_table(x int, y int) DISTRIBUTED BY (y);
-INSERT INTO dummy_table SELECT generate_series(0, 10000), 0;
-INSERT INTO dummy_table SELECT generate_series(0, 10000), 3;
-INSERT INTO dummy_table SELECT generate_series(0, 10000), 10;
+INSERT INTO dummy_table SELECT generate_series(0, 20000), 0;
+INSERT INTO dummy_table SELECT generate_series(0, 20000), 3;
+INSERT INTO dummy_table SELECT generate_series(0, 20000), 10;
 -- 1. Test that if we set statement_mem to a larger value, the tuplestore
 -- for caching the tuples in partition used in WindowAgg is able to be fitted
 -- in memory.
-SET statement_mem TO '2048kB';
+SET statement_mem TO '4096kB';
 SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
   FROM gp_segment_configuration WHERE role='p' AND content>=0;
  gp_inject_fault 
@@ -89,7 +89,7 @@ SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
 
 -- 3. Test that if we set statement_mem to a larger value, the tuplesort
 -- operation in DISTINCT-qualified WindowAgg is able to be fitted in memory.
-SET statement_mem TO '1024kB';
+SET statement_mem TO '4096kB';
 SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
   FROM gp_segment_configuration WHERE role='p' AND content>=0;
  gp_inject_fault_infinite 
@@ -131,7 +131,7 @@ SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid)
 
 -- 4. Test that if we set statement_mem to a smaller value, the tuplesort
 -- operation in DISTINCT-qualified WindowAgg will be spilled to disk.
-SET statement_mem TO '128kB';
+SET statement_mem TO '1024kB';
 SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
   FROM gp_segment_configuration WHERE role='p' AND content>=0;
  gp_inject_fault_infinite 
diff --git a/src/test/regress/sql/misc_jiras.sql b/src/test/regress/sql/misc_jiras.sql
index 1536496d505..4e96bc11e52 100644
--- a/src/test/regress/sql/misc_jiras.sql
+++ b/src/test/regress/sql/misc_jiras.sql
@@ -13,11 +13,11 @@ create schema misc_jiras;
 
 create table misc_jiras.t1 (c1 int, c2 text, c3 smallint) distributed by (c1);
 insert into misc_jiras.t1 select i % 13, md5(i::text), i % 3
-  from generate_series(1, 20000) i;
+  from generate_series(1, 40000) i;
 
 -- tuplestore in windowagg uses statement_mem to control the in-memory data size,
 -- set a small value to trigger the spilling.
-set statement_mem to '512kB';
+set statement_mem to '1024kB';
 
 set extra_float_digits=0; -- the last decimal digits are somewhat random
 
diff --git a/src/test/regress/sql/statement_mem_for_windowagg.sql b/src/test/regress/sql/statement_mem_for_windowagg.sql
index 28339d55ba7..5c4b96a1761 100644
--- a/src/test/regress/sql/statement_mem_for_windowagg.sql
+++ b/src/test/regress/sql/statement_mem_for_windowagg.sql
@@ -1,12 +1,12 @@
 CREATE TABLE dummy_table(x int, y int) DISTRIBUTED BY (y);
-INSERT INTO dummy_table SELECT generate_series(0, 10000), 0;
-INSERT INTO dummy_table SELECT generate_series(0, 10000), 3;
-INSERT INTO dummy_table SELECT generate_series(0, 10000), 10;
+INSERT INTO dummy_table SELECT generate_series(0, 20000), 0;
+INSERT INTO dummy_table SELECT generate_series(0, 20000), 3;
+INSERT INTO dummy_table SELECT generate_series(0, 20000), 10;
 
 -- 1. Test that if we set statement_mem to a larger value, the tuplestore
 -- for caching the tuples in partition used in WindowAgg is able to be fitted
 -- in memory.
-SET statement_mem TO '2048kB';
+SET statement_mem TO '4096kB';
 
 SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid)
   FROM gp_segment_configuration WHERE role='p' AND content>=0;
@@ -30,7 +30,7 @@ SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
 
 -- 3. Test that if we set statement_mem to a larger value, the tuplesort
 -- operation in DISTINCT-qualified WindowAgg is able to be fitted in memory.
-SET statement_mem TO '1024kB';
+SET statement_mem TO '4096kB';
 
 SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
   FROM gp_segment_configuration WHERE role='p' AND content>=0;
@@ -42,7 +42,7 @@ SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid)
 
 -- 4. Test that if we set statement_mem to a smaller value, the tuplesort
 -- operation in DISTINCT-qualified WindowAgg will be spilled to disk.
-SET statement_mem TO '128kB';
+SET statement_mem TO '1024kB';
 
 SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'skip', dbid)
   FROM gp_segment_configuration WHERE role='p' AND content>=0;

From e0a72ff0de817070ec8d55f5416ab814653daab8 Mon Sep 17 00:00:00 2001
From: Xing Guo <higuoxing@gmail.com>
Date: Mon, 28 Feb 2022 18:13:34 +0800
Subject: [PATCH 25/48] Trying to make parallel_retrieve_cursor/fault_inject
 stable. (#13141)

This patch is trying to make isolation2/parallel_retrieve_cursor/fault_inject
stable by replacing 'sleep' fault with 'suspend' fault.

Below is the regression.diffs file fetched from the PR pipeline.

```diff
diff -I HINT: -I CONTEXT: -I GP_IGNORE: -U3 /tmp/build/d62a0504/gpdb_src/src/test/isolation2/expected/parallel_retrieve_cursor/fault_inject.out /tmp/build/d62a0504/gpdb_src/src/test/isolation2/results/parallel_retrieve_cursor/fault_inject.out
--- /tmp/build/d62a0504/gpdb_src/src/test/isolation2/expected/parallel_retrieve_cursor/fault_inject.out	2022-02-24 16:18:00.128491568 +0000
+++ /tmp/build/d62a0504/gpdb_src/src/test/isolation2/results/parallel_retrieve_cursor/fault_inject.out	2022-02-24 16:18:00.940556278 +0000
@@ -684,6 +684,3256 @@
  READY
 (1 row)
 2R&: @pre_run 'set_endpoint_variable @ENDPOINT6': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT6";  <waiting ...>
+FAILED:  Forked command is not blocking; got output: a
+-------
+ 5
+ 6
+ 9
+ ...
+ 9999
+ 10000
+(3247 rows)

 1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1';
  state
@@ -699,7 +3949,7 @@
 0R<:  <... completed>
 ERROR:  endpoint is not available because the parallel retrieve cursor was aborted (cdbendpointretrieve.c:LINE_NUM)
 2R<:  <... completed>
-ERROR:  endpoint is not available because the parallel retrieve cursor was aborted (cdbendpointretrieve.c:LINE_NUM)
+FAILED:  Execution failed

 1<:  <... completed>
 FAILED:  Execution failed
```
---
 .../fault_inject.source                       | 39 +++++++---
 .../fault_inject.source                       | 71 ++++++++++++++++---
 2 files changed, 88 insertions(+), 22 deletions(-)

diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source b/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source
index 3e592b5968c..c04643d14f8 100644
--- a/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source
+++ b/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source
@@ -114,8 +114,8 @@ insert into t1 select generate_series(1,100);
 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', 2);
 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', 3);
 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', 4);
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 2::smallint);
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 4::smallint);
+1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 2::smallint);
+1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 4::smallint);
 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'interrupt', '', '', '', 5, 5, 0, 3::smallint);
 
 1: BEGIN;
@@ -132,8 +132,14 @@ insert into t1 select generate_series(1,100);
 1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1';
 1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5";
 
+SELECT gp_wait_until_triggered_fault('fetch_tuples_from_endpoint', 1, 2);
+SELECT gp_wait_until_triggered_fault('fetch_tuples_from_endpoint', 1, 4);
+
 1<:
 
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 2);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 4);
+
 0R<:
 2R<:
 
@@ -144,9 +150,9 @@ insert into t1 select generate_series(1,100);
 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', 4);
 
 -- Test6: close PARALLEL RETRIEVE CURSOR during retrieve
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 2::smallint);
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 4::smallint);
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 3::smallint);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 2::smallint);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 4::smallint);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 3::smallint);
 
 1: BEGIN;
 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1;
@@ -164,6 +170,10 @@ insert into t1 select generate_series(1,100);
 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0);
 1: CLOSE c1;
 
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 2);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 3);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 4);
+
 0R<:
 1R<:
 2R<:
@@ -181,22 +191,22 @@ DROP TABLE IF EXISTS t2;
 CREATE TABLE t2 (a INT) DISTRIBUTED by (a);
 insert into t2 select generate_series(1,10000);
 
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid)
     FROM gp_segment_configuration
     WHERE content=1 AND role='p';
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'interrupt', '', '', '', 1000, 1000, 0, dbid)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'interrupt', '', '', '', 1000, 1000, 0, dbid)
     FROM gp_segment_configuration
     WHERE content=1 AND role='p';
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid)
     FROM gp_segment_configuration
     WHERE content=0 AND role='p';
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 900, 900, 2, dbid)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 900, 900, 0, dbid)
     FROM gp_segment_configuration
     WHERE content=0 AND role='p';
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid)
     FROM gp_segment_configuration
     WHERE content=2 AND role='p';
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 800, 800, 2, dbid)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 800, 800, 0, dbid)
     FROM gp_segment_configuration
     WHERE content=2 AND role='p';
 
@@ -216,6 +226,13 @@ insert into t2 select generate_series(1,10000);
 
 1<:
 
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', dbid)
+    FROM gp_segment_configuration
+    WHERE content=0 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', dbid)
+    FROM gp_segment_configuration
+    WHERE content=2 AND role='p';
+
 0R<:
 2R<:
 
diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source b/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source
index a047a0c8294..044e288796b 100644
--- a/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source
+++ b/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source
@@ -457,12 +457,12 @@ ROLLBACK
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 2::smallint);
+1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 2::smallint);
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 4::smallint);
+1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 4::smallint);
  gp_inject_fault 
 -----------------
  Success:        
@@ -506,9 +506,31 @@ DECLARE
 1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5";
 ERROR:  canceling statement due to user request
 
+SELECT gp_wait_until_triggered_fault('fetch_tuples_from_endpoint', 1, 2);
+ gp_wait_until_triggered_fault 
+-------------------------------
+ Success:                      
+(1 row)
+SELECT gp_wait_until_triggered_fault('fetch_tuples_from_endpoint', 1, 4);
+ gp_wait_until_triggered_fault 
+-------------------------------
+ Success:                      
+(1 row)
+
 1<:  <... completed>
 ERROR:  canceling MPP operation: "Endpoint retrieve statement aborted"
 
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 2);
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 4);
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+
 0R<:  <... completed>
 ERROR:  endpoint is not available because the parallel retrieve cursor was aborted (cdbendpointretrieve.c:245)
 2R<:  <... completed>
@@ -535,17 +557,17 @@ ROLLBACK
 (1 row)
 
 -- Test6: close PARALLEL RETRIEVE CURSOR during retrieve
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 2::smallint);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 2::smallint);
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 4::smallint);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 4::smallint);
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 5, 6, 3, 3::smallint);
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5, 0, 3::smallint);
  gp_inject_fault 
 -----------------
  Success:        
@@ -590,6 +612,22 @@ DECLARE
 1: CLOSE c1;
 CLOSE
 
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 2);
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 3);
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', 4);
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+
 0R<:  <... completed>
 ERROR:  endpoint is not available because the parallel retrieve cursor was aborted (cdbendpointretrieve.c:245)
 1R<:  <... completed>
@@ -628,32 +666,32 @@ CREATE
 insert into t2 select generate_series(1,10000);
 INSERT 10000
 
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid) FROM gp_segment_configuration WHERE content=1 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid) FROM gp_segment_configuration WHERE content=1 AND role='p';
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'interrupt', '', '', '', 1000, 1000, 0, dbid) FROM gp_segment_configuration WHERE content=1 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'interrupt', '', '', '', 1000, 1000, 0, dbid) FROM gp_segment_configuration WHERE content=1 AND role='p';
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid) FROM gp_segment_configuration WHERE content=0 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid) FROM gp_segment_configuration WHERE content=0 AND role='p';
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 900, 900, 2, dbid) FROM gp_segment_configuration WHERE content=0 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 900, 900, 0, dbid) FROM gp_segment_configuration WHERE content=0 AND role='p';
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid) FROM gp_segment_configuration WHERE content=2 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', dbid) FROM gp_segment_configuration WHERE content=2 AND role='p';
  gp_inject_fault 
 -----------------
  Success:        
 (1 row)
-1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'sleep', '', '', '', 800, 800, 2, dbid) FROM gp_segment_configuration WHERE content=2 AND role='p';
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 800, 800, 0, dbid) FROM gp_segment_configuration WHERE content=2 AND role='p';
  gp_inject_fault 
 -----------------
  Success:        
@@ -695,6 +733,17 @@ ERROR:  canceling statement due to user request
 1<:  <... completed>
 ERROR:  canceling MPP operation: "Endpoint retrieve statement aborted"
 
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', dbid) FROM gp_segment_configuration WHERE content=0 AND role='p';
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'resume', dbid) FROM gp_segment_configuration WHERE content=2 AND role='p';
+ gp_inject_fault 
+-----------------
+ Success:        
+(1 row)
+
 0R<:  <... completed>
 ERROR:  endpoint is not available because the parallel retrieve cursor was aborted (cdbendpointretrieve.c:245)
 2R<:  <... completed>

From 30da5b4907c9eb91f50fb3d5e9cf5714c02cb2dc Mon Sep 17 00:00:00 2001
From: Zhang Mingli <avamingli@gmail.com>
Date: Mon, 13 May 2024 10:33:32 +0800
Subject: [PATCH 26/48] Fix Merge GPDB.

Some code does not work in CBDB after the merge from GPDB.
Fix the resulting errors, etc.

Authored-by: Zhang Mingli avamingli@gmail.com
---
 src/backend/optimizer/util/pathnode.c            |  8 ++++----
 .../regress/expected/aggregates_optimizer.out    |  2 ++
 src/test/regress/expected/gp_dqa_optimizer.out   | 16 ++++++++++++++++
 src/test/regress/sql/gp_foreign_data.sql         |  6 ++++++
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 540d27abc48..9cf83acc338 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3648,9 +3648,9 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
 		case FTEXECLOCATION_ALL_SEGMENTS:
 			server = GetForeignServer(rel->serverid);
 			if (server)
-				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), server->num_segments);
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), server->num_segments, 0);
 			else
-				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), getgpsegmentCount());
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), getgpsegmentCount(), 0);
 			break;
 		case FTEXECLOCATION_COORDINATOR:
 			CdbPathLocus_MakeEntry(&(pathnode->path.locus));
@@ -3718,9 +3718,9 @@ create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel,
 		case FTEXECLOCATION_ALL_SEGMENTS:
 			server = GetForeignServer(rel->serverid);
 			if (server)
-				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), server->num_segments);
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), server->num_segments, 0);
 			else
-				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), getgpsegmentCount());
+				CdbPathLocus_MakeStrewn(&(pathnode->path.locus), getgpsegmentCount(), 0);
 			break;
 		case FTEXECLOCATION_COORDINATOR:
 			CdbPathLocus_MakeEntry(&(pathnode->path.locus));
diff --git a/src/test/regress/expected/aggregates_optimizer.out b/src/test/regress/expected/aggregates_optimizer.out
index 634744680d9..87d3dd7ee53 100644
--- a/src/test/regress/expected/aggregates_optimizer.out
+++ b/src/test/regress/expected/aggregates_optimizer.out
@@ -2944,6 +2944,8 @@ INFO:  GPORCA failed to produce a plan, falling back to planner
 DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
 INFO:  GPORCA failed to produce a plan, falling back to planner
 DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 3
 INFO:  GPORCA failed to produce a plan, falling back to planner
 DETAIL:  GPDB Expression type: Query Parameter not supported in DXL
diff --git a/src/test/regress/expected/gp_dqa_optimizer.out b/src/test/regress/expected/gp_dqa_optimizer.out
index bcc734b3978..7c28cc1d7c2 100644
--- a/src/test/regress/expected/gp_dqa_optimizer.out
+++ b/src/test/regress/expected/gp_dqa_optimizer.out
@@ -2338,12 +2338,16 @@ insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321'
 --           ->  Seq Scan on public.dqa_f3
 --                 Output: b, a, (b)::text
 select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  b | a 
 ---+---
  3 | 7
 (1 row)
 
 explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                            QUERY PLAN
 ------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -2369,12 +2373,16 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin
 
 -- Case 2: Same as the above one, but convert the type of column 'a' to 'varchar' via binary-compatible types.
 select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  b | a 
 ---+---
  3 | 7
 (1 row)
 
 explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                  QUERY PLAN
 -------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -2410,12 +2418,16 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin
 --           ->  Seq Scan on public.dqa_f3
 --                 Output: b, a, (b)::text, (a)::integer
 select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  b | a 
 ---+---
  3 | 7
 (1 row)
 
 explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                             QUERY PLAN
 ---------------------------------------------------------------------------------------------------
  Finalize Aggregate
@@ -2442,12 +2454,16 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin
 -- Case 4: When converting the type of column 'a' from 'varchar' to 'int' to 'varchar', TupleSplit should generate an additional
 -- column '(a)::integer::varchar' as part of hash-key in Redistribute-Motion.
 select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
  b | a 
 ---+---
  3 | 7
 (1 row)
 
 explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Feature not supported: Multiple Distinct Qualified Aggregates are disabled in the optimizer
                                                        QUERY PLAN
 ------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
diff --git a/src/test/regress/sql/gp_foreign_data.sql b/src/test/regress/sql/gp_foreign_data.sql
index 4db395743ad..23c630797da 100644
--- a/src/test/regress/sql/gp_foreign_data.sql
+++ b/src/test/regress/sql/gp_foreign_data.sql
@@ -32,3 +32,9 @@ CREATE SERVER s1 FOREIGN DATA WRAPPER dummy OPTIONS (num_segments '5');
 
 -- CHECK FOREIGN SERVER's OPTIONS
 SELECT srvoptions FROM pg_foreign_server WHERE srvname = 's1';
+
+-- start_ignore
+DROP SERVER s0 CASCADE;
+DROP SERVER s1 CASCADE;
+DROP FOREIGN DATA WRAPPER dummy CASCADE;
+-- end_ignore

From 0592389e180c589695ee6492f04882ffcf5a8b14 Mon Sep 17 00:00:00 2001
From: Hao Wu <gfphoenix78@gmail.com>
Date: Wed, 15 May 2024 21:41:55 +0800
Subject: [PATCH 27/48] Fix motion toast error. (#436)

slot->tts_isnull cannot be read before invoking slot_getallattrs.

Fix Github Issue 16906.

Authored-by: wenxing.yaun <wenxing.yuan@esgyn.cn>

Co-authored-by: HelloYJohn <xxdrywx@gmail.com>
---
 src/backend/cdb/motion/tupser.c     |  6 +++++-
 src/test/regress/expected/toast.out | 16 ++++++++++++++++
 src/test/regress/sql/toast.sql      | 20 ++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/backend/cdb/motion/tupser.c b/src/backend/cdb/motion/tupser.c
index df1ab4f2f2e..b0a7bb065f7 100644
--- a/src/backend/cdb/motion/tupser.c
+++ b/src/backend/cdb/motion/tupser.c
@@ -412,7 +412,11 @@ SerializeTuple(TupleTableSlot *slot, SerTupInfo *pSerInfo, struct directTranspor
 	{
 		Form_pg_attribute attr = TupleDescAttr(slot->tts_tupleDescriptor, i);
 
-		if (!attr->attisdropped && attr->attlen == -1 && !slot->tts_isnull[i])
+		/*
+		 * Cannot access slot->tts_isnull before invoking slot_getallattrs.
+		 * See Github Issue 16906.
+		 */
+		if (!attr->attisdropped && attr->attlen == -1)
 		{
 			hasExternalAttr = true;
 			break;
diff --git a/src/test/regress/expected/toast.out b/src/test/regress/expected/toast.out
index abf5136fd4a..9d97ab868c5 100644
--- a/src/test/regress/expected/toast.out
+++ b/src/test/regress/expected/toast.out
@@ -180,3 +180,19 @@ SELECT encode(substring(a from 521*26+1 for 26), 'escape') FROM toast_chunk_test
  abcdefghijklmnopqrstuvwxyz
 (1 row)
 
+-- Test for Github Issue 16906
+create table t_16906(a int, b text) distributed by(a);
+-- Insert two rows and make sure they are in the same segment (same dist key)
+-- the 1st row's column b must be NULL;
+-- the 2nd row's column b must be a long string even after toast compression
+-- for details please refer to the issue page.
+insert into t_16906 values(1, null);
+insert into t_16906 values(1, randomtext(10240));
+-- Don't want actually fetch all data just need to test
+-- it does not hit assert fail or error. Using explain
+-- analyze might introduce a new ansfile for ORCA so here
+-- I decide to use \o.
+\o /tmp/t_16906.tmp
+select * from t_16906;
+\o
+drop table t_16906;
diff --git a/src/test/regress/sql/toast.sql b/src/test/regress/sql/toast.sql
index 7b3cfe7d204..a35cb77e4b7 100644
--- a/src/test/regress/sql/toast.sql
+++ b/src/test/regress/sql/toast.sql
@@ -95,3 +95,23 @@ SELECT * FROM toast_chunk_test WHERE a <> repeat('abcdefghijklmnopqrstuvwxyz', 1
 
 -- Random access into the toast table should work equally well.
 SELECT encode(substring(a from 521*26+1 for 26), 'escape') FROM toast_chunk_test;
+
+-- Test for Github Issue 16906
+create table t_16906(a int, b text) distributed by(a);
+
+-- Insert two rows and make sure they are in the same segment (same dist key)
+-- the 1st row's column b must be NULL;
+-- the 2nd row's column b must be a long string even after toast compression
+-- for details please refer to the issue page.
+insert into t_16906 values(1, null);
+insert into t_16906 values(1, randomtext(10240));
+
+-- Don't want actually fetch all data just need to test
+-- it does not hit assert fail or error. Using explain
+-- analyze might introduce a new ansfile for ORCA so here
+-- I decide to use \o.
+\o /tmp/t_16906.tmp
+select * from t_16906;
+\o
+
+drop table t_16906;

From 9274a53490bc1a85f3f103054edd420cc48bd026 Mon Sep 17 00:00:00 2001
From: Xiaoran Wang <fanfuxiaoran@gmail.com>
Date: Thu, 16 May 2024 10:18:18 +0800
Subject: [PATCH 28/48] Fix checking password file permissions in dbconn.py
 (#438)

In Python 3, oct() returns the mode with a "0o" prefix ("0o600"), so the
comparison against "0600" never matched and the warning was always printed.
---
 gpMgmt/bin/gppylib/db/dbconn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpMgmt/bin/gppylib/db/dbconn.py b/gpMgmt/bin/gppylib/db/dbconn.py
index 47a7e91bfd2..7d3f839f68b 100644
--- a/gpMgmt/bin/gppylib/db/dbconn.py
+++ b/gpMgmt/bin/gppylib/db/dbconn.py
@@ -40,7 +40,7 @@ def __init__(self):
         st_info = os.stat(PGPASSFILE)
         mode = str(oct(st_info[stat.ST_MODE] & 0o777))
 
-        if mode != "0600":
+        if mode != "0o600":
             print('WARNING: password file "%s" has group or world access; permissions should be u=rw (0600) or less' % PGPASSFILE)
             self.valid_pgpass = False
             return

From 9a630392a94ec29790a25174b851f6e64be63e21 Mon Sep 17 00:00:00 2001
From: David Kimura <dkimura@vmware.com>
Date: Tue, 7 Mar 2023 16:08:43 -0800
Subject: [PATCH 29/48] [ORCA] Fix flaky "Invalid key is inaccessible" fallback
 (#15147)

In the CI pipeline there were occasional test failures due to ORCA fallback
with the following stack trace.

    ```
    +INFO:  GPORCA failed to produce a plan, falling back to planner
    +DETAIL:  CSyncHashtable.h:109: Failed assertion: IsValid(key) && "Invalid key is inaccessible"
    +Stack trace:
    +1    gpos::CException::Raise + 227
    +2    <symbol not found> + 15235666
    +3    gpos::CMemoryPoolManager::CreateMemoryPool + 653
    +4    gpos::CAutoMemoryPool::CAutoMemoryPool + 34
    +5    gpopt::CColumnFactory::CColumnFactory + 80
    +6    gpopt::COptCtxt::PoctxtCreate + 77
    +7    gpopt::CAutoOptCtxt::CAutoOptCtxt + 54
    +8    gpopt::COptimizer::PdxlnOptimize + 411
    +9    COptTasks::OptimizeTask + 850
    +10   gpos::CTask::Execute + 52
    +11   gpos::CWorker::Execute + 36
    +12   gpos::CAutoTaskProxy::Execute + 97
    +13   gpos_exec + 557
    ```

Core dump of failure showed CMemoryPool::m_hash_key had invalid key
value 0xffffffff. Hence, the query raised an assertion error and fell
back to PLANNER.

Issue is that CMemoryPool::m_hash_key was never directly initialized.
This suggests that it was using uninitialized memory to produce
randomness in the key. When that memory contains 0xffffffff in just the
right place, then the value of the CMemoryPool::m_hash_key is an invalid
key and ORCA falls back.

Following is patch that demonstrates the issue:
    ```
    diff src/backend/utils/mmgr/aset.c
    @@ -989,6 +989,8 @@ AllocSetAlloc(MemoryContext context, Size size)

                    MEMORY_ACCOUNT_INC_ALLOCATED(set, chunk->size);

    +               memset((char *) AllocChunkGetPointer(chunk), 0xFFFFFFFF, size);
    +
                    return AllocChunkGetPointer(chunk);
            }
    ```

A few lines above that patch, you can see that when compiled with
RANDOMIZE_ALLOCATED_MEMORY the memory is randomly initialized. So we can
make no assumptions about the uninitialized memory; meaning that 0xffffffff
is a legitimate value for it to contain.

Note: Seemed this failure manifested more commonly with JIT ICW runs.
(cherry picked from commit 2c7152f46aced9328d86dc1025d0395fcf467455)
---
 .../libgpos/include/gpos/memory/CMemoryPool.h    | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/backend/gporca/libgpos/include/gpos/memory/CMemoryPool.h b/src/backend/gporca/libgpos/include/gpos/memory/CMemoryPool.h
index 7d997d37cbb..3f9ab65946c 100644
--- a/src/backend/gporca/libgpos/include/gpos/memory/CMemoryPool.h
+++ b/src/backend/gporca/libgpos/include/gpos/memory/CMemoryPool.h
@@ -25,6 +25,9 @@
 #ifndef GPOS_CMemoryPool_H
 #define GPOS_CMemoryPool_H
 
+#include <limits>
+#include <random>
+
 #include "gpos/assert.h"
 #include "gpos/common/CLink.h"
 #include "gpos/common/CStackDescriptor.h"
@@ -75,7 +78,11 @@ class CMemoryPool
 	friend class CMemoryPoolManager;
 
 private:
-	// hash key is only set by pool manager
+	// pseudo-random hash key generator
+	std::mt19937 m_generator;
+	std::uniform_int_distribution<ULONG> m_distribution;
+
+	// hash key for this memory pool
 	ULONG_PTR m_hash_key;
 
 #ifdef GPOS_DEBUG
@@ -98,6 +105,13 @@ class CMemoryPool
 		EatArray = 0x7e
 	};
 
+	CMemoryPool()
+		// MAX LONG is invalid hash key, so skip that hash value.
+		: m_distribution(0, std::numeric_limits<ULONG>::max() - 1),
+		  m_hash_key(m_distribution(m_generator))
+	{
+	}
+
 	// dtor
 	virtual ~CMemoryPool() = default;
 

From 1347cd6901d02fa5f828f28c7468881c478db57e Mon Sep 17 00:00:00 2001
From: zhangwenchao <656540940@qq.com>
Date: Wed, 8 May 2024 17:44:03 +0800
Subject: [PATCH 30/48] Refactor cbload to gpdirtableload with python.

We refactor cbload with python language which is more friendly to
kernel. What's more, we make a minor fix with respect to directory
table check in copy from.
---
 gpMgmt/bin/Makefile                 |   2 +-
 gpMgmt/bin/gpdirtableload           | 450 ++++++++++++++++++++++++++++
 gpMgmt/bin/gppylib/commands/base.py |   4 +-
 src/backend/commands/copy.c         |  16 +-
 4 files changed, 463 insertions(+), 9 deletions(-)
 create mode 100755 gpMgmt/bin/gpdirtableload

diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile
index 2b4c7483e2a..24e70491184 100644
--- a/gpMgmt/bin/Makefile
+++ b/gpMgmt/bin/Makefile
@@ -16,7 +16,7 @@ PROGRAMS= analyzedb gpactivatestandby gpaddmirrors gpcheckcat gpcheckperf \
 	gpcheckresgroupimpl gpconfig gpdeletesystem gpexpand gpshrink gpinitstandby \
 	gpinitsystem gpload gpload.py gplogfilter gpmovemirrors \
 	gppkg gprecoverseg gpreload gpscp gpsd gpssh gpssh-exkeys gpstart \
-	gpstate gpstop minirepro gpmemwatcher gpmemreport gpdemo
+	gpstate gpstop minirepro gpmemwatcher gpmemreport gpdemo gpdirtableload
 
 GPDEMO_LIBS = gpdemo-defaults.sh lalshell generate_certs.sh demo_cluster.sh \
 				probe_config.sh README
diff --git a/gpMgmt/bin/gpdirtableload b/gpMgmt/bin/gpdirtableload
new file mode 100755
index 00000000000..27a6d2a700e
--- /dev/null
+++ b/gpMgmt/bin/gpdirtableload
@@ -0,0 +1,450 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# gpdirtableload - load file(s) to directory table
+# Copyright Hashdata 2024
+
+import sys
+import argparse
+
+if sys.hexversion < 0x2040400:
+    sys.stderr.write("gpdirtableload needs python 2.4.4 or higher\n")
+    sys.exit(2)
+
+import platform
+
+# PyGreSQL may be installed as top-level `pg` or inside the `pygresql` package.
+# Fail fast if neither import works: the tool cannot connect without it.
+try:
+    try:
+        import pg
+    except ImportError:
+        from pygresql import pg
+except Exception as e:
+    errorMsg = "gpdirtableload was unable to import The PyGreSQL Python module (pg.py) - %s\n" % str(e)
+    sys.stderr.write(str(errorMsg))
+    errorMsg = "Please check if you have the correct Visual Studio redistributable package installed.\n"
+    sys.stderr.write(str(errorMsg))
+    sys.exit(2)
+
+import datetime, getpass, os, signal, socket, threading, time, traceback, re
+from gppylib.commands.base import WorkerPool, Command, LOCAL
+
+try:
+    from gppylib.gpversion import GpVersion
+except ImportError:
+    sys.stderr.write("gpdirtableload can't import gpversion, will run in GPDB5 compatibility mode.\n")
+    noGpVersion = True  # setup_connection() skips the server version query when this is set
+else:
+    noGpVersion = False
+
+thePlatform = platform.system()
+if thePlatform in ['Windows', 'Microsoft']:
+    windowsPlatform = True
+else:
+    windowsPlatform = False
+
+if windowsPlatform == False:
+    import select
+
+from sys import version_info
+
+if version_info.major == 2:
+    import __builtin__
+
+    long = __builtin__.long
+else:
+    long = int
+
+EXECNAME = 'gpdirtableload'
+
+NUM_WARN_ROWS = 0
+received_kill = False
+
+
+def parseargs():  # build the CLI parser and parse sys.argv; returns (args, parser)
+    parser = argparse.ArgumentParser(description='gpdirtableload --load file to directory table',
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('--database', '-d', default="gpadmin",
+                        help='Database to connect to')
+    parser.add_argument('--dest-path', help='Path relative to the table root directory')
+
+    parser.add_argument('--force-password-auth', default=False, action='store_true',
+                        help='Force a password prompt')
+
+    parser.add_argument('--host', default="localhost",
+                        help='Host to connect to')
+    parser.add_argument('--input-file', help='Input files or directory')
+
+    parser.add_argument('--logfile', help='Log output to logfile')
+
+    parser.add_argument('--port', '-p', type=int, default="5432",
+                        help='Port to connect to')
+    parser.add_argument('--stop-on-error', default=False, type=lambda v: v.lower() in ('1', 'true', 't', 'yes', 'y', 'on'),
+                        help='Stop loading files when an error occurs')  # value is parsed so that "false"/"0" stay False
+    parser.add_argument('--table', '-t', help='Directory table to load to')
+    parser.add_argument('--tag', help='Tag name')
+    parser.add_argument('--tasks', '-T', type=int, default="1",
+                        help='The maximum number of files that concurrently loads')
+    parser.add_argument('--user', '-U', default="gpadmin",
+                        help='User to connect as')
+    parser.add_argument('--verbose', '-V', default=False, action='store_true',
+                        help='Indicates that the tool should generate verbose output')
+    parser.add_argument('--version', '-v', action='version', version='gpdirtableload version 1.0.0\n',
+                        help='Print version info and exit')
+
+    # Parse the command line arguments
+    args = parser.parse_args()
+    return args, parser
+
+
+def handle_kill(signum, frame):
+    # already dying?
+    global received_kill
+    if received_kill:
+        return
+
+    received_kill = True
+
+    g.log(g.INFO, "received signal %d" % signum)
+    g.exitValue = 2
+    sys.exit(2)
+
+
+def splitPgpassLine(a):
+    """
+    If the user has specified a .pgpass file, we'll have to parse it. We simply
+    split the string into arrays at :. We could just use a native python
+    function but we need to escape the ':' character.
+    """
+    b = []
+    escape = False
+    d = ''
+    for c in a:
+        if not escape and c == '\\':
+            escape = True
+        elif not escape and c == ':':
+            b.append(d)
+            d = ''
+        else:
+            d += c
+            escape = False
+    if escape:
+        d += '\\'
+    b.append(d)
+    return b
+
+
+class gpdirtableload:
+    """
+    Main class wrapper
+    """
+
+    def __init__(self, argv):  # argv is currently unused; options come from parseargs()
+        args, parser = parseargs()
+        self.options = args
+        self.options.password = None
+        self.options.max_retries = 3
+        self.exitValue = 0
+        self.dbs = []
+        self.DEBUG = 5
+        self.LOG = 4
+        self.INFO = 3
+        self.WARN = 2
+        self.ERROR = 1
+        self.options.qv = self.INFO
+        self.startTimestamp = time.time()
+        self.pool = None
+
+        # set log level: --verbose is a store_true flag (never None), so test its truth value
+        if self.options.verbose:
+            self.options.qv = self.DEBUG
+        else:
+            self.options.qv = self.INFO
+
+        # default to gpAdminLogs for a log file, may be overwritten
+        if self.options.logfile is None:
+            self.options.logfile = os.path.join(os.environ.get('HOME', '.'), 'gpAdminLogs')
+            if not os.path.isdir(self.options.logfile):
+                os.mkdir(self.options.logfile)
+
+            self.options.logfile = os.path.join(self.options.logfile, 'gpdirtableload_' + \
+                                                datetime.date.today().strftime('%Y%m%d') + '.log')
+
+        try:
+            self.logfile = open(self.options.logfile, 'a')
+        except Exception as e:
+            self.log(self.ERROR, "could not open logfile %s: %s" %
+                     (self.options.logfile, e))
+
+        self.log(self.INFO, 'gpdirtableload session started ' + \
+                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
+
+    def elevel2str(self, level):
+        if level == self.DEBUG:
+            return "DEBUG"
+        elif level == self.LOG:
+            return "LOG"
+        elif level == self.INFO:
+            return "INFO"
+        elif level == self.ERROR:
+            return "ERROR"
+        elif level == self.WARN:
+            return "WARN"
+        else:
+            self.log(self.ERROR, "unknown log type %i" % level)
+
+    def log(self, level, a):
+        """
+        Write message a at level (DEBUG, LOG, INFO, WARN or ERROR); an ERROR sets exitValue to 2 and calls sys.exit.
+        """
+        log = ''
+        try:
+            log = '|'.join(
+                [datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
+                 self.elevel2str(level), a]) + '\n'
+
+        except Exception as e:
+            # log even if contains non-utf8 data and pass this exception
+            self.logfile.write("\nWarning: Log() threw an exception: %s \n" % (e))
+
+        if level <= self.options.qv:
+            sys.stdout.write(log)
+
+        if level <= self.options.qv or level <= self.INFO:
+            try:
+                self.logfile.write(log)
+                self.logfile.flush()
+            except AttributeError as e:  # self.logfile is not set when opening the log file failed
+                pass
+
+        if level == self.ERROR:
+            self.exitValue = 2;
+            sys.exit(self.exitValue)
+
+    def readPgpass(self, pgpassname):
+        """
+        Set options.password from the first .pgpass line matching host, port, database and user ('*' matches any).
+        """
+        try:
+            f = open(pgpassname, 'r')
+        except IOError:
+            return
+        for row in f:
+            try:
+                row = row.rstrip("\n")
+                line = splitPgpassLine(row)
+                if line[0] != '*' and line[0].lower() != self.options.host.lower():
+                    continue
+                if line[1] != '*' and int(line[1]) != self.options.port:
+                    continue
+                if line[2] != '*' and line[2] != self.options.database:
+                    continue
+                if line[3] != '*' and line[3] != self.options.user:
+                    continue
+                self.options.password = line[4]
+                break
+            except (ValueError, IndexError):
+                pass
+        f.close()
+
+    def setup_connection(self, recurse=0):
+        """
+        Connect to the backend
+        """
+        if self.db != None:
+            self.db.close()
+            self.db = None
+        if self.options.force_password_auth:
+            if self.options.password == None:
+                self.options.password = getpass.getpass()
+        else:
+            if self.options.password == None:
+                self.options.password = os.environ.get('PGPASSWORD')
+            if self.options.password == None:
+                self.readPgpass(os.environ.get('PGPASSFILE',
+                                               os.environ.get('HOME', '.') + '/.pgpass'))
+            if self.options.password == None:
+                self.options.password = getpass.getpass()
+        try:
+            self.log(self.DEBUG, "connection string:" +
+                     " user=" + str(self.options.user) +
+                     " host=" + str(self.options.host) +
+                     " port=" + str(self.options.port) +
+                     " database=" + str(self.options.database))
+            self.db = pg.DB(dbname=self.options.database
+                            , host=self.options.host
+                            , port=self.options.port
+                            , user=self.options.user
+                            , passwd=self.options.password)
+            self.log(self.DEBUG, "Successfully connected to database")
+
+            if noGpVersion == False:
+                # Get GPDB version
+                curs = self.db.query("SELECT version()")
+                self.gpdb_version = GpVersion(curs.getresult()[0][0])
+                self.log(self.DEBUG, "GPDB version is: %s" % self.gpdb_version)
+
+        except Exception as e:
+            errorMessage = str(e)
+            if errorMessage.find("no password supplied") != -1:
+                self.options.password = getpass.getpass()
+                recurse += 1
+                if recurse > 10:
+                    self.log(self.ERROR, "too many login attempt failures")
+                self.setup_connection(recurse)
+            elif errorMessage.find("Connection timed out") != -1 and self.options.max_retries != 0:
+                recurse += 1
+                if self.options.max_retries > 0:
+                    if recurse > self.options.max_retries:  # retry failed
+                        self.log(self.ERROR, "could not connect to database after retry %d times, " \
+                                             "error message:\n %s" % (recurse - 1, errorMessage))
+                    else:
+                        self.log(self.INFO, "retry to connect to database, %d of %d times" % (recurse,
+                                                                                              self.options.max_retries))
+                else:  # max_retries < 0, retry forever
+                    self.log(self.INFO, "retry to connect to database.")
+                self.setup_connection(recurse)
+            else:
+                self.log(self.ERROR, "could not connect to database: %s. Is " \
+                                     "the Cloudberry Database running on port %i?" % (errorMessage,
+                                                                                      self.options.port))
+
+    def isDirectoryMode(self):
+        if self.options.input_file != None and not os.path.exists(self.options.input_file):
+            self.log(self.ERROR, "File or directory %s does not exist." % self.options.input_file)
+        if self.options.input_file != None and os.path.isdir(self.options.input_file):
+            return True
+        return False
+
+    def collectAllFiles(self):
+        self.allFiles = []
+        self.numFiles = 0
+
+        if self.isDirectoryMode():
+            for root, dirs, files in os.walk(self.options.input_file):
+                for file in files:
+                    dirpath = os.path.abspath(root)
+                    filepath = os.path.join(dirpath, file)
+                    self.allFiles.append(filepath)
+                    self.numFiles += 1
+        else:
+            if self.options.input_file is not None and os.path.exists(self.options.input_file):
+                filepath = os.path.abspath(self.options.input_file)
+                self.allFiles.append(filepath)
+                self.numFiles = 1
+
+    def confirmWorkers(self):
+        if self.numFiles < self.options.tasks:
+            self.numWorkers = self.numFiles
+        else:
+            self.numWorkers = self.options.tasks
+
+    def startLoadFiles(self):
+        """
+        Run one psql "copy binary ... from" command per collected file through a WorkerPool.
+        """
+        self.pool = WorkerPool(numWorkers=self.numWorkers, should_stop=self.options.stop_on_error)
+
+        srcfile = None
+        if os.environ.get('GPHOME_LOADERS'):
+            srcfile = os.path.join(os.environ.get('GPHOME_LOADERS'),
+                                   'greenplum_loaders_path.sh')
+        elif os.environ.get('GPHOME'):
+            srcfile = os.path.join(os.environ.get('GPHOME'),
+                                   'greenplum_path.sh')
+        if (not (srcfile and os.path.exists(srcfile))):
+            self.log(self.ERROR, 'cannot find greenplum environment ' +
+                     'file: environment misconfigured')
+
+        cmdstrbase = "source %s ;" % srcfile
+        # single-quote the password so shell metacharacters in it are passed through literally
+        cmdstrbase += "export PGPASSWORD='%s' ; psql " % str(self.options.password).replace("'", "'\\''")
+
+        if self.options.database != None:
+            cmdstrbase += "-d %s " % self.options.database
+        if self.options.host != None:
+            cmdstrbase += "-h %s " % self.options.host
+        if self.options.port != 0:
+            cmdstrbase += "-p %d " % self.options.port
+        if self.options.user != None:
+            cmdstrbase += "-U %s " % self.options.user
+
+        try:
+            for file in self.allFiles:
+                cmdstr = cmdstrbase
+                cmdstr += '-c \"copy binary %s from \'%s\' ' % (self.options.table, file)
+                if self.isDirectoryMode():
+                    cmdstr += '\'%s/%s\' ' % (self.options.dest_path, os.path.relpath(file))  # NOTE(review): relpath() is relative to the CWD, not input_file - confirm
+                else:
+                    cmdstr += '\'%s\' ' % self.options.dest_path
+
+                if self.options.tag is not None:
+                    cmdstr += 'with tag \'%s\' \"' % self.options.tag
+                else:
+                    cmdstr += '\"'
+
+                cmd = Command(name='load directory table', ctxt=LOCAL, cmdStr=cmdstr)
+                self.pool.addCommand(cmd)
+            self.pool.join()
+            items = self.pool.getCompletedItems()
+            for i in items:
+                if not i.was_successful():
+                    self.log(self.ERROR, 'failed load file to directory table %s, msg:%s' %
+                             (self.options.table, i.get_results().stderr))
+            self.pool.check_results()
+        except Exception as err:
+            # log() exits on ERROR, so report the details at WARN before the final ERROR
+            self.log(self.WARN, 'errors in job: %s' % err)
+            self.log(self.ERROR, 'exiting early')
+        finally:
+            self.pool.haltWork()
+            self.pool.joinWorkers()
+
+    def run2(self):
+        try:
+            start = time.time()
+            self.collectAllFiles()
+            self.confirmWorkers()
+            self.setup_connection()
+            self.startLoadFiles()
+            self.log(self.INFO, 'running time: %.2f seconds' % (time.time() - start))
+        except Exception as e:
+            raise
+
+    def run(self):
+        self.db = None
+        signal.signal(signal.SIGINT, handle_kill)
+        signal.signal(signal.SIGTERM, handle_kill)
+        # win32 doesn't do SIGQUIT
+        if not platform.system() in ['Windows', 'Microsoft']:
+            signal.signal(signal.SIGQUIT, handle_kill)
+            signal.signal(signal.SIGHUP, signal.SIG_IGN)
+
+        try:
+            try:
+                self.run2()
+            except Exception:
+                traceback.print_exc(file=self.logfile)
+                self.logfile.flush()
+                self.exitValue = 2
+                if (self.options.qv > self.INFO):
+                    traceback.print_exc()
+                else:
+                    self.log(self.ERROR, "unexpected error -- backtrace " +
+                             "written to log file")
+        finally:
+            if self.exitValue == 0:
+                self.log(self.INFO, 'gpdirtableload succeeded')
+            elif self.exitValue == 1:
+                self.log(self.INFO, 'gpdirtableload succeeded with warnings')
+            else:
+                self.log(self.INFO, 'gpdirtableload failed')
+
+
+if __name__ == '__main__':
+    g = gpdirtableload(sys.argv[1:])
+    g.run()
+    sys.stdout.flush()
+    sys.stderr.flush()
+    os._exit(g.exitValue)
diff --git a/gpMgmt/bin/gppylib/commands/base.py b/gpMgmt/bin/gppylib/commands/base.py
index c7dc4b8e2ca..0e0296d7c4c 100755
--- a/gpMgmt/bin/gppylib/commands/base.py
+++ b/gpMgmt/bin/gppylib/commands/base.py
@@ -47,11 +47,11 @@ class WorkerPool(object):
 
     halt_command = 'halt command'
 
-    def __init__(self, numWorkers=16, items=None, daemonize=False, logger=gplog.get_default_logger()):
+    def __init__(self, numWorkers=16, should_stop = False, items=None, daemonize=False, logger=gplog.get_default_logger()):
         if numWorkers <= 0:
             raise Exception("WorkerPool(): numWorkers should be greater than 0.")
         self.workers = []
-        self.should_stop = False
+        self.should_stop = should_stop
         self.work_queue = Queue()
         self.completed_queue = Queue()
         self._assigned = 0
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index daaca78cd7e..189eaa6926e 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -112,7 +112,8 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 {
 	bool		is_from = stmt->is_from;
 	bool		pipe = (stmt->filename == NULL || Gp_role == GP_ROLE_EXECUTE);
-	Relation	rel;
+	Relation	rel = NULL;
+	LOCKMODE	lockmode;
 	Oid			relid;
 	RawStmt    *query = NULL;
 	Node	   *whereClause = NULL;
@@ -139,6 +140,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 		options = lappend(options, makeDefElem("sreh", (Node *) sreh, -1));
 	}
 
+	lockmode = is_from ? RowExclusiveLock : AccessShareLock;
+	if (stmt->relation)
+	{
+		/* Open and lock the relation, using the appropriate lock type. */
+		rel = table_openrv(stmt->relation, lockmode);
+	}
+	
 	/*
 	 * Disallow COPY to/from file or program except to users with the
 	 * appropriate role.
@@ -156,7 +164,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 		}
 		else
 		{
-			if (is_from && !is_member_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
+			if (is_from && !is_member_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES) && rel->rd_rel->relkind != RELKIND_DIRECTORY_TABLE)
 				ereport(ERROR,
 						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 						 errmsg("must be superuser or a member of the pg_read_server_files role to COPY from a file"),
@@ -174,7 +182,6 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 
 	if (stmt->relation)
 	{
-		LOCKMODE	lockmode = is_from ? RowExclusiveLock : AccessShareLock;
 		ParseNamespaceItem *nsitem;
 		RangeTblEntry *rte;
 		TupleDesc	tupDesc;
@@ -183,9 +190,6 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 
 		Assert(!stmt->query);
 
-		/* Open and lock the relation, using the appropriate lock type. */
-		rel = table_openrv(stmt->relation, lockmode);
-
 		if (is_from && !allowSystemTableMods && IsUnderPostmaster && IsSystemRelation(rel))
 		{
 			ereport(ERROR,

From 6bf35c89d5c45e61006283e4f75d9aebcbe73939 Mon Sep 17 00:00:00 2001
From: Zhang Mingli <avamingli@gmail.com>
Date: Tue, 14 May 2024 10:55:29 +0800
Subject: [PATCH 31/48] Fix explain(locus) issues.

Fix the "OuteryQuery" typo and the NULL locus shown on some plan nodes,
as in the plan below.

explain (costs off, locus)
select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab
t);
                                      QUERY PLAN
------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   Locus: Entry
   ->  Result
         Locus: Strewn
         ->  Unique
               Locus: NULL
               Group Key: (RowIdExpr)
               ->  Sort
                     Locus: NULL
                     Sort Key (Distinct): (RowIdExpr)
                     ->  Redistribute Motion 3:3  (slice2; segments: 3)
                           Locus: Hashed
                           Hash Key: (RowIdExpr)
                           ->  Hash Join
                                 Locus: Hashed
                                 Hash Cond: ((t.a / 10) = r.a)
                                 ->  Seq Scan on dedup_tab t
                                       Locus: Hashed
                                 ->  Hash
                                       Locus: Replicated
                                       ->  Broadcast Motion 1:3
(slice3; segments: 1)
                                             Locus: Replicated
                                             ->  Seq Scan on
dedup_reptab r
                                                   Locus: SingleQE

Authored-by: Zhang Mingli avamingli@gmail.com
---
 src/backend/commands/explain.c                   |  2 +-
 src/backend/optimizer/plan/createplan.c          | 16 +++++++++++++++-
 .../regress/expected/offload_entry_to_qe.out     |  8 ++++----
 src/test/regress/expected/subselect_gp.out       | 14 ++++++++++++--
 .../regress/expected/subselect_gp_optimizer.out  |  2 +-
 src/test/regress/sql/subselect_gp.sql            |  2 +-
 6 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index d44a70b04e9..62388047528 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -5797,7 +5797,7 @@ Explainlocus(ExplainState *es, CdbLocusType locustype, int parallel)
 			locus = "SegmentGeneralWorkers";
 			break;
 		case CdbLocusType_OuterQuery:
-			locus = "OuteryQuery";
+			locus = "OuterQuery";
 			break;
 		case CdbLocusType_Replicated:
 			locus = "Replicated";
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 05131746da2..396fc318fca 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -627,7 +627,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
 	}
 
 	Assert(best_path->parallel_workers == best_path->locus.parallel_workers);
-	plan->locustype = best_path->locus.locustype;
+	if (plan->locustype == CdbLocusType_Null)
+	{
+		plan->locustype = best_path->locus.locustype;
+	}
 	plan->parallel = best_path->locus.parallel_workers;
 
 	return plan;
@@ -2345,6 +2348,12 @@ inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe)
 	copy_plan_costsize(plan, subplan);
 	plan->parallel_safe = parallel_safe;
 
+	if (subplan != NULL)
+	{
+		plan->locustype = subplan->locustype;
+		plan->parallel = subplan->parallel;
+	}
+
 	return plan;
 }
 
@@ -7282,6 +7291,9 @@ make_sort(Plan *lefttree, int numCols,
 	node->collations = collations;
 	node->nullsFirst = nullsFirst;
 
+	plan->locustype = lefttree->locustype;
+	plan->parallel = lefttree->parallel;
+
 	Assert(sortColIdx[0] != 0);
 
 	node->noduplicates = false; /* CDB */
@@ -8016,6 +8028,8 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
 	plan->qual = NIL;
 	plan->lefttree = lefttree;
 	plan->righttree = NULL;
+	plan->locustype = lefttree->locustype;
+	plan->parallel = lefttree->parallel;
 
 	/*
 	 * convert SortGroupClause list into arrays of attr indexes and equality
diff --git a/src/test/regress/expected/offload_entry_to_qe.out b/src/test/regress/expected/offload_entry_to_qe.out
index c6bd5f219db..1c1d0a13a25 100644
--- a/src/test/regress/expected/offload_entry_to_qe.out
+++ b/src/test/regress/expected/offload_entry_to_qe.out
@@ -364,14 +364,14 @@ explain (costs off, locus) select (select max((select distinct x from tst t2 whe
                            Locus: Hashed
                      SubPlan 1
                        ->  Unique
-                             Locus: OuteryQuery
+                             Locus: OuterQuery
                              ->  Result
-                                   Locus: OuteryQuery
+                                   Locus: OuterQuery
                                    Filter: (t2.x = t1.x)
                                    ->  Materialize
-                                         Locus: OuteryQuery
+                                         Locus: OuterQuery
                                          ->  Broadcast Motion 3:3  (slice3; segments: 3)
-                                               Locus: OuteryQuery
+                                               Locus: OuterQuery
                                                ->  Seq Scan on tst t2
                                                      Locus: Hashed
          SubPlan 2
diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out
index e56d7a2577d..7c7bedb7865 100644
--- a/src/test/regress/expected/subselect_gp.out
+++ b/src/test/regress/expected/subselect_gp.out
@@ -1840,26 +1840,36 @@ analyze dedup_reptab;
 -- tacked on, because even though all the rows are available in all the
 -- segments, you cannot distinguish join rows generated by the same "logical"
 -- row otherwise.
-explain (costs off)
+explain (costs off, locus)
 select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
                                       QUERY PLAN                                       
 ---------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
+   Locus: Entry
    ->  Result
+         Locus: Hashed
          ->  Unique
+               Locus: Hashed
                Group Key: (RowIdExpr)
                ->  Sort
+                     Locus: Hashed
                      Sort Key (Distinct): (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                           Locus: Hashed
                            Hash Key: (RowIdExpr)
                            ->  Hash Join
+                                 Locus: Hashed
                                  Hash Cond: ((t.a / 10) = r.a)
                                  ->  Seq Scan on dedup_tab t
+                                       Locus: Hashed
                                  ->  Hash
+                                       Locus: Replicated
                                        ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                                             Locus: Replicated
                                              ->  Seq Scan on dedup_reptab r
+                                                   Locus: SingleQE
  Optimizer: Postgres query optimizer
-(15 rows)
+(25 rows)
 
 select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
  a 
diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out
index 36e39aeb6a7..ba31bb39a1d 100644
--- a/src/test/regress/expected/subselect_gp_optimizer.out
+++ b/src/test/regress/expected/subselect_gp_optimizer.out
@@ -1896,7 +1896,7 @@ analyze dedup_reptab;
 -- tacked on, because even though all the rows are available in all the
 -- segments, you cannot distinguish join rows generated by the same "logical"
 -- row otherwise.
-explain (costs off)
+explain (costs off, locus)
 select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
                                QUERY PLAN                               
 ------------------------------------------------------------------------
diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql
index 18c765cd8c5..8e36f03bf93 100644
--- a/src/test/regress/sql/subselect_gp.sql
+++ b/src/test/regress/sql/subselect_gp.sql
@@ -820,7 +820,7 @@ analyze dedup_reptab;
 -- tacked on, because even though all the rows are available in all the
 -- segments, you cannot distinguish join rows generated by the same "logical"
 -- row otherwise.
-explain (costs off)
+explain (costs off, locus)
 select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
 select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
 

From fd453bfd47f63e44f32e775aeb65d454dc20e681 Mon Sep 17 00:00:00 2001
From: Zhang Mingli <avamingli@gmail.com>
Date: Thu, 16 May 2024 11:06:12 +0800
Subject: [PATCH 32/48] [AQUMV] Support DISTINCT clause on origin query.

References in a SELECT DISTINCT clause are processed together with the
target list, so enable DISTINCT on the origin query.
DISTINCT in aggregates and GROUP BY DISTINCT are already supported;
add test cases to verify that.

create incremental materialized view mv as
  select c1 as mc1, c2 as mc2, c3 as mc3, c4 as mc4
  from t1 where c1 > 90;

Origin queries:

  select DISTINCT c2 from t1 where c1 > 90;

  select count(DISTINCT c2) from t1 where c1 > 90;

  select c1, c2, c3, sum(c4) from t1 where c1 > 90
  group by DISTINCT rollup(c1, c2), rollup(c1, c3);

Could be rewritten to:

  select DISTINCT mc2 from mv;

  select count(DISTINCT mc2) from mv;

  select mc1, mc2, mc3, sum(mc4) from mv
  group by DISTINCT rollup(mc1, mc2), rollup(mc1, mc3);

Authored-by: Zhang Mingli avamingli@gmail.com
---
 src/backend/optimizer/README.cbdb.aqumv |   2 +-
 src/backend/optimizer/plan/aqumv.c      |   5 +-
 src/test/regress/expected/aqumv.out     | 218 ++++++++++++++++++++++++
 src/test/regress/sql/aqumv.sql          |  56 ++++++
 4 files changed, 277 insertions(+), 4 deletions(-)

diff --git a/src/backend/optimizer/README.cbdb.aqumv b/src/backend/optimizer/README.cbdb.aqumv
index c18d295983a..a0a1c349fa7 100644
--- a/src/backend/optimizer/README.cbdb.aqumv
+++ b/src/backend/optimizer/README.cbdb.aqumv
@@ -227,7 +227,7 @@ Below are not supported now:
       Group By/Grouping Sets/Rollup/Cube (on view_query)
       Window Functions
       CTE
-      Distinct
+      Distinct (on view_query)
       Distinct On
       UNION/INTERSECT/EXCEPT
       FOR UPDATE, FOR NO KEY UPDATE, FOR SHARE, FOR KEY SHARE
diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c
index f8249a38115..0ea441dfd52 100644
--- a/src/backend/optimizer/plan/aqumv.c
+++ b/src/backend/optimizer/plan/aqumv.c
@@ -115,7 +115,6 @@ answer_query_using_materialized_views(PlannerInfo *root,
 	/* Group By without agg could be possible though IMMV doesn't support it yet. */
 	bool can_not_use_mv = (parse->commandType != CMD_SELECT) ||
 						  (parse->rowMarks != NIL) ||
-						  (parse->distinctClause != NIL) ||
 						  (parse->scatterClause != NIL) ||
 						  (parse->cteList != NIL) ||
 						  (parse->setOperations != NULL) ||
@@ -345,6 +344,7 @@ answer_query_using_materialized_views(PlannerInfo *root,
 		viewQuery->groupClause = parse->groupClause;
 		viewQuery->groupingSets = parse->groupingSets;
 		viewQuery->sortClause = parse->sortClause;
+		viewQuery->distinctClause = parse->distinctClause;
 
 		/*
 		 * AQUMV
@@ -419,13 +419,12 @@ answer_query_using_materialized_views(PlannerInfo *root,
 			/*
 			 * Update pathkeys which may be changed by qp_callback.
 			 * Set belows if corresponding feature is supported.
-			 * distinct_pathkey
 			 * window_pathkeys
 			 */
 			root->group_pathkeys = subroot->group_pathkeys;
 			root->sort_pathkeys = subroot->sort_pathkeys;
 			root->query_pathkeys = subroot->query_pathkeys;
-
+			root->distinct_pathkeys = subroot->distinct_pathkeys;
 			/*
 			 * AQUMV_FIXME_MVP
 			 * Use new query's ecs.
diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out
index dd50fcdbf06..3fee06cd8aa 100644
--- a/src/test/regress/expected/aqumv.out
+++ b/src/test/regress/expected/aqumv.out
@@ -1867,6 +1867,224 @@ select c1, sum(c3) as sum_c3 from aqumv_t5 where c1 > 90 group by c1 order by su
   91 |    188
 (10 rows)
 
+abort;
+-- Test DISTINCT
+begin;
+create table aqumv_t6(c1 int, c2 int, c3 int, c4 int) distributed by (c1);
+insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+insert into aqumv_t6 values (91, NULL, 97, 98);
+analyze aqumv_t6;
+create incremental materialized view aqumv_mvt6_0 as
+  select c1 as mc1, c2 as mc2
+  from aqumv_t6 where c1 > 90;
+analyze aqumv_mvt6_0;
+-- DISTINCT
+\pset null NULL
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select distinct c2, c1 from aqumv_t6 where c1 > 90;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: c2, c1
+   ->  HashAggregate
+         Output: c2, c1
+         Group Key: aqumv_t6.c2, aqumv_t6.c1
+         ->  Seq Scan on public.aqumv_t6
+               Output: c1, c2, c3, c4
+               Filter: (aqumv_t6.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1;
+  c2  | c1  
+------+-----
+   92 |  91
+   93 |  92
+   94 |  93
+   95 |  94
+   96 |  95
+   97 |  96
+   98 |  97
+   99 |  98
+  100 |  99
+  101 | 100
+ NULL |  91
+(11 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select distinct c2, c1 from aqumv_t6 where c1 > 90;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: mc2, mc1
+   ->  HashAggregate
+         Output: mc2, mc1
+         Group Key: aqumv_mvt6_0.mc2, aqumv_mvt6_0.mc1
+         ->  Seq Scan on public.aqumv_mvt6_0
+               Output: mc1, mc2
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1;
+  c2  | c1  
+------+-----
+   92 |  91
+   93 |  92
+   94 |  93
+   95 |  94
+   96 |  95
+   97 |  96
+   98 |  97
+   99 |  98
+  100 |  99
+  101 | 100
+ NULL |  91
+(11 rows)
+
+-- Agg DISTINCT
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Aggregate
+   Output: count(c1), count(DISTINCT c1), sum(c2), sum(DISTINCT c2)
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: c1, c2
+         ->  Seq Scan on public.aqumv_t6
+               Output: c1, c2
+               Filter: (aqumv_t6.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+ count_c1 | count_distinct_c1 | sum_c2 | sum_distinct_c2 
+----------+-------------------+--------+-----------------
+       21 |                10 |   1930 |             965
+(1 row)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Aggregate
+   Output: count(mc1), count(DISTINCT mc1), sum(mc2), sum(DISTINCT mc2)
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: mc1, mc2
+         ->  Seq Scan on public.aqumv_mvt6_0
+               Output: mc1, mc2
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+ count_c1 | count_distinct_c1 | sum_c2 | sum_distinct_c2 
+----------+-------------------+--------+-----------------
+       21 |                10 |   1930 |             965
+(1 row)
+
+-- Group DISTINCT
+create incremental materialized view aqumv_mvt6_1 as
+  select c3 as mc3, c4 as mc4, c1 as mc1, c2 as mc2
+  from aqumv_t6 where c1 > 97;
+analyze aqumv_mvt6_1;
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ MixedAggregate
+   Output: c1, c2, c3, sum(c4)
+   Hash Key: aqumv_t6.c1, aqumv_t6.c3
+   Group Key: aqumv_t6.c1, aqumv_t6.c2, aqumv_t6.c3
+   Group Key: aqumv_t6.c1, aqumv_t6.c2
+   Group Key: aqumv_t6.c1
+   Group Key: ()
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: c1, c2, c3, c4
+         Merge Key: c1, c2, c3
+         ->  Sort
+               Output: c1, c2, c3, c4
+               Sort Key: aqumv_t6.c1, aqumv_t6.c2, aqumv_t6.c3
+               ->  Seq Scan on public.aqumv_t6
+                     Output: c1, c2, c3, c4
+                     Filter: (aqumv_t6.c1 > 97)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(18 rows)
+
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+  c1  |  c2  |  c3  | sum 
+------+------+------+-----
+   98 |   99 |  100 | 202
+   98 |   99 | NULL | 202
+   98 | NULL | NULL | 202
+   99 |  100 |  101 | 204
+   99 |  100 | NULL | 204
+   99 | NULL | NULL | 204
+  100 |  101 |  102 | 206
+  100 |  101 | NULL | 206
+  100 | NULL | NULL | 206
+ NULL | NULL | NULL | 612
+  100 | NULL |  102 | 206
+   99 | NULL |  101 | 204
+   98 | NULL |  100 | 202
+(13 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ MixedAggregate
+   Output: mc1, mc2, mc3, sum(mc4)
+   Hash Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc3
+   Group Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc2, aqumv_mvt6_1.mc3
+   Group Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc2
+   Group Key: aqumv_mvt6_1.mc1
+   Group Key: ()
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: mc1, mc2, mc3, mc4
+         Merge Key: mc1, mc2, mc3
+         ->  Sort
+               Output: mc1, mc2, mc3, mc4
+               Sort Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc2, aqumv_mvt6_1.mc3
+               ->  Seq Scan on public.aqumv_mvt6_1
+                     Output: mc1, mc2, mc3, mc4
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(17 rows)
+
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+  c1  |  c2  |  c3  | sum 
+------+------+------+-----
+   98 |   99 |  100 | 202
+   98 |   99 | NULL | 202
+   98 | NULL | NULL | 202
+   99 |  100 |  101 | 204
+   99 |  100 | NULL | 204
+   99 | NULL | NULL | 204
+  100 |  101 |  102 | 206
+  100 |  101 | NULL | 206
+  100 | NULL | NULL | 206
+ NULL | NULL | NULL | 612
+  100 | NULL |  102 | 206
+   99 | NULL |  101 | 204
+   98 | NULL |  100 | 202
+(13 rows)
+
+\pset null ''
 abort;
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql
index 772a1358b36..cc60dcb6819 100644
--- a/src/test/regress/sql/aqumv.sql
+++ b/src/test/regress/sql/aqumv.sql
@@ -462,6 +462,62 @@ select c1, sum(c3) as sum_c3 from aqumv_t5 where c1 > 90 group by c1 order by su
 select c1, sum(c3) as sum_c3 from aqumv_t5 where c1 > 90 group by c1 order by sum_c3 asc;
 abort;
 
+-- Test DISTINCT
+begin;
+create table aqumv_t6(c1 int, c2 int, c3 int, c4 int) distributed by (c1);
+insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+insert into aqumv_t6 values (91, NULL, 97, 98);
+analyze aqumv_t6;
+
+create incremental materialized view aqumv_mvt6_0 as
+  select c1 as mc1, c2 as mc2
+  from aqumv_t6 where c1 > 90;
+analyze aqumv_mvt6_0;
+
+-- DISTINCT
+\pset null NULL
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select distinct c2, c1 from aqumv_t6 where c1 > 90;
+select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select distinct c2, c1 from aqumv_t6 where c1 > 90;
+select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1;
+
+-- Agg DISTINCT
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+select count(c1) as count_c1, count(distinct c1) as count_distinct_c1,
+sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90;
+
+-- Group DISTINCT
+create incremental materialized view aqumv_mvt6_1 as
+  select c3 as mc3, c4 as mc4, c1 as mc1, c2 as mc2
+  from aqumv_t6 where c1 > 97;
+analyze aqumv_mvt6_1;
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
+
+\pset null ''
+abort;
+
+
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
 drop table aqumv_t1 cascade;

From 1b0e01f7447b1441243264856ea4cc932b733ed1 Mon Sep 17 00:00:00 2001
From: Zhang Mingli <avamingli@gmail.com>
Date: Fri, 17 May 2024 16:04:39 +0800
Subject: [PATCH 33/48] [AQUMV] Support DISTINCT ON clause on origin query.

Since the ORDER BY clause is already supported and references in a
DISTINCT ON clause are processed together with the target list,
enable DISTINCT ON on the origin query.

create incremental materialized view mv as
  select c1 as mc1, c2 as mc2, c3 as mc3, c4 as mc4
  from t1 where c1 > 90;

Origin queries:

 select DISTINCT ON(c1 - 1) c1, c2 from t1 where c1 > 90
   order by c1 - 1, c2 nulls first;

Could be rewritten to:

 select DISTINCT ON(mc1 - 1) mc1, mc2 from mv
   order by mc1 - 1, mc2 nulls first;

Authored-by: Zhang Mingli avamingli@gmail.com
---
 src/backend/optimizer/README.cbdb.aqumv |  2 +-
 src/backend/optimizer/plan/aqumv.c      |  2 +-
 src/test/regress/expected/aqumv.out     | 78 +++++++++++++++++++++++++
 src/test/regress/sql/aqumv.sql          | 10 ++++
 4 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/README.cbdb.aqumv b/src/backend/optimizer/README.cbdb.aqumv
index a0a1c349fa7..99dc217d6e3 100644
--- a/src/backend/optimizer/README.cbdb.aqumv
+++ b/src/backend/optimizer/README.cbdb.aqumv
@@ -228,7 +228,7 @@ Below are not supported now:
       Window Functions
       CTE
       Distinct (on view_query)
-      Distinct On
+      Distinct On (on view_query)
       UNION/INTERSECT/EXCEPT
       FOR UPDATE, FOR NO KEY UPDATE, FOR SHARE, FOR KEY SHARE
       Scatter By
diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c
index 0ea441dfd52..cefd868771e 100644
--- a/src/backend/optimizer/plan/aqumv.c
+++ b/src/backend/optimizer/plan/aqumv.c
@@ -119,7 +119,6 @@ answer_query_using_materialized_views(PlannerInfo *root,
 						  (parse->cteList != NIL) ||
 						  (parse->setOperations != NULL) ||
 						  parse->hasWindowFuncs ||
-						  parse->hasDistinctOn ||
 						  parse->hasModifyingCTE ||
 						  (parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW) ||
 						  (parse->parentStmtType == PARENTSTMTTYPE_CTAS) ||
@@ -328,6 +327,7 @@ answer_query_using_materialized_views(PlannerInfo *root,
 		 * could be computed from viewQuery.
 		 */
 		viewQuery->hasAggs = parse->hasAggs;
+		viewQuery->hasDistinctOn = parse->hasDistinctOn;
 		/*
 		 * For HAVING quals have aggregations, we have already processed them in
 		 * Aggrefs during aqumv_process_targetlist().
diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out
index 3fee06cd8aa..0694dbcae97 100644
--- a/src/test/regress/expected/aqumv.out
+++ b/src/test/regress/expected/aqumv.out
@@ -2084,6 +2084,84 @@ select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(
    98 | NULL |  100 | 202
 (13 rows)
 
+-- DISTINCT ON
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: c1, c2, ((c1 - 1))
+   Merge Key: ((c1 - 1)), c2
+   ->  Unique
+         Output: c1, c2, ((c1 - 1))
+         Group Key: ((c1 - 1))
+         ->  Sort
+               Output: c1, c2, ((c1 - 1))
+               Sort Key: ((aqumv_t6.c1 - 1)), aqumv_t6.c2 NULLS FIRST
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Output: c1, c2, ((c1 - 1))
+                     Hash Key: ((c1 - 1))
+                     ->  Seq Scan on public.aqumv_t6
+                           Output: c1, c2, (c1 - 1)
+                           Filter: (aqumv_t6.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(17 rows)
+
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+ c1  |  c2  
+-----+------
+  91 | NULL
+  92 |   93
+  93 |   94
+  94 |   95
+  95 |   96
+  96 |   97
+  97 |   98
+  98 |   99
+  99 |  100
+ 100 |  101
+(10 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: mc1, mc2, ((mc1 - 1))
+   Merge Key: ((mc1 - 1)), mc2
+   ->  Unique
+         Output: mc1, mc2, ((mc1 - 1))
+         Group Key: ((mc1 - 1))
+         ->  Sort
+               Output: mc1, mc2, ((mc1 - 1))
+               Sort Key: ((aqumv_mvt6_0.mc1 - 1)), aqumv_mvt6_0.mc2 NULLS FIRST
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Output: mc1, mc2, ((mc1 - 1))
+                     Hash Key: ((mc1 - 1))
+                     ->  Seq Scan on public.aqumv_mvt6_0
+                           Output: mc1, mc2, (mc1 - 1)
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+ c1  |  c2  
+-----+------
+  91 | NULL
+  92 |   93
+  93 |   94
+  94 |   95
+  95 |   96
+  96 |   97
+  97 |   98
+  98 |   99
+  99 |  100
+ 100 |  101
+(10 rows)
+
 \pset null ''
 abort;
 reset optimizer;
diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql
index cc60dcb6819..86af470541f 100644
--- a/src/test/regress/sql/aqumv.sql
+++ b/src/test/regress/sql/aqumv.sql
@@ -514,6 +514,16 @@ explain(costs off, verbose)
 select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
 select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3);
 
+-- DISTINCT ON
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c2 nulls first;
+
 \pset null ''
 abort;
 

From d678c248612c3a431ac88c015c1193b53fd2ac02 Mon Sep 17 00:00:00 2001
From: jiaqizho <zhoujiaqi@hashdata.cn>
Date: Thu, 23 May 2024 14:34:38 +0800
Subject: [PATCH 34/48] Expand a new external var tag (#443)

External toast datums in CBDB have a fixed structure; the vartag_external is used to tell which way to detoast them.

To add an external toast implementation in an extension without changing the kernel, a new tag needs to
be added to vartag_external.

This change defines a generic extension tag named VARTAG_CUSTOM. This kind of tag is not
used in the kernel, which means that a datum returned from an extension should not be a toast with this kind
of tag. The tag is only used within the extension.
---
 src/include/postgres.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/include/postgres.h b/src/include/postgres.h
index 3561e2ed40b..d73030a6492 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -131,7 +131,8 @@ typedef enum vartag_external
 	VARTAG_INDIRECT = 1,
 	VARTAG_EXPANDED_RO = 2,
 	VARTAG_EXPANDED_RW = 3,
-	VARTAG_ONDISK = 20
+	VARTAG_ONDISK = 20,
+	VARTAG_CUSTOM = 21 /* external toast custom defined tag */
 } vartag_external;
 
 /* this test relies on the specific tag values above */

From 9b9fd5bbc90faa8b7ea059598ba0d98da431e103 Mon Sep 17 00:00:00 2001
From: jiaqizho <zhoujiaqi@hashdata.cn>
Date: Fri, 24 May 2024 10:45:31 +0800
Subject: [PATCH 35/48] PendingDelete: expand the pending deletes interface
 (#442)

The pending-delete list in CBDB can only hold relfilenodes.

This change expands the pending-delete interface so that a self-defined structure can be added to the
pending-delete list, and a self-defined callback can decide how to delete the resource. This is very
helpful for UFile or other extensions that use different local/remote resources.
---
 src/backend/catalog/storage.c                 | 197 +++++++++--------
 src/backend/catalog/storage_directory_table.c | 199 ++++++------------
 src/backend/commands/copyfrom.c               |   2 +-
 src/backend/commands/dirtablecmds.c           |   2 +-
 src/include/catalog/storage.h                 |  84 ++++++++
 src/include/catalog/storage_directory_table.h |   9 +-
 src/include/storage/smgr.h                    |   3 +-
 7 files changed, 265 insertions(+), 231 deletions(-)

diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 6d80abce254..76a9f7e12e5 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -41,33 +41,6 @@
 /* GUC variables */
 int			wal_skip_threshold = 2048;	/* in kilobytes */
 
-/*
- * We keep a list of all relations (represented as RelFileNode values)
- * that have been created or deleted in the current transaction.  When
- * a relation is created, we create the physical file immediately, but
- * remember it so that we can delete the file again if the current
- * transaction is aborted.  Conversely, a deletion request is NOT
- * executed immediately, but is just entered in the list.  When and if
- * the transaction commits, we can delete the physical file.
- *
- * To handle subtransactions, every entry is marked with its transaction
- * nesting level.  At subtransaction commit, we reassign the subtransaction's
- * entries to the parent nesting level.  At subtransaction abort, we can
- * immediately execute the abort-time actions for all entries of the current
- * nesting level.
- *
- * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
- * unbetimes.  It'd probably be OK to keep it in TopTransactionContext,
- * but I'm being paranoid.
- */
-
-typedef struct PendingRelDelete
-{
-	RelFileNodePendingDelete relnode;		/* relation that may need to be deleted */
-	bool		atCommit;		/* T=delete at commit; F=delete at abort */
-	int			nestLevel;		/* xact nesting level of request */
-	struct PendingRelDelete *next;	/* linked-list link */
-} PendingRelDelete;
 
 typedef struct PendingRelSync
 {
@@ -79,6 +52,38 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
 HTAB	   *pendingSyncHash = NULL;
 
 
+static
+void
+StoargeDestroyPendingRelDelete(PendingRelDelete *reldelete)
+{
+	pfree(reldelete);
+}
+
+static
+void
+StorageDoPendingRelDelete(PendingRelDelete *delete)
+{
+	SMgrRelation srel;
+
+	/*
+	 * GPDB: backend can only be TempRelBackendId or InvalidBackendId for a
+	 * given relfile since we don't tie temp relations to their backends.
+	 */
+	srel = smgropen(delete->relnode.node,
+					delete->relnode.isTempRelation ?
+					TempRelBackendId : InvalidBackendId,
+					delete->relnode.smgr_which, NULL);
+	smgrdounlinkall(&srel, 1, false);
+	smgrclose(srel);
+}
+
+struct PendingRelDeleteAction storage_pending_rel_deletes_action = {
+	.flags = PENDING_REL_DELETE_NEED_PRESERVE | PENDING_REL_DELETE_NEED_XLOG | PENDING_REL_DELETE_NEED_SYNC,
+	.destroy_pending_rel_delete = StoargeDestroyPendingRelDelete,
+	.do_pending_rel_delete = StorageDoPendingRelDelete
+};
+
+
 /*
  * AddPendingSync
  *		Queue an at-commit fsync.
@@ -160,8 +165,8 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence, SMgrImpl smgr_whic
 	pending->atCommit = false;	/* delete if abort */
 	pending->nestLevel = GetCurrentTransactionNestLevel();
 	pending->relnode.smgr_which = smgr_which;
-	pending->next = pendingDeletes;
-	pendingDeletes = pending;
+	pending->action = &storage_pending_rel_deletes_action;
+	RegisterPendingDelete(pending);
 
 	if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
 	{
@@ -210,8 +215,8 @@ RelationDropStorage(Relation rel)
 	pending->nestLevel = GetCurrentTransactionNestLevel();
 	pending->relnode.smgr_which =
 		RelationIsAppendOptimized(rel) ? SMGR_AO : SMGR_MD;
-	pending->next = pendingDeletes;
-	pendingDeletes = pending;
+	pending->action = &storage_pending_rel_deletes_action;
+	RegisterPendingDelete(pending);
 
 	/*
 	 * NOTE: if the relation was created in this transaction, it will now be
@@ -254,6 +259,12 @@ RelationPreserveStorage(RelFileNode rnode, bool atCommit)
 	for (pending = pendingDeletes; pending != NULL; pending = next)
 	{
 		next = pending->next;
+		Assert(pending->action);
+		if (!(pending->action->flags & PENDING_REL_DELETE_NEED_PRESERVE))
+		{
+			continue;
+		}
+
 		if (RelFileNodeEquals(rnode, pending->relnode.node)
 			&& pending->atCommit == atCommit)
 		{
@@ -337,13 +348,13 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
 	 * is in progress.
 	 *
 	 * The truncation operation might drop buffers that the checkpoint
-	 * otherwise would have flushed. If it does, then it's essential that
-	 * the files actually get truncated on disk before the checkpoint record
-	 * is written. Otherwise, if reply begins from that checkpoint, the
+	 * otherwise would have flushed. If it does, then it's essential that the
+	 * files actually get truncated on disk before the checkpoint record is
+	 * written. Otherwise, if reply begins from that checkpoint, the
 	 * to-be-truncated blocks might still exist on disk but have older
-	 * contents than expected, which can cause replay to fail. It's OK for
-	 * the blocks to not exist on disk at all, but not for them to have the
-	 * wrong contents.
+	 * contents than expected, which can cause replay to fail. It's OK for the
+	 * blocks to not exist on disk at all, but not for them to have the wrong
+	 * contents.
 	 */
 	Assert(!MyProc->delayChkptEnd);
 	MyProc->delayChkptEnd = true;
@@ -584,10 +595,12 @@ SerializePendingSyncs(Size maxSize, char *startAddress)
 
 	/* remove deleted rnodes */
 	for (delete = pendingDeletes; delete != NULL; delete = delete->next)
-		if (delete->atCommit)
+	{
+		Assert(delete->action);
+		if (delete->atCommit || !(delete->action->flags & PENDING_REL_DELETE_NEED_SYNC))
 			(void) hash_search(tmphash, (void *) &delete->relnode,
 							   HASH_REMOVE, NULL);
-
+	}
 	hash_seq_init(&scan, tmphash);
 	while ((src = (RelFileNode *) hash_seq_search(&scan)))
 		*dest++ = *src;
@@ -616,6 +629,15 @@ RestorePendingSyncs(char *startAddress)
 		AddPendingSync(rnode);
 }
 
+void
+RegisterPendingDelete(struct PendingRelDelete *delete)
+{
+	Assert(delete);
+	Assert(delete->action);
+	delete->next = pendingDeletes;
+	pendingDeletes = delete;
+}
+
 /*
  *	smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
  *
@@ -634,11 +656,6 @@ smgrDoPendingDeletes(bool isCommit)
 	PendingRelDelete *pending;
 	PendingRelDelete *prev;
 	PendingRelDelete *next;
-	int			nrels = 0,
-				maxrels = 0;
-	SMgrRelation *srels = NULL;
-
-	UFileDoDeletesActions(isCommit);
 
 	prev = NULL;
 	for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -659,44 +676,18 @@ smgrDoPendingDeletes(bool isCommit)
 			/* do deletion if called for */
 			if (pending->atCommit == isCommit)
 			{
-				SMgrRelation srel;
-				/* GPDB: backend can only be TempRelBackendId or
-				 * InvalidBackendId for a given relfile since we don't tie temp
-				 * relations to their backends. */
-				srel = smgropen(pending->relnode.node,
-								pending->relnode.isTempRelation ?
-								TempRelBackendId : InvalidBackendId,
-								pending->relnode.smgr_which, NULL);
-
-				/* allocate the initial array, or extend it, if needed */
-				if (maxrels == 0)
-				{
-					maxrels = 8;
-					srels = palloc(sizeof(SMgrRelation) * maxrels);
-				}
-				else if (maxrels <= nrels)
-				{
-					maxrels *= 2;
-					srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
-				}
-
-				srels[nrels++] = srel;
+				Assert(pending->action);
+				Assert(pending->action->do_pending_rel_delete);
+				pending->action->do_pending_rel_delete(pending);
 			}
+
 			/* must explicitly free the list entry */
-			pfree(pending);
+			Assert(pending->action);
+			Assert(pending->action->destroy_pending_rel_delete);
+			pending->action->destroy_pending_rel_delete(pending);
 			/* prev does not change */
 		}
 	}
-
-	if (nrels > 0)
-	{
-		smgrdounlinkall(srels, nrels, false);
-
-		for (int i = 0; i < nrels; i++)
-			smgrclose(srels[i]);
-
-		pfree(srels);
-	}
 }
 
 /*
@@ -733,12 +724,17 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 		return;
 	}
 
-	/* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
+	/*
+	 * Skip syncing nodes that smgrDoPendingDeletes() will delete. Also skip
+	 * the no need sync pending delete item.
+	 */
 	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
-		if (pending->atCommit)
+	{
+		Assert(pending->action);
+		if (pending->atCommit || !(pending->action->flags & PENDING_REL_DELETE_NEED_SYNC))
 			(void) hash_search(pendingSyncHash, (void *) &pending->relnode,
 							   HASH_REMOVE, NULL);
-
+	}
 	hash_seq_init(&scan, pendingSyncHash);
 	while ((pendingsync = (PendingRelSync *) hash_seq_search(&scan)))
 	{
@@ -872,6 +868,13 @@ smgrGetPendingDeletes(bool forCommit, RelFileNodePendingDelete **ptr)
 	nrels = 0;
 	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 	{
+		Assert(pending->action);
+		if (!(pending->action->flags & PENDING_REL_DELETE_NEED_XLOG))
+		{
+			/* should not reocrd xlog expect pg relation */
+			continue;
+		}
+
 		if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
 			/*
 			 * Cloudberry allows transactions that access temporary tables to be
@@ -890,6 +893,12 @@ smgrGetPendingDeletes(bool forCommit, RelFileNodePendingDelete **ptr)
 	*ptr = rptr;
 	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 	{
+		Assert(pending->action);
+		if (!(pending->action->flags & PENDING_REL_DELETE_NEED_XLOG))
+		{
+			continue;
+		}
+
 		if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
 			/*
 			 * Keep this loop condition identical to above
@@ -903,6 +912,7 @@ smgrGetPendingDeletes(bool forCommit, RelFileNodePendingDelete **ptr)
 	}
 	return nrels;
 }
+
 /*
  *	PostPrepare_smgr -- Clean up after a successful PREPARE
  *
@@ -914,14 +924,32 @@ void
 PostPrepare_smgr(void)
 {
 	PendingRelDelete *pending;
+	PendingRelDelete *prev;
 	PendingRelDelete *next;
 
+	prev = NULL;
 	for (pending = pendingDeletes; pending != NULL; pending = next)
 	{
 		next = pending->next;
-		pendingDeletes = next;
-		/* must explicitly free the list entry */
-		pfree(pending);
+
+		Assert(pending->action);
+		if (pending->action->flags & PENDING_REL_DELETE_NEED_DROP_DELAY_DELETE)
+		{
+			/* delay delete entries should not be processed yet */
+			prev = pending;
+		}
+		else
+		{
+			/* unlink list entry first, so we don't retry on failure */
+			if (prev)
+				prev->next = next;
+			else
+				pendingDeletes = next;
+
+			/* do deletion if called for */
+			Assert(pending->action->destroy_pending_rel_delete);
+			pending->action->destroy_pending_rel_delete(pending);
+		}
 	}
 }
 
@@ -936,8 +964,6 @@ AtSubCommit_smgr(void)
 	int			nestLevel = GetCurrentTransactionNestLevel();
 	PendingRelDelete *pending;
 
-	UFileAtSubCommitSmgr();
-
 	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 	{
 		if (pending->nestLevel >= nestLevel)
@@ -955,7 +981,6 @@ AtSubCommit_smgr(void)
 void
 AtSubAbort_smgr(void)
 {
-	UFileAtSubAbortSmgr();
 	smgrDoPendingDeletes(false);
 }
 
diff --git a/src/backend/catalog/storage_directory_table.c b/src/backend/catalog/storage_directory_table.c
index 0f8ebdb9dbc..6577aa56e53 100644
--- a/src/backend/catalog/storage_directory_table.c
+++ b/src/backend/catalog/storage_directory_table.c
@@ -19,6 +19,7 @@
 #include "access/xact.h"
 #include "catalog/pg_directory_table.h"
 #include "catalog/pg_tablespace.h"
+#include "catalog/storage.h"
 #include "catalog/storage_directory_table.h"
 #include "storage/smgr.h"
 #include "storage/ufile.h"
@@ -28,45 +29,75 @@
 #include "cdb/cdbvars.h"
 
 /*
- * TODO: Redo pending delete
+ * TODO: support ufile pending delete xlog
+ *
+ * Ufile do not support deleteing files during WAL redo, two of reason:
+ *
+ * 1. deleting files requires a connection to object storage system.
+ * In order to establish the connection to the object storage, we
+ * need to access the catalog table to retrieve the connection
+ * configuration info, which is impossible during WAL redo.
+ *
+ * 2. no custom xlog entry support.
+ * Custom WAL Resource Managers are immature and not reflected in CBDB.
  *
- * We do not support deleteing files during WAL redo, this is because deleting
- * files requires a connection to object storage system. In order to establish
- * the connection to the object storage, we need to access the catalog table to
- * retrieve the connection configuration info, which is impossible during WAL
- * redo.
  */
-
 typedef struct UFileNodePendingDelete
 {
-	char  relkind;
-	Oid   spcId;			/* directory table needs an extra tabpespace */
-	char *relativePath;
-} UFileNodePendingDelete;
+	char		relkind;
+	Oid			spcId;			/* directory table needs an extra tabpespace */
+	char	   *relativePath;
+}			UFileNodePendingDelete;
 
 typedef struct PendingRelDeleteUFile
 {
-	UFileNodePendingDelete filenode;		/* relation that may need to be deleted */
-	bool		atCommit;		/* T=delete at commit; F=delete at abort */
-	int			nestLevel;		/* xact nesting level of request */
-	struct PendingRelDeleteUFile *next;		/* linked-list link */
-} PendingRelDeleteUFile;
+	PendingRelDelete reldelete; /* base pending delete */
+	UFileNodePendingDelete filenode;	/* relation that may need to be
+										 * deleted */
+}			PendingRelDeleteUFile;
+
+
+static void
+UfileDestroyPendingRelDelete(PendingRelDelete *reldelete)
+{
+	PendingRelDeleteUFile *ufiledelete;
+
+	Assert(reldelete);
+	ufiledelete = (PendingRelDeleteUFile *) reldelete;
+
+	pfree(ufiledelete->filenode.relativePath);
+	pfree(ufiledelete);
+}
+
+static void
+UfileDoPendingRelDelete(PendingRelDelete *reldelete)
+{
+	PendingRelDeleteUFile *ufiledelete;
 
-static PendingRelDeleteUFile *pendingDeleteUFiles = NULL; /* head of linked list */
+	Assert(reldelete);
+	ufiledelete = (PendingRelDeleteUFile *) reldelete;
+
+	UFileUnlink(ufiledelete->filenode.spcId, ufiledelete->filenode.relativePath);
+}
+
+struct PendingRelDeleteAction ufile_pending_rel_deletes_action = {
+	.flags = PENDING_REL_DELETE_DEFAULT_FLAG,
+	.destroy_pending_rel_delete = UfileDestroyPendingRelDelete,
+	.do_pending_rel_delete = UfileDoPendingRelDelete
+};
 
 void
 DirectoryTableDropStorage(Relation rel)
 {
-	char *filePath;
+	char	   *filePath;
 	DirectoryTable *dirTable;
-	PendingRelDeleteUFile *pending;
 	TableScanDesc scandesc;
 	Relation	spcrel;
 	HeapTuple	tuple;
 	Form_pg_tablespace spcform;
 	ScanKeyData entry[1];
 	Oid			tablespaceoid;
-	char 	   *tablespace_name;
+	char	   *tablespace_name;
 
 	dirTable = GetDirectoryTable(RelationGetRelid(rel));
 
@@ -85,9 +116,9 @@ DirectoryTableDropStorage(Relation rel)
 	if (!HeapTupleIsValid(tuple))
 	{
 		ereport(ERROR,
-					(errcode(ERRCODE_UNDEFINED_OBJECT),
-					 errmsg("tablespace \"%d\" does not exist",
-							dirTable->spcId)));
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("tablespace \"%d\" does not exist",
+						dirTable->spcId)));
 	}
 
 	spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
@@ -99,35 +130,13 @@ DirectoryTableDropStorage(Relation rel)
 
 	filePath = psprintf("%s", dirTable->location);
 
-	/* Add the relation to the list of stuff to delete at commit */
-	pending = (PendingRelDeleteUFile *)
-		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDeleteUFile));
-	pending->filenode.relkind = rel->rd_rel->relkind;
-	pending->filenode.relativePath = MemoryContextStrdup(TopMemoryContext, filePath);
-	pending->filenode.spcId = dirTable->spcId;
-
-	pending->atCommit = true;	/* delete if commit */
-	pending->nestLevel = GetCurrentTransactionNestLevel();
-	pending->next = pendingDeleteUFiles;
-
-	pendingDeleteUFiles = pending;
+	UFileAddPendingDelete(rel, dirTable->spcId, filePath, true);
 
 	pfree(filePath);
-
-	/*
-	 * Make sure the connection to the corresponding tablespace has
-	 * been cached.
-	 *
-	 * UFileDoDeletesActions->UFileUnlink is called outside of the
-	 * transaction, if we don't establish a connection here. we may
-	 * face the issus of accessing the catalog outside of the
-	 * transaction.
-	 */
-	forceCacheUFileResource(dirTable->spcId);
 }
 
 void
-UFileAddCreatePendingEntry(Relation rel, Oid spcId, char *relativePath)
+UFileAddPendingDelete(Relation rel, Oid spcId, char *relativePath, bool atCommit)
 {
 	PendingRelDeleteUFile *pending;
 
@@ -138,98 +147,18 @@ UFileAddCreatePendingEntry(Relation rel, Oid spcId, char *relativePath)
 	pending->filenode.relativePath = MemoryContextStrdup(TopMemoryContext, relativePath);
 	pending->filenode.spcId = spcId;
 
-	pending->atCommit = false;	/* delete if abort */
-	pending->nestLevel = GetCurrentTransactionNestLevel();
-	pending->next = pendingDeleteUFiles;
+	pending->reldelete.atCommit = atCommit; /* delete if abort */
+	pending->reldelete.nestLevel = GetCurrentTransactionNestLevel();
 
-	pendingDeleteUFiles = pending;
+	pending->reldelete.relnode.node = rel->rd_node;
+	pending->reldelete.relnode.isTempRelation = rel->rd_backend == TempRelBackendId;
+	pending->reldelete.relnode.smgr_which = SMGR_INVALID;
 
-	/*
-	 * Make sure the spccache to the corresponding tablespace has
-	 * been cached.
-	 */
-	forceCacheUFileResource(spcId);
-}
-
-void
-UFileAddDeletePendingEntry(Relation rel, Oid spcId, char *relativePath)
-{
-	PendingRelDeleteUFile *pending;
-
-	/* Add the relation to the list of stuff to delete at abort */
-	pending = (PendingRelDeleteUFile *)
-		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDeleteUFile));
-	pending->filenode.relkind = rel->rd_rel->relkind;
-	pending->filenode.relativePath = MemoryContextStrdup(TopMemoryContext, relativePath);
-	pending->filenode.spcId = spcId;
-
-	pending->atCommit = true;	/* delete if commit */
-	pending->nestLevel = GetCurrentTransactionNestLevel();
-	pending->next = pendingDeleteUFiles;
-
-	pendingDeleteUFiles = pending;
+	pending->reldelete.action = &ufile_pending_rel_deletes_action;
+	RegisterPendingDelete(&pending->reldelete);
 
 	/*
-	 * Make sure the spccache to the corresponding tablespace has
-	 * been cached.
+	 * Make sure the spccache to the corresponding tablespace has been cached.
 	 */
 	forceCacheUFileResource(spcId);
 }
-
-void
-UFileDoDeletesActions(bool isCommit)
-{
-	int nestLevel = GetCurrentTransactionNestLevel();
-	PendingRelDeleteUFile *pending;
-	PendingRelDeleteUFile *prev;
-	PendingRelDeleteUFile *next;
-
-	prev = NULL;
-	for (pending = pendingDeleteUFiles; pending != NULL; pending = next)
-	{
-		next = pending->next;
-		if (pending->nestLevel < nestLevel)
-		{
-			/* outer-level entries should not be processed yet */
-			prev = pending;
-		}
-		else
-		{
-			/* unlink list entry first, so we don't retry on failure */
-			if (prev)
-				prev->next = next;
-			else
-				pendingDeleteUFiles = next;
-
-			/* do deletion if called for */
-			if (pending->atCommit == isCommit)
-				UFileUnlink(pending->filenode.spcId, pending->filenode.relativePath);
-
-			/* must explicitly free the list entry */
-			if (pending->filenode.relativePath)
-				pfree(pending->filenode.relativePath);
-
-			pfree(pending);
-			/* prev does not change */
-		}
-	}
-}
-
-void
-UFileAtSubCommitSmgr(void)
-{
-	int	nestLevel = GetCurrentTransactionNestLevel();
-	PendingRelDeleteUFile *pending;
-
-	for (pending = pendingDeleteUFiles; pending != NULL; pending = pending->next)
-	{
-		if (pending->nestLevel >= nestLevel)
-			pending->nestLevel = nestLevel - 1;
-	}
-}
-
-void
-UFileAtSubAbortSmgr(void)
-{
-	UFileDoDeletesActions(false);
-}
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index dc0f3df20ff..3736921eba4 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1132,7 +1132,7 @@ CopyFromDirectoryTable(CopyFromState cstate)
 						 	 errmsg("failed to open file \"%s\": %s", orgiFileName, errorMessage)));
 
 			/* Delete uploaded file when the transaction fails */
-			UFileAddCreatePendingEntry(cstate->rel, dirTable->spcId, orgiFileName);
+			UFileAddPendingDelete(cstate->rel, dirTable->spcId, orgiFileName, false);
 
 			file_buf = TextDatumGetCString(myslot->tts_values[5]);
 			decode_file_len = strlen(file_buf);
diff --git a/src/backend/commands/dirtablecmds.c b/src/backend/commands/dirtablecmds.c
index e1d18938cff..33e7a6cc933 100644
--- a/src/backend/commands/dirtablecmds.c
+++ b/src/backend/commands/dirtablecmds.c
@@ -392,7 +392,7 @@ remove_file_segment(PG_FUNCTION_ARGS)
 	{
 		CatalogTupleDelete(relation, &tuple->t_self);
 		fullPathName = psprintf("%s/%s", dirTable->location, relativePath);
-		UFileAddDeletePendingEntry(relation, dirTable->spcId, fullPathName);
+		UFileAddPendingDelete(relation, dirTable->spcId, fullPathName, true);
 		exist = true;
 	}
 
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index b0e68a8cd50..5fcf27a70df 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -23,6 +23,87 @@
 /* GUC variables */
 extern int	wal_skip_threshold;
 
+/*
+ * We keep a list of all relations (represented as RelFileNode values)
+ * that have been created or deleted in the current transaction.  When
+ * a relation is created, we create the physical file immediately, but
+ * remember it so that we can delete the file again if the current
+ * transaction is aborted.  Conversely, a deletion request is NOT
+ * executed immediately, but is just entered in the list.  When and if
+ * the transaction commits, we can delete the physical file.
+ *
+ * To handle subtransactions, every entry is marked with its transaction
+ * nesting level.  At subtransaction commit, we reassign the subtransaction's
+ * entries to the parent nesting level.  At subtransaction abort, we can
+ * immediately execute the abort-time actions for all entries of the current
+ * nesting level.
+ *
+ * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
+ * unbetimes.  It'd probably be OK to keep it in TopTransactionContext,
+ * but I'm being paranoid.
+ */
+struct PendingRelDeleteAction;
+typedef struct PendingRelDelete
+{
+	struct PendingRelDeleteAction *action;	/* The action is to do pending
+											 * delete */
+	RelFileNodePendingDelete relnode;	/* relation that may need to be
+										 * deleted */
+	bool		atCommit;		/* T=delete at commit; F=delete at abort */
+	int			nestLevel;		/* xact nesting level of request */
+
+	struct PendingRelDelete *next;	/* linked-list link */
+} PendingRelDelete;
+
+/*
+ * functions used to pending delete callbacks
+ *
+ * Notice that: no xlog generate in these interface.
+ * also make sure that NO register same pending delete
+ * into smgr
+ */
+struct PendingRelDeleteAction
+{
+	/* The flag to tell action support function in current pending delete */
+	int			flags;
+
+	/* Used to destroy pending delete item */
+	void		(*destroy_pending_rel_delete) (PendingRelDelete *reldelete);
+
+	/* do delete function */
+	void		(*do_pending_rel_delete) (PendingRelDelete *reldelete);
+};
+
+
+/*
+ * pending delete need delay delete when drop storage happend.
+ * The pg pending delete item will insert the xlog and read the
+ * xlog in `FinishPreparedTransaction` then do the `unlink`.
+ * So after trascation prepared, all of the pending delete items
+ * will be removed. But if current pending delete item have not
+ * xlog, then should setting this flags, `do_pending_rel_delete`
+ * will be called in trascation commit.
+ */
+#define PENDING_REL_DELETE_NEED_DROP_DELAY_DELETE (1)
+
+/*
+ * The flags in pending delete action
+ * do NOT register XLOG/SYNC if current relation is not HEAP/AO/AOCS
+ * after CBDB support custom WAL resouce manager, then different
+ * pending delete item can define different WAL log. But for now,
+ * CBDB only support pg pending item record WAL log.
+ */
+
+#define PENDING_REL_DELETE_NEED_PRESERVE (1 << 1)
+#define PENDING_REL_DELETE_NEED_XLOG (1 << 2)
+#define PENDING_REL_DELETE_NEED_SYNC (1 << 3)
+
+/*
+ * The default pending delete item won't write the xlog,
+ * also need delay do pending delete when storage is dropped.
+*/
+#define PENDING_REL_DELETE_DEFAULT_FLAG PENDING_REL_DELETE_NEED_DROP_DELAY_DELETE
+
 extern SMgrRelation RelationCreateStorage(RelFileNode rnode,
 										  char relpersistence,
 										  SMgrImpl smgr_which,
@@ -38,6 +119,9 @@ extern Size EstimatePendingSyncsSpace(void);
 extern void SerializePendingSyncs(Size maxSize, char *startAddress);
 extern void RestorePendingSyncs(char *startAddress);
 
+/* register a pending delete item into pending delete list */
+void		RegisterPendingDelete(struct PendingRelDelete *delete);
+
 /*
  * These functions used to be in storage/smgr/smgr.c, which explains the
  * naming
diff --git a/src/include/catalog/storage_directory_table.h b/src/include/catalog/storage_directory_table.h
index a73cd8de75d..21c421768b7 100644
--- a/src/include/catalog/storage_directory_table.h
+++ b/src/include/catalog/storage_directory_table.h
@@ -14,12 +14,7 @@
 
 #include "utils/relcache.h"
 
-extern void UFileAddCreatePendingEntry(Relation rel, Oid spcId, char *relativePath);
-extern void UFileAddDeletePendingEntry(Relation rel, Oid spcId, char *relativePath);
-
-extern void UFileDoDeletesActions(bool isCommit);
-extern void UFileAtSubCommitSmgr(void);
-extern void UFileAtSubAbortSmgr(void);
+extern void UFileAddPendingDelete(Relation rel, Oid spcId, char *relativePath, bool atCommit);
 extern void DirectoryTableDropStorage(Relation rel);
 
-#endif //STORAGE_DIRECTORY_TABLE_H
+#endif	/* STORAGE_DIRECTORY_TABLE_H */
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index f68734ba167..3751900cd05 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -25,8 +25,9 @@
 
 typedef enum SMgrImplementation
 {
+	SMGR_INVALID = -1,
 	SMGR_MD = 0,
-	SMGR_AO = 1
+	SMGR_AO = 1,
 } SMgrImpl;
 
 struct f_smgr;

From c226e0c819cd34a32c1f21afa432fdd6cdcb6edd Mon Sep 17 00:00:00 2001
From: jiaqizho <zhoujiaqi@hashdata.cn>
Date: Fri, 24 May 2024 17:15:38 +0800
Subject: [PATCH 36/48] CPP keywords should not be used as function/parameter
 names (#449)

Also, extern "C" will not work in this case. This change renames the parameter that was named "delete".
---
 src/backend/catalog/storage.c | 10 +++++-----
 src/include/catalog/storage.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 76a9f7e12e5..9c862971153 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -630,12 +630,12 @@ RestorePendingSyncs(char *startAddress)
 }
 
 void
-RegisterPendingDelete(struct PendingRelDelete *delete)
+RegisterPendingDelete(struct PendingRelDelete *pending)
 {
-	Assert(delete);
-	Assert(delete->action);
-	delete->next = pendingDeletes;
-	pendingDeletes = delete;
+	Assert(pending);
+	Assert(pending->action);
+	pending->next = pendingDeletes;
+	pendingDeletes = pending;
 }
 
 /*
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 5fcf27a70df..3378b18434f 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -120,7 +120,7 @@ extern void SerializePendingSyncs(Size maxSize, char *startAddress);
 extern void RestorePendingSyncs(char *startAddress);
 
 /* register a pending delete item into pending delete list */
-void		RegisterPendingDelete(struct PendingRelDelete *delete);
+void		RegisterPendingDelete(struct PendingRelDelete *pending);
 
 /*
  * These functions used to be in storage/smgr/smgr.c, which explains the

From 21603eb5cd3544455126852f7e79116185942ff1 Mon Sep 17 00:00:00 2001
From: Zhang Mingli <avamingli@gmail.com>
Date: Thu, 23 May 2024 15:17:02 +0800
Subject: [PATCH 37/48] [AQUMV] Support LIMIT/OFFSET/FETCH clause on origin
 query.

If the origin query has a LIMIT clause, we can process it on
the view query.
We can process FETCH too, as ORDER BY is supported on
the origin query.

create incremental materialized view mv as
  select c1 as mc1, c2 as mc2, c3 as mc3, c4 as mc4
  from t1 where c1 > 90;

Origin queries:

  select c2 from t1 where c1 > 90 limit 3 offset 4;
  select c2 from t1 where c1 > 90 order by c3, c4
    fetch first 3 rows with ties;

Could be rewritten to:

  select mc2 from mv order by mc1 limit 3 offset 4;
  select mc2 from mv order by mc3, mc4
    fetch first 3 rows with ties;

Authored-by: Zhang Mingli avamingli@gmail.com
---
 src/backend/optimizer/plan/aqumv.c  |  13 +-
 src/test/regress/expected/aqumv.out | 253 ++++++++++++++++++++++++++++
 src/test/regress/sql/aqumv.sql      |  53 ++++++
 3 files changed, 318 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c
index cefd868771e..238e719d7c1 100644
--- a/src/backend/optimizer/plan/aqumv.c
+++ b/src/backend/optimizer/plan/aqumv.c
@@ -209,6 +209,7 @@ answer_query_using_materialized_views(PlannerInfo *root,
 			viewQuery->hasDistinctOn ||
 			viewQuery->hasModifyingCTE ||
 			viewQuery->hasSubLinks ||
+			(limit_needed(viewQuery)) ||
 			(viewQuery->groupClause != NIL) ||
 			/* IVM doesn't support belows now, just in case. */
 			(viewQuery->rowMarks != NIL) ||
@@ -261,6 +262,14 @@ answer_query_using_materialized_views(PlannerInfo *root,
 		subroot->aggtransinfos = NIL;
 		subroot->parse = viewQuery;
 
+		/*
+		 * AQUMV_FIXME:
+		 * We copy these from root for now, but that will no longer be
+		 * correct once we support a LIMIT node on the view query.
+		 */
+		subroot->tuple_fraction = root->tuple_fraction;
+		subroot->limit_tuples = root->limit_tuples;
+
 		/*
 		 * AQUMV
 		 * We have to rewrite now before we do the real Equivalent
@@ -345,6 +354,9 @@ answer_query_using_materialized_views(PlannerInfo *root,
 		viewQuery->groupingSets = parse->groupingSets;
 		viewQuery->sortClause = parse->sortClause;
 		viewQuery->distinctClause = parse->distinctClause;
+		viewQuery->limitOption = parse->limitOption;
+		viewQuery->limitCount = parse->limitCount;
+		viewQuery->limitOffset = parse->limitOffset;
 
 		/*
 		 * AQUMV
@@ -398,7 +410,6 @@ answer_query_using_materialized_views(PlannerInfo *root,
 		 * We don't use STD_FUZZ_FACTOR for cost comparisons like compare_path_costs_fuzzily here.
 		 * The STD_FUZZ_FACTOR is used to reduce paths of a rel, and keep the significantly ones.
 		 * But in AQUMV, we always have only one best path of rel at the last to compare.
-		 * TODO: limit clause and startup_cost.
 		 */
 		if (mv_final_rel->cheapest_total_path->total_cost < current_rel->cheapest_total_path->total_cost)
 		{
diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out
index 0694dbcae97..80ec2f2507c 100644
--- a/src/test/regress/expected/aqumv.out
+++ b/src/test/regress/expected/aqumv.out
@@ -2163,6 +2163,259 @@ select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c
 (10 rows)
 
 \pset null ''
+abort;
+-- Test LIMIT
+begin;
+create table aqumv_t7(c1 int, c2 int, c3 int, c4 int) distributed by (c1);
+insert into aqumv_t7 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+insert into aqumv_t7 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+analyze aqumv_t7;
+create incremental materialized view aqumv_mvt7_0 as
+  select c3 as cm3, c1 as mc1, c2 as mc2
+  from aqumv_t7 where c1 > 90;
+analyze aqumv_mvt7_0;
+-- LIMIT
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Limit
+   Output: c2, c3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: c2, c3
+         Merge Key: c2, c3
+         ->  Limit
+               Output: c2, c3
+               ->  Sort
+                     Output: c2, c3
+                     Sort Key: aqumv_t7.c2, aqumv_t7.c3
+                     ->  Seq Scan on public.aqumv_t7
+                           Output: c2, c3
+                           Filter: (aqumv_t7.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+ c2 | c3 
+----+----
+ 92 | 93
+ 92 | 93
+ 93 | 94
+(3 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Limit
+   Output: mc2, cm3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: mc2, cm3
+         Merge Key: mc2, cm3
+         ->  Limit
+               Output: mc2, cm3
+               ->  Sort
+                     Output: mc2, cm3
+                     Sort Key: aqumv_mvt7_0.mc2, aqumv_mvt7_0.cm3
+                     ->  Seq Scan on public.aqumv_mvt7_0
+                           Output: mc2, cm3
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+ c2 | c3 
+----+----
+ 92 | 93
+ 92 | 93
+ 93 | 94
+(3 rows)
+
+-- OFFSET
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Limit
+   Output: c2, c3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: c2, c3
+         Merge Key: c2, c3
+         ->  Limit
+               Output: c2, c3
+               ->  Sort
+                     Output: c2, c3
+                     Sort Key: aqumv_t7.c2, aqumv_t7.c3
+                     ->  Seq Scan on public.aqumv_t7
+                           Output: c2, c3
+                           Filter: (aqumv_t7.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+ c2 | c3 
+----+----
+ 94 | 95
+ 94 | 95
+ 95 | 96
+(3 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Limit
+   Output: mc2, cm3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: mc2, cm3
+         Merge Key: mc2, cm3
+         ->  Limit
+               Output: mc2, cm3
+               ->  Sort
+                     Output: mc2, cm3
+                     Sort Key: aqumv_mvt7_0.mc2, aqumv_mvt7_0.cm3
+                     ->  Seq Scan on public.aqumv_mvt7_0
+                           Output: mc2, cm3
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+ c2 | c3 
+----+----
+ 94 | 95
+ 94 | 95
+ 95 | 96
+(3 rows)
+
+-- FETCH
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Limit
+   Output: c2, c3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: c2, c3
+         Merge Key: c2, c3
+         ->  Limit
+               Output: c2, c3
+               ->  Sort
+                     Output: c2, c3
+                     Sort Key: aqumv_t7.c2, aqumv_t7.c3
+                     ->  Seq Scan on public.aqumv_t7
+                           Output: c2, c3
+                           Filter: (aqumv_t7.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+ c2 | c3 
+----+----
+ 92 | 93
+ 92 | 93
+ 93 | 94
+(3 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Limit
+   Output: mc2, cm3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: mc2, cm3
+         Merge Key: mc2, cm3
+         ->  Limit
+               Output: mc2, cm3
+               ->  Sort
+                     Output: mc2, cm3
+                     Sort Key: aqumv_mvt7_0.mc2, aqumv_mvt7_0.cm3
+                     ->  Seq Scan on public.aqumv_mvt7_0
+                           Output: mc2, cm3
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+ c2 | c3 
+----+----
+ 92 | 93
+ 92 | 93
+ 93 | 94
+(3 rows)
+
+-- WITH TIES
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Limit
+   Output: c2, c3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: c2, c3
+         Merge Key: c2, c3
+         ->  Limit
+               Output: c2, c3
+               ->  Sort
+                     Output: c2, c3
+                     Sort Key: aqumv_t7.c2, aqumv_t7.c3
+                     ->  Seq Scan on public.aqumv_t7
+                           Output: c2, c3
+                           Filter: (aqumv_t7.c1 > 90)
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+ c2 | c3 
+----+----
+ 92 | 93
+ 92 | 93
+ 93 | 94
+ 93 | 94
+(4 rows)
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Limit
+   Output: mc2, cm3
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: mc2, cm3
+         Merge Key: mc2, cm3
+         ->  Limit
+               Output: mc2, cm3
+               ->  Sort
+                     Output: mc2, cm3
+                     Sort Key: aqumv_mvt7_0.mc2, aqumv_mvt7_0.cm3
+                     ->  Seq Scan on public.aqumv_mvt7_0
+                           Output: mc2, cm3
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+ c2 | c3 
+----+----
+ 92 | 93
+ 92 | 93
+ 93 | 94
+ 93 | 94
+(4 rows)
+
 abort;
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql
index 86af470541f..3fcb47ed631 100644
--- a/src/test/regress/sql/aqumv.sql
+++ b/src/test/regress/sql/aqumv.sql
@@ -527,6 +527,59 @@ select distinct on(c1 - 1) c1, c2 from aqumv_t6 where c1 > 90 order by c1 - 1, c
 \pset null ''
 abort;
 
+-- Test LIMIT
+begin;
+create table aqumv_t7(c1 int, c2 int, c3 int, c4 int) distributed by (c1);
+insert into aqumv_t7 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+insert into aqumv_t7 select i, i+1, i+2, i+3 from generate_series(1, 100) i;
+analyze aqumv_t7;
+
+create incremental materialized view aqumv_mvt7_0 as
+  select c3 as cm3, c1 as mc1, c2 as mc2
+  from aqumv_t7 where c1 > 90;
+analyze aqumv_mvt7_0;
+
+-- LIMIT
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3;
+
+-- OFFSET
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 limit 3 offset 4;
+
+-- FETCH
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows only;
+
+-- WITH TIES
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows with ties;
+
+abort;
 
 reset optimizer;
 reset enable_answer_query_using_materialized_views;

From 2ff7b0cf5f8cc10f8325617409f61f20374dccec Mon Sep 17 00:00:00 2001
From: Annpurna Shahani <30636132+Annu149@users.noreply.github.com>
Date: Fri, 28 Oct 2022 16:02:28 +0530
Subject: [PATCH 38/48] Replace scp with rsync (#14145)

A security vulnerability was found in the scp program shipped with the
openssh-clients package; the CVSS score for this vulnerability
is 7.8 (https://access.redhat.com/security/cve/cve-2020-15778).
The recommended action was to replace scp with rsync, so this commit:

 * Replaced scp with rsync in gpdb cm utilities and utility files
 * Renamed gpscp utility to gpsync after replacing scp with rsync in
   utility code
---
 gpMgmt/bin/Makefile                           |  23 +-
 gpMgmt/bin/README.md                          |  28 +-
 gpMgmt/bin/gpcheckperf                        |   8 +-
 gpMgmt/bin/gpexpand                           |  10 +-
 gpMgmt/bin/gpmemwatcher                       |   2 +-
 gpMgmt/bin/gppylib/commands/gp.py             |   4 +-
 gpMgmt/bin/gppylib/commands/unix.py           |   6 +-
 .../gppylib/operations/buildMirrorSegments.py |   2 +-
 gpMgmt/bin/gppylib/operations/package.py      |  32 +-
 .../test/regress/test_package/__init__.py     |   4 +-
 .../test_regress_muck_with_internals.py       |  14 +-
 ..._regress_muck_with_internals_on_standby.py |  16 +-
 .../gppylib/operations/test/test_package.py   |   6 +-
 .../gppylib/test/unit/test_unit_package.py    |   6 +-
 gpMgmt/bin/gpssh-exkeys                       |   4 +-
 gpMgmt/bin/{gpscp => gpsync}                  |  17 +-
 gpMgmt/bin/lib/gp_bash_functions.sh           |   4 +-
 gpMgmt/test/behave/mgmt_utils/gppkg.feature   |   2 +-
 .../mgmt_utils/steps/gpconfig_mgmt_utils.py   |   4 +-
 .../steps/gpssh_exkeys_mgmt_utils.py          |   2 +-
 .../behave/mgmt_utils/steps/mgmt_utils.py     | 485 +++++++++++++++---
 21 files changed, 507 insertions(+), 172 deletions(-)
 rename gpMgmt/bin/{gpscp => gpsync} (86%)

diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile
index 24e70491184..70d650fc772 100644
--- a/gpMgmt/bin/Makefile
+++ b/gpMgmt/bin/Makefile
@@ -13,27 +13,19 @@ SUBDIRS += ifaddrs
 $(recurse)
 
 PROGRAMS= analyzedb gpactivatestandby gpaddmirrors gpcheckcat gpcheckperf \
-	gpcheckresgroupimpl gpconfig gpdeletesystem gpexpand gpshrink gpinitstandby \
+	gpcheckresgroupimpl gpconfig gpdeletesystem gpexpand gpinitstandby \
 	gpinitsystem gpload gpload.py gplogfilter gpmovemirrors \
-	gppkg gprecoverseg gpreload gpscp gpsd gpssh gpssh-exkeys gpstart \
-	gpstate gpstop minirepro gpmemwatcher gpmemreport gpdemo gpdirtableload
-
-GPDEMO_LIBS = gpdemo-defaults.sh lalshell generate_certs.sh demo_cluster.sh \
-				probe_config.sh README
+	gppkg gprecoverseg gpreload gpsync gpsd gpssh gpssh-exkeys gpstart \
+	gpstate gpstop minirepro gpmemwatcher gpmemreport
 
 installdirs:
 	$(MKDIR_P) '$(DESTDIR)$(bindir)/lib'
-	$(MKDIR_P) '$(DESTDIR)$(bindir)/lib/gpdemo'
 
 installprograms: installdirs
 	for file in $(PROGRAMS); do \
 		$(INSTALL_SCRIPT) $$file '$(DESTDIR)$(bindir)/'$$file ; \
 		$(PERL) $(top_builddir)/putversion '$(DESTDIR)$(bindir)/'$$file ; \
 	done
-	# install dependencies of gpdemo
-	for file in $(GPDEMO_LIBS); do \
-		$(INSTALL_SCRIPT) $(top_builddir)/gpAux/gpdemo/$$file '$(DESTDIR)$(bindir)/lib/gpdemo/'$$file ; \
-	done
 	# Symlink gpcheckcat from bin to bin/lib to maintain backward compatibility
 	if [ ! -L $(DESTDIR)$(bindir)/lib/gpcheckcat  ]; then \
 		cd $(DESTDIR)$(bindir)/lib/ && $(LN_S) ../gpcheckcat gpcheckcat; \
@@ -44,9 +36,6 @@ uninstall:
 	for file in $(PROGRAMS); do \
 		rm -f '$(DESTDIR)$(bindir)/'$$file ; \
 	done
-	for file in $(GPDEMO_LIBS); do \
-		rm -f '$(DESTDIR)$(bindir)/lib/gpdemo/'$$file ; \
-	done
 	rm -f '$(DESTDIR)$(bindir)/gpload.bat'
 
 #
@@ -115,7 +104,7 @@ pyyaml:
 	@echo "--- pyyaml"
 	cd $(PYLIB_SRC_EXT)/ && $(TAR) xzf $(PYYAML_DIR).tar.gz
 	cd $(PYLIB_SRC_EXT)/$(PYYAML_DIR)/ && env -u CC python3 setup.py build
-	cp -r $(PYLIB_SRC_EXT)/$(PYYAML_DIR)/build/lib*-3*/* $(PYLIB_DIR)
+	cp -r $(PYLIB_SRC_EXT)/$(PYYAML_DIR)/build/lib*-3.*/* $(PYLIB_DIR)
 
 #
 # PYLINT
@@ -194,7 +183,7 @@ clean distclean:
 	rm -rf *.pyc
 	rm -f analyzedbc gpactivatestandbyc gpaddmirrorsc gpcheckcatc \
 		  gpcheckperfc gpcheckresgroupimplc gpchecksubnetcfgc gpconfigc \
-		  gpdeletesystemc gpexpandc gpshrinkc gpinitstandbyc gplogfilterc gpmovemirrorsc \
-		  gppkgc gprecoversegc gpreloadc gpscpc gpsdc gpssh-exkeysc gpsshc \
+		  gpdeletesystemc gpexpandc gpinitstandbyc gplogfilterc gpmovemirrorsc \
+		  gppkgc gprecoversegc gpreloadc gpscpc gpsyncc gpsdc gpssh-exkeysc gpsshc \
 		  gpstartc gpstatec gpstopc minireproc
 	rm -f gpconfig_modules/gucs_disallowed_in_file.txt
diff --git a/gpMgmt/bin/README.md b/gpMgmt/bin/README.md
index 674a209e0f0..b003592b16c 100644
--- a/gpMgmt/bin/README.md
+++ b/gpMgmt/bin/README.md
@@ -28,21 +28,21 @@ Where Things Go
 
 List of Management Scripts Written in Bash
 ------------------------------------------
-bin/gpinitsystem        -  Creates a new Cloudberry Database
+bin/gpinitsystem        -  Creates a new Greenplum Database
 bin/gpload              -  Sets env variables and calls gpload.py
 
 
 List of Management Scripts Written in Python (no libraries)
 -----------------------------------------------------------
-bin/gpload.py           -  Loads data into a Cloudberry Database
+bin/gpload.py           -  Loads data into a Greenplum Database
 
 
 List of Management Scripts Written in Python (gpmlib - old libraries)
 ---------------------------------------------------------------------
 bin/gpaddmirrors        -  Adds mirrors to an array (needs rewrite)
 bin/gprecoverseg        -  Recovers a failed segment (needs rewrite)
-bin/gpcheckperf         -  Checks the hardware for Cloudberry Database
-bin/gpscp               -  Copies files to many hosts
+bin/gpcheckperf         -  Checks the hardware for Greenplum Database
+bin/gpsync              -  Copies files to many hosts
 bin/gpssh               -  Remote shell to many hosts
 bin/gpssh-exkeys        -  Exchange ssh keys between many hosts
 
@@ -51,12 +51,12 @@ List of Management Scripts Written in Python (gppylib - current libraries)
 --------------------------------------------------------------------------
 bin/gpactivatestandby   -  Activates the Standby Coordinator
 bin/gpconfig_helper     -  Edits postgresql.conf file for all segments
-bin/gpdeletesystem      -  Deletes a Cloudberry Database
-bin/gpexpand            -  Adds additional segments to a Cloudberry Database
+bin/gpdeletesystem      -  Deletes a Greenplum Database
+bin/gpexpand            -  Adds additional segments to a Greenplum Database
 bin/gpinitstandby       -  Initializes standby coordinator
 bin/gplogfilter         -  Filters log files
-bin/gpstart             -  Start a Cloudberry Database
-bin/gpstop              -  Stop a Cloudberry Database
+bin/gpstart             -  Start a Greenplum Database
+bin/gpstop              -  Stop a Greenplum Database
 
 sbin/gpconfig_helper.py -  Helper script for gpconfig
 sbin/gpsegcopy          -  Helper script for gpexpand
@@ -76,10 +76,10 @@ gparray.py
    +-  SegmentPair - Configuration information for a single content id
    |     \-  Contains multiple Segment objects
    |
-   +-  GpArray   - Configuration information for a Cloudberry Database
+   +-  GpArray   - Configuration information for a Greenplum Database
          \-  Contains multiple SegmentPair objects
 
-gplog.py         - Utility functions to assist in Cloudberry standard logging
+gplog.py         - Utility functions to assist in Greenplum standard logging
 
 gpparseopts.py   - Wrapper around optparse library to aid in locating help files
 
@@ -143,9 +143,9 @@ db/dbconn.py       - Connections to the database
   |
   +- Should have a wrapper class around a pygresql connection object!
 
-util/gp_utils.py     - Cloudberry related utility functions that are not Commands
-util/ssh_session.py  - SSH and SCP related utility functions brought in from gpmlib.py/gplib.py
-                       that are used by gpssh, gpscp and gpssh-exkeys
+util/gp_utils.py     - Greenplum related utility functions that are not Commands
+util/ssh_session.py  - SSH and RSYNC related utility functions brought in from gpmlib.py/gplib.py
+                       that are used by gpssh, gpsync and gpssh-exkeys
 
 
 ## Testing Management Scripts (unit tests)
@@ -175,7 +175,7 @@ tests that do not require a running cluster.
 
 ## Testing Management Scripts (behave tests)
 
-Behave tests require a running Cloudberry cluster, and additional python libraries for testing, available to gpadmin.
+Behave tests require a running Greenplum cluster, and additional python libraries for testing, available to gpadmin.
 
 Thus, you can install these additional python libraries using any of the following methods:
 
diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index 79ca3ec2792..09bb496a669 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -103,8 +103,8 @@ def gpssh(cmd):
     return not rc, out
 
 
-def gpscp(src, dst):
-    c = ['%s/bin/gpscp' % GPHOME]
+def gpsync(src, dst):
+    c = ['%s/bin/gpsync' % GPHOME]
     if GV.opt['-V']:
         c.append('-v')
     if GV.opt['-f']:
@@ -391,9 +391,9 @@ def copyExecOver(fname):
     if not os.access(path, os.X_OK):
         sys.exit('[Exit] file not executable: ' + path)
 
-    (ok, out) = gpscp(path, '=:%s' % target)
+    (ok, out) = gpsync(path, '=:%s' % target)
     if not ok:
-        sys.exit('[Error] command failed: gpscp %s =:%s with output: %s' % (path, target, out))
+        sys.exit('[Error] command failed: gpsync %s =:%s with output: %s' % (path, target, out))
 
     # chmod +x file
     (ok, out) = gpssh('chmod a+rx %s' % target)
diff --git a/gpMgmt/bin/gpexpand b/gpMgmt/bin/gpexpand
index f9d0fe36b2b..3825095829a 100755
--- a/gpMgmt/bin/gpexpand
+++ b/gpMgmt/bin/gpexpand
@@ -442,7 +442,7 @@ class GpExpandStatus():
 
     def _sync_status_file(self):
         """Syncs the gpexpand status file with the coordinator mirror"""
-        cpCmd = Scp('gpexpand copying status file to coordinator mirror',
+        cpCmd = Rsync('gpexpand copying status file to coordinator mirror',
                     srcFile=self._status_filename,
                     dstFile=self._status_standby_filename,
                     dstHost=self._coordinator_mirror.getSegmentHostName())
@@ -512,7 +512,7 @@ class GpExpandStatus():
         """ Sync the segment configuration backup file to standby """
         if self._coordinator_mirror:
             self.logger.debug("Sync segment configuration backup file")
-            cpCmd = Scp('gpexpand copying segment configuration backup file to coordinator mirror',
+            cpCmd = Rsync('gpexpand copying segment configuration backup file to coordinator mirror',
                         srcFile=self._gp_segment_configuration_backup,
                         dstFile=self._segment_configuration_standby_filename,
                         dstHost=self._coordinator_mirror.getSegmentHostName())
@@ -757,7 +757,7 @@ class SegmentTemplate:
         """Distributes template tar file to hosts"""
         for host in self.hosts:
             logger.debug('Copying tar file to %s' % host)
-            cpCmd = Scp(name='gpexpand distribute tar file to new hosts',
+            cpCmd = Rsync(name='gpexpand distribute tar file to new hosts',
                         srcFile=self.schema_tar_file,
                         dstFile=self.segTarDir,
                         dstHost=host)
@@ -846,7 +846,7 @@ class SegmentTemplate:
         localHostname = self.gparray.coordinator.getSegmentHostName()
         cmdName = 'gpexpand copying postgresql.conf to %s:%s/postgresql.conf' \
                   % (self.srcSegHostname, self.srcSegDataDir)
-        cpCmd = Scp(name=cmdName, srcFile=self.srcSegDataDir + '/postgresql.conf',
+        cpCmd = Rsync(name=cmdName, srcFile=self.srcSegDataDir + '/postgresql.conf',
             dstFile=self.tempDir, dstHost=localHostname, ctxt=REMOTE,
             remoteHost=self.srcSegHostname)
         cpCmd.run(validateAfter=True)
@@ -854,7 +854,7 @@ class SegmentTemplate:
         self.logger.info('Copying pg_hba.conf from existing segment into template')
         cmdName = 'gpexpand copy pg_hba.conf to %s:%s/pg_hba.conf' \
                   % (self.srcSegHostname, self.srcSegDataDir)
-        cpCmd = Scp(name=cmdName, srcFile=self.srcSegDataDir + '/pg_hba.conf',
+        cpCmd = Rsync(name=cmdName, srcFile=self.srcSegDataDir + '/pg_hba.conf',
                     dstFile=self.tempDir, dstHost=localHostname,ctxt=REMOTE,
                     remoteHost=self.srcSegHostname)
         cpCmd.run(validateAfter=True)
diff --git a/gpMgmt/bin/gpmemwatcher b/gpMgmt/bin/gpmemwatcher
index 29895cbe75c..6569015bc09 100755
--- a/gpMgmt/bin/gpmemwatcher
+++ b/gpMgmt/bin/gpmemwatcher
@@ -171,7 +171,7 @@ def stopProcesses(host, workdir):
         return
 
     try:
-        subprocess.check_call('scp -q %s:%s/%s ./%s.%s' % (host, dest_dir, ps_file, host, ps_file), shell=True)
+        subprocess.check_call('rsync -q %s:%s/%s ./%s.%s' % (host, dest_dir, ps_file, host, ps_file), shell=True)
     except subprocess.CalledProcessError as e:
         print('Error retrieving data from host: ' + host, file=sys.stderr)
         print(e)
diff --git a/gpMgmt/bin/gppylib/commands/gp.py b/gpMgmt/bin/gppylib/commands/gp.py
index 46d2f636b7d..8aa208ba576 100644
--- a/gpMgmt/bin/gppylib/commands/gp.py
+++ b/gpMgmt/bin/gppylib/commands/gp.py
@@ -1169,8 +1169,8 @@ def distribute_tarball(queue,list,tarball):
             hostname = db.getSegmentHostName()
             datadir = db.getSegmentDataDirectory()
             (head,tail)=os.path.split(datadir)
-            scp_cmd=Scp(name="copy coordinator",srcFile=tarball,dstHost=hostname,dstFile=head)
-            queue.addCommand(scp_cmd)
+            rsync_cmd=Rsync(name="copy coordinator",srcFile=tarball,dstHost=hostname,dstFile=head)
+            queue.addCommand(rsync_cmd)
         queue.join()
         queue.check_results()
         logger.debug("distributeTarBall finished")
diff --git a/gpMgmt/bin/gppylib/commands/unix.py b/gpMgmt/bin/gppylib/commands/unix.py
index 361bac0f69b..3d0b0582d14 100644
--- a/gpMgmt/bin/gppylib/commands/unix.py
+++ b/gpMgmt/bin/gppylib/commands/unix.py
@@ -501,7 +501,7 @@ def filedir_exists(self):
         return (not self.results.rc)
 
 
-# -------------scp------------------
+# -------------rsync------------------
 
 # MPP-13617
 def canonicalize(addr):
@@ -510,10 +510,10 @@ def canonicalize(addr):
     return '[' + addr + ']'
 
 
-class Scp(Command):
+class Rsync(Command):
     def __init__(self, name, srcFile, dstFile, srcHost=None, dstHost=None, recursive=False, ctxt=LOCAL,
                  remoteHost=None):
-        cmdStr = findCmdInPath('scp') + " "
+        cmdStr = findCmdInPath('rsync') + " "
 
         if recursive:
             cmdStr = cmdStr + "-r "
diff --git a/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py b/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py
index 91c537e4ffa..4bfe87ecd13 100644
--- a/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py
+++ b/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py
@@ -18,7 +18,7 @@
 from gppylib.operations.utils import ParallelOperation, RemoteOperation
 from gppylib.system import configurationInterface as configInterface
 from gppylib.commands.gp import is_pid_postmaster, get_pid_from_remotehost
-from gppylib.commands.unix import check_pid_on_remotehost, Scp
+from gppylib.commands.unix import check_pid_on_remotehost
 from gppylib.programs.clsRecoverSegment_triples import RecoveryTriplet
 
 
diff --git a/gpMgmt/bin/gppylib/operations/package.py b/gpMgmt/bin/gppylib/operations/package.py
index 7ba8d34affa..f1e73563846 100644
--- a/gpMgmt/bin/gppylib/operations/package.py
+++ b/gpMgmt/bin/gppylib/operations/package.py
@@ -11,7 +11,7 @@
     from gppylib import gplog
     from gppylib.commands import gp
     from gppylib.commands.base import Command, REMOTE, WorkerPool, ExecutionError
-    from gppylib.commands.unix import Scp
+    from gppylib.commands.unix import Rsync
     from gppylib.gpversion import GpVersion
     from gppylib.mainUtils import ExceptionNoStackTraceNeeded
     from gppylib.operations import Operation
@@ -302,7 +302,7 @@ class RemoteCommand(Operation):
     """
     DEPRECATED
 
-    TODO: AK: Rename as GpSsh, like GpScp below.
+    TODO: AK: Rename as GpSsh, like GpRsync below.
     """
 
     def __init__(self, cmd_str, host_list):
@@ -1019,7 +1019,7 @@ def execute(self):
             for package in install_package_set:
                 logger.debug('copying %s to %s' % (package, self.host))
                 dstFile = os.path.join(GPHOME, package)
-                Scp(name='copying %s to %s' % (package, self.host),
+                Rsync(name='copying %s to %s' % (package, self.host),
                     srcFile=os.path.join(GPPKG_ARCHIVE_PATH, package),
                     dstFile=dstFile,
                     dstHost=self.host).run(validateAfter=True)
@@ -1072,12 +1072,12 @@ def execute(self):
         if linux_distribution_id() == 'ubuntu':
             # install package on segments
             if self.segment_host_list:
-                GpScp(srcFile, dstFile, self.segment_host_list).run()
+                GpRsync(srcFile, dstFile, self.segment_host_list).run()
                 HostOperation(InstallDebPackageLocally(dstFile), self.segment_host_list).run()
 
             # install package on standby
             if self.standby_host:
-                Scp(name='copying %s to %s' % (srcFile, self.standby_host),
+                Rsync(name='copying %s to %s' % (srcFile, self.standby_host),
                     srcFile=srcFile,
                     dstFile=dstFile,
                     dstHost=self.standby_host).run(validateAfter=True)
@@ -1088,12 +1088,12 @@ def execute(self):
         else:
             # install package on segments
             if self.segment_host_list:
-                GpScp(srcFile, dstFile, self.segment_host_list).run()
+                GpRsync(srcFile, dstFile, self.segment_host_list).run()
                 HostOperation(InstallPackageLocally(dstFile), self.segment_host_list).run()
 
             # install package on standby
             if self.standby_host:
-                Scp(name='copying %s to %s' % (srcFile, self.standby_host),
+                Rsync(name='copying %s to %s' % (srcFile, self.standby_host),
                     srcFile=srcFile,
                     dstFile=dstFile,
                     dstHost=self.standby_host).run(validateAfter=True)
@@ -1408,14 +1408,14 @@ def execute(self):
         # distribute package to segments
         srcFile = self.gppkg.abspath
         dstFile = os.path.join(GPHOME, self.gppkg.pkg)
-        GpScp(srcFile, dstFile, self.segment_host_list).run()
+        GpRsync(srcFile, dstFile, self.segment_host_list).run()
 
         # update package on segments
         HostOperation(UpdatePackageLocally(dstFile), self.segment_host_list).run()
 
         # update package on standby
         if self.standby_host:
-            Scp(name='copying %s to %s' % (srcFile, self.standby_host),
+            Rsync(name='copying %s to %s' % (srcFile, self.standby_host),
                 srcFile=srcFile,
                 dstFile=dstFile,
                 dstHost=self.standby_host).run(validateAfter=True)
@@ -1552,7 +1552,7 @@ def execute(self):
         logger.info('The package migration has completed.')
 
 
-class GpScp(Operation):
+class GpRsync(Operation):
     """
     TODO: AK: This obviously does not belong here. My preference would be that it remain here until
     the following problem is solved.
@@ -1562,14 +1562,14 @@ class GpScp(Operation):
     I suggest:
 
         We consume an extra parameter 'fanout'. We partition the host_list into a number of buckets
-        given by 'fanout'. For each bucket, we scp the artifact to the first host in the bucket, and then
-        we recursively invoke GpScp on that machine for the remaining hosts in its bucket.
+        given by 'fanout'. For each bucket, we rsync the artifact to the first host in the bucket, and then
+        we recursively invoke GpRsync on that machine for the remaining hosts in its bucket.
 
-        GpScp := ParallelOperation([ A(i) for i in range(0, n) ])
+        GpRsync := ParallelOperation([ A(i) for i in range(0, n) ])
         A := SerialOperation(B, C)
-        B := scp source_path target_path @ host_i
+        B := rsync source_path target_path @ host_i
             where host_i := the first host in the ith bucket
-        C := RemoteOperation(GpScp(target_path, target_path, host_list_i))
+        C := RemoteOperation(GpRsync(target_path, target_path, host_list_i))
             where host_list_i := the remaining hosts in the ith bucket
     """
 
@@ -1582,7 +1582,7 @@ def __init__(self, source_path, target_path, host_list):
     def execute(self):
         self.pool = WorkerPool()
         for host in self.host_list:
-            self.pool.addCommand(Scp(name='copying %s to %s' % (self.source_path, host),
+            self.pool.addCommand(Rsync(name='copying %s to %s' % (self.source_path, host),
                                      srcFile=self.source_path,
                                      dstFile=self.target_path,
                                      dstHost=host))
diff --git a/gpMgmt/bin/gppylib/operations/test/regress/test_package/__init__.py b/gpMgmt/bin/gppylib/operations/test/regress/test_package/__init__.py
index 692ca12d6e0..e5c8c0d8f80 100644
--- a/gpMgmt/bin/gppylib/operations/test/regress/test_package/__init__.py
+++ b/gpMgmt/bin/gppylib/operations/test/regress/test_package/__init__.py
@@ -10,11 +10,11 @@
 from gppylib.gpversion import MAIN_VERSION
 from contextlib import closing
 from gppylib.commands import gp
-from gppylib.commands.unix import Scp
+from gppylib.commands.unix import Rsync
 from gppylib.commands.base import Command, ExecutionError, REMOTE
 from gppylib.operations import Operation
 from gppylib.operations.unix import CheckFile, CheckRemoteFile, RemoveRemoteFile
-from gppylib.operations.package import dereference_symlink, GpScp, linux_distribution_id, linux_distribution_version
+from gppylib.operations.package import dereference_symlink, GpRsync, linux_distribution_id, linux_distribution_version
 from gppylib.commands.base import Command, REMOTE
 
 def get_os():
diff --git a/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals.py b/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals.py
index 011e4fd996a..38ceb55cda7 100755
--- a/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals.py
+++ b/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals.py
@@ -5,9 +5,9 @@
 import shutil
 
 from contextlib import closing
-from gppylib.commands.unix import Scp
+from gppylib.commands.unix import Rsync
 from gppylib.commands.base import ExecutionError
-from gppylib.operations.package import GpScp
+from gppylib.operations.package import GpRsync
 from gppylib.operations.unix import RemoveRemoteFile
 from gppylib.operations.test.regress.test_package import GppkgTestCase, unittest, get_host_list, ARCHIVE_PATH, RPM_DATABASE, run_command, skipIfSingleNode
 
@@ -132,7 +132,7 @@ def test06_delete_package_from_archive_on_segment_and_install(self):
         try:
             self.install(gppkg_file)
         except ExecutionError as e:
-            Scp(name = "copy gppkg to segment",
+            Rsync(name = "copy gppkg to segment",
                 srcFile = gppkg_file, 
                 dstFile = archive_file, 
                 srcHost = None,
@@ -159,7 +159,7 @@ def test07_delete_package_from_archive_on_segment_and_uninstall(self):
         try:
             self.remove(gppkg_file)
         except ExecutionError as e:
-            GpScp(source_path = gppkg_file,
+            GpRsync(source_path = gppkg_file,
                   target_path = archive_file,
                   host_list = segment_host_list).run()
             self.fail("ExecutionError %s" % str(e))
@@ -187,7 +187,7 @@ def test08_uninstall_rpm_on_segments_and_install(self):
             #Install the rpm 
             with closing(tarfile.open(self.alpha_spec.get_filename())) as tf:
                 tf.extract(self.A_spec.get_filename())
-            Scp(name = "copy rpm to segment", 
+            Rsync(name = "copy rpm to segment",
                 srcFile = self.A_spec.get_filename(), 
                 dstFile = self.A_spec.get_filename(), 
                 srcHost = None,
@@ -229,7 +229,7 @@ def test10_install_rpm_on_segments_and_install(self):
         #Install the rpm 
         with closing(tarfile.open(self.alpha_spec.get_filename())) as tf:
             tf.extract(self.A_spec.get_filename())
-        Scp(name = "copy rpm to segment", 
+        Rsync(name = "copy rpm to segment",
             srcFile = self.A_spec.get_filename(), 
             dstFile = self.A_spec.get_filename(), 
             srcHost = None, 
@@ -255,7 +255,7 @@ def test11_install_rpm_on_segments_and_uninstall(self):
         #Install the rpm
         with closing(tarfile.open(self.alpha_spec.get_filename())) as tf:
             tf.extract(self.A_spec.get_filename())
-        Scp(name = "copy rpm to segment",
+        Rsync(name = "copy rpm to segment",
               srcFile = self.A_spec.get_filename(),
               dstFile = self.A_spec.get_filename(), 
               srcHost = None,
diff --git a/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals_on_standby.py b/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals_on_standby.py
index 1e8bd93a73f..e7fcf823688 100755
--- a/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals_on_standby.py
+++ b/gpMgmt/bin/gppylib/operations/test/regress/test_package/test_regress_muck_with_internals_on_standby.py
@@ -5,8 +5,8 @@
 
 from contextlib import closing
 from gppylib.commands.base import ExecutionError
-from gppylib.commands.unix import Scp
-from gppylib.operations.package import GpScp
+from gppylib.commands.unix import Rsync
+from gppylib.operations.package import GpRsync
 from gppylib.operations.test.regress.test_package import GppkgTestCase, unittest, skipIfNoStandby, get_host_list, ARCHIVE_PATH, run_command
 from gppylib.operations.unix import RemoveRemoteFile
 
@@ -25,7 +25,7 @@ def test00_delete_package_from_archive_on_standby_and_install(self):
         try:
             self.install(gppkg_file)
         except ExecutionError as e:
-            Scp(name = "copy gppkg to standby",
+            Rsync(name = "copy gppkg to standby",
                 srcFile = gppkg_file,
                 dstFile = archive_file,
                 srcHost = None,
@@ -48,10 +48,10 @@ def test01_delete_package_from_archive_on_standby_and_uninstall(self):
         try:
             self.remove(gppkg_file)
         except ExecutionError as e:
-            GpScp(source_path = gppkg_file,
+            GpRsync(source_path = gppkg_file,
                   target_path = archive_file,
                   host_list = get_host_list()[1]).run()
-            Scp(name = "copy gppkg to standby",
+            Rsync(name = "copy gppkg to standby",
                 srcFile = gppkg_file,
                 dstFile = archive_file, 
                 srcHost = None, 
@@ -73,7 +73,7 @@ def test02_uninstall_rpm_on_standby_and_install(self):
             #Install the rpm 
             with closing(tarfile.open(self.alpha_spec.get_filename())) as tf:
                 tf.extract(self.A_spec.get_filename())
-            Scp(name = "copy rpm to standby", 
+            Rsync(name = "copy rpm to standby",
                 srcFile = self.A_spec.get_filename(), 
                 dstFile = self.A_spec.get_filename(), 
                 srcHost = None,
@@ -101,7 +101,7 @@ def test04_install_rpm_on_standby_and_install(self):
         #Install the rpm 
         with closing(tarfile.open(self.alpha_spec.get_filename())) as tf:
             tf.extract(self.A_spec.get_filename())
-        Scp(name = "copy the rpm to standby",
+        Rsync(name = "copy the rpm to standby",
             srcFile = self.A_spec.get_filename(),
             dstFile = self.A_spec.get_filename(), 
             srcHost = None, 
@@ -119,7 +119,7 @@ def test05_install_rpm_on_standby_and_uninstall(self):
         #Install the rpm
         with closing(tarfile.open(self.alpha_spec.get_filename())) as tf:
             tf.extract(self.A_spec.get_filename())
-        Scp(name = "copy rpm to standby",
+        Rsync(name = "copy rpm to standby",
             srcFile = self.A_spec.get_filename(),
             dstFile = self.A_spec.get_filename(), 
             srcHost = None,
diff --git a/gpMgmt/bin/gppylib/operations/test/test_package.py b/gpMgmt/bin/gppylib/operations/test/test_package.py
index d4f65648d12..202a53db17c 100755
--- a/gpMgmt/bin/gppylib/operations/test/test_package.py
+++ b/gpMgmt/bin/gppylib/operations/test/test_package.py
@@ -12,7 +12,7 @@
 from gppylib.gparray import GpArray
 from contextlib import closing
 from gppylib.commands import gp
-from gppylib.commands.unix import Scp
+from gppylib.commands.unix import Rsync
 from gppylib.commands.base import Command, ExecutionError
 from gppylib.operations import Operation
 from gppylib.operations.unix import CheckFile, RemoveRemoteFile
@@ -632,7 +632,7 @@ def test04_delete_package_from_archive_on_segment(self):
         try:
             self.install(gppkg_file)
         except ExecutionError as e:
-            Scp(name="copy to segment", srcFile=gppkg_file, dstFile=archive_file, srcHost=None,
+            Rsync(name="copy to segment", srcFile=gppkg_file, dstFile=archive_file, srcHost=None,
                 dstHost=segment_host_list[0]).run(validateAfter=True)
             self.fail("ExecutionError %s" % e)
 
@@ -653,7 +653,7 @@ def test05_delete_package_from_archive_on_standby(self):
         try:
             self.install(gppkg_file)
         except ExecutionError as e:
-            Scp(name="copy to segment", srcFile=gppkg_file, dstFile=archive_file, srcHost=None, dstHost=standby).run(
+            Rsync(name="copy to segment", srcFile=gppkg_file, dstFile=archive_file, srcHost=None, dstHost=standby).run(
                 validateAfter=True)
             self.fail("ExecutionError %s" % e)
 
diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_package.py b/gpMgmt/bin/gppylib/test/unit/test_unit_package.py
index 8ea90face04..a4d3288e37b 100644
--- a/gpMgmt/bin/gppylib/test/unit/test_unit_package.py
+++ b/gpMgmt/bin/gppylib/test/unit/test_unit_package.py
@@ -183,7 +183,7 @@ def setUp(self):
             patch('gppylib.operations.package.MakeDir'),
             patch('gppylib.operations.package.CheckRemoteDir'),
             patch('gppylib.operations.package.MakeRemoteDir'),
-            patch('gppylib.operations.package.Scp'),
+            patch('gppylib.operations.package.Rsync'),
             patch('gppylib.operations.package.RemoteOperation'),
             patch('gppylib.operations.package.RemoveRemoteFile'),
             patch('gppylib.operations.package.InstallPackageLocally'),
@@ -199,7 +199,7 @@ def setUp(self):
         self.mock_listdir = self.get_mock_from_apply_patch('listdir')
         self.mock_command = self.get_mock_from_apply_patch('Command')
         self.mock_logger = self.get_mock_from_apply_patch('logger')
-        self.mock_scp = self.get_mock_from_apply_patch('Scp')
+        self.mock_rsync = self.get_mock_from_apply_patch('Rsync')
         self.mock_install_packages_locally = self.get_mock_from_apply_patch('InstallPackageLocally')
 
 
@@ -228,7 +228,7 @@ def test__execute_install_on_segments_when_package_are_missing(self):
         hostname = 'localhost'
         subject = SyncPackages(hostname)
         subject.execute()
-        self.assertEqual(self.mock_scp.call_count, 2)
+        self.assertEqual(self.mock_rsync.call_count, 2)
         self.assertEqual(self.make_dir_mock.call_count, 1)
         self.assertEqual(self.make_remote_dir_mock.call_count, 1)
 
diff --git a/gpMgmt/bin/gpssh-exkeys b/gpMgmt/bin/gpssh-exkeys
index 1a1b2ec91dd..cf9853360fd 100755
--- a/gpMgmt/bin/gpssh-exkeys
+++ b/gpMgmt/bin/gpssh-exkeys
@@ -751,7 +751,7 @@ try:
 
 
         for h in GV.newHosts:
-            cmd = ('scp -q -o "BatchMode yes" -o "NumberOfPasswordPrompts 0" ' +
+            cmd = ('rsync -q -e "ssh -o BatchMode=yes -o NumberOfPasswordPrompts=0" ' +
                    '%s %s %s %s %s:.ssh/ 2>&1'
                    % (GV.authorized_keys_fname,
                       GV.known_hosts_fname,
@@ -792,7 +792,7 @@ try:
                     remoteIdentity = GV.id_rsa_fname
                     remoteIdentityPub = GV.id_rsa_pub_fname
 
-                cmd = ('scp -q -o "BatchMode yes" -o "NumberOfPasswordPrompts 0" ' +
+                cmd = ('rsync -q -e "ssh -o BatchMode=yes -o NumberOfPasswordPrompts=0" ' +
                        '%s %s %s %s %s:.ssh/ 2>&1'
                        % (remoteAuthKeysFile,
                           remoteKnownHostsFile,
diff --git a/gpMgmt/bin/gpscp b/gpMgmt/bin/gpsync
similarity index 86%
rename from gpMgmt/bin/gpscp
rename to gpMgmt/bin/gpsync
index 21be83e90d6..e656ca97a88 100755
--- a/gpMgmt/bin/gpscp
+++ b/gpMgmt/bin/gpsync
@@ -1,15 +1,16 @@
 #!/usr/bin/env python3
 
 '''
-gpscp -- scp to multiple hosts at once
+gpsync -- rsync to multiple hosts at once
 
-Usage: gpscp [--version] [-?v] [-r] [-p port] [-u user]
+Usage: gpsync [--version] [-?v] [-r] [-a] [-p port] [-u user]
              [-h host] [-f hostfile] [-J host_substitution_character] [[user@]host1:]file1 [...] [[user@]hosts2:]file2
 
 	     --version    : print version information
              -?           : print this help screen
 	     -v	          : verbose mode
              -r           : recursively copy entire directories
+             -a           : archive mode; equals -rlptgoD (no -H,-A,-X)
 	     -h host      : ssh host to connect to (multiple -h is okay)
 	     -f file      : a file listing all hosts to connect to
              -J character : character to be substitute as hostname [default='=']
@@ -36,6 +37,7 @@ class Global:
     opt['-f'] = None
     opt['-J'] = '=:'
     opt['-r'] = False
+    opt['-a'] = False
     filePath = []
 
 
@@ -61,7 +63,7 @@ def print_version():
 #############
 def parseCommandLine():
     try:
-        (options, args) = getopt.getopt(sys.argv[1:], '?vrJ:p:u:h:f:', ['version'])
+        (options, args) = getopt.getopt(sys.argv[1:], '?vraJ:p:u:h:f:', ['version'])
     except Exception as e:
         usage('Error: ' + str(e))
 
@@ -78,6 +80,8 @@ def parseCommandLine():
             GV.opt[switch] = val + ':'
         elif (switch == '-r'):
             GV.opt[switch] = True
+        elif (switch == '-a'):
+            GV.opt[switch] = True
         elif (switch == '--version'):
             print_version()
 
@@ -109,13 +113,14 @@ try:
     if len(GV.opt['-h']) == 0:
         usage('Error: missing hosts in -h and/or -f arguments')
 
-    scp = 'scp -o "BatchMode yes" -o "StrictHostKeyChecking no"'
-    if GV.opt['-r']:  scp += ' -r'
+    rsync = 'rsync -e "ssh -o BatchMode=yes -o StrictHostKeyChecking=no"'
+    if GV.opt['-r']:  rsync += ' -r'
+    if GV.opt['-a']:  rsync += ' -a'
 
     proc = []
     for peer in GV.opt['-h']:
         peer = canonicalize_address(peer)  # MPP-13617
-        cmd = scp + ' '
+        cmd = rsync + ' '
         for f in GV.filePath:
             cmd += f.replace(GV.opt['-J'], '%s:' % peer) + ' '
         if GV.opt['-v']: print('[INFO]', cmd)
diff --git a/gpMgmt/bin/lib/gp_bash_functions.sh b/gpMgmt/bin/lib/gp_bash_functions.sh
index dc996cb6411..e23f7d9f5ad 100755
--- a/gpMgmt/bin/lib/gp_bash_functions.sh
+++ b/gpMgmt/bin/lib/gp_bash_functions.sh
@@ -87,7 +87,7 @@ MV=`findCmdInPath mv`
 MKDIR=`findCmdInPath mkdir`
 PING=`findCmdInPath ping`
 RM=`findCmdInPath rm`
-SCP=`findCmdInPath scp`
+RSYNC=`findCmdInPath rsync`
 SED=`findCmdInPath sed`
 SLEEP=`findCmdInPath sleep`
 SORT=`findCmdInPath sort`
@@ -158,7 +158,7 @@ PG_CONF=postgresql.conf
 PG_INTERNAL_CONF=internal.auto.conf
 PG_HBA=pg_hba.conf
 if [ x"$TRUSTED_SHELL" = x"" ]; then TRUSTED_SHELL="$SSH"; fi
-if [ x"$TRUSTED_COPY" = x"" ]; then TRUSTED_COPY="$SCP"; fi
+if [ x"$TRUSTED_COPY" = x"" ]; then TRUSTED_COPY="$RSYNC  "; fi
 PG_CONF_ADD_FILE=$WORKDIR/postgresql_conf_gp_additions
 DEFAULTDB=template1
 ETCD_CONFIG_TMP_FILE=/tmp/cbdb_etcd.conf
diff --git a/gpMgmt/test/behave/mgmt_utils/gppkg.feature b/gpMgmt/test/behave/mgmt_utils/gppkg.feature
index 68adcd247ea..b6a070e901d 100644
--- a/gpMgmt/test/behave/mgmt_utils/gppkg.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gppkg.feature
@@ -120,7 +120,7 @@ Feature: gppkg tests
     Scenario: gppkg --migrate copies all packages from coordinator to all segment hosts
         Given the database is running
         And the user runs "gppkg -r sample"
-        And a gphome copy is created at /tmp/gppkg_migrate on all hosts
+        And a gphome copy is created at /tmp/gppkg_migrate/ on all hosts
         When a user runs "COORDINATOR_DATA_DIRECTORY=$COORDINATOR_DATA_DIRECTORY gppkg -r sample" with gphome "/tmp/gppkg_migrate"
         And "sample" gppkg files do not exist on any hosts
         When a user runs "COORDINATOR_DATA_DIRECTORY=$COORDINATOR_DATA_DIRECTORY gppkg --install $(pwd)/test/behave/mgmt_utils/steps/data/sample.gppkg" with gphome "/tmp/gppkg_migrate"
diff --git a/gpMgmt/test/behave/mgmt_utils/steps/gpconfig_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/gpconfig_mgmt_utils.py
index 9913ae5b937..c8e2dfc07fb 100644
--- a/gpMgmt/test/behave/mgmt_utils/steps/gpconfig_mgmt_utils.py
+++ b/gpMgmt/test/behave/mgmt_utils/steps/gpconfig_mgmt_utils.py
@@ -29,8 +29,8 @@ def impl(context):
         os.mkdir(segment_tmp_directory)
         backup_path = path.join(segment_tmp_directory, 'postgresql.conf')
         original_path = path.join(segment.datadir, 'postgresql.conf')
-        copy_command = ('scp %s:%s %s' % (segment.hostname, original_path, backup_path)).split(' ')
-        restore_command = ('scp %s %s:%s' % (backup_path, segment.hostname, original_path)).split(' ')
+        copy_command = ('rsync %s:%s %s' % (segment.hostname, original_path, backup_path)).split(' ')
+        restore_command = ('rsync %s %s:%s' % (backup_path, segment.hostname, original_path)).split(' ')
         restore_commands.append(restore_command)
 
         subprocess.check_call(copy_command)
diff --git a/gpMgmt/test/behave/mgmt_utils/steps/gpssh_exkeys_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/gpssh_exkeys_mgmt_utils.py
index 5add9165f57..9e8d57d8a65 100644
--- a/gpMgmt/test/behave/mgmt_utils/steps/gpssh_exkeys_mgmt_utils.py
+++ b/gpMgmt/test/behave/mgmt_utils/steps/gpssh_exkeys_mgmt_utils.py
@@ -295,7 +295,7 @@ def impl(context):
 
     # This blows away any existing authorized_keys file on the segments.
     subprocess.check_call([
-        'gpscp',
+        'gpsync',
         '-v',
         ] + host_opts + [
         '~/.ssh/id_rsa.pub',
diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
index 9b455177079..24049c068b5 100644
--- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
+++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
@@ -23,8 +23,9 @@
 from gppylib.gparray import GpArray, ROLE_PRIMARY, ROLE_MIRROR
 from gppylib.commands.gp import SegmentStart, GpStandbyStart, CoordinatorStop
 from gppylib.commands import gp
-from gppylib.commands.unix import findCmdInPath, Scp
+from gppylib.commands.pg import PgBaseBackup
 from gppylib.operations.startSegments import MIRROR_MODE_MIRRORLESS
+from gppylib.operations.buildMirrorSegments import get_recovery_progress_pattern
 from gppylib.operations.unix import ListRemoteFilesByPattern, CheckRemoteFile
 from test.behave_utils.gpfdist_utils.gpfdist_mgmt import Gpfdist
 from test.behave_utils.utils import *
@@ -34,7 +35,7 @@
 from gppylib.commands.base import Command, REMOTE
 from gppylib import pgconf
 from gppylib.operations.package import linux_distribution_id, linux_distribution_version
-
+from gppylib.commands.gp import get_coordinatordatadir
 
 coordinator_data_dir = gp.get_coordinatordatadir()
 if coordinator_data_dir is None:
@@ -340,7 +341,7 @@ def impl(context, dbname):
     drop_database(context, dbname)
 
 
-@given('{env_var} environment variable is not set')
+@given('"{env_var}" environment variable is not set')
 def impl(context, env_var):
     if not hasattr(context, 'orig_env'):
         context.orig_env = dict()
@@ -349,7 +350,15 @@ def impl(context, env_var):
     if env_var in os.environ:
         del os.environ[env_var]
 
-@then('{env_var} environment variable should be restored')
+@given('the environment variable "{env_var}" is set to "{val}"')
+def impl(context, env_var, val):
+    if not hasattr(context, 'orig_env'):
+        context.orig_env = dict()
+    # Save the pre-test value (None == unset); setdefault keeps the first one if overridden again before restore.
+    context.orig_env.setdefault(env_var, os.environ.get(env_var))
+    os.environ[env_var] = val
+
+@then('"{env_var}" environment variable should be restored')
 def impl(context, env_var):
     if not hasattr(context, 'orig_env'):
         raise Exception('%s can not be reset' % env_var)
@@ -357,11 +366,28 @@ def impl(context, env_var):
     if env_var not in context.orig_env:
         raise Exception('%s can not be reset.' % env_var)
 
-    os.environ[env_var] = context.orig_env[env_var]
+    if context.orig_env[env_var] is None:
+        os.environ.pop(env_var, None)  # originally unset; pop() does not raise if it is still unset now
+    else:
+        os.environ[env_var] = context.orig_env[env_var]
 
     del context.orig_env[env_var]
 
+
+@given('all files in pg_wal directory are deleted from data directory of preferred primary of content {content_ids}')
+def impl(context, content_ids):
+    # content_ids is a comma-separated list of content ids; parse it once instead of once per segment.
+    wanted_contents = {int(c) for c in content_ids.split(',')}
+    for seg in GpArray.initFromCatalog(dbconn.DbURL()).getDbList():
+        if seg.getSegmentPreferredRole() == ROLE_PRIMARY and seg.getSegmentContentId() in wanted_contents:
+            cmd = Command(name="Remove pg_wal files",  # rm -rf removes the pg_wal directory itself
+                          cmdStr='rm -rf {}'.format(os.path.join(seg.getSegmentDataDirectory(), 'pg_wal')),
+                          remoteHost=seg.getSegmentHostName(), ctxt=REMOTE)
+            cmd.run(validateAfter=True)
+
+
 @given('the user {action} the walsender on the {segment} on content {content}')
+@when('the user {action} the walsender on the {segment} on content {content}')
 @then('the user {action} the walsender on the {segment} on content {content}')
 def impl(context, action, segment, content):
     if segment == 'mirror':
@@ -401,11 +427,84 @@ def impl(context, segment, content):
 @then('the user waits until all bytes are sent to mirror on content {content}')
 def impl(context, content):
     host, port = get_primary_segment_host_port_for_content(content)
-    query = "SELECT CASE WHEN sync_state='sync' THEN (pg_current_wal_lsn() - sent_lsn) ELSE 1 END FROM pg_stat_replication;"
+    query = "SELECT pg_current_wal_lsn() - sent_lsn FROM pg_stat_replication;"
     desired_result = 0
-    wait_for_desired_query_result_on_segment(host, port, query, desired_result)
+    dburl = dbconn.DbURL(hostname=host, port=port, dbname='template1')
+    wait_for_desired_query_result(dburl, query, desired_result, utility=True)
+
+    # Wait for replication state to be in 'sync'
+    query = "SELECT sync_state FROM pg_stat_replication;"
+    desired_result = 'sync'
+    dburl = dbconn.DbURL(hostname=host, port=port, dbname='template1')
+    wait_for_desired_query_result(dburl, query, desired_result, utility=True)
+
+@given('the user waits until recovery_progress.file is created in {logdir} and verifies its format')
+@when('the user waits until recovery_progress.file is created in {logdir} and verifies its format')
+@then('the user waits until recovery_progress.file is created in {logdir} and verifies its format')
+def impl(context, logdir):  # poll until recovery_progress.file exists, then validate its format
+    attempt = 0
+    num_retries = 60000  # with the 0.01s sleep below: roughly 10 minutes of polling before the timeout
+    log_dir = _get_gpAdminLogs_directory() if logdir == 'gpAdminLogs' else logdir
+    recovery_progress_file = '{}/recovery_progress.file'.format(log_dir)
+    while attempt < num_retries:
+        attempt += 1
+        if os.path.exists(recovery_progress_file):
+            with open(recovery_progress_file, 'r') as fp:
+                context.recovery_lines = fp.readlines()  # stored on context so later steps can read the lines
+            for line in context.recovery_lines:
+                recovery_type, dbid, progress = line.strip().split(':', 2)  # <type>:<dbid>:<progress text>
+                progress_pattern = re.compile(get_recovery_progress_pattern())
+                # TODO: assert progress line in the actual hosts bb/rewind progress file
+                if re.search(progress_pattern, progress) and dbid.isdigit() and recovery_type in ['full', 'incremental']:
+                    return  # NOTE(review): returns on the first line, so any later lines are never format-checked
+                else:
+                    raise Exception('File present but incorrect format line "{}"'.format(line))
+        time.sleep(0.01)  # reached when the file is missing or exists but is still empty
+        if attempt == num_retries:
+            raise Exception('Timed out after {} retries'.format(num_retries))
+
+
+@then( 'verify if the gprecoverseg.lock directory is present in coordinator_data_directory')
+def impl(context):
+    # Passes only while gprecoverseg.lock exists under the coordinator data directory.
+    lock_path = "%s/gprecoverseg.lock" % gp.get_coordinatordatadir()
+    if os.path.exists(lock_path):
+        return
+    raise Exception('gprecoverseg.lock directory does not exist')
 
 
+@then('verify that lines from recovery_progress.file are present in segment progress files in {logdir}')
+def impl(context, logdir):
+    all_progress_lines_by_dbid = {}
+    for line in context.recovery_lines:
+        recovery_type, dbid, line_from_combined_progress_file = line.strip().split(':', 2)
+        all_progress_lines_by_dbid[int(dbid)] = [recovery_type, line_from_combined_progress_file]
+    # Only segments whose dbid appears in recovery_progress.file are checked below.
+    all_segments = GpArray.initFromCatalog(dbconn.DbURL()).getDbList()
+    # 'gpAdminLogs' is resolved via _get_gpAdminLogs_directory(); any other value is used as the path itself.
+    log_dir = _get_gpAdminLogs_directory() if logdir == 'gpAdminLogs' else logdir
+    for seg in all_segments:
+        seg_dbid = seg.getSegmentDbId()
+        if seg_dbid in all_progress_lines_by_dbid:
+            recovery_type, line_from_combined_progress_file = all_progress_lines_by_dbid[seg_dbid]
+            process_name = 'pg_basebackup' if recovery_type == 'full' else 'pg_rewind'
+            seg_progress_file = '{}/{}.*.dbid{}.out'.format(log_dir, process_name, seg_dbid)
+            check_cmd_str = 'grep -F -- "{}" {}'.format(line_from_combined_progress_file, seg_progress_file)  # -F: literal text, not a regex
+            check_cmd = Command(name='check line in segment progress file',
+                          cmdStr=check_cmd_str,
+                          ctxt=REMOTE,
+                          remoteHost=seg.getSegmentHostName())
+            check_cmd.run()
+            if check_cmd.get_return_code() != 0:
+                raise Exception('Expected line {} in segment progress file {} on host {} but not found.'
+                                .format(line_from_combined_progress_file, seg_progress_file, seg.getSegmentHostName()))
+
+
+@then('recovery_progress.file should not exist in {logdir}')
+def impl(context, logdir):
+    target_dir = logdir if logdir != 'gpAdminLogs' else _get_gpAdminLogs_directory()
+    if os.path.exists('{}/recovery_progress.file'.format(target_dir)):  # the step fails if the file was left behind
+        raise Exception('recovery_progress.file is still present under {}'.format(target_dir))
 
 def backup_bashrc():
     home_dir = os.environ.get('HOME')
@@ -439,6 +538,7 @@ def restore_bashrc():
 def impl(context, command):
     run_gpcommand(context, command)
 
+
 @when('the user sets banner on host')
 def impl(context):
     file = '~/.bashrc'
@@ -518,14 +618,43 @@ def impl(context, process_name, secs):
     run_async_command(context, command)
 
 
-@when('the user asynchronously sets up to end gpinitsystem process when {log_msg} is printed in the logs')
-def impl(context, log_msg):
+@when('the user asynchronously sets up to end {process_name} process when {log_msg} is printed in the logs')
+def impl(context, process_name, log_msg):  # background watcher: kill <process_name> once log_msg appears in its log
     command = "while sleep 0.1; " \
-              "do if egrep --quiet %s  ~/gpAdminLogs/gpinitsystem*log ; " \
-              "then ps ux | grep bin/gpinitsystem |awk '{print $2}' | xargs kill ;break 2; " \
-              "fi; done" % (log_msg)
+              "do if egrep --quiet %s  ~/gpAdminLogs/%s*log ; " \
+              "then ps ux | grep bin/%s |awk '{print $2}' | xargs kill ;break 2; " \
+              "fi; done" % (log_msg, process_name, process_name)  # bare `kill` sends its default signal
     run_async_command(context, command)
 
+@then('the user asynchronously sets up to end {kill_process_name} process when {log_msg} is printed in the {logfile_name} logs')
+def impl(context, kill_process_name, log_msg, logfile_name):
+    # Poll ~/gpAdminLogs/<logfile_name>*log every 0.1s; on the first egrep hit for log_msg, send kill -2 and stop.
+    watcher = ("while sleep 0.1; do if egrep --quiet %s  ~/gpAdminLogs/%s*log ; "
+               "then ps ux | grep bin/%s |awk '{print $2}' | xargs kill -2 ;break 2; "
+               "fi; done" % (log_msg, logfile_name, kill_process_name))
+    run_async_command(context, watcher)
+
+@given('the user asynchronously sets up to end {process_name} process with SIGINT')
+@when('the user asynchronously sets up to end {process_name} process with SIGINT')
+@then('the user asynchronously sets up to end {process_name} process with SIGINT')
+def impl(context, process_name):
+    sigint_cmd = "ps ux | grep bin/%s | awk '{print $2}' | xargs kill -2" % process_name  # signal 2 == SIGINT
+    run_async_command(context, sigint_cmd)
+
+
+@given('the user asynchronously sets up to end {process_name} process with SIGHUP')
+@when('the user asynchronously sets up to end {process_name} process with SIGHUP')
+@then('the user asynchronously sets up to end {process_name} process with SIGHUP')
+def impl(context, process_name):  # NOTE(review): step says SIGHUP but kill -9 is SIGKILL (SIGHUP is -1) -- confirm intent
+    command = "ps ux | grep bin/%s | awk '{print $2}' | xargs kill -9" % (process_name)
+    run_async_command(context, command)
+
+@given('the user asynchronously ends {process_name} process with SIGHUP')
+@when('the user asynchronously ends {process_name} process with SIGHUP')
+@then('the user asynchronously ends {process_name} process with SIGHUP')
+def impl(context, process_name):  # NOTE(review): step says SIGHUP but kill -9 is SIGKILL (SIGHUP is -1) -- confirm intent
+    command = "ps ux | grep %s | awk '{print $2}' | xargs kill -9" % (process_name)  # greps the bare name, without a bin/ prefix
+    run_async_command(context, command)
 
 @when('the user asynchronously sets up to end gpcreateseg process when it starts')
 def impl(context):
@@ -559,6 +688,7 @@ def impl(context, ret_code):
 
 
 @when('the user waits until saved async process is completed')
+@then('the user waits until saved async process is completed')
 def impl(context):
     context.asyncproc.communicate2()
 
@@ -698,9 +828,45 @@ def impl(context, command, out_msg, num):
         raise Exception("Expected %s to occur %s times. Found %d. stdout: %s" % (out_msg, num, count, msg_list))
 
 
+@given('the user records the current timestamp in log_timestamp table')
+@when('the user records the current timestamp in log_timestamp table')
+@then('the user records the current timestamp in log_timestamp table')
+def impl(context):
+    sql = "CREATE TABLE log_timestamp AS SELECT CURRENT_TIMESTAMP;"
+    rc, output, error = run_cmd("psql -d template1 -c \'%s\'" %sql)
+    if rc:
+        raise Exception(error)
+
+
+@then('the user drops log_timestamp table')
+def impl(context):
+    rc, output, error = run_cmd("psql -d template1 -c \"DROP TABLE log_timestamp;\"")
+    if rc:
+        raise Exception(error)
+
+
+@then('the pg_log files on primary segments should not contain "{msg}"')
+def impl(context, msg):
+
+    gparray = GpArray.initFromCatalog(dbconn.DbURL())
+    segments = gparray.getDbList()
+    conn = dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)
+
+    for seg in segments:
+        if seg.isSegmentPrimary():
+            segname = "seg"+str(seg.content)
+            sql = "select * from gp_toolkit.__gp_log_segment_ext where logsegment='%s' and logtime > (select * from log_timestamp) and logmessage like '%s'" %(segname, msg)
+            try:
+                cursor = dbconn.query(conn, sql)
+                if cursor.fetchone():
+                    raise Exception("Fatal message exists in pg_log file on primary segment %s" %segname)
+            finally:
+                pass
+    conn.close()
+
 def lines_matching_both(in_str, str_1, str_2):
     lines = [x.strip() for x in in_str.split('\n')]
-    return [x for x in lines if x.count(str_1) and x.count(str_2)]
+    return [line for line in lines if line.count(str_1) and line.count(str_2)]
 
 
 @then('check if {command} ran "{called_command}" {num} times with args "{args}"')
@@ -748,6 +914,21 @@ def impl(context):
     raise Exception('segments are not in sync after %d seconds' % (times * sleeptime))
 
 
+@then('the segments are synchronized for content {content_ids}')
+def impl(context, content_ids):
+    if content_ids == 'None':
+        return
+    times = 60
+    sleeptime = 10
+    content_ids_to_check = [int(c) for c in content_ids.split(',')]
+    for i in range(times):
+        if are_segments_synchronized_for_content_ids(content_ids_to_check):
+            return
+        time.sleep(sleeptime)
+
+    raise Exception('segments are not in sync after %d seconds' % (times * sleeptime))
+
+
 @then('verify that there is no table "{tablename}" in "{dbname}"')
 def impl(context, tablename, dbname):
     dbname = replace_special_char_env(dbname)
@@ -1203,6 +1384,8 @@ def stop_all_primary_or_mirror_segments_on_hosts(context, segment_type, hosts):
 
 
 @given('the {role} on content {contentID} is stopped')
+@when('the {role} on content {contentID} is stopped')
+@then('the {role} on content {contentID} is stopped')
 def stop_segments_on_contentID(context, role, contentID):
     if role not in ("primary", "mirror"):
         raise Exception("Expected segment_type to be 'primary' or 'mirror', but found '%s'." % role)
@@ -1210,6 +1393,18 @@ def stop_segments_on_contentID(context, role, contentID):
     role = ROLE_PRIMARY if role == 'primary' else ROLE_MIRROR
     stop_segments(context, lambda seg: seg.getSegmentRole() == role and seg.content == int(contentID))
 
+@given('the {role} on content {contents} is stopped with the immediate flag')
+@when('the {role} on content {contents} is stopped with the immediate flag')
+@then('the {role} on content {contents} is stopped with the immediate flag')
+def stop_segments_on_contentID(context, role, contents):
+    if role not in ("primary", "mirror"):
+        raise Exception("Expected segment_type to be 'primary' or 'mirror', but found '%s'." % role)
+    content_ids = [int(i) for i in contents.split(',')]
+
+    role = ROLE_PRIMARY if role == 'primary' else ROLE_MIRROR
+    stop_segments_immediate(context, lambda seg: seg.getSegmentRole() == role and seg.content in content_ids)
+
+
 
 # where_clause is a lambda that takes a segment to select what segments to stop
 def stop_segments(context, where_clause):
@@ -1224,6 +1419,17 @@ def stop_segments(context, where_clause):
                                    pipes.quote(os.environ.get("GPHOME")), pipes.quote(seg.getSegmentDataDirectory()))
                                ])
 
+
+@given('user immediately stops all {segment_type} processes for content {content}')
+@then('user immediately stops all {segment_type} processes for content {content}')
+def stop_all_primary_or_mirror_segments(context, segment_type, content):
+    if segment_type not in ("primary", "mirror"):
+        raise Exception("Expected segment_type to be 'primary' or 'mirror', but found '%s'." % segment_type)
+    content_ids = [int(i) for i in content.split(',')]
+    role = ROLE_PRIMARY if segment_type == 'primary' else ROLE_MIRROR
+    stop_segments_immediate(context, lambda seg: seg.getSegmentRole() == role and seg.content in content_ids)
+
+
 @given('user immediately stops all {segment_type} processes')
 @when('user immediately stops all {segment_type} processes')
 @then('user immediately stops all {segment_type} processes')
@@ -1253,13 +1459,6 @@ def stop_segments_immediate(context, where_clause):
 def impl(context):
     wait_for_unblocked_transactions(context)
 
-
-@given('the environment variable "{var}" is set to "{val}"')
-def impl(context, var, val):
-    context.env_var = os.environ.get(var)
-    os.environ[var] = val
-
-
 @given('below sql is executed in "{dbname}" db')
 @when('below sql is executed in "{dbname}" db')
 def impl(context, dbname):
@@ -1342,24 +1541,22 @@ def impl(context):
     return
 
 
-@given('the "{seg}" segment information is saved')
-@when('the "{seg}" segment information is saved')
-@then('the "{seg}" segment information is saved')
-def impl(context, seg):
-    gparray = GpArray.initFromCatalog(dbconn.DbURL())
+@given('verify that mirror on content {content_ids} is {expected_status}')
+@when('verify that mirror on content {content_ids} is {expected_status}')
+@then('verify that mirror on content {content_ids} is {expected_status}')
+def impl(context, content_ids, expected_status):
+    if content_ids == 'None':
+        return
+    if expected_status not in ('up', 'down'):
+        raise Exception("expected_status can only be 'up' or 'down'")
 
-    if seg == "primary":
-        primary_segs = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary()]
-        context.pseg = primary_segs[0]
-        context.pseg_data_dir = context.pseg.getSegmentDataDirectory()
-        context.pseg_hostname = context.pseg.getSegmentHostName()
-        context.pseg_dbid = context.pseg.getSegmentDbId()
-    elif seg == "mirror":
-        mirror_segs = [seg for seg in gparray.getDbList() if seg.isSegmentMirror()]
-        context.mseg = mirror_segs[0]
-        context.mseg_hostname = context.mseg.getSegmentHostName()
-        context.mseg_dbid = context.mseg.getSegmentDbId()
-        context.mseg_data_dir = context.mseg.getSegmentDataDirectory()
+    for content in content_ids.split(','):
+        if expected_status == 'up' and not is_segment_running(ROLE_MIRROR, int(content)):
+            raise Exception("mirror for content {} is not up".format(content))
+        elif expected_status == 'down' and is_segment_running(ROLE_MIRROR, int(content)):
+            raise Exception("mirror for content {} is not down".format(content))
+
+    return
 
 
 @given('the cluster configuration has no segments where "{filter}"')
@@ -1413,7 +1610,7 @@ def impl(context, seg):
                   remoteHost=hostname, ctxt=REMOTE)
     cmd.run(validateAfter=True)
 
-    cmd = Command(name="Copy background script to remote host", cmdStr='scp %s %s:/tmp' % (filename, hostname))
+    cmd = Command(name="Copy background script to remote host", cmdStr='rsync %s %s:/tmp' % (filename, hostname))
     cmd.run(validateAfter=True)
 
     cmd = Command(name="Run Bg process to save pid",
@@ -1463,23 +1660,20 @@ def impl(context, seg):
     pid_file = os.path.join(data_dir, 'postmaster.pid')
     pid_file_orig = pid_file + '.orig'
 
-    cmd = Command(name="Copy pid file", cmdStr='cp %s %s' % (pid_file_orig, pid_file), remoteHost=hostname, ctxt=REMOTE)
-    cmd.run(validateAfter=True)
-
-    cpCmd = Command(name='copy pid file to coordinator for editing', cmdStr='scp %s:%s /tmp' % (hostname, pid_file))
+    cpCmd = Command(name='copy pid file to coordinator for editing', cmdStr='rsync %s:%s /tmp' % (hostname, pid_file_orig))
 
     cpCmd.run(validateAfter=True)
 
-    with open('/tmp/postmaster.pid', 'r') as fr:
+    with open('/tmp/postmaster.pid.orig', 'r') as fr:
         lines = fr.readlines()
 
     lines[0] = "%s\n" % context.bg_pid
 
-    with open('/tmp/postmaster.pid', 'w') as fw:
+    with open('/tmp/postmaster.pid.orig', 'w') as fw:
         fw.writelines(lines)
 
     cpCmd = Command(name='copy pid file to segment after editing',
-                    cmdStr='scp /tmp/postmaster.pid %s:%s' % (hostname, pid_file))
+                    cmdStr='rsync /tmp/postmaster.pid.orig %s:%s' % (hostname, pid_file))
     cpCmd.run(validateAfter=True)
 
 
@@ -1503,7 +1697,7 @@ def impl(context, seg):
     cmd = Command(name="Copy pid file", cmdStr='cp %s %s' % (pid_file_orig, pid_file), remoteHost=hostname, ctxt=REMOTE)
     cmd.run(validateAfter=True)
 
-    cpCmd = Command(name='copy pid file to coordinator for editing', cmdStr='scp %s:%s /tmp' % (hostname, pid_file))
+    cpCmd = Command(name='copy pid file to coordinator for editing', cmdStr='rsync %s:%s /tmp' % (hostname, pid_file))
 
     cpCmd.run(validateAfter=True)
 
@@ -1526,7 +1720,7 @@ def impl(context, seg):
         fw.writelines(lines)
 
     cpCmd = Command(name='copy pid file to segment after editing',
-                    cmdStr='scp /tmp/postmaster.pid %s:%s' % (hostname, pid_file))
+                    cmdStr='rsync /tmp/postmaster.pid %s:%s' % (hostname, pid_file))
     cpCmd.run(validateAfter=True)
 
 
@@ -2388,10 +2582,10 @@ def impl(context, location):
             host_opts.extend(['-h', host])
 
         subprocess.check_call([
-            'gpscp',
-            '-rv',
+            'gpsync',
+            '-av',
             ] + host_opts + [
-            os.getenv('GPHOME'),
+            os.getenv('GPHOME')+'/',
             '=:{}'.format(location),
         ])
 
@@ -2402,6 +2596,7 @@ def impl(context, location):
         ])
 
 @given('all files in gpAdminLogs directory are deleted')
+@when('all files in gpAdminLogs directory are deleted')
 @then('all files in gpAdminLogs directory are deleted')
 def impl(context):
     log_dir = _get_gpAdminLogs_directory()
@@ -2409,6 +2604,40 @@ def impl(context):
     for file in files_found:
         os.remove(file)
 
+
+@given('all files in gpAdminLogs directory are deleted on hosts {hosts}')
+def impl(context, hosts):
+    host_list = hosts.split(',')
+    log_dir = _get_gpAdminLogs_directory()
+    for host in host_list:
+        rm_cmd = Command(name="remove files in gpAdminLogs",
+                              cmdStr="rm -rf {}/*".format(log_dir),
+                              remoteHost=host, ctxt=REMOTE)
+        rm_cmd.run(validateAfter=True)
+
+@given('all files in "{dir}" directory are deleted on all hosts in the cluster')
+@then('all files in "{dir}" directory are deleted on all hosts in the cluster')
+def impl(context, dir):
+    host_list = GpArray.initFromCatalog(dbconn.DbURL()).getHostList()
+    for host in host_list:
+        rm_cmd = Command(name="remove files in {}".format(dir),
+                         cmdStr="rm -rf {}/*".format(dir),
+                         remoteHost=host, ctxt=REMOTE)
+        rm_cmd.run(validateAfter=True)
+
+@given('all files in gpAdminLogs directory are deleted on all hosts in the cluster')
+@when('all files in gpAdminLogs directory are deleted on all hosts in the cluster')
+@then('all files in gpAdminLogs directory are deleted on all hosts in the cluster')
+def impl(context):
+    host_list = GpArray.initFromCatalog(dbconn.DbURL()).getHostList()
+    log_dir = _get_gpAdminLogs_directory()
+    for host in host_list:
+        rm_cmd = Command(name="remove files in gpAdminLogs",
+                         cmdStr="rm -rf {}/*".format(log_dir),
+                         remoteHost=host, ctxt=REMOTE)
+        rm_cmd.run(validateAfter=True)
+
+
 @then('gpAdminLogs directory {has} "{expected_file}" files')
 def impl(context, has, expected_file):
     log_dir = _get_gpAdminLogs_directory()
@@ -2418,6 +2647,61 @@ def impl(context, has, expected_file):
     if (not files_found) and (has == 'has'):
         raise Exception("expected %s file in %s, but not found" % (expected_file, log_dir))
 
+
+@then('gpAdminLogs directory has "{expected_file}" files on respective hosts only for content {content_ids}')
+def impl(context, expected_file, content_ids):
+    content_list = [int(c) for c in content_ids.split(',')]
+    all_segments = GpArray.initFromCatalog(dbconn.DbURL()).getDbList()
+    segments = filter(lambda seg: seg.getSegmentRole() == ROLE_MIRROR and
+                                  seg.content in content_list, all_segments)
+    host_to_seg_dbids = {}
+    for seg in segments:
+        segHost = seg.getSegmentHostName()
+        if segHost in host_to_seg_dbids:
+            host_to_seg_dbids[segHost].append('dbid{}'.format(seg.dbid))
+        else:
+            host_to_seg_dbids[segHost] = ['dbid{}'.format(seg.dbid)]
+
+    for segHost, expected_files_on_host in host_to_seg_dbids.items():
+        log_dir = "%s/gpAdminLogs" % os.path.expanduser("~")
+        listdir_cmd = Command(name="list logfiles on host",
+                              cmdStr="ls -l {}/{}".format(log_dir, expected_file),
+                              remoteHost=segHost, ctxt=REMOTE)
+        listdir_cmd.run(validateAfter=True)
+        ls_outs = listdir_cmd.get_results().stdout.split('\n')
+        files_found = [ls_line.split(' ')[-1] for ls_line in ls_outs if ls_line]
+
+        if not files_found:
+            raise Exception("expected {} files in {} on host {}, but not found".format(expected_file, log_dir, segHost))
+
+        if len(files_found) != len(expected_files_on_host):
+            raise Exception("expected {} {} files in {} on host {}, but found {}: {}"
+                            .format(len(expected_files_on_host), expected_file, log_dir, segHost, len(files_found),
+                                    files_found))
+        for file in files_found:
+            if file.split('.')[-2] not in expected_files_on_host:
+                raise Exception("Found unexpected file {} in {}".format(file, log_dir))
+
+
+@then('gpAdminLogs directory {has} "{expected_file}" files on all segment hosts')
+def impl(context, has, expected_file):
+    all_segments = GpArray.initFromCatalog(dbconn.DbURL()).getDbList()
+    all_segment_hosts = [seg.getSegmentHostName() for seg in all_segments if seg.getSegmentContentId() >= 0]
+
+    for seg_host in all_segment_hosts:
+        log_dir = "%s/gpAdminLogs" % os.path.expanduser("~")
+        listdir_cmd = Command(name="list logfiles on host",
+                              cmdStr="ls -l {}/{} | wc -l".format(log_dir, expected_file),
+                              remoteHost=seg_host, ctxt=REMOTE)
+        listdir_cmd.run(validateAfter=True)
+        ls_outs = listdir_cmd.get_results().stdout
+        files_found = int(ls_outs)
+        if files_found > 0 and (has == 'has no'):
+            raise Exception("expected no {} files in {} on host {}, but found".format(expected_file, log_dir, seg_host))
+        if files_found == 0 and (has == 'has'):
+            raise Exception("expected {} files in {} on host {}, but not found".format(expected_file, log_dir, seg_host))
+
+
 @given('"{filepath}" is copied to the install directory')
 def impl(context, filepath):
     gphome = os.getenv("GPHOME")
@@ -2646,6 +2930,7 @@ def step_impl(context, segment_id):
     wait_for_unblocked_transactions(context)
 
 @given('the user runs gpexpand with a static inputfile for a two-node cluster with mirrors')
+@when('the user runs gpexpand with a static inputfile for a two-node cluster with mirrors')
 def impl(context):
     inputfile_contents = """
 sdw1|sdw1|20502|/tmp/gpexpand_behave/two_nodes/data/primary/gpseg2|6|2|p
@@ -2886,7 +3171,7 @@ def impl(context, hostnames):
 
 @given("a temporary directory under '{tmp_base_dir}' with mode '{mode}' is created")
 @given('a temporary directory under "{tmp_base_dir}" to expand into')
-def make_temp_dir(context,tmp_base_dir, mode=''):
+def make_temp_dir(context, tmp_base_dir, mode=''):
     if not tmp_base_dir:
         raise Exception("tmp_base_dir cannot be empty")
     if not os.path.exists(tmp_base_dir):
@@ -2894,7 +3179,7 @@ def make_temp_dir(context,tmp_base_dir, mode=''):
     context.temp_base_dir = tempfile.mkdtemp(dir=tmp_base_dir)
     if mode:
         os.chmod(path.normpath(path.join(tmp_base_dir, context.temp_base_dir)),
-                 int(mode,8))
+                 int(mode, 8))
 
 @given('the new host "{hostnames}" is ready to go')
 def impl(context, hostnames):
@@ -3009,27 +3294,32 @@ def impl(context, config_file):
 
 @when('check segment conf: postgresql.conf')
 @then('check segment conf: postgresql.conf')
+@given('check segment conf: postgresql.conf')
 def step_impl(context):
     query = "select dbid, port, hostname, datadir from gp_segment_configuration where content >= 0"
     conn = dbconn.connect(dbconn.DbURL(dbname='postgres'), unsetSearchPath=False)
-    segments = dbconn.query(conn, query).fetchall()
-    for segment in segments:
-        dbid = "'%s'" % segment[0]
-        port = "'%s'" % segment[1]
-        hostname = segment[2]
-        datadir = segment[3]
-
-        ## check postgresql.conf
-        remote_postgresql_conf = "%s/%s" % (datadir, 'postgresql.conf')
-        local_conf_copy = os.path.join(gp.get_coordinatordatadir(), "%s.%s" % ('postgresql.conf', hostname))
-        cmd = Command(name="Copy remote conf to local to diff",
-                    cmdStr='scp %s:%s %s' % (hostname, remote_postgresql_conf, local_conf_copy))
-        cmd.run(validateAfter=True)
-
-        dic = pgconf.readfile(filename=local_conf_copy)
-        if str(dic['port']) != port:
-            raise Exception("port value in postgresql.conf of %s is incorrect. Expected:%s, given:%s" %
-                            (hostname, port, dic['port']))
+    try:
+        segments = dbconn.query(conn, query).fetchall()
+        for segment in segments:
+            dbid = "'%s'" % segment[0]
+            port = "'%s'" % segment[1]
+            hostname = segment[2]
+            datadir = segment[3]
+
+            ## check postgresql.conf
+            remote_postgresql_conf = "%s/%s" % (datadir, 'postgresql.conf')
+            local_conf_copy = os.path.join(gp.get_coordinatordatadir(), "%s.%s" % ('postgresql.conf', hostname))
+            cmd = Command(name="Copy remote conf to local to diff",
+                        cmdStr='rsync %s:%s %s' % (hostname, remote_postgresql_conf, local_conf_copy))
+            cmd.run(validateAfter=True)
+
+            dic = pgconf.readfile(filename=local_conf_copy)
+            if str(dic['port']) != port:
+                raise Exception("port value in postgresql.conf of %s is incorrect. Expected:%s, given:%s" %
+                                (hostname, port, dic['port']))
+    finally:
+        if conn:
+            conn.close()
 
 @given('the transactions are started for dml')
 def impl(context):
@@ -3370,10 +3660,10 @@ def impl(context):
     standby_local_gp_segment_configuration_file = "%s/%s.standby" % \
             (coordinator_datadir, gp_segment_configuration_backup)
 
-    cmd = Command(name="Copy standby file to coordinator", cmdStr='scp %s %s' % \
+    cmd = Command(name="Copy standby file to coordinator", cmdStr='rsync %s %s' % \
             (standby_remote_statusfile, standby_local_statusfile))
     cmd.run(validateAfter=True)
-    cmd = Command(name="Copy standby file to coordinator", cmdStr='scp %s %s' % \
+    cmd = Command(name="Copy standby file to coordinator", cmdStr='rsync %s %s' % \
             (standby_remote_gp_segment_configuration_file, standby_local_gp_segment_configuration_file))
     cmd.run(validateAfter=True)
 
@@ -3493,3 +3783,54 @@ def impl(context, args):
 def impl(context):
     locale = get_en_utf_locale()
     context.execute_steps('''When a demo cluster is created using gpinitsystem args "--lc-ctype=%s"''' % locale)
+
+
+@given('the user asynchronously runs pg_basebackup with {segment} of content {contentid} as source and the process is saved')
+@when('the user asynchronously runs pg_basebackup with {segment} of content {contentid} as source and the process is saved')
+@then('the user asynchronously runs pg_basebackup with {segment} of content {contentid} as source and the process is saved')
+def impl(context, segment, contentid):
+    if segment == 'mirror':
+        role = 'm'
+    elif segment == 'primary':
+        role = 'p'
+
+    all_segments = GpArray.initFromCatalog(dbconn.DbURL()).getDbList()
+
+    basebackup_target = all_segments[0]
+    basebackup_source = all_segments[0]
+    for seg in all_segments:
+        if role == seg.role and str(seg.content) == contentid:
+            basebackup_source = seg
+        elif str(seg.content) == contentid:
+            basebackup_target = seg
+
+    make_temp_dir(context, '/tmp')
+
+    cmd = PgBaseBackup(target_datadir=context.temp_base_dir,
+                       source_host=basebackup_source.getSegmentHostName(),
+                       source_port=str(basebackup_source.getSegmentPort()),
+                       create_slot=True,
+                       replication_slot_name="replication_slot",
+                       forceoverwrite=True,
+                       target_gp_dbid=basebackup_target.getSegmentDbId())
+    asyncproc = cmd.runNoWait()
+    context.asyncproc = asyncproc
+
+
+@given('gp_stat_replication table has pg_basebackup entry for content {contentid}')
+@when('gp_stat_replication table has pg_basebackup entry for content {contentid}')
+@then('gp_stat_replication table has pg_basebackup entry for content {contentid}')
+def impl(context, contentid):
+    sql = "select gp_segment_id from gp_stat_replication where application_name = 'pg_basebackup'"
+
+    try:
+        with closing(dbconn.connect(dbconn.DbURL())) as conn:
+            res = dbconn.query(conn, sql)
+            rows = res.fetchall()
+    except Exception as e:
+        raise Exception("Failed to query gp_stat_replication: %s" % str(e))
+
+    segments_with_running_basebackup = {str(row[0]) for row in rows}
+
+    if str(contentid) not in segments_with_running_basebackup:
+        raise Exception("pg_basebackup entry was not found for content %s in gp_stat_replication" % contentid)

From 2528f3263eba79e19d342deaf874957681e2b5a4 Mon Sep 17 00:00:00 2001
From: Piyush Chandwadkar <65647926+piyushc01@users.noreply.github.com>
Date: Fri, 20 Jan 2023 17:14:11 +0530
Subject: [PATCH 39/48] Fixing gpcheckperf failure on -V with -f option
 (#14310)

gpcheckperf was throwing an exception when run with the -f and -V options together.
This happened because, with the -V option, the gpssh command output contains a few
extra lines, which caused trouble while parsing the output. This change adds a flag to skip verbose mode when running the gpssh command, and uses this non-verbose mode to execute the command when getting the hostnames.
Corrected run-time errors in gpcheckperf caused by the move to Python 3.
Also added test cases to cover the scenarios involving the host file and the -V option.
---
 gpMgmt/bin/gpcheckperf                        |  37 +++--
 .../behave/mgmt_utils/gpcheckperf.feature     |  37 +++++
 .../behave/mgmt_utils/steps/mgmt_utils.py     | 153 +++++++++++++++++-
 3 files changed, 210 insertions(+), 17 deletions(-)

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index 09bb496a669..c6c4a906158 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -82,9 +82,9 @@ def strcmd(cmd):
     return reduce(lambda x, y: x + ' ' + y, map(lambda x: x.find(' ') > 0 and "'" + x + "'" or x, cmd))
 
 
-def gpssh(cmd):
+def gpssh(cmd, call_verbose=True):
     c = ['%s/bin/gpssh' % GPHOME]
-    if GV.opt['-V']:
+    if GV.opt['-V'] and call_verbose:
         c.append('-v')
     if GV.opt['-f']:
         c.append('-f')
@@ -541,7 +541,7 @@ def spawnNetperfTestBetween(x, y, netperf_path, netserver_port, sec=5):
          x, cmd]
     proc = None
     try:
-        if GV.opt['-v']:
+        if GV.opt['-v'] or GV.opt['-V']:
             print('[Info]', strcmd(c))
         proc = subprocess.Popen(c, stdout=subprocess.PIPE)
     except KeyboardInterrupt:
@@ -740,13 +740,23 @@ def get_host_map(hostlist):
     uniqhosts = dict()  # unique host list
 
     # get list of hostnames
-    rc, out = gpssh('hostname')
+    # disabling verbose mode for gpssh as it is adding extra lines of output
+    rc, out = gpssh('hostname', False)
+
     if not rc:
         raise Exception('Encountered error running hostname')
 
+    ''' Sample output:
+        [sdw1] sdw1
+        [sdw2] sdw2
+    '''
+
     # get unique hostname list
     for line in out.splitlines():
-        seg, host = line.translate(None, '[]').split()
+        seg, host = line.translate(str.maketrans('','','[]')).split()
+        # removing \r and b coming in the output of the command in hostname
+        host = host.replace('\\r\'', '')
+        host = host.replace('b\'', '')
         uniqhosts[host] = seg
 
     # get list of segments associated with each host (can't use gpssh since it de-dupes hosts)
@@ -755,7 +765,8 @@ def get_host_map(hostlist):
 
         proc = None
         try:
-            if GV.opt['-v']: print('[Info]', strcmd(cmd))
+            if GV.opt['-v'] or GV.opt['-V']:
+                print('[Info]', strcmd(cmd))
             proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
             out = proc.stdout.read(-1)
             rc = proc.wait()
@@ -781,7 +792,7 @@ def runNetPerfTestMatrix():
     '''
     (netperf, hostlist, netserver_port) = setupNetPerfTest()
     if not netperf:
-        return None
+        return None, None
 
     # dict() of seglist[segname] = hostname, uniqhosts[hostname] = 1 segment name
     seglist, uniqhosts = get_host_map(hostlist)
@@ -807,19 +818,21 @@ def runNetPerfTestMatrix():
 
 
 def printMatrixResult(result, seglist):
+    if not result:
+        return
     print('Full matrix netperf bandwidth test')
 
     # sum up Rx/Tx rate for each host
     netTx = dict()
     netRx = dict()
     for h in result:
-        if netTx.has_key(h[0]):
+        if h[0] in netTx:
             netTx[h[0]] += float(h[6])
         else:
             netTx[h[0]] = float(h[6])
 
         # netRx requires that we lookup the hostname for a given segment name
-        if netRx.has_key(seglist[h[1]]):
+        if seglist[h[1]] in netRx:
             netRx[seglist[h[1]]] += float(h[6])
         else:
             netRx[seglist[h[1]]] = float(h[6])
@@ -850,7 +863,7 @@ def printMatrixResult(result, seglist):
 
     copy = n[:]
     copy.sort()
-    median = copy[len(copy) / 2]
+    median = copy[len(copy) // 2]
 
     print('')
     print('Summary:')
@@ -863,6 +876,8 @@ def printMatrixResult(result, seglist):
 
 
 def printNetResult(result):
+    if not result:
+        return
     print('Netperf bisection bandwidth test')
     for h in result:
         print('%s -> %s = %f' % (h[0], h[1], h[6]))
@@ -894,6 +909,8 @@ def printNetResult(result):
 
 
 def printResult(title, result):
+    if not result:
+        return
     totTime = 0
     totBytes = 0
     totMBPS = 0
diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
index a59f309f112..9b660b491a8 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
@@ -15,3 +15,40 @@ Feature: Tests for gpcheckperf
     Then  gpcheckperf should return a return code of 0
     And   gpcheckperf should print "avg = " to stdout
     And   gpcheckperf should not print "NOTICE: -t is deprecated " to stdout
+
+  @concourse_cluster
+  Scenario: gpcheckperf runs tests by passing hostfile in super verbose mode
+    Given the database is running
+    And   create a gpcheckperf input host file
+    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r M -d /data/gpdata/ --duration=3m -V"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "Full matrix netperf bandwidth test" to stdout
+    And   gpcheckperf should not print "IndexError: list index out of range" to stdout
+
+ @concourse_cluster
+  Scenario: gpcheckperf runs tests by passing hostfile in verbose mode
+    Given the database is running
+    And   create a gpcheckperf input host file
+    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r M -d /data/gpdata/ --duration=3m -v"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "Full matrix netperf bandwidth test" to stdout
+    And   gpcheckperf should not print "IndexError: list index out of range" to stdout
+
+ @concourse_cluster
+  Scenario: gpcheckperf runs tests by passing hostfile in regular mode
+    Given the database is running
+    And   create a gpcheckperf input host file
+    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r M -d /data/gpdata/ --duration=3m"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "Full matrix netperf bandwidth test" to stdout
+    And   gpcheckperf should not print "IndexError: list index out of range" to stdout
+
+  @concourse_cluster
+  Scenario: gpcheckperf does not throws typeerror when run with single host
+    Given the database is running
+    And   create a gpcheckperf input host file
+    When  the user runs "gpcheckperf -h sdw1 -r M -d /data/gpdata/ --duration=3m"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "single host only - abandon netperf test" to stdout
+    And gpcheckperf should not print "TypeError:" to stdout
+
diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
index 24049c068b5..7b346b04dde 100644
--- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
+++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
@@ -46,7 +46,7 @@ def show_all_installed(gphome):
     name = x[0].lower()
     if 'ubuntu' in name:
         return "dpkg --get-selections --admindir=%s/share/packages/database/deb | awk '{print $1}'" % gphome
-    elif 'centos' in name or 'rhel' in name:
+    elif 'centos' in name or 'rhel' in name or 'rocky' in name or 'ol' in name:
         return "rpm -qa --dbpath %s/share/packages/database" % gphome
     else:
         raise Exception('UNKNOWN platform: %s' % str(x))
@@ -56,7 +56,7 @@ def remove_native_package_command(gphome, full_gppkg_name):
     name = x[0].lower()
     if 'ubuntu' in name:
         return 'fakeroot dpkg --force-not-root --log=/dev/null --instdir=%s --admindir=%s/share/packages/database/deb -r %s' % (gphome, gphome, full_gppkg_name)
-    elif 'centos' in name or 'rhel' in name:
+    elif 'centos' in name or 'rhel' in name or 'rocky' in name or 'ol' in name:
         return 'rpm -e %s --dbpath %s/share/packages/database' % (full_gppkg_name, gphome)
     else:
         raise Exception('UNKNOWN platform: %s' % str(x))
@@ -438,12 +438,30 @@ def impl(context, content):
     dburl = dbconn.DbURL(hostname=host, port=port, dbname='template1')
     wait_for_desired_query_result(dburl, query, desired_result, utility=True)
 
+
+@given('the user just waits until recovery_progress.file is created in {logdir}')
+@when('the user just waits until recovery_progress.file is created in {logdir}')
+@then('the user just waits until recovery_progress.file is created in {logdir}')
+def impl(context, logdir):
+    attempt = 0
+    num_retries = 6000
+    log_dir = _get_gpAdminLogs_directory() if logdir == 'gpAdminLogs' else logdir
+    recovery_progress_file = '{}/recovery_progress.file'.format(log_dir)
+    while attempt < num_retries:
+        attempt += 1
+        if os.path.exists(recovery_progress_file):
+            return
+        time.sleep(0.1)
+        if attempt == num_retries:
+            raise Exception('Timed out after {} retries'.format(num_retries))
+
+
 @given('the user waits until recovery_progress.file is created in {logdir} and verifies its format')
 @when('the user waits until recovery_progress.file is created in {logdir} and verifies its format')
 @then('the user waits until recovery_progress.file is created in {logdir} and verifies its format')
 def impl(context, logdir):
     attempt = 0
-    num_retries = 60000
+    num_retries = 6000
     log_dir = _get_gpAdminLogs_directory() if logdir == 'gpAdminLogs' else logdir
     recovery_progress_file = '{}/recovery_progress.file'.format(log_dir)
     while attempt < num_retries:
@@ -459,7 +477,7 @@ def impl(context, logdir):
                     return
                 else:
                     raise Exception('File present but incorrect format line "{}"'.format(line))
-        time.sleep(0.01)
+        time.sleep(0.1)
         if attempt == num_retries:
             raise Exception('Timed out after {} retries'.format(num_retries))
 
@@ -3685,8 +3703,8 @@ def impl(context, command, input):
     context.error_message = stderr.decode()
 
 def are_on_different_subnets(primary_hostname, mirror_hostname):
-    primary_broadcast = check_output(['ssh', '-n', primary_hostname, "/sbin/ip addr show eth0 | grep 'inet .* brd' | awk '{ print $4 }'"])
-    mirror_broadcast = check_output(['ssh', '-n', mirror_hostname,  "/sbin/ip addr show eth0 | grep 'inet .* brd' | awk '{ print $4 }'"])
+    primary_broadcast = check_output(['ssh', '-n', primary_hostname, "/sbin/ip addr show | grep 'inet .* brd' | awk '{ print $4 }'"])
+    mirror_broadcast = check_output(['ssh', '-n', mirror_hostname,  "/sbin/ip addr show | grep 'inet .* brd' | awk '{ print $4 }'"])
     if not primary_broadcast:
         raise Exception("primary hostname %s has no broadcast address" % primary_hostname)
     if not mirror_broadcast:
@@ -3784,7 +3802,6 @@ def impl(context):
     locale = get_en_utf_locale()
     context.execute_steps('''When a demo cluster is created using gpinitsystem args "--lc-ctype=%s"''' % locale)
 
-
 @given('the user asynchronously runs pg_basebackup with {segment} of content {contentid} as source and the process is saved')
 @when('the user asynchronously runs pg_basebackup with {segment} of content {contentid} as source and the process is saved')
 @then('the user asynchronously runs pg_basebackup with {segment} of content {contentid} as source and the process is saved')
@@ -3834,3 +3851,125 @@ def impl(context, contentid):
 
     if str(contentid) not in segments_with_running_basebackup:
         raise Exception("pg_basebackup entry was not found for content %s in gp_stat_replication" % contentid)
+
+@given('create a gpcheckperf input host file')
+def impl(context):
+    cmd = Command(name='create input host file', cmdStr='echo sdw1 > /tmp/hostfile1;echo mdw >> /tmp/hostfile1;')
+    cmd.run(validateAfter=True)
+
+@given('backup /etc/hosts file and update hostname entry for localhost')
+def impl(context):
+     # Backup current /etc/hosts file
+     cmd = Command(name='backup the hosts file', cmdStr='sudo cp /etc/hosts /tmp/hosts_orig')
+     cmd.run(validateAfter=True)
+     # Get the host-name
+     cmd = Command(name='get hostname', cmdStr='hostname')
+     cmd.run(validateAfter=True)
+     hostname = cmd.get_stdout()
+     # Update entry in current /etc/hosts file to add new host-address
+     cmd = Command(name='update hostlist with new hostname', cmdStr="sudo sed 's/%s/%s__1 %s/g' </etc/hosts >> /tmp/hosts; sudo cp -f /tmp/hosts /etc/hosts;rm /tmp/hosts"
+                                                        %(hostname, hostname, hostname))
+     cmd.run(validateAfter=True)
+
+@then('restore /etc/hosts file and cleanup hostlist file')
+def impl(context):
+    cmd = "sudo mv -f /tmp/hosts_orig /etc/hosts; rm -f /tmp/clusterConfigFile-1; rm -f /tmp/hostfile--1"
+    context.execute_steps(u'''Then the user runs command "%s"''' % cmd)
+
+@given('update hostlist file with updated host-address')
+def impl(context):
+     cmd = Command(name='get hostname', cmdStr='hostname')
+     cmd.run(validateAfter=True)
+     hostname = cmd.get_stdout()
+     # Update entry in hostfile to replace with address
+     cmd = Command(name='update temp hosts file', cmdStr= "sed 's/%s/%s__1/g' < ../gpAux/gpdemo/hostfile >> /tmp/hostfile--1" % (hostname, hostname))
+     cmd.run(validateAfter=True)
+
+@given('update clusterConfig file with new port and host-address')
+def impl(context):
+     cmd = Command(name='get hostname', cmdStr='hostname')
+     cmd.run(validateAfter=True)
+     hostname = cmd.get_stdout()
+
+     # Create a copy of config file
+     cmd = Command(name='create a copy of config file',
+                   cmdStr= "cp ../gpAux/gpdemo/clusterConfigFile /tmp/clusterConfigFile-1;")
+     cmd.run(validateAfter=True)
+
+     # Update hostfile location
+     cmd = Command(name='update master hostname in config file',
+                   cmdStr= "sed 's/MACHINE_LIST_FILE=.*/MACHINE_LIST_FILE=\/tmp\/hostfile--1/g' -i /tmp/clusterConfigFile-1")
+     cmd.run(validateAfter=True)
+
+
+@then('verify that cluster config has host-name populated correctly')
+def impl(context):
+     cmd = Command(name='get hostname', cmdStr='hostname')
+     cmd.run(validateAfter=True)
+     hostname_orig = cmd.get_stdout().strip()
+     hostname_new = "{}__1".format(hostname_orig)
+     # Verift host-address not populated in the config
+     with closing(dbconn.connect(dbconn.DbURL(), unsetSearchPath=False)) as conn:
+         sql = "SELECT count(*) FROM gp_segment_configuration WHERE hostname='%s'" % hostname_new
+         num_matching = dbconn.querySingleton(conn, sql)
+         if(num_matching != 0):
+             raise Exception("Found entries in gp_segment_configuration is host-address popoulated as host-name")
+     # Verify correct host-name is populated in the config
+     with closing(dbconn.connect(dbconn.DbURL(), unsetSearchPath=False)) as conn:
+         sql = "SELECT count( distinct hostname) FROM gp_segment_configuration WHERE hostname='%s'" % hostname_orig
+         num_matching = dbconn.querySingleton(conn, sql)
+         if(num_matching != 1):
+             raise Exception("Found no entries in gp_segment_configuration is host-address popoulated as host-name")
+
+@given('update the private keys for the new host address')
+def impl(context):
+     cmd = Command(name='get hostname', cmdStr='hostname')
+     cmd.run(validateAfter=True)
+     hostname = "{}__1".format(cmd.get_stdout().strip())
+     cmd_str = "rm -f ~/.ssh/id_rsa ~/.ssh/id_rsa.pub ~/.ssh/known_hosts; $GPHOME/bin/gpssh-exkeys -h {}".format(hostname)
+     cmd = Command(name='update ssh private keys', cmdStr=cmd_str)
+     cmd.run(validateAfter=True)
+
+@then('verify replication slot {slot} is available on all the segments')
+@when('verify replication slot {slot} is available on all the segments')
+@given('verify replication slot {slot} is available on all the segments')
+def impl(context, slot):
+    gparray = GpArray.initFromCatalog(dbconn.DbURL())
+    segments = gparray.getDbList()
+    dbname = "template1"
+    query = "SELECT count(*) FROM pg_catalog.pg_replication_slots WHERE slot_name = '{}'".format(slot)
+
+    for seg in segments:
+        if seg.isSegmentPrimary():
+            host = seg.getSegmentHostName()
+            port = seg.getSegmentPort()
+            with closing(dbconn.connect(dbconn.DbURL(dbname=dbname, port=port, hostname=host),
+                                        utility=True, unsetSearchPath=False)) as conn:
+                result = dbconn.querySingleton(conn, query)
+                if result == 0:
+                    raise Exception("Slot does not exist for host:{}, port:{}".format(host, port))
+
+
+@given('user waits until gp_stat_replication table has no pg_basebackup entries for content {contentids}')
+@when('user waits until gp_stat_replication table has no pg_basebackup entries for content {contentids}')
+@then('user waits until gp_stat_replication table has no pg_basebackup entries for content {contentids}')
+def impl(context, contentids):
+     retries = 600
+     content_ids = contentids.split(',')
+     content_ids = ', '.join(c for c in content_ids)
+     sql = "select count(*) from gp_stat_replication where application_name = 'pg_basebackup' and gp_segment_id in (%s)" %(content_ids)
+     no_basebackup = False
+
+     for i in range(retries):
+         try:
+             with closing(dbconn.connect(dbconn.DbURL())) as conn:
+                 res = dbconn.querySingleton(conn, sql)
+         except Exception as e:
+             raise Exception("Failed to query gp_stat_replication: %s" % str(e))
+         if res == 0:
+             no_basebackup = True
+             break
+         time.sleep(1)
+
+     if not no_basebackup:
+         raise Exception("pg_basebackup entry was found for contents %s in gp_stat_replication after %d retries" % (contentids, retries))

From c72ce69bf3c6be5792c9e3a6b23a53ff34aeff03 Mon Sep 17 00:00:00 2001
From: Nihal Jain <jnihal@vmware.com>
Date: Mon, 6 Mar 2023 11:31:12 +0530
Subject: [PATCH 40/48] gpcheckperf: Fix memory calculation function (#14865)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* gpcheckperf: Fix memory calculation function

Currently, there is a bug in the `getMemory()` function in `gpcheckperf` because of the way we check the return code of the `run()` method which is called inside `getMemory()`. The `run()` method returns an integer value of `zero` in case of success and a `non-zero` value if it fails.

We check this value using the condition `if not ok`, which is incorrect: when the `run()` method succeeds (`ok = 0`), `not ok` evaluates to `True`, causing the `getMemory()` function to assume that the `run()` method failed when in reality it did not.

A simple fix would be to change the condition from `if not ok` to `if ok != 0` to check for any possible failure from the `run()` method.

Further, the way `getMemory()` handles errors is also incorrect. It just returns `0` whenever there is an error, which can lead to an incorrect file size being used for the disk performance tests. This is because the gpcheckperf utility internally calls the `multidd` command to perform disk performance tests. `multidd` also accepts a file size parameter through the -S option and, when that option is not set (or is equal to 0), uses its own default value of `2 * memory_size`; but instead of dividing that value by the number of input directories, it uses it for each directory, i.e. the total file size would be `2 * memory_size * no_of_input_directories`. As a result, the user can hit a File System Full error when the number of input directories is large.

Hence we need to properly handle the error in the `getMemory()` function and exit from the code instead of just returning `0`

Before:
```
$ gpcheckperf -d /tmp/test1 -d /tmp/test2 -d /tmp/test3 -h localhost -rd
 disk write avg time (sec): 5.46
 disk write tot bytes: 12884901888     --> is equal to 2 * memory_size * no_of_input_directories
 disk write tot bandwidth (MB/s): 2250.55
 disk write min bandwidth (MB/s): 2250.55 [localhost]
 disk write max bandwidth (MB/s): 2250.55 [localhost]
 ```
After:
```
$ gpcheckperf -d /tmp/test1 -d /tmp/test2 -d /tmp/test3 -h localhost -rd
 disk write avg time (sec): 1.87
 disk write tot bytes: 4295000064     --> is equal to 2 * memory_size
 disk write tot bandwidth (MB/s): 2190.39
 disk write min bandwidth (MB/s): 2190.39 [localhost]
 disk write max bandwidth (MB/s): 2190.39 [localhost]
```

Also added unit test cases to test the getMemory() function outputs and added a main section to the gpcheckperf code.
---
 gpMgmt/bin/gpcheckperf                        | 109 ++++++++++--------
 .../test/unit/test_unit_gpcheckperf.py        |  66 +++++++++++
 .../behave/mgmt_utils/gpcheckperf.feature     |  10 ++
 3 files changed, 136 insertions(+), 49 deletions(-)
 create mode 100644 gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index c6c4a906158..44c610861f8 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -133,26 +133,27 @@ def getPlatform():
 
 
 def getMemory():
-    if getPlatform() == 'linux':
-        ok, out = run("sh -c 'cat /proc/meminfo | grep MemTotal'")
-        if not ok:
-            return 0
+    platform = getPlatform()
+
+    if platform == 'linux':
+        rc, out = run("sh -c 'cat /proc/meminfo | grep MemTotal'")
+        if rc != 0:
+            return None
         word_list = out.strip().split(' ')
         val = int(word_list[len(word_list) - 2])
         factor = word_list[len(word_list) - 1]
         if factor == 'kB':
-            return val * 1024
-        return 0
+            return val * 1024 if val else None
 
-    if getPlatform() == 'darwin':
-        ok, out = run("/usr/sbin/sysctl hw.physmem")
-        if not ok:
-            return 0
+    if platform == 'darwin':
+        rc, out = run("/usr/sbin/sysctl hw.physmem")
+        if rc != 0:
+            return None
         word_list = out.strip().split(' ')
         val = int(word_list[1])
-        return val
+        return val if val else None
 
-    return 0
+    return None
 
 
 def parseMemorySize(line):
@@ -261,7 +262,12 @@ def parseCommandLine():
         usage('Error: maximum size for -B parameter is 1MB')
 
     if GV.opt['-S'] == 0:
-        GV.opt['-S'] = 2 * getMemory() / len(GV.opt['-d'])
+        system_mem_size = getMemory()
+        if system_mem_size is not None:
+            GV.opt['-S'] = 2 * system_mem_size / len(GV.opt['-d'])
+        else:
+            sys.exit('[Error] could not get system memory size. Instead, you can use the -S option to provide the file size value')
+
     else:
         GV.opt['-S'] /= len(GV.opt['-d'])
 
@@ -947,50 +953,55 @@ def printResult(title, result):
     print('')
 
 
-try:
-    parseCommandLine()
-    runSetup()
-    diskWriteResult = diskReadResult = streamResult = netResult = None
-    tornDown = False
+def main():
     try:
-        if GV.opt['-r'].find('d') >= 0:
-            multidd = copyExecOver('multidd')
-            diskWriteResult = runDiskWriteTest(multidd)
-            diskReadResult = runDiskReadTest(multidd)
+        parseCommandLine()
+        runSetup()
+        diskWriteResult = diskReadResult = streamResult = netResult = None
+        tornDown = False
+        try:
+            if GV.opt['-r'].find('d') >= 0:
+                print('[Warning] Using %d bytes for disk performance test. This might take some time' % (GV.opt['-S'] * len(GV.opt['-d'])))
+                multidd = copyExecOver('multidd')
+                diskWriteResult = runDiskWriteTest(multidd)
+                diskReadResult = runDiskReadTest(multidd)
 
-        if GV.opt['-r'].find('s') >= 0:
-            streamResult = runStreamTest()
+            if GV.opt['-r'].find('s') >= 0:
+                streamResult = runStreamTest()
 
-        if GV.opt['--net'] == 'netperf':
-            netResult = runNetPerfTest()
-        elif GV.opt['--net'] == 'parallel':
-            netResult = runNetPerfTestParallel()
-        elif GV.opt['--net'] == 'matrix':
-            netResult, seglist = runNetPerfTestMatrix()
+            if GV.opt['--net'] == 'netperf':
+                netResult = runNetPerfTest()
+            elif GV.opt['--net'] == 'parallel':
+                netResult = runNetPerfTestParallel()
+            elif GV.opt['--net'] == 'matrix':
+                netResult, seglist = runNetPerfTestMatrix()
 
-        runTeardown()
+            runTeardown()
+
+        finally:
+            print('')
+            print('====================')
+            print('==  RESULT %s' % datetime.datetime.now().isoformat())
+            print('====================')
 
-    finally:
-        print('')
-        print('====================')
-        print('==  RESULT %s' % datetime.datetime.now().isoformat())
-        print('====================')
+            if diskWriteResult:
+                printResult('disk write', diskWriteResult)
 
-        if diskWriteResult:
-            printResult('disk write', diskWriteResult)
+            if diskReadResult:
+                printResult('disk read', diskReadResult)
 
-        if diskReadResult:
-            printResult('disk read', diskReadResult)
+            if streamResult:
+                printResult('stream', streamResult)
 
-        if streamResult:
-            printResult('stream', streamResult)
+            if netResult and GV.opt['--net'] == 'matrix':
+                printMatrixResult(netResult, seglist)
+            elif netResult and GV.opt['--net']:
+                printNetResult(netResult)
 
-        if netResult and GV.opt['--net'] == 'matrix':
-            printMatrixResult(netResult, seglist)
-        elif netResult and GV.opt['--net']:
-            printNetResult(netResult)
+            runTeardown()
 
-        runTeardown()
+    except KeyboardInterrupt:
+        print('[Abort] Keyboard Interrupt ...')
 
-except KeyboardInterrupt:
-    print('[Abort] Keyboard Interrupt ...')
+if __name__ == '__main__':
+     main()
diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py
new file mode 100644
index 00000000000..097d4a1b191
--- /dev/null
+++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py
@@ -0,0 +1,66 @@
+import imp
+import os
+import sys
+from mock import patch
+from gppylib.test.unit.gp_unittest import GpTestCase,run_tests
+
+class GpCheckPerf(GpTestCase):
+    def setUp(self):
+        gpcheckcat_file = os.path.abspath(os.path.dirname(__file__) + "/../../../gpcheckperf")
+        self.subject = imp.load_source('gpcheckperf', gpcheckcat_file)
+
+    def tearDown(self):
+        super(GpCheckPerf, self).tearDown()
+
+    @patch('gpcheckperf.getPlatform', return_value='darwin')
+    @patch('gpcheckperf.run')
+    def test_get_memory_on_darwin(self, mock_run, mock_get_platform):
+        mock_run.return_value = [1, 'hw.physmem: 1234']
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, None)
+
+        mock_run.return_value = [0, 'hw.physmem: 0']
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, None)
+
+        mock_run.return_value = [0, 'hw.physmem: 1234']
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, 1234)
+
+    @patch('gpcheckperf.getPlatform', return_value='linux')
+    @patch('gpcheckperf.run')
+    def test_get_memory_on_linux(self, mock_run, mock_get_platform):
+        mock_run.return_value = [1, 'MemTotal:        10 kB']
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, None)
+
+        mock_run.return_value = [0, 'MemTotal:        0 kB']
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, None)
+
+        mock_run.return_value = [0, 'MemTotal:        10 kB']
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, 10240)
+
+    @patch('gpcheckperf.getPlatform', return_value='abc')
+    def test_get_memory_on_invalid_platform(self, mock_get_platform):
+        actual_result = self.subject.getMemory()
+        self.assertEquals(actual_result, None)
+
+    @patch('gpcheckperf.getMemory', return_value=None)
+    def test_parseCommandLine_when_get_memory_fails(self, mock_get_memory):
+        sys.argv = ["gpcheckperf", "-h", "locahost", "-r", "d", "-d", "/tmp"]
+        with self.assertRaises(SystemExit) as e:
+            self.subject.parseCommandLine()
+
+        self.assertEqual(e.exception.code, '[Error] could not get system memory size. Instead, you can use the -S option to provide the file size value')
+
+    @patch('gpcheckperf.getMemory', return_value=123)
+    def test_parseCommandLine_when_get_memory_succeeds(self, mock_get_memory):
+        sys.argv = ["gpcheckperf", "-h", "locahost", "-r", "d", "-d", "/tmp"]
+        self.subject.parseCommandLine()
+        self.assertEqual(self.subject.GV.opt['-S'], 246.0)
+
+
+if __name__ == '__main__':
+    run_tests()
diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
index 9b660b491a8..bfa13951c0b 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
@@ -52,3 +52,13 @@ Feature: Tests for gpcheckperf
     And   gpcheckperf should print "single host only - abandon netperf test" to stdout
     And gpcheckperf should not print "TypeError:" to stdout
 
+  Scenario: gpcheckperf runs with -S option and prints a warning message
+    Given the database is running
+    When  the user runs "gpcheckperf -h localhost -r d -d /tmp -S 1GB"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "\[Warning] Using 1073741824 bytes for disk performance test. This might take some time" to stdout
+
+  Scenario: gpcheckperf errors out when invalid value is passed to the -S option
+    Given the database is running
+    When  the user runs "gpcheckperf -h localhost -r d -d /tmp -S abc"
+    Then  gpcheckperf should return a return code of 1

From b6f0fc2bef9429bb01e2f74b02cc2e76129edba2 Mon Sep 17 00:00:00 2001
From: Ed Espino <eespino@vmware.com>
Date: Sun, 12 Mar 2023 12:04:37 -0700
Subject: [PATCH 41/48] gpcheckperf - Update a Python 3 reference of "python"
 to "python3"

In gpcheckperf, a remaining "python" reference exists. This commit
renames it to "python3" and allows it to run when the symbolic link
/usr/bin/python pointing to /usr/bin/python3 does not exist.

Tl;dr - An attempt was made to update all Greenplum utilities to
reference 'python3'. A reference was left unchanged in gpcheckperf.

Python 3 installations by default do not create the symbolic link
/usr/bin/python --> /usr/bin/python3. This has been observed in Python
3 installations on Rocky Linux 8 & 9 and Ubuntu 20.04 & 22.04.

In the case of gpcheckperf, when invoked, the output below reveals the
"Error" a user receives when /usr/bin/python does not exist:

```
gpadmin@cdw:~$ /usr/local/gp7/bin/gpcheckperf -h cdw -d /data -r d -S 1 -v
--------------------
  SETUP 2023-03-12T17:26:27.967030
--------------------
[Info] verify python interpreter exists
[Info] /usr/local/gp7/bin/gpssh -h cdw 'python -c print'
--------------------
  TEARDOWN
--------------------
[Info] /usr/local/gp7/bin/gpssh -h cdw 'rm -rf  /data/gpcheckperf_$USER'
[Error] unable to find python interpreter on some hosts
        verify PATH variables on the hosts
gpadmin@cdw:~$
```
---
 gpMgmt/bin/gpcheckperf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index 44c610861f8..6847c18c475 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -346,14 +346,14 @@ def runSetup():
         print('--------------------')
     okCount = 0
     try:
-        # check python reachable
+        # Verify python3 is accessible
         if GV.opt['-v']:
-            print('[Info] verify python interpreter exists')
-        (ok, out) = gpssh('python -c print')
+            print('[Info] verify python3 interpreter exists')
+        (ok, out) = gpssh('python3 -c print')
         if not ok:
             if not GV.opt['-v']:
                 print(out)
-            sys.exit("[Error] unable to find python interpreter on some hosts\n"
+            sys.exit("[Error] unable to find python3 interpreter on some hosts\n"
                      + "        verify PATH variables on the hosts")
 
             # mkdir cperf

From 957a7e83ea5f3e24eedb616a45e3e83854879f1f Mon Sep 17 00:00:00 2001
From: Piyush Chandwadkar <pchandwadkar@vmware.com>
Date: Mon, 20 Mar 2023 18:25:26 +0530
Subject: [PATCH 42/48] gpcheckperf: incorporating parity changes from 6X PR

Parity changes from 6X PR: https://github.com/greenplum-db/gpdb/pull/15192
Changes are as follows:
    1. Controlling verbosity of gpssh() function through verbose parameter
    2. Updating gpssh calls with verbose parameter
    3. Removing non-required checks in print*Results()
    4. Removing regex removal for b' and \r characters when getting hostname
       as it is fixed in gpssh.
    5. Re-structured test cases for gpcheckperf
---
 gpMgmt/bin/gpcheckperf                        | 46 +++++-----
 .../behave/mgmt_utils/gpcheckperf.feature     | 85 +++++++++++--------
 2 files changed, 70 insertions(+), 61 deletions(-)

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index 6847c18c475..a2f259cb22d 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -82,9 +82,9 @@ def strcmd(cmd):
     return reduce(lambda x, y: x + ' ' + y, map(lambda x: x.find(' ') > 0 and "'" + x + "'" or x, cmd))
 
 
-def gpssh(cmd, call_verbose=True):
+def gpssh(cmd, verbose):
     c = ['%s/bin/gpssh' % GPHOME]
-    if GV.opt['-V'] and call_verbose:
+    if verbose:
         c.append('-v')
     if GV.opt['-f']:
         c.append('-f')
@@ -326,13 +326,13 @@ def runTeardown():
     for d in GV.opt['-d']:
         dirs = '%s %s/gpcheckperf_$USER' % (dirs, d)
     try:
-        gpssh('rm -rf ' + dirs)
+        gpssh('rm -rf ' + dirs, GV.opt['-V'])
     except:
         pass
 
     try:
         if GV.opt['--net']:
-            gpssh(killall(GV.opt['--netserver']))
+            gpssh(killall(GV.opt['--netserver']), GV.opt['-V'])
     except:
         pass
 
@@ -349,7 +349,7 @@ def runSetup():
         # Verify python3 is accessible
         if GV.opt['-v']:
             print('[Info] verify python3 interpreter exists')
-        (ok, out) = gpssh('python3 -c print')
+        (ok, out) = gpssh('python3 -c print', GV.opt['-V'])
         if not ok:
             if not GV.opt['-v']:
                 print(out)
@@ -364,7 +364,7 @@ def runSetup():
             dirs = '%s %s/gpcheckperf_$USER' % (dirs, d)
 
         cmd = 'rm -rf %s ; mkdir -p %s' % (dirs, dirs)
-        (ok, out) = gpssh(cmd)
+        (ok, out) = gpssh(cmd, GV.opt['-V'])
         if not ok:
             print('failed gpssh: %s' % out)
             sys.exit("[Error] unable to make gpcheckperf directory. \n"
@@ -402,7 +402,7 @@ def copyExecOver(fname):
         sys.exit('[Error] command failed: gpsync %s =:%s with output: %s' % (path, target, out))
 
     # chmod +x file
-    (ok, out) = gpssh('chmod a+rx %s' % target)
+    (ok, out) = gpssh('chmod a+rx %s' % target, GV.opt['-V'])
     if not ok:
         sys.exit('[Error] command failed: chmod a+rx %s with output: %s' % (target, out))
 
@@ -458,7 +458,7 @@ def runDiskWriteTest(multidd):
         cmd = cmd + (' -B %d' % GV.opt['-B'])
     if GV.opt['-S']:
         cmd = cmd + (' -S %d' % GV.opt['-S'])
-    (ok, out) = gpssh(cmd)
+    (ok, out) = gpssh(cmd, GV.opt['-V'])
     if not ok:
         sys.exit('[Error] command failed: %s with output: %s' % (cmd, out))
     return parseMultiDDResult(out)
@@ -477,7 +477,7 @@ def runDiskReadTest(multidd):
         cmd = cmd + (' -B %d' % GV.opt['-B'])
     if GV.opt['-S']:
         cmd = cmd + (' -S %d' % GV.opt['-S'])
-    (ok, out) = gpssh(cmd)
+    (ok, out) = gpssh(cmd, GV.opt['-V'])
     if not ok:
         sys.exit('[Error] command failed: %s with output: %s' % (cmd, out))
     return parseMultiDDResult(out)
@@ -490,7 +490,7 @@ def runStreamTest():
     print('--------------------')
 
     cmd = copyExecOver('stream')
-    (ok, out) = gpssh(cmd)
+    (ok, out) = gpssh(cmd, GV.opt['-V'])
     if not ok:
         sys.exit('[Error] command failed: %s with output: %s' % (cmd, out))
     out = io.StringIO(out)
@@ -512,9 +512,9 @@ def startNetServer():
     for i in range(5):
         if i > 0:
             print('[Warning] retrying with port %d' % port)
-        (ok, out) = gpssh(killall(GV.opt['--netserver']))
+        (ok, out) = gpssh(killall(GV.opt['--netserver']), GV.opt['-V'])
 
-        (ok, out) = gpssh('%s -p %d > /dev/null 2>&1' % (rmtPath, port))
+        (ok, out) = gpssh('%s -p %d > /dev/null 2>&1' % (rmtPath, port), GV.opt['-V'])
         if ok:
             return port
 
@@ -745,8 +745,15 @@ def get_host_map(hostlist):
     seglist = dict()  # segment list
     uniqhosts = dict()  # unique host list
 
-    # get list of hostnames
-    # disabling verbose mode for gpssh as it is adding extra lines of output
+    '''
+     Get hostnames using non-verbose mode since verbose output makes parsing difficult with extra lines as show:
+        Using delaybeforesend 0.05 and prompt_validation_timeout 1.0
+        [Reset ...]
+        [INFO] login sdw2
+        [sdw2] sdw2
+        [INFO] completed successfully
+        [Cleanup...]
+    '''
     rc, out = gpssh('hostname', False)
 
     if not rc:
@@ -760,9 +767,6 @@ def get_host_map(hostlist):
     # get unique hostname list
     for line in out.splitlines():
         seg, host = line.translate(str.maketrans('','','[]')).split()
-        # removing \r and b coming in the output of the command in hostname
-        host = host.replace('\\r\'', '')
-        host = host.replace('b\'', '')
         uniqhosts[host] = seg
 
     # get list of segments associated with each host (can't use gpssh since it de-dupes hosts)
@@ -771,7 +775,7 @@ def get_host_map(hostlist):
 
         proc = None
         try:
-            if GV.opt['-v'] or GV.opt['-V']:
+            if GV.opt['-v']:
                 print('[Info]', strcmd(cmd))
             proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
             out = proc.stdout.read(-1)
@@ -824,8 +828,6 @@ def runNetPerfTestMatrix():
 
 
 def printMatrixResult(result, seglist):
-    if not result:
-        return
     print('Full matrix netperf bandwidth test')
 
     # sum up Rx/Tx rate for each host
@@ -882,8 +884,6 @@ def printMatrixResult(result, seglist):
 
 
 def printNetResult(result):
-    if not result:
-        return
     print('Netperf bisection bandwidth test')
     for h in result:
         print('%s -> %s = %f' % (h[0], h[1], h[6]))
@@ -915,8 +915,6 @@ def printNetResult(result):
 
 
 def printResult(title, result):
-    if not result:
-        return
     totTime = 0
     totBytes = 0
     totMBPS = 0
diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
index bfa13951c0b..9fafafef7af 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
@@ -17,48 +17,59 @@ Feature: Tests for gpcheckperf
     And   gpcheckperf should not print "NOTICE: -t is deprecated " to stdout
 
   @concourse_cluster
-  Scenario: gpcheckperf runs tests by passing hostfile in super verbose mode
+  Scenario Outline: gpcheckperf run <test_type> test by passing hostfile in regular mode
     Given the database is running
-    And   create a gpcheckperf input host file
-    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r M -d /data/gpdata/ --duration=3m -V"
+    And create a gpcheckperf input host file
+    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r <cmd_param> -d /data/gpdata/ --duration=10s"
     Then  gpcheckperf should return a return code of 0
-    And   gpcheckperf should print "Full matrix netperf bandwidth test" to stdout
-    And   gpcheckperf should not print "IndexError: list index out of range" to stdout
+    And   gpcheckperf should print "--  NETPERF TEST" to stdout
+    And   gpcheckperf should print "<print_message>" to stdout
+    And   gpcheckperf should print "Summary:" to stdout
+    And   gpcheckperf should print "sum =" to stdout
+    And   gpcheckperf should print "min =" to stdout
+    And   gpcheckperf should print "max =" to stdout
+    And   gpcheckperf should print "avg =" to stdout
+    And   gpcheckperf should print "median =" to stdout
 
- @concourse_cluster
-  Scenario: gpcheckperf runs tests by passing hostfile in verbose mode
-    Given the database is running
-    And   create a gpcheckperf input host file
-    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r M -d /data/gpdata/ --duration=3m -v"
-    Then  gpcheckperf should return a return code of 0
-    And   gpcheckperf should print "Full matrix netperf bandwidth test" to stdout
-    And   gpcheckperf should not print "IndexError: list index out of range" to stdout
+  Examples:
+    | test_type | cmd_param | print_message                      |
+    | network   | N         | Netperf bisection bandwidth test   |
+    | matrix    | M         | Full matrix netperf bandwidth test |
 
- @concourse_cluster
-  Scenario: gpcheckperf runs tests by passing hostfile in regular mode
-    Given the database is running
-    And   create a gpcheckperf input host file
-    When  the user runs "gpcheckperf -f /tmp/hostfile1 -r M -d /data/gpdata/ --duration=3m"
-    Then  gpcheckperf should return a return code of 0
-    And   gpcheckperf should print "Full matrix netperf bandwidth test" to stdout
-    And   gpcheckperf should not print "IndexError: list index out of range" to stdout
+  @concourse_cluster
+  Scenario Outline: gpcheckperf runs <test_type> test with hostfile in <verbosity> mode
+     Given the database is running
+     And create a gpcheckperf input host file
+     When  the user runs "gpcheckperf -f /tmp/hostfile1 -r <cmd_param> -d /data/gpdata/ --duration=10s <verbose_flag>"
+     Then  gpcheckperf should return a return code of 0
+     And   gpcheckperf should print "--  NETPERF TEST" to stdout
+     And   gpcheckperf should print "<print_message>" to stdout
+     And   gpcheckperf should print "making gpcheckperf directory on all hosts ..." to stdout
+     And   gpcheckperf should print "[Info].*gpssh <gpssh_param> .*hostfile1 .*gpnetbenchClient." to stdout
+     And   gpcheckperf should print "[Info].*gpssh <gpssh_param> .*hostfile1 .*gpnetbenchServer." to stdout
+     And   gpcheckperf should print "==  RESULT*" to stdout
+     And   gpcheckperf should print "Summary:" to stdout
+     And   gpcheckperf should print "TEARDOWN" to stdout
+
+  Examples:
+    | test_type | verbosity     | cmd_param  | verbose_flag | gpssh_param | print_message                      |
+    | network   | verbose       | N          | -v           | -f          | Netperf bisection bandwidth test   |
+    | network   | extra verbose | N          | -V           | -v -f       | Netperf bisection bandwidth test   |
+    | matrix    | verbose       | M          | -v           | -f          | Full matrix netperf bandwidth test |
+    | matrix    | extra verbose | M          | -V           | -v -f       | Full matrix netperf bandwidth test |
 
   @concourse_cluster
-  Scenario: gpcheckperf does not throws typeerror when run with single host
-    Given the database is running
-    And   create a gpcheckperf input host file
-    When  the user runs "gpcheckperf -h sdw1 -r M -d /data/gpdata/ --duration=3m"
-    Then  gpcheckperf should return a return code of 0
-    And   gpcheckperf should print "single host only - abandon netperf test" to stdout
-    And gpcheckperf should not print "TypeError:" to stdout
+  Scenario Outline: running gpcheckperf single host <test_name> test case
+     Given the database is running
+     And create a gpcheckperf input host file
+     When  the user runs "gpcheckperf -h cdw -r <cmd_param> -d /data/gpdata/ --duration=10s -v"
+     Then  gpcheckperf should return a return code of 0
+     And   gpcheckperf should print "--  NETPERF TEST" to stdout
+     And   gpcheckperf should print "single host only - abandon netperf test" to stdout
+     And   gpcheckperf should print "TEARDOWN" to stdout
 
-  Scenario: gpcheckperf runs with -S option and prints a warning message
-    Given the database is running
-    When  the user runs "gpcheckperf -h localhost -r d -d /tmp -S 1GB"
-    Then  gpcheckperf should return a return code of 0
-    And   gpcheckperf should print "\[Warning] Using 1073741824 bytes for disk performance test. This might take some time" to stdout
+  Examples:
+    | test_name   | cmd_param|
+    | matrix test | M        |
+    | network test| N        |
 
-  Scenario: gpcheckperf errors out when invalid value is passed to the -S option
-    Given the database is running
-    When  the user runs "gpcheckperf -h localhost -r d -d /tmp -S abc"
-    Then  gpcheckperf should return a return code of 1

From e82142d008bbab0718a408099db9fd3f1d671642 Mon Sep 17 00:00:00 2001
From: Piyush Chandwadkar <pchandwadkar@vmware.com>
Date: Mon, 20 Mar 2023 18:30:44 +0530
Subject: [PATCH 43/48] gpcheckperf: fixing string parsing in
 parseMultiDDResult()

Issue:
After updating gpssh to return string output, gpcheckperf failed to parse results.

RCA:
String before gpssh changes: "'multidd total bytes '"
String after gpssh fix: "multidd total bytes '"
In parseMultiDDResult(), str.find() returned index 1 before the gpssh changes,
because the string it searched started with "'".
After the gpssh changes, the substring is at the beginning of the string, so str.find() returns index 0.

Fix:
str.find() returns -1 if the substring is not found otherwise returns index.
As the substring searched in parseMultiDDResult() is appearing at index zero,
corrected condition to include index zero.
---
 gpMgmt/bin/gpcheckperf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index a2f259cb22d..cc8cba62af9 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -420,7 +420,7 @@ def parseMultiDDResult(out):
         o = line[i + 2:]
 
 
-        if o.find('multidd total bytes ') > 0:
+        if o.find('multidd total bytes ') >= 0:
             h = line[1:i]
             o = o.split()
             m = re.search("(^\d+)", o[-1])
@@ -429,7 +429,7 @@ def parseMultiDDResult(out):
             bytes = int(m.group(1))
             continue
 
-        if o.find('real') > 0:
+        if o.find('real') >= 0:
             h = line[1:i]
             o = o.split()
             m = re.search("(^\d+.\d+)", o[1])

From 0cea992da2517a1234a054149e7d3b9058b34e45 Mon Sep 17 00:00:00 2001
From: Evgeniy Ratkov <e.ratkov@arenadata.io>
Date: Tue, 27 Jun 2023 08:08:49 +0300
Subject: [PATCH 44/48] gpcheckperf: add buffer size parameter (#14848)

* gpcheckperf: add buffer size parameter (#14848)

Before this patch, when running the gpcheckperf utility, the send buffer of the
underlying gpnetbenchClient utility was set to 32Kb by default. This led to
annoying and misleading warnings about connections between
hosts.

Use '--buffer-size' flag with size in kilobytes to set buffer size,
which will be used at gpnetbenchClient.
It is an optional parameter. The default value is 8Kb.
---
 gpMgmt/bin/gpcheckperf                        | 20 ++++++++++++++----
 gpMgmt/doc/gpcheckperf_help                   |  7 ++++++-
 .../behave/mgmt_utils/gpcheckperf.feature     | 21 +++++++++++++++++++
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index cc8cba62af9..98ade0638e7 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -29,6 +29,7 @@ Usage: gpcheckperf <options>
     -f file    : a file listing all hosts to connect to
     --duration : how long to run network test (default 5 seconds)
     --netperf  : use netperf instead of gpnetbenchServer/gpnetbenchClient
+    --buffer-size : the size of the send buffer in kilobytes ( default 8 kilobytes)
 """
 
 import datetime
@@ -68,7 +69,7 @@ class Global():
     opt = {'-d': [], '-D': False, '-v': False, '-V': False, '-r': '',
            '-B': 1024 * 32, '-S': 0, '-h': [], '-f': None,
            '--duration': 15, '--net': None, '--netserver': 'gpnetbenchServer',
-           '--netclient': 'gpnetbenchClient'}
+           '--netclient': 'gpnetbenchClient', '--buffer-size': 0}
 
 
 GV = Global()
@@ -203,7 +204,7 @@ def print_version():
 
 def parseCommandLine():
     try:
-        (options, args) = getopt.getopt(sys.argv[1:], '?vVDd:r:B:S:p:h:f:', ['duration=', 'version', 'netperf'])
+        (options, args) = getopt.getopt(sys.argv[1:], '?vVDd:r:B:S:p:h:f:', ['duration=', 'version', 'netperf', 'buffer-size='])
     except Exception as e:
         usage('Error: ' + str(e))
         exit(1)
@@ -226,6 +227,8 @@ def parseCommandLine():
         elif switch == '--netperf':
             GV.opt['--netserver'] = 'netserver'
             GV.opt['--netclient'] = 'netperf'
+        elif switch == '--buffer-size':
+            GV.opt[switch] = int(val)
 
     # run default tests (if not specified)
     if GV.opt['-r'] == '':
@@ -275,6 +278,14 @@ def parseCommandLine():
         GV.opt['--duration'] = 15
         print('[INFO] Invalid network duration specified.  Using default (15 seconds)')
 
+    if GV.opt['--netclient'].find('netperf') >= 0:
+        if GV.opt['--buffer-size']:
+            print('[Warning] --buffer-size option will be ignored when the --netperf option is enabled')
+    else:
+        if GV.opt['--buffer-size'] <= 0:
+            print('[INFO] --buffer-size value is not specified or invalid. Using default (8 kilobytes)')
+            GV.opt['--buffer-size'] = 8
+
     # strip the last '/' from the dir
     dd = []
     for d in GV.opt['-d']:
@@ -540,8 +551,9 @@ def spawnNetperfTestBetween(x, y, netperf_path, netserver_port, sec=5):
         cmd = ('%s -H %s -p %d -t TCP_STREAM -l %s -f M -P 0 '
                % (netperf_path, y, netserver_port, sec))
     else:
-        cmd = ('%s -H %s -p %d -l %s -P 0 '
-               % (netperf_path, y, netserver_port, sec))
+        cmd = ('%s -H %s -p %d -l %s -P 0 -b %s'
+               % (netperf_path, y, netserver_port, sec, GV.opt['--buffer-size']))
+
     c = ['ssh', '-o', 'BatchMode yes',
          '-o', 'StrictHostKeyChecking no',
          x, cmd]
diff --git a/gpMgmt/doc/gpcheckperf_help b/gpMgmt/doc/gpcheckperf_help
index 23820d46ab1..8fd95568530 100755
--- a/gpMgmt/doc/gpcheckperf_help
+++ b/gpMgmt/doc/gpcheckperf_help
@@ -13,7 +13,7 @@ gpcheckperf -d <test_directory> [-d <test_directory> ...]
 
 gpcheckperf -d <temp_directory>
         {-f <hostfile_gpchecknet> | -h <hostname> [-h <hostname> ...]} 
-        [ -r n|N|M [--duration <time>] [--netperf] ] [-D] [-v|-V]
+        [ -r n|N|M [--duration <time>] [--netperf] ] [-D] [-v|-V] [--buffer-size <kbytes>]
 
 
 gpcheckperf -? 
@@ -97,6 +97,11 @@ Specifies the block size (in KB or MB) to use for disk I/O test.
 The default is 32KB, which is the same as the Cloudberry Database 
 page size. The maximum block size is 1 MB.
 
+--buffer-size <kbytes>
+
+ Specifies size of the send buffer in kilobytes, which is used by gpnetbenchClient.
+ Default size is 8 kilobytes.
+
 
 -d <test_directory>
 
diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
index 9fafafef7af..b824fe4785a 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature
@@ -73,3 +73,24 @@ Feature: Tests for gpcheckperf
     | matrix test | M        |
     | network test| N        |
 
+  @concourse_cluster
+  Scenario: gpcheckperf runs sequential network test with buffer size flag
+    Given the database is running
+    When  the user runs "gpcheckperf -h cdw -h sdw1 -d /data/gpdata/ -r n --buffer-size 8 -v"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "avg = " to stdout
+    And   gpcheckperf should print "gpnetbenchClient -H cdw -p 23000 -l 15 -P 0 -b 8" to stdout
+
+  @concourse_cluster
+  Scenario: gpcheckperf runs sequential network test with buffer size flag and netperf option
+    Given the database is running
+    When  the user runs "gpcheckperf -h cdw -h sdw1 -d /data/gpdata/ -r n --buffer-size 8 --netperf"
+    Then  gpcheckperf should print "--buffer-size option will be ignored when the --netperf option is enabled" to stdout
+
+  @concourse_cluster
+  Scenario: gpcheckperf runs sequential network test without buffer size flag
+    Given the database is running
+    When  the user runs "gpcheckperf -h cdw -h sdw1 -d /data/gpdata/ -r n"
+    Then  gpcheckperf should return a return code of 0
+    And   gpcheckperf should print "--buffer-size value is not specified or invalid. Using default \(8 kilobytes\)" to stdout
+    And   gpcheckperf should print "avg = " to stdout

From b4c5a133d026629eda06e17abac393000eeb06b2 Mon Sep 17 00:00:00 2001
From: Piyush Chandwadkar <65647926+piyushc01@users.noreply.github.com>
Date: Tue, 19 Mar 2024 10:54:52 +0530
Subject: [PATCH 45/48] Fixing if the time command has comma in the output
 (#17207)

Fix a gpcheckperf failure (an unhandled exception) that occurs when the output of
the time command uses a comma instead of a dot as the decimal separator.
Steps to reproduce issue:

$export LC_ALL=de_DE_utf8
$time sleep 1
real	0m1,021s
user	0m0,001s
sys	0m0,005s

Fix: added check if comma present in the time output, replace it with a dot and continue parsing.

Testing: Added unit tests to check the output of parseMultiDDResult() in case of comma and dot.
---
 gpMgmt/bin/gpcheckperf                        |  1 +
 .../test/unit/test_unit_gpcheckperf.py        | 35 +++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf
index 98ade0638e7..c4b61b0f399 100755
--- a/gpMgmt/bin/gpcheckperf
+++ b/gpMgmt/bin/gpcheckperf
@@ -443,6 +443,7 @@ def parseMultiDDResult(out):
         if o.find('real') >= 0:
             h = line[1:i]
             o = o.split()
+            o[1] = o[1].replace(',', '.')
             m = re.search("(^\d+.\d+)", o[1])
             if m is None:
                 sys.exit('[Error] expected %s to be a floating point number' % o[1])
diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py
index 097d4a1b191..b1de2f7896b 100644
--- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py
+++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py
@@ -61,6 +61,41 @@ def test_parseCommandLine_when_get_memory_succeeds(self, mock_get_memory):
         self.subject.parseCommandLine()
         self.assertEqual(self.subject.GV.opt['-S'], 246.0)
 
+    def test_parseMultiDDResult_when_output_regular(self):
+        inputText = """[localhost] dd if=/dev/zero of=/tmp/gpcheckperf_gpadmin/ddfile count=131072 bs=32768
+[localhost] 131072+0 records in
+[localhost] 131072+0 records out
+[localhost] 4294967296 bytes transferred in 2.973025 secs (1444645536 bytes/sec)
+[localhost]
+[localhost] multidd total bytes  4294967296
+[localhost] real 3.65
+[localhost] user 0.18
+[localhost] sys 2.52
+    """
+        actual_result = self.subject.parseMultiDDResult(inputText)
+        (mbps, time, bytes) = actual_result["localhost"]
+        exp_mbps, exp_time, exp_bytes = (1122.1917808219177, 3.65, 4294967296)
+        self.assertEqual(mbps, exp_mbps)
+        self.assertEqual(time, exp_time)
+        self.assertEqual(bytes, exp_bytes)
+
+    def test_parseMultiDDResult_when_output_comma(self):
+        inputText = """[localhost] dd if=/dev/zero of=/tmp/gpcheckperf_gpadmin/ddfile count=131072 bs=32768
+[localhost] 131072+0 records in
+[localhost] 131072+0 records out
+[localhost] 4294967296 bytes transferred in 2.973025 secs (1444645536 bytes/sec)
+[localhost]
+[localhost] multidd total bytes  4294967296
+[localhost] real 3,65
+[localhost] user 0,18
+[localhost] sys 2,52
+    """
+        actual_result = self.subject.parseMultiDDResult(inputText)
+        (mbps, time, bytes) = actual_result["localhost"]
+        exp_mbps, exp_time, exp_bytes = (1122.1917808219177, 3.65, 4294967296)
+        self.assertEqual(mbps, exp_mbps)
+        self.assertEqual(time, exp_time)
+        self.assertEqual(bytes, exp_bytes)
 
 if __name__ == '__main__':
     run_tests()

From 6b5c6a212f7a2b50cd9ec2ce999d603b650e90e9 Mon Sep 17 00:00:00 2001
From: liwenkai <liwenkai@hashdata.cn>
Date: Fri, 17 May 2024 17:18:30 +0800
Subject: [PATCH 46/48] Replace "Greenplum" with "Cloudberry" in README.md &
 update Makefile

This commit updates the README.md file by replacing all occurrences of
"Greenplum" with "Cloudberry".

This commit includes the following changes:

1. README.md:
   - Replaced all instances of "Greenplum" with "Cloudberry" to reflect
 the new naming convention.

2. Makefile:
   - Added new libraries to GPDEMO_LIBS.
   - Added new targets for `gpdemo` and `gpshrink`.
   - Included a command to create the directory '$(DESTDIR)$(bindir)/lib/gpdemo':
     $(MKDIR_P) '$(DESTDIR)$(bindir)/lib/gpdemo'

3. gpshrink:
   - Replaced Scp with Rsync.

Related to Issue#id <https://github.com/cloudberrydb/cloudberrydb/issues/421>
---
 gpMgmt/bin/Makefile  | 17 ++++++++++++++---
 gpMgmt/bin/README.md | 22 +++++++++++-----------
 gpMgmt/bin/gpshrink  |  4 ++--
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile
index 70d650fc772..7a81781d2b1 100644
--- a/gpMgmt/bin/Makefile
+++ b/gpMgmt/bin/Makefile
@@ -13,19 +13,27 @@ SUBDIRS += ifaddrs
 $(recurse)
 
 PROGRAMS= analyzedb gpactivatestandby gpaddmirrors gpcheckcat gpcheckperf \
-	gpcheckresgroupimpl gpconfig gpdeletesystem gpexpand gpinitstandby \
+	gpcheckresgroupimpl gpconfig gpdeletesystem gpexpand gpshrink gpinitstandby \
 	gpinitsystem gpload gpload.py gplogfilter gpmovemirrors \
 	gppkg gprecoverseg gpreload gpsync gpsd gpssh gpssh-exkeys gpstart \
-	gpstate gpstop minirepro gpmemwatcher gpmemreport
+	gpstate gpstop minirepro gpmemwatcher gpmemreport gpdemo gpdirtableload
+
+	GPDEMO_LIBS = gpdemo-defaults.sh lalshell generate_certs.sh demo_cluster.sh \
+					probe_config.sh README
 
 installdirs:
 	$(MKDIR_P) '$(DESTDIR)$(bindir)/lib'
+	$(MKDIR_P) '$(DESTDIR)$(bindir)/lib/gpdemo'
 
 installprograms: installdirs
 	for file in $(PROGRAMS); do \
 		$(INSTALL_SCRIPT) $$file '$(DESTDIR)$(bindir)/'$$file ; \
 		$(PERL) $(top_builddir)/putversion '$(DESTDIR)$(bindir)/'$$file ; \
 	done
+	# install dependencies of gpdemo
+	for file in $(GPDEMO_LIBS); do \
+		$(INSTALL_SCRIPT) $(top_builddir)/gpAux/gpdemo/$$file '$(DESTDIR)$(bindir)/lib/gpdemo/'$$file ; \
+	done
 	# Symlink gpcheckcat from bin to bin/lib to maintain backward compatibility
 	if [ ! -L $(DESTDIR)$(bindir)/lib/gpcheckcat  ]; then \
 		cd $(DESTDIR)$(bindir)/lib/ && $(LN_S) ../gpcheckcat gpcheckcat; \
@@ -36,6 +44,9 @@ uninstall:
 	for file in $(PROGRAMS); do \
 		rm -f '$(DESTDIR)$(bindir)/'$$file ; \
 	done
+	for file in $(GPDEMO_LIBS); do \
+		rm -f '$(DESTDIR)$(bindir)/lib/gpdemo/'$$file ; \
+	done
 	rm -f '$(DESTDIR)$(bindir)/gpload.bat'
 
 #
@@ -183,7 +194,7 @@ clean distclean:
 	rm -rf *.pyc
 	rm -f analyzedbc gpactivatestandbyc gpaddmirrorsc gpcheckcatc \
 		  gpcheckperfc gpcheckresgroupimplc gpchecksubnetcfgc gpconfigc \
-		  gpdeletesystemc gpexpandc gpinitstandbyc gplogfilterc gpmovemirrorsc \
+		  gpdeletesystemc gpexpandc gpshrinkc gpinitstandbyc gplogfilterc gpmovemirrorsc \
 		  gppkgc gprecoversegc gpreloadc gpscpc gpsyncc gpsdc gpssh-exkeysc gpsshc \
 		  gpstartc gpstatec gpstopc minireproc
 	rm -f gpconfig_modules/gucs_disallowed_in_file.txt
diff --git a/gpMgmt/bin/README.md b/gpMgmt/bin/README.md
index b003592b16c..a1600a9d662 100644
--- a/gpMgmt/bin/README.md
+++ b/gpMgmt/bin/README.md
@@ -28,20 +28,20 @@ Where Things Go
 
 List of Management Scripts Written in Bash
 ------------------------------------------
-bin/gpinitsystem        -  Creates a new Greenplum Database
+bin/gpinitsystem        -  Creates a new Cloudberry Database
 bin/gpload              -  Sets env variables and calls gpload.py
 
 
 List of Management Scripts Written in Python (no libraries)
 -----------------------------------------------------------
-bin/gpload.py           -  Loads data into a Greenplum Database
+bin/gpload.py           -  Loads data into a Cloudberry Database
 
 
 List of Management Scripts Written in Python (gpmlib - old libraries)
 ---------------------------------------------------------------------
 bin/gpaddmirrors        -  Adds mirrors to an array (needs rewrite)
 bin/gprecoverseg        -  Recovers a failed segment (needs rewrite)
-bin/gpcheckperf         -  Checks the hardware for Greenplum Database
+bin/gpcheckperf         -  Checks the hardware for Cloudberry Database
 bin/gpsync              -  Copies files to many hosts
 bin/gpssh               -  Remote shell to many hosts
 bin/gpssh-exkeys        -  Exchange ssh keys between many hosts
@@ -51,12 +51,12 @@ List of Management Scripts Written in Python (gppylib - current libraries)
 --------------------------------------------------------------------------
 bin/gpactivatestandby   -  Activates the Standby Coordinator
 bin/gpconfig_helper     -  Edits postgresql.conf file for all segments
-bin/gpdeletesystem      -  Deletes a Greenplum Database
-bin/gpexpand            -  Adds additional segments to a Greenplum Database
+bin/gpdeletesystem      -  Deletes a Cloudberry Database
+bin/gpexpand            -  Adds additional segments to a Cloudberry Database
 bin/gpinitstandby       -  Initializes standby coordinator
 bin/gplogfilter         -  Filters log files
-bin/gpstart             -  Start a Greenplum Database
-bin/gpstop              -  Stop a Greenplum Database
+bin/gpstart             -  Start a Cloudberry Database
+bin/gpstop              -  Stop a Cloudberry Database
 
 sbin/gpconfig_helper.py -  Helper script for gpconfig
 sbin/gpsegcopy          -  Helper script for gpexpand
@@ -76,10 +76,10 @@ gparray.py
    +-  SegmentPair - Configuration information for a single content id
    |     \-  Contains multiple Segment objects
    |
-   +-  GpArray   - Configuration information for a Greenplum Database
+   +-  GpArray   - Configuration information for a Cloudberry Database
          \-  Contains multiple SegmentPair objects
 
-gplog.py         - Utility functions to assist in Greenplum standard logging
+gplog.py         - Utility functions to assist in Cloudberry standard logging
 
 gpparseopts.py   - Wrapper around optparse library to aid in locating help files
 
@@ -143,7 +143,7 @@ db/dbconn.py       - Connections to the database
   |
   +- Should have a wrapper class around a pygresql connection object!
 
-util/gp_utils.py     - Greenplum related utility functions that are not Commands
+util/gp_utils.py     - Cloudberry related utility functions that are not Commands
 util/ssh_session.py  - SSH and RSYNC related utility functions brought in from gpmlib.py/gplib.py
                        that are used by gpssh, gpsync and gpssh-exkeys
 
@@ -175,7 +175,7 @@ tests that do not require a running cluster.
 
 ## Testing Management Scripts (behave tests)
 
-Behave tests require a running Greenplum cluster, and additional python libraries for testing, available to gpadmin.
+Behave tests require a running Cloudberry cluster, and additional python libraries for testing, available to gpadmin.
 
 Thus, you can install these additional python libraries using any of the following methods:
 
diff --git a/gpMgmt/bin/gpshrink b/gpMgmt/bin/gpshrink
index 386c64a7e24..fb06e16a055 100644
--- a/gpMgmt/bin/gpshrink
+++ b/gpMgmt/bin/gpshrink
@@ -436,7 +436,7 @@ class gpshrinkStatus():
 
     def _sync_status_file(self):
         """Syncs the gpshrink status file with the coordinator mirror"""
-        cpCmd = Scp('gpshrink copying status file to coordinator mirror',
+        cpCmd = Rsync('gpshrink copying status file to coordinator mirror',
                     srcFile=self._status_filename,
                     dstFile=self._status_standby_filename,
                     dstHost=self._coordinator_mirror.getSegmentHostName())
@@ -503,7 +503,7 @@ class gpshrinkStatus():
         """ Sync the segment configuration backup file to standby """
         if self._coordinator_mirror:
             self.logger.debug("Sync segment configuration backup file")
-            cpCmd = Scp('gpshrink copying segment configuration backup file to coordinator mirror',
+            cpCmd = Rsync('gpshrink copying segment configuration backup file to coordinator mirror',
                         srcFile=self._gp_segment_configuration_backup,
                         dstFile=self._segment_configuration_standby_filename,
                         dstHost=self._coordinator_mirror.getSegmentHostName())

From 8ba10ab675a7e15a3ec64275d93dd779dde9accd Mon Sep 17 00:00:00 2001
From: Dianjin Wang <wangdianjin@gmail.com>
Date: Thu, 23 May 2024 10:29:28 +0800
Subject: [PATCH 47/48] Cleanup the build tools and guide directories

In this PR, we have some changes for the build guides and deploy tools.
Including:

- Rename `readmes/*` -> `deploy/build/*`
- Rename `hooks/*` -> `src/tools/hooks/*`
- Move `python-dependencies.txt` to the top dir
- Move `README.Conda.md` out from the main repo to the
  `cloudberrydb/cloudberrydb-site` repo because it's for the data
  analytics guide, not how to build a Cloudberry Database.

Note that we have not verified the files in `deploy/vagrant`,
`deploy/docker`, and `deploy/k8s`. We cannot make sure it can run
successfully. We will do that in the following months.
---
 README.md                                     |   2 +-
 {readmes => deploy/build}/README.CentOS.bash  |   0
 {readmes => deploy/build}/README.Linux.md     |  10 +--
 .../build}/README.Rhel-Rocky.bash             |   0
 {readmes => deploy/build}/README.Ubuntu.bash  |   0
 {readmes => deploy/build}/README.macOS.bash   |   0
 {readmes => deploy/build}/README.macOS.md     |   4 +-
 {readmes => deploy/build}/README.md           |  10 +--
 {src/tools => deploy}/vagrant/.gitignore      |   0
 {src/tools => deploy}/vagrant/README.md       |   0
 .../vagrant/centos/Vagrantfile                |   0
 .../vagrant/centos/vagrant-configure-os.sh    |   0
 .../vagrant/centos/vagrant-setup.sh           |   0
 .../vagrant/common/vagrant-build-gpdb.sh      |   0
 .../vagrant/common/vagrant-build-gporca.sh    |   0
 .../vagrant/common/vagrant-common.rb          |   0
 .../vagrant/common/vagrant-local-example.yml  |   0
 .../vagrant/debian/Vagrantfile                |   0
 .../vagrant/debian/vagrant-setup.sh           |   0
 .../vagrant/pictures/gpdb_processes.png       | Bin
 .../vagrant/ubuntu/Vagrantfile                |   0
 .../vagrant/ubuntu/vagrant-setup.sh           |   0
 ...ependencies.txt => python-dependencies.txt |   0
 readmes/README.Conda.md                       |  80 ------------------
 {hooks => src/tools/hooks}/install            |   0
 {hooks => src/tools/hooks}/pre-push           |   0
 26 files changed, 13 insertions(+), 93 deletions(-)
 rename {readmes => deploy/build}/README.CentOS.bash (100%)
 rename {readmes => deploy/build}/README.Linux.md (96%)
 rename {readmes => deploy/build}/README.Rhel-Rocky.bash (100%)
 rename {readmes => deploy/build}/README.Ubuntu.bash (100%)
 rename {readmes => deploy/build}/README.macOS.bash (100%)
 rename {readmes => deploy/build}/README.macOS.md (98%)
 rename {readmes => deploy/build}/README.md (91%)
 rename {src/tools => deploy}/vagrant/.gitignore (100%)
 rename {src/tools => deploy}/vagrant/README.md (100%)
 rename {src/tools => deploy}/vagrant/centos/Vagrantfile (100%)
 rename {src/tools => deploy}/vagrant/centos/vagrant-configure-os.sh (100%)
 rename {src/tools => deploy}/vagrant/centos/vagrant-setup.sh (100%)
 rename {src/tools => deploy}/vagrant/common/vagrant-build-gpdb.sh (100%)
 rename {src/tools => deploy}/vagrant/common/vagrant-build-gporca.sh (100%)
 rename {src/tools => deploy}/vagrant/common/vagrant-common.rb (100%)
 rename {src/tools => deploy}/vagrant/common/vagrant-local-example.yml (100%)
 rename {src/tools => deploy}/vagrant/debian/Vagrantfile (100%)
 rename {src/tools => deploy}/vagrant/debian/vagrant-setup.sh (100%)
 rename {src/tools => deploy}/vagrant/pictures/gpdb_processes.png (100%)
 rename {src/tools => deploy}/vagrant/ubuntu/Vagrantfile (100%)
 rename {src/tools => deploy}/vagrant/ubuntu/vagrant-setup.sh (100%)
 rename readmes/python-dependencies.txt => python-dependencies.txt (100%)
 delete mode 100644 readmes/README.Conda.md
 rename {hooks => src/tools/hooks}/install (100%)
 rename {hooks => src/tools/hooks}/pre-push (100%)

diff --git a/README.md b/README.md
index 4f8ab14eb82..ef79502156d 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ Database.
 
 ### Build from source
 
-You can follow [these guides](./readmes) to build the Cloudberry Database on
+You can follow [these guides](./deploy/build) to build the Cloudberry Database on
 Linux OS(including CentOS, RHEL/Rocky Linux, and Ubuntu) and macOS.
 
 ### Try out quickly
diff --git a/readmes/README.CentOS.bash b/deploy/build/README.CentOS.bash
similarity index 100%
rename from readmes/README.CentOS.bash
rename to deploy/build/README.CentOS.bash
diff --git a/readmes/README.Linux.md b/deploy/build/README.Linux.md
similarity index 96%
rename from readmes/README.Linux.md
rename to deploy/build/README.Linux.md
index 8221bc33be5..db087b5a81e 100644
--- a/readmes/README.Linux.md
+++ b/deploy/build/README.Linux.md
@@ -30,10 +30,10 @@ Enter the repository and install dependencies according to your operating system
 
 The following steps work on CentOS 7. For other CentOS versions, these steps might work but are not guaranteed to work.
 
-1. Run the bash script `README.CentOS.bash` in the `readmes` directory of the `cloudberrydb/cloudberrydb` repository. To run this script, password is required. Then, some required dependencies will be automatically downloaded.
+1. Run the bash script `README.CentOS.bash` in the `deploy/build` directory of the `cloudberrydb/cloudberrydb` repository. To run this script, password is required. Then, some required dependencies will be automatically downloaded.
 
     ```bash
-    cd cloudberrydb/readmes
+    cd cloudberrydb/deploy/build
     ./README.CentOS.bash
     ```
 
@@ -81,16 +81,16 @@ The following steps work on CentOS 7. For other CentOS versions, these steps mig
 3. Install more dependencies by running the `README.Rhel-Rocky.bash` script.
 
     ```bash
-    ~/cloudberrydb/readmes/README.Rhel-Rocky.bash
+    ~/cloudberrydb/deploy/build/README.Rhel-Rocky.bash
     ```
 
 ### For Ubuntu 18.04 or later
 
-1. Install dependencies by running the `README.Ubuntu.bash` script in the `readmes` directory.
+1. Install dependencies by running the `README.Ubuntu.bash` script in the `deploy/build` directory.
 
     ```shell
     # You need to enter your password to run.
-    sudo ~/cloudberrydb/readmes/README.Ubuntu.bash
+    sudo ~/cloudberrydb/deploy/build/README.Ubuntu.bash
     ```
 
     > [!Note]
diff --git a/readmes/README.Rhel-Rocky.bash b/deploy/build/README.Rhel-Rocky.bash
similarity index 100%
rename from readmes/README.Rhel-Rocky.bash
rename to deploy/build/README.Rhel-Rocky.bash
diff --git a/readmes/README.Ubuntu.bash b/deploy/build/README.Ubuntu.bash
similarity index 100%
rename from readmes/README.Ubuntu.bash
rename to deploy/build/README.Ubuntu.bash
diff --git a/readmes/README.macOS.bash b/deploy/build/README.macOS.bash
similarity index 100%
rename from readmes/README.macOS.bash
rename to deploy/build/README.macOS.bash
diff --git a/readmes/README.macOS.md b/deploy/build/README.macOS.md
similarity index 98%
rename from readmes/README.macOS.md
rename to deploy/build/README.macOS.md
index ccce250957c..3231f983486 100644
--- a/readmes/README.macOS.md
+++ b/deploy/build/README.macOS.md
@@ -9,7 +9,7 @@ According to our test, these steps work well on macOS Ventura 13.4+ with both In
 Run the following command to install the needed dependencies. You will be asked to enter the `sudo` password of your macOS system.
 
 ```bash
-source readmes/README.macOS.bash
+source deploy/build/README.macOS.bash
 ```
 
 > [!NOTE]
@@ -66,7 +66,7 @@ source $(cd ~; pwd)/install/cbdb/greenplum_path.sh
 
 # 4. Install the Python dependencies.
 
-pip3 install --user -r readmes/python-dependencies.txt
+pip3 install --user -r python-dependencies.txt
 
 # 5. Start a demo cluster.
 
diff --git a/readmes/README.md b/deploy/build/README.md
similarity index 91%
rename from readmes/README.md
rename to deploy/build/README.md
index a02926fa43e..50a78710222 100644
--- a/readmes/README.md
+++ b/deploy/build/README.md
@@ -5,8 +5,8 @@ Greenplum Database READE.md here. Thanks all the original writers.-->
 
 This guides describes how to build Cloudberry Database from source code.
 
-- For building on Linux systems, see [Compile and Install Cloudberry Database on Linux](/readmes/README.Linux.md).
-- For building on macOS system, see [Compile and Install Cloudberry Database on macOS](/readmes/README.macOS.md).
+- For building on Linux systems, see [Compile and Install Cloudberry Database on Linux](./README.Linux.md).
+- For building on macOS systems, see [Compile and Install Cloudberry Database on macOS](./README.macOS.md).
 
 ## Build the database
 
@@ -98,7 +98,7 @@ make distclean
 
 PXF is an extension framework for Greenplum Database/Cloudberry
 Database to enable fast access to external Hadoop datasets. Refer to
-[PXF extension](../gpcontrib/pxf/README.md) for more information.
+[PXF extension](../../gpcontrib/pxf_fdw/README.md) for more information.
 
 Currently, CBDB is built with PXF by default (--enable-pxf is on).
 In order to build CBDB without pxf, simply invoke `./configure` with additional option `--disable-pxf`.
@@ -108,9 +108,9 @@ PXF requires curl, so `--enable-pxf` is not compatible with the `--without-libcu
 
 Cloudberry Database supports Python3 with plpython3u UDF
 
-See [how to enable Python3](../src/pl/plpython/README.md) for details.
+See [how to enable Python3](../../src/pl/plpython/README.md) for details.
 
 
 # Development with Vagrant
 
-There is a Vagrant-based [quickstart guide for developers](../src/tools/vagrant/README.md).
+There is a Vagrant-based [quickstart guide for developers](../vagrant/README.md).
diff --git a/src/tools/vagrant/.gitignore b/deploy/vagrant/.gitignore
similarity index 100%
rename from src/tools/vagrant/.gitignore
rename to deploy/vagrant/.gitignore
diff --git a/src/tools/vagrant/README.md b/deploy/vagrant/README.md
similarity index 100%
rename from src/tools/vagrant/README.md
rename to deploy/vagrant/README.md
diff --git a/src/tools/vagrant/centos/Vagrantfile b/deploy/vagrant/centos/Vagrantfile
similarity index 100%
rename from src/tools/vagrant/centos/Vagrantfile
rename to deploy/vagrant/centos/Vagrantfile
diff --git a/src/tools/vagrant/centos/vagrant-configure-os.sh b/deploy/vagrant/centos/vagrant-configure-os.sh
similarity index 100%
rename from src/tools/vagrant/centos/vagrant-configure-os.sh
rename to deploy/vagrant/centos/vagrant-configure-os.sh
diff --git a/src/tools/vagrant/centos/vagrant-setup.sh b/deploy/vagrant/centos/vagrant-setup.sh
similarity index 100%
rename from src/tools/vagrant/centos/vagrant-setup.sh
rename to deploy/vagrant/centos/vagrant-setup.sh
diff --git a/src/tools/vagrant/common/vagrant-build-gpdb.sh b/deploy/vagrant/common/vagrant-build-gpdb.sh
similarity index 100%
rename from src/tools/vagrant/common/vagrant-build-gpdb.sh
rename to deploy/vagrant/common/vagrant-build-gpdb.sh
diff --git a/src/tools/vagrant/common/vagrant-build-gporca.sh b/deploy/vagrant/common/vagrant-build-gporca.sh
similarity index 100%
rename from src/tools/vagrant/common/vagrant-build-gporca.sh
rename to deploy/vagrant/common/vagrant-build-gporca.sh
diff --git a/src/tools/vagrant/common/vagrant-common.rb b/deploy/vagrant/common/vagrant-common.rb
similarity index 100%
rename from src/tools/vagrant/common/vagrant-common.rb
rename to deploy/vagrant/common/vagrant-common.rb
diff --git a/src/tools/vagrant/common/vagrant-local-example.yml b/deploy/vagrant/common/vagrant-local-example.yml
similarity index 100%
rename from src/tools/vagrant/common/vagrant-local-example.yml
rename to deploy/vagrant/common/vagrant-local-example.yml
diff --git a/src/tools/vagrant/debian/Vagrantfile b/deploy/vagrant/debian/Vagrantfile
similarity index 100%
rename from src/tools/vagrant/debian/Vagrantfile
rename to deploy/vagrant/debian/Vagrantfile
diff --git a/src/tools/vagrant/debian/vagrant-setup.sh b/deploy/vagrant/debian/vagrant-setup.sh
similarity index 100%
rename from src/tools/vagrant/debian/vagrant-setup.sh
rename to deploy/vagrant/debian/vagrant-setup.sh
diff --git a/src/tools/vagrant/pictures/gpdb_processes.png b/deploy/vagrant/pictures/gpdb_processes.png
similarity index 100%
rename from src/tools/vagrant/pictures/gpdb_processes.png
rename to deploy/vagrant/pictures/gpdb_processes.png
diff --git a/src/tools/vagrant/ubuntu/Vagrantfile b/deploy/vagrant/ubuntu/Vagrantfile
similarity index 100%
rename from src/tools/vagrant/ubuntu/Vagrantfile
rename to deploy/vagrant/ubuntu/Vagrantfile
diff --git a/src/tools/vagrant/ubuntu/vagrant-setup.sh b/deploy/vagrant/ubuntu/vagrant-setup.sh
similarity index 100%
rename from src/tools/vagrant/ubuntu/vagrant-setup.sh
rename to deploy/vagrant/ubuntu/vagrant-setup.sh
diff --git a/readmes/python-dependencies.txt b/python-dependencies.txt
similarity index 100%
rename from readmes/python-dependencies.txt
rename to python-dependencies.txt
diff --git a/readmes/README.Conda.md b/readmes/README.Conda.md
deleted file mode 100644
index 74693583c98..00000000000
--- a/readmes/README.Conda.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# Steps to build plpython with Anaconda
-
-Anaconda (https://www.continuum.io/) is an excellent Python distribution for
-machine learning and analytics which also provide its own package
-tracking/management tools. CBDB support plpython which can make use of all
-features provided by Anaconda.  Note that CBDB only supports Python 2.7 now.
-Although plpython might be built with Python 3.x, other Python tools in CBDB
-won't work with Python 3.x.
-
-You can try following steps to run Anaconda with CBDB from source code.
-
-## Install Anaconda. 
-We use miniconda here.
-
-	wget  https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh
-	chmod +x Miniconda-latest-Linux-x86_64.sh
-	./Miniconda-latest-Linux-x86_64.sh
-
-Then follow the instructions to complete the installation of Anaconda.
-Assume miniconda is installed to "/PATH/TO/CONDAHOME".
-
-IMPORTANT: Add this path to head of PATH environment, so that Anaconda become the default Python
-
-	export PATH=/PATH/TO/CONDAHOME/bin:$PATH
-
-## Install related tools
-Following instructions are for Centos/RHEL/Fedora as example. 
-	sudo yum install git
-	sudo yum groupinstall "Development tools"
-	sudo yum install curl-devel bzip2-devel openssl-devel perl-ExtUtils-Embed   libxml2-devel  openldap-devel  pam pam-devel  perl-devel  readline-devel 
-
-The python-devel is not required here because it is provided by Anaconda.
-
-## Download and build CBDB
-	git clone https://github.com/cloudberrydb/cloudberrydb.git
-	cd cloudberrydb
-	./configure --prefix=`pwd`/greenplumdb  --with-gssapi --with-pgport=5432 --with-perl --with-python --with-ssl=openssl  --with-libxml --enable-cassert --enable-debug --enable-depend
-	make install
-
-Make sure "--with-python" parameter exists. Because the default Python is the Anaconda Python, It's done.
-
-## Test link path of Python
-In following example, Anaconda is installed to /home/gpadmin/miniconda.
-    ldd greenplumdb/lib/postgresql/plpython.so
-	...
-	libpython2.7.so.1.0 => /home/gpadmin/miniconda/lib/libpython2.7.so.1.0 (0x00007f1f3c40c000)
-	...
-
-## Install Python package from Anaconda
-Install numpy from Anaconda with command:
-
-	conda install numpy
-
-If you run a multinode cluster, make sure the install paths of Anaconda (and
-GPDB) are the same on every host. gpssh can help to install Anaconda package on
-each segment:
-
-	gpssh -f hostlistfile /PATH/TO/CONDAHOME/bin/conda install numpy
-
-## Init CBDB cluster
-
-## Run "hello world"
-You can run following example to ensure Anaconda working for you.
-
-
-	DROP LANGUAGE IF EXISTS 'plpythonu' CASCADE;
-	CREATE PROCEDURAL LANGUAGE 'plpythonu' HANDLER plpython_call_handler;
-
-	CREATE OR REPLACE FUNCTION testnumpy() RETURNS float AS
-	$$
-	    import time,numpy
-	    t1 = time.time()
-	    X = numpy.arange(10000000)
-	    Y = numpy.arange(10000000)
-	    Z = X + Y
-	    return time.time() - t1
-	$$
-	LANGUAGE 'plpythonu' VOLATILE;
-	select testnumpy();
-
diff --git a/hooks/install b/src/tools/hooks/install
similarity index 100%
rename from hooks/install
rename to src/tools/hooks/install
diff --git a/hooks/pre-push b/src/tools/hooks/pre-push
similarity index 100%
rename from hooks/pre-push
rename to src/tools/hooks/pre-push

From 4649bc40a014004be4fe474c0a7c678c8631632d Mon Sep 17 00:00:00 2001
From: Xing Guo <higuoxing@gmail.com>
Date: Tue, 1 Mar 2022 04:36:05 +0800
Subject: [PATCH 48/48] Insert more data to make tuplestore spill in
 regress/misc_jiras.sql.

This is a follow-up patch for ad8b266 and a039910 to make pipelines
happy.

Previous commits are causing CI failure.
See: https://prod.ci.gpdb.pivotal.io/teams/main/pipelines/gpdb_master_without_asserts/jobs/icw_planner_icproxy_ubuntu18.04/builds/603

(cherry picked from commit 09056de6940bd95cf4382474a1d3c6bdb60500d9)
---
 src/test/regress/expected/misc_jiras.out | 4 ++--
 src/test/regress/sql/misc_jiras.sql      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/test/regress/expected/misc_jiras.out b/src/test/regress/expected/misc_jiras.out
index ab418abb849..2c6d24f5a82 100644
--- a/src/test/regress/expected/misc_jiras.out
+++ b/src/test/regress/expected/misc_jiras.out
@@ -12,7 +12,7 @@ create schema misc_jiras;
 --
 create table misc_jiras.t1 (c1 int, c2 text, c3 smallint) distributed by (c1);
 insert into misc_jiras.t1 select i % 13, md5(i::text), i % 3
-  from generate_series(1, 40000) i;
+  from generate_series(1, 60000) i;
 -- tuplestore in windowagg uses statement_mem to control the in-memory data size,
 -- set a small value to trigger the spilling.
 set statement_mem to '1024kB';
@@ -44,7 +44,7 @@ NOTICE:  winagg: tuplestore spilled to disk  (seg1 slice1 127.0.0.1:7003 pid=547
 NOTICE:  winagg: tuplestore spilled to disk  (seg2 slice1 127.0.0.1:7004 pid=54721)
    sum   
 ---------
- 20006.5
+ 30006.5
 (1 row)
 
 SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid)
diff --git a/src/test/regress/sql/misc_jiras.sql b/src/test/regress/sql/misc_jiras.sql
index 4e96bc11e52..fbd35f8b1a2 100644
--- a/src/test/regress/sql/misc_jiras.sql
+++ b/src/test/regress/sql/misc_jiras.sql
@@ -13,7 +13,7 @@ create schema misc_jiras;
 
 create table misc_jiras.t1 (c1 int, c2 text, c3 smallint) distributed by (c1);
 insert into misc_jiras.t1 select i % 13, md5(i::text), i % 3
-  from generate_series(1, 40000) i;
+  from generate_series(1, 60000) i;
 
 -- tuplestore in windowagg uses statement_mem to control the in-memory data size,
 -- set a small value to trigger the spilling.