diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 4e160d54b8c..60a220c57ab 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -233,13 +233,6 @@ static int	cmp(const chr *, const chr *, size_t);
 static int	casecmp(const chr *, const chr *, size_t);
 
 
-/* info we need during compilation about a known capturing subexpression */
-struct subinfo
-{
-	struct state *left;			/* left end of its sub-NFA */
-	struct state *right;		/* right end of its sub-NFA */
-};
-
 /* internal variables, bundled for easy passing around */
 struct vars
 {
@@ -252,10 +245,10 @@ struct vars
 	int			nexttype;		/* type of next token */
 	chr			nextvalue;		/* value (if any) of next token */
 	int			lexcon;			/* lexical context type (see regc_lex.c) */
-	int			nsubexp;		/* number of known capturing subexpressions */
-	struct subinfo *subs;		/* info about known capturing subexpressions */
-	size_t		nsubs;			/* allocated length of subs[] vector */
-	struct subinfo sub10[10];	/* initial vector, enough for most */
+	int			nsubexp;		/* subexpression count */
+	struct subre **subs;		/* subRE pointer vector */
+	size_t		nsubs;			/* length of vector */
+	struct subre *sub10[10];	/* initial vector, enough for most */
 	struct nfa *nfa;			/* the NFA */
 	struct colormap *cm;		/* character color map */
 	color		nlcolor;		/* color of newline */
@@ -375,7 +368,7 @@ pg_regcomp(regex_t *re,
 	v->subs = v->sub10;
 	v->nsubs = 10;
 	for (j = 0; j < v->nsubs; j++)
-		v->subs[j].left = v->subs[j].right = NULL;
+		v->subs[j] = NULL;
 	v->nfa = NULL;
 	v->cm = NULL;
 	v->nlcolor = COLORLESS;
@@ -511,13 +504,13 @@ pg_regcomp(regex_t *re,
 }
 
 /*
- * moresubs - enlarge capturing-subexpressions vector
+ * moresubs - enlarge subRE vector
  */
 static void
 moresubs(struct vars *v,
 		 int wanted)			/* want enough room for this one */
 {
-	struct subinfo *p;
+	struct subre **p;
 	size_t		n;
 
 	assert(wanted > 0 && (size_t) wanted >= v->nsubs);
@@ -525,13 +518,13 @@ moresubs(struct vars *v,
 
 	if (v->subs == v->sub10)
 	{
-		p = (struct subinfo *) MALLOC(n * sizeof(struct subinfo));
+		p = (struct subre **) MALLOC(n * sizeof(struct subre *));
 		if (p != NULL)
 			memcpy(VS(p), VS(v->subs),
-				   v->nsubs * sizeof(struct subinfo));
+				   v->nsubs * sizeof(struct subre *));
 	}
 	else
-		p = (struct subinfo *) REALLOC(v->subs, n * sizeof(struct subinfo));
+		p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *));
 	if (p == NULL)
 	{
 		ERR(REG_ESPACE);
@@ -539,7 +532,7 @@ moresubs(struct vars *v,
 	}
 	v->subs = p;
 	for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
-		p->left = p->right = NULL;
+		*p = NULL;
 	assert(v->nsubs == n);
 	assert((size_t) wanted < v->nsubs);
 }
@@ -988,6 +981,7 @@ parseqatom(struct vars *v,
 			s = newstate(v->nfa);
 			s2 = newstate(v->nfa);
 			NOERRN();
+			/* We may not need these arcs, but keep things connected for now */
 			EMPTYARC(lp, s);
 			EMPTYARC(s2, rp);
 			NOERRN();
@@ -997,10 +991,6 @@ parseqatom(struct vars *v,
 			NOERRN();
 			if (cap)
 			{
-				/* save the sub-NFA's endpoints for future backrefs to use */
-				assert(v->subs[subno].left == NULL);
-				v->subs[subno].left = s;
-				v->subs[subno].right = s2;
 				if (atom->capno == 0)
 				{
 					/* normal case: just mark the atom as capturing */
@@ -1016,13 +1006,15 @@ parseqatom(struct vars *v,
 					t->child = atom;
 					atom = t;
 				}
+				assert(v->subs[subno] == NULL);
+				v->subs[subno] = atom;
 			}
 			/* postpone everything else pending possible {0} */
 			break;
 		case BACKREF:			/* the Feature From The Black Lagoon */
 			INSIST(type != LACON, REG_ESUBREG);
 			INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
-			INSIST(v->subs[v->nextvalue].left != NULL, REG_ESUBREG);
+			INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
 			NOERRN();
 			assert(v->nextvalue > 0);
 			atom = subre(v, 'b', BACKR, lp, rp);
@@ -1097,7 +1089,7 @@ parseqatom(struct vars *v,
 		if (atom != NULL)
 			freesubre(v, atom);
 		if (atomtype == '(')
-			v->subs[subno].left = v->subs[subno].right = NULL;
+			v->subs[subno] = NULL;
 		delsub(v->nfa, lp, rp);
 		EMPTYARC(lp, rp);
 		return top;
@@ -1130,30 +1122,48 @@ parseqatom(struct vars *v,
 		NOERRN();
 	}
 
+	/*
+	 * For what follows, we need the atom to have its own begin/end states
+	 * that are distinct from lp/rp, so that we can wrap iteration structure
+	 * around it.  The parenthesized-atom case above already made suitable
+	 * states (and we don't want to modify a capturing subre, since it's
+	 * already recorded in v->subs[]).  Otherwise, we need more states.
+	 */
+	if (atom->begin == lp || atom->end == rp)
+	{
+		s = newstate(v->nfa);
+		s2 = newstate(v->nfa);
+		NOERRN();
+		moveouts(v->nfa, lp, s);
+		moveins(v->nfa, rp, s2);
+		atom->begin = s;
+		atom->end = s2;
+	}
+	else
+	{
+		/* The atom's OK, but we must temporarily disconnect it from lp/rp */
+		/* (this removes the EMPTY arcs we made above) */
+		delsub(v->nfa, lp, atom->begin);
+		delsub(v->nfa, atom->end, rp);
+	}
+
 	/*----------
 	 * Prepare a general-purpose state skeleton.
 	 *
 	 * In the no-backrefs case, we want this:
 	 *
-	 * [lp] ---> [s] ---prefix---> [begin] ---atom---> [end] ---rest---> [rp]
+	 * [lp] ---> [s] ---prefix---> ---atom---> ---rest---> [rp]
 	 *
-	 * where prefix is some repetitions of atom.  In the general case we need
+	 * where prefix is some repetitions of atom, and "rest" is the remainder
+	 * of the branch.  In the general case we need:
 	 *
 	 * [lp] ---> [s] ---iterator---> [s2] ---rest---> [rp]
 	 *
-	 * where the iterator wraps around [begin] ---atom---> [end]
+	 * where the iterator wraps around the atom.
 	 *
 	 * We make the s state here for both cases; s2 is made below if needed
 	 *----------
 	 */
-	s = newstate(v->nfa);		/* first, new endpoints for the atom */
-	s2 = newstate(v->nfa);
-	NOERRN();
-	moveouts(v->nfa, lp, s);
-	moveins(v->nfa, rp, s2);
-	NOERRN();
-	atom->begin = s;
-	atom->end = s2;
 	s = newstate(v->nfa);		/* set up starting state */
 	NOERRN();
 	EMPTYARC(lp, s);
@@ -1190,14 +1200,14 @@ parseqatom(struct vars *v,
 	{
 		assert(atom->begin->nouts == 1);	/* just the EMPTY */
 		delsub(v->nfa, atom->begin, atom->end);
-		assert(v->subs[subno].left != NULL);
+		assert(v->subs[subno] != NULL);
 
 		/*
 		 * And here's why the recursion got postponed: it must wait until the
 		 * skeleton is filled in, because it may hit a backref that wants to
 		 * copy the filled-in skeleton.
 		 */
-		dupnfa(v->nfa, v->subs[subno].left, v->subs[subno].right,
+		dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
 			   atom->begin, atom->end);
 		NOERRN();