diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c
index 1abc7f5c345c4f32237b27ad261a0fd3bd541a0b..d6f79dc755b46d9b53e491422c53c604dbe73f4f 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c
@@ -159,7 +159,7 @@ void mdp5_cmd_encoder_disable(struct drm_encoder *encoder)
 	pingpong_tearcheck_disable(encoder);
 
 	mdp5_ctl_set_encoder_state(ctl, pipeline, false);
-	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf), true);
 
 	bs_set(mdp5_cmd_enc, 0);
 
@@ -180,7 +180,7 @@ void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 	if (pingpong_tearcheck_enable(encoder))
 		return;
 
-	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf), true);
 
 	mdp5_ctl_set_encoder_state(ctl, pipeline, true);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index 8c5ed0b59e46f89e73e255d14dbb0066b883de9f..91c829a2cc85b81cb4d28a0f6a61b8de05cee7d6 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -97,9 +97,13 @@ static u32 crtc_flush(struct drm_crtc *crtc, u32 flush_mask)
 	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
 	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
+	bool start = !mdp5_cstate->defer_start;
+
+	mdp5_cstate->defer_start = false;
 
 	DBG("%s: flush=%08x", crtc->name, flush_mask);
-	return mdp5_ctl_commit(ctl, pipeline, flush_mask);
+
+	return mdp5_ctl_commit(ctl, pipeline, flush_mask, start);
 }
 
 /*
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
index 439e0a300e258abab7c513d341af20bcf965dfc3..1197f060c5c687519ec86476a422de33e5e1a55e 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
@@ -41,7 +41,9 @@ struct mdp5_ctl {
 	u32 status;
 
 	bool encoder_enabled;
-	uint32_t start_mask;
+
+	/* flush bits deferred by start=false commits, not yet written */
+	u32 flush_mask;
 
 	/* REG_MDP5_CTL_*(<id>) registers access info + lock: */
 	spinlock_t hw_lock;
@@ -173,16 +175,8 @@ static void set_ctl_op(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline)
 
 int mdp5_ctl_set_pipeline(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline)
 {
-	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
-	struct mdp5_kms *mdp5_kms = get_kms(ctl_mgr);
+	struct mdp5_kms *mdp5_kms = get_kms(ctl->ctlm);
 	struct mdp5_interface *intf = pipeline->intf;
-	struct mdp5_hw_mixer *mixer = pipeline->mixer;
-	struct mdp5_hw_mixer *r_mixer = pipeline->r_mixer;
-
-	ctl->start_mask = mdp_ctl_flush_mask_lm(mixer->lm) |
-			  mdp_ctl_flush_mask_encoder(intf);
-	if (r_mixer)
-		ctl->start_mask |= mdp_ctl_flush_mask_lm(r_mixer->lm);
 
 	/* Virtual interfaces need not set a display intf (e.g.: Writeback) */
 	if (!mdp5_cfg_intf_is_virtual(intf->type))
@@ -198,7 +192,7 @@ static bool start_signal_needed(struct mdp5_ctl *ctl,
 {
 	struct mdp5_interface *intf = pipeline->intf;
 
-	if (!ctl->encoder_enabled || ctl->start_mask != 0)
+	if (!ctl->encoder_enabled)
 		return false;
 
 	switch (intf->type) {
@@ -227,25 +221,6 @@ static void send_start_signal(struct mdp5_ctl *ctl)
 	spin_unlock_irqrestore(&ctl->hw_lock, flags);
 }
 
-static void refill_start_mask(struct mdp5_ctl *ctl,
-			      struct mdp5_pipeline *pipeline)
-{
-	struct mdp5_interface *intf = pipeline->intf;
-	struct mdp5_hw_mixer *mixer = pipeline->mixer;
-	struct mdp5_hw_mixer *r_mixer = pipeline->r_mixer;
-
-	ctl->start_mask = mdp_ctl_flush_mask_lm(mixer->lm);
-	if (r_mixer)
-		ctl->start_mask |= mdp_ctl_flush_mask_lm(r_mixer->lm);
-
-	/*
-	 * Writeback encoder needs to program & flush
-	 * address registers for each page flip..
-	 */
-	if (intf->type == INTF_WB)
-		ctl->start_mask |= mdp_ctl_flush_mask_encoder(intf);
-}
-
 /**
  * mdp5_ctl_set_encoder_state() - set the encoder state
  *
@@ -268,7 +243,6 @@ int mdp5_ctl_set_encoder_state(struct mdp5_ctl *ctl,
 
 	if (start_signal_needed(ctl, pipeline)) {
 		send_start_signal(ctl);
-		refill_start_mask(ctl, pipeline);
 	}
 
 	return 0;
@@ -557,17 +531,14 @@ static void fix_for_single_flush(struct mdp5_ctl *ctl, u32 *flush_mask,
  */
 u32 mdp5_ctl_commit(struct mdp5_ctl *ctl,
 		    struct mdp5_pipeline *pipeline,
-		    u32 flush_mask)
+		    u32 flush_mask, bool start)
 {
 	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
 	unsigned long flags;
 	u32 flush_id = ctl->id;
 	u32 curr_ctl_flush_mask;
 
-	ctl->start_mask &= ~flush_mask;
-
-	VERB("flush_mask=%x, start_mask=%x, trigger=%x", flush_mask,
-			ctl->start_mask, ctl->pending_ctl_trigger);
+	VERB("flush_mask=%x, trigger=%x", flush_mask, ctl->pending_ctl_trigger);
 
 	if (ctl->pending_ctl_trigger & flush_mask) {
 		flush_mask |= MDP5_CTL_FLUSH_CTL;
@@ -582,6 +553,14 @@ u32 mdp5_ctl_commit(struct mdp5_ctl *ctl,
 
 	fix_for_single_flush(ctl, &flush_mask, &flush_id);
 
+	if (!start) {
+		ctl->flush_mask |= flush_mask;
+		return curr_ctl_flush_mask;
+	} else {
+		flush_mask |= ctl->flush_mask;
+		ctl->flush_mask = 0;
+	}
+
 	if (flush_mask) {
 		spin_lock_irqsave(&ctl->hw_lock, flags);
 		ctl_write(ctl, REG_MDP5_CTL_FLUSH(flush_id), flush_mask);
@@ -590,7 +569,6 @@ u32 mdp5_ctl_commit(struct mdp5_ctl *ctl,
 
 	if (start_signal_needed(ctl, pipeline)) {
 		send_start_signal(ctl);
-		refill_start_mask(ctl, pipeline);
 	}
 
 	return curr_ctl_flush_mask;
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
index b63120388dc67c8f1dec594c63465977c1a617c5..403b0db0fa4c11e6d72076eeca8360d450a00f16 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
@@ -78,7 +78,7 @@ u32 mdp_ctl_flush_mask_encoder(struct mdp5_interface *intf);
 
 /* @flush_mask: see CTL flush masks definitions below */
 u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
-		    u32 flush_mask);
+		    u32 flush_mask, bool start);
 u32 mdp5_ctl_get_commit_status(struct mdp5_ctl *ctl);
 
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
index 36ad3cbe5f79a0540f107c86269ef9c2ef71b6b4..9af94e35f678dd9f2a445989fdb8c98278ad60ac 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
@@ -228,7 +228,7 @@ static void mdp5_vid_encoder_disable(struct drm_encoder *encoder)
 	spin_lock_irqsave(&mdp5_encoder->intf_lock, flags);
 	mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(intfn), 0);
 	spin_unlock_irqrestore(&mdp5_encoder->intf_lock, flags);
-	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf), true);
 
 	/*
 	 * Wait for a vsync so we know the ENABLE=0 latched before
@@ -262,7 +262,7 @@ static void mdp5_vid_encoder_enable(struct drm_encoder *encoder)
 	spin_lock_irqsave(&mdp5_encoder->intf_lock, flags);
 	mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(intfn), 1);
 	spin_unlock_irqrestore(&mdp5_encoder->intf_lock, flags);
-	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf), true);
 
 	mdp5_ctl_set_encoder_state(ctl, pipeline, true);
 
@@ -319,6 +319,7 @@ static int mdp5_encoder_atomic_check(struct drm_encoder *encoder,
 
 	mdp5_cstate->ctl = ctl;
 	mdp5_cstate->pipeline.intf = intf;
+	mdp5_cstate->defer_start = true;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
index aeb94aa461b510c1dd740879de3f7efa312504d7..425a03d213e5f54d1f8660dc3e95dc1549549c82 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
@@ -133,6 +133,14 @@ struct mdp5_crtc_state {
 	u32 pp_done_irqmask;
 
 	bool cmd_mode;
+
+	/*
+	 * Defer writing CTL[n].START on flush?  Set to true when the encoder
+	 * has changed: encoder->enable() is called after the crtc state is
+	 * committed, but CTL[n].START must be written only once, so the
+	 * write is deferred until encoder->enable().
+	 */
+	bool defer_start;
 };
 #define to_mdp5_crtc_state(x) \
 		container_of(x, struct mdp5_crtc_state, base)
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
index 98d4d73317677621dd7f1bae2a51ec635c54fbda..5dc42d89b588c07039590f956c88fb8af42ea877 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
@@ -545,7 +545,7 @@ static void mdp5_plane_atomic_async_update(struct drm_plane *plane,
 
 		ctl = mdp5_crtc_get_ctl(new_state->crtc);
 
-		mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane));
+		mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane), true);
 	}
 
 	*to_mdp5_plane_state(plane->state) =