diff --git a/libavcodec/aac.c b/libavcodec/aac.c
index 62d4ba5d82c14850664ab4dd8e0b2363b8b6d4c3..9a67b35a3a58caddfb4a357ff9a3fa31d0bf8d66 100644
--- a/libavcodec/aac.c
+++ b/libavcodec/aac.c
@@ -753,7 +753,7 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
             const int dim = cur_band_type >= FIRST_PAIR_BT ? 2 : 4;
             const int is_cb_unsigned = IS_CODEBOOK_UNSIGNED(cur_band_type);
             int group;
-            if (cur_band_type == ZERO_BT) {
+            if (cur_band_type == ZERO_BT || cur_band_type == INTENSITY_BT2 || cur_band_type == INTENSITY_BT) {
                 for (group = 0; group < ics->group_len[g]; group++) {
                     memset(coef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(float));
                 }
@@ -771,7 +771,7 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
                         coef[group*128+k] *= scale;
                     }
                 }
-            }else if (cur_band_type != INTENSITY_BT2 && cur_band_type != INTENSITY_BT) {
+            }else {
                 for (group = 0; group < ics->group_len[g]; group++) {
                     for (k = offsets[i]; k < offsets[i+1]; k += dim) {
                         const int index = get_vlc2(gb, vlc_spectral[cur_band_type - 1].table, 6, 3);
@@ -911,21 +911,21 @@ static void apply_prediction(AACContext * ac, SingleChannelElement * sce) {
     int sfb, k;
 
     if (!sce->ics.predictor_initialized) {
-        reset_all_predictors(sce->ics.predictor_state);
+        reset_all_predictors(sce->predictor_state);
         sce->ics.predictor_initialized = 1;
     }
 
     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
-                predict(ac, &sce->ics.predictor_state[k], &sce->coeffs[k],
+                predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
                     sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
             }
         }
         if (sce->ics.predictor_reset_group)
-            reset_predictor_group(sce->ics.predictor_state, sce->ics.predictor_reset_group);
+            reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
     } else
-        reset_all_predictors(sce->ics.predictor_state);
+        reset_all_predictors(sce->predictor_state);
 }
 
 /**
@@ -983,7 +983,7 @@ static int decode_ics(AACContext * ac, SingleChannelElement * sce, GetBitContext
     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
         return -1;
 
-    if(ac->m4ac.object_type == AOT_AAC_MAIN)
+    if(ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
         apply_prediction(ac, sce);
 
     return 0;
@@ -1086,8 +1086,14 @@ static int decode_cpe(AACContext * ac, GetBitContext * gb, int elem_id) {
     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
         return ret;
 
-    if (common_window && ms_present)
+    if (common_window) {
+        if (ms_present)
         apply_mid_side_stereo(cpe);
+        if (ac->m4ac.object_type == AOT_AAC_MAIN) {
+            apply_prediction(ac, &cpe->ch[0]);
+            apply_prediction(ac, &cpe->ch[1]);
+        }
+    }
 
     apply_intensity_stereo(cpe, ms_present);
     return 0;
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index dab85b990db25245f3ec335a93b99c452f6b5c28..d2f81f2ecbb04f109e96822c7a34c7e6c4a9b353 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -163,7 +163,6 @@ typedef struct {
     int predictor_initialized;
     int predictor_reset_group;
     uint8_t prediction_used[41];
-    PredictorState predictor_state[MAX_PREDICTORS];
 } IndividualChannelStream;
 
 /**
@@ -226,6 +225,7 @@ typedef struct {
     DECLARE_ALIGNED_16(float, coeffs[1024]);  ///< coefficients for IMDCT
     DECLARE_ALIGNED_16(float, saved[512]);    ///< overlap
     DECLARE_ALIGNED_16(float, ret[1024]);     ///< PCM output
+    PredictorState predictor_state[MAX_PREDICTORS];
 } SingleChannelElement;
 
 /**