diff --git a/av1/common/arm/convolve_sve2.c b/av1/common/arm/convolve_sve2.c
index 536f4414b..92a52a054 100644
--- a/av1/common/arm/convolve_sve2.c
+++ b/av1/common/arm/convolve_sve2.c
@@ -42,7 +42,7 @@ static inline int32x4_t highbd_convolve12_4_2d_v(int16x8_t s0[2],
   return vcombine_s32(vmovn_s64(sum01), vmovn_s64(sum23));
 }
 
-static inline void convolve_2d_sr_vert_12tap_sve2(
+__attribute__((noinline)) static void convolve_2d_sr_vert_12tap_sve2(
     const int16_t *src_ptr, int src_stride, uint8_t *dst_ptr,
     const int dst_stride, int w, int h, const int16x8_t y_filter_0_7,
     const int16x8_t y_filter_4_11) {
@@ -162,14 +162,14 @@ void av1_convolve_2d_sr_sve2(const uint8_t *src, int src_stride, uint8_t *dst,
                              const InterpFilterParams *filter_params_y,
                              const int subpel_x_qn, const int subpel_y_qn,
                              ConvolveParams *conv_params) {
-  if (w == 2 || h == 2) {
-    av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
-                         filter_params_x, filter_params_y, subpel_x_qn,
-                         subpel_y_qn, conv_params);
+  if (w == 2 || h == 2 || w % 4 != 0 || h % 4 != 0) {
+    av1_convolve_2d_sr_neon_i8mm(src, src_stride, dst, dst_stride, w, h,
+                                 filter_params_x, filter_params_y, subpel_x_qn,
+                                 subpel_y_qn, conv_params);
     return;
   }
 
-  if (filter_params_x->taps > 8) {
+  if (filter_params_x->taps == 12) {
     const int im_h = h + filter_params_y->taps - 1;
     const int im_stride = MAX_SB_SIZE;
     const int vert_offset = filter_params_x->taps / 2 - 1;
diff --git a/av1/common/arm/highbd_compound_convolve_sve2.c b/av1/common/arm/highbd_compound_convolve_sve2.c
index 668dfbf5f..667c19944 100644
--- a/av1/common/arm/highbd_compound_convolve_sve2.c
+++ b/av1/common/arm/highbd_compound_convolve_sve2.c
@@ -360,10 +360,17 @@ void av1_highbd_dist_wtd_convolve_x_sve2(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
     int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
     ConvolveParams *conv_params, int bd) {
+  const int x_filter_taps = get_filter_tap(filter_params_x, subpel_x_qn);
+  if (w < 4 || h < 4 || (w % 8 != 0 && w != 4) || h % 4 != 0 ||
+      x_filter_taps > 8) {
+    av1_highbd_dist_wtd_convolve_x_neon(src, src_stride, dst, dst_stride, w, h,
+                                        filter_params_x, subpel_x_qn,
+                                        conv_params, bd);
+    return;
+  }
   DECLARE_ALIGNED(16, uint16_t,
                   im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
   CONV_BUF_TYPE *dst16 = conv_params->dst;
-  const int x_filter_taps = get_filter_tap(filter_params_x, subpel_x_qn);
 
   if (x_filter_taps == 6) {
     av1_highbd_dist_wtd_convolve_x_neon(src, src_stride, dst, dst_stride, w, h,
@@ -794,6 +801,12 @@ void av1_highbd_dist_wtd_convolve_y_sve2(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
     int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn,
     ConvolveParams *conv_params, int bd) {
+  if (w < 4 || h < 4 || (w % 8 != 0 && w != 4) || h % 4 != 0) {
+    av1_highbd_dist_wtd_convolve_y_neon(src, src_stride, dst, dst_stride, w, h,
+                                        filter_params_y, subpel_y_qn,
+                                        conv_params, bd);
+    return;
+  }
   DECLARE_ALIGNED(16, uint16_t,
                   im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
   CONV_BUF_TYPE *dst16 = conv_params->dst;
@@ -1460,6 +1473,15 @@ void av1_highbd_dist_wtd_convolve_2d_sve2(
     int h, const InterpFilterParams *filter_params_x,
     const InterpFilterParams *filter_params_y, const int subpel_x_qn,
     const int subpel_y_qn, ConvolveParams *conv_params, int bd) {
+  const int x_filter_taps = get_filter_tap(filter_params_x, subpel_x_qn);
+  const int y_filter_taps = get_filter_tap(filter_params_y, subpel_y_qn);
+  if (w < 4 || h < 4 || (w % 8 != 0 && w != 4) || h % 4 != 0 ||
+      x_filter_taps > 8 || y_filter_taps > 8) {
+    av1_highbd_dist_wtd_convolve_2d_neon(
+        src, src_stride, dst, dst_stride, w, h, filter_params_x,
+        filter_params_y, subpel_x_qn, subpel_y_qn, conv_params, bd);
+    return;
+  }
   DECLARE_ALIGNED(16, uint16_t,
                   im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
   DECLARE_ALIGNED(16, uint16_t,
@@ -1467,10 +1489,8 @@ void av1_highbd_dist_wtd_convolve_2d_sve2(
 
   CONV_BUF_TYPE *dst16 = conv_params->dst;
   int dst16_stride = conv_params->dst_stride;
-  const int x_filter_taps = get_filter_tap(filter_params_x, subpel_x_qn);
   const int clamped_x_taps = x_filter_taps < 4 ? 4 : x_filter_taps;
 
-  const int y_filter_taps = get_filter_tap(filter_params_y, subpel_y_qn);
   const int clamped_y_taps = y_filter_taps < 4 ? 4 : y_filter_taps;
 
   if (x_filter_taps == 6 || y_filter_taps == 6) {
diff --git a/av1/common/arm/highbd_convolve_sve2.c b/av1/common/arm/highbd_convolve_sve2.c
index fcf9d7b0a..d55611897 100644
--- a/av1/common/arm/highbd_convolve_sve2.c
+++ b/av1/common/arm/highbd_convolve_sve2.c
@@ -100,7 +100,7 @@ static inline uint16x8_t convolve12_8_x(int16x8_t s0, int16x8_t s1,
   return vminq_u16(res, max);
 }
 
-static inline void highbd_convolve_x_sr_12tap_sve2(
+__attribute__((noinline)) static void highbd_convolve_x_sr_12tap_sve2(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
     int width, int height, const int16_t *y_filter_ptr,
     ConvolveParams *conv_params, int bd) {
@@ -384,12 +384,24 @@ void av1_highbd_convolve_x_sr_sve2(const uint16_t *src, int src_stride,
   src -= horiz_offset;
 
   if (x_filter_taps == 12) {
+    if ((w % 8 != 0 && w != 4) || h % 4 != 0) {
+      av1_highbd_convolve_x_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                                    filter_params_x, subpel_x_qn, conv_params,
+                                    bd);
+      return;
+    }
     highbd_convolve_x_sr_12tap_sve2(src, src_stride, dst, dst_stride, w, h,
                                     x_filter_ptr, conv_params, bd);
     return;
   }
 
   if (x_filter_taps == 8) {
+    if ((w % 8 != 0 && w != 4) || h % 4 != 0) {
+      av1_highbd_convolve_x_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                                    filter_params_x, subpel_x_qn, conv_params,
+                                    bd);
+      return;
+    }
     highbd_convolve_x_sr_8tap_sve2(src, src_stride, dst, dst_stride, w, h,
                                    x_filter_ptr, conv_params, bd);
     return;
@@ -421,7 +433,7 @@ static inline uint16x4_t highbd_convolve12_4_y(int16x8_t s0[2], int16x8_t s1[2],
   return vmin_u16(res, max);
 }
 
-static inline void highbd_convolve_y_sr_12tap_sve2(
+__attribute__((noinline)) static void highbd_convolve_y_sr_12tap_sve2(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
     int width, int height, const int16_t *y_filter_ptr, int bd) {
   const int16x8_t y_filter_0_7 = vld1q_s16(y_filter_ptr);
@@ -848,20 +860,37 @@ void av1_highbd_convolve_y_sr_sve2(const uint16_t *src, int src_stride,
 
   src -= vert_offset * src_stride;
 
-  if (y_filter_taps > 8) {
+  if (w % 4 != 0 || h % 4 != 0) {
+    av1_highbd_convolve_y_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                                  filter_params_y, subpel_y_qn, bd);
+    return;
+  }
+
+  if (y_filter_taps == 12) {
     highbd_convolve_y_sr_12tap_sve2(src, src_stride, dst, dst_stride, w, h,
                                     y_filter_ptr, bd);
     return;
   }
 
+  if (y_filter_taps == 8) {
+    if (w != 4 && w % 8 != 0) {
+      av1_highbd_convolve_y_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                                    filter_params_y, subpel_y_qn, bd);
+      return;
+    }
+    highbd_convolve_y_sr_8tap_sve2(src, src_stride, dst, dst_stride, w, h,
+                                   y_filter_ptr, bd);
+    return;
+  }
+
   if (y_filter_taps == 4) {
     highbd_convolve_y_sr_4tap_sve2(src + 2 * src_stride, src_stride, dst,
                                    dst_stride, w, h, y_filter_ptr, bd);
     return;
   }
 
-  highbd_convolve_y_sr_8tap_sve2(src, src_stride, dst, dst_stride, w, h,
-                                 y_filter_ptr, bd);
+  av1_highbd_convolve_y_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                                filter_params_y, subpel_y_qn, bd);
 }
 
 static inline uint16x4_t convolve12_4_2d_h(
@@ -934,7 +963,7 @@ static inline uint16x8_t convolve12_8_2d_h(int16x8_t s0, int16x8_t s1,
   return vcombine_u16(vqmovun_s32(sum0123), vqmovun_s32(sum4567));
 }
 
-static inline void highbd_convolve_2d_sr_horiz_12tap_sve2(
+__attribute__((noinline)) static void highbd_convolve_2d_sr_horiz_12tap_sve2(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
     int width, int height, const int16_t *y_filter_ptr,
     ConvolveParams *conv_params, const int x_offset) {
@@ -1204,7 +1233,7 @@ static inline uint16x4_t highbd_convolve12_4_2d_v(
   return vmin_u16(res, max);
 }
 
-static inline void highbd_convolve_2d_sr_vert_12tap_sve2(
+__attribute__((noinline)) static void highbd_convolve_2d_sr_vert_12tap_sve2(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
     int width, int height, const int16_t *y_filter_ptr,
     ConvolveParams *conv_params, int bd, const int y_offset) {
@@ -1645,10 +1674,10 @@ void av1_highbd_convolve_2d_sr_sve2(const uint16_t *src, int src_stride,
                                     const int subpel_x_qn,
                                     const int subpel_y_qn,
                                     ConvolveParams *conv_params, int bd) {
-  if (w == 2 || h == 2) {
-    av1_highbd_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
-                                filter_params_x, filter_params_y, subpel_x_qn,
-                                subpel_y_qn, conv_params, bd);
+  if (w == 2 || h == 2 || w % 4 != 0 || h % 4 != 0) {
+    av1_highbd_convolve_2d_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                                   filter_params_x, filter_params_y,
+                                   subpel_x_qn, subpel_y_qn, conv_params, bd);
     return;
   }
 
