The compiler automatically strip-mines your loop and generates a clean-up loop. This means you do not need to unroll your loops by hand, and, in most cases, this also enables more vectorization.
/* Loop as written by the programmer: each iteration stores b[i] + c[i]
   into a[i] and advances i by one, for i = 0 .. n-1. */
i = 0;
while (i < n)
{
/* original loop code */
a[i] = b[i] + c[i];
++i;
}
/* the vectorizer generates the following two loops */
i = 0;
/* n - n%4 rounds n down to a multiple of 4 (for non-negative n), so this
   loop only ever processes complete groups of four elements */
while (i < (n - n%4) )
{
/* vector strip-mined loop */
/* subscript [i:i+3] denotes SIMD execution; it is illustrative notation
   for operating on elements i through i+3 together, not C syntax */
a[i:i+3] = b[i:i+3] + c[i:i+3];
/* step over the four elements just processed */
i = i + 4;
}
/* Continues from the value of i left by the preceding loop and processes
   the remaining elements (at most n%4 of them) one at a time. */
while (i < n)
{
/* scalar clean-up loop */
a[i] = b[i] + c[i];
/* advance to the next leftover element; without this increment the loop
   would never terminate whenever any elements remain */
++i;
}