- // Caring for the overall coefficients in separate loops can give
- // a performance gain of up to 20%!
- const add & add1 = ex_to<add>(last_expanded);
- const add & add2 = ex_to<add>(cit->rest);
+ // Handling the overall coefficients in separate loops can
+ // sometimes give a performance gain of up to 15%!
+
+ const int sizedifference = ex_to<add>(last_expanded).seq.size()-ex_to<add>(cit->rest).seq.size();
+ // add2 is for the inner loop and should be the bigger of the two sums
+ // in the presence of asymptotically good sorting:
+ const add& add1 = (sizedifference<0 ? ex_to<add>(last_expanded) : ex_to<add>(cit->rest));
+ const add& add2 = (sizedifference<0 ? ex_to<add>(cit->rest) : ex_to<add>(last_expanded));