/*
 * Blend two vectors of eight 16-bit lanes using an 8.8 fixed-point weight.
 *
 * Each result lane is (a * (256 - weight) + b * weight) >> 8, so weight == 0
 * yields a and weight == 256 yields b. The quotient is truncated, not rounded.
 *
 * All arithmetic is performed modulo 2^16 per lane. The result is exact when
 * every lane of a and b holds a value in 0..255 and weight is in 0..256,
 * because the weighted sum is then at most 255 * 256 = 65280. Outside that
 * range the intermediate products or their sum may wrap.
 *
 * Declared static: in C99 and later a plain `inline` definition emits no
 * external symbol, so a call the compiler chooses not to inline would fail
 * to link.
 */
static inline __m128i interpolate(__m128i a, __m128i b, int weight) {
    const __m128i w = _mm_set1_epi16((short)weight);
    const __m128i wInv = _mm_sub_epi16(_mm_set1_epi16(256), w);

    /* Low 16 bits of each product; no information is lost for 8-bit inputs. */
    const __m128i weightedA = _mm_mullo_epi16(a, wInv);
    const __m128i weightedB = _mm_mullo_epi16(b, w);
    const __m128i sum = _mm_add_epi16(weightedA, weightedB);

    /* Logical shift so sums >= 32768 are treated as unsigned: divide by 256. */
    return _mm_srli_epi16(sum, 8);
}
void resize(unsigned char* sc, int w1, int h1,
unsigned char* dest, int w2, int h2,
int yoff, int yoffend, int xoff, int xoffend) {
const float dx = (float)w1 / (float)w2;
const float dy = (float)h1 / (float)h2;
for (int y = yoff; y < yoffend; ++y) {
// Compute source row indices and interpolation weights
int y1 = (int)(dy * (float)y);
int y2 = (int)(dy * (float)(y + 1)) - 1;
int w1y = y1 * w1;
int w2y = (y - yoff) * w2;
int w2yend = (y + 1 - yoff) * w2;
int weight1 = (int)((dy * (float)y - (float)y1) * 256.f);
int weight2 = 256 - weight1;
for (int x = xoff; x < xoffend; x += 4) {
// Compute source column indices and interpolation weights
int x1 = (int)(dx * (float)x);
int x2 = (int)(dx * (float)(x + 4)) - 1;
__m128i w1x = _mm_set_epi16(
sc[(y1 * w1) + x2], sc[(y1 * w1) + x1], sc[(y1 * w1) + x2 - 1], sc[(y1 * w1) + x1 - 1]…