Code: Select all
/*
 * Compare four packed byte values against a threshold in one pass.
 *
 * Loads one 32-bit word from `a` and returns a 4-bit mask in which bit i is
 * set iff byte lane i of that word is strictly greater than `thres`
 * (lane 0 = least significant byte of the loaded word; on a little-endian
 * target that is a[0] -- TODO confirm for the target in use).
 *
 * Preconditions:
 *   - every input byte and `thres` must be in 0..127: bit 7 of each lane is
 *     used as a guard bit, so larger values give wrong results;
 *   - `a` must satisfy whatever alignment the `ldw` instruction requires
 *     (presumably word alignment -- verify against the target ISA).
 */
u32 get4bits(u8 *a, u8 thres)
{
    u32 x;

    /*
     * Fetch all four values with a single word load. The "memory" clobber
     * tells the compiler that the asm reads memory through `a`, so stores to
     * the buffer issued before this call cannot be moved past the load.
     */
    asm("ldw %0,0[%1]" : "=r"(x) : "r"(a) : "memory");

    /* Set the guard bit (bit 7) of every lane. */
    x |= 0x80808080;

    /*
     * Replicate (thres + 1) into all four lanes. The replication has to be
     * done on the 32-bit copy: shifting the 8-bit parameter itself would
     * discard the upper lanes.
     */
    u32 thres4 = (u32)thres + 1;
    thres4 |= thres4 << 8;
    thres4 |= thres4 << 16;

    /*
     * Per lane: 0x80 + v - (thres + 1). With v and thres in 0..127 the result
     * never goes negative, so no borrow crosses into the next lane, and the
     * guard bit survives iff v > thres.
     */
    x -= thres4;

    /*
     * Keep only the guard bits and gather them into bits 0..3.
     * After the first shift they sit at bits 0, 8, 16 and 24; the two
     * shift-or steps then bring 8 -> 1, 16 -> 2 and 24 -> 17 -> 3.
     */
    x &= 0x80808080;
    x >>= 7;
    x |= x >> 7;
    x |= x >> 14;

    return x & 15;
}
If you can change the input order, you can indeed shave off a few more cycles.