// This file is distributed under a BSD license. See LICENSE.txt for details.

// FRIED
// decoding functions.

#include "_types.hpp"
#include "fried.hpp"
#include "fried_internal.hpp"
#include

namespace FRIED
{

static void writeBitmapRow(DecodeContext &ctx,sInt row,sS16 *srp)
{
  if(row < 0 || row >= ctx.FH.YRes)
    return;

  sInt cols = ctx.FH.XRes;
  sInt colsPad = ctx.XResPadded;
  sU8 *dst = ctx.Image;

  if(ctx.ChannelSetup < 2) // grayscale
  {
    dst += row * 2 * cols;

    if(ctx.ChannelSetup == 0)
      gray_x_convert_inv(cols,colsPad,srp,dst);
    else
      gray_alpha_convert_inv(cols,colsPad,srp,dst);
  }
  else
  {
    dst += row * 4 * cols;

    if(ctx.ChannelSetup == 2)
      color_x_convert_inv(cols,colsPad,srp,dst);
    else if(ctx.ChannelSetup == 3)
      color_alpha_convert_inv(cols,colsPad,srp,dst);
  }

  // TODO: write the corresponding row using the correct innerloop
}

// load one quadword (4 coefficients) from each of the four source rows c0..c3
// at the given byte offset
#define MM_LOAD4(reg0,reg1,reg2,reg3,offset) \
  __asm mov eax, [c0] \
  __asm mov ebx, [c1] \
  __asm mov ecx, [c2] \
  __asm mov edx, [c3] \
  __asm movq reg0, [eax+offset] \
  __asm movq reg1, [ebx+offset] \
  __asm movq reg2, [ecx+offset] \
  __asm movq reg3, [edx+offset]

// store four quadwords; rc>>4 selects the dest row pointer, rc&0xf the word
// column relative to edi (edi = xOffs converted to a byte offset)
#define MM_STORE4(reg0,reg1,reg2,reg3,rc0,rc1,rc2,rc3) \
  __asm mov eax, [esi+((rc0>>4)*4)] \
  __asm mov ebx, [esi+((rc1>>4)*4)] \
  __asm mov ecx, [esi+((rc2>>4)*4)] \
  __asm mov edx, [esi+((rc3>>4)*4)] \
  __asm movq [eax+edi+((rc0&0xf)*2)], reg0 \
  __asm movq [ebx+edi+((rc1&0xf)*2)], reg1 \
  __asm movq [ecx+edi+((rc2&0xf)*2)], reg2 \
  __asm movq [edx+edi+((rc3&0xf)*2)], reg3

// 4x4 transpose of 16-bit words; results end up in reg0,reg1,reg4,reg3,
// reg2 and reg5 are clobbered
#define MM_TRANSPOSE(reg0,reg1,reg2,reg3,reg4,reg5) \
  __asm movq reg4, reg0 \
  __asm movq reg5, reg2 \
  __asm punpcklwd reg0, reg1 \
  __asm punpckhwd reg4, reg1 \
  __asm punpcklwd reg2, reg3 \
  __asm punpckhwd reg5, reg3 \
  __asm movq reg1, reg0 \
  __asm movq reg3, reg4 \
  __asm punpckldq reg0, reg2 \
  __asm punpckhdq reg1, reg2 \
  __asm punpckldq reg4, reg5 \
  __asm punpckhdq reg3, reg5

// scatters 16 coefficients from each of the four source rows c0..c3 into
// dest[0],dest[4],dest[8],dest[12] as 4x4 transposed blocks, starting at
// word column xOffs
static __forceinline void shuffle4x16(sS16 **dest,sInt xOffs,sS16 *c0,sS16 *c1,sS16 *c2,sS16 *c3)
{
  __asm
  {
    mov esi, [dest];
    mov edi, [xOffs];
    add edi, edi;
  }

  MM_LOAD4(mm0,mm1,mm2,mm3,0);
  MM_TRANSPOSE(mm0,mm1,mm2,mm3,mm4,mm5);
  MM_LOAD4(mm2,mm5,mm6,mm7,8);
  MM_STORE4(mm0,mm1,mm4,mm3,0x00,0x04,0x44,0x40);
  MM_TRANSPOSE(mm2,mm5,mm6,mm7,mm0,mm1);
  MM_LOAD4(mm1,mm3,mm4,mm6,16);
  MM_STORE4(mm2,mm5,mm0,mm7,0x80,0xc0,0xc4,0x84);
  MM_TRANSPOSE(mm1,mm3,mm4,mm6,mm2,mm5);
  MM_LOAD4(mm0,mm4,mm5,mm7,24);
  MM_STORE4(mm1,mm3,mm2,mm6,0x88,0xc8,0xcc,0x8c);
  MM_TRANSPOSE(mm0,mm4,mm5,mm7,mm1,mm3);
  MM_STORE4(mm0,mm4,mm1,mm7,0x4c,0x48,0x08,0x0c);

  __asm emms;
}

static void inv_reorder(sS16 **dest,sInt xOffs,sS16 *src,sInt cwidth)
{
  sInt nmb = cwidth/16;
  sS16 *g0,*g1,*g2,*g3;
  sInt mb;

  // first row of block AC coeffs+DC
  g0 = src;
  g1 = src + 2 * cwidth;
  g2 = src + 3 * cwidth;
  g3 = src + 9 * cwidth;

  for(mb=0;mb<nmb;mb++)
  {
    // ...
  }

  // ...
}

// ...

    if(bytes + 2 > bytesEnd)
      return -1;

    const sU8 *bytesChunkEnd = bytes + (bytes[0] + (bytes[1] << 8));
    bytes += 2;
    if(bytesChunkEnd > bytesEnd)
      return -1;

    // process channels
    for(sInt ch=0;ch<chans;ch++)
    {
      // ...

      if(bytes >= bytesEnd)
        return -1;

      if(*bytes & 1) // long code
      {
        if(bytes + 1 >= bytesEnd)
          return -1;

        encsize = ((bytes[0] + (bytes[1] << 8)) & ~1) * 4;
        bytes += 2;
      }
      else // short code
        encsize = *bytes++ * 4;

      // decode coefficients
      g0 = ctx.CK + co;
      sSetMem(g0,0,cksize * sizeof(sS16));

      if(encsize)
      {
        sInt xminit,nbs;

        xminit = 625 >> (qs >> 3);
        nbs = rlgrdec(bytes,bytesEnd - bytes,g0,sMin(encsize,cwidth),xminit);
        if(nbs < 0)
          return -1;
        else
          bytes += nbs;

        if(encsize > cwidth)
        {
          xminit = 94 >> (qs >> 3);
          nbs = rlgrdec(bytes,bytesEnd - bytes,g0+cwidth,encsize-cwidth,xminit);
          if(nbs < 0)
            return -1;
          else
            bytes += nbs;
        }
      }

      // un-delta dc coefficients
      sInt nmb =
        sMin(encsize,cwidth/16);
      sInt n = 0;
      while(++n < nmb)
        g0[n] += g0[n-1];

      // dequantize, undo reordering
      newDequantize(qs,g0,encsize,cwidth);
      inv_reorder(srp+16,so + cjs[ch],g0,cwidth);

      // this channel is done
      cjs[ch] += cwidth;
    }

    if(bytes != bytesChunkEnd)
      return -1;
  }

  return bytes - byteStart;
}

static void ihlbt_group1(sInt swidth,sInt so,sS16 **srp)
{
  sS16 *p0,*p1,*p2,*p3;
  sInt col;

  // first row of macroblocks
  p0 = srp[16] + so;
  p1 = srp[20] + so;
  p2 = srp[24] + so;
  p3 = srp[28] + so;

  for(col=0;col<swidth;col++)
  {
    // ...
  }

  // ...
}

// ...

  if(ctx.FH.Channels > 16)
    return sFALSE;

  sCopyMem(ctx.Chans,data,ctx.FH.Channels * sizeof(ChannelHeader));
  data += ctx.FH.Channels * sizeof(ChannelHeader);

  // calculate some important constants
  sInt chans = ctx.FH.Channels;

  ctx.XResPadded = (ctx.FH.XRes + 31) & ~31;
  ctx.YResPadded = (ctx.FH.YRes + 31) & ~31;

  sInt sbw = chans * ctx.XResPadded;
  sInt cbw = chans * ctx.FH.ChunkWidth;

  ctx.SB = new sS16[sbw * 32];
  ctx.QB = new sInt[cbw * 16];
  ctx.CK = new sS16[cbw * 16];

  // determine channel setup (rather faked at the moment)
  if(chans < 1 || ctx.Chans[0].Type != CHANNEL_Y)
    return sFALSE;

  if(chans == 1)
    ctx.ChannelSetup = 0; // gray w/out alpha
  else if(chans == 2 && ctx.Chans[1].Type == CHANNEL_ALPHA)
    ctx.ChannelSetup = 1; // gray w/ alpha
  else if(chans >= 3 && ctx.Chans[1].Type == CHANNEL_CO && ctx.Chans[2].Type == CHANNEL_CG)
  {
    if(chans == 3)
      ctx.ChannelSetup = 2; // color w/out alpha
    else if(chans == 4 && ctx.Chans[3].Type == CHANNEL_ALPHA)
      ctx.ChannelSetup = 3; // color w/ alpha
    else
      return sFALSE;
  }
  else
    return sFALSE;

  // allocate image
  ctx.Image = new sU8[ctx.FH.XRes * ctx.FH.YRes * (ctx.ChannelSetup >= 2 ? 4 : 2)];

  // decode
  if(PerformDecode(ctx,data,dataEnd - data) >= 0)
  {
    xout = ctx.FH.XRes;
    yout = ctx.FH.YRes;
    dataout = ctx.Image;
  }
  else
  {
    xout = 0;
    yout = 0;
    dataout = 0;
    delete[] ctx.Image;
  }

  // free everything
  delete[] ctx.SB;
  delete[] ctx.QB;
  delete[] ctx.CK;

  return dataout != 0;
}

}
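// Illustrative sketch only, not part of the original decoder: the per-channel
// size field that the channel loop above parses. Each channel's coded
// coefficient count is stored either as a single byte (LSB clear, "short
// code") or, with the LSB set as a flag, as a 16-bit little-endian value
// ("long code"); either way the count is in units of 4 coefficients and the
// flag bit is masked off before scaling. The helper name and signature below
// are hypothetical, chosen just to restate that framing in isolation; it
// advances the read pointer like the decoder does and returns -1 on underrun.
static sInt exampleReadEncodedSize(const sU8 *&bytes,const sU8 *bytesEnd) // hypothetical helper
{
  if(bytes >= bytesEnd)
    return -1;                                  // ran out of data

  if(*bytes & 1)                                // long code: 16-bit count, flag in bit 0
  {
    if(bytes + 1 >= bytesEnd)
      return -1;

    sInt size = ((bytes[0] + (bytes[1] << 8)) & ~1) * 4;
    bytes += 2;
    return size;
  }

  return *bytes++ * 4;                          // short code: single byte count
}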