#include "image.h"
#include "cpuinfo.h"
#include "mmx.h"
#include "fourcc.h"

#ifdef WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
#include <fcntl.h>

#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <cmath>

using namespace std;

// Out-of-class definition of the static member col::t, initialised from a
// default-constructed lookuptable.
// NOTE(review): presumably holds the tables used by the col <-> yuv
// conversions declared elsewhere -- confirm against the header.
lookuptable col::t = lookuptable();

// non-conversion constructors
/**
 * Builds an independent image with the same format and quality as 'im'.
 * A new buffer of m_iBytes[0] bytes is allocated and filled from im->Data();
 * when the source has no data the buffer is left uninitialised.
 */
CImage::CImage(const CImage* im)
    :m_pInfo(new BitmapInfo(im->GetFmt()))
{
    fillMembers();
    m_fQuality = im->GetQuality();

    uint8_t* const buffer = new uint8_t[m_iBytes[0]];
    m_pcData[0] = buffer;

    const uint8_t* const source = im->Data();
    if (source != 0)
        memcpy(buffer, source, m_iBytes[0]);
}

/**
 * Creates new image in format 'header' from specified memory area.
 *
 * @param header  format description; a private copy is stored in m_pInfo.
 * @param data    source pixels (may be NULL when copying, in which case the
 *                new buffer is left uninitialised).
 * @param copy    true  - allocate an own buffer and copy 'data' into it;
 *                false - reuse 'data' directly; the image then does not own
 *                        the memory and will not free it.
 */
CImage::CImage(const BitmapInfo* header, const uint8_t* data, bool copy)
    :m_pInfo(new BitmapInfo(header))
{
    fillMembers();
    if (!copy)
    {
        // The buffer is only borrowed. The old "(const uint8_t*) lhs = rhs"
        // cast-as-lvalue form is rejected by current compilers, so constness
        // is dropped explicitly instead.
        m_pcData[0] = const_cast<uint8_t*>(data);
        m_bDataOwner = false;
        return;
    }

    m_pcData[0] = new uint8_t[m_iBytes[0]];
    if (data)
        memcpy(m_pcData[0], data, m_iBytes[0]);
}

/**
 * Creates new image in format 'header' from up to three separate planes.
 *
 * For fccYV12 the plane sizes are set to Pixels() for plane 0 and
 * Pixels() / 4 for planes 1 and 2; every other format uses plane 0 only.
 *
 * @param header  format description; a private copy is stored in m_pInfo.
 * @param data    array of three plane pointers.
 * @param copy    false - reuse the caller's planes (not owned by the image);
 *                true  - allocate plane 0 and copy data[0] into it. Copying
 *                        planes 1 and 2 is not implemented; a message is
 *                        printed if they are supplied.
 */
CImage::CImage(const BitmapInfo* header, const uint8_t* data[3], bool copy)
    :m_pInfo(new BitmapInfo(header))
{
    fillMembers();

    const bool planar = (m_pInfo->biCompression == fccYV12);
    if (planar)
    {
        m_iBytes[0] = Pixels();
        m_iBytes[1] = m_iBytes[2] = Pixels() / 4;
    }

    if (!copy)
    {
        // Borrow the caller's memory; const is dropped explicitly because the
        // cast-as-lvalue form used previously is not valid C++.
        m_pcData[0] = const_cast<uint8_t*>(data[0]);
        if (planar)
        {
            m_pcData[1] = const_cast<uint8_t*>(data[1]);
            m_pcData[2] = const_cast<uint8_t*>(data[2]);
        }
        m_bDataOwner = false;
        return;
    }

    m_pcData[0] = new uint8_t[m_iBytes[0]];
    if (data[0])
        // Copy the pixels of plane 0 (data[0]), not the bytes of the pointer
        // array 'data' itself.
        memcpy(m_pcData[0], data[0], m_iBytes[0]);
    if (planar && (data[1] || data[2]))
        printf("Unsuported CImage constructor for now\n");
}

/*
 * Creates a 24-bit image of width x height and copies m_iBytes[0] bytes of
 * 24-bit RGB pixels from 'data' into a newly allocated buffer. A NULL 'data'
 * leaves the buffer uninitialised.
 */
CImage::CImage(const uint8_t* data, int width, int height)
    :m_pInfo(new BitmapInfo(width, height, 24))
{
    fillMembers();

    uint8_t* const buffer = new uint8_t[m_iBytes[0]];
    m_pcData[0] = buffer;
    if (data != 0)
        memcpy(buffer, data, m_iBytes[0]);
}

// Conversion constructors
/**
 * Creates an image with the format of 'im' but with its bit depth changed to
 * 'depth', then converts the pixels of 'im' into the new buffer.
 */
CImage::CImage(const CImage* im, int depth)
    :m_pInfo(new BitmapInfo(im->GetFmt()))
{
    // The depth must be applied before fillMembers() derives sizes from it.
    m_pInfo->SetBits(depth);
    fillMembers();
    m_fQuality = im->GetQuality();
    m_pcData[0] = new uint8_t[m_iBytes[0]];

    const uint8_t* const srcData = im->Data();
    const BitmapInfo* const srcFmt = im->GetFmt();
    Convert(srcData, srcFmt);
}

/**
 * Creates an image in format 'header' and fills it by converting the pixels
 * of 'im'. The quality value is taken over from 'im'.
 */
CImage::CImage(const CImage* im, const BitmapInfo* header)
    :m_pInfo(new BitmapInfo(header))
{
    fillMembers();
    m_fQuality = im->GetQuality();
    m_pcData[0] = new uint8_t[m_iBytes[0]];

    const uint8_t* const srcData = im->Data();
    const BitmapInfo* const srcFmt = im->GetFmt();
    Convert(srcData, srcFmt);
}

/**
 * Frees the format header and, when this image owns its pixel memory, the
 * plane buffers. Prints a diagnostic if the image is destroyed while its
 * reference count is still above one.
 */
CImage::~CImage()
{
    if (m_iRefcount > 1)
    {
        printf("Unexpected delete of refenced image ! (%d)\n", m_iRefcount);
    }
    if (m_bDataOwner)
    {
        // The planes are allocated with new uint8_t[...], so they must be
        // released with the array form of delete. Deleting a null plane is
        // a no-op.
        delete[] m_pcData[0];
        delete[] m_pcData[1];
        delete[] m_pcData[2];
    }
    delete m_pInfo;
}

/**
 * Resets all members to their initial state and derives the geometry
 * (depth, bytes per pixel/line, plane 0 size, pixel count, width, height)
 * from m_pInfo. No pixel memory is allocated here.
 */
void CImage::fillMembers()
{
    // Bookkeeping defaults: a single reference, buffers owned by the image.
    m_bDataOwner = true;
    m_fQuality = 0.0f;
    m_pUserData = 0;
    m_iRefcount = 1;

    // No planes are attached yet.
    m_pcData[0] = m_pcData[1] = m_pcData[2] = 0;
    m_iBytes[1] = m_iBytes[2] = 0;

    // Depth-derived sizes; 15-bit pixels still occupy two bytes each.
    m_iDepth = m_pInfo->biBitCount;
    m_iBpp = (m_iDepth + 7) / 8;
    if (m_iDepth != 15)
        m_iBpl = (m_iDepth * m_pInfo->biWidth) / 8;
    else
        m_iBpl = 2 * m_pInfo->biWidth;

    // Header dimensions may be negative, so absolute values are stored.
    m_iWidth = labs(m_pInfo->biWidth);
    m_iHeight = labs(m_pInfo->biHeight);
    m_iPixels = labs(m_pInfo->biWidth * m_pInfo->biHeight);
    m_iBytes[0] = labs(m_iBpl * m_pInfo->biHeight);
}

/**
 * Drops one reference; the image deletes itself when the count reaches zero.
 */
void CImage::Release() const
{
    if (--m_iRefcount == 0)
        delete this;
}

void CImage::ToYUV()
{
    if (m_pInfo->biCompression == fccYUV)
	return;

    struct yuv* src;
    src = (struct yuv*)Data()  + m_iPixels - 1;
    if (m_iDepth != 24)
    {
	printf("Cannot convert non-24 bit image to YUV\n");
	return;
    }
    while ((uint8_t*)src > Data() + 3)
    {
	*src = *(col*) src;
	src--;
	*src = *(col*) src;
	src--;
	*src = *(col*) src;
	src--;
	*src = *(col*) src;
	src--;
    }
    m_pInfo->biCompression = fccYUV;
}

void CImage::ToRGB()
{
    if (m_pInfo->biCompression != fccYUV)
	return;
    if (m_iDepth != 24)
	return;
    col* src= (col*)Data() + m_iPixels - 1;
    while ((uint8_t*)src > Data() + 3)
    {
	*src= *(struct yuv*) src;
	src--;
	*src= *(struct yuv*) src;
	src--;
	*src= *(struct yuv*) src;
	src--;
	*src= *(struct yuv*) src;
	src--;
    }
    m_pInfo->SetBits(24);
}

/**
 * Returns false only when a format header exists and its biHeight is
 * negative; returns true otherwise (including when there is no header).
 */
bool CImage::Direction() const
{
    if (!m_pInfo)
        return true;
    return m_pInfo->biHeight >= 0;
}

/**
 * In-place blur of the image data using MMX byte averaging.
 *
 * Only implemented for non-WIN32 builds on i386; on every other
 * configuration the function body is empty (a #warning is emitted for
 * non-i386 builds).
 *
 * The call returns immediately unless 0 <= from < range.
 * Starting with a byte offset of 3 * 2^(from+1) and doubling it while it is
 * below 3 * 2^(range+1), each step runs two passes over the buffer:
 *   1. every group of 4 bytes is replaced by the average of itself and the
 *      4 bytes located 'ach' bytes further on;
 *   2. the same with the offset multiplied by m_iWidth.
 *
 * NOTE(review): 'limit' is m_iPixels * 3, i.e. the code assumes 3 bytes per
 * pixel; the image depth is not checked here.
 */
void CImage::Blur(int range, int from)
{
#ifndef WIN32
    if(range<=0)return;
    if(from<0)return;
    if(range<=from)return;

#ifdef i386
    // Number of bytes processed (3 per pixel).
    const int limit = m_iPixels * 3;
    // Loop bound and starting byte offset, both powers of two (times 3).
    unsigned lim=1<<(range+1);
    unsigned ach=3*(1<<(from+1));
    uint8_t* p = Data();
    // Scratch area for the x87/MMX state saved by fsave below.
    char g_pstore[200];
    // Save the floating point state and clear the MMX state before the
    // MMX code runs; it is restored by the frstor at the end.
    __asm__ __volatile__
	("fsave (%0)\n\t"
	 "emms\n\t"
	 :
	 :"r"(&g_pstore));

    while (ach < 3 * lim)
    {
	//	cerr<<"ach "<<ach<<" "<<lim<<endl;
	// Pass 1: average each 4-byte group with the group 'ach' bytes ahead.
	// ebx walks from p up to p + limit - ach in steps of 4 bytes.
	// The registers used are saved/restored by hand with push/pop; no
	// clobber list is given. 'm0' is a plain (non-local) assembler label.
	__asm__ __volatile__ (
			      //p %0
			      //ach %1
			      //limit %2
			      "pushl %%eax\n\t"
			      "pushl %%ebx\n\t"
			      "pushl %%ecx\n\t"
			      "pushl %%edx\n\t"
			      "pushl %%edi\n\t"

			      "movl %0, %%ebx\n\t"

			      "movl %1, %%ecx\n\t"

			      "movl %0, %%edx\n\t"
			      "addl %2, %%edx\n\t"
			      "subl %1, %%edx\n\t"

			      "movl %%edx, %%edi\n\t"

			      "pxor %%mm2, %%mm2\n\t"

			      "xorl %%eax, %%eax\n\t"
			      "xorl %%edx, %%edx\n\t"
			      //			align 8
			      "m0:\n\t"
			      "movd (%%ebx), %%mm0\n\t"
			      //			"movd %%ebx(%%ecx), %%mm1 \n\t"
			      "movd (%%ecx,%%ebx), %%mm1 \n\t"
			      "punpcklbw %%mm2, %%mm1\n\t"
			      "punpcklbw %%mm2, %%mm0\n\t"
			      "paddusw %%mm1, %%mm0 \n\t"
			      "psrlw $1, %%mm0\n\t"
			      "packuswb %%mm2, %%mm0\n\t"
			      "movd  %%mm0, (%%ebx)\n\t"

			      "addl $4, %%ebx\n\t"
			      "cmpl %%edi, %%ebx\n\t"
			      "jb m0\n\t"

			      "popl %%edi\n\t"
			      "popl %%edx\n\t"
			      "popl %%ecx\n\t"
			      "popl %%ebx\n\t"
			      "popl %%eax\n\t"
			      :
			      :"m"(p), "m"(ach), "m"(limit)
			     );

	// Pass 2 uses the same averaging with the offset scaled by the image
	// width; the scaling is undone again after the pass.
	ach*=m_iWidth;

	// Same code as pass 1 (label 'm1'); the load of %0 into ebx appears
	// twice, the second one is redundant.
	__asm__ __volatile__ (
			      //p %0
			      //ach %1
			      //limit %2
			      "	pushl %%eax\n\t"
			      "pushl %%ebx\n\t"
			      "pushl %%ecx\n\t"
			      "pushl %%edx\n\t"
			      "pushl %%edi\n\t"

			      "movl %0, %%ebx\n\t"

			      "movl %0, %%ebx\n\t"

			      "movl %1, %%ecx\n\t"

			      "movl %0, %%edx\n\t"
			      "addl %2, %%edx\n\t"
			      "subl %1, %%edx\n\t"

			      "movl %%edx, %%edi\n\t"

			      "pxor %%mm2, %%mm2\n\t"

			      "xorl %%eax, %%eax\n\t"
			      "xorl %%edx, %%edx\n\t"
			      //			align 8
			      "m1:\n\t"
			      "movd (%%ebx), %%mm0\n\t"
			      "movd (%%ebx,%%ecx), %%mm1 \n\t"
			      "punpcklbw %%mm2, %%mm1\n\t"
			      "punpcklbw %%mm2, %%mm0\n\t"
			      "paddusw %%mm1, %%mm0 \n\t"
			      "psrlw $1, %%mm0\n\t"
			      "packuswb %%mm2, %%mm0\n\t"
			      "movd  %%mm0, (%%ebx)\n\t"

			      "addl $4, %%ebx\n\t"
			      "cmpl %%edi, %%ebx\n\t"
			      "jb m1\n\t"

			      "popl %%edi\n\t"
			      "popl %%edx\n\t"
			      "popl %%ecx\n\t"
			      "popl %%ebx\n\t"
			      "popl %%eax\n\t"
			      :
			      :"m"(p), "m"(ach), "m"(limit)
			     );
	// Back to the horizontal offset, then double it for the next step.
	ach/=m_iWidth;
	ach*=2;
    }
    // Restore the floating point state saved at the top.
    __asm__ __volatile__ ("frstor (%0)\n\t": :"r"(&g_pstore));
#else
#warning BLUR not implemented in C code
#endif

#endif
}

/** Returns the image's format header (mutable access). */
BitmapInfo* CImage::GetFmt()
{
    return m_pInfo;
}
/** Returns the image's format header (read-only access). */
const BitmapInfo* CImage::GetFmt() const
{
    return m_pInfo;
}
/**
 * Tells whether a colour space / bit depth combination is handled.
 *
 * @param csp       biCompression value: 0, 3 or a fourcc code.
 * @param bitcount  bits per pixel; only examined for csp 0 and 3.
 * @return true for csp 0 with 15/16/24/32 bits, csp 3 with 16 bits, and for
 *         fccYUV, fccYUY2, fccYV12, fccI420 and fccUYVY at any depth.
 *         (fccYVYU and fccIYUV are not in the list.)
 */
bool CImage::Supported(int csp, int bitcount)
{
    switch (csp)
    {
    case 0:
        return bitcount == 15 || bitcount == 16
            || bitcount == 24 || bitcount == 32;
    case 3:
        return bitcount == 16;
    case fccYUV:
    case fccYUY2:
    case fccYV12:
    case fccI420:
    case fccUYVY:
        return true;
    default:
        return false;
    }
}

bool CImage::Supported(const BITMAPINFOHEADER& head)
{
    return Supported(head.biCompression, head.biBitCount);
}

/**
 * Returns 0 when 'csp' is one of the known colour spaces (0, 3, fccYUV,
 * fccYUY2, fccYV12, fccI420, fccUYVY) and 1 for anything else.
 * (fccIYUV and fccYVYU are not in the list.)
 */
int CImage::UnknownColorSpace(int csp)
{
    switch (csp)
    {
    case 0:
    case 3:
    case fccYUV:
    case fccYUY2:
    case fccYV12:
    case fccI420:
    case fccUYVY:
        return 0;
    default:
        return 1;
    }
}

/**
 * Swaps the first and third byte of every pixel in place (e.g. BGR <-> RGB).
 * Only acts on uncompressed (biCompression == 0) 24-bit images; any other
 * format is left untouched.
 */
void CImage::ByteSwap()
{
    if (m_pInfo->biCompression != 0 || m_pInfo->biBitCount != 24)
        return;

    // One pixel per iteration so that images whose pixel count is not a
    // multiple of four are swapped completely; the previous four-pixel
    // unrolled loop left the trailing pixels unswapped.
    uint8_t* px = Data();
    uint8_t* const end = px + m_iPixels * 3;
    for (; px < end; px += 3)
    {
        const uint8_t tmp = px[0];
        px[0] = px[2];
        px[2] = tmp;
    }
}

/**
 * Writes the image to 'filename' as a 24-bit BMP: a 14-byte file header,
 * the first 40 bytes of a 24-bit BitmapInfo and m_iPixels * 3 bytes of
 * pixel data. Images in another format are first converted into a
 * temporary 24-bit copy.
 *
 * A warning is printed when the file cannot be opened or written.
 */
void CImage::Dump(const char* filename)
{
#ifndef WIN32
    const int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 00666);
#else
    const int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 00666);
#endif
    if (fd < 0)
    {
        printf("Warning: could not open %s for writing", filename);
        return;
    }

    BitmapInfo bi(m_iWidth, m_iHeight, 24);
    CImage* im = 0;
    if (!IsFmt(&bi))
        im = new CImage(this, &bi);
    const uint8_t* ptr = (im) ? im->Data() : Data();
    const int bs = m_iPixels * 3;

    // Assemble the file header byte-wise. The 32-bit fields sit at offsets
    // 2, 6 and 10, which are not 4-byte aligned, so they are copied with
    // memcpy instead of being stored through a casted int pointer.
    // (int is assumed to be 32 bits wide, as the previous code did.)
    uint8_t bfh[14];
    const uint16_t magic = 'B' + 256 * 'M';
    const int fileSize = bs + 0x36;
    const int reserved = 0;
    const int dataOffset = 0x36;
    memcpy(bfh + 0, &magic, 2);
    memcpy(bfh + 2, &fileSize, 4);
    memcpy(bfh + 6, &reserved, 4);
    memcpy(bfh + 10, &dataOffset, 4);

    const bool ok = write(fd, bfh, 14) == 14
        && write(fd, &bi, 40) == 40
        && write(fd, ptr, bs) == bs;
    if (!ok)
        printf("Warning: could not write %s\n", filename);

    close(fd);
    if (im)
        im->Release();
}

void CImage::Convert(const CImage* from_img)
{
    Convert(from_img->Data(), from_img->GetFmt());
}

void CImage::Convert(const uint8_t* from_data, const BitmapInfo* from_fmt)
{
#if 0
    printf("Convert\n");
    from_fmt->Print();
    printf("To\n");
    m_pInfo->Print();
#endif
    bool flip_dir = ((from_fmt->biHeight * m_pInfo->biHeight) < 0);

    if ((from_fmt->biCompression == m_pInfo->biCompression)
	&& (from_fmt->Bpp() == m_pInfo->Bpp()))
    {
	//cout << "CONVERT " << flip_dir << endl;
	if (flip_dir)
	{
	    uint8_t* p = At(m_iHeight-1);
	    while (p >= Data())
	    {
		memcpy(p, from_data, m_iBpl);
		p -= m_iBpl;
		from_data += m_iBpl;
	    }
	}
	else if (Data() != from_data)
	{
#if 1
	    memcpy(Data(), from_data, Bytes());
	    // conversion from plain YV12 to planar YV12
	    //cout << "Data2 " << (void*)Data(2)<<"  "<<Bytes(2) << endl;
            //cout << "Data1 " << (void*)Data(1)<<"  "<<Bytes(1) << endl;
	    if (Data(2))
		memcpy(Data(2), from_data + Pixels(), Bytes(2));
	    if (Data(1))
		memcpy(Data(1), from_data + Pixels() * 5 / 4, Bytes(1));
#else
#if 1
	    uint8_t* end = Data() + Bytes();
	    from_data += Bytes();
            int size = 32768;
	    while (end >= (Data() + size))
	    {
		end -= size;
                from_data -= size;
		memcpy(end, from_data, size);
	    }
	    size = end - Data();
            memcpy(Data(), from_data - size, size);
#endif
#endif
	}

	return;
    }

    if ((from_fmt->Bpp()==24) && (from_fmt->biCompression==0))
    {
	anyFromRgb24(Data(), from_data, m_pInfo, flip_dir);
	return;
    }
    if ((m_pInfo->Bpp()==24) && (m_pInfo->biCompression==0))
    {
	anyToRgb24(Data(), from_data, from_fmt, flip_dir);
	return;
    }
    //shortcuts here
    if ((m_pInfo->Bpp() == 16) && (m_pInfo->IsRGB())
	&& (from_fmt->Bpp() == 15) && (from_fmt->IsRGB()))
    {
	rgb15ToRgb16(Data(), from_data, m_iWidth, abs(m_iHeight), flip_dir);
	return;
    }

    if ((m_pInfo->biCompression == fccYUY2)
	&& (from_fmt->biCompression == fccYV12))
    {
	yv12ToYuy2(Data(), from_data, m_iWidth, abs(m_iHeight), flip_dir);
	return;
    }

    if ((m_pInfo->biCompression == fccYV12)
	&& (from_fmt->biCompression == fccYUY2))
    {
	yuy2ToYv12(Data(), from_data, m_iWidth, abs(m_iHeight), flip_dir);
	return;
    }

    uint8_t* tmp = new uint8_t[m_iPixels * 3];
    anyToRgb24(tmp, from_data, from_fmt, flip_dir);
    anyFromRgb24(Data(), tmp, m_pInfo, false);
    delete[] tmp;
}

/**
 * Converts 24-bit RGB pixels at 'from' into the format described by
 * 'from_fmt' (despite its name this is the format of the OUTPUT written to
 * 'to'). Width and |height| are taken from that format.
 *
 * Handled targets: RGB 15/16/32 bit, fccYUV, fccYUY2, fccYV12. For any other
 * target nothing is written.
 */
void CImage::anyFromRgb24(uint8_t* to, const uint8_t* from, const BitmapInfo* from_fmt, bool flip_dir)
{
    void (*callfn)(STDCONV_PARAMS) = 0;

    if (from_fmt->IsRGB())
    {
        switch (from_fmt->Bpp())
        {
        case 15: callfn = rgb24ToRgb15; break;
        case 16: callfn = rgb24ToRgb16; break;
        case 32: callfn = rgb24ToRgb32; break;
        default: break;
        }
    }
    else
    {
        switch (from_fmt->biCompression)
        {
        case fccYUV:  callfn = rgb24ToYuv;  break;
        case fccYUY2: callfn = rgb24ToYuy2; break;
        case fccYV12: callfn = rgb24ToYv12; break;
        default: break;
        }
    }

    if (!callfn)
        return;
    callfn(to, from, from_fmt->biWidth, labs(from_fmt->biHeight), flip_dir);
}

/**
 * Converts pixels at 'from' into 24-bit RGB at 'to'. 'to_fmt' (despite its
 * name) describes the format of the INPUT data; width and |height| are
 * taken from it.
 *
 * Handled sources: RGB 15/16/32 bit, fccYUV, fccYUY2, fccYV12. For any other
 * source nothing is written.
 */
void CImage::anyToRgb24(uint8_t* to, const uint8_t* from, const BitmapInfo* to_fmt, bool flip_dir)
{
    void (*callfn)(uint8_t* to, const uint8_t* from, int width, int height, bool flip_dir) = 0;

    if (to_fmt->IsRGB())
    {
        switch (to_fmt->Bpp())
        {
        case 15: callfn = rgb15ToRgb24; break;
        case 16: callfn = rgb16ToRgb24; break;
        case 32: callfn = rgb32ToRgb24; break;
        default: break;
        }
    }
    else
    {
        switch (to_fmt->biCompression)
        {
        case fccYUV:  callfn = yuvToRgb24;  break;
        case fccYUY2: callfn = yuy2ToRgb24; break;
        case fccYV12: callfn = yv12ToRgb24; break;
        default: break;
        }
    }

    if (!callfn)
        return;
    callfn(to, from, to_fmt->biWidth, labs(to_fmt->biHeight), flip_dir);
}

/**
 * Converts 24-bit pixels (3 bytes each) to 15-bit 5-5-5 words.
 *
 * The top five bits of src[2], src[1] and src[0] end up in bits 10-14, 5-9
 * and 0-4 of the output word. Output rows are written top-down; with
 * flip_dir the source rows are read bottom-up.
 *
 * Rows are counted explicitly, so the flipped case terminates for any
 * width (including 0) and never forms a pointer below 'from'.
 */
void CImage::rgb24ToRgb15(STDCONV_PARAMS)
{
    uint16_t* dest = (uint16_t*) to;

    for (int row = 0; row < height; row++)
    {
        const int srcRow = flip_dir ? (height - 1 - row) : row;
        const uint8_t* src = from + 3 * width * srcRow;
        const uint8_t* const end = src + 3 * width;

        for (; src < end; src += 3)
        {
            // looks ugly but gives very fast compiled code
            *dest++ = ((((((src[2]<<5)&0xff00)|src[1])<<5)&0xfff00)|src[0])>>3;
        }
    }
}

//uint8_t* to, const uint8_t* from, int width, int height, bool flip_dir
/**
 * Converts 24-bit pixels (3 bytes each) to 16-bit 5-6-5 words.
 *
 * The top five bits of src[2], six bits of src[1] and five bits of src[0]
 * end up in bits 11-15, 5-10 and 0-4 of the output word. Output rows are
 * written top-down; with flip_dir the source rows are read bottom-up.
 *
 * Rows are counted explicitly, so the flipped case terminates for any
 * width (including 0) and never forms a pointer below 'from'.
 */
void CImage::rgb24ToRgb16(STDCONV_PARAMS)
{
    uint16_t* dest = (uint16_t*) to;

    for (int row = 0; row < height; row++)
    {
        const int srcRow = flip_dir ? (height - 1 - row) : row;
        const uint8_t* src = from + 3 * width * srcRow;
        const uint8_t* const end = src + 3 * width;

        for (; src < end; src += 3)
            *dest++ = ((((((src[2]<<5)&0xff00)|src[1])<<6)&0xfff00)|src[0])>>3;
    }
}

/**
 * Expands 3-byte pixels to 4-byte pixels: the three colour bytes are copied
 * unchanged and the fourth byte is set to 0. With flip_dir the source rows
 * are read in reverse order.
 */
void CImage::rgb24ToRgb32(STDCONV_PARAMS)
{
    const int outStride = 4 * width;

    for (int row = 0; row < height; row++)
    {
        const int srcRow = flip_dir ? (height - 1 - row) : row;
        const uint8_t* in = from + 3 * srcRow * width;
        uint8_t* out = to + row * outStride;

        for (int x = 0; x < width; x++, in += 3, out += 4)
        {
            out[0] = in[0];
            out[1] = in[1];
            out[2] = in[2];
            out[3] = 0;
        }
    }
}

/**
 * Converts every col pixel to a yuv pixel. Both buffers are walked
 * backwards; the output starts at its last pixel, and with flip_dir the
 * source starts at the end of its first row and moves one row down after
 * each line, which reverses the row order.
 */
void CImage::rgb24ToYuv(STDCONV_PARAMS)
{
    struct yuv* out = (struct yuv*)to + width*height - 1;
    const struct col* in = (const struct col*)from + (flip_dir ? width : width*height) - 1;

    for (int row = height; row > 0; row--)
    {
        for (int x = width; x > 0; x--)
            *out-- = *in--;

        // Skip from the start of the row just read to the end of the next.
        if (flip_dir)
            in += 2*width;
    }
}

/**
 * Converts col pixels to packed 4:2:2 data, two pixels per 4 output bytes
 * laid out as Y0 Cb Y1 Cr. Cb and Cr are taken from the second pixel of
 * each pair. Both buffers are walked backwards; with flip_dir the source
 * rows are visited in reverse order.
 */
void CImage::rgb24ToYuy2(STDCONV_PARAMS)
{
    uint8_t* out = to + 2*width*height - 1;
    const col* in = (const col*)from + (flip_dir ? width : width*height) - 1;

    for (int row = height; row > 0; row--)
    {
        for (int pair = width/2; pair > 0; pair--)
        {
            // Second pixel of the pair: supplies Y1 and both chroma bytes.
            yuv second(*in--);
            *out-- = second.Cr;
            *out-- = second.Y;
            *out-- = second.Cb;
            // First pixel of the pair: only its luma is used.
            *out-- = in->Y();
            in--;
        }
        if (flip_dir)
            in += 2*width;
    }
}

/**
 * Converts col pixels to planar 4:2:0 data. The output holds a full
 * resolution Y plane at 'to', the Cb samples at to + width*height and the
 * Cr samples at to + 5*width*height/4. Each 2x2 pixel block gets its
 * chroma from the block's top-left pixel. With flip_dir the source rows
 * are read bottom-up.
 */
void CImage::rgb24ToYv12(STDCONV_PARAMS)
{
    // Two source rows are consumed per iteration.
    const col* top = (const col*)from + (flip_dir ? width*(height-1) : 0);
    const col* bottom = (const col*)from + (flip_dir ? width*(height-2) : width);
    // After a row pair each source pointer sits one row past its start;
    // this step moves it to the matching row of the next pair.
    const int srcStep = flip_dir ? -3*width : width;

    uint8_t* yTop = to;
    uint8_t* yBottom = to + width;
    uint8_t* cbOut = to + width*height;
    uint8_t* crOut = to + 5*width*height/4;

    for (int rowPair = height/2; rowPair > 0; rowPair--)
    {
        for (int pair = width/2; pair > 0; pair--)
        {
            yuv first(*top++);
            *cbOut++ = first.Cb;
            *crOut++ = first.Cr;
            *yTop++ = first.Y;
            *yTop++ = top->Y();
            top++;
            *yBottom++ = bottom->Y();
            bottom++;
            *yBottom++ = bottom->Y();
            bottom++;
        }
        // Each luma pointer skips the row the other one has just written.
        yTop += width;
        yBottom += width;
        top += srcStep;
        bottom += srcStep;
    }
}

/**
 * Expands 15-bit 5-5-5 words to col pixels: bits 10-14 -> r, 5-9 -> g,
 * 0-4 -> b, each moved into the top five bits of the component. Both
 * buffers are walked backwards; with flip_dir the source rows are visited
 * in reverse order.
 */
void CImage::rgb15ToRgb24(STDCONV_PARAMS)
{
    col* out = (col*)to + width*height - 1;
    const uint16_t* in = (const uint16_t*)from + (flip_dir ? width : width*height) - 1;

    for (int row = height; row > 0; row--)
    {
        for (int x = width; x > 0; x--)
        {
            const uint16_t px = *in--;
            out->r = (px & 0x7C00) >> 7;
            out->g = (px & 0x3E0) >> 2;
            out->b = (px & 0x1F) << 3;
            out--;
        }
        if (flip_dir)
            in += 2*width;
    }
}

/**
 * Expands 16-bit 5-6-5 words to col pixels: bits 11-15 -> r, 5-10 -> g,
 * 0-4 -> b, each moved into the top bits of the component. Both buffers
 * are walked backwards; with flip_dir the source rows are visited in
 * reverse order.
 */
void CImage::rgb16ToRgb24(STDCONV_PARAMS)
{
    col* out = (col*)to + width*height - 1;
    const uint16_t* in = (const uint16_t*)from + (flip_dir ? width : width*height) - 1;

    for (int row = height; row > 0; row--)
    {
        for (int x = width; x > 0; x--)
        {
            const uint16_t px = *in--;
            out->r = (px & 0xF800) >> 8;
            out->g = (px & 0x7E0) >> 3;
            out->b = (px & 0x1F) << 3;
            out--;
        }
        if (flip_dir)
            in += 2*width;
    }
}

/**
 * Packs 4-byte pixels into col pixels by copying the first three bytes of
 * each source pixel. Both buffers are walked backwards; with flip_dir the
 * source rows are visited in reverse order.
 */
void CImage::rgb32ToRgb24(STDCONV_PARAMS)
{
    col* out = (col*)to + width*height - 1;
    const uint8_t* in = from + 4*((flip_dir ? width : width*height) - 1);

    for (int row = height; row > 0; row--)
    {
        for (int x = width; x > 0; x--)
        {
            *out-- = *(const col*)in;
            in -= 4;
        }
        // Two rows of 4-byte pixels forward: end of the next source row.
        if (flip_dir)
            in += 8*width;
    }
}

/**
 * Converts every yuv pixel to a col pixel. The source is always read
 * backwards from its last pixel. Without flip_dir the output is written
 * backwards from its last pixel as well (straight copy); with flip_dir the
 * output starts at the end of its first row and advances one row after
 * each line, reversing the row order.
 *
 * The start position test used to be inverted compared with the other
 * converters in this file, which made both modes write outside 'to'.
 */
void CImage::yuvToRgb24(STDCONV_PARAMS)
{
    const struct yuv* src = (const struct yuv*)from + width*height - 1;
    struct col* dest;
    if (!flip_dir)
        dest = (struct col*)to + width*height - 1;
    else
        dest = (struct col*)to + width - 1;

    for (int i = height-1; i >= 0; i--)
    {
        for (int j = width-1; j >= 0; j--)
        {
            *dest = *src;
            src--;
            dest--;
        }
        // From just before the row that was written to the end of the next.
        if (flip_dir)
            dest += 2*width;
    }
}

/**
 * Converts packed 4:2:2 data (4 bytes per pixel pair: Y0 Cb Y1 Cr) to col
 * pixels. Both pixels of a pair share the pair's Cb/Cr. The source is read
 * backwards from its last byte; with flip_dir the output rows are written
 * in reverse order.
 */
void CImage::yuy2ToRgb24(STDCONV_PARAMS)
{
    col* out = (col*)to + (flip_dir ? width : width*height) - 1;
    const uint8_t* in = from + 2*width*height - 1;

    for (int row = height; row > 0; row--)
    {
        for (int pair = width/2; pair > 0; pair--)
        {
            yuv px;
            // Second pixel of the pair first (reading backwards).
            px.Cr = *in--;
            px.Y = *in--;
            px.Cb = *in--;
            *out-- = px;
            // First pixel: same chroma, its own luma.
            px.Y = *in--;
            *out-- = px;
        }
        if (flip_dir)
            out += 2*width;
    }
}

/**
 * Converts planar 4:2:0 data to col pixels. The source holds the Y plane at
 * 'from', Cb samples at from + width*height and Cr samples at
 * from + 5*width*height/4; each chroma pair is applied to a 2x2 block of
 * output pixels. With flip_dir the output rows are written bottom-up.
 */
void CImage::yv12ToRgb24(STDCONV_PARAMS)
{
    // Two output rows are produced per iteration.
    col* top = (col*)to + (flip_dir ? width*(height-1) : 0);
    col* bottom = (col*)to + (flip_dir ? width*(height-2) : width);
    // After a row pair each output pointer sits one row past its start;
    // this step moves it to the matching row of the next pair.
    const int dstStep = flip_dir ? -3*width : width;

    const uint8_t* yTop = from;
    const uint8_t* yBottom = from + width;
    const uint8_t* cbIn = from + width*height;
    const uint8_t* crIn = from + 5*width*height/4;

    for (int rowPair = height/2; rowPair > 0; rowPair--)
    {
        for (int pair = width/2; pair > 0; pair--)
        {
            yuv px;
            px.Cb = *cbIn++;
            px.Y = *yTop++;
            px.Cr = *crIn++;
            *top++ = px;
            px.Y = *yTop++;
            *top++ = px;
            px.Y = *yBottom++;
            *bottom++ = px;
            px.Y = *yBottom++;
            *bottom++ = px;
        }
        // Each luma pointer skips the row the other one has just read.
        yTop += width;
        yBottom += width;
        top += dstStep;
        bottom += dstStep;
    }
}

/**
 * Converts 15-bit words to 16-bit words by delegating to v555to565(). A
 * requested flip is passed on as a negative height.
 */
void CImage::rgb15ToRgb16(STDCONV_PARAMS)
{
    const int signedHeight = flip_dir ? -height : height;
    uint16_t* const out = (uint16_t*)to;
    const uint16_t* const in = (const uint16_t*)from;
    v555to565(out, in, width, signedHeight);
}

/**
 * Converts planar 4:2:0 data (Y plane at 'from', Cb at from + width*height,
 * Cr at from + 5*width*height/4) to packed 4:2:2 data laid out as
 * Y0 Cb Y1 Cr. Each chroma row is used for two output rows. With flip_dir
 * the output rows are written bottom-up.
 */
void CImage::yv12ToYuy2(STDCONV_PARAMS)
{
    const uint8_t* yTop = from;
    const uint8_t* yBottom = from + width;
    const uint8_t* cbIn = from + width*height;
    const uint8_t* crIn = from + 5*width*height/4;

    // One output row is 2*width bytes; a row pair therefore spans 4*width.
    const int secondRowOffset = flip_dir ? -2*width : 2*width;
    const int rowPairStep = flip_dir ? -4*width : 4*width;
    uint8_t* rowOut = to + (flip_dir ? width*(height-1)*2 : 0);

    for (int rowPair = height/2; rowPair > 0; rowPair--)
    {
        uint8_t* out1 = rowOut;
        uint8_t* out2 = rowOut + secondRowOffset;

        for (int pair = width/2; pair > 0; pair--)
        {
            *out1++ = *yTop++;
            *out1++ = *cbIn;
            *out1++ = *yTop++;
            *out1++ = *crIn;

            // The second row reuses the same chroma and then consumes it.
            *out2++ = *yBottom++;
            *out2++ = *cbIn++;
            *out2++ = *yBottom++;
            *out2++ = *crIn++;
        }
        // Each luma pointer skips the row the other one has just read.
        yTop += width;
        yBottom += width;
        rowOut += rowPairStep;
    }
}

/**
 * Converts packed 4:2:2 data (Y0 Cb Y1 Cr per pixel pair) to planar 4:2:0
 * data: Y plane at 'to', Cb samples at to + width*height, Cr samples at
 * to + 5*width*height/4. Chroma is taken from the first row of each row
 * pair; the second row's chroma bytes are skipped. With flip_dir the source
 * rows are read bottom-up.
 */
void CImage::yuy2ToYv12(STDCONV_PARAMS)
{
    // One source row is 2*width bytes.
    const uint8_t* top = from + (flip_dir ? 2*width*(height-1) : 0);
    const uint8_t* bottom = from + (flip_dir ? 2*width*(height-2) : 2*width);
    // After a row pair each source pointer sits one row past its start;
    // this step moves it to the matching row of the next pair.
    const int srcStep = flip_dir ? -6*width : 2*width;

    uint8_t* yTop = to;
    uint8_t* yBottom = to + width;
    uint8_t* cbOut = to + width*height;
    uint8_t* crOut = to + 5*width*height/4;

    for (int rowPair = height/2; rowPair > 0; rowPair--)
    {
        for (int pair = width/2; pair > 0; pair--)
        {
            *yTop++ = *top++;
            *yBottom++ = *bottom++;
            *cbOut++ = *top++;
            bottom++;
            *yTop++ = *top++;
            *yBottom++ = *bottom++;
            *crOut++ = *top++;
            bottom++;
        }
        // Each luma pointer skips the row the other one has just written.
        yTop += width;
        yBottom += width;
        top += srcStep;
        bottom += srcStep;
    }
}

#ifdef IMTEST
#include "cpuinfo.h"
#include "utils.h"

/*
 * Stand-alone timing test, built only when IMTEST is defined: converts a
 * 640x480 uncompressed 24-bit image to 16 bits 100 times and prints the
 * total and the per-conversion time.
 */
int main(int argc, char* argv[])
{
    (void) argc;
    (void) argv;

    // Zero the whole header first so that no field (biBitCount in
    // particular) is read uninitialised by BitmapInfo.
    BITMAPINFOHEADER bh;
    memset(&bh, 0, sizeof(bh));
    bh.biSize = sizeof(BITMAPINFOHEADER);
    bh.biWidth = 640;
    bh.biHeight = 480;
    bh.biBitCount = 24;
    bh.biCompression = 0;

    BitmapInfo bii(bh);
    BitmapInfo bio(bh);

    // Destination format: 16 bits per pixel.
    bio.SetBits(16);
    CImage* cii = new CImage(&bii);
    CImage* cio = new CImage(&bio);

    int64_t t1 = longcount();
    const int iter = 100;
    for (int i = 0; i < iter; i++)
	cio->Convert(cii);
    int64_t t2 = longcount();
    float tm = to_float(t2, t1);

    printf("TIME %f    %f\n", tm, tm / iter);

    // Drop the references taken by the constructors so the images are freed.
    cio->Release();
    cii->Release();

    return 0;
}
#endif
