#include "c.h"
/*
 * Indices used with intreg[] in this file.  The values look like the
 * usual x86 register numbers (4 and 5 are not defined here) -- TODO confirm
 * against the definition of intreg[].
 */
enum {
	EAX = 0,
	ECX = 1,
	EDX = 2,
	EBX = 3,
	ESI = 6,
	EDI = 7
};

/*
 * One row of intrinsicTable[].  The table is filled with positional
 * initializers, so the member order below must not change.
 */
struct tagIntrinsics {
	char *Name;			/* intrinsic name as written in source; compared by pointer in IsIntrinsic() */
	short NrOfArgs;			/* value reported by nrOfIntrinsicArgs() */
	short Flags;			/* bit set; IsIntrinsic() tests OPTIMIZERONLY */
	void (*fn)(Node p);		/* emits the code; called from Intrinsic() */
	Symbol (*argsFn)(Node p);	/* optional; called from AssignIntrinsicArg(), may be NULL */
};

typedef struct tagIntrinsics INTRINSICS;

/* Argument nodes recorded by IntrinsicArg(), one per call, indexed by ArgumentsIndex. */
static Node ArgumentsTab[10];
/* The `n` value passed to IntrinsicArg() for the node in the same slot of ArgumentsTab. */
static int ArgumentsNts[10];
/* Running argument counter: incremented by IntrinsicArg() and paddArgs(), reset to 0 by Intrinsic(). */
static int ArgumentsIndex;
/* Next free number for the `_$LM<n>` labels emitted by mmxVectCall()/mmxImmCall(); each call uses two. */
static int labelIdx=1;
/* Declared here but not referenced in the visible part of this file. */
extern unsigned (*emitter)(Node, int);
/* Register symbols, indexed with the EAX/ECX/... constants above. */
extern Symbol intreg[];
/*
 * Emit the code for the _fsincos intrinsic.
 *
 * For a nested call the operand is first loaded from the top of the
 * machine stack, 8 bytes are released, and the destination pointer is
 * popped into %eax.  In both cases `fsincos` is executed and the value
 * on top of the FPU stack is stored through %eax with `fstpl`.
 */
static void fsincos(Node p)
{
	if (p->x.nestedCall)
		print("\tfldl\t(%%esp)\n\tfsincos\n\taddl\t$8,%%esp\n\tpop\t%%eax\n");
	/* The store is emitted in two pieces: opening part, then ")\n". */
	print(p->x.nestedCall ? "\tfstpl\t(%%eax" : "\tfsincos\n\tfstpl\t(%%eax");
	print(")\n");
}

/* _bswap: emit `bswap` on %eax. */
static void bswap(Node p)
{
	print("\tbswap\t%%eax\n");
}

/* _carry: emit `sbb %eax,%eax`. */
static void carry(Node p)
{
	print("\tsbb\t%%eax,%%eax\n");
}

/* _bsf: emit `bsf` with %eax as both operands. */
static void bsf(Node p)
{
	print("\tbsf\t%%eax,%%eax\n");
}

/* _bsr: emit `bsr` with %eax as both operands. */
static void bsr(Node p)
{
	print("\tbsr\t%%eax,%%eax\n");
}

/*
 * Argument-register callback shared by _bswap, _bsf and _bsr:
 * the single argument always goes to intreg[EAX], whatever `p` is.
 */
static Symbol bswapArgs(Node p)
{
	Symbol target = intreg[EAX];

	return target;
}

/*
 * _fistp: reserve a stack slot by pushing %eax, store the top of the FPU
 * stack into it as an integer, and pop the slot back into %eax.
 *
 * NOTE(review): in GNU as the `s` suffix on fistp selects the 16-bit
 * form, which would leave the upper half of the popped value equal to
 * the old %eax -- confirm what this assembler does with `fistps`.
 */
static void fistp(Node p)
{
	print("\tpushl\t%%eax\n"
	      "\tfistps\t(%%esp)\n\tpopl\t%%eax\n");
}
/* The emitters below each print exactly one FPU instruction. */

/* _fabs */
static void Fabs(Node p)
{
	print("\tfabs\n");
}

/* _fldpi */
static void fldpi(Node p)
{
	print("\tfldpi\n");
}

/* _fldl2e */
static void fldl2e(Node p)
{
	print("\tfldl2e\n");
}

/* _fldlg2 */
static void fldlg2(Node p)
{
	print("\tfldlg2\n");
}

/* _fldln2 */
static void fldln2(Node p)
{
	print("\tfldln2\n");
}

/* _f2xm1 */
static void f2xm1(Node p)
{
	print("\tf2xm1\n");
}

/*
 * Emit a loop that applies the MMX instruction `op` across two arrays of
 * 8-byte elements.
 *
 * The emitted code uses %ecx as the element count, %edx as the base of
 * the array loaded into %mm0 and %eax as the base of the array that is
 * both the second operand and the destination.  For a nested call the
 * three values are first popped from the stack (%eax, %edx, %ecx in that
 * order).  `jcxz` skips the loop for a zero count; the loop decrements
 * %ecx and repeats while the decrement result is non-zero.
 *
 * Two labels, `_$LM<labelIdx>` and `_$LM<labelIdx+1>`, are consumed.
 *
 * NOTE(review): in 32-bit code `jcxz` normally tests only %cx (`jecxz`
 * tests %ecx) -- confirm how this assembler encodes it.
 */
static void mmxVectCall(Node p,char *op)
{
	int loopLabel = labelIdx;
	int exitLabel = labelIdx + 1;

	/* Reserve both label numbers up front; nothing below reads labelIdx. */
	labelIdx += 2;

	if (p->x.nestedCall)
		print("\tpop\t%%eax\n\tpop\t%%edx\n\tpop\t%%ecx\n");

	print("\tjcxz\t_$LM%d\n", exitLabel);
	print("_$LM%d:\n", loopLabel);
	print("\tdecl\t%%ecx\n");
	print("\tmovq\t(%%edx,%%ecx,8),%%mm0\n\t");
	outs(op);
	print("\t(%%eax,%%ecx,8),%%mm0\n");
	print("\tmovq\t%%mm0,(%%eax,%%ecx,8)\n");
	print("\tjne\t_$LM%d\n", loopLabel);
	print("_$LM%d:\n", exitLabel);
}

/*
 * Emit a loop that applies the MMX instruction `op` between one 8-byte
 * value and every element of an array of 8-byte elements.
 *
 * The emitted code uses %ecx as the element count, loads the single
 * value at (%edx) into %mm0 once before the loop, and addresses the
 * array through %eax.  For a nested call the three values are first
 * popped from the stack (%eax, %edx, %ecx in that order).  `jcxz` skips
 * everything for a zero count.
 *
 * Two labels, `_$LM<labelIdx>` and `_$LM<labelIdx+1>`, are consumed.
 *
 * Fix: the loop-head label used to be written with printf() and without
 * the trailing ':', so it went to stdout instead of the assembly stream
 * and was not a label definition; the `jne` below then referenced an
 * undefined symbol.  It is now emitted with print() exactly like the
 * matching label in mmxVectCall().
 *
 * NOTE(review): the operation is emitted as `op %mm0,(%eax,%ecx,8)`,
 * i.e. with the memory element as the second operand -- confirm the
 * assembler accepts that operand order for MMX instructions.
 */
static void mmxImmCall(Node p,char *op)
{
	if (p->x.nestedCall) {
		print("\tpop\t%%eax\n\tpop\t%%edx\n\tpop\t%%ecx\n");
	}
	print("\tjcxz\t_$LM%d\n",labelIdx+1);
	print("\tmovq\t(%%edx),%%mm0\n");
	print("_$LM%d:\n",labelIdx);
	print("\tdecl\t%%ecx\n\t");
	outs(op);
	print("\t%%mm0,(%%eax,%%ecx,8)\n");
	print("\tjne\t_$LM%d\n",labelIdx);
	print("_$LM%d:\n",labelIdx+1);
	labelIdx += 2;
}
/*
 * Argument-register callback shared by all three-argument MMX intrinsics.
 *
 * Marks the current function as using MMX (FunctionInfo.mmxCalls) and,
 * based on the running ArgumentsIndex, picks the register for this
 * argument: slot 0 -> ECX, slot 1 -> EDX, slot 2 -> EAX.  Any other slot
 * trips assert(0).
 *
 * When the call is not nested, the chosen register is stored in syms[2]
 * of both p->kids[0] and p and is returned.  For a nested call nothing is
 * stored and NULL is returned.  ArgumentsIndex is advanced in all cases.
 */
static Symbol paddArgs(Node p)
{
	static const int regForSlot[3] = { ECX, EDX, EAX };
	Symbol chosen = NULL;

	FunctionInfo.mmxCalls = 1;

	if (ArgumentsIndex < 0 || ArgumentsIndex > 2) {
		assert(0);
	}
	else if (p->x.nestedCall == 0) {
		chosen = intreg[regForSlot[ArgumentsIndex]];
		p->kids[0]->syms[2] = chosen;
	}

	ArgumentsIndex++;

	if (p->x.nestedCall == 0)
		p->syms[2] = chosen;
	return chosen;
}
/*
 * pack* emitters.  The plain name uses mmxVectCall(), the name with the
 * trailing `i` uses mmxImmCall(); both pass the same mnemonic.
 */
static void packsswb(Node p)
{
	mmxVectCall(p, "packsswb");
}
static void packsswbi(Node p)
{
	mmxImmCall(p, "packsswb");
}

static void packssdw(Node p)
{
	mmxVectCall(p, "packssdw");
}
static void packssdwi(Node p)
{
	mmxImmCall(p, "packssdw");
}

static void packuswb(Node p)
{
	mmxVectCall(p, "packuswb");
}
static void packuswbi(Node p)
{
	mmxImmCall(p, "packuswb");
}


/*
 * padd* emitters.  The plain name uses mmxVectCall(), the name with the
 * trailing `i` uses mmxImmCall(); both pass the same mnemonic.
 */
static void paddd(Node p)
{
	mmxVectCall(p, "paddd");
}
static void padddi(Node p)
{
	mmxImmCall(p, "paddd");
}

static void paddw(Node p)
{
	mmxVectCall(p, "paddw");
}
static void paddwi(Node p)
{
	mmxImmCall(p, "paddw");
}

static void paddb(Node p)
{
	mmxVectCall(p, "paddb");
}
static void paddbi(Node p)
{
	mmxImmCall(p, "paddb");
}

static void paddsb(Node p)
{
	mmxVectCall(p, "paddsb");
}
static void paddsbi(Node p)
{
	mmxImmCall(p, "paddsb");
}

static void paddsw(Node p)
{
	mmxVectCall(p, "paddsw");
}
static void paddswi(Node p)
{
	mmxImmCall(p, "paddsw");
}

static void paddusw(Node p)
{
	mmxVectCall(p, "paddusw");
}
static void padduswi(Node p)
{
	mmxImmCall(p, "paddusw");
}

static void paddusb(Node p)
{
	mmxVectCall(p, "paddusb");
}
static void paddusbi(Node p)
{
	mmxImmCall(p, "paddusb");
}

/*
 * psub* emitters.  The plain name uses mmxVectCall(), the name with the
 * trailing `i` uses mmxImmCall(); both pass the same mnemonic.
 */
static void psubd(Node p)
{
	mmxVectCall(p, "psubd");
}
static void psubdi(Node p)
{
	mmxImmCall(p, "psubd");
}

static void psubw(Node p)
{
	mmxVectCall(p, "psubw");
}
static void psubwi(Node p)
{
	mmxImmCall(p, "psubw");
}

static void psubb(Node p)
{
	mmxVectCall(p, "psubb");
}
static void psubbi(Node p)
{
	mmxImmCall(p, "psubb");
}

static void psubsb(Node p)
{
	mmxVectCall(p, "psubsb");
}
static void psubsbi(Node p)
{
	mmxImmCall(p, "psubsb");
}

static void psubsw(Node p)
{
	mmxVectCall(p, "psubsw");
}
static void psubswi(Node p)
{
	mmxImmCall(p, "psubsw");
}

/*
 * pand / pandn emitters.  The plain name uses mmxVectCall(), the name
 * with the trailing `i` uses mmxImmCall(); both pass the same mnemonic.
 */
static void pand(Node p)
{
	mmxVectCall(p, "pand");
}
static void pandi(Node p)
{
	mmxImmCall(p, "pand");
}

static void pandn(Node p)
{
	mmxVectCall(p, "pandn");
}
static void pandni(Node p)
{
	mmxImmCall(p, "pandn");
}

/*
 * pcmpeq* emitters (the C function names drop the `p` of `cmp`).  The
 * plain name uses mmxVectCall(), the name with the trailing `i` uses
 * mmxImmCall(); both pass the same mnemonic.
 */
static void pcmeqb(Node p)
{
	mmxVectCall(p, "pcmpeqb");
}
static void pcmeqbi(Node p)
{
	mmxImmCall(p, "pcmpeqb");
}

static void pcmeqw(Node p)
{
	mmxVectCall(p, "pcmpeqw");
}
static void pcmeqwi(Node p)
{
	mmxImmCall(p, "pcmpeqw");
}

static void pcmeqd(Node p)
{
	mmxVectCall(p, "pcmpeqd");
}
static void pcmeqdi(Node p)
{
	mmxImmCall(p, "pcmpeqd");
}

/*
 * pcmpgt* emitters.  The plain name uses mmxVectCall(), the name with the
 * trailing `i` uses mmxImmCall().
 *
 * Fix: these were copied from the pcmpeq* emitters and still passed the
 * "pcmpeq?" mnemonics, so a greater-than intrinsic generated an equality
 * compare.  Each now passes the pcmpgt mnemonic that matches its name.
 */
static void pcmpgtb(Node p)
{
	mmxVectCall(p, "pcmpgtb");
}
static void pcmpgtbi(Node p)
{
	mmxImmCall(p, "pcmpgtb");
}

static void pcmpgtw(Node p)
{
	mmxVectCall(p, "pcmpgtw");
}
static void pcmpgtwi(Node p)
{
	mmxImmCall(p, "pcmpgtw");
}

static void pcmpgtd(Node p)
{
	mmxVectCall(p, "pcmpgtd");
}
static void pcmpgtdi(Node p)
{
	mmxImmCall(p, "pcmpgtd");
}

/*
 * pmaddwd / pmulhw / pmullw emitters.  The plain name uses mmxVectCall(),
 * the name with the trailing `i` uses mmxImmCall().
 *
 * Fix: pmulhwi passed "pmmulhw" and both pmullw variants passed
 * "pmmullw" (doubled 'm'), which are not instruction mnemonics.  They now
 * pass "pmulhw" and "pmullw".
 */
static void pmaddwd(Node p)
{
	mmxVectCall(p, "pmaddwd");
}
static void pmaddwdi(Node p)
{
	mmxImmCall(p, "pmaddwd");
}

static void pmulhw(Node p)
{
	mmxVectCall(p, "pmulhw");
}
static void pmulhwi(Node p)
{
	mmxImmCall(p, "pmulhw");
}

static void pmullw(Node p)
{
	mmxVectCall(p, "pmullw");
}
static void pmullwi(Node p)
{
	mmxImmCall(p, "pmullw");
}
/* por emitters: array form via mmxVectCall(), `i` form via mmxImmCall(). */
static void por(Node p)
{
	mmxVectCall(p, "por");
}
static void pori(Node p)
{
	mmxImmCall(p, "por");
}
/*
 * Left-shift emitters.  The plain name uses mmxVectCall(), the name with
 * the trailing `i` uses mmxImmCall().
 *
 * Fix: the mnemonics passed were "pslw", "psld" and "pslq", which do not
 * exist; the MMX logical left shifts are spelled psllw, pslld and psllq
 * (compare psrlw/psrld/psrlq used by the right-shift emitters).  The C
 * function names, and therefore the intrinsic names, are unchanged.
 */
static void pslw(Node p)
{
	mmxVectCall(p, "psllw");
}
static void pslwi(Node p)
{
	mmxImmCall(p, "psllw");
}

static void psld(Node p)
{
	mmxVectCall(p, "pslld");
}
static void psldi(Node p)
{
	mmxImmCall(p, "pslld");
}

static void pslq(Node p)
{
	mmxVectCall(p, "psllq");
}
static void pslqi(Node p)
{
	mmxImmCall(p, "psllq");
}
/*
 * psra* and psrl* emitters.  The plain name uses mmxVectCall(), the name
 * with the trailing `i` uses mmxImmCall(); both pass the same mnemonic.
 */
static void psraw(Node p)
{
	mmxVectCall(p, "psraw");
}
static void psrawi(Node p)
{
	mmxImmCall(p, "psraw");
}

static void psrad(Node p)
{
	mmxVectCall(p, "psrad");
}
static void psradi(Node p)
{
	mmxImmCall(p, "psrad");
}

static void psrlw(Node p)
{
	mmxVectCall(p, "psrlw");
}
static void psrlwi(Node p)
{
	mmxImmCall(p, "psrlw");
}

static void psrld(Node p)
{
	mmxVectCall(p, "psrld");
}
static void psrldi(Node p)
{
	mmxImmCall(p, "psrld");
}

static void psrlq(Node p)
{
	mmxVectCall(p, "psrlq");
}
static void psrlqi(Node p)
{
	mmxImmCall(p, "psrlq");
}

/*
 * punpckh* emitters.  The plain name uses mmxVectCall(), the name with
 * the trailing `i` uses mmxImmCall().
 *
 * Fix: punpckhdq/punpckhdqi passed "punpckhwq", which is not an
 * instruction mnemonic; they now pass "punpckhdq", matching their names.
 */
static void punpckhbw(Node p)
{
	mmxVectCall(p, "punpckhbw");
}
static void punpckhbwi(Node p)
{
	mmxImmCall(p, "punpckhbw");
}

static void punpckhwd(Node p)
{
	mmxVectCall(p, "punpckhwd");
}
static void punpckhwdi(Node p)
{
	mmxImmCall(p, "punpckhwd");
}

static void punpckhdq(Node p)
{
	mmxVectCall(p, "punpckhdq");
}
static void punpckhdqi(Node p)
{
	mmxImmCall(p, "punpckhdq");
}

/*
 * punpckl* emitters.  The plain name uses mmxVectCall(), the name with
 * the trailing `i` uses mmxImmCall().
 *
 * Fix: punpckldq/punpckldqi passed "punpcklwq", which is not an
 * instruction mnemonic; they now pass "punpckldq", matching their names.
 */
static void punpcklbw(Node p)
{
	mmxVectCall(p, "punpcklbw");
}
static void punpcklbwi(Node p)
{
	mmxImmCall(p, "punpcklbw");
}

static void punpcklwd(Node p)
{
	mmxVectCall(p, "punpcklwd");
}
static void punpcklwdi(Node p)
{
	mmxImmCall(p, "punpcklwd");
}

static void punpckldq(Node p)
{
	mmxVectCall(p, "punpckldq");
}
static void punpckldqi(Node p)
{
	mmxImmCall(p, "punpckldq");
}

/* pxor emitters: array form via mmxVectCall(), `i` form via mmxImmCall(). */
static void pxor(Node p)
{
	mmxVectCall(p, "pxor");
}
static void pxori(Node p)
{
	mmxImmCall(p, "pxor");
}

/* _emms: emit a single `emms` instruction. */
static void emms(Node p)
{
	print("\temms\n");
}
#define OPTIMIZERONLY	1

/*
 * Table of recognised intrinsics, terminated by a row whose Name is NULL.
 * Columns: name, number of arguments, flags, code emitter, optional
 * argument-register callback.
 *
 * IsIntrinsic() scans the rows in order and returns the 1-based position
 * of the first row whose Name matches, so a name must appear only once:
 * a second row with the same name can never be selected.
 *
 * Fixes:
 *  - The six pcmpgt rows were registered under the "_pcmpeq*" names
 *    already used by the pcmpeq rows and were therefore unreachable.
 *    They are now named "_pcmpgt*".
 *  - "_paddsusw"/"_paddsuswi" do not follow the "_paddusb" naming of the
 *    sibling rows.  The old spellings are kept so existing callers keep
 *    working, and "_paddusw"/"_padduswi" are added as aliases.
 */
static INTRINSICS intrinsicTable[] = {
	{	"_fsincos",	2,		0,			fsincos,	NULL		},
	{	"_bswap",	1,		0,			bswap,		bswapArgs	},
	{	"_bsf",		1,		0,			bsf,		bswapArgs	},
	{	"_bsr",		1,		0,			bsr,		bswapArgs	},
	{	"_carry",	0,		0,			carry,		NULL		},
	{	"_fistp",	1,		0,			fistp,		NULL		},
	{	"_fabs",	1,		0,			Fabs,		NULL		},
	{	"_fldpi",	0,		0,			fldpi,		NULL		},
	{	"_fldl2e",	0,		0,			fldl2e,		NULL		},
	{	"_fldlg2",	0,		0,			fldlg2,		NULL		},
	{	"_fldln2",	0,		0,			fldln2,		NULL		},
	{	"_f2xm1",	0,		0,			f2xm1,		NULL		},
//	{	"_memset",	3,	OPTIMIZERONLY,	imemset	},
	/*  MMX section */
	/* packss */
	{	"_packsswb",3,		0,			packsswb,	paddArgs	},
	{	"_packsswbi",3,		0,			packsswbi,	paddArgs	},
	{	"_packssdw",3,		0,			packssdw,	paddArgs	},
	{	"_packssdwi",3,		0,			packssdwi,	paddArgs	},
	{	"_packuswb",3,		0,			packuswb,	paddArgs	},
	{	"_packuswbi",3,		0,			packuswbi,	paddArgs	},

	/* paddX */
	{	"_paddd",	3,		0,			paddd,		paddArgs	},
	{	"_padddi",	3,		0,			padddi,		paddArgs	},
	{	"_paddw",	3,		0,			paddw,		paddArgs	},
	{	"_paddwi",	3,		0,			paddwi,		paddArgs	},
	{	"_paddb",	3,		0,			paddb,		paddArgs	},
	{	"_paddbi",	3,		0,			paddbi,		paddArgs	},
	/* paddsX */
	{	"_paddsb",	3,		0,			paddsb,		paddArgs	},
	{	"_paddsbi",	3,		0,			paddsbi,	paddArgs	},
	{	"_paddsw",	3,		0,			paddsw,		paddArgs	},
	{	"_paddswi",	3,		0,			paddswi,	paddArgs	},
	{	"_paddusb",	3,		0,			paddusb,	paddArgs	},
	{	"_paddusbi",3,		0,			paddusbi,	paddArgs	},
	{	"_paddusw",	3,		0,			paddusw,	paddArgs	},
	{	"_padduswi",3,		0,			padduswi,	paddArgs	},
	/* historical spellings of the two rows above, kept for compatibility */
	{	"_paddsusw",3,		0,			paddusw,	paddArgs	},
	{	"_paddsuswi",3,		0,			padduswi,	paddArgs	},
	/* psubX */
	{	"_psubd",	3,		0,			psubd,		paddArgs	},
	{	"_psubdi",	3,		0,			psubdi,		paddArgs	},
	{	"_psubw",	3,		0,			psubw,		paddArgs	},
	{	"_psubwi",	3,		0,			psubwi,		paddArgs	},
	{	"_psubb",	3,		0,			psubb,		paddArgs	},
	{	"_psubbi",	3,		0,			psubbi,		paddArgs	},
	/* psubsX */
	{	"_psubsb",	3,		0,			psubsb,		paddArgs	},
	{	"_psubsbi",	3,		0,			psubsbi,	paddArgs	},
	{	"_psubsw",	3,		0,			psubsw,		paddArgs	},
	{	"_psubswi",	3,		0,			psubswi,	paddArgs	},
	/* pand */
	{	"_pand",	3,		0,			pand,		paddArgs	},
	{	"_pandi",	3,		0,			pandi,		paddArgs	},

	/* pandn */
	{	"_pandn",	3,		0,			pandn,		paddArgs	},
	{	"_pandni",	3,		0,			pandni,		paddArgs	},

	/* pcmpeq */
	{	"_pcmpeqb",	3,		0,			pcmeqb,		paddArgs	},
	{	"_pcmpeqbi",3,		0,			pcmeqbi,	paddArgs	},
	{	"_pcmpeqw",	3,		0,			pcmeqw,		paddArgs	},
	{	"_pcmpeqwi",3,		0,			pcmeqwi,	paddArgs	},
	{	"_pcmpeqd",	3,		0,			pcmeqd,		paddArgs	},
	{	"_pcmpeqdi",3,		0,			pcmeqdi,	paddArgs	},

	/* pcmpgt */
	{	"_pcmpgtb",	3,		0,			pcmpgtb,	paddArgs	},
	{	"_pcmpgtbi",3,		0,			pcmpgtbi,	paddArgs	},
	{	"_pcmpgtw",	3,		0,			pcmpgtw,	paddArgs	},
	{	"_pcmpgtwi",3,		0,			pcmpgtwi,	paddArgs	},
	{	"_pcmpgtd",	3,		0,			pcmpgtd,	paddArgs	},
	{	"_pcmpgtdi",3,		0,			pcmpgtdi,	paddArgs	},
	/* pmaddwd */
	{	"_pmaddwd",	3,		0,			pmaddwd,	paddArgs	},
	{	"_pmaddwdi",3,		0,			pmaddwdi,	paddArgs	},
	/* pmulhw */
	{	"_pmulhw",	3,		0,			pmulhw,		paddArgs	},
	{	"_pmulhwi",	3,		0,			pmulhwi,	paddArgs	},
	/* pmullw */
	{	"_pmullw",	3,		0,			pmullw,		paddArgs	},
	{	"_pmullwi",	3,		0,			pmullwi,	paddArgs	},
	/* por */
	{	"_por",		3,		0,			por,		paddArgs	},
	{	"_pori",	3,		0,			pori,		paddArgs	},

	/* psl */
	{	"_pslw",	3,		0,			pslw,		paddArgs	},
	{	"_pslwi",	3,		0,			pslwi,		paddArgs	},
	{	"_psld",	3,		0,			psld,		paddArgs	},
	{	"_psldi",	3,		0,			psldi,		paddArgs	},
	{	"_pslq",	3,		0,			pslq,		paddArgs	},
	{	"_pslqi",	3,		0,			pslqi,		paddArgs	},
	/* psra */
	{	"_psraw",	3,		0,			psraw,		paddArgs	},
	{	"_psrawi",	3,		0,			psrawi,		paddArgs	},
	{	"_psrad",	3,		0,			psrad,		paddArgs	},
	{	"_psradi",	3,		0,			psradi,		paddArgs	},

	/* psrl */
	{	"_psrlw",	3,		0,			psrlw,		paddArgs	},
	{	"_psrlwi",	3,		0,			psrlwi,		paddArgs	},
	{	"_psrld",	3,		0,			psrld,		paddArgs	},
	{	"_psrldi",	3,		0,			psrldi,		paddArgs	},
	{	"_psrlq",	3,		0,			psrlq,		paddArgs	},
	{	"_psrlqi",	3,		0,			psrlqi,		paddArgs	},
	/* punpckhXXx */
	{	"_punpckhbw",3,		0,			punpckhbw,	paddArgs	},
	{	"_punpckhbwi",3,	0,			punpckhbwi,	paddArgs	},
	{	"_punpckhwd",3,		0,			punpckhwd,	paddArgs	},
	{	"_punpckhwdi",3,	0,			punpckhwdi,	paddArgs	},
	{	"_punpckhdq",3,		0,			punpckhdq,	paddArgs	},
	{	"_punpckhdqi",3,	0,			punpckhdqi,	paddArgs	},

	/* punpcklXXx */
	{	"_punpcklbw",3,		0,			punpcklbw,	paddArgs	},
	{	"_punpcklbwi",3,	0,			punpcklbwi,	paddArgs	},
	{	"_punpcklwd",3,		0,			punpcklwd,	paddArgs	},
	{	"_punpcklwdi",3,	0,			punpcklwdi,	paddArgs	},
	{	"_punpckldq",3,		0,			punpckldq,	paddArgs	},
	{	"_punpckldqi",3,	0,			punpckldqi,	paddArgs	},

	{	"_pxor",	3,		0,			pxor,		paddArgs	},
	{	"_pxori",	3,		0,			pxori,		paddArgs	},

	{	"_emms",	0,		0,			emms,		NULL		},

	{	NULL,		0,		0,			0,			0			}
};


/*
 * Look `name` up in intrinsicTable.
 *
 * Returns the 1-based position of the first matching row, or 0 when
 * the name does not start with '_', is not in the table, or the row is
 * flagged OPTIMIZERONLY while OptimizeFlag is 0.
 *
 * The comparison is on pointer identity, not string contents.
 * InstallIntrinsics() replaces every table name with string(name);
 * presumably string() returns a canonical pointer per distinct string
 * and callers pass names obtained the same way -- TODO confirm.
 */
int IsIntrinsic(char *name)
{
	INTRINSICS *entry;
	int position = 1;

	if (name[0] != '_')
		return 0;

	for (entry = intrinsicTable; entry->Name; entry++, position++) {
		if (name != entry->Name)
			continue;
		if ((entry->Flags & OPTIMIZERONLY) && OptimizeFlag == 0)
			return 0;
		return position;
	}
	return 0;
}


/*
 * Emit the code for the intrinsic call node `p`.
 *
 * The callee name is taken from p->kids[0]->syms[0]->name and looked up
 * with IsIntrinsic(); the emitter of the matching table row is invoked
 * with `p`.  ArgumentsIndex is reset to 0 afterwards in every case.
 *
 * Fix: IsIntrinsic() returns 0 for an unknown name, which used to index
 * intrinsicTable[-1] and call through whatever was found there.  That
 * case is now asserted and, if assertions do not stop execution, no
 * emitter is called.
 */
void Intrinsic(Node p)
{
	int idx = IsIntrinsic(p->kids[0]->syms[0]->name);

	assert(idx > 0);
	if (idx > 0)
		(*intrinsicTable[idx-1].fn)(p);
	ArgumentsIndex = 0;
}

/*
 * Record one intrinsic argument: `kid` goes to ArgumentsTab and `n` to
 * ArgumentsNts at the current ArgumentsIndex, which is then advanced.
 *
 * Fix: the two arrays have a fixed number of slots and the index was not
 * checked, so recording too many arguments wrote past their end.  An
 * out-of-range index is now asserted and, if assertions do not stop
 * execution, the argument is not recorded.
 */
void IntrinsicArg(Node kid,int n)
{
	const int capacity = (int)(sizeof ArgumentsTab / sizeof ArgumentsTab[0]);

	assert(ArgumentsIndex >= 0 && ArgumentsIndex < capacity);
	if (ArgumentsIndex < 0 || ArgumentsIndex >= capacity)
		return;

	ArgumentsTab[ArgumentsIndex] = kid;
	ArgumentsNts[ArgumentsIndex] = n;
	ArgumentsIndex++;
}

/*
 * Replace the Name of every row of intrinsicTable with the result of
 * string(Name).  IsIntrinsic() compares names by pointer, so this has to
 * run before any lookup.
 */
void InstallIntrinsics(void)
{
	INTRINSICS *entry;

	for (entry = intrinsicTable; entry->Name; entry++)
		entry->Name = string(entry->Name);
}

/*
 * Choose the register for an intrinsic argument node.
 *
 * p->x.intrinsicArg holds the 1-based position of the intrinsic in
 * intrinsicTable.  If that row has an argsFn callback, its result for `p`
 * is returned; otherwise the argument goes to intreg[EAX].
 */
Symbol AssignIntrinsicArg(Node p)
{
	INTRINSICS *entry = &intrinsicTable[p->x.intrinsicArg - 1];

	if (entry->argsFn != NULL)
		return (*entry->argsFn)(p);
	return intreg[EAX];
}

int nrOfIntrinsicArgs(int idx)
{
	return intrinsicTable[idx-1].NrOfArgs;
}
