/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
 * transpose - in-place transposition of an 8x8 block using MMX.
 *
 * cache: start of the block.  The code reads and writes the 128 bytes at
 *        offsets 0x00..0x7f, treating them as 8 rows of 16 bytes, each row
 *        holding 8 elements that are shuffled as 16-bit words.
 *        NOTE(review): this is only a correct transposition if dct_t is a
 *        16-bit type - confirm against the typedef.
 *
 * The block is processed as four 4x4 quadrants (one 8-byte movq per
 * quadrant row).  Each quadrant is transposed in registers with a
 * word interleave (punpck?wd) followed by a dword interleave (punpck?dq):
 *   - top-left and bottom-right are transposed and stored back in place;
 *   - top-right and bottom-left are transposed and swapped with each other.
 *
 * All eight MMX registers are overwritten and no `emms' is issued, so the
 * caller must execute `emms' before any subsequent x87 floating-point code.
 */
static inline void transpose(dct_t *cache)
{
  __asm__ __volatile__ (
                /* ---- top left quadrant: transposed in place ---- */
                "movq 0x30(%0), %%mm0\n"    /* row 3 */
                "movq 0x20(%0), %%mm1\n"    /* row 2 */
                "movq 0x10(%0), %%mm2\n"    /* row 1 */
                "movq 0x00(%0), %%mm3\n"    /* row 0 */
                "movq %%mm1, %%mm4\n"       /* keep copies for the high halves */
                "movq %%mm3, %%mm5\n"
                "punpcklwd %%mm0, %%mm1\n"  /* interleave words of rows 2/3 (low) */
                "punpcklwd %%mm2, %%mm3\n"  /* interleave words of rows 0/1 (low) */
                "punpckhwd %%mm0, %%mm4\n"  /* interleave words of rows 2/3 (high) */
                "punpckhwd %%mm2, %%mm5\n"  /* interleave words of rows 0/1 (high) */
                "movq %%mm3, %%mm0\n"
                "movq %%mm5, %%mm2\n"
                "punpckldq %%mm1, %%mm3\n"  /* column 0 */
                "punpckldq %%mm4, %%mm5\n"  /* column 2 */
                "punpckhdq %%mm1, %%mm0\n"  /* column 1 */
                "punpckhdq %%mm4, %%mm2\n"  /* column 3 */
                "movq %%mm3, 0x00(%0)\n"
                "movq %%mm0, 0x10(%0)\n"
                "movq %%mm5, 0x20(%0)\n"
                "movq %%mm2, 0x30(%0)\n"
                /* ---- bottom right quadrant: transposed in place ---- */
                "movq 0x78(%0), %%mm0\n"
                "movq 0x68(%0), %%mm1\n"
                "movq 0x58(%0), %%mm2\n"
                "movq 0x48(%0), %%mm3\n"
                "movq %%mm1, %%mm4\n"
                "movq %%mm3, %%mm5\n"
                "punpcklwd %%mm0, %%mm1\n"
                "punpcklwd %%mm2, %%mm3\n"
                "punpckhwd %%mm0, %%mm4\n"
                "punpckhwd %%mm2, %%mm5\n"
                "movq %%mm3, %%mm0\n"
                "movq %%mm5, %%mm2\n"
                "punpckldq %%mm1, %%mm3\n"
                "punpckldq %%mm4, %%mm5\n"
                "punpckhdq %%mm1, %%mm0\n"
                "punpckhdq %%mm4, %%mm2\n"
                "movq %%mm3, 0x48(%0)\n"
                "movq %%mm0, 0x58(%0)\n"
                "movq %%mm5, 0x68(%0)\n"
                "movq %%mm2, 0x78(%0)\n"
                /* ---- top right quadrant: transposed, result kept in
                        mm3/mm0/mm5/mm2 until the bottom left is loaded ---- */
                "movq 0x38(%0), %%mm0\n"
                "movq 0x28(%0), %%mm1\n"
                "movq 0x18(%0), %%mm2\n"
                "movq 0x08(%0), %%mm3\n"
                "movq %%mm1, %%mm4\n"
                "movq %%mm3, %%mm5\n"
                "punpcklwd %%mm0, %%mm1\n"
                "punpcklwd %%mm2, %%mm3\n"
                "punpckhwd %%mm0, %%mm4\n"
                "punpckhwd %%mm2, %%mm5\n"
                "movq %%mm3, %%mm0\n"
                "movq %%mm5, %%mm2\n"
                "punpckldq %%mm1, %%mm3\n"
                "punpckldq %%mm4, %%mm5\n"
                "punpckhdq %%mm1, %%mm0\n"
                "punpckhdq %%mm4, %%mm2\n"
                /* ---- bottom left quadrant: load it before it is
                        overwritten by the transposed top right ---- */
                "movq 0x70(%0), %%mm1\n"
                "movq 0x60(%0), %%mm4\n"
                "movq 0x50(%0), %%mm6\n"
                "movq 0x40(%0), %%mm7\n"
                "movq %%mm3, 0x40(%0)\n"    /* transposed top right -> bottom left */
                "movq %%mm0, 0x50(%0)\n"
                "movq %%mm5, 0x60(%0)\n"
                "movq %%mm2, 0x70(%0)\n"
                "movq %%mm4, %%mm0\n"
                "movq %%mm7, %%mm3\n"
                "punpcklwd %%mm1, %%mm4\n"
                "punpcklwd %%mm6, %%mm7\n"
                "punpckhwd %%mm1, %%mm0\n"
                "punpckhwd %%mm6, %%mm3\n"
                "movq %%mm7, %%mm1\n"
                "movq %%mm3, %%mm6\n"
                "punpckldq %%mm4, %%mm7\n"
                "punpckldq %%mm0, %%mm3\n"
                "punpckhdq %%mm4, %%mm1\n"
                "punpckhdq %%mm0, %%mm6\n"
                "movq %%mm7, 0x08(%0)\n"    /* transposed bottom left -> top right */
                "movq %%mm1, 0x18(%0)\n"
                "movq %%mm3, 0x28(%0)\n"
                "movq %%mm6, 0x38(%0)\n"
                : /* no outputs: the pointer itself is never modified */
                : "r"(cache)
                  /* every MMX register is written above; "memory" covers the
                     in-place update of the block behind the pointer */
                : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
                  "memory");
}
