// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package jpeg

// This is a Go translation of idct.c from
//
// http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz
//
// which carries the following notice:

/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose.  In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders.  Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
 *
 */

const blockSize = 64 // A DCT block is 8x8.

type block [blockSize]int32

const (
	w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
	w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16)
	w3 = 2408 // 2048*sqrt(2)*cos(3*pi/16)
	w5 = 1609 // 2048*sqrt(2)*cos(5*pi/16)
	w6 = 1108 // 2048*sqrt(2)*cos(6*pi/16)
	w7 = 565  // 2048*sqrt(2)*cos(7*pi/16)

	w1pw7 = w1 + w7
	w1mw7 = w1 - w7
	w2pw6 = w2 + w6
	w2mw6 = w2 - w6
	w3pw5 = w3 + w5
	w3mw5 = w3 - w5

	r2 = 181 // 256/sqrt(2)
)

// idct performs a 2-D Inverse Discrete Cosine Transformation.
//
// The input coefficients should already have been multiplied by the
// appropriate quantization table. We use fixed-point computation, with the
// number of bits for the fractional component varying over the intermediate
// stages.
//
// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
// discrete W transform and for the discrete Fourier transform", IEEE Trans. on
// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
func idct( *block) {
	// Horizontal 1-D IDCT.
	for  := 0;  < 8; ++ {
		 :=  * 8
		 := [ : +8 : +8] // Small cap improves performance, see https://golang.org/issue/27857
		// If all the AC components are zero, then the IDCT is trivial.
		if [1] == 0 && [2] == 0 && [3] == 0 &&
			[4] == 0 && [5] == 0 && [6] == 0 && [7] == 0 {
			 := [0] << 3
			[0] = 
			[1] = 
			[2] = 
			[3] = 
			[4] = 
			[5] = 
			[6] = 
			[7] = 
			continue
		}

		// Prescale.
		 := ([0] << 11) + 128
		 := [4] << 11
		 := [6]
		 := [2]
		 := [1]
		 := [7]
		 := [5]
		 := [3]

		// Stage 1.
		 := w7 * ( + )
		 =  + w1mw7*
		 =  - w1pw7*
		 = w3 * ( + )
		 =  - w3mw5*
		 =  - w3pw5*

		// Stage 2.
		 =  + 
		 -= 
		 = w6 * ( + )
		 =  - w2pw6*
		 =  + w2mw6*
		 =  + 
		 -= 
		 =  + 
		 -= 

		// Stage 3.
		 =  + 
		 -= 
		 =  + 
		 -= 
		 = (r2*(+) + 128) >> 8
		 = (r2*(-) + 128) >> 8

		// Stage 4.
		[0] = ( + ) >> 8
		[1] = ( + ) >> 8
		[2] = ( + ) >> 8
		[3] = ( + ) >> 8
		[4] = ( - ) >> 8
		[5] = ( - ) >> 8
		[6] = ( - ) >> 8
		[7] = ( - ) >> 8
	}

	// Vertical 1-D IDCT.
	for  := 0;  < 8; ++ {
		// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
		// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
		// we do not bother to check for the all-zero case.
		 := [ : +57 : +57] // Small cap improves performance, see https://golang.org/issue/27857

		// Prescale.
		 := ([8*0] << 8) + 8192
		 := [8*4] << 8
		 := [8*6]
		 := [8*2]
		 := [8*1]
		 := [8*7]
		 := [8*5]
		 := [8*3]

		// Stage 1.
		 := w7*(+) + 4
		 = ( + w1mw7*) >> 3
		 = ( - w1pw7*) >> 3
		 = w3*(+) + 4
		 = ( - w3mw5*) >> 3
		 = ( - w3pw5*) >> 3

		// Stage 2.
		 =  + 
		 -= 
		 = w6*(+) + 4
		 = ( - w2pw6*) >> 3
		 = ( + w2mw6*) >> 3
		 =  + 
		 -= 
		 =  + 
		 -= 

		// Stage 3.
		 =  + 
		 -= 
		 =  + 
		 -= 
		 = (r2*(+) + 128) >> 8
		 = (r2*(-) + 128) >> 8

		// Stage 4.
		[8*0] = ( + ) >> 14
		[8*1] = ( + ) >> 14
		[8*2] = ( + ) >> 14
		[8*3] = ( + ) >> 14
		[8*4] = ( - ) >> 14
		[8*5] = ( - ) >> 14
		[8*6] = ( - ) >> 14
		[8*7] = ( - ) >> 14
	}
}