/*
    Copyright (C) 2016 University of the Basque Country, UPV/EHU.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef FFT_UTILS_H_
#define FFT_UTILS_H_
/********************************************
 * Includes                                 *
 ********************************************/
#include <cufft.h>
#include <pthread.h>

#include "cuPoisson.h"
#include "utils/utils.h"

/********************************************
 * Macros                                   *
 ********************************************/
// Tile size used for GPU data transpositions. The code that consumes this
// value is not part of this header.
#define TILE_SIZE 16

// Macro that safely calls a cuFFT function and checks its result.
//
// `x` is evaluated exactly once. If the result differs from CUFFT_SUCCESS, an
// ERROR message containing the text of the failing call, the string returned
// by cufft_error_string(), the line and the file is emitted through
// print_msg(), and the ENCLOSING FUNCTION returns CUP_CUDA_ERROR. The macro
// can therefore only be used inside functions whose return type accepts
// CUP_CUDA_ERROR, and any clean-up the caller needs must happen elsewhere.
//
// Hygiene notes:
//  - The temporary has a macro-specific name so that an argument called `err`
//    in the wrapped call is not captured by the macro's own local variable.
//  - `x` is parenthesized so that any expression can be passed safely.
//  - The expansion is a plain brace block (kept so that existing call sites
//    written with or without a trailing ';' keep compiling). Do not use it as
//    the unbraced body of an `if` that is followed by an `else`.
#define CUFFT(x) \
{ \
	cufftResult cufft_macro_status_ = (x); \
	if( CUFFT_SUCCESS != cufft_macro_status_ ) \
	{ \
		print_msg( ERROR, \
		           "CUFFT Error on call \"%s\": %s\n\tLine: %d, File: %s\n", \
		           #x, cufft_error_string( cufft_macro_status_ ), \
		           __LINE__, __FILE__); \
		return CUP_CUDA_ERROR; \
	} \
}

/********************************************
 * Public function prototypes               *
 ********************************************/
/*
 * Retrieves data associated with a solver (declaration only; the definition
 * is not part of this header).
 *
 *   solver  Solver to query; it is taken as const, so it is not modified
 *           through this pointer.
 *   data    Output parameter (pointer to a double**).
 *           NOTE(review): layout and ownership of the returned buffers are
 *           not visible here -- confirm against the definition.
 *   sizes   Output/auxiliary integer array.
 *           NOTE(review): presumably receives the dimensions of `data`;
 *           required length is not visible here -- confirm.
 *
 * Returns a cup_error_t status code.
 */
cup_error_t get_solver_data( const cup_solver* solver,
                             double***         data,
                             int*              sizes );
/*
 * Returns a printable, NUL-terminated description for a cuFFT result code.
 * The CUFFT() macro in this header passes the result to a "%s" format when
 * reporting a failed call.
 * NOTE(review): the returned pointer is const; presumably it refers to static
 * storage and must not be freed -- confirm against the definition.
 */
const char *cufft_error_string( cufftResult_t error );
/*
 * CUDA kernel (declaration only; the definition is not part of this header).
 *
 * Launch configuration: __launch_bounds__ declares that the kernel is launched
 * with at most MAX_NUMBER_OF_THREADS threads per block (constant defined
 * outside this header). The expected grid/block shape is not visible here.
 *
 * Being a __global__ function, every pointer argument must reference memory
 * that is accessible from the device.
 *
 *   dev_sym_grid    Complex double-precision grid; non-const, so the kernel
 *                   may write to it.
 *                   NOTE(review): presumably updated in place with the
 *                   solution in Fourier space -- confirm.
 *   nps             Read-only integer array.
 *                   NOTE(review): presumably the number of points per
 *                   dimension -- confirm length and meaning.
 *   padded_np0      NOTE(review): presumably the padded extent of
 *                   dimension 0 -- confirm.
 *   sym_dim         NOTE(review): meaning not visible here -- confirm.
 *   sym_dim_offset  NOTE(review): meaning not visible here -- confirm.
 *   size2           Read-only double array.
 *                   NOTE(review): presumably squared domain sizes -- confirm.
 */
__global__ void __launch_bounds__((MAX_NUMBER_OF_THREADS))
           dev_solve_poisson( cufftDoubleComplex* dev_sym_grid,
                              const int         * nps,
                              const int           padded_np0,
                              const int           sym_dim,
                              const int           sym_dim_offset,
                              const double      * size2
                            );
/*
 * Host-callable transposition routine for complex double-precision data
 * (declaration only; the definition is not part of this header).
 *
 *   out     Destination buffer (written).
 *   in      Source buffer (read-only).
 *   np0,
 *   np1,
 *   np2     Extents of the three dimensions.
 *           NOTE(review): the name suggests the axes are reordered from
 *           (0,1,2) to (2,1,0) -- confirm the exact layout, and whether
 *           `out` may alias `in`, against the definition.
 *   stream  CUDA stream handle.
 *           NOTE(review): presumably the work is enqueued on this stream and
 *           the call does not synchronize -- confirm.
 *
 * No status is returned (void), so failures are not reported to the caller
 * through the return value.
 */
void transpose_210( cufftDoubleComplex*       out,
                    const cufftDoubleComplex* in,
                    int                       np0,
                    int                       np1,
                    int                       np2,
                    cudaStream_t              stream );

#endif /* FFT_UTILS_H_ */
