!
! Copyright (C) 2017 Mitsuaki Kawamura
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
MODULE sctk_coulomb
  !
  IMPLICIT NONE
  !
CONTAINS
!>
!> Allocate Kel & ikq
!>
SUBROUTINE alloc_Kel_Chebyshev()
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE constants, ONLY : pi
  USE io_global, ONLY : stdout
  USE sctk_val, ONLY : gindx, gq2, Kel, mf, nftot, nqbz, nmf
  !
  IMPLICIT NONE
  !
  INTEGER :: imf
  !
  ALLOCATE(mf(nmf), gq2(nftot), gindx(nftot), Kel(0:nmf+1,nbnd,nbnd,nqbz))
  !
  DO imf = 1, nmf
     mf(imf) = COS(REAL(2 * imf + 1, dp) * pi / REAL(2 * (nmf + 2), dp))
     mf(imf) = (1.0_dp + mf(imf)) / (1.0_dp - mf(imf))
  END DO
  !
END SUBROUTINE alloc_Kel_Chebyshev
!>
!>
!>
SUBROUTINE circular_shift_wrapper(wfc)
  !
  USE kinds, ONLY : DP
  USE sctk_val, ONLY : nkpe, nftot
  USE wvfct, ONLY : nbnd
  USE mp_world, ONLY : world_comm
  USE mp, ONLY : mp_circular_shift_left
  !
  IMPLICIT NONE
  !
  COMPLEX(DP),INTENT(IN) :: wfc(nftot*nbnd,nkpe)
  !
  CALL mp_circular_shift_left( wfc, 1, world_comm )
  !
END SUBROUTINE circular_shift_wrapper
!>
!> Screened coulomb interaction
!>
SUBROUTINE prepare_q(iq)
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE cell_base, ONLY : bg, tpiba
  USE mp_world, ONLY : mpime, nproc
  USE constants, ONLY : pi
  USE io_global, ONLY : stdout
  USE gvecw, ONLY : ecutwfc
  USE disp,  ONLY : nq1, nq2, nq3, x_q
  USE cell_base, ONLY : at
  !
  USE sctk_val, ONLY : gindx, gq2, igmin, &
  &                     nf, nftot, ngv, nqbz, nkpe, &
  &                     wfc1, wfc1q, wfc2, wfc2q
  ! 
  USE sctk_cnt_dsp, ONLY : cnt_and_dsp_full
  IMPLICIT NONE
  !
  INTEGER,INTENT(IN) :: iq !< index of phonon wavenumber @f$q@f$
  !
  INTEGER :: ik, jk, ib, i1, i2, i3, ikv(3), jkv(3), g0(3), ir(3), ifft, ig, &
  &          cnt(0:nproc - 1), dsp(0:nproc - 1), jkindx(nqbz), org, ipe, iqv(3)
  REAL(dp) :: gv(3), theta, gq20
  COMPLEX(dp) :: phase(nftot), wfctmp(nftot,nbnd,nkpe)
  !
  ! |G+q|^2
  !
  ifft = 0
  ngv = 0
  DO i3 = 1, nf(3)
     DO i2 = 1, nf(2)
        DO i1 = 1, nf(1)
           !
           ifft = ifft + 1
           !
           gv(1:3) = REAL((/i1, i2, i3/) - 1 + igmin(1:3), dp)
           gv(1:3) = MATMUL(bg(1:3,1:3), gv(1:3)) * tpiba
           gv(1:3) = gv(1:3) - x_q(1:3, iq) * tpiba
           !
           gq20 = DOT_PRODUCT(gv(1:3), gv(1:3))
           !
           IF(ecutwfc < 1e-10_dp .OR. gq20 < ecutwfc) THEN
              !
              ngv = ngv + 1
              gq2(ngv) = gq20 / (8.0_dp * pi)
              gindx(ngv) = ifft
              !
           END IF
           !
        END DO ! i1 = 1, nf(1)
     END DO ! i2 = 1, nf(2)
  END DO ! i3 = 1, nf(3)
  !
  WRITE(stdout,*) "    # of PWs for W : ", ngv
  !
  ! Prepare wave functions with pahse shift
  !
  CALL cnt_and_dsp_full(nqbz, cnt, dsp)
  !
  iqv(1:3) = NINT(MATMUL(x_q(1:3,iq), at(1:3,1:3)) * REAL((/nq1, nq2, nq3/), dp) - 0.5_dp)
  !
  DO ik = dsp(mpime) + 1, dsp(mpime) + cnt(mpime)
     !
     ikv(1) = (ik - 1) / (nq3*nq2)
     ikv(2) = (ik - 1 - ikv(1)*nq2*nq3) / nq3
     ikv(3) =  ik - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
     !
     WHERE(ikv(1:3)*2 >= (/nq1,nq2,nq3/)) ikv(1:3) = ikv(1:3) - (/nq1,nq2,nq3/) !???
     !
     jkv(1:3) = ikv(1:3) + iqv(1:3)
     jkv(1:3) = MODULO(jkv(1:3), (/nq1,nq2,nq3/))
     WHERE(jkv(1:3)*2 + 1 >= (/nq1,nq2,nq3/)) jkv(1:3) = jkv(1:3) - (/nq1,nq2,nq3/) !???
     !
     g0(1:3) = (jkv(1:3) - ikv(1:3) - iqv(1:3)) / (/nq1,nq2,nq3/) !???
     !
     jkv(1:3) = MODULO(jkv(1:3), (/nq1,nq2,nq3/))
     jk = 1 + jkv(3) + jkv(2)*nq3 + jkv(1)*nq2*nq3
     jkindx(ik) = jk
     !
     IF(ik <= dsp(mpime) .OR. dsp(mpime) + cnt(mpime) < ik) CYCLE
     !
     ig = 0
     DO i3 = 1, nf(3)
        DO i2 = 1, nf(2)
           DO i1 = 1, nf(1)
              !
              ig = ig + 1
              !
              ir(1:3) = (/i1, i2, i3/) - 1
              ir(1:3) = ir(1:3) * (g0(1:3) + igmin(1:3))
              !             
              theta = SUM(REAL(ir(1:3), dp) / REAL(nf(1:3), dp))
              theta = - 2.0_dp * pi * theta
              !
              phase(ig) = CMPLX(COS(theta), SIN(theta), KIND=dp)
              !
           END DO ! i1
        END DO ! i2
     END DO ! i3
     !
     DO ib = 1, nbnd
        !
        wfc1q(1:nftot,ib,ik - dsp(mpime)) = wfc1(1:nftot,ib,ik - dsp(mpime)) * phase(1:nftot) / REAL(nftot, dp)
        wfc2q(1:nftot,ib,ik - dsp(mpime)) = CONJG(wfc2(1:nftot,ib,ik - dsp(mpime)))
        !
     END DO ! ib
     !
  END DO ! ik
  !
  !
  !
  wfctmp(1:nftot,1:nbnd,1:nkpe) = wfc2q(1:nftot,1:nbnd,1:nkpe)
  wfc2q( 1:nftot,1:nbnd,1:nkpe) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
  !
  DO ipe = 1, nproc
     !
     CALL circular_shift_wrapper(wfctmp)
     !
     org = MODULO(mpime + ipe, nproc)
     !
     DO ik = 1, cnt(mpime)
        !
        IF(jkindx(dsp(mpime) + ik) <= dsp(org) .OR. &
        &  dsp(org) + cnt(org) < jkindx(dsp(mpime) + ik)) CYCLE
        !
        wfc2q(1:nftot,1:nbnd,ik) = wfctmp(1:nftot,1:nbnd,jkindx(dsp(mpime) + ik) - dsp(org))
        !
     END DO
     !
  END DO
  !
END SUBROUTINE prepare_q
!>
!> Calculation of weight function 
!> @f$\frac{f(1-f')}{\varepsilon - \evarepsilon'}@f$
!>
SUBROUTINE fermi_factor(iq,wght)
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE ktetra, ONLY : ntetra, tetra
  USE cell_base, ONLY : at
  USE disp,  ONLY : nq1, nq2, nq3, x_q
  USE start_k, ONLY : nk1, nk2, nk3
  USE sctk_val, ONLY : nqbz, nmf
  USE mp_world, ONLY : world_comm
  USE mp, ONLY : mp_sum
  !
  USE sctk_tetra, ONLY : tetraweight, interpol_indx
  !
  IMPLICIT NONE
  !
  INTEGER,INTENT(IN) :: iq !< index of phonon @f$q@f$
  COMPLEX(dp),INTENT(OUT) :: wght((nmf+1)*nbnd*nbnd,nqbz) !< integration weight
  !
  INTEGER :: ntetra0, ntetra1, nks0, nt, it, ik, ii, iqvd(3), ikv(3), &
  &          indx1(20,ntetra), indx2(20, ntetra), indx3(20 * ntetra), kintp(20)
  REAL(dp) :: kv(3), wintp(1,20)
  COMPLEX(dp),ALLOCATABLE :: wghtd(:,:,:)
  !
  iqvd(1:3) = NINT(MATMUL(x_q(1:3,iq), at(1:3,1:3)) * REAL((/nk1, nk2, nk3/), dp))
  !
  DO it = 1, ntetra
     !
     DO ii = 1, 20
        !
        ik = tetra(ii,it)
        ikv(1) = (ik - 1) / (nk3*nk2)
        ikv(2) = (ik - 1 - ikv(1)*nk2*nk3) / nk3
        ikv(3) =  ik - 1 - ikv(1)*nk2*nk3 - ikv(2)*nk3
        !
        ikv(1:3) = MODULO(ikv(1:3) + iqvd(1:3), (/nk1, nk2, nk3/))
        !
        indx1(ii,it) = 1 + ikv(3) + nk3 * ikv(2) + nk1 * nk2 * ikv(1)
        !
     END DO
     !
  END DO
  !
  indx2(1:20,1:ntetra) = 0
  indx3(1:20 * ntetra) = 0
  !
  CALL divide(world_comm, ntetra,ntetra0,ntetra1)
  !
  nks0 = 0
  DO it = ntetra0, ntetra1
     !
     DO ii = 1, 20
        !
        DO ik = 1, nks0
           !
           IF(tetra(ii,it) == indx3(ik)) THEN
              !
              indx2(ii,it) = ik
              GOTO 10
              !
           END IF
           !
        END DO
        !
        nks0 = nks0 + 1
        indx2(ii,it) = nks0
        indx3(nks0) = tetra(ii,it)
        !
10      continue
        !
     END DO
     !
  END DO
  !
  ALLOCATE(wghtd((nmf+1)*nbnd*nbnd,1,nks0))
  !
  CALL tetraweight(nks0,indx1,indx2,wghtd)
  !
  ! Interpolation of weight
  !
  wght(1:(nmf+1)*nbnd*nbnd,1:nqbz) = 0.0_dp
  DO ik = 1, nks0
     !
     ikv(1) = (indx3(ik) - 1) / (nk3*nk2)
     ikv(2) = (indx3(ik) - 1 - ikv(1)*nk2*nk3) / nk3
     ikv(3) =  indx3(ik) - 1 - ikv(1)*nk2*nk3 - ikv(2)*nk3
     !
     kv(1:3) = REAL(ikv(1:3), dp) / REAL((/nk1,nk2,nk3/), dp)
     CALL interpol_indx((/nq1,nq2,nq3/),kv,kintp,wintp)
     wght(1:(nmf+1)*nbnd*nbnd,kintp(1:20)) = wght(1:(nmf+1)*nbnd*nbnd,             kintp(1:20)) &
     &                       + MATMUL(wghtd(1:(nmf+1)*nbnd*nbnd,1:1,ik), wintp(1:1,1:20))
  END DO
  !
  CALL mp_sum( wght, world_comm )
  !
  DEALLOCATE(wghtd)
  !
END SUBROUTINE fermi_factor
!>
!> Compute screened interaction
!>
SUBROUTINE make_scrn(iq)
  !
  !$USE omp_lib, ONLY : OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE mp_world, ONLY : mpime, nproc
  USE io_global, ONLY : stdout
  USE sctk_val, ONLY : gindx, nf, nftot, ngv, nqbz, nmf, &
  &                     wfc1q, wfc2q, wscr
  !  
  USE sctk_cnt_dsp, ONLY :  cnt_and_dsp_full, cnt_and_dsp
  !
  IMPLICIT NONE
  !
  INTEGER,INTENT(IN) :: iq !< index of @f$q@f$
  !
  INTEGER :: ik, ib, jb, imf, cnt, dsp, org, ipe, &
  &          kcnt(0:nproc - 1), kdsp(0:nproc - 1)
  INTEGER(8) :: plan
  !
  COMPLEX(dp) :: wght(0:nmf,nbnd,nbnd,nqbz), rhin(nftot), rhout(nftot)
  COMPLEX(dp) :: one = CMPLX(1.0_dp, 0.0_dp, KIND=dp)
  COMPLEX(dp),ALLOCATABLE :: rho1(:,:), rho2(:,:)
  !
  CALL cnt_and_dsp(ngv, cnt, dsp)
  CALL cnt_and_dsp_full(nqbz, kcnt, kdsp)
  !
  ALLOCATE(wscr(ngv, dsp + 1:dsp + cnt, 0:nmf))
  wscr(1:ngv,dsp + 1:dsp + cnt, 0:nmf) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
  !
  ! Calc f * (1 - f') / (e - e' + iw)
  !
  CALL fermi_factor(iq,wght)
  !
  ! Calc. Chi
  !
  !????CALL dfftw_plan_dft_3d(plan, nf(1), nf(2), nf(3), rhin(1:nftot), &
  !????&                      rhout(1:nftot), FFTW_FORWARD, FFTW_ESTIMATE )
  !
  DO ipe = 1, nproc
     !
     CALL circular_shift_wrapper(wfc1q)
     CALL circular_shift_wrapper(wfc2q)
     !
     org = MODULO(mpime + ipe, nproc)
     !
     !$OMP PARALLEL DEFAULT(NONE) &
     !$OMP & SHARED(cnt,dsp,nbnd,nqbz,nftot,nmf,one,wfc1q,wfc2q,plan,wscr,wght,ngv,gindx, &
     !$OMP &        kcnt,kdsp,org) &
     !$OMP & PRIVATE(ik,ib,jb,imf,rhin,rhout,rho1,rho2)
     !
     ALLOCATE(rho1(ngv,nbnd), rho2(cnt,nbnd))
     !
     DO ik = 1, kcnt(org)
        !
        DO ib = 1, nbnd
           !
           DO jb = 1, nbnd
              !
              rhin(1:nftot) = wfc1q(1:nftot,ib,ik) * wfc2q(1:nftot,jb,ik)
              !?????CALL dfftw_execute_dft(plan, rhin(1:nftot), rhout(1:nftot))
              !
              rho1(1:ngv,jb) = CONJG(rhout(gindx(1:ngv)))
              !
           END DO ! jb
           !
           DO imf = 0, nmf
              !
              DO jb = 1, nbnd
                 !
                 rho2(1:cnt,jb) = REAL(wght(imf,jb,ib,kdsp(org) + ik),dp) * CONJG(rho1(1:cnt,jb))
                 !
              END DO ! jb = 1, nbnd
              !
              CALL zgemm("N", "T", ngv, cnt, nbnd, &
              &  one, rho1(1:ngv,       1:nbnd), ngv, &
              &       rho2(       1:cnt,1:nbnd), cnt, &
              &  one, wscr(1:ngv, 1:cnt, imf), ngv  )
              !
           END DO ! imf = 0, nmf
           !
        END DO ! ib = 1, nbnd
        !
     END DO ! ik = 1, kcnt(org)
     !
     DEALLOCATE(rho1, rho2)
     !
     !$OMP END PARALLEL
     !
  END DO ! ipe = 1, nproc
  !
  !?????CALL dfftw_destroy_plan(plan)
  !
END SUBROUTINE make_scrn
!>
!> Calc. Kel
!>
SUBROUTINE make_Kel(linfinite)
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE cell_base, ONLY : omega
  USE mp_world, ONLY : mpime, nproc
  USE mp, ONLY : mp_sum
  USE mp_world, ONLY : world_comm
  USE sctk_val, ONLY : gindx, gq2, Kel, nf, nftot, ngv, nqbz, nkpe, nmf, &
  &                     wfc1q, wfc2q, wscr
  !
  USE sctk_cnt_dsp, ONLY : cnt_and_dsp, cnt_and_dsp_full
  !
  IMPLICIT NONE
  !
  LOGICAL,INTENT(IN) :: linfinite !< Switch for calculation of infinite frequency part
  !
  INTEGER :: cnt, dsp, ik, ib, jb, imf, org, ipe, &
  &          kcnt(0:nproc - 1), kdsp(0:nproc - 1)
  INTEGER(8) :: plan
  COMPLEX(dp) :: rhin(nftot), rhout(nftot), Kel0, &
  &             one = CMPLX(1.0_dp, 0.0_dp, KIND=dp), zero = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
  COMPLEX(dp),ALLOCATABLE :: rho1(:,:), rho2(:,:), rho3(:,:)
  COMPLEX(DP),EXTERNAL :: zdotc
  !
  CALL cnt_and_dsp(ngv, cnt, dsp)
  CALL cnt_and_dsp_full(nqbz, kcnt, kdsp)
  !
  IF(linfinite) THEN
     Kel(0:nmf + 1,1:nbnd,1:nbnd,1:nqbz) = 0.0_dp
  ELSE
     Kel(0:nmf,1:nbnd,1:nbnd,1:nqbz) = 0.0_dp
  END IF
  !
  !??????CALL dfftw_plan_dft_3d(plan, nf(1), nf(2), nf(3), rhin(1:nftot), &
  !????&                      rhout(1:nftot), FFTW_FORWARD, FFTW_ESTIMATE)
  !
  DO ipe = 1, nproc
     !
     CALL circular_shift_wrapper(wfc1q)
     CALL circular_shift_wrapper(wfc2q)
     !
     org = MODULO(mpime + ipe, nproc)
     !
     !$OMP PARALLEL DEFAULT(NONE) &
     !$OMP & SHARED(linfinite,mpime,nqbz,nbnd,nftot,nmf,wfc1q,wfc2q,plan,cnt,dsp,zero,one,wscr,Kel,omega, &
     !$OMP &        ngv,gindx,kcnt,kdsp,org,gq2) &
     !$OMP & PRIVATE(ik,ib,jb,imf,rhin,rhout,rho1,rho2,rho3,Kel0)
     !
     ALLOCATE(rho1(ngv,nbnd), rho2(dsp + 1:dsp + cnt,nbnd), rho3(ngv,nbnd))
     !
     !$OMP DO
     DO ik = 1, kcnt(org)
        !
        DO ib = 1, nbnd
           !
           DO jb = 1, nbnd
              !
              rhin(1:nftot) = wfc1q(1:nftot,ib,ik) * wfc2q(1:nftot,jb,ik)
              !????CALL dfftw_execute_dft(plan, rhin(1:nftot), rhout(1:nftot))
              !
              rho1(1:ngv,            jb) = rhout(gindx(1:ngv))
              rho2(dsp + 1:dsp + cnt,jb) = rho1(dsp + 1:dsp + cnt,jb)
              !
           END DO ! jb = 1, nbnd
           !
           DO imf = 0, nmf
              !
              CALL zgemm("N", "N", ngv, nbnd, cnt, &
              &          one, wscr(1:ngv, dsp + 1:dsp + cnt,  imf), ngv, &
              &               rho2(       dsp + 1:dsp + cnt, 1:nbnd), cnt, &
              &         zero, rho3(1:ngv,                    1:nbnd), ngv  )
              !
              DO jb = 1, nbnd
                 !
                 Kel0 = zdotc(ngv, rho1(1:ngv,jb), 1, &
                 &                 rho3(1:ngv,jb), 1)
                 !
                 Kel(imf,jb,ib,kdsp(org) + ik) = REAL(Kel0, dp) / omega
                 !
              END DO ! jb = 1, nbnd
              !
           END DO ! imf = 0, nmf
           !
           ! Infinite frequency -> Bare Coulomb
           !
           IF(linfinite) THEN
              DO jb = 1, nbnd
                 !
                 Kel(nmf + 1,jb,ib,kdsp(org) + ik) = SUM(REAL(rho2(dsp + 1:dsp + cnt,jb) &
                 &                                    * CONJG(rho2(dsp + 1:dsp + cnt,jb)), dp) &
                 &                                    /        gq2(dsp + 1:dsp + cnt))  / omega
                 !
              END DO ! jb = 1, nbnd
           END IF
           !
        END DO ! ib = 1, nbnd
        !
     END DO ! ik = 1, kcnt(org)
     !$OMP END DO
     !
     DEALLOCATE(rho1, rho2, rho3)
     !
     !$OMP END PARALLEL
     !
  END DO ! ipe = 1, nproc
  !
  !????CALL dfftw_destroy_plan(plan)
  !
  IF(linfinite) THEN
     CALL mp_sum( Kel, world_comm )
  ELSE
     CALL mp_sum( Kel, world_comm )
  END IF
  !
  DEALLOCATE(wscr)
  !
END SUBROUTINE make_Kel
!>
!> Compute parameter for Chebyshev interpolation
!>
SUBROUTINE Chebyshev_interpol()
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE constants, ONLY: pi
  USE sctk_val, ONLY : Kel, nqbz, nmf
  USE mp_world, ONLY : world_comm
  USE mp, ONLY : mp_sum
  !
  !
  IMPLICIT NONE
  !
  INTEGER :: ik, ib, jb, imf, jmf, nqbz0, nqbz1
  REAL(dp) :: Cheb(0:nmf + 1,0:nmf + 1), Kel0
  !
  CALL divide(world_comm, nqbz, nqbz0, nqbz1)
  !
  Kel(0:nmf + 1,1:nbnd,1:nbnd,      1:nqbz0-1) = 0.0_dp
  Kel(0:nmf + 1,1:nbnd,1:nbnd,nqbz1+1:nqbz ) = 0.0_dp
  !
  DO imf = 0, nmf + 1
     DO jmf = 0, nmf + 1
        cheb(jmf,imf) = 2.0_dp / REAL(nmf + 2, dp) &
        &  * COS(REAL((2 * imf + 1) * jmf, dp) * pi / REAL(2 * (nmf + 2), dp))
     END DO ! jmf
  END DO ! imf
  cheb(0,0:nmf + 1) = cheb(0,0:nmf + 1) * 0.5_dp
  !
  !$OMP PARALLEL DEFAULT(NONE) &
  !$OMP & SHARED(nqbz0,nqbz1, nbnd, nmf, Cheb, Kel) &
  !$OMP & PRIVATE(ik,ib,jb,Kel0)
  !
  !$OMP DO
  DO ik = nqbz0, nqbz1
     DO ib = 1, nbnd
        DO jb = 1, nbnd
           !
           Kel0 = Kel(0,jb,ib,ik)
           Kel(0,jb,ib,ik) = Kel(nmf + 1,jb,ib,ik)
           Kel(nmf + 1,jb,ib,ik) = Kel0
           !
           Kel(0:nmf + 1,jb,ib,ik) = MATMUL(cheb(0:nmf + 1,0:nmf + 1), Kel(0:nmf + 1,jb,ib,ik))
           !
        END DO  ! jb
     END DO ! ib
  END DO ! ik
  !$OMP END DO
  !$OMP END PARALLEL
  !
  CALL mp_sum( Kel, world_comm )
  !
END SUBROUTINE Chebyshev_interpol
!>
!> Output to file
!>
SUBROUTINE write_Kel_Chebyshev(iq)
  !
  USE wvfct, ONLY : nbnd
  USE kinds, ONLY : DP
  USE mp_world, ONLY : mpime
  USE disp,  ONLY : nq1, nq2, nq3
  USE sctk_val, ONLY : iqv, Kel, nqbz, nmf
  !
  INTEGER,INTENT(IN) :: iq !< index of phonon @f$q@f$
  !
  INTEGER :: fo = 20
  CHARACTER(100) :: fname, ciq
  !
  IF(mpime == 0) THEN
     !
     WRITE(ciq,*) iq
     WRITE(fname,'(3a)') "vel", TRIM(adjustl(ciq)), ".dat"
     open(fo, file = TRIM(fname), form = "unformatted")
     !
     WRITE(fo) nq1, nq2, nq3
     WRITE(fo) nbnd
     WRITE(fo) REAL(iqv(1:3,iq), dp) / REAL(2 * (/nq1,nq2,nq3/), dp)
     WRITE(fo) nmf + 2
     !
     WRITE(fo) Kel(0:nmf + 1,1:nbnd,1:nbnd,1:nqbz)
     !
     close(fo)
     !
  END IF
  !
END SUBROUTINE write_Kel_Chebyshev
!
END MODULE sctk_coulomb
