! See copyright notice in the COPYRIGHT file.
?? include 'header/lbm_macros.inc'
?? include 'header/lbm_interfaceMacros.inc'
?? include 'treelm/source/logMacros.inc'
! ****************************************************************************** !
!> This module provides the definition and methods for
!! BGK advection relaxation scheme.
module mus_bgk_module
  use iso_c_binding, only: c_f_pointer

  ! include treelm modules
  use env_module,               only: rk
  use tem_varSys_module,        only: tem_varSys_type !, tem_varSys_op_type
  use tem_param_module,         only: div1_3, div1_36, div1_8, div3_4h, div1_4,&
    &                                 div3_8, div9_16, div3_16, cs2inv, cs4inv,&
    &                                 rho0
  ! use tem_spacetime_fun_module, only: tem_st_fun_listElem_type,                &
  !   &                                 tem_spacetime_for
  ! use tem_logging_module,       only: logUnit
  ! use tem_dyn_array_module,     only: PositionOfVal
  ! use tem_aux_module,           only: tem_abort

  ! include musubi modules
  use mus_field_prop_module,     only: mus_field_prop_type
  use mus_scheme_layout_module,  only: mus_scheme_layout_type
  use mus_scheme_type_module,    only: mus_scheme_type
  use mus_param_module,          only: mus_param_type
  use mus_varSys_module,         only: mus_varSys_data_type

  implicit none

  private

  public :: bgk_advRel_explicit
  public :: bgk_advRel_explicit_incomp
  public :: bgk_advRel_forcing
  public :: bgk_advRel_flekkoy
  public :: bgk_advRel_flekkoy_noFluid
  public :: bgk_nNwtn_PL_explicit
  public :: bgk_nNwtn_CY_explicit
  public :: bgk_nNwtn_CS_explicit

contains

! ****************************************************************************** !
  !> Advection relaxation routine for the BGK model.
  !!
?? copy :: compute_routineHeader( bgk_advRel_forcing )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir, QQ, nScalars
    real(kind=rk) pdfTmp( layout%fStencil%QQ ) ! temporary local pdf values
    real(kind=rk) rho     ! local density
    real(kind=rk) inv_rho ! inverse local density
    real(kind=rk) force( layout%fStencil%QQ )  ! local x-velocity
    real(kind=rk) u_x     ! local x-velocity
    real(kind=rk) u_y     ! local y-velocity
    real(kind=rk) u_z     ! local z-velocity
    real(kind=rk) usq     ! square velocity
    ! derived constants
    real(kind=rk) usqn, usqn_o1, usqn_o2 
    real(kind=rk) omega_2, cmpl_o, omega
    real(kind=rk) coeff_1, coeff_2
    real(kind=rk) ui1, ui3, ui10, ui11, ui12, ui13
    real(kind=rk) fac_1, fac_2, fac_3, fac_4, fac_9, fac_10, fac_11, fac_12,   &
      &           fac_13
    real(kind=rk) sum1_1, sum1_2, sum2_1, sum2_2, sum3_1, sum3_2, sum4_1,      &
      &           sum4_2, sum9_1, sum9_2, sum10_1, sum10_2, sum11_1, sum11_2,  &
      &           sum12_1, sum12_2, sum13_1, sum13_2
    ! ---------------------------------------------------------------------------
    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    ! some global variables
    omega = fieldProp(1)%fluid%omLvl( level )
    omega_2 = 2._rk * omega
    cmpl_o  = 1._rk - omega

!$omp do schedule(static)
    nodeloop: do iElem=1,nSolve
      ! First load all local values into temp array
      ! Generic! PUSH+PULL is possible
      pdfTmp(  1 ) = inState( ?FETCH?(  1, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  2 ) = inState( ?FETCH?(  2, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  3 ) = inState( ?FETCH?(  3, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  4 ) = inState( ?FETCH?(  4, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  5 ) = inState( ?FETCH?(  5, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  6 ) = inState( ?FETCH?(  6, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  7 ) = inState( ?FETCH?(  7, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  8 ) = inState( ?FETCH?(  8, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  9 ) = inState( ?FETCH?(  9, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 10 ) = inState( ?FETCH?( 10, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 11 ) = inState( ?FETCH?( 11, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 12 ) = inState( ?FETCH?( 12, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 13 ) = inState( ?FETCH?( 13, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 14 ) = inState( ?FETCH?( 14, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 15 ) = inState( ?FETCH?( 15, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 16 ) = inState( ?FETCH?( 16, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 17 ) = inState( ?FETCH?( 17, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 18 ) = inState( ?FETCH?( 18, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 19 ) = inState( ?FETCH?( 19, 1, iElem, QQ, nScalars, nElems,neigh ))

      ! local density
      rho = pdfTmp(18) + pdfTmp( 5) + pdfTmp(16)                               &
        & + pdfTmp( 1) + pdfTmp(15) + pdfTmp( 2)                               &
        & + pdfTmp(17) + pdfTmp( 4) + pdfTmp( 6)                               &
        & + pdfTmp(14) + pdfTmp(10) + pdfTmp(13)                               &
        & + pdfTmp( 8) + pdfTmp( 3) + pdfTmp(12)                               &
        & + pdfTmp( 9) + pdfTmp(11) + pdfTmp( 7)                               &
        & + pdfTmp(19)

      ! local x-, y- and z-velocity
      u_x = pdfTmp(18) + pdfTmp(17) + pdfTmp( 4)                               &
        & + pdfTmp(14) + pdfTmp(12)                                            &
        & - pdfTmp(16) - pdfTmp( 1) - pdfTmp(15)                               &
        & - pdfTmp(13) - pdfTmp(11)

      u_y = pdfTmp(18) + pdfTmp( 5)  + pdfTmp(16)                              &
        &  + pdfTmp(10) + pdfTmp( 9)                                           &
        &  - pdfTmp(15) - pdfTmp( 2 ) - pdfTmp(17)                             &
        &  - pdfTmp( 8) - pdfTmp( 7)

      u_z = pdfTmp( 6) + pdfTmp(14) + pdfTmp(10)                               &
        & + pdfTmp(13) + pdfTmp( 8)                                            & 
        & - pdfTmp( 3) - pdfTmp(12) - pdfTmp( 9)                               &
        & - pdfTmp(11) - pdfTmp( 7)

      ! inverse local density
      inv_rho = 1._rk / rho

      ! transfer moments to velocities
      u_x = u_x * inv_rho
      u_y = u_y * inv_rho
      u_z = u_z * inv_rho

      ! square velocity and derived constants
      usq = u_x*u_x + u_y*u_y + u_z*u_z
      usqn = div1_36 * (1._rk - 1.5_rk * usq) * rho

      ! Calculate the force for all directions
      do iDir = 1, layout%fStencil%QQ
        force( iDir ) = ( 1._rk -0.5_rk*omega )*layout%Weight( iDir )          &
          &           * ((( layout%fStencil%cxDir( 1, iDir ) - u_x  )        &
          &           * cs2inv + ( layout%fStencil%cxDir( 1, iDir ) * u_x    &
          &           +            layout%fStencil%cxDir( 2, iDir ) * u_y    &
          &           +            layout%fStencil%cxDir( 3, iDir ) * u_z )  &
          &           *     cs4inv*layout%fStencil%cxDir( 1, iDir ))         &
          &           * fieldProp(1)%fluid%force( 1 )                          &
          &           + (( layout%fStencil%cxDir( 2, iDir ) - u_y ) * cs2inv &
          &           +  ( layout%fStencil%cxDir( 2, iDir ) * u_x            &
          &           +    layout%fStencil%cxDir( 2, iDir ) * u_y            &
          &           +    layout%fStencil%cxDir( 3, iDir ) * u_z )          &
          &           * cs4inv*layout%fStencil%cxDir( 2, iDir ))             &
          &           * fieldProp(1)%fluid%force( 2 )                        &
          &           + (( layout%fStencil%cxDir( 3, iDir ) - u_z ) * cs2inv &
          &           +  ( layout%fStencil%cxDir( 1, iDir ) * u_x            &
          &           +    layout%fStencil%cxDir( 2, iDir ) * u_y            &
          &           +    layout%fStencil%cxDir( 3, iDir ) * u_z )          &
          &           * cs4inv*layout%fStencil%cxDir( 3, iDir ))             &
          &           * fieldProp(1)%fluid%force( 3 ))
      end do

      outState( ?SAVE?( 19,1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &   = pdfTmp(19)*cmpl_o+omega*rho*( div1_3-0.5d0*usq ) + force( 19 )

      coeff_1 = div1_8 * omega * rho

      usqn_o1 = omega * usqn

      ui1     =  u_x + u_y
      fac_1   = coeff_1 * ui1
      sum1_1  = fac_1 * div3_4h
      sum1_2  = fac_1 * ui1 + usqn_o1

      outState( ?SAVE?( 18, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(18) *cmpl_o+sum1_1 +sum1_2  + force( 18 )
      outState( ?SAVE?( 15, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(15) *cmpl_o-sum1_1 +sum1_2  + force( 15 )

      ui3     = -u_x + u_y
      fac_3   = coeff_1 * ui3
      sum3_1  = fac_3 * div3_4h
      sum3_2  = fac_3 * ui3 + usqn_o1

      outState( ?SAVE?( 16, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(16) *cmpl_o+sum3_1 +sum3_2 + force( 16 )

      outState( ?SAVE?( 17, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(17) *cmpl_o-sum3_1 +sum3_2 + force( 17 )


      ui10    =  u_x + u_z
      fac_10  = coeff_1 * ui10
      sum10_1 = fac_10 * div3_4h
      sum10_2 = fac_10 * ui10 + usqn_o1

      outState( ?SAVE?( 14, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(14)*cmpl_o+sum10_1+sum10_2  + force( 14 )

      outState( ?SAVE?( 11, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(11)*cmpl_o-sum10_1+sum10_2  + force( 11 )


      ui12    = -u_x + u_z
      fac_12  = coeff_1 * ui12
      sum12_1 = fac_12 * div3_4h
      sum12_2 = fac_12 * ui12 + usqn_o1

      outState( ?SAVE?( 13, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(13)*cmpl_o+sum12_1+sum12_2  + force( 13 )

      outState( ?SAVE?( 12, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(12)*cmpl_o-sum12_1+sum12_2  + force( 12 )


      ui11    =  u_y + u_z
      fac_11  = coeff_1 * ui11
      sum11_1 = fac_11 * div3_4h
      sum11_2 = fac_11 * ui11 + usqn_o1

      outState( ?SAVE?( 10, 1, iElem, QQ, nScalars,nElems,neigh ))                    &
        &   = pdfTmp(10)*cmpl_o+sum11_1+sum11_2 + force( 10 )

      outState( ?SAVE?( 7, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &   = pdfTmp(7)*cmpl_o-sum11_1+sum11_2  + force( 7 )


      ui13    = -u_y + u_z
      fac_13  = coeff_1 * ui13
      sum13_1 = fac_13 * div3_4h
      sum13_2 = fac_13 * ui13 + usqn_o1

      outState( ?SAVE?( 8, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &   = pdfTmp(8)*cmpl_o+sum13_1+sum13_2  + force( 8 )

      outState( ?SAVE?( 9, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &  = pdfTmp(9)*cmpl_o-sum13_1+sum13_2  + force( 9 )


      coeff_2 = div1_8 * omega_2 * rho
      usqn_o2 = omega_2 * usqn

      fac_2   = coeff_2 * u_y
      sum2_1  = fac_2 * div3_4h
      sum2_2  = fac_2 * u_y + usqn_o2

      outState( ?SAVE?( 5, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &  = pdfTmp(5) *cmpl_o+sum2_1 +sum2_2  + force( 5 )

      outState( ?SAVE?( 2, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &  = pdfTmp(2) *cmpl_o-sum2_1 +sum2_2  + force( 2 )


      fac_4   = coeff_2 * u_x
      sum4_1  = fac_4 * div3_4h
      sum4_2  = fac_4 * u_x + usqn_o2

      outState( ?SAVE?( 1, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &    = pdfTmp(1) *cmpl_o-sum4_1 +sum4_2 + force( 1 )

      outState( ?SAVE?( 4, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &   = pdfTmp(4) *cmpl_o+sum4_1 +sum4_2  + force( 4 )


      fac_9   = coeff_2 * u_z
      sum9_1  = fac_9 * div3_4h
      sum9_2  = fac_9 * u_z + usqn_o2

      outState( ?SAVE?( 6, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &   = pdfTmp(6) *cmpl_o+sum9_1 +sum9_2  + force( 6 )

      outState( ?SAVE?( 3, 1, iElem, QQ, nScalars,nElems,neigh ))                     &
        &   = pdfTmp(3) *cmpl_o-sum9_1 +sum9_2  + force( 3 )


    end do nodeloop
!$omp end do nowait

  end subroutine bgk_advRel_forcing
! **************************************************************************** !


! **************************************************************************** !
  !> Advection relaxation routine for the flekkoy diffusion model.
?? copy :: compute_routineHeader( bgk_advRel_flekkoy )
?? copy :: compute_routineParams
    ! --------------------------------------------------------------------------
    integer :: iElem, iDir
    type(mus_varSys_data_type), pointer :: fPtr
    type(mus_scheme_type), pointer :: scheme
    real(kind=rk) :: pdfTmp( layout%fStencil%QQ ) ! temporary local pdf values
    real(kind=rk) :: rho, feq
    real(kind=rk) :: d_omega
    real(kind=rk) :: transVel( nSolve*3 ) ! velocity from the transport field
    real(kind=rk) :: uc ! u_i,fluid * c_i
    integer :: vel_varPos ! position of transport velocity variable in varSys
    real(kind=rk) :: inv_vel, u_fluid(3)
    ! --------------------------------------------------------------------------

    ! access scheme via 1st variable method data which is a state variable
    call C_F_POINTER( varSys%method%val(1)%method_Data, fPtr )
    scheme => fPtr%solverData%scheme

    ! passive scalar has only one transport Variable
    vel_varPos = scheme%transVar%method(1)%data_varPos
    ! Get velocity field 
    call varSys%method%val(vel_varPos)%get_valOfIndex( &
      & varSys  = varSys,                              &
      & time    = params%general%simControl%now,       &
      & iLevel  = level,                               &
      & idx     = scheme%transVar%method(1)            &
      &           %pntIndex%indexLvl(level)            &
      &           %val(1:nSolve),                      &
      & nVals   = nSolve,                              &
      & res     = transVel                             )

    ! convert physical velocity into LB velocity
    inv_vel = 1.0_rk / params%physics%fac( level )%vel
    transVel = transVel * inv_vel

    ! initialize and define some field wise parameters
    d_omega = 2._rk / ( 1._rk + 6._rk                               &
      &                       * fieldProp(1)%species%diff_coeff( 1 ))

    nodeloop: do iElem = 1, nSolve
      ! x-, y- and z-velocity from transport field
      u_fluid = transVel( (iElem-1)*3+1 : iElem*3 ) 

      do iDir = 1, layout%fStencil%QQ
        pdfTmp( iDir ) = &
& instate( ?FETCH?( iDir, 1, iElem, layout%fStencil%QQ, varSys%nScalars, nElems,neigh ) )
      end do
      rho = sum( pdfTmp )

      do iDir = 1, layout%fStencil%QQ
        ! compute c_i * u
        uc = dble( layout%fStencil%cxDir(1, iDir)) * u_fluid(1) + &
          &  dble( layout%fStencil%cxDir(2, iDir)) * u_fluid(2) + &
          &  dble( layout%fStencil%cxDir(3, iDir)) * u_fluid(3)

        ! compute the equilibrium (fi_eq = weight_i * rho * ( 1+c_i*u / cs^2))
        feq = rho * layout%weight( iDir ) * (1._rk+3._rk*uc)

        outstate(                                                            &
& ?SAVE?( iDir, 1, iElem, layout%fStencil%QQ, varSys%nScalars, nElems,neigh ) ) =     &
          &                pdfTmp( iDir ) + d_omega * ( feq - pdfTmp( idir ))
      end do

    end do nodeloop

  end subroutine bgk_advRel_flekkoy
! ****************************************************************************** !


! ****************************************************************************** !
  !> Advection relaxation routine for the flekkoy diffusion model without prior
  !! definition of a dependent fluid scheme. Velocity is read from the depend
  !! table.
  !!
?? copy :: compute_routineHeader( bgk_advRel_flekkoy_noFluid )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem
    integer :: iDir
    integer :: QQ, nScalars
    real(kind=rk) :: pdfTmp( layout%fStencil%QQ ) ! temporary local pdf values
    real(kind=rk) :: rho
    real(kind=rk) :: feq
    real(kind=rk) :: d_omega
    ! ---------------------------------------------------------------------------
    QQ       = layout%fStencil%QQ
    nScalars = varSys%nScalars

    ! initialize and define some field wise parameters
    d_omega = 2._rk / ( 1._rk + 6._rk * fieldProp(1)%species%diff_coeff(1) )

    nodeloop: do iElem = 1, nSolve

      rho = 0._rk
      do iDir = 1, QQ
        ! store the pdfs for this element and compute the density locally
        pdfTmp( iDir ) = &
          & instate( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ))
        rho = rho + pdfTmp( iDir )
      end do

      directionloop: do iDir = 1, QQ
        ! compute the equilibrium
        ! fi_eq = weight_i * rho * ( 1+c_i*u / cs^2))
        !       = weight_i * rho
        feq = rho * layout%weight( iDir )

        outstate( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ) ) =  &
          &                pdfTmp( iDir ) + d_omega * ( feq - pdfTmp( idir ))
      end do directionloop
    end do nodeloop

  end subroutine bgk_advRel_flekkoy_noFluid
! ****************************************************************************** !


! ****************************************************************************** !
  !> Advection relaxation routine for the 
  !! BGK model with an explicit calculation of all equilibrium 
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
?? copy :: compute_routineHeader( bgk_advRel_explicit )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir                       ! voxel element counter
    integer :: QQ, nScalars
    ! temporary distribution variables
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) inv_rho ! inverse local density
    real(kind=rk) usq     ! square velocity
    ! derived constants
    ! equilibrium calculation variables
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ) 
    real(kind=rk) omega
    ! ---------------------------------------------------------------------------
    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    omega = fieldProp(1)%fluid%omLvl( level )
    nodeloop: do iElem=1,nSolve
      !> Generic fetching step:
      !! Streaming for pull
      !! Local copy for push
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, QQ
        pdfTmp( iDir ) = inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! local density
      do iDir = 1, QQ
        rho = rho + inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ))
      end do

      ! local x-, y- and z-velocity
      do iDir = 1, QQ
        ux(:) = ux(:) + inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh)) &
          &             * dble(layout%fStencil%cxDir( :, iDir ))
      end do

      ! inverse local density
      inv_rho = 1._rk / rho

      ! transfer moments to velocities
      ux(:) = ux(:) * inv_rho

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      do iDir = 1, QQ

        !> Pre-calculate velocitiy terms
        ucx = dble(layout%fStencil%cxDirRK( 1, iDir ))*ux(1) &
          & + dble(layout%fStencil%cxDirRK( 2, iDir ))*ux(2) &
          & + dble(layout%fStencil%cxDirRK( 3, iDir ))*ux(3)

        !> Calculate equilibrium distribution functions fEq
        eqState(iDir) = layout%weight(iDir)*rho*( 1.0_rk &
          &           +  ucx*cs2inv                      &
          &           +  ucx*ucx*cs2inv*cs2inv*0.5d0     &
          &           -  usq*0.5d0*cs2inv )

        !> Relaxation
        outState( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh )) =             &
          &            pdfTmp( iDir ) - omega*( pdfTmp(iDir ) - eqState( iDir ))
      end do  !< iDir

    end do nodeloop

  end subroutine bgk_advRel_explicit
! ****************************************************************************** !


! ****************************************************************************** !
  !> author: Jiaxing Qi
  !! Advection relaxation routine for the non-Newtonian flow
  !! BGK model with an explicit calculation of all equilibrium
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
?? copy :: compute_routineHeader( bgk_nNwtn_PL_explicit )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir, omegaStatePos
    integer :: QQ, nScalars
    ! temporary distribution variables
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) usq      ! square velocity
    ! derived constants
    ! equilibrium calculation variables
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ)
    real(kind=rk) omega
    ! ---------------------------------------------------------------------------
    ! parameters for non-Newtonian
    real(kind=rk) :: strainRate(6)
    real(kind=rk) :: shearRate ! second invariant of strainRate
    real(kind=rk) :: nuPhy, nuLB
    real(kind=rk) :: nonEq(layout%fStencil%QQ)
    real(kind=rk) :: convSR
    real(kind=rk) :: convVisc
    real(kind=rk) :: densityPhy = 1050.0_rk
    real(kind=rk) :: k, n_1 ! parameters for power-law
    ! ---------------------------------------------------------------------------
    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    ! get nonNewtonian power law parameter
    k   = fieldProp(1)%fluid%nNwtn%PL%k
    n_1 = fieldProp(1)%fluid%nNwtn%PL%n_1

    ! omega pos in state
    omegaStatePos = QQ + 1

    ! conversion factor for Shear Rate (or Strain Rate)
    convSR = params%physics%fac(level)%strainRate

    ! conversion factor for kinematic viscosity
    convVisc = params%physics%fac(level)%visc

    iElemLoop: do iElem = 1, nSolve
      ! get omega for this element.
      ! Use IDX here, as omega doesn't have a neighbor!
      omega = inState( ?IDX?(omegaStatePos, iElem, varSys%nScalars, nElems) )

      ! initialize
      strainRate = 0.0_rk
      shearRate = 0.0_rk

      !> Generic fetching step:
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, layout%fStencil%QQ
        pdfTmp( iDir ) = inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! local density
      do iDir = 1,layout%fStencil%QQ
        rho = rho + inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ) )
      end do

      ! local x-, y- and z-velocity
      do iDir = 1,layout%fStencil%QQ
        ux(:) = ux(:) + inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh) )&
          &             * layout%fStencil%cxDir(:, iDir)
      end do

      ! transfer moments to velocities
      ux(:) = ux(:) / rho

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      do iDir = 1, layout%fStencil%QQ

        ! Pre-calculate velocitiy terms
        ucx = dble(layout%fStencil%cxDir( 1, iDir ))*ux(1)          &
          & + dble(layout%fStencil%cxDir( 2, iDir ))*ux(2)          &
          & + dble(layout%fStencil%cxDir( 3, iDir ))*ux(3)

        ! Calculate equilibrium distribution functions fEq
        eqState(iDir)=layout%weight(iDir)*rho*(  1._rk                        &
          &                                    + ucx*cs2inv                   &
          &                                    + ucx*ucx*cs2inv*cs2inv*0.5_rk &
          &                                    - usq*0.5_rk*cs2inv            )

        ! get nonEq
        nonEq(iDir) = pdfTmp(iDir) - eqState(iDir)

        ! Relaxation
        outState( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh )) =   &
          &            pdfTmp(iDir) - omega * nonEq(iDir)

        ! calculate strain rate
        strainRate(1) = strainRate(1) + nonEq(iDir) * layout%fStencil%cxcx(1, iDir) ! strainRate_xx
        strainRate(2) = strainRate(2) + nonEq(iDir) * layout%fStencil%cxcx(2, iDir) ! strainRate_yy
        strainRate(3) = strainRate(3) + nonEq(iDir) * layout%fStencil%cxcx(3, iDir) ! strainRate_zz
        strainRate(4) = strainRate(4) + nonEq(iDir) * layout%fStencil%cxcx(4, iDir) ! strainRate_xy
        strainRate(5) = strainRate(5) + nonEq(iDir) * layout%fStencil%cxcx(5, iDir) ! strainRate_yz
        strainRate(6) = strainRate(6) + nonEq(iDir) * layout%fStencil%cxcx(6, iDir) ! strainRate_xz

      end do  ! iDir

      ! Calculate strain rate and convert physics unit
      strainRate = -1.5_rk * omega / rho * strainRate * convSR

      ! Calculate physical shear rate
      strainRate(:) = strainRate(:) * strainRate(:)
      shearRate = 2._rk * sum( strainRate(:) )
      shearRate = sqrt( shearRate )

      ! calculate kinematic visc by POWER-LAW model
      ! from paper: LBMs for non-newtonian flows, page 803
      nuPhy = ( shearRate ** n_1 ) * k
      nuPhy = nuPhy / densityPhy

      ! get LB kinematic viscosity
      nuLB = nuPhy / convVisc

      ! get LB omega
      omega = 1._rk / ( 0.5_rk + 3._rk * nuLB )

      ! save omega
      outState( ?IDX?(omegaStatePos, iElem, varSys%nScalars, nElems) ) = omega

    end do iElemLoop

  end subroutine bgk_nNwtn_PL_explicit
! ****************************************************************************** !

! ****************************************************************************** !
  !> author: Jiaxing Qi
  !! Advection relaxation routine for the non-Newtonian flow
  !! BGK model with an explicit calculation of all equilibrium
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
?? copy :: compute_routineHeader( bgk_nNwtn_CY_explicit )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir, omegaStatePos
    integer :: QQ, nScalars
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) usq      ! square velocity
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ)
    real(kind=rk) omega
    ! ---------------------------------------------------------------------------
    ! parameters for non-Newtonian
    real(kind=rk) :: strainRate(6)
    real(kind=rk) :: shearRate ! second invariant of strainRate
    real(kind=rk) :: nuPhy, nuLB
    real(kind=rk) :: nonEq(layout%fStencil%QQ)
    real(kind=rk) :: convSR
    real(kind=rk) :: convVisc
    real(kind=rk) :: densityPhy = 1050.0_rk
    real(kind=rk) :: a, n_1_a, lambda, visc0, viscInf, t
    ! ---------------------------------------------------------------------------
    QQ = layout%fStencil%QQ
    nScalars = varSys%nScalars

    ! get nonNewtonian power law parameter
    a       = fieldProp(1)%fluid%nNwtn%CY%a
    n_1_a   = fieldProp(1)%fluid%nNwtn%CY%n_1_a
    lambda  = fieldProp(1)%fluid%nNwtn%CY%lambda
    visc0   = fieldProp(1)%fluid%nNwtn%CY%visc0
    viscInf = fieldProp(1)%fluid%nNwtn%CY%viscInf

    ! omega pos in state
    omegaStatePos = QQ + 1

    ! conversion factor for Shear Rate (or Strain Rate)
    convSR = params%physics%fac(level)%strainRate

    ! conversion factor for kinematic viscosity
    convVisc = params%physics%fac(level)%visc

    iElemLoop: do iElem = 1, nSolve
      ! get omega for this element.
      ! Use IDX here, as omega doesn't have a neighbor!
      omega = inState( ?IDX?(omegaStatePos, iElem, varSys%nScalars, nElems) )

      ! initialize
      strainRate = 0.0_rk
      shearRate = 0.0_rk

      !> Generic fetching step:
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, layout%fStencil%QQ
        pdfTmp( iDir ) = inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! local density
      do iDir = 1,layout%fStencil%QQ
        rho = rho + inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ) )
      end do

      ! local x-, y- and z-velocity
      do iDir = 1,layout%fStencil%QQ
        ux(:) = ux(:) + inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh) )&
          &             * layout%fStencil%cxDir(:, iDir)
      end do

      ! transfer moments to velocities
      ux(:) = ux(:) / rho

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      do iDir = 1, layout%fStencil%QQ

        ! Pre-calculate velocitiy terms
        ucx = dble(layout%fStencil%cxDir( 1, iDir ))*ux(1)          &
          & + dble(layout%fStencil%cxDir( 2, iDir ))*ux(2)          &
          & + dble(layout%fStencil%cxDir( 3, iDir ))*ux(3)

        ! Calculate equilibrium distribution functions fEq
        eqState(iDir)=layout%weight(iDir)*rho*(  1._rk                        &
          &                                    + ucx*cs2inv                   &
          &                                    + ucx*ucx*cs2inv*cs2inv*0.5_rk &
          &                                    - usq*0.5_rk*cs2inv            )

        ! get nonEq
        nonEq(iDir) = pdfTmp(iDir) - eqState(iDir)

        ! Relaxation
        outState( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh )) =   &
          &            pdfTmp(iDir) - omega * nonEq(iDir)

        ! calculate strain rate
        strainRate(1) = strainRate(1) + nonEq(iDir) * layout%fStencil%cxcx(1, iDir) ! strainRate_xx
        strainRate(2) = strainRate(2) + nonEq(iDir) * layout%fStencil%cxcx(2, iDir) ! strainRate_yy
        strainRate(3) = strainRate(3) + nonEq(iDir) * layout%fStencil%cxcx(3, iDir) ! strainRate_zz
        strainRate(4) = strainRate(4) + nonEq(iDir) * layout%fStencil%cxcx(4, iDir) ! strainRate_xy
        strainRate(5) = strainRate(5) + nonEq(iDir) * layout%fStencil%cxcx(5, iDir) ! strainRate_yz
        strainRate(6) = strainRate(6) + nonEq(iDir) * layout%fStencil%cxcx(6, iDir) ! strainRate_xz

      end do  ! iDir

      ! Calculate strain rate
      strainRate = -1.5_rk * omega / rho * strainRate * convSR

      ! Calculate physical shear rate
      strainRate(:) = strainRate(:) * strainRate(:)
      shearRate = 2._rk * sum( strainRate(:) )
      shearRate = sqrt( shearRate )

      ! get physical dynamic viscosity
      ! model from paper of Wang
      t = ( 1._rk + (lambda*shearRate)**a ) ** n_1_a
      ! viscPhy
      nuPhy = viscInf + (visc0-viscInf) * t
      nuPhy = nuPhy / densityPhy

      ! get LB kinematic viscosity
      nuLB = nuPhy / convVisc

      ! get LB omega
      omega = 1._rk / ( 0.5_rk + 3 * nuLB )

      ! save omega
      outState( ?IDX?(omegaStatePos, iElem, varSys%nScalars, nElems) ) = omega

    end do iElemLoop

  end subroutine bgk_nNwtn_CY_explicit
! ****************************************************************************** !

! ****************************************************************************** !
  !> author: Jiaxing Qi
  !! Advection relaxation routine for the non-Newtonian flow
  !! BGK model with an explicit calculation of all equilibrium
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
?? copy :: compute_routineHeader( bgk_nNwtn_CS_explicit )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir, omegaStatePos
    integer :: QQ, nScalars
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) usq      ! square velocity
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ)
    real(kind=rk) omega
    ! ---------------------------------------------------------------------------
    ! parameters for non-Newtonian
    real(kind=rk) :: strainRate(6)
    real(kind=rk) :: shearRate ! second invariant of strainRate
    real(kind=rk) :: nuPhy, nuLB
    real(kind=rk) :: nonEq(layout%fStencil%QQ)
    real(kind=rk) :: convSR
    real(kind=rk) :: convVisc
    real(kind=rk) :: densityPhy = 1050.0_rk
    real(kind=rk) :: t, k0, k1
    ! ---------------------------------------------------------------------------
    QQ = layout%fStencil%QQ
    nScalars = varSys%nScalars

    ! get nonNewtonian power law parameter
    k0 = fieldProp(1)%fluid%nNwtn%CS%k0
    k1 = fieldProp(1)%fluid%nNwtn%CS%k1

    ! omega pos in state
    omegaStatePos = QQ + 1

    ! conversion factor for Shear Rate (or Strain Rate)
    convSR = params%physics%fac(level)%strainRate

    ! conversion factor for kinematic viscosity
    convVisc = params%physics%fac(level)%visc

    iElemLoop: do iElem = 1, nSolve
      ! get omega for this element.
      ! Use IDX here, as omega doesn't have a neighbor!
      omega = inState( ?IDX?(omegaStatePos, iElem, varSys%nScalars, nElems) )

      ! initialize
      strainRate = 0.0_rk
      shearRate = 0.0_rk

      !> Generic fetching step:
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, QQ
        pdfTmp( iDir ) = inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! local density
      do iDir = 1,QQ
        rho = rho + inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ) )
      end do

      ! local x-, y- and z-velocity
      do iDir = 1,QQ
        ux(:) = ux(:) + inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh) )&
          &             * layout%fStencil%cxDir(:, iDir)
      end do

      ! transfer moments to velocities
      ux(:) = ux(:) / rho

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      do iDir = 1, QQ

        ! Pre-calculate velocitiy terms
        ucx = real(layout%fStencil%cxDir( 1, iDir ), kind=rk)*ux(1)          &
          & + real(layout%fStencil%cxDir( 2, iDir ), kind=rk)*ux(2)          &
          & + real(layout%fStencil%cxDir( 3, iDir ), kind=rk)*ux(3)

        ! Calculate equilibrium distribution functions fEq
        eqState(iDir)=layout%weight(iDir)*rho*(  1._rk                        &
          &                                    + ucx*cs2inv                   &
          &                                    + ucx*ucx*cs2inv*cs2inv*0.5_rk &
          &                                    - usq*0.5_rk*cs2inv            )

        ! get nonEq
        nonEq(iDir) = pdfTmp(iDir) - eqState(iDir)

        ! Relaxation
        outState( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh )) =   &
          &            pdfTmp(iDir) - omega * nonEq(iDir)

        ! calculate strain rate
        strainRate(1) = strainRate(1) + nonEq(iDir) * layout%fStencil%cxcx(1, iDir) ! strainRate_xx
        strainRate(2) = strainRate(2) + nonEq(iDir) * layout%fStencil%cxcx(2, iDir) ! strainRate_yy
        strainRate(3) = strainRate(3) + nonEq(iDir) * layout%fStencil%cxcx(3, iDir) ! strainRate_zz
        strainRate(4) = strainRate(4) + nonEq(iDir) * layout%fStencil%cxcx(4, iDir) ! strainRate_xy
        strainRate(5) = strainRate(5) + nonEq(iDir) * layout%fStencil%cxcx(5, iDir) ! strainRate_yz
        strainRate(6) = strainRate(6) + nonEq(iDir) * layout%fStencil%cxcx(6, iDir) ! strainRate_xz

      end do  ! iDir

      ! Calculate strain rate
      strainRate = -1.5_rk * omega / rho * strainRate * convSR

      ! Calculate physical shear rate
      strainRate(:) = strainRate(:) * strainRate(:)
      shearRate = 2._rk * sum( strainRate(:) )
      shearRate = sqrt( shearRate )

      ! get physical dynamic viscosity
      ! model from paper of Wang
      t = ( k0 + k1 * sqrt(shearRate) )
      nuPhy = t * t / shearRate
      nuPhy = nuPhy / densityPhy

      ! get LB kinematic viscosity
      nuLB = nuPhy / convVisc

      ! get LB omega
      omega = 1._rk / ( 0.5_rk + 3 * nuLB )

      ! save omega
      outState( ?IDX?(omegaStatePos, iElem, varSys%nScalars, nElems) ) = omega

    end do iElemLoop

  end subroutine bgk_nNwtn_CS_explicit
! ****************************************************************************** !

! ****************************************************************************** !
  !> Advection relaxation routine for the
  !! BGK model with an explicit calculation of all equilibrium 
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
?? copy :: compute_routineHeader( bgk_advRel_explicit_incomp )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir                       ! voxel element counter
    integer :: QQ, nScalars
    ! temporary distribution variables
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) inv_rho0 ! inverse local density
    real(kind=rk) usq      ! square velocity
    ! derived constants
    ! equilibrium calculation variables
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ)
    real(kind=rk) omega
    ! ---------------------------------------------------------------------------
    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    omega = fieldProp(1)%fluid%omLvl( level )
    ! inverse local density
    inv_rho0 = 1._rk / rho0

    nodeloop: do iElem = 1, nSolve
      !> Generic fetching step:
      !! Streaming for pull
      !! Local copy for push
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, QQ
        pdfTmp( iDir ) = inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! local density
      do iDir = 1,QQ
        rho = rho + inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! local x-, y- and z-velocity
      do iDir = 1,QQ
        ux(:) = ux(:) + inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh)) &
          &   * dble(layout%fStencil%cxDir( :, iDir ))
      end do

      ! transfer moments to velocities
      ux(:) = ux(:) * inv_rho0

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      do iDir = 1,QQ

        !> Pre-calculate velocitiy terms
        ucx = dble(layout%fStencil%cxDir( 1, iDir ))*ux(1) &
          & + dble(layout%fStencil%cxDir( 2, iDir ))*ux(2) &
          & + dble(layout%fStencil%cxDir( 3, iDir ))*ux(3)

        !> Calculate equilibrium distribution functions fEq
        eqState(iDir) = layout%weight(iDir)*( rho + rho0*(  &
          & +  ucx*cs2inv                                   &
          & +  ucx*ucx*cs2inv*cs2inv*0.5d0                  &
          & -  usq*0.5d0*cs2inv ))

        !> Relaxation
        outState( ?SAVE?( iDir,1,iElem,QQ,nScalars,nElems,neigh )) =                &
          &            pdfTmp( iDir ) - omega*( pdfTmp(iDir) - eqState(iDir) )

      end do ! iDir

    end do nodeloop

  end subroutine bgk_advRel_explicit_incomp
! ****************************************************************************** !

end module mus_bgk_module
! ****************************************************************************** !
