#include "TaskManager.h"
#include "SchedTask.h"
#include "sort.h"
#include "Func.h"
#include <string.h>

// Prototype for get_split_num(), which is defined further down in this file.
extern int get_split_num(int len, int num);
// Flag defined outside this file. sort_start() reads it: when it is zero,
// the restart task is additionally made to wait_for fsort[0].
extern int all;  // allocate task at once

/**
 * Return the number of blocks the data should be split into so that a
 * single block does not hold more than MAX_BLOCK_SIZE elements.
 *
 * @param  len  total number of elements to sort
 * @param  num  number of SPEs (workers) to use
 *
 * @return num when len / num is already below MAX_BLOCK_SIZE, otherwise
 *         len / MAX_BLOCK_SIZE rounded up.
 *
 * Degenerate inputs:
 *   - num < 1 is treated as 1, so the division below is always defined.
 *   - when len < num the result is capped at len (but never below 1),
 *     because more blocks than elements would leave some blocks empty.
 * For len >= num >= 1 the result is the same as it has always been.
 */
int
get_split_num(int len, int num)
{
    // A worker count below one would make len / num undefined.
    if (num < 1) {
	num = 1;
    }

    // Never ask for more blocks than there are elements.
    if (len < num) {
	num = (len > 0) ? len : 1;
    }

    if (len / num < MAX_BLOCK_SIZE) {
	return num;
    }

    // Round up; written this way so that a large len cannot overflow,
    // unlike (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE.
    return len / MAX_BLOCK_SIZE + ((len % MAX_BLOCK_SIZE != 0) ? 1 : 0);
}


/**
 * One pass of the sort, run as the SortSimple task.
 *
 * Each pass creates split_num QUICK_SORT tasks (fsort), one per block, and,
 * when there is more than one block, split_num-1 further QUICK_SORT tasks
 * (bsort) whose ranges start half a block later. It then creates and spawns
 * a new SortSimple task (restart) carrying the same Sort parameter, which
 * runs this function again. After split_num passes the next invocation
 * returns without creating any task, which ends the chain.
 *
 * @param manager  task used to read the Sort parameter (param 0) and to
 *                 create the sub tasks
 * @param d        not used by this function
 * @param e        not used by this function
 *
 * @return always 0
 */

SchedDefineTask1(SortSimple, sort_start );

static int
sort_start(SchedTask *manager, void *d, void *e)
{
    Sort *s =  (Sort*)manager->get_param(0);
    int half_num = s->split_num-1;

    // Number of passes still to run for the sort in progress; a negative
    // value means no sort is in progress. It is (re)armed from split_num
    // whenever it is idle, so a second sort in the same process runs its
    // passes too (a static initialised directly from s->split_num would be
    // set only on the very first call).
    static int sort_count = -1;
    if (sort_count < 0) {
	sort_count = s->split_num;
    }

    // Checked before any block-size arithmetic so that split_num == 0 never
    // reaches the division below.
    if (--sort_count < 0) {
	return 0;
    }

    // Number of elements sorted by one task (rounded up).
    int block_num = (s->data_length + s->split_num -1)/s->split_num;
    int half_block_num = block_num/2;

    // The last block only holds what is left over.
    int last_block_num = s->data_length - (s->split_num-1)*block_num;
    int last_half_block_num = half_block_num+(last_block_num/2);

    // Forward tasks for every block but the last. The same address and size
    // are passed twice (presumably input and output buffer, i.e. the task
    // works in place -- confirm against create_task).
    for (int i = 0; i < s->split_num-1; i++) {
	s->fsort[i] = manager->create_task(QUICK_SORT,
	    (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
	    (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
	// Block i is touched by bsort[i-1] and bsort[i] of the previous
	// pass, so wait for both of them when they exist. bsort has
	// split_num-1 entries, hence bsort[i] is a valid slot for every i
	// of this loop, including i == split_num-2.
	if (i>0 && s->bsort[i-1]) {
	    s->fsort[i]->wait_for(s->bsort[i-1]);
	}
	if (s->bsort[i]) {
	    s->fsort[i]->wait_for(s->bsort[i]);
	}
	s->fsort[i]->set_cpu(SPE_ANY);
    }

    // The last block holds the remainder, so it uses last_block_num.
    {
	int i = s->split_num-1;

	s->fsort[i] = manager->create_task(QUICK_SORT,
	    (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
	    (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
	if (i>0 && s->bsort[i-1]) {
	    s->fsort[i]->wait_for(s->bsort[i-1]);
	}
	s->fsort[i]->set_cpu(SPE_ANY);
    }

    if (s->split_num > 1) {

	// Backward tasks: same length as a block, shifted by half a block.
	for (int i = 0; i < half_num-1; i++) {
	    // Release the task object left over from the previous pass.
	    // NOTE(review): fsort tasks created above may just have been
	    // told to wait_for this task; confirm free_htask is safe here.
	    if (s->bsort[i]) {
		manager->free_htask(s->bsort[i]);
	    }
	    s->bsort[i] = manager->create_task(QUICK_SORT,
		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
	    s->bsort[i]->set_cpu(SPE_ANY);
	}

	// The last backward task reaches into the shorter last block, so it
	// uses last_half_block_num as its length.
	{
	    int i = half_num-1;

	    if (s->bsort[i]) {
		manager->free_htask(s->bsort[i]);
	    }
	    s->bsort[i] = manager->create_task(QUICK_SORT,
		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
	    s->bsort[i]->set_cpu(SPE_ANY);
	}

	// bsort[i] starts only after the two forward tasks it overlaps.
	// no_auto_free() is requested because the next pass still refers to
	// these tasks (wait_for / free_htask above).
	for (int i = 0; i < half_num; i++) {
	    s->bsort[i]->wait_for(s->fsort[i]);
	    s->bsort[i]->wait_for(s->fsort[i+1]);
	    s->bsort[i]->no_auto_free();
	    s->bsort[i]->spawn();
	}
    }

    // Task that runs this function again for the next pass.
    HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
    restart->set_param(0,(memaddr)s);
    if (!all) {
	restart->wait_for(s->fsort[0]);
    }
    for (int i = 0; i < s->split_num; i++) {
	s->fsort[i]->spawn();
    }
    if (sort_count == 1) {
	// With the counter at 1, exactly one more pass will still create
	// tasks after this one. Here the restart task is made to wait for
	// every backward task, and the backward tasks are switched back to
	// auto_free. (Original author's remark: "we should not need this?")
	for (int i = 0; i < half_num; i++) {
	    restart->wait_for(s->bsort[i]);
	    s->bsort[i]->auto_free();
	}
    }
    restart->spawn();
    return 0;
}


/* end */
