/***********************************************************
        maketree.c -- make Huffman tree
***********************************************************/
#include "ar.h"

/* A node number of the Huffman tree.  It is used as an index into the
   frequency table and into the left[]/right[] child tables. */
typedef uint16_t node_t;

/*
     +-----------------------+-----------------------+
freq | leafs(0..nparm)       | nodes(nparm..nparm*2) |
     +-----------------------+-----------------------+
     0                       nparm                   nparm * 2 - 1

     |<--------------------->|<--------------------->|
   frequency of huffman leafs    frequency of huffman nodes


      e.g: nparm = 3

                 4     ... freq[4]
                / \
               3   \   ... freq[3]
              /\    \
             0  1    2 ... freq[0 .. 2]

*/
/* Priority queue (binary heap) of node numbers, ordered by frequency. */
struct heap_t {
    /*
      buf[1..size] holds the node numbers that are currently queued.

      A node number is either
         o a leaf, i.e. a character to be encoded (raw text), or
         o an internal node of the Huffman tree.
      In both cases it is also the index into freq[].

      The index `i' of buf[i] is the heap position: downheap() keeps the
      entry with the smallest frequency at buf[1], and the children of
      buf[i] are buf[2*i] and buf[2*i + 1].
    */
    node_t buf[NC + 1];          /* buf[0] is not used */
    int size;                    /* number of entries in use in buf[] */
    /* freq[c] is the priority (frequency) of node number c */
    uint16_t *freq;             /* priority */
};

/*
left and right
     +-----------------------+-----------------------+
     |leafs(0..nparm)        | nodes(nparm..nparm*2) |
     +-----------------------+-----------------------+
     0                       nparm                   nparm * 2 - 1

     |-----------------------|-----------------------|
       no value                child node (index of left/right)

      e.g:
                 .     ... root
                / \
               .   c   ... left[root], right[root]
              /\
             a  b      ... left[left[root]], right[left[root]]

   Note: If left[n] (or right[n]) is less than max_leafs, it refers to a
         leaf. A leaf has no children, so left[left[n]] has no value.

len_cnt[depth] is the number of leafs at the depth.

                 .     ... len_cnt[0] is not used.
                / \
               .   c   ... len_cnt[1] = 1
              /\
             a  b      ... len_cnt[2] = 2

   Note: len_cnt[16] counts the leafs at depth 16 or deeper.

   i.e.)
       /\
      a /\       .. len_cnt[ 1] = 0000000000000001
       b /\       .. len_cnt[ 2] = 0000000000000001
        c /\       .. len_cnt[ 3] = 0000000000000001
         d /\       .. len_cnt[ 4] = 0000000000000001
          e /\       .. len_cnt[ 5] = 0000000000000001
           f /\       .. len_cnt[ 6] = 0000000000000001
            g /\       .. len_cnt[ 7] = 0000000000000001
             h /\       .. len_cnt[ 8] = 0000000000000001
              i /\       .. len_cnt[ 9] = 0000000000000001
               j /\       .. len_cnt[10] = 0000000000000001
                k /\       .. len_cnt[11] = 0000000000000001
                 l /\       .. len_cnt[12] = 0000000000000001
                  m /\       .. len_cnt[13] = 0000000000000001
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000011
                      q  r                       ||||||||||||||||
                                                 vvvvvvvvvvvvvvvv
                                           cum = 0000000000000001

Algorithm to keep the depth within 16 (repeated until cum is balanced):

    step1. len_cnt[16] minus 1

                    /\                               :
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /       .. len_cnt[16] = 0000000000000010
                      q

    step2. len_cnt[15] minus 1

                    /                                :
                   n /\       .. len_cnt[14] = 0000000000000001
                      /\       .. len_cnt[15] = 0000000000000000
                     p /        .. len_cnt[16] = 0000000000000010
                      q

    step3. len_cnt[16] plus 2

                /   \                                :
              n    /  \       .. len_cnt[14] = 0000000000000001
                 /\    /\      .. len_cnt[15] = 0000000000000000
                o  r  p /       .. len_cnt[16] = 0000000000000100
                       q

*/
/* The Huffman tree under construction plus the per-depth leaf counters. */
struct tree_t {
    /* left[n] and right[n] are n's child nodes.  Only internal nodes
       (n >= max_leafs) have entries; make_tree() never writes the
       entries that correspond to leafs. */
    node_t left[2*NC - 1];      /* left[0]..left[NC-1] is not used */
    node_t right[2*NC - 1];     /* right[0]..right[NC-1] is not used */
    /* len_cnt[d] is the number of leafs at depth d; depths of 16 or
       more are all counted in len_cnt[16] (see count_len()). */
    int len_cnt[17];            /* len_cnt[0] is not used */
    int max_leafs;              /* max number of leafs (that is nparm) */
    node_t root;                /* root node */
};

/* priority queue; send i-th entry down heap */
static void
downheap(struct heap_t *heap, int i)
{
    int j, k;

    k = heap->buf[i];
    while ((j = 2 * i) <= heap->size) {
        if (j < heap->size && heap->freq[heap->buf[j]] > heap->freq[heap->buf[j + 1]])
            j++;
        if (heap->freq[k] <= heap->freq[heap->buf[j]])
            break;
        heap->buf[i] = heap->buf[j];
        i = j;
    }
    heap->buf[i] = k;
}

/*
   Count the number of leafs at each depth of the subtree rooted at `i'.

   Call with i = t->root and depth = 0.  Every leaf found at `depth'
   increments t->len_cnt[depth]; leafs at depth 16 or more are all
   accumulated in t->len_cnt[16].
*/
static void
count_len(struct tree_t *t, int i, int depth)
{
    int slot;

    if (i >= t->max_leafs) {
        /* internal node: descend into both children, one level deeper */
        count_len(t, t->left[i], depth + 1);
        count_len(t, t->right[i], depth + 1);
        return;
    }

    /* leaf: clamp the depth so that the counter index never exceeds 16 */
    slot = depth;
    if (slot >= 16)
        slot = 16;
    t->len_cnt[slot]++;
}

/*
   Compute the code length of every leaf, limited to 16 bits.

     t:       tree built by make_tree(); t->len_cnt[] is overwritten.
     len:     out; len[c] receives the code length of leaf c.
     sortptr: list of the leafs in the order make_tree() recorded them.
              The longest lengths are handed out to the first entries.
*/
static void
make_len(struct tree_t *t, uint8_t *len, uint16_t *sortptr)
{
    const uint32_t full = 1U << 16;     /* weight of a complete code */
    int *cnt = t->len_cnt;
    uint32_t total = 0;
    int d, k;

    for (d = 16; d >= 0; d--)
        cnt[d] = 0;
    count_len(t, t->root, 0);

    /* a leaf at depth d weighs 2^(16-d); a tree of depth <= 16 sums to 2^16 */
    for (d = 1; d <= 16; d++)
        total += cnt[d] << (16 - d);

    /* Leafs deeper than 16 were counted at depth 16, so the sum may be
       too large.  Each round drops one depth-16 slot and turns the
       deepest shorter leaf into two leafs one level below it, which
       lowers the sum by exactly one. */
    while (total != full) {
        cnt[16]--;
        d = 15;
        while (d > 0 && cnt[d] == 0)
            d--;
        if (d > 0) {
            cnt[d]--;
            cnt[d + 1] += 2;
        }
        total--;
    }

    /* assign the lengths, longest first, following the sortptr order */
    for (d = 16; d >= 1; d--) {
        for (k = 0; k < cnt[d]; k++)
            len[*sortptr++] = d;
    }
}

/*
   Assign the Huffman code of every leaf from its code length.

     t:    supplies len_cnt[1..16] (number of leafs per code length) and
           max_leafs (number of entries in len[] and code[]).
     len:  in;  len[c] is the code length of leaf c (0 = leaf not used).
     code: out; code[c] is the code of leaf c, right-aligned in 16 bits.
           Leafs with len[c] == 0 get code 0.

   Codes of the same length are consecutive numbers handed out in
   increasing leaf order, starting from start[length].
*/
static void
make_code(struct tree_t *t, uint8_t *len, uint16_t *code)
{
    int i;
    uint16_t start[18], c;

    /*
       /\               a: 0     len_cnt[1] = 1
      a /\              b: 10    len_cnt[2] = 2
        b c             c: 11    len_cnt[2] = 2

              i     len_cnt[i]   start[i]
          --------------------------------
              1         1         0
              2         2        (0 + 1)*2 = 2
              3         0        (2 + 2)*2 = 8
              4         0        (8 + 0)*2 =16
              5         0                   32
              6         0                   64
              :         :                    :
             15         0    (16384 + 0)*2 =32768
             16         0    (32768 + 0)*2 = 0  (65536 wraps in 16 bits)
             17         -        (0 + 0)*2 = 0  (never used as a length)
     */
    start[1] = 0;
    for (i = 1; i <= 16; i++)
        start[i + 1] = (start[i] + t->len_cnt[i]) << 1;

    /*
      c  len[c]   i     len_cnt[i]   start[i]   code[c] (Huffman coding)
     ----------------------------------------------------------------
      a     1     1         1            0      00000000 0000000 0
      b     2     2         2            2      00000000 000000 10
      c     2     2         2            3      00000000 000000 11
    */
    for (c = 0; c < t->max_leafs; c++) {
        i = len[c];
        if (i == 0) {
            /* Unused leaf: start[0] is never set up, so do not read it.
               Give the leaf a well-defined code instead. */
            code[c] = 0;
            continue;
        }
        code[c] = start[i]++;
    }
}

/* make Huffman encoding tables.

   make lenparm and codeparm.

     lenparm[c]:  length of bits of Huffman encoding for the `c'.
     codeparm[c]: Huffman encoding for the `c'.

   return root (freqparm[root] is sum of frequencies. that is original text size).
*/
int
make_tree(uint16_t nparm, uint16_t *freqparm, /* in data */
          uint8_t *lenparm, uint16_t *codeparm) /* out data */
{
    node_t i, avail, *sortptr;
    struct heap_t heap;
    struct tree_t t;

    /* The nparm is the size of the freqparm[] */
    /* That is max number of kinds of raw characters */
    avail = t.max_leafs = nparm;

    /* initialize heap (priority queue) */
    heap.freq = freqparm;
    heap.size = 0;
    heap.buf[1] = 0;
    for (i = 0; i < nparm; i++) {
        lenparm[i] = 0;
        if (heap.freq[i])
            heap.buf[++heap.size] = i;
    }

    if (heap.size < 2) {
        codeparm[heap.buf[1]] = 0;
        return heap.buf[1];
    }

    /* make priority queue */
    for (i = heap.size / 2; i >= 1; i--)
        downheap(&heap, i);

    /* make t (Huffman tree) */
    sortptr = codeparm;         /* codeparm is temporarily used */
    do {                        /* while queue has at least two entries */
        node_t c1, c2;

        /* take out least-freq entry */
        c1 = heap.buf[1];
        heap.buf[1] = heap.buf[heap.size--];
        downheap(&heap, 1);

        /* next least-freq entry */
        c2 = heap.buf[1];

        /* bundle two nodes to t.root */
        t.root = avail++;            /* generate new node */
        heap.freq[t.root] = heap.freq[c1] + heap.freq[c2];

        /* new node put into queue */
        heap.buf[1] = t.root;
        downheap(&heap, 1);

        /* make huffman tree */
        t.left[t.root] = c1;
        t.right[t.root] = c2;

        if (c1 < t.max_leafs) *sortptr++ = c1;
        if (c2 < t.max_leafs) *sortptr++ = c2;
    } while (heap.size > 1);

    /* Huffman tree:

                 . -- t.root (r)
                / \
   left[r] --  .   \
              /\    \
             a  b    c <-- right[r]

             ^   \
             |    right[left[r]]
          left[left[r]]

    */

    /* make lenparm (codeparm is temporarily used) */
    make_len(&t, lenparm, codeparm);

    /* make codeparm */
    make_code(&t, lenparm, codeparm);

    return t.root;
}
