We recommend reading the following post as a prerequisite for this one.
Huffman Coding
The time complexity of the algorithm discussed in the above post is O(n log n). If we know that the given array is sorted (in non-decreasing order of frequency), we can generate Huffman codes in O(n) time. Following is an O(n) algorithm for sorted input.
1. Create two empty queues.
2. Create a leaf node for each unique character and enqueue it to the first queue in non-decreasing order of frequency. Initially, the second queue is empty.
3. Dequeue two nodes with the minimum frequency by examining the front of both queues. Repeat the following steps two times:
…..a) If second queue is empty, dequeue from first queue.
…..b) If first queue is empty, dequeue from second queue.
…..c) Else, compare the front of two queues and dequeue the minimum.
4. Create a new internal node with frequency equal to the sum of the two nodes' frequencies. Make the first dequeued node its left child and the second dequeued node its right child. Enqueue this node to the second queue.
5. Repeat steps #3 and #4 while there is more than one node in the queues. The remaining node is the root node and the tree is complete.
c
#include <stdio.h>
#include <stdlib.h>
#define MAX_TREE_HT 100
/* A Huffman tree node; pointers to these nodes are also the items held in the queues. */
struct QueueNode
{
    char data;                /* symbol stored in this node */
    unsigned freq;            /* frequency (weight) of this node */
    struct QueueNode *left;   /* left child, NULL when absent */
    struct QueueNode *right;  /* right child, NULL when absent */
};
/* Array-backed FIFO queue of node pointers. */
struct Queue
{
    int front;                 /* index of the oldest queued item, -1 when empty */
    int rear;                  /* index of the newest queued item, -1 when empty */
    int capacity;              /* number of slots available in array */
    struct QueueNode **array;  /* storage for the queued node pointers */
};
/*
 * Allocates a childless node holding the given symbol and frequency.
 *
 * data: symbol to store in the node.
 * freq: frequency (weight) to store in the node.
 *
 * Returns the new node, or NULL when memory cannot be allocated.
 * The node is heap-allocated; this function never frees it.
 */
struct QueueNode* newNode(char data, unsigned freq)
{
    struct QueueNode *node = malloc(sizeof *node);
    if (node == NULL)
        return NULL;  /* report allocation failure instead of writing through NULL */

    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}
/*
 * Creates an empty queue able to hold up to `capacity` node pointers.
 *
 * capacity: number of slots; must be >= 0. A capacity of 0 yields a valid
 *           queue that is permanently full and has no backing array.
 *
 * Returns the new queue, or NULL when `capacity` is negative or memory
 * cannot be allocated. Nothing is leaked on failure.
 */
struct Queue* createQueue(int capacity)
{
    /* A negative capacity can never satisfy isFull(), so enQueue would
       write through an invalid array; reject it up front. */
    if (capacity < 0)
        return NULL;

    struct Queue *queue = malloc(sizeof *queue);
    if (queue == NULL)
        return NULL;

    queue->front = -1;
    queue->rear = -1;
    queue->capacity = capacity;
    queue->array = NULL;

    if (capacity > 0) {
        /* calloc checks the count * size multiplication for overflow. */
        queue->array = calloc((size_t)capacity, sizeof *queue->array);
        if (queue->array == NULL) {
            free(queue);
            return NULL;
        }
    }
    return queue;
}
/* Returns nonzero when the queue holds exactly one item, 0 otherwise. */
int isSizeOne(struct Queue* queue)
{
    /* front == -1 marks an empty queue, which is not "size one" even
       though front and rear are equal in that state. */
    if (queue->front == -1)
        return 0;
    return queue->rear == queue->front;
}
/* Returns 1 when the queue holds no items (front is the -1 sentinel), else 0. */
int isEmpty(struct Queue* queue)
{
    return (queue->front == -1) ? 1 : 0;
}
/* Returns nonzero when rear has reached the last slot, so no further item can be enqueued. */
int isFull(struct Queue* queue)
{
    const int lastSlot = queue->capacity - 1;
    return queue->rear == lastSlot;
}
/*
 * Appends `item` at the rear of the queue.
 * When the queue is full the item is silently dropped and the queue is unchanged.
 */
void enQueue(struct Queue* queue, struct QueueNode* item)
{
    if (!isFull(queue)) {
        queue->rear += 1;
        queue->array[queue->rear] = item;

        /* First item ever stored (or first after the queue drained):
           front moves from the -1 sentinel to slot 0. */
        if (queue->front == -1)
            queue->front = 0;
    }
}
/*
 * Removes and returns the item at the front of the queue.
 * Returns NULL when the queue is empty.
 */
struct QueueNode* deQueue(struct Queue* queue)
{
    struct QueueNode *head = NULL;

    if (!isEmpty(queue)) {
        head = queue->array[queue->front];

        if (queue->front != queue->rear) {
            queue->front += 1;
        } else {
            /* That was the last item: reset both indices to the empty sentinel. */
            queue->front = -1;
            queue->rear = -1;
        }
    }
    return head;
}
/* Returns the front item without removing it, or NULL when the queue is empty. */
struct QueueNode* getFront(struct Queue* queue)
{
    return isEmpty(queue) ? NULL : queue->array[queue->front];
}
/*
 * Dequeues and returns the node with the smaller frequency among the fronts
 * of the two queues.
 *
 * If one queue is empty the node comes from the other one. On equal
 * frequencies the node is taken from secondQueue. Returns NULL when both
 * queues are empty.
 */
struct QueueNode* findMin(struct Queue* firstQueue, struct Queue* secondQueue)
{
    struct Queue *source;

    if (isEmpty(firstQueue)) {
        source = secondQueue;
    } else if (isEmpty(secondQueue)) {
        source = firstQueue;
    } else {
        unsigned firstFreq = getFront(firstQueue)->freq;
        unsigned secondFreq = getFront(secondQueue)->freq;
        source = (firstFreq < secondFreq) ? firstQueue : secondQueue;
    }
    return deQueue(source);
}
/* Returns 1 when the node has neither a left nor a right child, else 0. */
int isLeaf(struct QueueNode* root)
{
    return !(root->left || root->right);
}
/* Prints the first n values of arr back to back (no separator), followed by a newline. */
void printArr(int arr[], int n)
{
    int idx = 0;
    while (idx < n) {
        printf("%d", arr[idx]);
        idx++;
    }
    printf("\n");
}
struct QueueNode* buildHuffmanTree(char data[], int freq[], int size)
{
struct QueueNode *left, *right, *top;
struct Queue* firstQueue = createQueue(size);
struct Queue* secondQueue = createQueue(size);
for (int i = 0; i < size; ++i)
enQueue(firstQueue, newNode(data[i], freq[i]));
while (!(isEmpty(firstQueue) && isSizeOne(secondQueue)))
{
left = findMin(firstQueue, secondQueue);
right = findMin(firstQueue, secondQueue);
top = newNode('$' , left->freq + right->freq);
top->left = left;
top->right = right;
enQueue(secondQueue, top);
}
return deQueue(secondQueue);
}
/*
 * Prints the code of every leaf in the tree rooted at root, one per line,
 * as "<symbol>: <bits>". A left edge contributes 0 and a right edge 1.
 *
 * root: tree to walk; NULL (an empty tree) prints nothing.
 * arr:  scratch buffer holding the bits of the path from the original root;
 *       it must have room for at least as many entries as the tree is deep.
 * top:  number of valid entries in arr, i.e. the depth of root.
 */
void printCodes(struct QueueNode* root, int arr[], int top)
{
    if (root == NULL)
        return;  /* empty tree: nothing to print, and nothing to dereference */

    if (root->left)
    {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right)
    {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
    if (isLeaf(root))
    {
        printf("%c: ", root->data);
        printArr(arr, top);
    }
}
void HuffmanCodes(char data[], int freq[], int size)
{
struct QueueNode* root = buildHuffmanTree(data, freq, size);
int arr[MAX_TREE_HT], top = 0;
printCodes(root, arr, top);
}
/* Demo driver: prints the Huffman codes for six symbols listed in non-decreasing frequency order. */
int main()
{
    char symbols[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int counts[] = {5, 9, 12, 13, 16, 45};
    int symbolCount = sizeof symbols / sizeof symbols[0];

    HuffmanCodes(symbols, counts, symbolCount);
    return 0;
}
Run on IDE
Output:
f: 0
c: 100
d: 101
a: 1100
b: 1101
e: 111
Time complexity: O(n)
If the input is not sorted, it needs to be sorted first before it can be processed by the above algorithm. Sorting can be done using heap sort or merge sort, both of which run in Θ(n log n). So, the overall time complexity becomes O(n log n) for unsorted input.
[ad type=”banner”]