This document provides a C program that identifies duplicate subtrees in a binary tree using hashing techniques. The program uses a hash map to store serialized representations of the subtrees and detect duplicates.
Program Structure
#include
#include
#include
#include
#define MAX_NODES 1000
#define MAX_SERIALIZED_SIZE 100
// Definition of a binary tree node
struct TreeNode {
int val;
struct TreeNode *left;
struct TreeNode *right;
};
// Hash map node to store serialized subtree and its frequency
struct HashNode {
char key[MAX_SERIALIZED_SIZE];
int count;
};
// Global hash map to track duplicate subtrees
struct HashNode hashMap[MAX_NODES];
int hashMapSize = 0;
// Function prototypes
struct TreeNode* createNode(int val);
char* serializeTree(struct TreeNode* root);
bool findDuplicateSubtrees(struct TreeNode* root);
void printDuplicates();
// Create a new binary tree node
struct TreeNode* createNode(int val) {
struct TreeNode* newNode = (struct TreeNode*)malloc(sizeof(struct TreeNode));
newNode->val = val;
newNode->left = newNode->right = NULL;
return newNode;
}
// Serialize the binary tree into a string
char* serializeTree(struct TreeNode* root) {
static char buffer[MAX_SERIALIZED_SIZE];
if (root == NULL) {
strcpy(buffer, "#"); // Use '#' to denote null
return buffer;
}
sprintf(buffer, "%d,%s,%s", root->val, serializeTree(root->left), serializeTree(root->right));
return buffer;
}
// Find and print duplicate subtrees
bool findDuplicateSubtrees(struct TreeNode* root) {
if (root == NULL) return false;
char* serialized = serializeTree(root);
// Check for the subtree in the hash map
for (int i = 0; i < hashMapSize; i++) { if (strcmp(hashMap[i].key, serialized) == 0) { hashMap[i].count++; if (hashMap[i].count == 2) { printf("Duplicate subtree found: %s\n", serialized); } return false; } } // Add new serialized subtree to the hash map strcpy(hashMap[hashMapSize].key, serialized); hashMap[hashMapSize].count = 1; hashMapSize++; // Recursively check left and right subtrees findDuplicateSubtrees(root->left);
findDuplicateSubtrees(root->right);
return true;
}
// Main function
int main() {
struct TreeNode* root = createNode(1);
root->left = createNode(2);
root->right = createNode(3);
root->left->left = createNode(4);
root->left->right = createNode(5);
root->right->left = createNode(2);
root->right->left->left = createNode(4);
findDuplicateSubtrees(root);
return 0;
}
Documentation
- struct TreeNode: Defines a node of the binary tree with an integer value and pointers to left and right children.
- struct HashNode: Represents a hash map entry storing a serialized subtree representation and its occurrence count.
- createNode(int val): Allocates memory for a new node and initializes it with the given value.
- serializeTree(struct TreeNode* root): Serializes the tree into a string format to facilitate hashing.
- findDuplicateSubtrees(struct TreeNode* root): Traverses the tree, serializes each subtree, and checks for duplicates using a hash map.
- main(): Creates a sample binary tree and calls the function to find and print duplicate subtrees.
Explanation of the Program:
- Structure: The program defines a binary tree using a
TreeNodestructure and maintains a hash map using theHashNodestructure to store serialized representations of subtrees and their counts. - Key Functions:
createNode: Allocates memory and initializes a new tree node.serializeTree: Serializes the tree structure into a string for easy comparison and storage in the hash map.findDuplicateSubtrees: Traverses the tree, serializes each subtree, checks for duplicates in the hash map, and prints duplicate subtrees when found.
- Main Function: Constructs a sample binary tree and invokes the function to find and print duplicate subtrees.

