Abstract:
Distributed joins have gained importance in the past decade, mainly
due to the increased number of available data sources on the
Internet. In this work we extend Bloomjoin, the state-of-the-art
algorithm for distributed joins, so that it minimizes the network
usage for the query execution based on database statistics. We present four
extensions of the algorithm, and construct a query optimizer for
selecting the best extension for each query. Our theoretical analysis and
experimental evaluation show significant network cost savings
compared to the original Bloomjoin algorithm.
@comment{Conference paper entry. Author names are written in Last, First
form and joined by the word and, so that BibTeX parses three separate
authors. Bloom is brace-protected in the title to keep its capital under
sentence-casing styles.}
@inproceedings{ramesh:icdcit08,
  author    = {Ramesh, Sukriti and Papapetrou, Odysseas and Siberski, Wolf},
  title     = {Optimizing Distributed Joins with {Bloom} Filters},
  booktitle = {5th International Conference on Distributed Computing and
               Internet Technologies (ICDCIT) 2008},
  year      = {2008},
}