Abstract:
Distributed crawling has shown that it can overcome important
limitations of the centralized crawling paradigm. However, the
distributed nature of existing distributed crawlers is not yet fully
utilized. The optimal benefits of this approach are usually limited to
the sites hosting the crawler. In this work we describe IPMicra, a distributed
location-aware web crawler that utilizes an IP address hierarchy
and allows crawling of links in a near-optimal, location-aware manner.
The crawler outperforms earlier distributed crawling approaches without
a significant overhead.
Keywords: Location Aware Web, Web Crawling, Distributed Web Crawling
@comment{Entry papapetrou:coopis04. The series and volume fields mirror the
issn (0302-9743) and volume (3290) parameters found in the url field.
publisher-url and abstract-url are non-standard fields; styles that do not
know them ignore them.}
@inproceedings{papapetrou:coopis04,
  author        = {Papapetrou, Odysseas and Samaras, George},
  title         = {Minimizing the Network Distance in Distributed Web Crawling},
  booktitle     = {CoopIS/DOA/ODBASE (1)},
  series        = {Lecture Notes in Computer Science},
  volume        = {3290},
  year          = {2004},
  pages         = {581--596},
  publisher     = {Springer},
  publisher-url = {http://www.springerlink.com/},
  abstract-url  = {http://www2.cs.ucy.ac.cy/~cspapap/abstracts/coopis04.html},
  url           = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3290{\&}spage=581},
}