% Conference paper in the JCDL 2007 proceedings (see booktitle/note below).
% Author names use the unambiguous "Last, First" form, with no braces around
% the given names so that styles can abbreviate every initial ("B. W.").
% Acronyms in booktitle/series are brace-protected against style recasing.
@inproceedings{4c7e48a8893443f28049a8bd74299705,
  author    = {Chee, Brant W. and Schatz, Bruce},
  title     = {Document clustering using small world communities},
  booktitle = {Proceedings of the 7th {ACM/IEEE} Joint Conference on Digital Libraries, {JCDL} 2007},
  series    = {Proceedings of the {ACM} International Conference on Digital Libraries},
  pages     = {53--62},
  year      = {2007},
  isbn      = {1595936440},
  doi       = {10.1145/1255175.1255186},
  language  = {English (US)},
  keywords  = {Community structure, Document clustering, Scale-free networks, Semantic clustering, Small worlds},
  abstract  = {Words in natural language documents exhibit a small world network structure. Thus the physics community provides us with an extensive supply of algorithms for extracting community structure. We present a novel method for semantically clustering a large collection of documents using small world communities. This method combines modified physics algorithms with traditional information retrieval techniques. A term network is generated from the document collection, the terms are clustered into small world communities, the semantic term clusters are used to generate overlapping document clusters. The algorithm combines the speed of single link with the quality of complete link. Clustering takes place in nearly real-time and the results are judged to be coherent by expert users. Our algorithm occupies a middle ground between speed and quality of document clustering.},
  note      = {7th ACM/IEEE Joint Conference on Digital Libraries, JCDL 2007: Building and Sustaining the Digital Environment ; Conference date: 18-06-2007 Through 23-06-2007},
}