Integrating Spring 3.1 and Lucene 4
August 23, 2012 Leave a comment
Integrating Spring 3.1 and Lucene 4 is fairly trivial. I didn’t want to use XML configuration, so I used the @Configuration annotation to configure the Lucene index writer, analyzer, and query parser. Here is the configuration code:
...
/**
 * Java-based Spring configuration that wires up the Lucene beans used by the
 * application: the analyzer, the on-disk index directory, the index writer
 * (which also indexes the configured source tree when it is created), the
 * index searcher and the query parser.
 */
@Configuration
@PropertySource("classpath:/app.properties")
@ComponentScan(basePackages={"com.doozer"})
public class AppConfig {

    /** Filesystem location of the Lucene index (property {@code index.location}). */
    private @Value("#{appProperties['index.location']}") String indexLocation;

    /** File or directory whose contents are indexed (property {@code index.source}). */
    private @Value("#{appProperties['index.source']}") String indexSource;

    /**
     * Creates the analyzer used for both indexing and query parsing.
     *
     * @return a {@link StandardAnalyzer} for Lucene 4.0
     */
    @Bean(name="analyzer")
    public Analyzer getAnalyzer() {
        return new StandardAnalyzer(Version.LUCENE_40);
    }

    /**
     * Opens the index directory at {@code indexLocation}, creating it first
     * when it does not exist yet.
     *
     * @return the opened directory, using a {@link NativeFSLockFactory}
     * @throws IOException if the directory cannot be created or opened
     */
    @Bean(name="fsDirectory")
    @DependsOn("analyzer")
    public FSDirectory getFSDirectory() throws IOException {
        File location = new File(indexLocation);
        if (!location.exists()) {
            System.out.println("Creating directory: '" + location.getAbsolutePath() + "'");
            // mkdirs() reports failure through its return value only; re-check
            // isDirectory() in case something else created it concurrently.
            if (!location.mkdirs() && !location.isDirectory()) {
                throw new IOException(
                        "Unable to create index directory: '" + location.getAbsolutePath() + "'");
            }
        }
        return FSDirectory.open(location, new NativeFSLockFactory());
    }

    /**
     * Creates the index writer, indexes everything under {@code indexSource}
     * and commits the result.
     *
     * <p>Indexing is best-effort: a failure while indexing is reported and the
     * writer is still returned. A failure to create the writer itself or to
     * commit is propagated, so this method never returns {@code null}.
     *
     * @return the open index writer
     * @throws IOException if the writer cannot be created or the commit fails
     */
    @Bean(name="indexWriter")
    @DependsOn("fsDirectory")
    public IndexWriter getIndexWriter() throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, getAnalyzer());
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        // Let construction failures propagate: returning a null writer would
        // only postpone the error to the first use of the bean.
        IndexWriter writer = new IndexWriter(getFSDirectory(), iwc);

        try {
            indexDocs(writer, new File(indexSource));
        } catch (Exception e) {
            // Deliberately best-effort: keep whatever was indexed so far and
            // still hand out a usable writer.
            System.out.println("Unable to index '" + indexSource + "': " + e.getMessage());
            e.printStackTrace();
        }

        // A reader opened on the directory only sees committed changes, and a
        // brand-new index has no segments file until the first commit. Commit
        // here so the searcher created afterwards can open the index.
        writer.commit();
        return writer;
    }

    /**
     * Creates a searcher over the index directory. Declared to depend on the
     * {@code indexWriter} bean so the index has been written before it is read.
     *
     * @return a searcher backed by a reader opened on the index directory
     * @throws IOException if the index cannot be opened
     */
    @Bean(name="indexSearcher")
    @DependsOn("indexWriter")
    public IndexSearcher getIndexSearcher() throws IOException {
        return new IndexSearcher(DirectoryReader.open(getFSDirectory()));
    }

    /**
     * Creates the query parser, using the same analyzer as the index writer.
     *
     * @return a {@link StandardQueryParser} configured with the analyzer bean
     * @throws IOException declared for interface compatibility
     */
    @Bean(name="queryParser")
    @DependsOn("analyzer")
    public StandardQueryParser getQueryParser() throws IOException {
        return new StandardQueryParser(getAnalyzer());
    }

    /**
     * Indexes the given file, or recursively every readable file below it when
     * it is a directory. Unreadable files are silently skipped.
     *
     * @param writer the writer to add or update documents in
     * @param file   the file or directory to index
     * @throws IOException if reading a file or writing to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        // do not try to index files that cannot be read
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] children = file.list();
            // list() returns null when an IO error occurs
            if (children != null) {
                for (String child : children) {
                    indexDocs(writer, new File(file, child));
                }
            }
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
            // at least on windows, some temporary files raise this exception with an "access denied" message
            // checking if the file can be read doesn't help
            return;
        }

        try {
            // make a new, empty document
            Document doc = new Document();

            // Add the path of the file as a field named "path". Use a
            // field that is indexed (i.e. searchable), but don't tokenize
            // the field into separate words and don't index term frequency
            // or positional information:
            Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
            doc.add(pathField);

            // Add the last modified date of the file a field named "modified".
            // Use a LongField that is indexed (i.e. efficiently filterable with
            // NumericRangeFilter). This indexes to milli-second resolution, which
            // is often too fine. You could instead create a number based on
            // year/month/day/hour/minutes/seconds, down the resolution you require.
            // For example the long value 2011021714 would mean
            // February 17, 2011, 2-3 PM.
            doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

            // Add the contents of the file to a field named "contents". Specify a Reader,
            // so that the text of the file is tokenized and indexed, but not stored.
            // Note that the file is decoded as UTF-8; if it is in a different
            // encoding, searching for special characters will fail.
            doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there):
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index (an old copy of this document may have been indexed) so
                // we use updateDocument instead to replace the old one matching the exact
                // path, if present:
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            fis.close();
        }
    }
}
...
@Configurable
public class App
{
@Autowired
public MongoOperations mongoOperation;
@Autowired
public StorageService storageService;
ApplicationContext ctx;
public App() {
ctx = new AnnotationConfigApplicationContext(AppConfig.class);