Java Code Index Search Engine Apache Lucene
import java.io.*;
import java.util.Date;
import
org.apache.lucene.analysis.Analyzer;
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import
org.apache.lucene.document.NumericField;
import
org.apache.lucene.index.FieldInfo.IndexOptions;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import
org.apache.lucene.store.Directory;
import
org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/** Index all text files under a
directory.
* <p>
* This is a command-line application
demonstrating simple Lucene indexing.
* Run it with no command-line arguments for
usage information.
*/
public class IndexFiles {
private IndexFiles() {}
/** Index all text files under a directory. */
public static void main(String[] args) {
+ " [-index INDEX_PATH]
[-docs DOCS_PATH] [-update]\n\n"
+ "This indexes the
documents in DOCS_PATH, creating a Lucene index"
+ "in INDEX_PATH that
can be searched with SearchFiles";
String indexPath = "index";
String docsPath = null;
boolean create = true;
for(int i=0;i<args.length;i++) {
if ("-index".equals(args[i])) {
indexPath = args[i+1];
i++;
} else if ("-docs".equals(args[i])) {
docsPath = args[i+1];
i++;
} else if ("-update".equals(args[i])) {
create = false;
}
}
if (docsPath == null) {
System.err.println("Usage: " + usage);
System.exit(1);
}
final File docDir = new File(docsPath);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '"
+docDir.getAbsolutePath()+ "' does not exist or is not readable, please
check the path");
System.exit(1);
}
Date start = new Date();
try {
System.out.println("Indexing to directory '" + indexPath +
"'...");
Directory dir = FSDirectory.open(new File(indexPath));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
analyzer);
if (create) {
// Create a new index in the directory, removing any
// previously indexed documents:
iwc.setOpenMode(OpenMode.CREATE);
} else {
// Add new documents to an existing index:
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
// Optional: for better indexing performance, if you
// are indexing many documents, increase the RAM
// buffer. But if you do this,
increase the max heap
// size to the JVM (eg add -Xmx512m or -Xmx1g):
//
// iwc.setRAMBufferSizeMB(256.0);
IndexWriter writer = new IndexWriter(dir, iwc);
indexDocs(writer, docDir);
// NOTE: if you want to maximize search performance,
// you can optionally call forceMerge here. This can be
// a terribly costly operation, so generally it's only
// worth it when your index is relatively static (ie
// you're done adding documents to it):
//
// writer.forceMerge(1);
writer.close();
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total
milliseconds");
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
}
static void indexDocs(IndexWriter writer, File file)
throws IOException {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i <
files.length; i++) {
indexDocs(writer, new File(file,
files[i]));
}
}
} else {
FileInputStream fis;
try {
fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
// at least on windows, some
temporary files raise this exception with an "access denied"
message
// checking if the file can be read
doesn't help
return;
}
try {
Field pathField = new
Field("path", file.getPath(), Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS);
pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(pathField);
NumericField modifiedField = new
NumericField("modified");
modifiedField.setLongValue(file.lastModified());
doc.add(modifiedField);
doc.add(new
Field("contents", new BufferedReader(new InputStreamReader(fis,
"UTF-8"))));
if (writer.getConfig().getOpenMode()
== OpenMode.CREATE) {
System.out.println("adding
" + file);
writer.addDocument(doc);
} else {
System.out.println("updating
" + file);
writer.updateDocument(new
Term("path", file.getPath()), doc);
}
} finally {
fis.close();
}
}
}
}
}
|
import java.io.*;
import java.util.Date;
import
org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.index.IndexReader;
import
org.apache.lucene.queryParser.QueryParser;
import
org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import
org.apache.lucene.search.ScoreDoc;
import
org.apache.lucene.search.TopDocs;
import
org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class SearchFiles {
private SearchFiles() {}
public static void main(String[] args) throws Exception {
String usage =
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir]
[-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging
hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for
details.";
if (args.length > 0 && ("-h".equals(args[0]) ||
"-help".equals(args[0]))) {
System.out.println(usage);
System.exit(0);
}
String index = "index";
String field = "contents";
String queries = null;
int repeat = 0;
boolean raw = false;
String queryString = null;
int hitsPerPage = 10;
for(int i = 0;i < args.length;i++) {
if ("-index".equals(args[i])) {
index = args[i+1];
i++;
} else if ("-field".equals(args[i])) {
field = args[i+1];
i++;
} else if ("-queries".equals(args[i])) {
queries = args[i+1];
i++;
} else if ("-query".equals(args[i])) {
queryString = args[i+1];
i++;
} else if ("-repeat".equals(args[i])) {
repeat = Integer.parseInt(args[i+1]);
i++;
} else if ("-raw".equals(args[i])) {
raw = true;
} else if ("-paging".equals(args[i])) {
hitsPerPage = Integer.parseInt(args[i+1]);
if (hitsPerPage <= 0) {
System.err.println("There must
be at least 1 hit per page.");
System.exit(1);
}
i++;
}
}
IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
IndexSearcher searcher = new IndexSearcher(reader);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
BufferedReader in = null;
if (queries != null) {
in = new BufferedReader(new InputStreamReader(new
FileInputStream(queries), "UTF-8"));
} else {
in = new BufferedReader(new InputStreamReader(System.in,
"UTF-8"));
}
QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
while (true) {
if (queries == null && queryString == null) { // prompt the user
System.out.println("Enter query: ");
}
String line = queryString != null ? queryString : in.readLine();
if (line == null || line.length() == -1) {
break;
}
line = line.trim();
if (line.length() == 0) {
break;
}
Query query = parser.parse(line);
System.out.println("Searching for: " + query.toString(field));
if (repeat > 0) { // repeat &
time as benchmark
Date start = new Date();
for (int i = 0; i < repeat; i++) {
searcher.search(query, null, 100);
}
Date end = new Date();
System.out.println("Time:
"+(end.getTime()-start.getTime())+"ms");
}
doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null
&& queryString == null);
if (queryString != null) {
break;
}
}
searcher.close();
reader.close();
}
public static void doPagingSearch(BufferedReader in, IndexSearcher
searcher, Query query,
int
hitsPerPage, boolean raw, boolean interactive) throws IOException {
// Collect enough docs to show 5 pages
TopDocs results = searcher.search(query, 5 * hitsPerPage);
ScoreDoc[] hits = results.scoreDocs;
int numTotalHits = results.totalHits;
System.out.println(numTotalHits + " total matching
documents");
int start = 0;
int end = Math.min(numTotalHits, hitsPerPage);
while (true) {
if (end > hits.length) {
System.out.println("Only results 1 - " + hits.length +"
of " + numTotalHits + " total matching documents collected.");
System.out.println("Collect more (y/n) ?");
String line = in.readLine();
if (line.length() == 0 || line.charAt(0) == 'n') {
break;
}
hits = searcher.search(query, numTotalHits).scoreDocs;
}
end = Math.min(hits.length, start + hitsPerPage);
for (int i = start; i < end; i++) {
if (raw) { // output raw format
System.out.println("doc="+hits[i].doc+"
score="+hits[i].score);
continue;
}
Document doc = searcher.doc(hits[i].doc);
String path = doc.get("path");
if (path != null) {
System.out.println((i+1) + ".
" + path);
String title =
doc.get("title");
if (title != null) {
System.out.println(" Title: " +
doc.get("title"));
}
} else {
System.out.println((i+1) + ".
" + "No path for this document");
}
}
if (!interactive || end == 0) {
break;
}
if (numTotalHits >= end) {
boolean quit = false;
while (true) {
System.out.print("Press
");
if (start - hitsPerPage >= 0) {
System.out.print("(p)revious
page, ");
}
if (start + hitsPerPage <
numTotalHits) {
System.out.print("(n)ext
page, ");
}
System.out.println("(q)uit or
enter number to jump to a page.");
String line = in.readLine();
if (line.length() == 0 ||
line.charAt(0)=='q') {
quit = true;
break;
}
if (line.charAt(0) == 'p') {
start = Math.max(0, start -
hitsPerPage);
break;
} else if (line.charAt(0) == 'n') {
if (start + hitsPerPage <
numTotalHits) {
start+=hitsPerPage;
}
break;
} else {
int page =
Integer.parseInt(line);
if ((page - 1) * hitsPerPage <
numTotalHits) {
start = (page - 1) *
hitsPerPage;
break;
} else {
System.out.println("No
such page");
}
}
}
if (quit) break;
end = Math.min(numTotalHits, start + hitsPerPage);
}
}
}
}
|
1.
Index digunakan untuk menyimpan hasil pengurutan dokumen
yang berada pada Koleksi, dan folder index diperbarui dengan data terbaru
pada Koleksi.
2.
Index dibuka pada SearchFiles dengan IndexReader, sedangkan pencarian pada index
dilakukan dengan IndexSearcher.
IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
IndexSearcher searcher = new IndexSearcher(reader);
|
3.
Jika query masih kosong, program mencetak “Enter query: ”, lalu pengguna memasukkan kata yang dicari
dan pencarian dilakukan dengan IndexSearcher.