Lucene的简单用法

来源：cnblogs　　作者：DingYu　　时间：2018/10/25 9:36:34　　对本文有异议

1.创建索引

package com.DingYu.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
/**
 * 我们的目标是把索引和文档存入索引库中， 所以首先我们需要创建一个索引库 然后创建一个IndexWrite对象把索引，和文档对象写入，
 * 文档对象中需要自己设置域，索引是通过分词器对域进行分词产生的， 所以我们需要分词器
 * 
 * @author 丁宇
 *
 */
public class LuceneTest {
    /**
     * 创建索引
     * @throws IOException
     */
    @Test
    public void createIndex() throws IOException {
        // 标准分词器
        Analyzer analyzer = new StandardAnalyzer();
        // 创建一个索引
        Directory directory = FSDirectory.open(Paths.get("D:\\LuceneIndex"));
        // 创建一个IndexWriteConfig对象
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // 创建一个IndexWrite对象
        IndexWriter write = new IndexWriter(directory, config);
        // 获得所有文件下的文件
        File[] files = new File("D:\\LuceneTest").listFiles();
        for (File file : files) {
            // 创建一个文档对象
            Document document = new Document();
            // 增加一个filepath域，不分析 不索引 但会存储在索引库里 把文件路径放到域中
            Field field1 = new StoredField("filepath", file.getPath());
            // 增加一个filename域，会分词，会索引，
            Field field2 = new org.apache.lucene.document.TextField("filename", file.getName(), Store.YES);
            // 增加一个fileContent域，会分词，会索引，只放文件内容的索引
            Field field3 = new org.apache.lucene.document.TextField("filecontent", fileContent(file), Store.NO);
            // 增加一个filesize域，不分析 不索引 但会存储在索引库里 把文件路径放到域中
            Field field4 = new StoredField("filesize", file.length());
            document.add(field1);
            document.add(field2);
            document.add(field3);
            document.add(field4);
            write.addDocument(document);
        }
        write.close();
    }
    /**
     * 获得文件内容
     * @param file
     * @return
     */
    public String fileContent(File file)  {
        byte[] fileContent = new byte[(int) file.length()];
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
        } catch (FileNotFoundException e2) {
            e2.printStackTrace();
        }
        try {
            in.read(fileContent);
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        try {
            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            return new String(fileContent, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return null;
    }
}

2.查询索引

package com.DingYu.Test;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.junit.Test;
/**
 * 查询索引
 * 
 * @author 丁宇
 *
 */
public class LuceneTest1 {
    @Test
    public void selectIndex() throws IOException {
        // 指定索引库
        Directory directory = FSDirectory.open(Paths.get("D:\\LuceneIndex"));
        // 打开索引库
        IndexReader reader = DirectoryReader.open(directory);
        // 创建查询的对象
        IndexSearcher searcher = new IndexSearcher(reader);
        // 选择合适的查询方法，这里用最简单的，具体的看下图
        Query query = new TermQuery(new Term("filename", "txt"));
        // 执行查询
        TopDocs docs = searcher.search(query, 2);
        // 获得在索引库中存着的文档的id,利用id去寻找文档
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            // 获得id
            int doc = scoreDoc.doc;
            // 获得文档
            Document document = searcher.doc(doc);
            // 获得这个文档的域
            System.out.println(document.get("filename"));
            System.out.println(document.get("filecontent"));
            System.out.println(document.get("filepath"));
            System.out.println(document.get("filesize"));
            System.out.println("------------------------");
        }
        // 关闭索引库
        reader.close();
    }
}

3.删除索引

package com.DingYu.Test;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
/**
 * 删除索引 一般增删改都是同一个操作对象 这里使用IndexWriter对象
 * 
 * @author 丁宇
 *
 */
public class LuceneTest3 {
    /**
     * 获得IndexWrite对象
     * @return
     * @throws IOException
     */
    public IndexWriter getIndexWrite() throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        Directory directory = FSDirectory.open(Paths.get("D:\\LuceneIndex"));
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, config);
    }
    /**
     * 删除所有的索引
     * 
     * @throws IOException
     */
    @Test
    public void deleteAllIndex() throws IOException {
        IndexWriter indexWrite = getIndexWrite();
        indexWrite.deleteAll();
        indexWrite.close();
    }
    /**
     * 根据条件删除索引，同时删除文档
     * @throws IOException
     */
    @Test
    public void deleteSomeIndex() throws IOException {
        IndexWriter indexWrite = getIndexWrite();
        Query query = new TermQuery(new Term("filename","txt"));
        indexWrite.deleteDocuments(query);
        indexWrite.close();        
    }
}