上QQ阅读APP看书,第一时间看更新
2.6 Lucene查询高亮
高亮功能一直是全文检索中非常实用的一个模块。在一个标准的搜索引擎中,高亮显示命中结果几乎是必不可少的一项需求:通过高亮,可以在搜索界面上快速标记出用户的检索关键词,从而减少用户自己寻找目标结果的时间,在一定程度上提高了用户体验和友好度。Lucene查询高亮的例子见代码清单2-12。
代码清单2-12
import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import tup.lucene.ik.IKAnalyzer6x; public class HighlighterTest { public static void main(String[] args)throws IOException, InvalidTokenOffsetsException, ParseException { String field = "title"; Path indexPath = Paths.get("indexdir"); Directory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new IKAnalyzer6x(); QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse("北大"); System.out.println("Query:" + query); QueryScorer score = new QueryScorer(query, field); SimpleHTMLFormatter fors = new SimpleHTMLFormatter("<span style=\"color:red; \">", "</span>"); //定制高亮标签 Highlighter highlighter = new Highlighter(fors, score); // 高亮分词器 TopDocs tds = searcher.search(query, 10); for(ScoreDoc sd : tds.scoreDocs){ Document doc = searcher.doc(sd.doc); 
System.out.println("id:" + doc.get("id")); System.out.println("title:" + doc.get("title")); TokenStream tokenStream = TokenSources.getAnyTokenStream (searcher. getIndexReader(), sd.doc, field, analyzer); // 获取tokenstream Fragmenter fragment = new SimpleSpanFragmenter(score); highlighter.setTextFragmenter(fragment); String str = highlighter.getBestFragment(tokenStream, doc.get(field)); //获取高亮的片段 System.out.println("高亮的片段:" + str); } dir.close(); reader.close(); } }
运行结果:
加载扩展词典:ext.dic 加载扩展停止词典:stopword.dic 加载扩展停止词典:ext_stopword.dic Query:title:北大 id:2 title:北大迎4380名新生 农村学生700多人近年最多 高亮的片段:<span style="color:red;">北大</span>迎4380名新生 农村学生700多人近年最多