[lucene] lucene查询时总是返回第一个Doc
bruceWoo
2010-04-30
最近做一个项目时需要频繁进行数据库 like 查询,所以想到用 Lucene 来优化。网上找了些入门资料和一个简单的测试程序,发现只要能查询出结果,返回的却总是第一个添加的 Doc。比如下面的测试代码:第一个加入的 Doc 的 name 域的值为“擎天柱”,第二个加入的 Doc 的 name 域的值为“搜索引擎”,搜索关键字为“搜索 AND 擎”时,返回的却是第一个 Doc,这是怎么回事呢?主要代码如下,请各位帮忙分析,谢谢。
-------------------------------------------------------------------------- public class TestQueryParser1 { public static void main(String[] args) throws Exception { String path = "E:/AppLog/bnq/index"; TestQueryParser1 tqp = new TestQueryParser1(); tqp.createIndex(path); Searcher search = tqp.getSearcher(path); System.out.println("#_3"); tqp.testTime(search, tqp.getQueryParser3()); } public void testTime(Searcher search,Query query) throws IOException{ long start = new Date().getTime(); TopDocs topDocs = search.search(query,GlobalVariables.MAX_QRY_RET_NUM); for (int i = 0; i < topDocs.totalHits; i++) { //ScoreDoc doc = topDocs.scoreDocs[i]; Document document=search.doc(i); String content=document.get(BnqDTO.LNAME); System.out.println("匹配记录["+(i+1)+"]="+content); } System.out.println("本次搜索用时:" + ((new Date()).getTime() - start) + "毫秒"); } public Searcher getSearcher(String path) throws CorruptIndexException, IOException{ Directory indexDir=FSDirectory.open(new File(path)); IndexSearcher idxSearcher = new IndexSearcher(indexDir,true); return idxSearcher; //return new IndexSearcher(path); } public Query getQueryParser3(){ QueryParser queryParser=new QueryParser(Version.LUCENE_29, "name",new ChineseAnalyzer()); //QueryParser queryParser = new QueryParser(BnqDTO.LNAME, new StandardAnalyzer()); try { BooleanQuery query=new BooleanQuery(); query.add(new TermQuery(new Term(BnqDTO.LNAME,"搜索")),BooleanClause.Occur.MUST); query.add(new TermQuery(new Term(BnqDTO.LNAME,"擎")),BooleanClause.Occur.MUST); //return query; return queryParser.parse("搜索 AND 擎"); } catch (Exception e) { e.printStackTrace(); } return null; } public void createIndex(String path){ try { //FileUtil.clearDir(path); Analyzer analyzer=new ChineseAnalyzer(); Directory indexDir=FSDirectory.open(new File(path)); IndexWriter writer = new IndexWriter(indexDir,analyzer,IndexWriter.MaxFieldLength.UNLIMITED); //original //IndexWriter writer = new IndexWriter(path,new StandardAnalyzer(),true); Document docA = new 
Document(); //相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置 //Field.Store 是否覆盖原来的索引文件,而不是重新建一个 Field fieldA = new Field("name","搜索引擎",Field.Store.YES,Field.Index.ANALYZED); //我们把列(fieldA)加到某一行(docA)中 docA.add(fieldA); docA.add(new Field("title","你好中国",Field.Store.YES,Field.Index.NOT_ANALYZED)); docA.add(new Field("lastModifyTime","2008-9-17",Field.Store.YES,Field.Index.NOT_ANALYZED)); docA.add(new Field("testCapital","HelloWangzi",Field.Store.YES,Field.Index.NOT_ANALYZED)); docA.add(new Field("testAndOr","test and",Field.Store.YES,Field.Index.NOT_ANALYZED)); Document docB = new Document(); //相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置 Field fieldB = new Field("name","擎天柱",Field.Store.YES,Field.Index.ANALYZED); //我们把列(fieldA)加到某一行(docA)中 docB.add(fieldB); docB.add(new Field("title","你好世界",Field.Store.YES,Field.Index.NOT_ANALYZED)); docB.add(new Field("lastModifyTime","2008-9-6",Field.Store.YES,Field.Index.NOT_ANALYZED)); docB.add(new Field("testCapital","hellowangZi",Field.Store.YES,Field.Index.NOT_ANALYZED)); docB.add(new Field("testAndOr","test or",Field.Store.YES,Field.Index.NOT_ANALYZED)); Document docC = new Document(); //相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置 Field fieldC = new Field("name","CCCAAABBB",Field.Store.YES,Field.Index.ANALYZED); //我们把列(fieldA)加到某一行(docA)中 docB.add(fieldC); docB.add(new Field("title","你好世界",Field.Store.YES,Field.Index.NOT_ANALYZED)); docB.add(new Field("lastModifyTime","2008-9-6",Field.Store.YES,Field.Index.NOT_ANALYZED)); docB.add(new Field("testCapital","hellowangZi",Field.Store.YES,Field.Index.NOT_ANALYZED)); docB.add(new Field("testAndOr","test or",Field.Store.YES,Field.Index.NOT_ANALYZED)); writer.addDocument(docB); writer.addDocument(docA); writer.addDocument(docC); //如果对海量数据进行创建索引的时候,需要对索引进行优化,以便提高速度 writer.optimize(); //跟数据库类似,打开一个连接,使用完后,要关闭它 writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { 
e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } |
|
bruceWoo
2010-05-07
大家帮忙测试这个程序看看我的问题是否会重现,程序中使用的一些常量可以自己定义,谢谢大家
|
|
xanpeng
2010-05-08
bruceWoo 写道 大家帮忙测试这个程序看看我的问题是否会重现,程序中使用的一些常量可以自己定义,谢谢大家
提供下 Analyzer 的 jar 包吧, GlobalVariables, BnqDTO, ChineseAnalyzer 都无法识别 |