Lucene5学习之分页查询

lxwt909

浏览: 566028 次
性别:
来自: 北京

最近访客更多访客>>

akingde

chenghu209

14252316

yinxin2745154

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Lucene

Lucene Query 分页

上篇博文《Lucene5学习之创建索引入门示例》里我们创建了索引，现在我们来编写测试代码来查询索引，具体代码如下：

package com.yida.framework.lucene5.core;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

import com.yida.framework.lucene5.util.Page;

/**
 * Lucene搜索第一个示例
 * @author Lanxiaowei
 *
 */
public class SearchFirstTest {
	public static void main(String[] args) throws ParseException, IOException {
		//参数定义
		String directoryPath = "D:/lucenedir";
		String fieldName = "contents";
		String queryString = "mount";
		int currentPage = 1;
		int pageSize = 10;
		
		Page<Document> page = pageQuery(fieldName, queryString, directoryPath, currentPage, pageSize);
		if(page == null || page.getItems() == null || page.getItems().size() == 0) {
			System.out.println("No results found.");
			return;
		}
		for(Document doc : page.getItems()) {
			String path = doc.get("path");
			String content = doc.get("contents");
			System.out.println("path:" + path);
			System.out.println("contents:" + content);
		}
	}
	/**
	 * 创建索引阅读器
	 * @param directoryPath  索引目录
	 * @return
	 * @throws IOException   可能会抛出IO异常
	 */
	public static IndexReader createIndexReader(String directoryPath) throws IOException {
		return DirectoryReader.open(FSDirectory.open(Paths.get(directoryPath, new String[0])));
	}
	
	/**
	 * 创建索引查询器
	 * @param directoryPath   索引目录
	 * @return
	 * @throws IOException
	 */
	public static IndexSearcher createIndexSearcher(String directoryPath) throws IOException {
		return new IndexSearcher(createIndexReader(directoryPath));
	}
	
	/**
	 * 创建索引查询器
	 * @param reader
	 * @return
	 */
	public static IndexSearcher createIndexSearcher(IndexReader reader) {
		return new IndexSearcher(reader);
	}
	
	/**
	 * Lucene分页查询
	 * @param directoryPath
	 * @param query
	 * @param page
	 * @throws IOException
	 */
	public static void pageQuery(String directoryPath,Query query,Page<Document> page) throws IOException {
		IndexSearcher searcher = createIndexSearcher(directoryPath);
		int totalRecord = searchTotalRecord(searcher,query);
		//设置总记录数
		page.setTotalRecord(totalRecord);
		TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
		List<Document> docList = new ArrayList<Document>();
		ScoreDoc[] docs = topDocs.scoreDocs;
		int index = 0;
		for (ScoreDoc scoreDoc : docs) {
			int docID = scoreDoc.doc;
			Document document = searcher.doc(docID);
			if(index == docs.length - 1) {
				page.setAfterDoc(scoreDoc);
				page.setAfterDocId(docID);
			}
			docList.add(document);
			index++;
		}
		page.setItems(docList);
		searcher.getIndexReader().close();
	}
	
	/**
	 * 索引分页查询
	 * @param fieldName
	 * @param queryString
	 * @param currentPage
	 * @param pageSize
	 * @throws ParseException 
	 * @throws IOException 
	 */
	public static Page<Document> pageQuery(String fieldName,String queryString,String directoryPath,int currentPage,int pageSize) throws ParseException, IOException {
		QueryParser parser = new QueryParser(fieldName, new StandardAnalyzer());
		Query query = parser.parse(queryString);
		Page<Document> page = new Page<Document>(currentPage,pageSize);
		pageQuery(directoryPath, query, page);
		return page;
	}
	
	/**
	 * @Title: searchTotalRecord
	 * @Description: 获取符合条件的总记录数
	 * @param query
	 * @return
	 * @throws IOException
	 */
	public static int searchTotalRecord(IndexSearcher searcher,Query query) throws IOException {
		TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
		if(topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
			return 0;
		}
		ScoreDoc[] docs = topDocs.scoreDocs;
		return docs.length;
	}
}

其实查询的核心代码就这一句：

searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());

searchAfter方法用于分页，如果不需要分页，请使用search方法。

searchAfter需要接收3个参数：

1.afterDocId即下一个Document的id,

2.query接口实现类的对象，query对象可以通过QueryParser类来创建，也可以自己new Query接口的某一个特定接口实现类，Query接口内置有N种实现，具体请查阅Lucene API,这里附上本人制作的Lucene5.0 API文档下载地址：http://pan.baidu.com/s/1uEgB8

3.pageSize即每页显示几条，你懂的。

至于如何创建IndexSearcher实例请看代码，跟Lucene4.x的使用方式没什么太大的区别，有一个较大的区别就是Lucene5.0里打开索引目录采用的是NIO2.0的方式，在Lucene4.0里你打开索引目录是这样的：

FSDirectory.open(directoryPath);

这里的directoryPath为String类型即你的索引目录，而在Lucene5.0里，则是使用NIO2.0的方式：

FSDirectory.open(Paths.get(directoryPath, new String[0]))

FSDirectory.open接收的参数不再是String类型而是Path类型。

测试类里关联了一个自己写的Page分页工具类，代码如下：

package com.yida.framework.lucene5.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;
public class Page<T> {
	/**当前第几页(从1开始计算)*/
    private int currentPage;
    /**每页显示几条*/
    private int pageSize;
    /**总记录数*/
    private int totalRecord;
    /**总页数*/
    private int totalPage;
    /**分页数据集合[用泛型T来限定集合元素类型]*/
    private Collection<T> items;
    /**当前显示起始索引(从零开始计算)*/
    private int startIndex;
    /**当前显示结束索引(从零开始计算)*/
    private int endIndex;
    /**一组最多显示几个页码[比如Google一组最多显示10个页码]*/
    private int groupSize;
    
    /**左边偏移量*/
    private int leftOffset = 5;
    /**右边偏移量*/
    private int rightOffset = 4;
    /**当前页码范围*/
    private String[] pageRange;
    
    /**分页数据*/
    private List<Document> docList;
    /**上一页最后一个ScoreDoc对象*/
    private ScoreDoc afterDoc;
    
    /**上一页最后一个ScoreDoc对象的Document对象ID*/
    private int afterDocId;

	public void setRangeIndex() {
		int groupSize = getGroupSize();
		int totalPage = getTotalPage();
		if(totalPage < 2) {
			startIndex = 0;
			endIndex = totalPage - startIndex;
		} else {
			int currentPage = getCurrentPage();
			if(groupSize >= totalPage) {
				startIndex = 0;
				endIndex = totalPage - startIndex - 1;
			} else {
				int leftOffset = getLeftOffset();
				int middleOffset = getMiddleOffset();
				if(-1 == middleOffset) {
					startIndex = 0;
					endIndex = groupSize - 1;
				} else if(currentPage <= leftOffset) {
					startIndex = 0;
					endIndex = groupSize - 1;
				} else {
					startIndex = currentPage - leftOffset - 1;
					if(currentPage + rightOffset > totalPage) {
						endIndex = totalPage - 1;
					} else {
						endIndex = currentPage + rightOffset - 1;
					}
				}
			}
		}
	}
    
	public int getCurrentPage() {
		if(currentPage <= 0) {
			currentPage = 1;
		} else {
			int totalPage = getTotalPage();
			if(totalPage > 0 && currentPage > getTotalPage()) {
				currentPage = totalPage;
			}
		}
		return currentPage;
	}
	public void setCurrentPage(int currentPage) {
		this.currentPage = currentPage;
	}
	public int getPageSize() {
		if(pageSize <= 0) {
			pageSize = 10;
		}
		return pageSize;
	}
	public void setPageSize(int pageSize) {
		this.pageSize = pageSize;
	}
	public int getTotalRecord() {
		return totalRecord;
	}
	public void setTotalRecord(int totalRecord) {
		this.totalRecord = totalRecord;
	}
	public int getTotalPage() {
		int totalRecord = getTotalRecord();
		if(totalRecord == 0) {
			totalPage = 0;
		} else {
			int pageSize = getPageSize();
			totalPage = totalRecord % pageSize == 0 ? totalRecord / pageSize : (totalRecord / pageSize) + 1;
		}
		return totalPage;
	}
	public void setTotalPage(int totalPage) {
		this.totalPage = totalPage;
	}
	
	public int getStartIndex() {
		return startIndex;
	}
	public void setStartIndex(int startIndex) {
		this.startIndex = startIndex;
	}
	
	public int getEndIndex() {
		return endIndex;
	}
	public void setEndIndex(int endIndex) {
		this.endIndex = endIndex;
	}
	public int getGroupSize() {
		if(groupSize <= 0) {
			groupSize = 10;
		}
		return groupSize;
	}
	public void setGroupSize(int groupSize) {
		this.groupSize = groupSize;
	}
	
	public int getLeftOffset() {
		leftOffset = getGroupSize() / 2;
		return leftOffset;
		
	}
	public void setLeftOffset(int leftOffset) {
		this.leftOffset = leftOffset;
	}
	public int getRightOffset() {
		int groupSize = getGroupSize();
		if(groupSize % 2 == 0) {
			rightOffset = (groupSize / 2) - 1;
		} else {
			rightOffset = groupSize / 2;
		}
		return rightOffset;
	}
	public void setRightOffset(int rightOffset) {
		this.rightOffset = rightOffset;
	}
	
	/**中心位置索引[从1开始计算]*/
	public int getMiddleOffset() {
		int groupSize = getGroupSize();
		int totalPage = getTotalPage();
		if(groupSize >= totalPage) {
			return -1;
		}
		return getLeftOffset() + 1;
	}
	public String[] getPageRange() {
		setRangeIndex();
		int size = endIndex - startIndex + 1;
		if(size <= 0) {
			return new String[0];
		}
		if(totalPage == 1) {
			return new String[] {"1"};
		}
		pageRange = new String[size];
		for(int i=0; i < size; i++) {
			pageRange[i] = (startIndex + i + 1) + "";
		}
		return pageRange;
	}

	public void setPageRange(String[] pageRange) {
		this.pageRange = pageRange;
	}

	public Collection<T> getItems() {
		return items;
	}
	public void setItems(Collection<T> items) {
		this.items = items;
	}
	
	public List<Document> getDocList() {
		return docList;
	}

	public void setDocList(List<Document> docList) {
		this.docList = docList;
	}

	public ScoreDoc getAfterDoc() {
		setAfterDocId(afterDocId);
		return afterDoc;
	}

	public void setAfterDoc(ScoreDoc afterDoc) {
		this.afterDoc = afterDoc;
	}
	
	public int getAfterDocId() {
		return afterDocId;
	}

	public void setAfterDocId(int afterDocId) {
		this.afterDocId = afterDocId;
		if(null == afterDoc) {
			this.afterDoc = new ScoreDoc(afterDocId, 1.0f);
		}
	}
	
	public Page() {}

	public Page(int currentPage, int pageSize) {
		this.currentPage = currentPage;
		this.pageSize = pageSize;
	}

	public Page(int currentPage, int pageSize, Collection<T> items) {
		this.currentPage = currentPage;
		this.pageSize = pageSize;
		this.items = items;
	}

	public Page(int currentPage, int pageSize, Collection<T> items, int groupSize) {
		this.currentPage = currentPage;
		this.pageSize = pageSize;
		this.items = items;
		this.groupSize = groupSize;
	}

	public Page(int currentPage, int pageSize, int groupSize, int afterDocId) {
		this.currentPage = currentPage;
		this.pageSize = pageSize;
		this.groupSize = groupSize;
		this.afterDocId = afterDocId;
	}

	public static void main(String[] args) {
		Collection<Integer> items = new ArrayList<Integer>();
		int totalRecord = 201;
		for(int i=0; i < totalRecord; i++) {
			items.add(new Integer(i));
		}
		Page<Integer> page = new Page<Integer>(1,10,items,10);
		page.setTotalRecord(totalRecord);
		int totalPage = page.getTotalPage();
		for(int i=0; i < totalPage; i++) {
			page.setCurrentPage(i+1);
			String[] pageRange = page.getPageRange();
			System.out.println("当前第" + page.currentPage + "页");
			for(int j=0; j < pageRange.length; j++) {
				System.out.print(pageRange[j] + "  ");
			}
			System.out.println("\n");
		}	
	}
}

来张运行效果图大家感受下：

demo源代码请在附件里下载，千言万语都在代码中，你们懂的。

若你还有什么疑问，请加我Ｑ－Ｑ：７－３－６－０－３－１－３－０－５，或者加裙：

，欢迎你加入一起交流学习。

lucene5-demo2.rar (21 KB)
下载次数: 152

查看图片附件

分享到：

Lucene5学习之使用Luke查看索引 | Lucene5学习之创建索引入门示例

2015-03-17 20:39
浏览 5323
评论(5)
分类:编程语言
查看更多

5 楼 banana1120 2016-05-04

这边使用普通的WildcardQuery 查询，100条结果使用9S，使用分页带WildcardQuery 查询第一页100条使用了24489 ms，，，，怎么破？原文件2G，创建索引3.4G，xls格式文件

4 楼永志_爱戴 2016-04-02

搜索第一页的时候，Page对象的afterDocId没有被赋值，默认是0，所以你的搜索会把docId等于零的文档给跳过去

3 楼 majiedota 2015-06-29

达子给力，评论细心啊

2 楼 lxwt909 2015-04-15

tianyaluke 写道

类SearchFirstTest的89行，须改成TopDocs topDocs = searcher.searchAfter(fieldDoc, query, page.getPageSize(), sort);否则page.getPageSize()=10,page.getCurrentPage()=1加上page.getCurrentPage()=2的结果跟page.getPageSize()=20,page.getCurrentPage()=1的结果不一样

感谢你的指正，你非常细心

1 楼 tianyaluke 2015-04-15

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论