站点图标 起风网

Lucene.net站内搜索—5、搜索引擎第一版实现

目录

Lucene.net站内搜索—1、SEO优化
Lucene.net站内搜索—2、Lucene.Net简介和分词
Lucene.net站内搜索—3、最简单搜索引擎代码
Lucene.net站内搜索—4、搜索引擎第一版技术储备(简单介绍Log4Net、生产者消费者模式)
Lucene.net站内搜索—5、搜索引擎第一版实现
Lucene.net站内搜索—6、站内搜索第二版

1、工作线程访问UI线程上的控件(跨线程更新界面):

 // Hand the log line to the control via Invoke instead of touching txtLog
 // directly from the worker thread (txtLog is presumably a WinForms text box
 // owned by the UI thread — confirm against the form).
 ParameterizedThreadStart appendLine = obj => txtLog.AppendText(obj + "\n");
 txtLog.Invoke(appendLine, item);

详细代码如下:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Threading;
using log4net;
using System.Configuration;
using System.Web.Hosting;
using RuPeng.Utils;
using RuPengSite.DataTier.DataSetThreadTableAdapters;
using System.Text;
using Lucene.Net.Store;
using Lucene.Net.Index;
using System.IO;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Documents;

namespace RuPengSite.Search
{
    /// <summary>
    /// Singleton that collects index jobs (add / delete a forum thread) and
    /// applies them to the Lucene index in batches on a dedicated scan thread.
    /// Producers call <see cref="AddThread"/> / <see cref="DeleteThread"/>;
    /// the scan thread started by <see cref="Start"/> is the only consumer.
    /// </summary>
    public class IndexManager
    {
        public readonly static IndexManager Instance = new IndexManager();

        // Pending jobs. A set, so an identical (type, threadId) request queued
        // several times before the next scan is processed only once.
        private readonly HashSet<IndexJobItem> jobs = new HashSet<IndexJobItem>();

        // Guards every access to 'jobs': HashSet<T> is not safe for concurrent
        // use, and producers run on different threads than the scan thread.
        private readonly object jobsLock = new object();

        // Written by Stop() on a caller thread and polled by the scan thread,
        // hence volatile so the loop reliably observes the change.
        private volatile bool isStopped;

        private static ILog log = LogManager.GetLogger(typeof(IndexManager));

        private IndexManager()
        {
        }

        /// <summary>
        /// Starts the scan thread that periodically flushes queued jobs into the index.
        /// </summary>
        public void Start()
        {
            isStopped = false;
            Thread thread = new Thread(ScanThread);
            thread.Start();
        }

        /// <summary>
        /// Asks the scan thread to exit; it stops at its next loop check.
        /// </summary>
        public void Stop()
        {
            isStopped = true;
        }

        /// <summary>
        /// Scan loop: sleeps, then opens the index and processes whatever jobs
        /// have accumulated. Runs until <see cref="Stop"/> is called.
        /// </summary>
        private void ScanThread()
        {
            // Leave the loop once a stop has been requested.
            while (!isStopped)
            {
                Thread.Sleep(5000); // wait 5 seconds so jobs can accumulate into a batch

                int pending;
                lock (jobsLock)
                {
                    pending = jobs.Count;
                }
                if (pending <= 0)
                {
                    continue; // nothing queued, go back to sleep
                }

                log.Debug("开始索引预处理");
                string indexPath = SearchHelper.GetSearchIndexFullPath();
                log.Debug("索引路径是:" + indexPath);
                FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
                IndexWriter writer = null;
                try
                {
                    // Does an index already exist in this directory?
                    bool isUpdate = IndexReader.IndexExists(directory);
                    log.Debug("索引路径存在状态是" + isUpdate);
                    if (isUpdate)
                    {
                        // A stale lock can be left behind when the process died
                        // in the middle of indexing; clear it before writing.
                        if (IndexWriter.IsLocked(directory))
                        {
                            log.Debug("开始解锁索引路径");
                            IndexWriter.Unlock(directory);
                        }
                    }

                    // create == !isUpdate: build a new index only when none exists yet.
                    writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate,
                        Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
                    ProcessJobItems(directory, writer);
                }
                finally
                {
                    log.Debug("开始关闭reader、writer");
                    try
                    {
                        // writer is still null if its constructor threw.
                        if (writer != null)
                        {
                            writer.Close();
                        }
                    }
                    finally
                    {
                        // Always release the directory, even when closing the writer fails.
                        directory.Close();
                    }
                    log.Debug("完成关闭reader、writer");
                }
            }
        }

        /// <summary>
        /// Processes every job currently in the queue.
        /// </summary>
        /// <param name="directory">The open index directory (not used directly here).</param>
        /// <param name="writer">The writer the jobs are applied to.</param>
        private void ProcessJobItems(FSDirectory directory, IndexWriter writer)
        {
            // Take a snapshot under the lock so producers can keep queuing
            // while the (slow) indexing work runs outside the lock.
            IndexJobItem[] snapshot;
            lock (jobsLock)
            {
                snapshot = jobs.ToArray();
            }

            log.Debug("开始处理队列中的" + snapshot.Length + "个任务");
            foreach (var jobItem in snapshot)
            {
                try
                {
                    ProcessJobItem(writer, jobItem);
                    lock (jobsLock)
                    {
                        jobs.Remove(jobItem); // drop the job only after it succeeded
                    }
                }
                catch (Exception ex)
                {
                    // A failed job stays in the set and is retried on the next scan.
                    log.Error("对任务进行处理失败" + jobItem, ex);
                }
            }
            log.Debug("队列中的任务处理完毕");
        }

        /// <summary>
        /// Applies a single job to the index: removes the thread's document, and
        /// for an Add job re-creates it from the current database content.
        /// </summary>
        /// <param name="writer">The writer to apply the change to.</param>
        /// <param name="jobItem">The job to execute.</param>
        /// <exception cref="InvalidOperationException">The job type is not recognised.</exception>
        private static void ProcessJobItem(IndexWriter writer, IndexJobItem jobItem)
        {
            long threadId = jobItem.ThreadId;
            JobType jobType = jobItem.ItemType;
            string url = SearchHelper.GetThreadUrl(threadId);
            if (jobType == JobType.Delete)
            {
                log.Debug("将帖子从索引中移除,threadId=" + threadId);
                writer.DeleteDocuments(new Term(SearchHelper.URL, url)); // remove the existing entry
            }
            else if (jobType == JobType.Add)
            {
                // Delete first so re-indexing a thread never leaves a duplicate.
                writer.DeleteDocuments(new Term(SearchHelper.URL, url));
                var threads = new rp_threadsTableAdapter().GetDataById(threadId);
                if (threads.Count <= 0)
                {
                    log.Debug("id为" + threadId + "的帖子不存在!");
                    return;
                }
                string body = SearchHelper.GetThreadContent(threadId); // thread content
                string title = threads.Single().Subject; // thread subject
                Document document = new Document();
                document.Add(new Field(SearchHelper.URL, url, Field.Store.YES, Field.Index.NOT_ANALYZED));
                // NOTE(review): the title is stored as one un-analyzed term, so it can
                // only be matched exactly — confirm this is intended if titles should
                // be searchable by keyword.
                document.Add(new Field(SearchHelper.TITLE, title, Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field(SearchHelper.BODY, body, Field.Store.YES, Field.Index.ANALYZED,
                    Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(document);
                log.Debug("索引帖子" + threadId + "完成");
            }
            else
            {
                throw new InvalidOperationException("错误的jobType:" + jobType);
            }
        }

        /// <summary>
        /// Queues a job that (re)indexes the given thread.
        /// </summary>
        /// <param name="threadId">Id of the thread to index.</param>
        public void AddThread(long threadId)
        {
            IndexJobItem jobItem = new IndexJobItem() { ItemType = JobType.Add, ThreadId = threadId };
            lock (jobsLock)
            {
                jobs.Add(jobItem);
            }
        }

        /// <summary>
        /// Queues a job that removes the given thread from the index.
        /// </summary>
        /// <param name="threadId">Id of the thread to remove.</param>
        public void DeleteThread(long threadId)
        {
            IndexJobItem jobItem = new IndexJobItem() { ItemType = JobType.Delete, ThreadId = threadId };
            lock (jobsLock)
            {
                jobs.Add(jobItem);
            }
        }

        /// <summary>
        /// One queued index operation. Two items are equal when both the job
        /// type and the thread id match, which is what de-duplicates the queue.
        /// </summary>
        class IndexJobItem
        {
            public JobType ItemType { get; set; }
            public long ThreadId { get; set; }

            public override bool Equals(object obj)
            {
                IndexJobItem item = obj as IndexJobItem;
                if (item == null)
                {
                    return false;
                }
                return this.ItemType == item.ItemType && this.ThreadId == item.ThreadId;
            }

            public override int GetHashCode()
            {
                // Combine the same two fields Equals compares, without
                // allocating a string on every lookup.
                unchecked
                {
                    return ((int)ItemType * 397) ^ ThreadId.GetHashCode();
                }
            }

            public override string ToString()
            {
                return ItemType + ":" + ThreadId;
            }
        }

        // Kind of index job.
        enum JobType { Delete, Add }
    }
}

多条件查询

我看了下淘宝,淘宝的站内搜索只实现了且条件:

或条件查询可以来看看百度,当然,百度是同时采用了且条件和或条件查询的:

在标题和正文中查找

// Segment the search text once and reuse the result for both fields
// instead of running the word splitter twice on the same input.
var keywordTerms = CommonHelper.SplitWords(txtKW.Text);

// Phrase query against the "msg" field; the slop of 100 lets the terms
// match even when they are far apart in the text.
PhraseQuery queryMsg = new PhraseQuery();
foreach (string word in keywordTerms)
{
    queryMsg.Add(new Term("msg", word));
}
queryMsg.SetSlop(100);

// Same terms and slop against the "title" field.
PhraseQuery queryTitle = new PhraseQuery();
foreach (string word in keywordTerms)
{
    queryTitle.Add(new Term("title", word));
}
queryTitle.SetSlop(100);

// SHOULD + SHOULD: a document matches when either the body or the title matches.
BooleanQuery query = new BooleanQuery();
query.Add(queryMsg, BooleanClause.Occur.SHOULD);
query.Add(queryTitle, BooleanClause.Occur.SHOULD);

BooleanQuery相当于盛放其他查询条件的容器,类似于HTML中的div。Add方法的第二个参数(BooleanClause.Occur)指定该子查询的匹配要求:MUST表示必须满足,MUST_NOT表示必须不满足,SHOULD表示可以满足(可选)。

高亮显示

/// <summary>
/// Wraps occurrences of the keyword in <paramref name="content"/> with a red
/// font tag and returns the best matching fragment.
/// </summary>
/// <param name="keyword">The search keyword to highlight.</param>
/// <param name="content">The text to highlight; callers should HTML-encode it first.</param>
/// <returns>
/// The highlighted fragment, or <paramref name="content"/> unchanged when the
/// highlighter returns null or an empty string.
/// </returns>
private static String highLight(string keyword, String content)
{
    PanGu.HighLight.SimpleHTMLFormatter formatter =
        new PanGu.HighLight.SimpleHTMLFormatter("<font color='red'>", "</font>");
    PanGu.HighLight.Highlighter highlighter =
        new PanGu.HighLight.Highlighter(formatter, new Segment());
    highlighter.FragmentSize = 500;
    string msg = highlighter.GetBestFragment(keyword, content);

    // Fall back to the full text when the highlighter produced nothing.
    return string.IsNullOrEmpty(msg) ? content : msg;
}

// Encode BEFORE highlighting so that only the <font> tags added by the
// highlighter reach the page as markup. The title is user-supplied just like
// the body, so it gets the same treatment to prevent XSS.
String hightlightTitle = highLight(keyword, HttpUtility.HtmlEncode(title));
String hightlightBody = HttpUtility.HtmlEncode(body);
hightlightBody = highLight(keyword, hightlightBody);

路径可配置化

解决:地址无法发给好友

我们先看下淘宝的站内搜索:

细心看,我们会发现url是一连串的字符串,可以肯定这是采用了get请求的方式。

只要页面中有runat=server的form,就会生成__VIEWSTATE等隐藏字段,所以要去掉form的runat=server;这样一来,除了Repeater等少数控件之外,服务端控件都无法使用,只能使用html标签。这就是为什么说“要求高的互联网项目不用服务端控件”。面试时可以举例说:我在有的项目中就没有使用服务端控件。

为了能让查询参数显示在地址栏中,方便传播地址,把form的method改为get;因为ViewState太长,所以影响美观,因此禁用ViewState;但是发现哪怕禁用ViewState,ViewState也没有完全消失;研究发现,只有去掉form的runat=server后才能完全干掉ViewState;但是,一旦去掉form的runat=server后几乎所有的WebForm控件都用不了(除了Repeater等少数几个和input无关的之外),只能用html控件,然后在Page_Load中进行响应。

文章转载于:https://www.cnblogs.com/jiekzou/p/4381649.html

原作者是一个有趣的人,若有侵权,请通知删除

退出移动版