前言 - sphinx 用着好好的为啥换 manticore ?
sphinxsearch 从 3.x 系列开始变更了许可协议,目前属于闭源软件。而 2.x 的开源版年久失修,虽然还能用,但已不再提供支持。manticore 最初 fork 自 sphinx 2.x 的开源分支,此后不断发展到今天的 4.x 版本。
manticore完全免费,且修复了2.x版本众多bug,还开发了很多实用特性。
此外对中文提供了特殊优化选项,比如添加了对中文词法的支持,内置了中文语句断句检测等优化。
经过一番使用,搜索结果最明显的就是效率和精确度上的提升,总之就是起飞了。
假如我没有使用debian 11,这篇文章可能就不会有了。
搬迁服务器后,使用的系统是最新的debian 11(bullseye),配置phpbb搜索索引的时候,顺手一个
apt install sphinxsearch
,提示没这个包,我又去debian仓库查询,发现debian 11官方软件源移除了sphinxsearch。很早之前我就知道sphinxsearch 2.x版本不维护了,所以我就在想有没有替代品。网上搜索找到了manticore,然后搜索一下是否有人成功在phpbb上使用manticore,有人说是兼容的,只需要改相应的配置,有成功案例就好,然后我开始了折腾之旅。
manticore官网: https://manticoresearch.com
安装并配置manticore
# 创建并进入 manticore 文件夹
代码: 全选
# Create the working directory and enter it.
# -p keeps the command from failing when the directory already exists.
mkdir -p /opt/manticore && cd /opt/manticore
# 在该文件夹下创建 docker-compose.yml,内容如下
代码: 全选
# Docker Compose definition for a single Manticore Search container.
# Nesting is significant: every key below must stay under services.manticore.
services:
  manticore:
    container_name: manticore
    image: manticoresearch/manticore
    environment:
      # Image-specific switch; see the Manticore Docker image documentation.
      - EXTRA=1
    restart: always
    ports:
      # Published on the host loopback interface only.
      # Quoted so the colon-separated mapping is always read as a string.
      - "127.0.0.1:9308:9308"
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
      # -1 means no limit on locked memory.
      memlock:
        soft: -1
        hard: -1
    volumes:
      # Index data, logs and binlog are kept on the host under ./data.
      - ./data:/var/lib/manticore
      # The configuration file maintained next to this compose file.
      - ./manticore.conf:/etc/manticoresearch/manticore.conf
      # Presumably exposes the host MySQL Unix socket to the indexer
      # (manticore.conf uses sql_host = localhost) - confirm for your setup.
      - /var/run/mysqld:/var/run/mysqld
端口 9308 用于 HTTP 连接(9308 for connections via HTTP)。
# 创建必要文件夹,避免启动报错
代码: 全选
# Create the host-side data directory (and its parents) before the first start.
# With the ./data:/var/lib/manticore mount, this path is /var/lib/manticore/data inside the container.
mkdir -p /opt/manticore/data/data
# 在 /opt/manticore 下创建 manticore.conf,内容如下
代码: 全选
# Main data source: reads every phpBB post, joined with its topic, from MySQL.
source source_phpbb_1sqh9wuud70fuxy8_main
{
    # --- Database connection ---
    # mysql or pgsql
    type = mysql
    # SQL server host sphinx connects to
    sql_host = localhost
    sql_user = phpbb
    sql_pass = {Your database passwd}
    sql_db = phpbb
    # optional, default is 3306 for mysql and 5432 for pgsql
    sql_port =

    # --- Statements issued before the main query ---
    sql_query_pre = SET NAMES 'utf8'
    # Store the current highest post_id in phpbb_sphinx (counter row 1).
    sql_query_pre = UPDATE phpbb_sphinx SET max_doc_id = (SELECT MAX(post_id) FROM phpbb_posts) WHERE counter_id = 1

    # --- Ranged fetch: posts are read in post_id windows of 5000 ---
    sql_query_range = SELECT MIN(post_id), MAX(post_id) FROM phpbb_posts
    sql_range_step = 5000

    # Main query; $start and $end bound the post_id window of each step.
    # post_subject is selected twice: once under its own name (declared as a
    # string attribute below) and once as "title".
    sql_query = SELECT \
        p.post_id AS id, \
        p.forum_id, \
        p.topic_id, \
        p.poster_id, \
        p.post_visibility, \
        CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, \
        p.post_time, \
        p.post_subject, \
        p.post_subject as title, \
        p.post_text as data, \
        t.topic_last_post_time, \
        0 as deleted \
        FROM phpbb_posts p, phpbb_topics t \
        WHERE \
        p.topic_id = t.topic_id \
        AND p.post_id >= $start AND p.post_id <= $end

    # --- Statements issued after fetching / after indexing ---
    sql_query_post =
    sql_query_post_index = UPDATE phpbb_sphinx SET max_doc_id = $maxid WHERE counter_id = 1

    # --- Attribute declarations for the selected columns ---
    sql_attr_uint = forum_id
    sql_attr_uint = topic_id
    sql_attr_uint = poster_id
    sql_attr_uint = post_visibility
    sql_attr_bool = topic_first_post
    sql_attr_bool = deleted
    sql_attr_timestamp = post_time
    sql_attr_timestamp = topic_last_post_time
    sql_attr_string = post_subject
}
# Delta data source: inherits every setting from the main source and overrides
# only what is listed here, so that it selects just the posts at or above the
# max_doc_id stored in phpbb_sphinx.
source source_phpbb_1sqh9wuud70fuxy8_delta : source_phpbb_1sqh9wuud70fuxy8_main
{
    sql_query_pre = SET NAMES 'utf8'

    # Left empty to override the ranged-fetch settings inherited from the main source.
    sql_query_range =
    sql_range_step =

    # Same column list as the main source; only the final post_id condition differs.
    sql_query = SELECT \
        p.post_id AS id, \
        p.forum_id, \
        p.topic_id, \
        p.poster_id, \
        p.post_visibility, \
        CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, \
        p.post_time, \
        p.post_subject, \
        p.post_subject as title, \
        p.post_text as data, \
        t.topic_last_post_time, \
        0 as deleted \
        FROM phpbb_posts p, phpbb_topics t \
        WHERE \
        p.topic_id = t.topic_id \
        AND p.post_id >= ( SELECT max_doc_id FROM phpbb_sphinx WHERE counter_id=1 )

    # Left empty to override the max_doc_id update inherited from the main source.
    sql_query_post_index =
}
# Main full-text index, built from the main source and configured for mixed
# Chinese/English forum text.
index index_phpbb_1sqh9wuud70fuxy8_main
{
    # Index files go inside the data/ subdirectory of /var/lib/manticore.
    # The "/" after "data" is required: without it the files are written
    # directly into /var/lib/manticore with a "dataindex_" name prefix.
    path = /var/lib/manticore/data/index_phpbb_1sqh9wuud70fuxy8_main
    source = source_phpbb_1sqh9wuud70fuxy8_main

    # English stemming plus ICU-based Chinese processing, with the "zh" stopword list.
    morphology = stem_en, icu_chinese
    stopwords = zh

    # optional, specify path to wordforms file. See ./docs/sphinx_wordforms.txt for example
    wordforms =
    # optional, specify path to exceptions file. See ./docs/sphinx_exceptions.txt for example
    exceptions =

    # Index single-character words as well.
    min_word_len = 1
    charset_table = 0..9, cjk, english, _

    # Minimum number of characters for wildcard searches by prefix (min 1). Default is 3. If specified, set min_infix_len to 0
    min_prefix_len = 3
    # Minimum number of characters for wildcard searches by infix (min 2). If specified, set min_prefix_len to 0
    min_infix_len = 0

    html_strip = 1
    # Set to 1 to enable exact search operator. Requires wordforms or morphology
    index_exact_words = 1
    # U+23 '#', U+24 '$', U+25 '%', U+26 '&', U+40 '@'
    blend_chars = U+23, U+24, U+25, U+26, U+40
}
# Delta index: inherits all settings from the main index and overrides only
# its storage path and its data source.
index index_phpbb_1sqh9wuud70fuxy8_delta : index_phpbb_1sqh9wuud70fuxy8_main
{
    # Stored inside the data/ subdirectory; the "/" after "data" is required,
    # otherwise the files land in /var/lib/manticore with a "dataindex_" prefix.
    path = /var/lib/manticore/data/index_phpbb_1sqh9wuud70fuxy8_delta
    source = source_phpbb_1sqh9wuud70fuxy8_delta
}
# Settings for the indexer tool.
indexer
{
    # Memory limit for an indexing run; adjust to the RAM available on the host.
    mem_limit = 1024M
}
# Settings for the searchd daemon running inside the container.
searchd
{
    # Listen on all container interfaces; the compose file decides how
    # port 9308 is published on the host.
    listen = 0.0.0.0:9308

    # NOTE(review): with the log written to this file, `docker logs` may not
    # show searchd messages - confirm where errors end up in your setup.
    log = /var/lib/manticore/searchd.log
    query_log = /var/lib/manticore/manticore-query.log
    network_timeout = 5

    # PID file inside the data/ subdirectory; the "/" after "data" is required,
    # otherwise the file is created as /var/lib/manticore/datasearchd.pid.
    pid_file = /var/lib/manticore/data/searchd.pid
    binlog_path = /var/lib/manticore

    # 0 turns off data collection.
    telemetry = 0
}
- 修改相关信息为你的实际情况,index_phpbb_{自行替换}_main/delta 、数据库帐号密码等
- mem_limit自行设置
- morphology、charset_table、stopwords 等选项的含义可以参考官方文档;我贴出来的这套配置是参考官方文档、专门为中文优化过的
- telemetry = 0 用于关闭数据收集
# 然后
docker compose up -d
启动容器
# 接着手动创建一次索引(index_phpbb_1sqh9wuud70fuxy8_main 请自行替换为你的索引名)
代码: 全选
# Build the main index inside the running container, as the manticore user,
# using the configuration file mounted at /etc/manticoresearch/manticore.conf.
docker exec -it manticore gosu manticore indexer --rotate --config /etc/manticoresearch/manticore.conf index_phpbb_1sqh9wuud70fuxy8_main
# Build the delta index the same way.
docker exec -it manticore gosu manticore indexer --rotate --config /etc/manticoresearch/manticore.conf index_phpbb_1sqh9wuud70fuxy8_delta
# Restart the container after the indexes have been built.
docker compose restart
即可
# 在 phpbb 的搜索界面测试一下,如果可以正常搜索就 OK 了;如果报错,使用
docker logs manticore
查看一下日志
# 如果上面测试成功了,我们再添加个定时任务,让其自动将新帖子纳入索引中(每小时执行一次)
代码: 全选
# At minute 0 of every hour: rebuild the main index inside the manticore container.
# NOTE(review): both jobs start at the same minute - confirm that running two indexers concurrently is intended.
0 * * * * /usr/bin/docker exec -i manticore gosu manticore indexer --rotate --config /etc/manticoresearch/manticore.conf index_phpbb_1sqh9wuud70fuxy8_main
# At minute 0 of every hour: rebuild the delta index inside the manticore container.
0 * * * * /usr/bin/docker exec -i manticore gosu manticore indexer --rotate --config /etc/manticoresearch/manticore.conf index_phpbb_1sqh9wuud70fuxy8_delta