oracle 全文索引详细做法

oracle 全文索引详细做法
oracle从7.3开始支持全文检索，即用户可以使用oracle服务器的上下文（context）选项完成基于文本的查询。具体可以采用通配符查找、模糊匹配、相关分类、近似查找、条件加权和词意扩充等方法。在oracle8.0.x中称为context ；在oracle8i中称为intermedia text ； oracle9i中称为oracle text。
oracle text是9i标准版和企业版的一部分。oracle9i将全文检索功能作为内置功能提供给用户，使得用户在创建实例时自动安装全文检索。oracle text的应用领域有很多：
-         搜索文本：需要快捷有效搜索文本数据的应用程序。
-         管理多种文档：允许搜索各种混合文档格式的应用程序,包括word,excel,lotus等。
-         从多种数据源中检索文本：不仅来自oracle数据库中的文本数据,而且可以来自internet和文件系统的文本数据。
-         搜索xml应用程序。
-- Grant the Oracle Text CTXAPP role to xyadmin.
grant ctxapp to xyadmin;

-- Create the lexer preference 'cnlex' that the text indexes reference.
-- The block is terminated with a slash on its own line so that SQL*Plus
-- actually executes it before reading the statements that follow.
begin
    -- Define a lexer
    --ctx_ddl.drop_preference('cnlex');
    --ctx_ddl.create_preference('cnlex','chinese_lexer'); -- for Chinese text
    ctx_ddl.create_preference('cnlex','chinese_vgram_lexer'); -- for Chinese text
    -- Define a wordlist (disabled alternative)
    --ctx_ddl.drop_preference('mywordlist');
    --ctx_ddl.create_preference('mywordlist', 'basic_wordlist');
    --ctx_ddl.set_attribute('mywordlist','prefix_index','true');
    --ctx_ddl.set_attribute('mywordlist','prefix_min_length',1);
    --ctx_ddl.set_attribute('mywordlist','prefix_max_length', 5);
    --ctx_ddl.set_attribute('mywordlist','substring_index', 'yes');
end;
/
commit;
-- Drop the existing text indexes before rebuilding them.
-- Each statement needs its own terminator; without it the two drops and the
-- following text are read as one statement.
drop index corporationname_index force;
drop index businessscope_index force;

-- Disabled alternative: same index with explicit datastore, filter and wordlist preferences.
--create index corporationname_index on corporationmaintable(corporationname) indextype is ctxsys.context
--parameters ('datastore ctxsys.direct_datastore filter
--ctxsys.null_filter lexer cnlex wordlist mywordlist');

-- Build the CONTEXT indexes using the 'cnlex' lexer preference.
create index corporationname_index on corporationmaintable(corporationname) indextype is ctxsys.context parameters ('lexer cnlex');
create index businessscope_index on corporationmaintable(businessscope) indextype is ctxsys.context parameters ('lexer cnlex');

-- Synchronize corporationname_index once by hand.
exec ctx_ddl.sync_index('corporationname_index');
-- sync: submit a recurring job that synchronizes the text indexes.
-- First run is at sysdate; the interval expression 'sysdate + (1/24/4)'
-- reschedules it every 15 minutes.
-- NOTE(review): legalrepresentative_index is not created in the visible part
-- of this script. Confirm it exists, otherwise the job will raise an error.
variable jobno number;
begin
dbms_job.submit(:jobno,'ctx_ddl.sync_index(''corporationname_index'');
ctx_ddl.sync_index(''businessscope_index'');
ctx_ddl.sync_index(''legalrepresentative_index'');',
sysdate, 'sysdate + (1/24/4)');
commit;
end;
/
-- optimizer: submit a recurring job that runs a FULL optimization of the text indexes.
-- First run is at sysdate; the interval expression 'sysdate + 1' reschedules it daily.
-- NOTE(review): legalrepresentative_index is not created in the visible part
-- of this script. Confirm it exists, otherwise the job will raise an error.
variable jobno number;
begin
dbms_job.submit(:jobno,'ctx_ddl.optimize_index(''corporationname_index'',''full'');
ctx_ddl.optimize_index(''businessscope_index'',''full'');
ctx_ddl.optimize_index(''legalrepresentative_index'',''full'');',
sysdate, 'sysdate + 1');
commit;
end;
/
-- NOTE(review): EXECUTE ANY PROCEDURE is a very broad system privilege.
-- Confirm credit_corp really needs it rather than a narrower grant.
grant execute any procedure to credit_corp;

-- Run or remove individual jobs by job number.
-- NOTE(review): 73, 69 and 72 are specific to one database. Look up the real
-- job numbers with the user_jobs query below before running these.
exec dbms_job.run(73);
exec dbms_job.remove(69);
exec dbms_job.remove(72);

-- List the jobs owned by the current user (ad hoc inspection query).
select * from user_jobs;
--其中，第一个job的sysdate + (1/24/4)是指每隔15分钟同步一次，第二个job的sysdate + 1是每隔1天做一次全优化。具体的时间间隔，你可以根据自己的应用的需要而定。至此，你的全文检索功能已设置完成。
搜索文本
       不使用oracle text功能,也有很多方法可以在oracle数据库中搜索文本.可以使用标准的instr函数和like操作符实现.
-- Plain substring search with INSTR: rows whose text contains 'oracle'.
select t.*
  from mytext t
 where instr(t.thetext, 'oracle') > 0;

-- Substring search for 'oracle' written with the LIKE operator.
select t.*
  from mytext t
 where t.thetext like '%oracle%';
有很多时候，使用instr和like是很理想的，特别是搜索仅跨越很小的表的时候。然而通过这些文本定位的方法将导致全表扫描,对资源来说消耗比较昂贵，而且实现的搜索功能也非常有限。
利用oracle text，你可以回答如“在存在单词’oracle’的行同时存在单词’corporation’而且两单词间距不超过10个单词的文本，查询含有单词’oracle’或者单词’california’的文本，并且将结果按准确度进行排序，含有词根train的文本”，以下的sql代码实现了如上功能，我们且不管这些语法是如何使用的：
-- Each statement below ends with a single ';' at the end of its line.
-- Comments sit on their own lines because SQL*Plus does not reliably process
-- text placed after a statement terminator, and no extra '/' follows because
-- that would execute the statement a second time.

-- Drop the index mytext_idx
drop index mytext_idx;

-- Create the CONTEXT-type index mytext_idx
create index mytext_idx
on mytext( thetext )
indextype is ctxsys.context;

-- Issue a CONTAINS query: 'oracle' and 'corporation' within 10 words of each other
select id
from mytext
where contains (thetext, 'near((oracle,corporation),10)') > 0;
-- Rows containing 'oracle' or 'california', ordered by relevance score (label 1), best first.
select score(1), t.id
  from mytext t
 where contains(t.thetext, 'oracle or california', 1) > 0
 order by score(1) desc;

-- Rows containing words that share the stem of 'train' ($ operator).
select t.id
  from mytext t
 where contains(t.thetext, '$train') > 0;
--其中，第一个job的sysdate + (1/24/4)是指每隔15分钟同步一次，第二个job的sysdate + 1是每隔1天做一次全优化。具体的时间间隔，你可以根据自己的应用的需要而定。至此，你的全文检索功能已设置完成。

oracle 全文索引详细做法

VIP推荐