This article will describe, in short, what I have done to speed up queries which searches inside email bodies/attachments and RTFM articles which contain large amounts of WikiText.
There are 3 things that need to be done:
- patch SearchBuilder, to remove pre/post pend of %, change LIKE to contain(Content,'searchstring')>0
- change RTFM Search.html, remove pre/post pend of % on WikiText Customfields
- add Oracle Text indexes for Attachments.content and ObjectCustomFieldValues.largecontent
Attached are the file needed todo this, that functionality isn't there anymore ;-( So here they come inline
===Start Searchbuilder patch no. 1===
--- SearchBuilder.pm.orig 2007-07-07 22:45:00.000000000 +0200
+++ SearchBuilder.pm 2007-07-10 08:49:00.000000000 +0200
@@ -767,9 +767,15 @@
if ( $args{'FIELD'} ) {
- #If it's a like, we supply the %s around the search term
+ #If it's a like, we supply the %s around the search term only if its not Oracle
+ #because for Oracle we'll use where contains(content,'text')>1
if ( $args{'OPERATOR'} =~ /LIKE/i ) {
- $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+ if ( $RT::DatabaseType eq 'Oracle') {
+ $args{'VALUE'} = $args{'VALUE'}
+ }
+ else {
+ $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+ }
}
elsif ( $args{'OPERATOR'} =~ /STARTSWITH/i ) {
$args{'VALUE'} = $args{'VALUE'} . "%";
@@ -932,6 +938,49 @@
value => $args{'VALUE'},
};
+# Keep the original clause and modify if dbtype is Oracle
+# This patch makes searching for ticket content real fast when you have the appropriate
+# index on Attachments.Content and ObjectCustomFieldValues.Largecontent for RTFM
+# CREATE INDEX CNT ON ATTACHMENTS (CONTENT) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# CREATE INDEX FM_LARGE_CNT ON ObjectCustomFieldValues(Largecontent) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# parameters is tunable, see Oracle docs ;-)
+# fixed the static replace Attachments_2.Content by dynamic version for both RT and RTFM Content fields
+# also changed RTFM/Article/Search.html NOT to pre and post pend '%'
+# other things needed is a FMT column for Attachments and ObjectCustomFieldValues
+# which takes care of not including base64 text into the index
+# does this by using a trigger which fills the FMT column and using the appropriate
+# options when creating the Text index. (saved 25% of unneeded index room ~1Gb out of ~4Gb)
+ if ( $RT::DatabaseType eq 'Oracle' ) {
+ my ($tmpfield);
+ # First fix RT to use OracleText when searching Ticket Content
+ if ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/ and $args{'OPERATOR'} eq 'LIKE') {
+ # get rid of the lower since you can't have a functional context index.
+ # don't care about the remaining (), they don't have a significant effect on performance
+ # you can however tell Context to index casesensitive or caseinsensitive.
+ # this is probably not very elegant, excuse my perl ;-)
+ $tmpfield = $QualifiedField;
+ $tmpfield =~ s/(lower)?//g;
+ $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+ }
+ elsif ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/ and $args{'OPERATOR'} eq 'NOT LIKE') {
+ $tmpfield = $QualifiedField;
+ $tmpfield =~ s/(lower)?//g;
+ $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+ }
+ # Second fix RTFM to use OracleText when searching Article Content and LargeContent
+ elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'LIKE') {
+ $tmpfield = $QualifiedField;
+ $tmpfield =~ s/(lower)?//g;
+ $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+ }
+ elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'NOT LIKE') {
+ $tmpfield = $QualifiedField;
+ $tmpfield =~ s/(lower)?//g;
+ $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+ }
+
+ }
+
# Juju because this should come _AFTER_ the EA
my @prefix;
if ( $self->{_open_parens}{ $ClauseId } ) {
===End Searchbuilder patch no. 1===
===Start Searchbuilder patch no. 2===
--- Handle.pm.orig 2007-04-10 22:48:42.000000000 +0200
+++ Handle.pm 2007-07-12 11:13:00.000000000 +0200
@@ -1101,6 +1101,14 @@
# of here as we can't parse subclauses
return 1 if grep $_ ne 'generic_restrictions', keys %{ $args{'SearchBuilder'}->{'subclauses'} };
+# JvdW 2007-07-12
+# Get out of this function if were dealing with either Attachments or ObjectCustomFieldValues
+# since these are modified and parser trips over '(contains(content,'test')>0)
+# haven't seen any side effect but that doesn't mean there aren't any.
+# feel free to fix this lousy perl of mine ;-)
+ return 1 if $args{'ALIAS'} =~ /Attachments_\d+/ ;
+ return 1 if $args{'ALIAS'} =~ /ObjectCustomFieldValues_\d+/ ;
+
# build full list of generic conditions
my @conditions;
foreach ( grep @$_, values %{ $args{'SearchBuilder'}->{'restrictions'} } ) {
===End Searchbuilder patch no. 2===
===Start Search.html=== copy share/html/RTFM/Article/Search.hmtl to local/html/RTFM/Article/Search.html edit Search.html (find the first and second occurrence of $value = "%$1%"; and change it to $value = "$1"; leave the third and fourth occurrence alone, they are responsible for searching small string values and don't have a huge impact on performance. ===End Search.html===
===Start OracleTest part===
/* Direct_datastore is for text in 1 column further no attributes */
begin
ctx_ddl.drop_preference('"CNT_DST"');
end;
begin
ctx_ddl.create_preference('"CNT_DST"','DIRECT_DATASTORE');
end;
/
/* NULL_FILTER because we only want text indexing or auto_filter because*/
/* we add an extra fmt column to Attachments which holds a flag whether */
/* that row is indexed or not */
begin
ctx_ddl.drop_preference('"CNT_FIL"');
end;
/
begin
ctx_ddl.create_preference('"CNT_FIL"','AUTO_FILTER');
end;
/
begin
ctx_ddl.drop_section_group('"CNT_SGP"');
end;
/
begin
ctx_ddl.create_section_group('"CNT_SGP"','NULL_SECTION_GROUP');
end;
/
/* BASIC_LEXER is sufficient because we only index English/Dutch */
/* Its wise to set some attributes, look at Oracle Docs for which */
begin
ctx_ddl.drop_preference('"CNT_LEX"');
end;
/
begin
ctx_ddl.create_preference('"CNT_LEX"','BASIC_LEXER');
ctx_ddl.set_attribute('CNT_LEX', 'composite', 'DUTCH');
ctx_ddl.set_attribute('CNT_LEX', 'index_themes', 'NO');
ctx_ddl.set_attribute('CNT_LEX', 'index_stems', 'DUTCH');
ctx_ddl.set_attribute('CNT_LEX', 'index_text', 'YES');
ctx_ddl.set_attribute('CNT_LEX', 'printjoins', '_-.');
end;
/
/* BASIC_WORDLIST to set properties of stop words */
/* in my case I have added Dutch and English, edit to needs */
begin
ctx_ddl.drop_preference('"CNT_WDL"');
end;
/
begin
ctx_ddl.create_preference('"CNT_WDL"','BASIC_WORDLIST');
ctx_ddl.set_attribute('"CNT_WDL"','STEMMER','DUTCH');
ctx_ddl.set_attribute('"CNT_WDL"','FUZZY_MATCH','DUTCH');
end;
/
begin
ctx_ddl.drop_stoplist('"CNT_SPL"');
end;
/
begin
ctx_ddl.create_stoplist('"CNT_SPL"','BASIC_STOPLIST');
ctx_ddl.add_stopword('"CNT_SPL"','Corp');
ctx_ddl.add_stopword('"CNT_SPL"','Mr');
ctx_ddl.add_stopword('"CNT_SPL"','Mrs');
ctx_ddl.add_stopword('"CNT_SPL"','Ms');
ctx_ddl.add_stopword('"CNT_SPL"','Mz');
ctx_ddl.add_stopword('"CNT_SPL"','a');
ctx_ddl.add_stopword('"CNT_SPL"','about');
ctx_ddl.add_stopword('"CNT_SPL"','after');
ctx_ddl.add_stopword('"CNT_SPL"','all');
ctx_ddl.add_stopword('"CNT_SPL"','also');
ctx_ddl.add_stopword('"CNT_SPL"','an');
ctx_ddl.add_stopword('"CNT_SPL"','and');
ctx_ddl.add_stopword('"CNT_SPL"','any');
ctx_ddl.add_stopword('"CNT_SPL"','are');
ctx_ddl.add_stopword('"CNT_SPL"','as');
ctx_ddl.add_stopword('"CNT_SPL"','at');
ctx_ddl.add_stopword('"CNT_SPL"','be');
ctx_ddl.add_stopword('"CNT_SPL"','because');
ctx_ddl.add_stopword('"CNT_SPL"','been');
ctx_ddl.add_stopword('"CNT_SPL"','but');
ctx_ddl.add_stopword('"CNT_SPL"','by');
ctx_ddl.add_stopword('"CNT_SPL"','can');
ctx_ddl.add_stopword('"CNT_SPL"','co');
ctx_ddl.add_stopword('"CNT_SPL"','could');
ctx_ddl.add_stopword('"CNT_SPL"','for');
ctx_ddl.add_stopword('"CNT_SPL"','from');
ctx_ddl.add_stopword('"CNT_SPL"','had');
ctx_ddl.add_stopword('"CNT_SPL"','has');
ctx_ddl.add_stopword('"CNT_SPL"','have');
ctx_ddl.add_stopword('"CNT_SPL"','he');
ctx_ddl.add_stopword('"CNT_SPL"','her');
ctx_ddl.add_stopword('"CNT_SPL"','his');
ctx_ddl.add_stopword('"CNT_SPL"','if');
ctx_ddl.add_stopword('"CNT_SPL"','in');
ctx_ddl.add_stopword('"CNT_SPL"','inc');
ctx_ddl.add_stopword('"CNT_SPL"','into');
ctx_ddl.add_stopword('"CNT_SPL"','is');
ctx_ddl.add_stopword('"CNT_SPL"','it');
ctx_ddl.add_stopword('"CNT_SPL"','its');
ctx_ddl.add_stopword('"CNT_SPL"','last');
ctx_ddl.add_stopword('"CNT_SPL"','more');
ctx_ddl.add_stopword('"CNT_SPL"','most');
ctx_ddl.add_stopword('"CNT_SPL"','no');
ctx_ddl.add_stopword('"CNT_SPL"','not');
ctx_ddl.add_stopword('"CNT_SPL"','of');
ctx_ddl.add_stopword('"CNT_SPL"','on');
ctx_ddl.add_stopword('"CNT_SPL"','one');
ctx_ddl.add_stopword('"CNT_SPL"','only');
ctx_ddl.add_stopword('"CNT_SPL"','or');
ctx_ddl.add_stopword('"CNT_SPL"','other');
ctx_ddl.add_stopword('"CNT_SPL"','out');
ctx_ddl.add_stopword('"CNT_SPL"','over');
ctx_ddl.add_stopword('"CNT_SPL"','s');
ctx_ddl.add_stopword('"CNT_SPL"','says');
ctx_ddl.add_stopword('"CNT_SPL"','she');
ctx_ddl.add_stopword('"CNT_SPL"','so');
ctx_ddl.add_stopword('"CNT_SPL"','some');
ctx_ddl.add_stopword('"CNT_SPL"','such');
ctx_ddl.add_stopword('"CNT_SPL"','than');
ctx_ddl.add_stopword('"CNT_SPL"','that');
ctx_ddl.add_stopword('"CNT_SPL"','the');
ctx_ddl.add_stopword('"CNT_SPL"','their');
ctx_ddl.add_stopword('"CNT_SPL"','there');
ctx_ddl.add_stopword('"CNT_SPL"','they');
ctx_ddl.add_stopword('"CNT_SPL"','this');
ctx_ddl.add_stopword('"CNT_SPL"','to');
ctx_ddl.add_stopword('"CNT_SPL"','up');
ctx_ddl.add_stopword('"CNT_SPL"','was');
ctx_ddl.add_stopword('"CNT_SPL"','we');
ctx_ddl.add_stopword('"CNT_SPL"','were');
ctx_ddl.add_stopword('"CNT_SPL"','when');
ctx_ddl.add_stopword('"CNT_SPL"','which');
ctx_ddl.add_stopword('"CNT_SPL"','who');
ctx_ddl.add_stopword('"CNT_SPL"','will');
ctx_ddl.add_stopword('"CNT_SPL"','with');
ctx_ddl.add_stopword('"CNT_SPL"','would');
ctx_ddl.add_stopword('"CNT_SPL"','aan');
ctx_ddl.add_stopword('"CNT_SPL"','aangaande');
ctx_ddl.add_stopword('"CNT_SPL"','aangezien');
ctx_ddl.add_stopword('"CNT_SPL"','achter');
ctx_ddl.add_stopword('"CNT_SPL"','achterna');
ctx_ddl.add_stopword('"CNT_SPL"','afgelopen');
ctx_ddl.add_stopword('"CNT_SPL"','al');
ctx_ddl.add_stopword('"CNT_SPL"','aldaar');
ctx_ddl.add_stopword('"CNT_SPL"','aldus');
ctx_ddl.add_stopword('"CNT_SPL"','alhoewel');
ctx_ddl.add_stopword('"CNT_SPL"','alias');
ctx_ddl.add_stopword('"CNT_SPL"','alle');
ctx_ddl.add_stopword('"CNT_SPL"','allebei');
ctx_ddl.add_stopword('"CNT_SPL"','alleen');
ctx_ddl.add_stopword('"CNT_SPL"','alsnog');
ctx_ddl.add_stopword('"CNT_SPL"','altijd');
ctx_ddl.add_stopword('"CNT_SPL"','altoos');
ctx_ddl.add_stopword('"CNT_SPL"','ander');
ctx_ddl.add_stopword('"CNT_SPL"','andere');
ctx_ddl.add_stopword('"CNT_SPL"','anders');
ctx_ddl.add_stopword('"CNT_SPL"','anderszins');
ctx_ddl.add_stopword('"CNT_SPL"','behalve');
ctx_ddl.add_stopword('"CNT_SPL"','behoudens');
ctx_ddl.add_stopword('"CNT_SPL"','beide');
ctx_ddl.add_stopword('"CNT_SPL"','beiden');
ctx_ddl.add_stopword('"CNT_SPL"','ben');
ctx_ddl.add_stopword('"CNT_SPL"','beneden');
ctx_ddl.add_stopword('"CNT_SPL"','bent');
ctx_ddl.add_stopword('"CNT_SPL"','bepaald');
ctx_ddl.add_stopword('"CNT_SPL"','betreffende');
ctx_ddl.add_stopword('"CNT_SPL"','bij');
ctx_ddl.add_stopword('"CNT_SPL"','binnen');
ctx_ddl.add_stopword('"CNT_SPL"','binnenin');
ctx_ddl.add_stopword('"CNT_SPL"','boven');
ctx_ddl.add_stopword('"CNT_SPL"','bovenal');
ctx_ddl.add_stopword('"CNT_SPL"','bovendien');
ctx_ddl.add_stopword('"CNT_SPL"','bovengenoemd');
ctx_ddl.add_stopword('"CNT_SPL"','bovenstaand');
ctx_ddl.add_stopword('"CNT_SPL"','bovenvermeld');
ctx_ddl.add_stopword('"CNT_SPL"','buiten');
ctx_ddl.add_stopword('"CNT_SPL"','daar');
ctx_ddl.add_stopword('"CNT_SPL"','daarheen');
ctx_ddl.add_stopword('"CNT_SPL"','daarin');
ctx_ddl.add_stopword('"CNT_SPL"','daarna');
ctx_ddl.add_stopword('"CNT_SPL"','daarnet');
ctx_ddl.add_stopword('"CNT_SPL"','daarom');
ctx_ddl.add_stopword('"CNT_SPL"','daarop');
ctx_ddl.add_stopword('"CNT_SPL"','daarvanlangs');
ctx_ddl.add_stopword('"CNT_SPL"','dan');
ctx_ddl.add_stopword('"CNT_SPL"','dat');
ctx_ddl.add_stopword('"CNT_SPL"','de');
ctx_ddl.add_stopword('"CNT_SPL"','die');
ctx_ddl.add_stopword('"CNT_SPL"','dikwijls');
ctx_ddl.add_stopword('"CNT_SPL"','dit');
ctx_ddl.add_stopword('"CNT_SPL"','door');
ctx_ddl.add_stopword('"CNT_SPL"','doorgaand');
ctx_ddl.add_stopword('"CNT_SPL"','dus');
ctx_ddl.add_stopword('"CNT_SPL"','echter');
ctx_ddl.add_stopword('"CNT_SPL"','eer');
ctx_ddl.add_stopword('"CNT_SPL"','eerdat');
ctx_ddl.add_stopword('"CNT_SPL"','eerder');
ctx_ddl.add_stopword('"CNT_SPL"','eerlang');
ctx_ddl.add_stopword('"CNT_SPL"','eerst');
ctx_ddl.add_stopword('"CNT_SPL"','elk');
ctx_ddl.add_stopword('"CNT_SPL"','elke');
ctx_ddl.add_stopword('"CNT_SPL"','en');
ctx_ddl.add_stopword('"CNT_SPL"','enig');
ctx_ddl.add_stopword('"CNT_SPL"','enigszins');
ctx_ddl.add_stopword('"CNT_SPL"','enkel');
ctx_ddl.add_stopword('"CNT_SPL"','er');
ctx_ddl.add_stopword('"CNT_SPL"','erdoor');
ctx_ddl.add_stopword('"CNT_SPL"','even');
ctx_ddl.add_stopword('"CNT_SPL"','eveneens');
ctx_ddl.add_stopword('"CNT_SPL"','evenwel');
ctx_ddl.add_stopword('"CNT_SPL"','gauw');
ctx_ddl.add_stopword('"CNT_SPL"','gedurende');
ctx_ddl.add_stopword('"CNT_SPL"','geen');
ctx_ddl.add_stopword('"CNT_SPL"','gehad');
ctx_ddl.add_stopword('"CNT_SPL"','gekund');
ctx_ddl.add_stopword('"CNT_SPL"','geleden');
ctx_ddl.add_stopword('"CNT_SPL"','gelijk');
ctx_ddl.add_stopword('"CNT_SPL"','gemoeten');
ctx_ddl.add_stopword('"CNT_SPL"','gemogen');
ctx_ddl.add_stopword('"CNT_SPL"','geweest');
ctx_ddl.add_stopword('"CNT_SPL"','gewoon');
ctx_ddl.add_stopword('"CNT_SPL"','gewoonweg');
ctx_ddl.add_stopword('"CNT_SPL"','haar');
ctx_ddl.add_stopword('"CNT_SPL"','hadden');
ctx_ddl.add_stopword('"CNT_SPL"','hare');
ctx_ddl.add_stopword('"CNT_SPL"','heb');
ctx_ddl.add_stopword('"CNT_SPL"','hebben');
ctx_ddl.add_stopword('"CNT_SPL"','hebt');
ctx_ddl.add_stopword('"CNT_SPL"','heeft');
ctx_ddl.add_stopword('"CNT_SPL"','hem');
ctx_ddl.add_stopword('"CNT_SPL"','hen');
ctx_ddl.add_stopword('"CNT_SPL"','het');
ctx_ddl.add_stopword('"CNT_SPL"','hierbeneden');
ctx_ddl.add_stopword('"CNT_SPL"','hierboven');
ctx_ddl.add_stopword('"CNT_SPL"','hij');
ctx_ddl.add_stopword('"CNT_SPL"','hoe');
ctx_ddl.add_stopword('"CNT_SPL"','hoewel');
ctx_ddl.add_stopword('"CNT_SPL"','hun');
ctx_ddl.add_stopword('"CNT_SPL"','hunne');
ctx_ddl.add_stopword('"CNT_SPL"','ik');
ctx_ddl.add_stopword('"CNT_SPL"','ikzelf');
/* ctx_ddl.add_stopword('"CNT_SPL"','in');*/
ctx_ddl.add_stopword('"CNT_SPL"','inmiddels');
ctx_ddl.add_stopword('"CNT_SPL"','inzake');
/* ctx_ddl.add_stopword('"CNT_SPL"','is');*/
ctx_ddl.add_stopword('"CNT_SPL"','jezelf');
ctx_ddl.add_stopword('"CNT_SPL"','jij');
ctx_ddl.add_stopword('"CNT_SPL"','jijzelf');
ctx_ddl.add_stopword('"CNT_SPL"','jou');
ctx_ddl.add_stopword('"CNT_SPL"','jouw');
ctx_ddl.add_stopword('"CNT_SPL"','jouwe');
ctx_ddl.add_stopword('"CNT_SPL"','juist');
ctx_ddl.add_stopword('"CNT_SPL"','jullie');
ctx_ddl.add_stopword('"CNT_SPL"','kan');
ctx_ddl.add_stopword('"CNT_SPL"','klaar');
ctx_ddl.add_stopword('"CNT_SPL"','kon');
ctx_ddl.add_stopword('"CNT_SPL"','konden');
ctx_ddl.add_stopword('"CNT_SPL"','krachtens');
ctx_ddl.add_stopword('"CNT_SPL"','kunnen');
ctx_ddl.add_stopword('"CNT_SPL"','kunt');
ctx_ddl.add_stopword('"CNT_SPL"','later');
ctx_ddl.add_stopword('"CNT_SPL"','liever');
ctx_ddl.add_stopword('"CNT_SPL"','maar');
ctx_ddl.add_stopword('"CNT_SPL"','mag');
ctx_ddl.add_stopword('"CNT_SPL"','meer');
ctx_ddl.add_stopword('"CNT_SPL"','met');
ctx_ddl.add_stopword('"CNT_SPL"','mezelf');
ctx_ddl.add_stopword('"CNT_SPL"','mij');
ctx_ddl.add_stopword('"CNT_SPL"','mijn');
ctx_ddl.add_stopword('"CNT_SPL"','mijnent');
ctx_ddl.add_stopword('"CNT_SPL"','mijner');
ctx_ddl.add_stopword('"CNT_SPL"','mijzelf');
ctx_ddl.add_stopword('"CNT_SPL"','misschien');
ctx_ddl.add_stopword('"CNT_SPL"','mocht');
ctx_ddl.add_stopword('"CNT_SPL"','mochten');
ctx_ddl.add_stopword('"CNT_SPL"','moest');
ctx_ddl.add_stopword('"CNT_SPL"','moesten');
ctx_ddl.add_stopword('"CNT_SPL"','moet');
ctx_ddl.add_stopword('"CNT_SPL"','moeten');
ctx_ddl.add_stopword('"CNT_SPL"','mogen');
ctx_ddl.add_stopword('"CNT_SPL"','na');
ctx_ddl.add_stopword('"CNT_SPL"','naar');
ctx_ddl.add_stopword('"CNT_SPL"','nadat');
ctx_ddl.add_stopword('"CNT_SPL"','net');
ctx_ddl.add_stopword('"CNT_SPL"','niet');
ctx_ddl.add_stopword('"CNT_SPL"','noch');
ctx_ddl.add_stopword('"CNT_SPL"','nog');
ctx_ddl.add_stopword('"CNT_SPL"','nogal');
ctx_ddl.add_stopword('"CNT_SPL"','nu');
/* ctx_ddl.add_stopword('"CNT_SPL"','of');*/
ctx_ddl.add_stopword('"CNT_SPL"','ofschoon');
ctx_ddl.add_stopword('"CNT_SPL"','om');
ctx_ddl.add_stopword('"CNT_SPL"','omdat');
ctx_ddl.add_stopword('"CNT_SPL"','omhoog');
ctx_ddl.add_stopword('"CNT_SPL"','omlaag');
ctx_ddl.add_stopword('"CNT_SPL"','omstreeks');
ctx_ddl.add_stopword('"CNT_SPL"','omtrent');
ctx_ddl.add_stopword('"CNT_SPL"','omver');
ctx_ddl.add_stopword('"CNT_SPL"','onder');
ctx_ddl.add_stopword('"CNT_SPL"','ondertussen');
ctx_ddl.add_stopword('"CNT_SPL"','ongeveer');
ctx_ddl.add_stopword('"CNT_SPL"','ons');
ctx_ddl.add_stopword('"CNT_SPL"','onszelf');
ctx_ddl.add_stopword('"CNT_SPL"','onze');
ctx_ddl.add_stopword('"CNT_SPL"','ook');
ctx_ddl.add_stopword('"CNT_SPL"','op');
ctx_ddl.add_stopword('"CNT_SPL"','opnieuw');
ctx_ddl.add_stopword('"CNT_SPL"','opzij');
/* ctx_ddl.add_stopword('"CNT_SPL"','over');*/
ctx_ddl.add_stopword('"CNT_SPL"','overeind');
ctx_ddl.add_stopword('"CNT_SPL"','overigens');
ctx_ddl.add_stopword('"CNT_SPL"','pas');
ctx_ddl.add_stopword('"CNT_SPL"','precies');
ctx_ddl.add_stopword('"CNT_SPL"','reeds');
ctx_ddl.add_stopword('"CNT_SPL"','rond');
ctx_ddl.add_stopword('"CNT_SPL"','rondom');
ctx_ddl.add_stopword('"CNT_SPL"','sedert');
ctx_ddl.add_stopword('"CNT_SPL"','sinds');
ctx_ddl.add_stopword('"CNT_SPL"','sindsdien');
ctx_ddl.add_stopword('"CNT_SPL"','slechts');
ctx_ddl.add_stopword('"CNT_SPL"','sommige');
ctx_ddl.add_stopword('"CNT_SPL"','spoedig');
ctx_ddl.add_stopword('"CNT_SPL"','steeds');
ctx_ddl.add_stopword('"CNT_SPL"','tamelijk');
ctx_ddl.add_stopword('"CNT_SPL"','tenzij');
ctx_ddl.add_stopword('"CNT_SPL"','terwijl');
ctx_ddl.add_stopword('"CNT_SPL"','thans');
ctx_ddl.add_stopword('"CNT_SPL"','tijdens');
ctx_ddl.add_stopword('"CNT_SPL"','toch');
ctx_ddl.add_stopword('"CNT_SPL"','toen');
ctx_ddl.add_stopword('"CNT_SPL"','toenmaals');
ctx_ddl.add_stopword('"CNT_SPL"','toenmalig');
ctx_ddl.add_stopword('"CNT_SPL"','tot');
ctx_ddl.add_stopword('"CNT_SPL"','totdat');
ctx_ddl.add_stopword('"CNT_SPL"','tussen');
ctx_ddl.add_stopword('"CNT_SPL"','uit');
ctx_ddl.add_stopword('"CNT_SPL"','uitgezonderd');
ctx_ddl.add_stopword('"CNT_SPL"','vaak');
ctx_ddl.add_stopword('"CNT_SPL"','van');
ctx_ddl.add_stopword('"CNT_SPL"','vandaan');
ctx_ddl.add_stopword('"CNT_SPL"','vanuit');
ctx_ddl.add_stopword('"CNT_SPL"','vanwege');
ctx_ddl.add_stopword('"CNT_SPL"','veeleer');
ctx_ddl.add_stopword('"CNT_SPL"','verder');
ctx_ddl.add_stopword('"CNT_SPL"','vervolgens');
ctx_ddl.add_stopword('"CNT_SPL"','vol');
ctx_ddl.add_stopword('"CNT_SPL"','volgens');
ctx_ddl.add_stopword('"CNT_SPL"','voor');
ctx_ddl.add_stopword('"CNT_SPL"','vooraf');
ctx_ddl.add_stopword('"CNT_SPL"','vooral');
ctx_ddl.add_stopword('"CNT_SPL"','vooralsnog');
ctx_ddl.add_stopword('"CNT_SPL"','voorbij');
ctx_ddl.add_stopword('"CNT_SPL"','voordat');
ctx_ddl.add_stopword('"CNT_SPL"','voordezen');
ctx_ddl.add_stopword('"CNT_SPL"','voordien');
ctx_ddl.add_stopword('"CNT_SPL"','voorheen');
ctx_ddl.add_stopword('"CNT_SPL"','voorop');
ctx_ddl.add_stopword('"CNT_SPL"','vooruit');
ctx_ddl.add_stopword('"CNT_SPL"','vrij');
ctx_ddl.add_stopword('"CNT_SPL"','vroeg');
ctx_ddl.add_stopword('"CNT_SPL"','waar');
ctx_ddl.add_stopword('"CNT_SPL"','waarom');
ctx_ddl.add_stopword('"CNT_SPL"','wanneer');
ctx_ddl.add_stopword('"CNT_SPL"','want');
ctx_ddl.add_stopword('"CNT_SPL"','waren');
/* ctx_ddl.add_stopword('"CNT_SPL"','was');*/
ctx_ddl.add_stopword('"CNT_SPL"','wat');
ctx_ddl.add_stopword('"CNT_SPL"','weer');
ctx_ddl.add_stopword('"CNT_SPL"','weg');
ctx_ddl.add_stopword('"CNT_SPL"','wegens');
ctx_ddl.add_stopword('"CNT_SPL"','wel');
ctx_ddl.add_stopword('"CNT_SPL"','weldra');
ctx_ddl.add_stopword('"CNT_SPL"','welk');
ctx_ddl.add_stopword('"CNT_SPL"','welke');
ctx_ddl.add_stopword('"CNT_SPL"','wie');
ctx_ddl.add_stopword('"CNT_SPL"','wiens');
ctx_ddl.add_stopword('"CNT_SPL"','wier');
ctx_ddl.add_stopword('"CNT_SPL"','wij');
ctx_ddl.add_stopword('"CNT_SPL"','wijzelf');
ctx_ddl.add_stopword('"CNT_SPL"','zal');
ctx_ddl.add_stopword('"CNT_SPL"','ze');
ctx_ddl.add_stopword('"CNT_SPL"','zelfs');
ctx_ddl.add_stopword('"CNT_SPL"','zichzelf');
ctx_ddl.add_stopword('"CNT_SPL"','zij');
ctx_ddl.add_stopword('"CNT_SPL"','zijn');
ctx_ddl.add_stopword('"CNT_SPL"','zijne');
ctx_ddl.add_stopword('"CNT_SPL"','zo');
ctx_ddl.add_stopword('"CNT_SPL"','zodra');
ctx_ddl.add_stopword('"CNT_SPL"','zonder');
ctx_ddl.add_stopword('"CNT_SPL"','zou');
ctx_ddl.add_stopword('"CNT_SPL"','zouden');
ctx_ddl.add_stopword('"CNT_SPL"','zowat');
ctx_ddl.add_stopword('"CNT_SPL"','zulke');
ctx_ddl.add_stopword('"CNT_SPL"','zullen');
ctx_ddl.add_stopword('"CNT_SPL"','zult');
end;
/
begin
ctx_ddl.drop_preference('"CNT_STO"');
end;
/
begin
ctx_ddl.create_preference('"CNT_STO"','BASIC_STORAGE');
ctx_ddl.set_attribute('"CNT_STO"','R_TABLE_CLAUSE','lob (data) store as (cache
)');
ctx_ddl.set_attribute('"CNT_STO"','I_INDEX_CLAUSE','compress 2');
end;
/
/* Debug/progress check, will log in $ORACLE_HOME/ctx/log */
begin
ctx_output.start_log('CNT_LOG');
end;
/
/* If this is a production database, fill in the added column */
/* with the correct info to rebuild the index later */
update attachments
set fmt='ignore'
where
contentencoding is NULL
update attachments
set fmt='ignore'
where
contentencoding = 'base64'
update attachments
set fmt='text'
where
contentencoding = 'none'
update attachments
set fmt='text'
where
contentencoding = 'quoted-printable'
/* Extent the Attachments table with an extra column */
/* This needs te be done only once */
ALTER TABLE RT_USER.ATTACHMENTS
ADD (context_fmt VARCHAR2(10));
drop index cnt;
create index "RT_USER"."CNT"
on "RT_USER"."ATTACHMENTS"
("CONTENT")
indextype is ctxsys.context
parameters('
format column context_fmt
datastore "CNT_DST"
filter "CNT_FIL"
section group "CNT_SGP"
lexer "CNT_LEX"
wordlist "CNT_WDL"
stoplist "CNT_SPL"
storage "CNT_STO"
')
/
begin
ctx_output.end_log;
end;
/
/* Add a trigger so that new rows are properly tagged */
CREATE OR REPLACE TRIGGER RT_USER.BI_Att
BEFORE INSERT
ON RT_USER.ATTACHMENTS
REFERENCING NEW AS New OLD AS Old
FOR EACH ROW
DECLARE
/******************************************************************************
NAME:
PURPOSE:
REVISIONS:
Ver Date Author Description
--------- ---------- --------------- ------------------------------------
1.0 24-5-2007 Joop v/d Wege 1. Created this trigger.
NOTES:
Trigger takes care of filling the context_fmt column with the right
value so that only text is really indexed and not base64 rows
******************************************************************************/
BEGIN
IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
:new.context_fmt := 'ignore';
ELSE
:new.context_fmt := 'text';
END IF;
END ;
/
/* There needs to be a job that on a regular basis refreshes the index */
BEGIN
SYS.DBMS_JOB.REMOVE(21);
COMMIT;
END;
/
DECLARE
X NUMBER;
BEGIN
SYS.DBMS_JOB.SUBMIT
( job => X
,what => 'DECLARE
stmt VARCHAR2(200);
BEGIN
stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
EXECUTE IMMEDIATE stmt;
END;
'
,next_date => to_date('20-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
,interval => 'TRUNC(SYSDATE+1)+6/24'
,no_parse => FALSE
);
SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
COMMIT;
END;
/
/* test if things work */
/* first run is slow, second should be fast <500ms) */
select * from attachments where contains(content,'accounts')>0;
/****************************************************************************/
/* Repeat for ObjectCustomFieldValues */
/****************************************************************************/
alter table objectcustomfieldvalues add (context_fmt varchar2(10));
update objectcustomfieldvalues
set context_fmt='ignore'
where
contentencoding is NULL;
update objectcustomfieldvalues
set context_fmt='ignore'
where
contentencoding = 'base64';
update objectcustomfieldvalues
set context_fmt='text'
where
contentencoding = 'none';
update objectcustomfieldvalues
set context_fmt='text'
where
contentencoding = 'quoted-printable';
CREATE OR REPLACE TRIGGER RT_USER.BI_OCFV
BEFORE INSERT
ON RT_USER.objectcustomfieldvalues
REFERENCING NEW AS New OLD AS Old
FOR EACH ROW
DECLARE
/******************************************************************************
NAME:
PURPOSE:
REVISIONS:
Ver Date Author Description
--------- ---------- --------------- ------------------------------------
1.0 24-5-2007 Joop v/d Wege 1. Created this trigger.
NOTES:
Trigger takes care of filling the context_fmt column with the right
value so that only text is really indexed and not base64 rows
******************************************************************************/
BEGIN
IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
:new.context_fmt := 'ignore';
ELSE
:new.context_fmt := 'text';
END IF;
END ;
/
drop index ocfv_cnt;
create index ocfv_cnt
on "RT_USER"."OBJECTCUSTOMFIELDVALUES"
("LARGECONTENT")
indextype is ctxsys.context
parameters('
format column context_fmt
datastore "CNT_DST"
filter "CNT_FIL"
section group "CNT_SGP"
lexer "CNT_LEX"
wordlist "CNT_WDL"
stoplist "CNT_SPL"
storage "CNT_STO"
')
/
select * from objectcustomfieldvalues where contains(largecontent,'accounts')>0
/* depending on whether both RT and RTFM are used use this job instead of the */
/* previous one */
BEGIN
SYS.DBMS_JOB.REMOVE(21);
COMMIT;
END;
/
DECLARE
X NUMBER;
BEGIN
SYS.DBMS_JOB.SUBMIT
( job => X
,what => 'DECLARE
stmt VARCHAR2(200);
BEGIN
stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
EXECUTE IMMEDIATE stmt;
stmt := ''ALTER INDEX ocfv_cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
EXECUTE IMMEDIATE stmt;
END;
'
,next_date => to_date('25-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
,interval => 'TRUNC(SYSDATE+1)+6/24'
,no_parse => FALSE
);
SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
COMMIT;
END;
/
===End OracleTest part===