You're not currently signed in. Sign in.

This article will describe, in short, what I have done to speed up queries which searches inside email bodies/attachments and RTFM articles which contain large amounts of WikiText.

There are 3 things that need to be done:

  • patch SearchBuilder, to remove pre/post pend of %, change LIKE to contain(Content,'searchstring')>0
  • change RTFM Search.html, remove pre/post pend of % on WikiText Customfields
  • add Oracle Text indexes for Attachments.content and ObjectCustomFieldValues.largecontent

Attached are the file needed todo this, that functionality isn't there anymore ;-( So here they come inline

===Start Searchbuilder patch no. 1===

--- SearchBuilder.pm.orig	2007-07-07 22:45:00.000000000 +0200
+++ SearchBuilder.pm	2007-07-10 08:49:00.000000000 +0200
@@ -767,9 +767,15 @@
 
     if ( $args{'FIELD'} ) {
 
-        #If it's a like, we supply the %s around the search term
+        #If it's a like, we supply the %s around the search term only if its not Oracle
+        #because for Oracle we'll use where contains(content,'text')>1
         if ( $args{'OPERATOR'} =~ /LIKE/i ) {
-            $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+            if ( $RT::DatabaseType eq 'Oracle') {
+                 $args{'VALUE'} = $args{'VALUE'} 
+            }
+            else {
+                $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+            }
         }
         elsif ( $args{'OPERATOR'} =~ /STARTSWITH/i ) {
             $args{'VALUE'}    = $args{'VALUE'} . "%";
@@ -932,6 +938,49 @@
         value => $args{'VALUE'},
     };
 
+# Keep the original clause and modify if dbtype is Oracle
+# This patch makes searching for ticket content real fast when you have the appropriate
+# index on Attachments.Content and ObjectCustomFieldValues.Largecontent for RTFM
+# CREATE INDEX CNT ON ATTACHMENTS (CONTENT) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# CREATE INDEX FM_LARGE_CNT ON ObjectCustomFieldValues(Largecontent) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# parameters is tunable, see Oracle docs ;-)
+# fixed the static replace Attachments_2.Content by dynamic version for both RT and RTFM Content fields
+# also changed RTFM/Article/Search.html NOT to pre and post pend '%'
+# other things needed is a FMT column for Attachments and ObjectCustomFieldValues
+# which takes care of not including base64 text into the index
+# does this by using a trigger which fills the FMT column and using the appropriate
+# options when creating the Text index. (saved 25% of unneeded index room ~1Gb out of ~4Gb)
+    if ( $RT::DatabaseType eq 'Oracle' ) {
+        my ($tmpfield);
+		# First fix RT to use OracleText when searching Ticket Content
+        if ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/  and $args{'OPERATOR'} eq 'LIKE') {
+			# get rid of the lower since you can't have a functional context index.
+			# don't care about the remaining (), they don't have a significant effect on performance
+			# you can however tell Context to index casesensitive or caseinsensitive.
+			# this is probably not very elegant, excuse my perl ;-)
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+        elsif ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/  and $args{'OPERATOR'} eq 'NOT LIKE') {
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+		# Second fix RTFM to use OracleText when searching Article Content and LargeContent
+        elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'LIKE') {
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+		elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'NOT LIKE') {
+     	    $tmpfield = $QualifiedField;
+	   	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+
+    }
+
     # Juju because this should come _AFTER_ the EA
     my @prefix;
     if ( $self->{_open_parens}{ $ClauseId } ) {

===End Searchbuilder patch no. 1===

===Start Searchbuilder patch no. 2===

--- Handle.pm.orig	2007-04-10 22:48:42.000000000 +0200
+++ Handle.pm	2007-07-12 11:13:00.000000000 +0200
@@ -1101,6 +1101,14 @@
     # of here as we can't parse subclauses
     return 1 if grep $_ ne 'generic_restrictions', keys %{ $args{'SearchBuilder'}->{'subclauses'} };
 
+# JvdW 2007-07-12
+# Get out of this function if were dealing with either Attachments or ObjectCustomFieldValues
+# since these are modified and parser trips over '(contains(content,'test')>0)
+# haven't seen any side effect but that doesn't mean there aren't any.
+# feel free to fix this lousy perl of mine ;-)
+	return 1 if $args{'ALIAS'} =~ /Attachments_\d+/ ;
+ 	return 1 if $args{'ALIAS'} =~ /ObjectCustomFieldValues_\d+/ ;
+
     # build full list of generic conditions
     my @conditions;
     foreach ( grep @$_, values %{ $args{'SearchBuilder'}->{'restrictions'} } ) {

===End Searchbuilder patch no. 2===

===Start Search.html=== copy share/html/RTFM/Article/Search.hmtl to local/html/RTFM/Article/Search.html edit Search.html (find the first and second occurrence of $value = "%$1%"; and change it to $value = "$1"; leave the third and fourth occurrence alone, they are responsible for searching small string values and don't have a huge impact on performance. ===End Search.html===

===Start OracleTest part===

/* Direct_datastore is for text in 1 column further no attributes */

begin                                                                           
  ctx_ddl.drop_preference('"CNT_DST"');                    
end;                                                                            

begin                                                                           
  ctx_ddl.create_preference('"CNT_DST"','DIRECT_DATASTORE');                    
end;                                                                            
/                                                                               
                                                                                
/* NULL_FILTER because we only want text indexing or auto_filter because*/
/* we add an extra fmt column to Attachments which holds a flag whether */
/* that row is indexed or not */

begin                                                                           
  ctx_ddl.drop_preference('"CNT_FIL"');                         
end;                                                                            
/
begin                                                                           
  ctx_ddl.create_preference('"CNT_FIL"','AUTO_FILTER');                         
end;                                                                            
/                                                                               
                                                                                
begin                                                                           
  ctx_ddl.drop_section_group('"CNT_SGP"');               
end;                                                                            
/
begin                                                                           
  ctx_ddl.create_section_group('"CNT_SGP"','NULL_SECTION_GROUP');               
end;                                                                            
/                                                                               

/* BASIC_LEXER is sufficient because we only index English/Dutch  */
/* Its wise to set some attributes, look at Oracle Docs for which */                                                                                
begin                                                                           
  ctx_ddl.drop_preference('"CNT_LEX"');
end;                                                                            
/                                                                               
begin                                                                           
  ctx_ddl.create_preference('"CNT_LEX"','BASIC_LEXER');
  ctx_ddl.set_attribute('CNT_LEX', 'composite', 'DUTCH');
  ctx_ddl.set_attribute('CNT_LEX', 'index_themes', 'NO');
  ctx_ddl.set_attribute('CNT_LEX', 'index_stems', 'DUTCH');
  ctx_ddl.set_attribute('CNT_LEX', 'index_text', 'YES');
  ctx_ddl.set_attribute('CNT_LEX', 'printjoins', '_-.');
end;                                                                            
/                                                                               

/* BASIC_WORDLIST to set properties of stop words            */
/* in my case I have added Dutch and English, edit to needs  */
begin                                                                           
  ctx_ddl.drop_preference('"CNT_WDL"');                      
end;
/
begin                                                                           
  ctx_ddl.create_preference('"CNT_WDL"','BASIC_WORDLIST');                      
  ctx_ddl.set_attribute('"CNT_WDL"','STEMMER','DUTCH');                       
  ctx_ddl.set_attribute('"CNT_WDL"','FUZZY_MATCH','DUTCH');                   
end;                                                                            
/                                                                               
                                                                                
begin                                                                           
  ctx_ddl.drop_stoplist('"CNT_SPL"');                        
end;
/
begin                                                                           
  ctx_ddl.create_stoplist('"CNT_SPL"','BASIC_STOPLIST');                        
  ctx_ddl.add_stopword('"CNT_SPL"','Corp');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','Mr');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','Mrs');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','Ms');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','Mz');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','a');                                        
  ctx_ddl.add_stopword('"CNT_SPL"','about');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','after');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','all');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','also');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','an');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','and');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','any');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','are');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','as');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','at');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','be');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','because');                                  
  ctx_ddl.add_stopword('"CNT_SPL"','been');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','but');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','by');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','can');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','co');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','could');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','for');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','from');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','had');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','has');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','have');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','he');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','her');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','his');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','if');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','in');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','inc');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','into');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','is');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','it');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','its');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','last');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','more');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','most');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','no');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','not');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','of');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','on');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','one');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','only');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','or');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','other');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','out');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','over');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','s');                                        
  ctx_ddl.add_stopword('"CNT_SPL"','says');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','she');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','so');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','some');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','such');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','than');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','that');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','the');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','their');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','there');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','they');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','this');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','to');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','up');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','was');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','we');                                       
  ctx_ddl.add_stopword('"CNT_SPL"','were');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','when');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','which');                                    
  ctx_ddl.add_stopword('"CNT_SPL"','who');                                      
  ctx_ddl.add_stopword('"CNT_SPL"','will');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','with');                                     
  ctx_ddl.add_stopword('"CNT_SPL"','would');
  ctx_ddl.add_stopword('"CNT_SPL"','aan');
  ctx_ddl.add_stopword('"CNT_SPL"','aangaande');
  ctx_ddl.add_stopword('"CNT_SPL"','aangezien');
  ctx_ddl.add_stopword('"CNT_SPL"','achter');
  ctx_ddl.add_stopword('"CNT_SPL"','achterna');
  ctx_ddl.add_stopword('"CNT_SPL"','afgelopen');
  ctx_ddl.add_stopword('"CNT_SPL"','al');
  ctx_ddl.add_stopword('"CNT_SPL"','aldaar');
  ctx_ddl.add_stopword('"CNT_SPL"','aldus');
  ctx_ddl.add_stopword('"CNT_SPL"','alhoewel');
  ctx_ddl.add_stopword('"CNT_SPL"','alias');
  ctx_ddl.add_stopword('"CNT_SPL"','alle');
  ctx_ddl.add_stopword('"CNT_SPL"','allebei');
  ctx_ddl.add_stopword('"CNT_SPL"','alleen');
  ctx_ddl.add_stopword('"CNT_SPL"','alsnog');
  ctx_ddl.add_stopword('"CNT_SPL"','altijd');
  ctx_ddl.add_stopword('"CNT_SPL"','altoos');
  ctx_ddl.add_stopword('"CNT_SPL"','ander');
  ctx_ddl.add_stopword('"CNT_SPL"','andere');
  ctx_ddl.add_stopword('"CNT_SPL"','anders');
  ctx_ddl.add_stopword('"CNT_SPL"','anderszins');
  ctx_ddl.add_stopword('"CNT_SPL"','behalve');
  ctx_ddl.add_stopword('"CNT_SPL"','behoudens');
  ctx_ddl.add_stopword('"CNT_SPL"','beide');
  ctx_ddl.add_stopword('"CNT_SPL"','beiden');
  ctx_ddl.add_stopword('"CNT_SPL"','ben');
  ctx_ddl.add_stopword('"CNT_SPL"','beneden');
  ctx_ddl.add_stopword('"CNT_SPL"','bent');
  ctx_ddl.add_stopword('"CNT_SPL"','bepaald');
  ctx_ddl.add_stopword('"CNT_SPL"','betreffende');
  ctx_ddl.add_stopword('"CNT_SPL"','bij');
  ctx_ddl.add_stopword('"CNT_SPL"','binnen');
  ctx_ddl.add_stopword('"CNT_SPL"','binnenin');
  ctx_ddl.add_stopword('"CNT_SPL"','boven');
  ctx_ddl.add_stopword('"CNT_SPL"','bovenal');
  ctx_ddl.add_stopword('"CNT_SPL"','bovendien');
  ctx_ddl.add_stopword('"CNT_SPL"','bovengenoemd');
  ctx_ddl.add_stopword('"CNT_SPL"','bovenstaand');
  ctx_ddl.add_stopword('"CNT_SPL"','bovenvermeld');
  ctx_ddl.add_stopword('"CNT_SPL"','buiten');
  ctx_ddl.add_stopword('"CNT_SPL"','daar');
  ctx_ddl.add_stopword('"CNT_SPL"','daarheen');
  ctx_ddl.add_stopword('"CNT_SPL"','daarin');
  ctx_ddl.add_stopword('"CNT_SPL"','daarna');
  ctx_ddl.add_stopword('"CNT_SPL"','daarnet');
  ctx_ddl.add_stopword('"CNT_SPL"','daarom');
  ctx_ddl.add_stopword('"CNT_SPL"','daarop');
  ctx_ddl.add_stopword('"CNT_SPL"','daarvanlangs');
  ctx_ddl.add_stopword('"CNT_SPL"','dan');
  ctx_ddl.add_stopword('"CNT_SPL"','dat');
  ctx_ddl.add_stopword('"CNT_SPL"','de');
  ctx_ddl.add_stopword('"CNT_SPL"','die');
  ctx_ddl.add_stopword('"CNT_SPL"','dikwijls');
  ctx_ddl.add_stopword('"CNT_SPL"','dit');
  ctx_ddl.add_stopword('"CNT_SPL"','door');
  ctx_ddl.add_stopword('"CNT_SPL"','doorgaand');
  ctx_ddl.add_stopword('"CNT_SPL"','dus');
  ctx_ddl.add_stopword('"CNT_SPL"','echter');
  ctx_ddl.add_stopword('"CNT_SPL"','eer');
  ctx_ddl.add_stopword('"CNT_SPL"','eerdat');
  ctx_ddl.add_stopword('"CNT_SPL"','eerder');
  ctx_ddl.add_stopword('"CNT_SPL"','eerlang');
  ctx_ddl.add_stopword('"CNT_SPL"','eerst');
  ctx_ddl.add_stopword('"CNT_SPL"','elk');
  ctx_ddl.add_stopword('"CNT_SPL"','elke');
  ctx_ddl.add_stopword('"CNT_SPL"','en');
  ctx_ddl.add_stopword('"CNT_SPL"','enig');
  ctx_ddl.add_stopword('"CNT_SPL"','enigszins');
  ctx_ddl.add_stopword('"CNT_SPL"','enkel');
  ctx_ddl.add_stopword('"CNT_SPL"','er');
  ctx_ddl.add_stopword('"CNT_SPL"','erdoor');
  ctx_ddl.add_stopword('"CNT_SPL"','even');
  ctx_ddl.add_stopword('"CNT_SPL"','eveneens');
  ctx_ddl.add_stopword('"CNT_SPL"','evenwel');
  ctx_ddl.add_stopword('"CNT_SPL"','gauw');
  ctx_ddl.add_stopword('"CNT_SPL"','gedurende');
  ctx_ddl.add_stopword('"CNT_SPL"','geen');
  ctx_ddl.add_stopword('"CNT_SPL"','gehad');
  ctx_ddl.add_stopword('"CNT_SPL"','gekund');
  ctx_ddl.add_stopword('"CNT_SPL"','geleden');
  ctx_ddl.add_stopword('"CNT_SPL"','gelijk');
  ctx_ddl.add_stopword('"CNT_SPL"','gemoeten');
  ctx_ddl.add_stopword('"CNT_SPL"','gemogen');
  ctx_ddl.add_stopword('"CNT_SPL"','geweest');
  ctx_ddl.add_stopword('"CNT_SPL"','gewoon');
  ctx_ddl.add_stopword('"CNT_SPL"','gewoonweg');
  ctx_ddl.add_stopword('"CNT_SPL"','haar');
  ctx_ddl.add_stopword('"CNT_SPL"','hadden');
  ctx_ddl.add_stopword('"CNT_SPL"','hare');
  ctx_ddl.add_stopword('"CNT_SPL"','heb');
  ctx_ddl.add_stopword('"CNT_SPL"','hebben');
  ctx_ddl.add_stopword('"CNT_SPL"','hebt');
  ctx_ddl.add_stopword('"CNT_SPL"','heeft');
  ctx_ddl.add_stopword('"CNT_SPL"','hem');
  ctx_ddl.add_stopword('"CNT_SPL"','hen');
  ctx_ddl.add_stopword('"CNT_SPL"','het');
  ctx_ddl.add_stopword('"CNT_SPL"','hierbeneden');
  ctx_ddl.add_stopword('"CNT_SPL"','hierboven');
  ctx_ddl.add_stopword('"CNT_SPL"','hij');
  ctx_ddl.add_stopword('"CNT_SPL"','hoe');
  ctx_ddl.add_stopword('"CNT_SPL"','hoewel');
  ctx_ddl.add_stopword('"CNT_SPL"','hun');
  ctx_ddl.add_stopword('"CNT_SPL"','hunne');
  ctx_ddl.add_stopword('"CNT_SPL"','ik');
  ctx_ddl.add_stopword('"CNT_SPL"','ikzelf');
/*  ctx_ddl.add_stopword('"CNT_SPL"','in');*/
  ctx_ddl.add_stopword('"CNT_SPL"','inmiddels');
  ctx_ddl.add_stopword('"CNT_SPL"','inzake');
/*  ctx_ddl.add_stopword('"CNT_SPL"','is');*/
  ctx_ddl.add_stopword('"CNT_SPL"','jezelf');
  ctx_ddl.add_stopword('"CNT_SPL"','jij');
  ctx_ddl.add_stopword('"CNT_SPL"','jijzelf');
  ctx_ddl.add_stopword('"CNT_SPL"','jou');
  ctx_ddl.add_stopword('"CNT_SPL"','jouw');
  ctx_ddl.add_stopword('"CNT_SPL"','jouwe');
  ctx_ddl.add_stopword('"CNT_SPL"','juist');
  ctx_ddl.add_stopword('"CNT_SPL"','jullie');
  ctx_ddl.add_stopword('"CNT_SPL"','kan');
  ctx_ddl.add_stopword('"CNT_SPL"','klaar');
  ctx_ddl.add_stopword('"CNT_SPL"','kon');
  ctx_ddl.add_stopword('"CNT_SPL"','konden');
  ctx_ddl.add_stopword('"CNT_SPL"','krachtens');
  ctx_ddl.add_stopword('"CNT_SPL"','kunnen');
  ctx_ddl.add_stopword('"CNT_SPL"','kunt');
  ctx_ddl.add_stopword('"CNT_SPL"','later');
  ctx_ddl.add_stopword('"CNT_SPL"','liever');
  ctx_ddl.add_stopword('"CNT_SPL"','maar');
  ctx_ddl.add_stopword('"CNT_SPL"','mag');
  ctx_ddl.add_stopword('"CNT_SPL"','meer');
  ctx_ddl.add_stopword('"CNT_SPL"','met');
  ctx_ddl.add_stopword('"CNT_SPL"','mezelf');
  ctx_ddl.add_stopword('"CNT_SPL"','mij');
  ctx_ddl.add_stopword('"CNT_SPL"','mijn');
  ctx_ddl.add_stopword('"CNT_SPL"','mijnent');
  ctx_ddl.add_stopword('"CNT_SPL"','mijner');
  ctx_ddl.add_stopword('"CNT_SPL"','mijzelf');
  ctx_ddl.add_stopword('"CNT_SPL"','misschien');
  ctx_ddl.add_stopword('"CNT_SPL"','mocht');
  ctx_ddl.add_stopword('"CNT_SPL"','mochten');
  ctx_ddl.add_stopword('"CNT_SPL"','moest');
  ctx_ddl.add_stopword('"CNT_SPL"','moesten');
  ctx_ddl.add_stopword('"CNT_SPL"','moet');
  ctx_ddl.add_stopword('"CNT_SPL"','moeten');
  ctx_ddl.add_stopword('"CNT_SPL"','mogen');
  ctx_ddl.add_stopword('"CNT_SPL"','na');
  ctx_ddl.add_stopword('"CNT_SPL"','naar');
  ctx_ddl.add_stopword('"CNT_SPL"','nadat');
  ctx_ddl.add_stopword('"CNT_SPL"','net');
  ctx_ddl.add_stopword('"CNT_SPL"','niet');
  ctx_ddl.add_stopword('"CNT_SPL"','noch');
  ctx_ddl.add_stopword('"CNT_SPL"','nog');
  ctx_ddl.add_stopword('"CNT_SPL"','nogal');
  ctx_ddl.add_stopword('"CNT_SPL"','nu');
/*  ctx_ddl.add_stopword('"CNT_SPL"','of');*/
  ctx_ddl.add_stopword('"CNT_SPL"','ofschoon');
  ctx_ddl.add_stopword('"CNT_SPL"','om');
  ctx_ddl.add_stopword('"CNT_SPL"','omdat');
  ctx_ddl.add_stopword('"CNT_SPL"','omhoog');
  ctx_ddl.add_stopword('"CNT_SPL"','omlaag');
  ctx_ddl.add_stopword('"CNT_SPL"','omstreeks');
  ctx_ddl.add_stopword('"CNT_SPL"','omtrent');
  ctx_ddl.add_stopword('"CNT_SPL"','omver');
  ctx_ddl.add_stopword('"CNT_SPL"','onder');
  ctx_ddl.add_stopword('"CNT_SPL"','ondertussen');
  ctx_ddl.add_stopword('"CNT_SPL"','ongeveer');
  ctx_ddl.add_stopword('"CNT_SPL"','ons');
  ctx_ddl.add_stopword('"CNT_SPL"','onszelf');
  ctx_ddl.add_stopword('"CNT_SPL"','onze');
  ctx_ddl.add_stopword('"CNT_SPL"','ook');
  ctx_ddl.add_stopword('"CNT_SPL"','op');
  ctx_ddl.add_stopword('"CNT_SPL"','opnieuw');
  ctx_ddl.add_stopword('"CNT_SPL"','opzij');
/*  ctx_ddl.add_stopword('"CNT_SPL"','over');*/
  ctx_ddl.add_stopword('"CNT_SPL"','overeind');
  ctx_ddl.add_stopword('"CNT_SPL"','overigens');
  ctx_ddl.add_stopword('"CNT_SPL"','pas');
  ctx_ddl.add_stopword('"CNT_SPL"','precies');
  ctx_ddl.add_stopword('"CNT_SPL"','reeds');
  ctx_ddl.add_stopword('"CNT_SPL"','rond');
  ctx_ddl.add_stopword('"CNT_SPL"','rondom');
  ctx_ddl.add_stopword('"CNT_SPL"','sedert');
  ctx_ddl.add_stopword('"CNT_SPL"','sinds');
  ctx_ddl.add_stopword('"CNT_SPL"','sindsdien');
  ctx_ddl.add_stopword('"CNT_SPL"','slechts');
  ctx_ddl.add_stopword('"CNT_SPL"','sommige');
  ctx_ddl.add_stopword('"CNT_SPL"','spoedig');
  ctx_ddl.add_stopword('"CNT_SPL"','steeds');
  ctx_ddl.add_stopword('"CNT_SPL"','tamelijk');
  ctx_ddl.add_stopword('"CNT_SPL"','tenzij');
  ctx_ddl.add_stopword('"CNT_SPL"','terwijl');
  ctx_ddl.add_stopword('"CNT_SPL"','thans');
  ctx_ddl.add_stopword('"CNT_SPL"','tijdens');
  ctx_ddl.add_stopword('"CNT_SPL"','toch');
  ctx_ddl.add_stopword('"CNT_SPL"','toen');
  ctx_ddl.add_stopword('"CNT_SPL"','toenmaals');
  ctx_ddl.add_stopword('"CNT_SPL"','toenmalig');
  ctx_ddl.add_stopword('"CNT_SPL"','tot');
  ctx_ddl.add_stopword('"CNT_SPL"','totdat');
  ctx_ddl.add_stopword('"CNT_SPL"','tussen');
  ctx_ddl.add_stopword('"CNT_SPL"','uit');
  ctx_ddl.add_stopword('"CNT_SPL"','uitgezonderd');
  ctx_ddl.add_stopword('"CNT_SPL"','vaak');
  ctx_ddl.add_stopword('"CNT_SPL"','van');
  ctx_ddl.add_stopword('"CNT_SPL"','vandaan');
  ctx_ddl.add_stopword('"CNT_SPL"','vanuit');
  ctx_ddl.add_stopword('"CNT_SPL"','vanwege');
  ctx_ddl.add_stopword('"CNT_SPL"','veeleer');
  ctx_ddl.add_stopword('"CNT_SPL"','verder');
  ctx_ddl.add_stopword('"CNT_SPL"','vervolgens');
  ctx_ddl.add_stopword('"CNT_SPL"','vol');
  ctx_ddl.add_stopword('"CNT_SPL"','volgens');
  ctx_ddl.add_stopword('"CNT_SPL"','voor');
  ctx_ddl.add_stopword('"CNT_SPL"','vooraf');
  ctx_ddl.add_stopword('"CNT_SPL"','vooral');
  ctx_ddl.add_stopword('"CNT_SPL"','vooralsnog');
  ctx_ddl.add_stopword('"CNT_SPL"','voorbij');
  ctx_ddl.add_stopword('"CNT_SPL"','voordat');
  ctx_ddl.add_stopword('"CNT_SPL"','voordezen');
  ctx_ddl.add_stopword('"CNT_SPL"','voordien');
  ctx_ddl.add_stopword('"CNT_SPL"','voorheen');
  ctx_ddl.add_stopword('"CNT_SPL"','voorop');
  ctx_ddl.add_stopword('"CNT_SPL"','vooruit');
  ctx_ddl.add_stopword('"CNT_SPL"','vrij');
  ctx_ddl.add_stopword('"CNT_SPL"','vroeg');
  ctx_ddl.add_stopword('"CNT_SPL"','waar');
  ctx_ddl.add_stopword('"CNT_SPL"','waarom');
  ctx_ddl.add_stopword('"CNT_SPL"','wanneer');
  ctx_ddl.add_stopword('"CNT_SPL"','want');
  ctx_ddl.add_stopword('"CNT_SPL"','waren');
/*  ctx_ddl.add_stopword('"CNT_SPL"','was');*/
  ctx_ddl.add_stopword('"CNT_SPL"','wat');
  ctx_ddl.add_stopword('"CNT_SPL"','weer');
  ctx_ddl.add_stopword('"CNT_SPL"','weg');
  ctx_ddl.add_stopword('"CNT_SPL"','wegens');
  ctx_ddl.add_stopword('"CNT_SPL"','wel');
  ctx_ddl.add_stopword('"CNT_SPL"','weldra');
  ctx_ddl.add_stopword('"CNT_SPL"','welk');
  ctx_ddl.add_stopword('"CNT_SPL"','welke');
  ctx_ddl.add_stopword('"CNT_SPL"','wie');
  ctx_ddl.add_stopword('"CNT_SPL"','wiens');
  ctx_ddl.add_stopword('"CNT_SPL"','wier');
  ctx_ddl.add_stopword('"CNT_SPL"','wij');
  ctx_ddl.add_stopword('"CNT_SPL"','wijzelf');
  ctx_ddl.add_stopword('"CNT_SPL"','zal');
  ctx_ddl.add_stopword('"CNT_SPL"','ze');
  ctx_ddl.add_stopword('"CNT_SPL"','zelfs');
  ctx_ddl.add_stopword('"CNT_SPL"','zichzelf');
  ctx_ddl.add_stopword('"CNT_SPL"','zij');
  ctx_ddl.add_stopword('"CNT_SPL"','zijn');
  ctx_ddl.add_stopword('"CNT_SPL"','zijne');
  ctx_ddl.add_stopword('"CNT_SPL"','zo');
  ctx_ddl.add_stopword('"CNT_SPL"','zodra');
  ctx_ddl.add_stopword('"CNT_SPL"','zonder');
  ctx_ddl.add_stopword('"CNT_SPL"','zou');
  ctx_ddl.add_stopword('"CNT_SPL"','zouden');
  ctx_ddl.add_stopword('"CNT_SPL"','zowat');
  ctx_ddl.add_stopword('"CNT_SPL"','zulke');
  ctx_ddl.add_stopword('"CNT_SPL"','zullen');
  ctx_ddl.add_stopword('"CNT_SPL"','zult');
end;                                                                            
/                                                                               
                                                                                
begin                                                                           
  ctx_ddl.drop_preference('"CNT_STO"');                       
end;
/
begin                                                                           
  ctx_ddl.create_preference('"CNT_STO"','BASIC_STORAGE');                       
  ctx_ddl.set_attribute('"CNT_STO"','R_TABLE_CLAUSE','lob (data) store as (cache
)');                                                                            
  ctx_ddl.set_attribute('"CNT_STO"','I_INDEX_CLAUSE','compress 2');             
end;                                                                            
/                                                                               

/* Debug/progress check, will log in $ORACLE_HOME/ctx/log */
begin                                                                           
  ctx_output.start_log('CNT_LOG');                                              
end;                                                                            
/                                                                               

/* If this is a production database, fill in the added column */
/* with the correct info to rebuild the index later           */  
update attachments 
set fmt='ignore' 
where
contentencoding is NULL

update attachments 
set fmt='ignore' 
where
contentencoding = 'base64'

update attachments 
set fmt='text' 
where
contentencoding = 'none'

update attachments 
set fmt='text' 
where
contentencoding = 'quoted-printable'
 

/* Extent the Attachments table with an extra column */
/* This needs te be done only once                   */
ALTER TABLE RT_USER.ATTACHMENTS
 ADD (context_fmt  VARCHAR2(10));

drop index cnt;
                                                                                
create index "RT_USER"."CNT"                                                    
  on "RT_USER"."ATTACHMENTS"                                                    
      ("CONTENT")                                                               
  indextype is ctxsys.context                                                   
  parameters('
    format column    context_fmt         
    datastore       "CNT_DST"                                                   
    filter          "CNT_FIL"                                                   
    section group   "CNT_SGP"                                                   
    lexer           "CNT_LEX"                                                   
    wordlist        "CNT_WDL"                                                   
    stoplist        "CNT_SPL"                                                   
    storage         "CNT_STO"                                                   
  ')                                                                            
/                                                                               
                                                                                
begin                                                                           
  ctx_output.end_log;                                                           
end;                                                                            
/                                                                               

 
/* Add a trigger so that new rows are properly tagged */
CREATE OR REPLACE TRIGGER RT_USER.BI_Att
BEFORE INSERT
ON RT_USER.ATTACHMENTS 
REFERENCING NEW AS New OLD AS Old
FOR EACH ROW
DECLARE
/******************************************************************************
   NAME:       
   PURPOSE:    

   REVISIONS:
   Ver        Date        Author           Description
   ---------  ----------  ---------------  ------------------------------------
   1.0        24-5-2007   Joop v/d Wege   1. Created this trigger.

   NOTES:
   Trigger takes care of filling the context_fmt column with the right
   value so that only text is really indexed and not base64 rows
   
******************************************************************************/
BEGIN
 
    IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
       :new.context_fmt := 'ignore';
    ELSE
        :new.context_fmt := 'text';
    END IF;

END ;
/


/* There needs to be a job that on a regular basis refreshes the index */  
BEGIN 
  SYS.DBMS_JOB.REMOVE(21);
COMMIT;
END;
/

DECLARE
  X NUMBER;
BEGIN
  SYS.DBMS_JOB.SUBMIT
  ( job       => X 
   ,what      => 'DECLARE
   stmt VARCHAR2(200);
BEGIN
   stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
   EXECUTE IMMEDIATE stmt;
END;
'
   ,next_date => to_date('20-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
   ,interval  => 'TRUNC(SYSDATE+1)+6/24'
   ,no_parse  => FALSE
  );
  SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
COMMIT;
END;
/

/* test if things work                                                      */
/* first run is slow, second should be fast <500ms)                         */
select * from attachments where contains(content,'accounts')>0;

/****************************************************************************/
/* Repeat for ObjectCustomFieldValues                                       */
/****************************************************************************/

alter table objectcustomfieldvalues add (context_fmt varchar2(10));

update objectcustomfieldvalues 
set context_fmt='ignore' 
where
contentencoding is NULL;

update objectcustomfieldvalues 
set context_fmt='ignore' 
where
contentencoding = 'base64';

update objectcustomfieldvalues 
set context_fmt='text' 
where
contentencoding = 'none';

update objectcustomfieldvalues 
set context_fmt='text' 
where
contentencoding = 'quoted-printable';


CREATE OR REPLACE TRIGGER RT_USER.BI_OCFV
BEFORE INSERT
ON RT_USER.objectcustomfieldvalues 
REFERENCING NEW AS New OLD AS Old
FOR EACH ROW
DECLARE
/******************************************************************************
   NAME:       
   PURPOSE:    

   REVISIONS:
   Ver        Date        Author           Description
   ---------  ----------  ---------------  ------------------------------------
   1.0        24-5-2007   Joop v/d Wege   1. Created this trigger.

   NOTES:
   Trigger takes care of filling the context_fmt column with the right
   value so that only text is really indexed and not base64 rows
   
******************************************************************************/
BEGIN
 
    IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
       :new.context_fmt := 'ignore';
    ELSE
        :new.context_fmt := 'text';
    END IF;

END ;
/


drop index ocfv_cnt;
                                                                                
create index ocfv_cnt                                                    
  on "RT_USER"."OBJECTCUSTOMFIELDVALUES"                                                    
      ("LARGECONTENT")                                                               
  indextype is ctxsys.context                                                   
  parameters('
    format column    context_fmt         
    datastore       "CNT_DST"                                                   
    filter          "CNT_FIL"                                                   
    section group   "CNT_SGP"                                                   
    lexer           "CNT_LEX"                                                   
    wordlist        "CNT_WDL"                                                   
    stoplist        "CNT_SPL"                                                   
    storage         "CNT_STO"                                                   
  ')                                                                            
/                                                                               


select * from objectcustomfieldvalues where contains(largecontent,'accounts')>0

/* depending on whether both RT and RTFM are used use this job instead of the */
/* previous one                                                               */
BEGIN 
  SYS.DBMS_JOB.REMOVE(21);
COMMIT;
END;
/

DECLARE
  X NUMBER;
BEGIN
  SYS.DBMS_JOB.SUBMIT
  ( job       => X 
   ,what      => 'DECLARE
   stmt VARCHAR2(200);
BEGIN
   stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
   EXECUTE IMMEDIATE stmt;
   stmt := ''ALTER INDEX ocfv_cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
   EXECUTE IMMEDIATE stmt;

END;
'
   ,next_date => to_date('25-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
   ,interval  => 'TRUNC(SYSDATE+1)+6/24'
   ,no_parse  => FALSE
  );
  SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
COMMIT;
END;
/

===End OracleTest part===