X-Git-Url: https://harishankar.org/repos/?p=biaweb_qt.git;a=blobdiff_plain;f=search.py;fp=search.py;h=cc5ab6e618e42c76608cad63eec4133510f367e8;hp=0000000000000000000000000000000000000000;hb=826408979db0e8e4675d51def6ce7dadd305cf9c;hpb=b168780a0609f1a8ca2632d6f5e783f200e594d3 diff --git a/search.py b/search.py new file mode 100755 index 0000000..cc5ab6e --- /dev/null +++ b/search.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python + +# hari's search engine front-end +# this program queries the search index created by hari's search index creator application + +import string +import cPickle +import cgi +import os +import os.path +import time + +# ----------------------------------------------------------- +# configuration settings +# ----------------------------------------------------------- + +# script start time +strttime = time.time () + +# path to the search database index file - edit as necessary +search_db_path = "searchwords.idx" + +# inline list of "stop words" (one per line) which should be excluded from the query +stopwords_list = """$ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +a +able +about +after +again +all +almost +already +also +although +am +an +and +another +any +are +are +around +as +at +b +based +be +because +been +before +being +between +both +bring +but +by +c +came +can +com +come +comes +could +d +did +do +does +doing +done +e +each +eight +else +etc +even +every +f +five +for +four +from +g +get +gets +getting +go +going +got +h +had +has +have +he +he +her +here +him +himself +his +how +however +href +http +i +if +in +including +into +is +it +it +its +j +just +k +kb +know +l +like +looks +m +mailto +make +making +many +may +mb +me +means +might +more +more +most +move +mr +much +must +my +n +need +needs +never +nice +nine +no +not +now +o +of +often +oh +ok +on +on +one +only +or +org +other +our +out +over +own +p +piece +q +r +rather +re +really +s +said +same +say +says +see +seven +several +she +should +since +single +six +so +so +some +something +still 
+stuff +such +t +take +ten +than +that +the +their +them +them +then +there +there +these +they +they +thing +things +this +those +three +through +to +too +took +two +u +under +up +us +use +used +using +usual +v +ve +very +via +w +want +was +way +we +we +well +were +what +when +where +whether +which +while +whilst +who +why +will +with +within +would +x +y +yes +yet +you +your +z""".split ("\n") + +# first print the header and the HTML code +print "Content-type: text/html; charset=utf-8" +print """ + +
+