3 # hari's search engine front-end
4 # this program queries the search index created by hari's search index creator application
13 # -----------------------------------------------------------
14 # configuration settings
15 # -----------------------------------------------------------
18 strttime
= time
.time ()
20 # path to the search database index file - edit as necessary
21 search_db_path
= "searchwords.idx"
# path to the file containing the "stop words", i.e. the words which should be excluded from the query
267 # first print the header and the HTML code
268 print "Content-type: text/html; charset=utf-8"
272 <title>Search Results</title>
275 <h1>Search results</h1>
279 # check whether the search database exists at all
280 if not os
.path
.exists (search_db_path
) and not os
.path
.isfile (search_db_path
):
281 print "Search database file seems to be missing. Cannot search."
286 frm
= cgi
.FieldStorage ()
288 # if the form has not been submitted, then ask for it to be submitted
289 if not frm
.has_key ("fromsearch") or frm
.getfirst("query") == "" :
290 print "No search query defined."
295 raw_query
= frm
.getfirst ("query").strip ().lower ()
# get the mode of the query - AND or OR
297 mode
= int (frm
.getfirst ("criteria"))
300 raw_query_list
= raw_query
.split ()
302 # get the database of search terms
304 search_db
= cPickle
.load (open (search_db_path
, "rb"))
306 print "Error reading search database: might not valid."
308 # if the number of search words is too much
309 if len (raw_query_list
) > 5:
310 print "<p>Too many terms. Try fewer search terms (avoid common words like a, and, or, in).</p>"
316 # display the list of search terms
317 print "Search terms: ",
320 search_words
= set ()
322 # add the search words to the set
323 for raw_word
in raw_query_list
:
324 # strip the raw word of all non-alphabetic characters at the beginnning and end
325 word
= raw_word
.strip (":;?!_<>,.+-\"'=`!@#$%^&*()[]{}/= \n\r\t")
327 # only if the word is not in the stopwords list add it to the query
328 if not word
in stopwords_list
:
329 print "<b>" + word
+ "</b>, ",
330 search_words
.add (word
)
331 # just display the word with a strike-through to indicate its non-inclusion
333 print "<strike>" + word
+ "</strike>, ",
335 # if there are no specific terms found
336 if len(search_words
) == 0:
337 print "<p>Search terms are too broad and commonly used words. Try searching with at least one specific term.</p>"
342 # get the search results
343 search_results
= set ()
347 print "<p>Search mode: ",
349 print "documents containing all search terms</p>"
351 print "documents containing any of the search terms</p>"
353 # get the words found in the database
354 for word
in search_words
:
355 # if the word is found
356 if search_db
.has_key (word
):
357 if (flagfirst
== True):
358 search_results
= search_db
[word
]
363 search_results
= search_results
& search_db
[word
]
366 search_results
= search_results | search_db
[word
]
367 # if word is not found and the mode is AND, then return empty result
370 search_results
.clear ()
373 # display the results
375 # if no results are found
376 if len (search_results
) == 0:
377 print "<p>No pages matched your query.</p>"
380 print "<h2>Documents found</h2>"
382 for title
, url
in search_results
:
383 print '<li><a href="' + url
+ '">' + title
+ '</a>'
386 # print the script execution time
387 endtime
= time
.time ()
388 dif
= endtime
- strttime
391 print "<small>Script execution time: ",