import sqlite3
import string
import shutil
+import HTMLParser
+import cPickle
import biaweb_db
+import biaweb_strings
+
# Strips markup from an HTML document by collecting only the character-data
# events fired by the base HTMLParser; the search-index generator uses this
# so tag names and attributes do not pollute the word list.
class HTMLTagRemover (HTMLParser.HTMLParser):
    def __init__ (self):
        HTMLParser.HTMLParser.__init__ (self)
        # plain-text fragments gathered while feeding the document;
        # joined into a single string only when asked for
        self._chunks = []

    # called by the base parser for every run of text found outside a tag
    def handle_data (self, data):
        self._chunks.append (data)

    # concatenate everything collected so far and hand it back
    def get_raw_string (self):
        return "".join (self._chunks)
# to format the best rated articles in a HTML link list
# NOTE(review): this diff hunk elides the code that builds `items`
# (presumably a list of link strings derived from best_rated) -- as shown,
# `items` is undefined in this fragment; confirm against the full file.
def html_format_best_rated (best_rated):
	rating_str = "".join (items)
	return rating_str
# function to generate the search index file
def generate_search_index (dbname, conf, full_text_index = True):
    """Build the site search index and write it to cgi-bin/searchwords.idx
    as a binary pickle, plus a .htaccess that blocks direct web access to
    the index file.

    dbname          -- path to the site database
    conf            -- site configuration tuple (conf[0] = site url,
                       conf[5] = destination/output directory)
    full_text_index -- True to index full article text (field 4),
                       False to index only the keywords (field 3)

    Returns True on success, False on any failure."""
    # get all the articles
    arts = biaweb_db.site_articles (dbname)
    # if cannot get articles
    if arts is False:
        return False

    # if full text index, then field should be article content otherwise keywords
    if full_text_index:
        searchfield = 4
    else:
        searchfield = 3

    # search_index maps each cleaned lower-cased word to a set of
    # (article title, article url) tuples
    search_index = dict ()

    # now run through the articles and generate a table of unique words
    # (except stop words)
    for art in arts:
        # strip out the HTML tags from the article text
        parser = HTMLTagRemover ()
        parser.feed (art[searchfield])
        parser.close ()
        # get the word list
        word_list = parser.get_raw_string ().split ()

        # title/url are identical for every word of this article, so
        # compute them once outside the word loop.
        # title of the search entry should be the article title
        title = art[1]
        # url should be the article URL: http://siteurl/Category/Article.html
        url = "http://" + conf[0] + art[13] + "/" + art[8] + ".html"

        # now run through each word, make it lowercase, remove all cruft
        # from it and add it to the dictionary
        for word in word_list:
            cleanword = word.strip (":;?!_<>,.+-\"'=`!@#$%^&*()[]{}/= \n\r\t").lower ()
            # skip tokens that strip down to nothing (pure punctuation),
            # and skip "stop words"
            if cleanword and cleanword not in biaweb_strings.stopwords:
                # setdefault creates the set on first sight of the word
                # (replaces the deprecated dict.has_key test)
                search_index.setdefault (cleanword, set ()).add ((title, url))

    # done - now write the search database as a pickle of search_index
    # NOTE(review): assumes `import os` exists at file level (not visible
    # in this hunk) -- confirm
    search_index_path = os.path.join (conf[5], "cgi-bin", "searchwords.idx")
    htaccess_path = os.path.join (conf[5], "cgi-bin", ".htaccess")
    try:
        # open the index file in write binary mode; close in finally so
        # the handle is not leaked if the dump raises
        fsearchindex = open (search_index_path, "wb")
        try:
            # dump the dictionary as a pickle object in binary mode
            cPickle.dump (search_index, fsearchindex, 2)
        finally:
            fsearchindex.close ()
        # write the htaccess file to prevent opening the index file from
        # a web browser
        fhtaccess = open (htaccess_path, "w+")
        try:
            fhtaccess.write (biaweb_strings.searchindex_htaccess)
        finally:
            fhtaccess.close ()
    except (OSError, IOError):
        return False

    # finally return true
    return True
+
# function to copy additional files and folders to the destination path
# NOTE(review): diff hunk with context elided -- the loops that bind
# `src`/`dest` for the two copy passes, and the definitions of
# search_script_path / rating_img_star / rating_img_greystar, are not
# visible here. The -/+ pairs below fix the exception syntax: the old
# "except IOError, OSError" only catches IOError (binding it to the name
# OSError); the tuple form catches both.
def copy_files_folders (conf, files_to_copy, folders_to_copy):
	# create the cgi-bin directory and try to copy search.py into the destination directory if possible
	try:
		os.mkdir (os.path.join (conf[5], "cgi-bin"))
		shutil.copy2 (search_script_path, os.path.join(conf[5], "cgi-bin"))
-	except IOError, OSError:
+	except (IOError, OSError):
		return False
	# try to copy the star rating images to destination directory if possible
	if os.path.exists (rating_img_star):
		try:
			shutil.copy2 (rating_img_star, conf[5])
-		except IOError, OSError:
+		except (IOError, OSError):
			return False
	if os.path.exists (rating_img_greystar):
		try:
			shutil.copy2 (rating_img_greystar, conf[5])
-		except IOError, OSError:
+		except (IOError, OSError):
			return False
	# additional files to copy
	full_dest = os.path.join (conf[5], dest)
	try:
		shutil.copy2 (src, full_dest)
-	except IOError, OSError:
+	except (IOError, OSError):
		return False
	# additional folders to copy
	full_dest = os.path.join (conf[5], dest)
	try:
		shutil.copytree (src, full_dest)
-	except IOError, OSError:
+	except (IOError, OSError):
		return False
	# finally return true
	return True
-
-
# function to generate article pages
# NOTE(review): diff hunk with most of the body elided (the per-article
# loop header and the rest of the safe_substitute argument list are not
# visible). The site_title -/+ pair changes the page title from the site
# title (conf[1]) to the article title (art[1]); the except -/+ pairs fix
# the old "except E1, E2" syntax to the correct tuple form.
def generate_article_pages (dbname, conf, templates, category_str, bestrated_str):
	# main template
	# get all articles from the database
	articles = biaweb_db.site_articles (dbname)
-	if articles == False:
+	if articles is False:
		return
	# walk through each article and generate the file in the appropriate category
		article_contents = art[4])
	# now build the article page
-		articlepage_str = tpl_main.safe_substitute (site_title = conf[1],
+		articlepage_str = tpl_main.safe_substitute (site_title = art[1],
			site_url = "http://" + conf[0],
			meta_keywords = art[3],
			meta_description = art[2],
		try:
			farticle = open (os.path.join (conf[5], art[13], art[8] + ".html"), "w+")
			farticle.write (articlepage_str)
-		except OSError, IOError:
+		except (OSError, IOError):
			return False
	# finally return true
	# NOTE(review): from here on this hunk is the interior of the category
	# index generator (presumably generate_category_indices, called from
	# generate_site) whose def line and per-category loop header are elided
	# from the diff. The site_title -/+ pair appends the category name
	# (cat[1]) to the page title; the except -/+ pairs fix the old
	# "except E1, E2" syntax to the correct tuple form.
	try:
		# create the category directory
		os.mkdir (os.path.join (conf[5], cat[3]))
-	except IOError, OSError:
+	except (IOError, OSError):
		return False
	# now get the list of articles for the specified category
	articles_list = biaweb_db.site_articles (dbname, cat[0])
-	if articles_list == False:
+	if articles_list is False:
		return False
	tableitems = []
		table_rows = tablerows_str)
	# now create the index page
-	categoryindex_str = tpl_main.safe_substitute (site_title = conf[1],
+	categoryindex_str = tpl_main.safe_substitute (site_title = conf[1] + " - " + cat[1],
		site_url = "http://" + conf[0],
		meta_keywords = conf[2],
		meta_description = cat[2],
		fcatindex = open (os.path.join (conf[5], cat[3], "index.html"), "w+")
		fcatindex.write (categoryindex_str)
		fcatindex.close ()
-	except OSError, IOError:
+	except (OSError, IOError):
		return False
	# finally return true
	return True
# function to generate the RSS feed for the website
def generate_rss_feed (dbname, conf):
    """Generate subscribe.xml (the site RSS feed) in the destination
    directory.

    dbname -- path to the site database
    conf   -- site configuration tuple (conf[0] = site url,
              conf[1] = site title, conf[3] = site description,
              conf[4] = number of feed items, conf[5] = destination dir)

    Returns True on success, False on failure."""
    # rss main template
    tpl_rss = string.Template (biaweb_strings.template_rss)
    # rss item bit template
    tpl_rss_itembit = string.Template (biaweb_strings.template_rss_item)

    # get the latest articles (limit by number of rss items)
    arts = biaweb_db.site_latest_articles (dbname, conf[4])
    if arts is False:
        return False

    rss_item_list = []
    # run through the articles and generate the rss items
    for art in arts:
        # item link: http://siteurl/Category/Article.html
        itemlink = "http://" + conf[0] + art[13] + "/" + art[8] + ".html"
        item_str = tpl_rss_itembit.safe_substitute (item_title = art[1],
                                item_link = itemlink,
                                description = art[2])
        rss_item_list.append (item_str)

    # now get the rss items as a string
    rss_item_str = "".join (rss_item_list)

    # now generate the feed
    rss_str = tpl_rss.safe_substitute (title = conf[1],
                    link = "http://" + conf[0],
                    description = conf[3],
                    rss_items = rss_item_str)

    # now try to write it to the rss file
    # NOTE(review): assumes `import os` exists at file level (not visible
    # in this hunk) -- confirm
    try:
        frss = open (os.path.join (conf[5], "subscribe.xml"), "w+")
        try:
            frss.write (rss_str)
        finally:
            # the original never closed this handle -- close it so the
            # feed is flushed to disk and the descriptor is released
            frss.close ()
    except (IOError, OSError):
        return False

    # finally return true
    return True
+
# function to generate main index file and stylesheet
# NOTE(review): diff hunk with the middle of the body elided (the code
# building news_items/main_str and the try: lines guarding both writes are
# not visible). The -/+ pairs switch identity comparison (is False) for the
# sentinel check and fix the old "except E1, E2" syntax to the tuple form.
def generate_home_page (dbname, conf, templates, category_str, bestrated_str):
	# main template
	# get the latest articles - conf[4] is num of rss entries to be used also
	latest_arts = biaweb_db.site_latest_articles (dbname, conf[4])
-	if latest_arts == False:
+	if latest_arts is False:
		return False
	news_items = []
		findex = open (os.path.join (conf[5], "index.html"), "w+")
		findex.write (main_str)
		findex.close ()
-	except IOError, OSError:
+	except (IOError, OSError):
		return False
	# write the style.css file in the destination directory
		fstyle = open (os.path.join (conf[5], "style.css"), "w+")
		fstyle.write (templates[5][1])
		fstyle.close ()
-	except IOError, OSError:
+	except (IOError, OSError):
		return False
	return True
# NOTE(review): top-level driver. This hunk adds early-exit checks for the
# configuration/templates, wires in the new generate_rss_feed and
# generate_search_index steps, and switches `== False` to `is False`
# (the db helpers return False as an error sentinel, so identity is the
# right test and avoids matching empty results). The assignment of
# cats_str and the trailing `return True` are elided from this hunk --
# confirm against the full file.
def generate_site (dbname, files_to_copy, folders_to_copy, search_type_full=True):
	# get the configuration
	conf = biaweb_db.get_configuration (dbname)
+	# if cannot get configuration
+	if conf is False:
+		return False
+
	# get the templates
	tpls = biaweb_db.get_templates (dbname)
+	if tpls is False:
+		return False
	# get the list of categories
	cats = biaweb_db.get_categories (dbname)
	# cannot get categories return false
-	if cats == False:
+	if cats is False:
		return False
	# format the categories as a html bulleted list
	# get the best rated articles
	best_rated = biaweb_db.site_get_bestrated (dbname)
	# if cannot retrieve
-	if best_rated == False:
+	if best_rated is False:
		return False
	# format the best rated articles as a html bulleted list
	best_rated_str = html_format_best_rated (best_rated)
	# generate the index page including style sheet
	ret = generate_home_page (dbname, conf, tpls, cats_str, best_rated_str)
-	if ret == False:
+	if ret is False:
+		return False
+
+	# generate the rss feed
+	ret = generate_rss_feed (dbname, conf)
+	if ret is False:
		return False
	# generate the category directories and indices
	ret = generate_category_indices (dbname, conf, tpls, cats_str, best_rated_str, cats)
-	if ret == False:
+	if ret is False:
		return False
	# generate the article pages
	ret = generate_article_pages (dbname, conf, tpls, cats_str, best_rated_str)
-	if ret == False:
+	if ret is False:
		return False
	# copy other files/folders into the destination path
	ret = copy_files_folders (conf, files_to_copy, folders_to_copy)
-	if ret == False:
+	if ret is False:
+		return False
+
+	# now generate the search index database
+	ret = generate_search_index (dbname, conf, search_type_full)
+	if ret is False:
		return False
	# finally when all is successfully done return true