# BiaWeb Website content manager (c) 2010 V.Harishankar
# Site exporter/generator class
import os
import os.path
import sys
import time
import sqlite3
import string
import shutil
import HTMLParser
import cPickle
import biaweb_db
import biaweb_strings
# class to remove HTML tags from a string - used by the search
# generator to get an article without HTML tags
class HTMLTagRemover (HTMLParser.HTMLParser):
    """Strips markup from an HTML fragment, keeping only the text nodes.

    Used by the search index generator to obtain an article's plain text.
    Feed HTML via feed()/close(), then read the result with get_raw_string().
    """
    def __init__ (self):
        HTMLParser.HTMLParser.__init__ (self)
        # text fragments accumulate here and are joined on demand
        self.data_list = []

    # called by the HTMLParser framework for every run of text
    # that is not part of a tag
    def handle_data (self, data):
        self.data_list.append (data)

    # join all collected fragments into the final tag-free string
    def get_raw_string (self):
        return "".join (self.data_list)
# to format the best rated articles in a HTML link list
def html_format_best_rated (best_rated):
    """Format the best rated articles as an HTML bulleted link list.

    best_rated -- sequence of article rows; art[1] is the title,
    art[8] the article stub and art[13] the category stub, giving the
    relative url Category/Article.html.
    Returns the <ul> markup as a single string.
    """
    # NOTE(review): the original markup in these literals was lost when the
    # file was mangled; reconstructed as a plain <ul>/<li> link list from
    # the surrounding comments -- confirm against the site templates
    items = ["<ul>\n"]
    for art in best_rated:
        # art[13] is category stub, art[8] is article stub
        # thus forming the relative url as Category/Article.html
        str_art = ('<li><a href="' + art[13] + "/" + art[8] + '.html">'
                   + art[1] + '</a></li>\n')
        items.append (str_art)
    items.append ("</ul>\n")
    return "".join (items)
# to format categories in a HTML link list
def html_format_categories (cats):
    """Format the categories as an HTML bulleted link list.

    cats -- sequence of category rows; cat[1] is the category name and
    cat[3] the category stub (directory name), so each link points at the
    category index page Category/index.html.
    Returns the <ul> markup as a single string.
    """
    # NOTE(review): the original markup in these literals was lost when the
    # file was mangled; reconstructed as a plain <ul>/<li> link list from
    # the surrounding comments -- confirm against the site templates
    items = ["<ul>\n"]
    for cat in cats:
        # cat[3] is category stub and cat[1] is category name
        str_cat = ('<li><a href="' + cat[3] + '/index.html">'
                   + cat[1] + '</a></li>\n')
        items.append (str_cat)
    items.append ("</ul>\n")
    return "".join (items)
# to convert a rating number into rating images out of 10 stars
def html_format_rating (rating):
    """Convert a rating number into a row of 10 star images.

    rating -- integer 0..10, or -1 for an unrated article.
    Returns "unrated" for -1, otherwise `rating` filled stars followed by
    grey stars padding the row to 10 images.
    """
    # if -1 then return unrated as the text
    if rating == -1:
        return "unrated"
    # NOTE(review): the <img> literals were lost when the file was mangled;
    # filenames star.gif / star-grey.gif are grounded by copy_files_folders,
    # which installs them at the destination root -- confirm the src paths
    # resolve from pages in category subdirectories
    items = []
    # fill up the number of stars for the rating
    for i in range (rating):
        items.append ('<img src="star.gif" alt="*" />')
    # fill up remaining slots (of 10) with grey stars
    for i in range (10 - rating):
        items.append ('<img src="star-grey.gif" alt="-" />')
    return "".join (items)
# function to generate the search index file
# function to generate the search index file
def generate_search_index (dbname, conf, full_text_index = True):
    """Build the search index and write it as a pickle under cgi-bin.

    dbname -- path of the site sqlite database
    conf -- site configuration row; conf[0] is the site url, conf[5] the
    destination directory
    full_text_index -- when True index the article content (field 4),
    otherwise only the keywords (field 3)
    Returns True on success, False on a database or file error.
    """
    # get all the articles
    arts = biaweb_db.site_articles (dbname)
    if arts is False:
        return False
    # choose which article field feeds the index
    if full_text_index:
        searchfield = 4
    else:
        searchfield = 3
    # maps each unique word to a set of (title, url) tuples
    search_index = dict ()
    for art in arts:
        # strip out the HTML tags from the article text
        parser = HTMLTagRemover ()
        parser.feed (art[searchfield])
        parser.close ()
        word_list = parser.get_raw_string ().split ()
        # lowercase each word, trim surrounding punctuation, then index it
        for word in word_list:
            cleanword = word.strip (":;?!_<>,.+-\"'=`!@#$%^&*()[]{}/= \n\r\t").lower ()
            # skip tokens that reduce to nothing (pure punctuation) as well
            # as stop words -- previously "" could end up as an index key
            if cleanword and cleanword not in biaweb_strings.stopwords:
                # title of the search entry should be the article title
                title = art[1]
                # url is http://siteurl/Category/Article.html
                # NOTE(review): assumes conf[0] carries any needed trailing
                # slash -- the RSS generator builds urls the same way
                url = "http://" + conf[0] + art[13] + "/" + art[8] + ".html"
                # dict.has_key is deprecated; setdefault creates the set on
                # first sight of a word and returns the existing one after
                search_index.setdefault (cleanword, set ()).add ((title, url))
    # write the search database as a pickle of search_index
    search_index_path = os.path.join (conf[5], "cgi-bin", "searchwords.idx")
    htaccess_path = os.path.join (conf[5], "cgi-bin", ".htaccess")
    try:
        # dump the dictionary as a binary (protocol 2) pickle
        fsearchindex = open (search_index_path, "wb")
        cPickle.dump (search_index, fsearchindex, 2)
        fsearchindex.close ()
        # .htaccess prevents opening the index file from a web browser
        fhtaccess = open (htaccess_path, "w+")
        fhtaccess.write (biaweb_strings.searchindex_htaccess)
        fhtaccess.close ()
    except (OSError, IOError):
        return False
    return True
# function to copy additional files and folders to the destination path
# function to copy additional files and folders to the destination path
def copy_files_folders (conf, files_to_copy, folders_to_copy):
    """Copy the search script, rating images and extra files/folders.

    conf -- site configuration row; conf[5] is the destination directory
    files_to_copy -- list of (src, dest) file pairs; dest is relative
    to the destination directory
    folders_to_copy -- list of (src, dest) folder pairs, copied recursively
    Returns True on success, False on the first copy error.
    """
    # create the cgi-bin directory and install search.py when it is found
    # next to this program; otherwise the user must copy it manually
    search_script_path = os.path.join (sys.path[0], "search.py")
    if os.path.exists (search_script_path):
        try:
            os.mkdir (os.path.join (conf[5], "cgi-bin"))
            shutil.copy2 (search_script_path, os.path.join (conf[5], "cgi-bin"))
        except (IOError, OSError):
            return False
    # copy the star rating images to the destination root when present;
    # otherwise the user must copy them manually
    for img_name in ("star.gif", "star-grey.gif"):
        img_path = os.path.join (sys.path[0], img_name)
        if os.path.exists (img_path):
            try:
                shutil.copy2 (img_path, conf[5])
            except (IOError, OSError):
                return False
    # additional files: dest is a path relative to the destination root
    # (the deprecated "<> []" guards were dropped -- iterating an empty
    # list is already a no-op)
    for src, dest in files_to_copy:
        full_dest = os.path.join (conf[5], dest)
        try:
            shutil.copy2 (src, full_dest)
        except (IOError, OSError):
            return False
    # additional folders, copied as whole trees
    for src, dest in folders_to_copy:
        full_dest = os.path.join (conf[5], dest)
        try:
            shutil.copytree (src, full_dest)
        except (IOError, OSError):
            return False
    return True
# function to generate article pages
# function to generate article pages
def generate_article_pages (dbname, conf, templates, category_str, bestrated_str):
    """Generate one HTML page per article inside its category directory.

    dbname -- path of the site sqlite database
    conf -- site configuration row (conf[0] site url, conf[5] destination)
    templates -- template rows; [0] is the main page, [1] the article bit
    category_str / bestrated_str -- preformatted sidebar link lists
    Returns True on success, False on a database or file error.
    """
    # main template
    tpl_main = string.Template (templates[0][1])
    # article template
    tpl_articlebit = string.Template (templates[1][1])
    articles = biaweb_db.site_articles (dbname)
    if articles is False:
        # bug fix: was a bare "return" (None) -- callers test "is False",
        # so a database failure here used to pass silently
        return False
    # walk through each article and generate the file in the appropriate
    # category folder
    for art in articles:
        art_cdate = time.ctime (art[5])
        art_mdate = time.ctime (art[6])
        rating_str = html_format_rating (art[9])
        # build the article fragment from the article bit template
        article_str = tpl_articlebit.safe_substitute (article_title = art[1],
                        article_cdate = art_cdate,
                        article_mdate = art_mdate,
                        rating = rating_str,
                        article_contents = art[4])
        # build the full article page around the fragment
        articlepage_str = tpl_main.safe_substitute (site_title = art[1],
                        site_url = "http://" + conf[0],
                        meta_keywords = art[3],
                        meta_description = art[2],
                        page_title = conf[1],
                        page_desc = conf[3],
                        contents_bit = article_str,
                        list_of_categories = category_str,
                        list_best_rated = bestrated_str,
                        copyright = conf[6])
        # write to Category/Article.html
        try:
            farticle = open (os.path.join (conf[5], art[13], art[8] + ".html"), "w+")
            farticle.write (articlepage_str)
            # bug fix: the file handle was never closed
            farticle.close ()
        except (OSError, IOError):
            return False
    return True
# function to generate category directories and indices
# function to generate category directories and indices
def generate_category_indices (dbname, conf, templates, category_str, bestrated_str, category_list):
    """Create each category directory and write its index.html.

    dbname -- path of the site sqlite database
    conf -- site configuration row (conf[0] site url, conf[5] destination)
    templates -- template rows; [0] main page, [3] table bit, [4] table row
    category_str / bestrated_str -- preformatted sidebar link lists
    category_list -- category rows to generate
    Returns True on success, False on a database or file error.
    """
    tpl_main = string.Template (templates[0][1])
    tpl_tablebit = string.Template (templates[3][1])
    tpl_trowbit = string.Template (templates[4][1])
    for cat in category_list:
        # create the directory named after the category stub
        try:
            os.mkdir (os.path.join (conf[5], cat[3]))
        except (IOError, OSError):
            return False
        # fetch the articles belonging to this category
        articles_list = biaweb_db.site_articles (dbname, cat[0])
        if articles_list is False:
            return False
        # one table row per article: url, title, creation time, rating
        rows = [tpl_trowbit.safe_substitute (
                    article_url = art[13] + "/" + art[8] + ".html",
                    title = art[1],
                    created = time.ctime (art[5]),
                    rating = html_format_rating (art[9]))
                for art in articles_list]
        # wrap the rows into the category table
        table_str = tpl_tablebit.safe_substitute (category_title = cat[1],
                        category_desc = cat[2],
                        table_rows = "".join (rows))
        # build the complete category index page
        categoryindex_str = tpl_main.safe_substitute (site_title = conf[1] + " - " + cat[1],
                        site_url = "http://" + conf[0],
                        meta_keywords = conf[2],
                        meta_description = cat[2],
                        page_title = conf[1],
                        page_desc = conf[3],
                        contents_bit = table_str,
                        list_of_categories = category_str,
                        list_best_rated = bestrated_str,
                        copyright = conf[6])
        # write it out as Category/index.html
        try:
            fcatindex = open (os.path.join (conf[5], cat[3], "index.html"), "w+")
            fcatindex.write (categoryindex_str)
            fcatindex.close ()
        except (OSError, IOError):
            return False
    return True
# function to generate the RSS feed for the website
# function to generate the RSS feed for the website
def generate_rss_feed (dbname, conf):
    """Generate subscribe.xml, the RSS feed of the latest articles.

    dbname -- path of the site sqlite database
    conf -- site configuration row; conf[0] site url, conf[4] the number
    of feed items, conf[5] the destination directory
    Returns True on success, False on a database or file error.
    """
    # rss main template
    tpl_rss = string.Template (biaweb_strings.template_rss)
    # rss item bit template
    tpl_rss_itembit = string.Template (biaweb_strings.template_rss_item)
    # get the latest articles (limited by the number of rss items)
    arts = biaweb_db.site_latest_articles (dbname, conf[4])
    if arts is False:
        return False
    rss_item_list = []
    # run through the articles and generate the rss items
    for art in arts:
        # absolute link to the article page
        itemlink = "http://" + conf[0] + art[13] + "/" + art[8] + ".html"
        item_str = tpl_rss_itembit.safe_substitute (item_title = art[1],
                        item_link = itemlink,
                        description = art[2])
        rss_item_list.append (item_str)
    rss_item_str = "".join (rss_item_list)
    # generate the complete feed document
    rss_str = tpl_rss.safe_substitute (title = conf[1],
                        link = "http://" + conf[0],
                        description = conf[3],
                        rss_items = rss_item_str)
    # write it to the rss file
    try:
        frss = open (os.path.join (conf[5], "subscribe.xml"), "w+")
        frss.write (rss_str)
        # bug fix: the file handle was never closed
        frss.close ()
    except (IOError, OSError):
        return False
    return True
# function to generate main index file and stylesheet
# function to generate main index file and stylesheet
def generate_home_page (dbname, conf, templates, category_str, bestrated_str):
    """Write index.html and style.css into the destination directory.

    dbname -- path of the site sqlite database
    conf -- site configuration row (conf[0] site url, conf[4] number of
    news items, conf[5] destination directory)
    templates -- template rows; [0] main page, [2] news bit, [5] the
    stylesheet text, [6] index bit
    category_str / bestrated_str -- preformatted sidebar link lists
    Returns True on success, False on a database or file error.
    """
    tpl_main = string.Template (templates[0][1])
    tpl_indexbit = string.Template (templates[6][1])
    tpl_newsbit = string.Template (templates[2][1])
    # latest articles double as the news items; conf[4] caps the count
    latest_arts = biaweb_db.site_latest_articles (dbname, conf[4])
    if latest_arts is False:
        return False
    # one news fragment per latest article; art[1] is the title,
    # art[2] the summary, art[5] the creation time
    news_items = [tpl_newsbit.safe_substitute (
                      news_title = art[1],
                      news_link = art[13] + "/" + art[8] + ".html",
                      news_datetime = time.ctime (art[5]),
                      news_description = art[2])
                  for art in latest_arts]
    # assemble the index body from the collected news fragments
    indexbit_str = tpl_indexbit.safe_substitute (site_name = conf[1],
                        news_updates = "".join (news_items))
    # fill in the surrounding main page template
    main_str = tpl_main.safe_substitute (site_title = conf[1],
                        site_url = "http://" + conf[0],
                        meta_keywords = conf[2],
                        meta_description = conf[3],
                        page_title = conf[1],
                        page_desc = conf[3],
                        contents_bit = indexbit_str,
                        list_of_categories = category_str,
                        list_best_rated = bestrated_str,
                        copyright = conf[6])
    # write index.html in the destination directory
    try:
        findex = open (os.path.join (conf[5], "index.html"), "w+")
        findex.write (main_str)
        findex.close ()
    except (IOError, OSError):
        return False
    # write style.css in the destination directory
    try:
        fstyle = open (os.path.join (conf[5], "style.css"), "w+")
        fstyle.write (templates[5][1])
        fstyle.close ()
    except (IOError, OSError):
        return False
    return True
# superfunction to generate the site
# superfunction to generate the site
def generate_site (dbname, files_to_copy, folders_to_copy, search_type_full=True):
    """Regenerate the whole site in the configured destination directory.

    dbname -- path of the site sqlite database
    files_to_copy / folders_to_copy -- extra (src, dest) pairs handed to
    copy_files_folders
    search_type_full -- passed through to generate_search_index
    Returns True when every step succeeds, False on the first failure.
    """
    # load configuration, templates and categories; bail out on any failure
    conf = biaweb_db.get_configuration (dbname)
    if conf is False:
        return False
    tpls = biaweb_db.get_templates (dbname)
    if tpls is False:
        return False
    cats = biaweb_db.get_categories (dbname)
    if cats is False:
        return False
    # sidebar link list of categories
    cats_str = html_format_categories (cats)
    best_rated = biaweb_db.site_get_bestrated (dbname)
    if best_rated is False:
        return False
    # sidebar link list of best rated articles
    best_rated_str = html_format_best_rated (best_rated)
    # wipe the destination tree and recreate it empty
    try:
        if os.path.exists (conf[5]):
            shutil.rmtree (conf[5])
        os.mkdir (conf[5])
    except OSError:
        return False
    # run the generation steps in order, stopping at the first failure;
    # each step returns False on error
    steps = (
        lambda: generate_home_page (dbname, conf, tpls, cats_str, best_rated_str),
        lambda: generate_rss_feed (dbname, conf),
        lambda: generate_category_indices (dbname, conf, tpls, cats_str, best_rated_str, cats),
        lambda: generate_article_pages (dbname, conf, tpls, cats_str, best_rated_str),
        lambda: copy_files_folders (conf, files_to_copy, folders_to_copy),
        lambda: generate_search_index (dbname, conf, search_type_full),
    )
    for step in steps:
        if step () is False:
            return False
    # all steps completed successfully
    return True