1 # BiaWeb Website content manager (c) 2010 V.Harishankar
2 # Site exporter/generator class
# class to remove HTML tags from a string - used by the search
# generator to get an article without HTML tags
class HTMLTagRemover (HTMLParser.HTMLParser):
    """Collects only the text nodes of an HTML document fed via feed();
    the concatenated plain text is retrieved with get_raw_string()."""

    def __init__ (self):
        HTMLParser.HTMLParser.__init__ (self)
        # initialize the list for data - this will be converted
        # to a string before returning (per-instance, so parsers
        # never share state)
        self.data_list = []

    # This event gets the data of the string that is anything that
    # is not part of an HTML tag
    def handle_data (self, data):
        self.data_list.append (data)

    # return the string thus collected by the handle_data event
    def get_raw_string (self):
        raw_string = "".join (self.data_list)
        return raw_string
# to format the best rated articles in a HTML link list
def html_format_best_rated (best_rated):
    """Return an HTML <ul> bullet list linking each best-rated article.

    best_rated: sequence of article rows where art[1] is the title,
    art[8] the article stub and art[13] the category stub."""
    items = ["<ul>\n"]
    for art in best_rated:
        # art[13] is category stub, art[8] is article stub
        # thus forming the relative url as Category/Article.html
        str_art = '<li><a href="' + art[13] + '/' + art[8] + '.html">' + art[1] + '</a></li>\n'
        items.append (str_art)
    items.append ("</ul>\n")
    # join once instead of repeated string concatenation
    str_items = "".join (items)
    return str_items
# to format categories in a HTML link list
def html_format_categories (cats):
    """Return an HTML <ul> bullet list linking each category index page.

    cats: sequence of category rows where cat[1] is the category name
    and cat[3] the category stub (directory name)."""
    items = ["<ul>\n"]
    for cat in cats:
        # cat[3] is category stub and cat[1] is category name
        str_cat = '<li><a href="' + cat[3] + '/">' + cat[1] + '</a></li>\n'
        items.append (str_cat)
    items.append ("</ul>\n")
    str_items = "".join (items)
    return str_items
# to convert a rating number into rating images out of 10 stars
def html_format_rating (rating):
    """Return the rating (0..10) as a row of <img> star icons; filled
    stars for the rating value, grey stars for the remainder.

    A rating of -1 means the article is unrated and plain text is
    returned instead of images."""
    # if -1 then return unrated as the text
    # NOTE(review): the exact original wording was lost in extraction;
    # "Unrated" is assumed here - confirm against the upstream source
    if rating == -1:
        return "Unrated"
    items = []
    # fill up the number of stars for the rating
    for i in range (rating):
        items.append ('<img src="star.gif" alt="*" />')
    # fill up remaining slots (of 10) with grey stars
    for i in range (10 - rating):
        items.append ('<img src="star-grey.gif" alt="-" />')
    rating_str = "".join (items)
    return rating_str
# function to generate the search index file
def generate_search_index (dbname, conf, full_text_index = True):
    """Build the pickled search index used by the CGI search script.

    Maps every non-stop-word occurring in the articles to a set of
    (article title, article url) tuples, pickles the dict to
    <conf[5]>/cgi-bin/searchwords.idx and writes a .htaccess alongside
    so the raw index cannot be fetched from a browser.

    dbname: sqlite database path; conf: configuration row (conf[0] is
    the site url, conf[5] the destination directory); full_text_index:
    index the article contents when True, only the keywords otherwise.
    Returns True on success, False on failure."""
    # get all the articles
    arts = biaweb_db.site_articles (dbname)
    # if cannot get articles
    if arts == False:
        return False
    # if full text index, then field should be article content otherwise keywords
    # (art[4] is the article contents, art[3] the keywords string - see the
    # field usage in generate_article_pages)
    if full_text_index:
        searchfield = 4
    else:
        searchfield = 3
    # initialize the search index dictionary
    search_index = dict ()
    # now run through the articles and generate a table of unique words (except
    # stop words)
    for art in arts:
        # now strip out the HTML tags from the articles
        parser = HTMLTagRemover ()
        parser.feed (art[searchfield])
        # get the tag-free text and split it into individual words
        word_list = parser.get_raw_string ().split ()
        # now run through each word, make it lowercase, remove all cruft from it
        # and add it to a dictionary
        for word in word_list:
            cleanword = word.strip (":;?!_<>,.+-\"'=`!@#$%^&*()[]{}/= \n\r\t").lower ()
            # if the word is not a "stop word", then add it to the search database
            if cleanword not in biaweb_strings.stopwords:
                # title of the search entry should be the article title
                title = art[1]
                # url should be the article URL: http://siteurl/Category/Article.html
                url = "http://" + conf[0] + art[13] + "/" + art[8] + ".html"
                # add the title and url as a tuple to the word's set, creating
                # the set first when the word is new; a set collapses duplicate
                # (title, url) pairs automatically.
                # (idiom fix: "in" replaces the removed dict.has_key)
                if cleanword in search_index:
                    search_index[cleanword].add ((title, url))
                else:
                    search_index[cleanword] = set ()
                    search_index[cleanword].add ((title, url))
    # done now write the search database as a python pickle object of search_index
    search_index_path = os.path.join (conf[5], "cgi-bin", "searchwords.idx")
    htaccess_path = os.path.join (conf[5], "cgi-bin", ".htaccess")
    try:
        # open the file in write binary mode and dump the dictionary as a
        # pickle object (protocol 2, binary)
        fsearchindex = open (search_index_path, "wb")
        cPickle.dump (search_index, fsearchindex, 2)
        fsearchindex.close ()
        # write the htaccess file to prevent opening the index file from web browser
        fhtaccess = open (htaccess_path, "w+")
        fhtaccess.write (biaweb_strings.searchindex_htaccess)
        fhtaccess.close ()
    except (OSError, IOError):
        # bug fix: the original "except OSError, IOError:" (Python 2 form)
        # caught ONLY OSError, binding it to the name IOError; the tuple
        # form catches both exception types
        return False
    # finally return true
    return True
# function to copy additional files and folders to the destination path
def copy_files_folders (conf, files_to_copy, folders_to_copy):
    """Copy the search CGI script, the rating star images and any
    user-specified extra files/folders into the destination directory.

    conf: configuration row (conf[5] is the destination directory);
    files_to_copy / folders_to_copy: lists of (src, relative dest)
    pairs. Returns True on success, False on the first copy failure."""
    # create the cgi-bin directory and try to copy search.py into the destination directory if possible
    # otherwise user must copy it manually
    search_script_path = os.path.join (sys.path[0], "search.py")
    if os.path.exists (search_script_path):
        try:
            os.mkdir (os.path.join (conf[5], "cgi-bin"))
            shutil.copy2 (search_script_path, os.path.join (conf[5], "cgi-bin"))
        except (IOError, OSError):
            # bug fix: "except IOError, OSError:" (Python 2 form) caught
            # only IOError, binding it to the name OSError; the tuple
            # form catches both - same fix applied below
            return False
    # try to copy the star rating images to destination directory if possible
    # otherwise user must copy it manually
    rating_img_star = os.path.join (sys.path[0], "star.gif")
    rating_img_greystar = os.path.join (sys.path[0], "star-grey.gif")
    if os.path.exists (rating_img_star):
        try:
            shutil.copy2 (rating_img_star, conf[5])
        except (IOError, OSError):
            return False
    if os.path.exists (rating_img_greystar):
        try:
            shutil.copy2 (rating_img_greystar, conf[5])
        except (IOError, OSError):
            return False
    # additional files to copy
    # check if files to copy is not empty (the deprecated "<>" operator
    # is replaced by "!=")
    if files_to_copy != []:
        for src, dest in files_to_copy:
            # get full path from relative path in dest
            full_dest = os.path.join (conf[5], dest)
            try:
                shutil.copy2 (src, full_dest)
            except (IOError, OSError):
                return False
    # additional folders to copy
    if folders_to_copy != []:
        # now copy the folders
        for src, dest in folders_to_copy:
            # get full path from relative path in dest
            full_dest = os.path.join (conf[5], dest)
            try:
                shutil.copytree (src, full_dest)
            except (IOError, OSError):
                return False
    # finally return true
    return True
# function to generate article pages
def generate_article_pages (dbname, conf, templates, category_str, bestrated_str):
    # Renders one static HTML file per article into <conf[5]>/<category stub>/.
    # conf: site configuration row (conf[0] site url, conf[1] site title,
    # conf[5] destination directory); templates: rows of (name, text) template
    # pairs; category_str / bestrated_str: pre-rendered sidebar HTML fragments.
    # NOTE(review): several original lines are absent from this extract (the
    # error-return branch, the article loop header, the "try:" opener and the
    # trailing return); all surviving statements are preserved verbatim and
    # the gaps are marked below.
    # main page template text is templates[0][1]
    tpl_main = string.Template (templates[0][1])
    # article bit template
    tpl_articlebit = string.Template (templates[1][1])
    # get all articles from the database
    articles = biaweb_db.site_articles (dbname)
    if articles == False:
    # [missing line(s) in extract: presumably "return False"]
    # walk through each article and generate the file in the appropriate category
    # [missing line(s) in extract: presumably "for art in articles:"]
        # art[5] is the creation time, art[6] the modification time (epoch seconds)
        art_cdate = time.ctime (art[5])
        art_mdate = time.ctime (art[6])
        # art[9] is the numeric rating, rendered as star images
        rating_str = html_format_rating (art[9])
        # now build the article from the article bit template
        article_str = tpl_articlebit.safe_substitute (article_title = art[1],
                article_cdate = art_cdate,
                article_mdate = art_mdate,
                # [missing keyword-argument line in extract - presumably the
                # rating substitution using rating_str; confirm upstream]
                article_contents = art[4])
        # now build the article page
        articlepage_str = tpl_main.safe_substitute (site_title = conf[1],
                site_url = "http://" + conf[0],
                meta_keywords = art[3],
                meta_description = art[2],
                page_title = conf[1],
                # [missing keyword-argument line in extract]
                contents_bit = article_str,
                list_of_categories = category_str,
                list_best_rated = bestrated_str,
                # [missing closing argument/paren in extract]
        # write to the article file
        # [missing line in extract: presumably "try:"]
            farticle = open (os.path.join (conf[5], art[13], art[8] + ".html"), "w+")
            farticle.write (articlepage_str)
        # NOTE(review): Python 2 form below catches ONLY OSError and binds it
        # to the name IOError; "except (OSError, IOError):" is almost certainly
        # what was intended
        except OSError, IOError:
        # [missing handler body in extract: presumably "return False"]
    # finally return true
    # [missing line in extract: presumably "return True"]
# function to generate category directories and indices
def generate_category_indices (dbname, conf, templates, category_str, bestrated_str, category_list):
    # Creates one directory per category under conf[5] and writes a
    # <category stub>/index.html page listing that category's articles
    # as table rows (title/date/rating).
    # NOTE(review): this extract is missing several original lines (list
    # initialisation, "try:" openers, error returns, closing parentheses);
    # surviving statements are preserved verbatim and gaps are marked.
    # main page template
    tpl_main = string.Template (templates[0][1])
    # article table template
    tpl_tablebit = string.Template (templates[3][1])
    # table row bit template
    tpl_trowbit = string.Template (templates[4][1])
    # run through each category and generate category index page
    for cat in category_list:
        # [missing line in extract: presumably "try:"]
        # create the category directory - cat[3] is the category stub
            os.mkdir (os.path.join (conf[5], cat[3]))
        # NOTE(review): Python 2 form catches ONLY IOError, binding it to the
        # name OSError; "except (IOError, OSError):" is almost certainly meant
        except IOError, OSError:
        # [missing handler body in extract]
        # now get the list of articles for the specified category (cat[0] is
        # the category id)
        articles_list = biaweb_db.site_articles (dbname, cat[0])
        if articles_list == False:
        # [missing line(s) in extract: error handling and, presumably, the
        # "tableitems" list initialisation]
        # run through the list of articles in category
        for art in articles_list:
            # relative article url: CategoryStub/ArticleStub.html
            url = art[13] + "/" + art[8] + ".html"
            # art[5] is the creation time (epoch seconds)
            creattime = time.ctime (art[5])
            # art[9] is the numeric rating rendered as star images
            rating_str = html_format_rating (art[9])
            # now build the table rows for each article
            tableitem_str = tpl_trowbit.safe_substitute (article_url = url,
                    # [missing keyword arguments and closing paren in extract]
            tableitems.append (tableitem_str)
        # generate the rows as a string
        tablerows_str = "".join (tableitems)
        # now create the page template
        table_str = tpl_tablebit.safe_substitute (category_title = cat[1],
                category_desc = cat[2],
                table_rows = tablerows_str)
        # now create the index page
        categoryindex_str = tpl_main.safe_substitute (site_title = conf[1],
                site_url = "http://" + conf[0],
                meta_keywords = conf[2],
                meta_description = cat[2],
                page_title = conf[1],
                # [missing keyword-argument line in extract]
                contents_bit = table_str,
                list_of_categories = category_str,
                list_best_rated = bestrated_str,
                # [missing closing argument/paren in extract]
        # now write to Category/index.html
        # [missing line in extract: presumably "try:"]
            fcatindex = open (os.path.join (conf[5], cat[3], "index.html"), "w+")
            fcatindex.write (categoryindex_str)
        # NOTE(review): Python 2 form catches ONLY OSError, binding it to the
        # name IOError; a tuple "except (OSError, IOError):" is intended
        except OSError, IOError:
        # [missing handler body in extract: presumably "return False"]
    # finally return true
    # [missing line in extract: presumably "return True"]
# function to generate the RSS feed for the website
def generate_rss_feed (dbname, conf):
    """Generate <conf[5]>/subscribe.xml, an RSS feed of the latest
    articles.

    dbname: sqlite database path; conf: configuration row (conf[0] site
    url, conf[1] site title, conf[3] site description, conf[4] number of
    feed items, conf[5] destination directory). Returns True on success,
    False when the articles cannot be read or the file cannot be written."""
    # rss feed main template
    tpl_rss = string.Template (biaweb_strings.template_rss)
    # rss item bit template
    tpl_rss_itembit = string.Template (biaweb_strings.template_rss_item)
    # get the latest articles (limit by number of rss items)
    arts = biaweb_db.site_latest_articles (dbname, conf[4])
    # if cannot get the articles
    if arts == False:
        return False
    rss_item_list = []
    # run through the articles and generate the rss items
    for art in arts:
        # item link is http://siteurl/CategoryStub/ArticleStub.html
        itemlink = "http://" + conf[0] + art[13] + "/" + art[8] + ".html"
        # art[1] is the title, art[2] the summary/description
        item_str = tpl_rss_itembit.safe_substitute (item_title = art[1],
                        item_link = itemlink,
                        description = art[2])
        rss_item_list.append (item_str)
    # now get the rss items as a string
    rss_item_str = "".join (rss_item_list)
    # now generate the feed
    rss_str = tpl_rss.safe_substitute (title = conf[1],
                    link = "http://" + conf[0],
                    description = conf[3],
                    rss_items = rss_item_str)
    # now try to write it to the rss file
    try:
        frss = open (os.path.join (conf[5], "subscribe.xml"), "w+")
        frss.write (rss_str)
        frss.close ()
    except (IOError, OSError):
        # bug fix: the original "except IOError, OSError:" (Python 2 form)
        # caught ONLY IOError, binding it to the name OSError; the tuple
        # form catches both exception types
        return False
    # finally return true
    return True
# function to generate main index file and stylesheet
def generate_home_page (dbname, conf, templates, category_str, bestrated_str):
    # Writes index.html (the latest-news front page) and style.css into the
    # destination directory conf[5].
    # NOTE(review): this extract is missing some original lines (error return,
    # news list initialisation, "try:" openers, closing parens, final return);
    # surviving statements are preserved verbatim and gaps are marked.
    # main page template
    tpl_main = string.Template (templates[0][1])
    # index contents bit template
    tpl_indexbit = string.Template (templates[6][1])
    # news item bit template
    tpl_newsbit = string.Template (templates[2][1])
    # get the latest articles - conf[4] is num of rss entries to be used also
    latest_arts = biaweb_db.site_latest_articles (dbname, conf[4])
    if latest_arts == False:
    # [missing line(s) in extract: presumably "return False" and the
    # "news_items" list initialisation]
    # Run through the latest articles
    # for the num of latest news items on index
    for art in latest_arts:
        # url is Category/Article.html
        url = art[13] + "/" + art[8] + ".html"
        # art[5] is creation time
        strdate = time.ctime (art[5])
        # now populate the template variables. art[1] is title, art[2] is summary
        strnews = tpl_newsbit.safe_substitute (news_title = art[1],
                # [missing keyword-argument line in extract - presumably the
                # news link substitution using url; confirm upstream]
                news_datetime = strdate,
                news_description = art[2]
                # [missing closing paren in extract]
        news_items.append (strnews)
    # now convert it into a string
    newsbit_str = "".join (news_items)
    # now populate the index template
    indexbit_str = tpl_indexbit.safe_substitute (site_name = conf[1],
            news_updates = newsbit_str
            # [missing closing paren in extract]
    # now populate the main page template
    main_str = tpl_main.safe_substitute (site_title = conf[1],
            site_url = "http://" + conf[0],
            meta_keywords = conf[2],
            meta_description = conf[3],
            page_title = conf[1],
            # [missing keyword-argument line in extract]
            contents_bit = indexbit_str,
            list_of_categories = category_str,
            list_best_rated = bestrated_str,
            # [missing closing argument/paren in extract]
    # write the index.html file in the destination directory
    # [missing line in extract: presumably "try:"]
        findex = open (os.path.join (conf[5], "index.html"), "w+")
        findex.write (main_str)
    # NOTE(review): Python 2 form catches ONLY IOError, binding it to the name
    # OSError; "except (IOError, OSError):" is almost certainly intended -
    # same remark for the second handler below
    except IOError, OSError:
    # [missing handler body in extract: presumably "return False"]
    # write the style.css file in the destination directory
    # [missing line in extract: presumably "try:"]
        fstyle = open (os.path.join (conf[5], "style.css"), "w+")
        # templates[5][1] is the raw stylesheet text
        fstyle.write (templates[5][1])
    except IOError, OSError:
    # [missing lines in extract: presumably "return False" and final
    # "return True"]
# superfunction to generate the site
def generate_site (dbname, files_to_copy, folders_to_copy, search_type_full = True):
    # Orchestrates a full static-site build: reads configuration, templates
    # and categories from the database, rebuilds the destination tree and
    # calls each page/feed/index generator in turn.
    # NOTE(review): this extract is missing the "== False" error-return check
    # after each step and the destination-directory recreation; surviving
    # statements are preserved verbatim and gaps are marked.
    # get the configuration
    conf = biaweb_db.get_configuration (dbname)
    # if cannot get configuration
    # [missing line(s) in extract: presumably "if conf == False: return False"
    # and a comment introducing the template fetch]
    tpls = biaweb_db.get_templates (dbname)
    # [missing line(s) in extract: presumably "if tpls == False: return False"]
    # get the list of categories
    cats = biaweb_db.get_categories (dbname)
    # cannot get categories return false
    # [missing line(s) in extract: presumably "if cats == False: return False"]
    # format the categories as a html bulleted list
    cats_str = html_format_categories (cats)
    # get the best rated articles
    best_rated = biaweb_db.site_get_bestrated (dbname)
    if best_rated == False:
    # [missing line in extract: presumably "return False"]
    # format the best rated articles as a html bulleted list
    best_rated_str = html_format_best_rated (best_rated)
    # remove the destination tree and recreate it
    # [missing line in extract: possibly "try:"]
    if os.path.exists (conf[5]):
        shutil.rmtree (conf[5])
    # [missing line(s) in extract: presumably recreation of conf[5] via
    # os.mkdir and an exception handler]
    # generate the index page including style sheet
    ret = generate_home_page (dbname, conf, tpls, cats_str, best_rated_str)
    # [missing line(s) in extract: presumably "if ret == False: return False"]
    # generate the rss feed
    ret = generate_rss_feed (dbname, conf)
    # [missing line(s) in extract: presumably "if ret == False: return False"]
    # generate the category directories and indices
    ret = generate_category_indices (dbname, conf, tpls, cats_str, best_rated_str, cats)
    # [missing line(s) in extract: presumably "if ret == False: return False"]
    # generate the article pages
    ret = generate_article_pages (dbname, conf, tpls, cats_str, best_rated_str)
    # [missing line(s) in extract: presumably "if ret == False: return False"]
    # copy other files/folders into the destination path
    ret = copy_files_folders (conf, files_to_copy, folders_to_copy)
    # [missing line(s) in extract: presumably "if ret == False: return False"]
    # now generate the search index database
    ret = generate_search_index (dbname, conf, search_type_full)
    # [missing line(s) in extract: presumably "if ret == False: return False"]
    # finally when all is successfully done return true
    # [missing line in extract: presumably "return True"]