# Source code for dorie.parscraper.writers.html

from __future__ import unicode_literals

import datetime
import os
import re

from dorie.utilities.check_path import check_path
from dorie.parscraper.parameter import Parameter

def write(parameters, out, path_base, *args, **kwargs):
    """
    Render the ``parameters`` dict as an HTML cheat sheet and store it in ``out``.

    Every category becomes a heading followed by one table of the form

    **category**

    ============ =============== =============== =============== =================
    Parameter    Definition      Possible values Default         Queried at
    ============ =============== =============== =============== =================
    p.key        p.definition    p.values        p.suggestion    all sources of p
    ============ =============== =============== =============== =================

    :param dict parameters: dict with categories as keys and iterables of
        :class:`dorie.parscraper.parameter.Parameter` instances as values.
    :param str out: Path to the output file. Must be writable. It is handed to
        ``check_path`` before the file is opened.
    :param str path_base: Base path to the source files. Needed for creation of
        working links to the files.
    :param str css: Keyword argument, optional. Path to a CSS file whose content
        is included inline into the HTML output.

    Additional positional arguments are accepted and ignored; all keyword
    arguments are forwarded to :func:`_format_document`.
    """
    # fixed properties of the generated document
    document_title = "DORiE parameter cheat sheet"
    column_headings = ["Parameter", "Definition", "Possible values", "Default", "Queried at"]
    column_widths = [13, 40, 17, 10, 20]  # relative column widths

    def row_cells(p):
        # one tuple entry per table column, in the order of column_headings
        return (p.key, p.definition, p.values, p.suggestion, _sources(p, path_base))

    # assemble the document and write it out
    check_path(out)
    with open(out, 'wb') as output:
        html_document = _format_document(
            parameters, document_title, column_headings, column_widths,
            row_cells, **kwargs)
        # non-ASCII characters become numeric character references (&#NNN;)
        encoded = html_document.encode('ascii', 'xmlcharrefreplace')
        output.write(encoded)
def _format_document(parameters, title, headings, widths, table_content, **kwargs):
    """
    Returns the complete html document as a unicode string.

    This defines the skeleton of the html document, loops over the categories,
    and passes all relevant data to :func:`_format_table`, which creates each
    parameter table.

    :param parameters: Parameter dict (category -> iterable of parameters)
    :param title: Title of the document (used for <title> and <h1>)
    :param headings: Iterable of the table headings
    :param widths: Iterable of the relative column widths (numbers)
    :param table_content: Callable that returns the cell contents of one table
        row (one entry per column) when called with a parameter
    :param css: Keyword argument, optional. Path to a CSS file whose content is
        included into the HTML header (inline).
    """
    css = kwargs.get("css")

    parts = [
        u"<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n\n",
        " <title> {} </title>\n".format(title),
    ]

    if css is not None:
        # read inside a context manager so the file handle is always closed
        with open(css) as css_file:
            css_text = css_file.read()
        parts.append(" <style>\n")
        parts.append(css_text)
        parts.append(" </style>\n")

    parts.append("\n</head>\n\n")
    parts.append("<body>\n\n")
    parts.append("<div class='main'>\n")
    parts.append(" <h1> {} </h1>\n\n".format(title))

    # one table per category
    for category in parameters:
        rows = [table_content(p) for p in parameters[category]]
        parts.append(_format_table(category, headings, widths, rows))

    parts.append("\n<div class='footer'>\n")
    parts.append(" Automatically created by the DORiE parameter scraper <br />\n")
    parts.append(" {0:%d-%m-%Y, %H:%M}\n".format(datetime.datetime.today()))
    parts.append("</div>\n")
    parts.append("</div>\n\n")
    parts.append("</body>\n")
    parts.append("</html>")
    return u"".join(parts)


def _format_table(category, headings, widths, rows):
    """
    Assembles a table skeleton, loops over all rows and gets the html code
    for each row from :func:`_format_row`. Returns the table HTML code.

    :param category: Heading to be printed above the table (category)
    :param headings: Table headings as iterable of strings
    :param widths: Relative column widths as iterable of numbers
    :param rows: Row data; an iterable with one iterable of cell texts per row
    """
    parts = [
        " <h2> {} </h2>\n".format(category),
        " <table>\n",
        " <thead>\n",
        _format_row(headings, widths, True),
        " </thead>\n",
        " <tbody>\n",
    ]
    parts.extend(_format_row(row, widths) for row in rows)
    parts.append(" </tbody>\n")
    parts.append(" </table>\n\n")
    return u"".join(parts)


def _format_row(row, widths, th=False):
    """
    Returns HTML code for a single row of a table.

    :param row: Text of each row cell as iterable of strings
    :param widths: Relative width of each column as iterable of numbers. They
        are normalized so that they add up to 100 (percent).
    :param th: If true, use <th> instead of <td> (table headings). Default False.
    """
    format_cell = _format_heading if th else _format_element

    # normalize widths to 100%; the sum is computed once, not once per column
    total = sum(widths)
    percentages = [100*w/total for w in widths]

    cells = [format_cell(element, width) for element, width in zip(row, percentages)]
    return ' <tr>' + ''.join(cells) + '\n </tr>\n'


def _format_element(element, width):
    """
    Returns HTML code for a single table cell. The cell text is passed
    through :func:`_parse_markdown` first.

    :param element: Cell text. Falsy values (e.g. ``None``) give an empty cell.
    :param width: Cell width in percent.
    """
    if not element:
        element = ""
    element = _parse_markdown(element)
    return '\n <td width="{width}%">{elem}</td>'.format(width=width, elem=element)


def _format_heading(element, width):
    """
    Returns HTML code for a single table heading. In contrast to
    :func:`_format_element`, the text is inserted as is (no markdown parsing).

    :param element: Cell text. Falsy values (e.g. ``None``) give an empty cell.
    :param width: Cell width in percent.
    """
    if not element:
        element = ""
    return '\n <th width="{width}%">{elem}</th>'.format(width=width, elem=element)


def _parse_markdown(text):
    """
    Rudimentary markdown/rst support:

    * An empty line is replaced by a paragraph (<p />),
    * Text enclosed by two asterisks is printed bold (<b>),
    * Text in double backticks is marked to be printed monospaced (CSS controlled).

    Returns a string where all markdown has been replaced by its HTML equivalent.

    :param text: String to be parsed.
    """
    # replace two newlines (optionally separated by whitespace) with paragraph
    text = re.sub(r"\n\s*?\n", "<p />", text)
    # replace **text** with bold text
    # (raw replacement strings: \g<1> is a group reference for re.sub, not a
    # string escape, and "\g" in a normal literal is an invalid escape sequence)
    text = re.sub(r"\*\*(.+?)\*\*", r"<b>\g<1></b>", text)
    # replace ``text`` with monospaced text
    text = re.sub(r"``(.+?)``", r"<span class='mono'>\g<1></span>", text)
    return text


def _sources(p, path_base):
    """
    Assembles the cell text for the sources of a parameter.

    ``p._sources`` is iterated as a sequence of
    ``(source_file, line_num, var_type)`` tuples. For each entry a link to the
    source file followed by the line number is emitted; entries are terminated
    by ``<br />``. File names longer than 20 characters are displayed as
    ``...`` plus their last 20 characters; the full name is kept in the
    link's ``title`` attribute.

    Returns a unicode string containing the HTML code.

    :param p: Parameter object
    :param path_base: Base path to the source files. Needed for creation of
        working links to the files.
    """
    links = []
    # var_type is part of each source tuple but is not shown in the output
    for source_file, line_num, _var_type in p._sources:
        full_path = os.path.join(path_base, source_file)

        # truncate long file names
        if len(source_file) > 20:
            link_text = "..." + source_file[-20:]
        else:
            link_text = source_file

        links.append(
            "<a href='file://{2}' title='{3}'>{0}</a>:{1}<br />"
            .format(link_text, line_num, full_path, source_file))
    return u"".join(links)