from __future__ import unicode_literals
import datetime
import os
import re
from dorie.utilities.check_path import check_path
from dorie.parscraper.parameter import Parameter
def write(parameters, out, path_base, *args, **kwargs):
    """
    Writes the contents of the ``parameters`` input dict to a .html file. The output is
    structured as

    **category**

    ============ =============== =============== =============== =================
    Parameter    Definition      Possible values Default         Queried at
    ============ =============== =============== =============== =================
    p.key        p.definition    p.values        p.suggestion    all sources of p
    ============ =============== =============== =============== =================

    The document is rendered and encoded completely *before* the output file
    is opened, so a failure while rendering (e.g. an unreadable CSS file)
    does not leave a truncated or empty output file behind.

    :param dict parameters: dict with categories as keys and iterables of
        :class:`dorie.parscraper.parameter.Parameter` instances as values.
    :param str out: Path to the output file. Must be writable.
    :param str path_base: Base path to the source files. Needed for creation
        of working links to the files.
    :param args: Accepted for call compatibility; not used by this function.
    :param str css: Keyword argument (passed via ``**kwargs``). Path to a CSS
        file containing the styling of the HTML output. Is included into the
        HTML output (inline). Optional.
    """
    # DEFINE SOME PROPERTIES
    title = "DORiE parameter cheat sheet"
    headings = ["Parameter", "Definition", "Possible values", "Default", "Queried at"]
    widths = [13, 40, 17, 10, 20]  # relative column widths

    def table_content(p):
        # One table row per parameter, in the same order as ``headings``.
        return (p.key, p.definition, p.values, p.suggestion, _sources(p, path_base))

    # NOTE(review): check_path is a project helper; presumably it validates or
    # prepares the output location -- confirm against dorie.utilities.check_path.
    check_path(out)

    # RENDER FIRST, THEN WRITE: opening with 'wb' truncates the file, so it
    # must only happen once the complete document is available.
    html_document = _format_document(parameters, title, headings, widths,
                                     table_content, **kwargs)
    # Non-ASCII characters are written as numeric character references.
    encoded = html_document.encode('ascii', 'xmlcharrefreplace')

    # WRITE HTML CODE TO OUTPUT
    with open(out, 'wb') as output:
        output.write(encoded)
def _format_document(parameters, title, headings, widths, table_content, **kwargs):
    """
    Returns the complete html document as a unicode string. This mainly defines
    the skeleton of the html document, loops over the categories, and passes
    all relevant data to :func:`_format_table`, which creates each parameter table.

    :param parameters: Parameter dict (category -> iterable of parameters)
    :param title: Title of the document (used for ``<title>`` and ``<h1>``)
    :param headings: Iterable of the table headings
    :param widths: Iterable of the relative column widths
    :param table_content: Callable that returns the cell contents of one table
        row (one entry per column) when called with a Parameter instance
    :param css: Keyword argument. Path to a CSS file whose content is included
        into the HTML header (inline). Optional
    """
    css = kwargs.get("css")

    parts = [
        u"<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n\n",
        " <title> {} </title>\n".format(title),
    ]

    if css is not None:
        # Read the stylesheet inside a context manager so the file handle is
        # closed deterministically instead of being left to the garbage collector.
        with open(css) as css_file:
            css_text = css_file.read()
        parts += [" <style>\n", css_text, " </style>\n"]

    parts += [
        "\n</head>\n\n",
        "<body>\n\n",
        "<div class='main'>\n",
        " <h1> {} </h1>\n\n".format(title),
    ]

    # One heading + table per category, in the iteration order of the dict.
    for category in parameters:
        rows = [table_content(p) for p in parameters[category]]
        parts.append(_format_table(category, headings, widths, rows))

    parts += [
        "\n<div class='footer'>\n",
        " Automatically created by the DORiE parameter scraper <br />\n",
        # Creation time stamp (local time), formatted as DD-MM-YYYY, HH:MM.
        " {0:%d-%m-%Y, %H:%M}\n".format(datetime.datetime.today()),
        "</div>\n",
        "</div>\n\n",
        "</body>\n",
        "</html>",
    ]
    return "".join(parts)
def _format_table(category, headings, widths, rows):
    """
    Builds the HTML code of one category: an ``<h2>`` heading followed by a
    table whose head row and body rows are rendered by :func:`_format_row`.
    Returns the HTML code as a string.

    :param category: Heading to be printed above the table (category)
    :param headings: Column headings, one entry per column
    :param widths: Relative column widths, one entry per column
    :param rows: Iterable of rows; each row holds the cell contents of one table line
    """
    pieces = [
        " <h2> {} </h2>\n".format(category),
        " <table>\n",
        " <thead>\n",
        _format_row(headings, widths, True),  # heading row uses <th> cells
        " </thead>\n",
        " <tbody>\n",
    ]
    pieces.extend(_format_row(cells, widths) for cells in rows)
    pieces.append(" </tbody>\n")
    pieces.append(" </table>\n\n")
    return "".join(pieces)
def _format_row(row, widths, th=False):
    """
    Returns HTML code for a single row of a table.

    :param row: Content of each row cell, one entry per column
    :param widths: Relative width of each column as iterable of numbers. The
        values are normalized so that they add up to 100 (percent).
    :param th: If true, use <th> instead of <td> (table headings). Default False.
    """
    if th:
        _format_function = _format_heading
    else:
        _format_function = _format_element

    # Materialize once: the widths are both summed and iterated, which would
    # give wrong results for one-shot iterables (e.g. generators).
    widths = list(widths)
    # Hoisted out of the comprehension; the total is the same for every column.
    total = sum(widths)
    # Normalize widths to 100%. NOTE: ``/`` follows the interpreter's division
    # rules, i.e. integer widths yield floats under Python 3.
    widths = [100*w/total for w in widths]

    cells = ''.join([_format_function(element, width)
                     for element, width in zip(row, widths)])
    return ' <tr>' + cells + '\n </tr>\n'
def _format_element(element, width):
    """
    Renders one table body cell (``<td>``) and returns its HTML code.
    The cell text is passed through :func:`_parse_markdown` first; falsy
    content (e.g. ``None``) is rendered as an empty cell.

    :param element: Cell text.
    :param width: Cell width in percent.
    """
    cell_text = _parse_markdown(element or "")
    template = '\n <td width="{width}%">{elem}</td>'
    return template.format(width=width, elem=cell_text)
def _format_heading(element, width):
    """
    Renders one table heading cell (``<th>``) and returns its HTML code.
    Falsy content (e.g. ``None``) is rendered as an empty heading. Unlike
    body cells, the text is inserted verbatim (no markdown parsing).

    :param element: Cell text.
    :param width: Cell width in percent.
    """
    label = element or ""
    template = '\n <th width="{width}%">{elem}</th>'
    return template.format(width=width, elem=label)
def _parse_markdown(text):
    """
    Rudimentary markdown/rst support:

    * An empty line is replaced by a paragraph (<p />),
    * Text enclosed by two asterisks is printed bold (<b>),
    * Text in double backticks is marked to be printed monospaced (CSS controlled).

    Bold and monospace markers are only recognized within a single line,
    because ``.`` does not match newlines here.

    Returns a string where all markdown has been replaced by its HTML equivalent.

    :param text: String to be parsed.
    """
    # replace two newlines (optionally separated by whitespace) with paragraph
    text = re.sub(r"\n\s*?\n", "<p />", text)
    # replace **text** with bold text
    # (raw replacement strings: ``\g`` is not a valid string escape sequence)
    text = re.sub(r"\*\*(.+?)\*\*", r"<b>\g<1></b>", text)
    # replace ``text`` with monospaced text
    text = re.sub(r"``(.+?)``", r"<span class='mono'>\g<1></span>", text)
    return text
def _sources(p, path_base):
    """
    Builds the cell text listing where a parameter is queried. Every entry of
    ``p._sources`` (a tuple of source file, line number and variable type) is
    turned into a link to the source file followed by the line number; the
    entries are separated by ``<br />``. File names longer than 20 characters
    are shortened to their last 20 characters (prefixed with ``...``) in the
    link text, while the full name is kept in the link's ``title``.

    Returns a unicode string containing the HTML code.

    :param p: Parameter object
    :param path_base: Base path to the source files. Needed for creation of working links to the files.
    """
    entry = "<a href='file://{2}' title='{3}'>{0}</a>:{1}<br />"
    chunks = []
    # The variable type is part of each tuple but is not shown in the table.
    for source_file, line_num, _var_type in p._sources:
        # truncate long file names
        shown = source_file if len(source_file) <= 20 else "..." + source_file[-20:]
        target = os.path.join(path_base, source_file)
        chunks.append(entry.format(shown, line_num, target, source_file))
    return u"".join(chunks)