diff options
author | Blake DeMarcy <ofunknowndescent@gmail.com> | 2017-04-11 15:31:01 -0500 |
---|---|---|
committer | Blake DeMarcy <ofunknowndescent@gmail.com> | 2017-04-11 15:31:01 -0500 |
commit | b731ab69fad4112b126b1f7586e433eb2f21fbcc (patch) | |
tree | cdb36c30cd8168061e5ec90e5ba92cb9706cde3a /src | |
parent | 28680865eec35c7abaa994fa6e76f837f8cb20fe (diff) | |
download | bbj-b731ab69fad4112b126b1f7586e433eb2f21fbcc.tar.gz |
primitive, awful text formatting
Diffstat (limited to 'src')
-rw-r--r-- | src/formatting.py | 123 |
1 files changed, 112 insertions, 11 deletions
"""
Text formatting helpers for message bodies.

This module is not complete and none of its functions are currently
used elsewhere. Subject to major refactoring.
"""

import re

# Sample input kept around for manual experimentation with the parser.
test = """
This is a small paragraph
thats divided between a
few rows.

this opens a few linequotes.
>this is a few
>rows of
>sequential line breaks
and this is what follows right after
"""

# Third-party/HTML helpers are currently unused by this module:
# from markdown import markdown
# from html import escape

colors = [
    # 0      1         2        3       4       5
    "red", "yellow", "green", "blue", "cyan", "magenta"
]

markup = [
    "bold", "italic", "underline", "linequote", "quote", "rainbow"
]

# tokens being [red: this will be red] and [bold: this will be bold]
# tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)

# quotes being references to other post_ids, like >>34 or >>0 for OP
quotes = re.compile(r">>([0-9]+)")

# linequotes being chan-style greentext,
# >like this
linequotes = re.compile(r"^(>.+)$", flags=re.MULTILINE)


def parse_segments(text, sanitize_linequotes=True):
    """
    Parse linequotes, quotes, and paragraphs into their appropriate
    representations.

    Paragraphs (runs of text separated by two or more newlines) are
    returned as separate strings in a list. Within a paragraph, lines are
    joined with single spaces, ``>>N`` references become ``[quote: N]``
    and lines starting with ``>`` become ``[linequote: ...]``. No space is
    inserted after a linequote.

    When SANITIZE_LINEQUOTES is true, every ``]`` inside a linequote is
    backslash-escaped so that it cannot terminate the generated
    ``[linequote: ...]`` expression early.

    Paragraphs that turn out to be empty are kept as empty strings, so the
    output has one entry per input paragraph.
    """
    result = []
    for paragraph in re.split("\n{2,}", text):
        pieces = []
        for segment in paragraph.strip().split("\n"):
            segment = segment.strip()
            if not segment:
                continue
            segment = quotes.sub(r"[quote: \1]", segment)
            if segment.startswith(">"):
                inner = segment
                if sanitize_linequotes:
                    inner = inner.replace("]", "\\]")
                pieces.append("[linequote: %s]" % inner)
            else:
                pieces.append(segment + " ")
        result.append("".join(pieces).strip())
    return result


def _paragraph_expressions(paragraph, directives):
    """
    Split one already-segmented PARAGRAPH into (directive, body) tuples.

    DIRECTIVES is the collection of recognised directive names. See
    sequential_expressions for a description of the output.
    """
    # Each entry is [directive, body]; a new entry is pushed every time a
    # directive opens or closes, and empty bodies are filtered at the end.
    stack = [[None, ""]]
    # Currently open directives, innermost last. The None sentinel at the
    # bottom represents plain, undecorated text and is never popped.
    nest = [None]
    escaped = False
    # Number of upcoming characters to ignore (the "name: " part that
    # follows an opening bracket).
    skip = 0
    last_index = len(paragraph) - 1

    for index, char in enumerate(paragraph):
        if skip:
            skip -= 1
            continue

        opened = None
        if not escaped and char == "[":
            end = paragraph.find(": ", index + 1)
            # find() returns -1 when there is no ": " left; using that as
            # a slice end would wrongly accept inputs such as "[red]".
            if end != -1:
                candidate = paragraph[index + 1:end]
                if candidate in directives:
                    opened = candidate
        closes = not escaped and char == "]" and nest[-1] is not None

        # dont splice other directives into linequotes: that is far
        # too confusing for the client to determine where to put line
        # breaks
        if opened is not None and nest[-1] != "linequote":
            stack.append([opened, ""])
            nest.append(opened)
            skip = len(opened) + 2
        elif closes:
            nest.pop()
            stack.append([nest[-1], ""])
        else:
            escaped = char == "\\"
            # A backslash is swallowed only when it escapes a bracket; a
            # trailing backslash has nothing to escape and stays literal.
            escapes_bracket = (
                escaped
                and index < last_index
                and paragraph[index + 1] in "[]"
            )
            if not escapes_bracket:
                stack[-1][1] += char

    # filter out unused stacks, eg ["red", ""]
    return [(directive, body) for directive, body in stack if body]


def sequential_expressions(string):
    """
    Takes a string, sexpifies it, and returns a list of lists
    who contain tuples. Each list of tuples represents a paragraph.
    Within each paragraph, [0] is either None or a markup directive,
    and [1] is the body of text to which it applies. This representation
    is very easy to handle for a client. It semi-supports nesting:
    eg, the expression [red: this [blue: is [green: mixed]]] will
    return [("red", "this "), ("blue", "is "), ("green", "mixed")],
    but this cannot effectively express an input like
    [bold: [red: bolded colors.]], in which case the innermost
    expression will take precedence. For the input:
        "[bold: [red: this] is some text [green: it cant handle]]"
    you get:
        [('red', 'this'), ('bold', ' is some text '), ('green', 'it cant handle')]

    Brackets can be escaped with a backslash to be taken literally.
    A bracket that does not open a known directive, or a closing
    bracket with no directive open, is kept as plain text.
    """
    directives = frozenset(colors + markup)
    return [
        _paragraph_expressions(paragraph, directives)
        for paragraph in parse_segments(string)
    ]


def apply_formatting(msg_obj, formatter):
    """
    Receives a messages object from a thread and returns it with
    all the message bodies passed through FORMATTER. Not all
    formatting functions have to return a string. Refer to the
    documentation for each formatter.

    The message objects are modified in place; MSG_OBJ itself is
    returned for convenience.
    """
    for obj in msg_obj:
        obj["body"] = formatter(obj["body"])
    return msg_obj