primitive, awful text formatting

author: Blake DeMarcy <ofunknowndescent@gmail.com> 2017-04-11 15:31:01 -0500
committer: Blake DeMarcy <ofunknowndescent@gmail.com> 2017-04-11 15:31:01 -0500
commit: b731ab69fad4112b126b1f7586e433eb2f21fbcc (patch)
tree: cdb36c30cd8168061e5ec90e5ba92cb9706cde3a /src
parent: 28680865eec35c7abaa994fa6e76f837f8cb20fe (diff)
download: bbj-b731ab69fad4112b126b1f7586e433eb2f21fbcc.tar.gz
1 files changed, 112 insertions, 11 deletions
diff --git a/src/formatting.py b/src/formatting.py
index 07250e8..d0a64a3 100644
--- a/src/formatting.py
+++ b/src/formatting.py
@@ -3,34 +3,135 @@ This module is not complete and none of its functions are currently
 used elsewhere. Subject to major refactoring.
 """
 
-from markdown import markdown
-from html import escape
+test = """
+This is a small paragraph
+thats divided between a
+few rows.
+
+this opens a few linequotes.
+>this is a few
+>rows of
+>sequential line breaks
+and this is what follows right after
+"""
+
+# from markdown import markdown
+# from html import escape
 import re
 
-#0,   1        2        3        4       5        6
 colors = [
+#0,   1        2        3        4       5        6
     "red", "yellow", "green", "blue", "cyan", "magenta"
 ]
 
 markup = [
-    "bold", "italic", "underline", "strike"
+    "bold", "italic", "underline", "linequote", "quote", "rainbow"
 ]
 
-tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup),
-                    flags=re.DOTALL)
+# tokens being [red: this will be red] and [bold: this will be bold]
+# tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)
 
+# quotes being references to other post_ids, like >>34 or >>0 for OP
 quotes = re.compile(">>([0-9]+)")
-linequotes = re.compile("^(>.+)$",
-    flags=re.MULTILINE)
+
+# linequotes being chan-style greentext,
+# >like this
+linequotes = re.compile("^(>.+)$", flags=re.MULTILINE)
+
+
+def parse_segments(text, sanitize_linequotes=True):
+    """
+    Parse linequotes, quotes, and paragraphs into their appropriate
+    representations. Paragraphs are represented as separate strings
+    in the returned list, and quote-types are compiled to their
+    [bracketed] representations.
+    """
+    result = list()
+    for paragraph in [p.strip() for p in re.split("\n{2,}", text)]:
+        pg = str()
+        for segment in [s.strip() for s in paragraph.split("\n")]:
+            if not segment:
+                continue
+            segment = quotes.sub(lambda m: "[quote: %s]" % m.group(1), segment)
+            if segment.startswith(">"):
+                if sanitize_linequotes:
+                    inner = segment.replace("]", "\\]")
+                else:
+                    inner = segment
+                segment = "[linequote: %s]" % inner
+                # pg = pg[0:-1]
+                pg += segment
+            else:
+                pg += segment + " "
+        result.append(pg.strip())
+    return result
+
+
+def sequential_expressions(string):
+    """
+    Takes a string, sexpifies it, and returns a list of lists
+    who contain tuples. Each list of tuples represents a paragraph.
+    Within each paragraph, [0] is either None or a markup directive,
+    and [1] is the body of text to which it applies. This representation
+    is very easy to handle for a client. It semi-supports nesting:
+    eg, the expression [red: this [blue: is [green: mixed]]] will
+    return [("red", "this "), ("blue", "is "), ("green", "mixed")],
+    but this cannot effectively express an input like
+    [bold: [red: bolded colors.]], in which case the innermost
+    expression will take precedence. For the input:
+        "[bold: [red: this] is some shit [green: it cant handle]]"
+    you get:
+    [('red', 'this'), ('bold', ' is some shit '), ('green', 'it cant handle')]
+    """
+    # abandon all hope ye who enter here
+    directives = colors + markup
+    result = list()
+    for paragraph in parse_segments(string):
+        stack = [[None, str()]]
+        skip_iters = []
+        nest = [None]
+        escaped = False
+        for index, char in enumerate(paragraph):
+            if skip_iters:
+                skip_iters.pop()
+                continue
+
+            if not escaped and char == "[":
+                directive = paragraph[index+1:paragraph.find(": ", index+1)]
+                open_p = directive in directives
+            else: open_p = False
+            clsd_p = not escaped and nest[-1] != None and char == "]"
+
+            # dont splice other directives into linequotes: that is far
+            # too confusing for the client to determine where to put line
+            # breaks
+            if open_p and nest[-1] != "linequote":
+                stack.append([directive, str()])
+                nest.append(directive)
+                [skip_iters.append(x) for x in range(len(directive)+2)]
+
+            elif clsd_p:
+                nest.pop()
+                stack.append([nest[-1], str()])
+
+            else:
+                escaped = char == "\\"
+                if not (escaped and paragraph[index+1] in "[]"):
+                    stack[-1][1] += char
+        # filter out unused stacks, eg ["red", ""]
+        result.append([(directive, body) for directive, body in stack if body])
+    return result
 
 
 def apply_formatting(msg_obj, formatter):
     """
     Receives a messages object from a thread and returns it with
-    all the message bodies passed through FORMATTER.
+    all the message bodies passed through FORMATTER. Not all
+    formatting functions have to return a string. Refer to the
+    documentation for each formatter.
     """
-    for x in range(len(msg_obj)):
-        msg_obj[x]["body"] = formatter(msg_obj[x]["body"])
+    for x, obj in enumerate(msg_obj):
+        msg_obj[x]["body"] = formatter(obj["body"])
     return msg_obj
author	Blake DeMarcy <ofunknowndescent@gmail.com>	2017-04-11 15:31:01 -0500
committer	Blake DeMarcy <ofunknowndescent@gmail.com>	2017-04-11 15:31:01 -0500
commit	b731ab69fad4112b126b1f7586e433eb2f21fbcc (patch)
tree	cdb36c30cd8168061e5ec90e5ba92cb9706cde3a /src
parent	28680865eec35c7abaa994fa6e76f837f8cb20fe (diff)
download	bbj-b731ab69fad4112b126b1f7586e433eb2f21fbcc.tar.gz