aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBlake DeMarcy <ofunknowndescent@gmail.com>2017-04-11 15:31:01 -0500
committerBlake DeMarcy <ofunknowndescent@gmail.com>2017-04-11 15:31:01 -0500
commitb731ab69fad4112b126b1f7586e433eb2f21fbcc (patch)
treecdb36c30cd8168061e5ec90e5ba92cb9706cde3a /src
parent28680865eec35c7abaa994fa6e76f837f8cb20fe (diff)
downloadbbj-b731ab69fad4112b126b1f7586e433eb2f21fbcc.tar.gz
primitive, awful text formatting
Diffstat (limited to 'src')
-rw-r--r--src/formatting.py123
1 files changed, 112 insertions, 11 deletions
diff --git a/src/formatting.py b/src/formatting.py
index 07250e8..d0a64a3 100644
--- a/src/formatting.py
+++ b/src/formatting.py
@@ -3,34 +3,135 @@ This module is not complete and none of its functions are currently
used elsewhere. Subject to major refactoring.
"""
-from markdown import markdown
-from html import escape
+test = """
+This is a small paragraph
+thats divided between a
+few rows.
+
+this opens a few linequotes.
+>this is a few
+>rows of
+>sequential line breaks
+and this is what follows right after
+"""
+
+# from markdown import markdown
+# from html import escape
import re
-#0, 1 2 3 4 5 6
colors = [
+#0, 1 2 3 4 5 6
"red", "yellow", "green", "blue", "cyan", "magenta"
]
markup = [
- "bold", "italic", "underline", "strike"
+ "bold", "italic", "underline", "linequote", "quote", "rainbow"
]
-tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup),
- flags=re.DOTALL)
+# tokens being [red: this will be red] and [bold: this will be bold]
+# tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)
+# quotes being references to other post_ids, like >>34 or >>0 for OP
quotes = re.compile(">>([0-9]+)")
-linequotes = re.compile("^(>.+)$",
- flags=re.MULTILINE)
+
+# linequotes being chan-style greentext,
+# >like this
+linequotes = re.compile("^(>.+)$", flags=re.MULTILINE)
+
+
+def parse_segments(text, sanitize_linequotes=True):
+ """
+ Parse linequotes, quotes, and paragraphs into their appropriate
+ representations. Paragraphs are represented as separate strings
+ in the returned list, and quote-types are compiled to their
+ [bracketed] representations.
+ """
+ result = list()
+ for paragraph in [p.strip() for p in re.split("\n{2,}", text)]:
+ pg = str()
+ for segment in [s.strip() for s in paragraph.split("\n")]:
+ if not segment:
+ continue
+ segment = quotes.sub(lambda m: "[quote: %s]" % m.group(1), segment)
+ if segment.startswith(">"):
+ if sanitize_linequotes:
+ inner = segment.replace("]", "\\]")
+ else:
+ inner = segment
+ segment = "[linequote: %s]" % inner
+ # pg = pg[0:-1]
+ pg += segment
+ else:
+ pg += segment + " "
+ result.append(pg.strip())
+ return result
+
+
+def sequential_expressions(string):
+ """
+ Takes a string, sexpifies it, and returns a list of lists
+ who contain tuples. Each list of tuples represents a paragraph.
+ Within each paragraph, [0] is either None or a markup directive,
+ and [1] is the body of text to which it applies. This representation
+ is very easy to handle for a client. It semi-supports nesting:
+ eg, the expression [red: this [blue: is [green: mixed]]] will
+ return [("red", "this "), ("blue", "is "), ("green", "mixed")],
+ but this cannot effectively express an input like
+ [bold: [red: bolded colors.]], in which case the innermost
+ expression will take precedence. For the input:
+ "[bold: [red: this] is some shit [green: it cant handle]]"
+ you get:
+ [('red', 'this'), ('bold', ' is some shit '), ('green', 'it cant handle')]
+ """
+ # abandon all hope ye who enter here
+ directives = colors + markup
+ result = list()
+ for paragraph in parse_segments(string):
+ stack = [[None, str()]]
+ skip_iters = []
+ nest = [None]
+ escaped = False
+ for index, char in enumerate(paragraph):
+ if skip_iters:
+ skip_iters.pop()
+ continue
+
+ if not escaped and char == "[":
+ directive = paragraph[index+1:paragraph.find(": ", index+1)]
+ open_p = directive in directives
+ else: open_p = False
+ clsd_p = not escaped and nest[-1] != None and char == "]"
+
+ # dont splice other directives into linequotes: that is far
+ # too confusing for the client to determine where to put line
+ # breaks
+ if open_p and nest[-1] != "linequote":
+ stack.append([directive, str()])
+ nest.append(directive)
+ [skip_iters.append(x) for x in range(len(directive)+2)]
+
+ elif clsd_p:
+ nest.pop()
+ stack.append([nest[-1], str()])
+
+ else:
+ escaped = char == "\\"
+ if not (escaped and paragraph[index+1] in "[]"):
+ stack[-1][1] += char
+ # filter out unused stacks, eg ["red", ""]
+ result.append([(directive, body) for directive, body in stack if body])
+ return result
def apply_formatting(msg_obj, formatter):
"""
Receives a messages object from a thread and returns it with
- all the message bodies passed through FORMATTER.
+ all the message bodies passed through FORMATTER. Not all
+ formatting functions have to return a string. Refer to the
+ documentation for each formatter.
"""
- for x in range(len(msg_obj)):
- msg_obj[x]["body"] = formatter(msg_obj[x]["body"])
+ for x, obj in enumerate(msg_obj):
+ msg_obj[x]["body"] = formatter(obj["body"])
return msg_obj
Un proyecto texto-plano.xyz