#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Provenance: posts/180.py as added by git commit
#   802e8493b35c966af4ceee2db812133106750399
#   From:    W. Trevor King
#   Date:    Sat, 28 Aug 2010 15:12:15 -0400
#   Subject: Added ʇxǝʇ‾uʍop-ǝpısd∩.mdwn and 180.py
# The companion blog post from the same commit
# (posts/ʇxǝʇ‾uʍop-ǝpısd∩.mdwn) is reproduced in the comment block at
# the end of this file.
#
# Original maps mostly drawn from
#   http://www.fileformat.info/convert/text/upside-down-map.htm
#   http://www.upsidedowntext.com/unicode

"""Rotate ASCII text using unicode homographs.

Most glaringly obvious in my terminal:
  25@BDKRUZ`jl

Some of these choices were a result of my font setup.  For example,
  ᴚ LATIN LETTER SMALL CAPITAL TURNED R (U+1D1A)
is clearly superior to
  ʁ LATIN LETTER SMALL CAPITAL INVERTED R (U+0281)
but it's not in my default font.

Command line usage:
  180.py < text      write the rotated text read from stdin to stdout
  180.py ENCODING    write the table of printable ASCII rotations,
                     encoded as ENCODING (utf-8 if ENCODING is unknown)
"""

__version__ = '0.1'

import sys


ROTATE_TABLE = {  # Ordered by key's code point.
    # ASCII keys

    # Code points U+0000 through U+0020 are control characters or
    # whitespace.  Fall through.
    u'!': u'¡',  # EXCLAMATION MARK (U+0021), INVERTED EXCLAMATION MARK (U+00A1)
    u'"': u'„',  # QUOTATION MARK (U+0022), DOUBLE LOW-9 QUOTATION MARK (U+201E)
    # # NUMBER SIGN (U+0023).  Fall through.
    # $ DOLLAR SIGN (U+0024).  Fall through.
    # % PERCENT SIGN (U+0025).  Fall through.
    u'&': u'⅋',  # AMPERSAND (U+0026), TURNED AMPERSAND (U+214B)
    u"'": u'‚',  # APOSTROPHE (U+0027), SINGLE LOW-9 QUOTATION MARK (U+201A)
                 # Alternatives: , COMMA (U+002C)
    u'(': u')',  # LEFT PARENTHESIS (U+0028), RIGHT PARENTHESIS (U+0029)
    # ) RIGHT PARENTHESIS (U+0029).  Inverse of '(' -> ')'.
    u'*': u'⁎',  # ASTERISK (U+002A), LOW ASTERISK (U+204E)
    # + PLUS SIGN (U+002B).  Fall through.
    u',': u'‘',  # COMMA (U+002C), LEFT SINGLE QUOTATION MARK (U+2018)
    # - HYPHEN-MINUS (U+002D).  Fall through.
    u'.': u'˙',  # FULL STOP (U+002E), DOT ABOVE (U+02D9)
    # / SOLIDUS (U+002F).  Fall through.
    # 0 DIGIT ZERO (U+0030).  Fall through.
    u'1': u'⇂',  # DIGIT ONE (U+0031), DOWNWARDS HARPOON WITH BARB RIGHTWARDS (U+21C2)
                 # Alternatives: Ɩ LATIN CAPITAL LETTER IOTA (U+0196) HACK
    u'2': u'Z',  # DIGIT TWO (U+0032), Z LATIN CAPITAL LETTER Z (U+005A) HACK
                 # Alternatives: ჷ GEORGIAN LETTER YN (U+10F7)
    u'3': u'Ɛ',  # DIGIT THREE (U+0033), LATIN CAPITAL LETTER OPEN E (U+0190)
    u'4': u'Ϟ',  # DIGIT FOUR (U+0034), GREEK LETTER KOPPA (U+03DE) HACK
                 # Alternatives: ᔭ CANADIAN SYLLABICS YA (U+152D)
                 #               ㄣ BOPOMOFO LETTER EN (U+3123)
                 #               ߈ NKO DIGIT EIGHT (U+07C8)
    u'5': u'ʢ',  # DIGIT FIVE (U+0035), LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE (U+02A2) HACK
                 # Alternatives: ϛ GREEK SMALL LETTER STIGMA (U+03DB)
    u'6': u'9',  # DIGIT SIX (U+0036), DIGIT NINE (U+0039)
    u'7': u'Ł',  # DIGIT SEVEN (U+0037), LATIN CAPITAL LETTER L WITH STROKE (U+0141)
                 # Alternatives: L LATIN CAPITAL LETTER L (U+004C)
                 #               Ɫ LATIN CAPITAL LETTER L WITH MIDDLE TILDE (U+2C62)
                 #               Ƚ LATIN CAPITAL LETTER L WITH BAR (U+023D)
                 #               ㄥ BOPOMOFO LETTER ENG (U+3125)
    # 8 DIGIT EIGHT (U+0038).  Fall through.
    # 9 DIGIT NINE (U+0039).  Inverse of '6' -> '9'.
    # : COLON (U+003A).  Fall through.
    u';': u'⁏',  # SEMICOLON (U+003B), REVERSED SEMICOLON (U+204F) HACK
                 # Alternatives: ؛ ARABIC SEMICOLON (U+061B)
    u'<': u'>',  # LESS-THAN SIGN (U+003C), GREATER-THAN SIGN (U+003E)
    # = EQUALS SIGN (U+003D).  Fall through.
    # > GREATER-THAN SIGN (U+003E).  Inverse of '<' -> '>'.
    u'?': u'¿',  # QUESTION MARK (U+003F), INVERTED QUESTION MARK (U+00BF)
    # @ COMMERCIAL AT (U+0040).  Fall through.
    #   Alternatives: ᠗ MONGOLIAN DIGIT SEVEN (U+1817)
    u'A': u'∀',  # LATIN CAPITAL LETTER A (U+0041), FOR ALL (U+2200)
                 # Alternatives: Ɐ LATIN CAPITAL LETTER TURNED A (U+2C6F)
    u'B': u'θ',  # LATIN CAPITAL LETTER B (U+0042), GREEK SMALL LETTER THETA (U+03B8)
                 # Alternatives: 𐐒 DESERET CAPITAL LETTER BEE (U+10412)
                 #               ৪ BENGALI DIGIT FOUR (U+09EA)
    u'C': u'Ɔ',  # LATIN CAPITAL LETTER C (U+0043), LATIN CAPITAL LETTER OPEN O (U+0186)
                 # Alternatives: Ↄ ROMAN NUMERAL REVERSED ONE HUNDRED (U+2183)
    u'D': u'◖',  # LATIN CAPITAL LETTER D (U+0044), LEFT HALF BLACK CIRCLE (U+25D6)
                 # Alternatives: ⫏ CLOSED SUBSET (U+2ACF)
                 #               Ɑ LATIN CAPITAL LETTER ALPHA (U+2C6D)
    u'E': u'Ǝ',  # LATIN CAPITAL LETTER E (U+0045), LATIN CAPITAL LETTER REVERSED E (U+018E)
    u'F': u'Ⅎ',  # LATIN CAPITAL LETTER F (U+0046), TURNED CAPITAL F (U+2132)
    u'G': u'⅁',  # LATIN CAPITAL LETTER G (U+0047), TURNED SANS-SERIF CAPITAL G (U+2141)
    # H LATIN CAPITAL LETTER H (U+0048).  Fall through.
    # I LATIN CAPITAL LETTER I (U+0049).  Fall through.
    u'J': u'ſ',  # LATIN CAPITAL LETTER J (U+004A), LATIN SMALL LETTER LONG S (U+017F)
    u'K': u'⋊',  # LATIN CAPITAL LETTER K (U+004B), RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT (U+22CA) HACK
                 # Alternatives: ⺦ CJK RADICAL SIMPLIFIED HALF TREE TRUNK (U+2EA6)
    u'L': u'⅂',  # LATIN CAPITAL LETTER L (U+004C), TURNED SANS-SERIF CAPITAL L (U+2142)
                 # Alternatives: Ꞁ LATIN CAPITAL LETTER TURNED L (U+A780)
    u'M': u'W',  # LATIN CAPITAL LETTER M (U+004D), LATIN CAPITAL LETTER W (U+0057)
                 # Alternatives: ꟽ LATIN EPIGRAPHIC LETTER INVERTED M (U+A7FD)
                 #               Ɯ LATIN CAPITAL LETTER TURNED M (U+019C)
    u'N': u'N',  # LATIN CAPITAL LETTER N (U+004E), LATIN CAPITAL LETTER N (U+004E)
                 # Alternatives: ᴎ LATIN LETTER SMALL CAPITAL REVERSED N (U+1D0E)
    u'O': u'O',  # LATIN CAPITAL LETTER O (U+004F).  Fall through.
    u'P': u'Ԁ',  # LATIN CAPITAL LETTER P (U+0050), CYRILLIC CAPITAL LETTER KOMI DE (U+0500)
    u'Q': u'Ό',  # LATIN CAPITAL LETTER Q (U+0051), GREEK CAPITAL LETTER OMICRON WITH TONOS (U+038C) HACK
    u'R': u'ʁ',  # LATIN CAPITAL LETTER R (U+0052), LATIN LETTER SMALL CAPITAL INVERTED R (U+0281) HACK:not rotated
                 # Alternatives: ᴚ LATIN LETTER SMALL CAPITAL TURNED R (U+1D1A)
    u'S': u'S',  # LATIN CAPITAL LETTER S (U+0053).  Fall through.
    u'T': u'⊥',  # LATIN CAPITAL LETTER T (U+0054), UP TACK (U+22A5)
                 # Alternatives: ⟂ PERPENDICULAR (U+27C2)
    u'U': u'∩',  # LATIN CAPITAL LETTER U (U+0055), INTERSECTION (U+2229) HACK
    u'V': u'Λ',  # LATIN CAPITAL LETTER V (U+0056), GREEK CAPITAL LETTER LAMBDA (U+039B)
                 # Alternatives: Ʌ LATIN CAPITAL LETTER TURNED V (U+0245)
    # W LATIN CAPITAL LETTER W (U+0057).  Inverse of 'M' -> 'W'.
    # X LATIN CAPITAL LETTER X (U+0058).  Fall through.
    u'Y': u'⅄',  # LATIN CAPITAL LETTER Y (U+0059), TURNED SANS-SERIF CAPITAL Y (U+2144)
    # Z LATIN CAPITAL LETTER Z (U+005A).  Inverse of '2' -> 'Z'.
    u'[': u']',  # LEFT SQUARE BRACKET (U+005B), RIGHT SQUARE BRACKET (U+005D)
    # \ REVERSE SOLIDUS (U+005C).  Fall through.
    # ] RIGHT SQUARE BRACKET (U+005D).  Inverse of '[' -> ']'.
    u'^': u'⌵',  # CIRCUMFLEX ACCENT (U+005E), COUNTERSINK (U+2335)
    u'_': u'‾',  # LOW LINE (U+005F), OVERLINE (U+203E)
    u'`': u'․',  # GRAVE ACCENT (U+0060), ONE DOT LEADER (U+2024) HACK
                 # Alternatives: , COMMA (U+002C)
                 #               ، ARABIC COMMA (U+060C)
    u'a': u'ɐ',  # LATIN SMALL LETTER A (U+0061), LATIN SMALL LETTER TURNED A (U+0250)
    u'b': u'q',  # LATIN SMALL LETTER B (U+0062), LATIN SMALL LETTER Q (U+0071)
    u'c': u'ɔ',  # LATIN SMALL LETTER C (U+0063), LATIN SMALL LETTER OPEN O (U+0254)
    u'd': u'p',  # LATIN SMALL LETTER D (U+0064), LATIN SMALL LETTER P (U+0070)
    u'e': u'ǝ',  # LATIN SMALL LETTER E (U+0065), LATIN SMALL LETTER TURNED E (U+01DD)
    u'f': u'ɟ',  # LATIN SMALL LETTER F (U+0066), LATIN SMALL LETTER DOTLESS J WITH STROKE (U+025F)
    u'g': u'ƃ',  # LATIN SMALL LETTER G (U+0067), LATIN SMALL LETTER B WITH TOPBAR (U+0183)
                 # Alternatives: ᵷ LATIN SMALL LETTER TURNED G (U+1D77)
    u'h': u'ɥ',  # LATIN SMALL LETTER H (U+0068), LATIN SMALL LETTER TURNED H (U+0265)
    u'i': u'ı',  # LATIN SMALL LETTER I (U+0069), LATIN SMALL LETTER DOTLESS I (U+0131)
    u'j': u'ɾ',  # LATIN SMALL LETTER J (U+006A), LATIN SMALL LETTER R WITH FISHHOOK (U+027E)
    u'k': u'ʞ',  # LATIN SMALL LETTER K (U+006B), LATIN SMALL LETTER TURNED K (U+029E)
    # l LATIN SMALL LETTER L (U+006C).  Fall through.
    #   Alternatives: ʃ LATIN SMALL LETTER ESH (U+0283)
    #                 ן HEBREW LETTER FINAL NUN (U+05DF, right to left)
    u'm': u'ɯ',  # LATIN SMALL LETTER M (U+006D), LATIN SMALL LETTER TURNED M (U+026F)
    u'n': u'u',  # LATIN SMALL LETTER N (U+006E), LATIN SMALL LETTER U (U+0075)
                 # Alternatives: и CYRILLIC SMALL LETTER I (U+0438)
    # o LATIN SMALL LETTER O (U+006F).  Fall through.
    # p LATIN SMALL LETTER P (U+0070).  Inverse of 'd' -> 'p'.
    # q LATIN SMALL LETTER Q (U+0071).  Inverse of 'b' -> 'q'.
    u'r': u'ɹ',  # LATIN SMALL LETTER R (U+0072), LATIN SMALL LETTER TURNED R (U+0279)
    # s LATIN SMALL LETTER S (U+0073).  Fall through.
    u't': u'ʇ',  # LATIN SMALL LETTER T (U+0074), LATIN SMALL LETTER TURNED T (U+0287)
    # u LATIN SMALL LETTER U (U+0075).  Inverse of 'n' -> 'u'.
    u'v': u'ʌ',  # LATIN SMALL LETTER V (U+0076), LATIN SMALL LETTER TURNED V (U+028C)
    u'w': u'ʍ',  # LATIN SMALL LETTER W (U+0077), LATIN SMALL LETTER TURNED W (U+028D)
    # x LATIN SMALL LETTER X (U+0078).  Fall through.
    u'y': u'ʎ',  # LATIN SMALL LETTER Y (U+0079), LATIN SMALL LETTER TURNED Y (U+028E)
    # z LATIN SMALL LETTER Z (U+007A).  Fall through.
    u'{': u'}',  # LEFT CURLY BRACKET (U+007B), RIGHT CURLY BRACKET (U+007D)
    # | VERTICAL LINE (U+007C).  Fall through.
    # } RIGHT CURLY BRACKET (U+007D).  Inverse of '{' -> '}'.
    # ~ TILDE (U+007E).  Fall through.
    # Code point U+007F (DELETE) is a control character.  Fall through.
    }

# Reverse lookup (rotated glyph -> original key), so that rotating
# already-rotated text gives back the ASCII original.
INVERSE_ROTATE_TABLE = dict((v, k) for k, v in ROTATE_TABLE.items())

try:
    _unichr = unichr  # Python 2: chr() would return a byte string
except NameError:
    _unichr = chr  # Python 3: chr() already returns text


def rotate(c):
    """Return the rotated-by-180-degrees look-alike of character `c`.

    `ROTATE_TABLE` is consulted first, then `INVERSE_ROTATE_TABLE`;
    a character found in neither is returned unchanged.  Because of
    the inverse lookup, an ASCII character that is itself the image
    of another key maps back to that key (e.g. 'W' -> 'M',
    'Z' -> '2').
    """
    if c in ROTATE_TABLE:
        return ROTATE_TABLE[c]
    return INVERSE_ROTATE_TABLE.get(c, c)


def rotate_text(text):
    """Return unicode `text` rotated by 180 degrees.

    The characters are emitted in reverse order, each one replaced by
    `rotate()`.
    """
    return u''.join(rotate(c) for c in reversed(text))


def _write(stream, text, encoding):
    """Write unicode `text` to `stream`, encoded with `encoding`.

    Encoded bytes go to `stream.buffer` when the stream has one (a
    Python 3 text stream such as `sys.stdout`), otherwise to `stream`
    itself.  A stream that rejects bytes with `TypeError` (e.g.
    `io.StringIO`) receives the unencoded text instead.  Raises
    `UnicodeEncodeError` if `encoding` cannot represent `text`.
    """
    data = text.encode(encoding)
    raw = getattr(stream, 'buffer', None)
    if raw is not None:
        # Keep ordering with anything already written via the text layer.
        flush = getattr(stream, 'flush', None)
        if flush is not None:
            flush()
        raw.write(data)
        return
    try:
        stream.write(data)
    except TypeError:
        stream.write(text)


def write_in_rows(values, stream, encoding, width=80):
    """Write equal-length unicode strings to `stream` in rows.

    values   -- iterable of unicode strings, all of the same length
    stream   -- output stream (see `_write` for what is accepted)
    encoding -- name of the codec used to encode the output
    width    -- target row width in characters (default 80)

    Each row holds `width // (2 + len(value))` values (at least one)
    separated by single spaces; every row, including the last, ends
    with a newline.  An empty `values` writes nothing.

    Raises AssertionError if the values do not all have the same
    length.
    """
    values = list(values)
    if not values:
        return  # max()/min() of nothing would raise ValueError
    lengths = [len(v) for v in values]
    shortest, longest = min(lengths), max(lengths)
    if shortest != longest:
        # Raised explicitly so the check survives `python -O`.
        raise AssertionError(
            'string lengths range from %d to %d' % (shortest, longest))
    # Floor division: true division (Python 3, or `from __future__
    # import division`) would yield a float that the row counter
    # never equals, so rows would never wrap.
    columns = max(1, width // (2 + shortest))
    rows = [u' '.join(values[start:start + columns])
            for start in range(0, len(values), columns)]
    _write(stream, u'\n'.join(rows) + u'\n', encoding)


def test(stream=sys.stdout, encoding='utf-8'):
    """Write the table of printable ASCII rotations to `stream`.

    Every character U+0021..U+007E is listed as "hex-code char
    rotated-char".  If rotating any of U+0000..U+007F twice fails to
    give the character back, those characters are listed afterwards
    under a "Collisions:" heading.
    """
    # NOTE(review): this file was recovered from a whitespace-collapsed
    # copy; the spacing inside this format string is as recovered.
    row_format = u'%2x %s %s'

    def describe(code_point):
        char = _unichr(code_point)
        return row_format % (code_point, char, rotate(char))

    _write(stream, u'Printable rotations:\n', encoding)
    write_in_rows(
        values=[describe(i) for i in range(33, 127)],
        stream=stream, encoding=encoding)
    collisions = [describe(i) for i in range(0, 128)
                  if _unichr(i) != rotate(rotate(_unichr(i)))]
    if collisions:
        _write(stream, u'Collisions:\n', encoding)
        write_in_rows(values=collisions, stream=stream, encoding=encoding)


def main(argv=None):
    """Command line entry point; returns the process exit status.

    With an argument, print the rotation table (see `test`) using the
    argument as output encoding, falling back to utf-8 when the codec
    name is unknown.  Without arguments, read all of stdin, strip
    surrounding whitespace and write the rotated text plus a newline
    to stdout.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) > 1:  # print all the ascii chars and their inverse
        encoding = argv[1]
        try:
            u''.encode(encoding)
        except LookupError:
            encoding = 'utf-8'  # default to utf-8
        test(encoding=encoding)
        return 0
    # Decode stdin before reversing: reversing undecoded UTF-8 would
    # scramble the bytes of every multi-byte character, which breaks
    # un-rotating previously rotated text.
    in_encoding = getattr(sys.stdin, 'encoding', None) or 'utf-8'
    out_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    text = getattr(sys.stdin, 'buffer', sys.stdin).read()
    if isinstance(text, bytes):
        text = text.decode(in_encoding)
    _write(sys.stdout, rotate_text(text.strip()) + u'\n', out_encoding)
    return 0


if __name__ == '__main__':
    sys.exit(main())


# ----------------------------------------------------------------------
# Companion blog post from the same commit
# (posts/ʇxǝʇ‾uʍop-ǝpısd∩.mdwn):
#
# [Upside-down
# text](http://en.wikipedia.org/wiki/Transformation_of_text#Upside-down_text)
# (actually, rotated by π) appears to have been a hit last summer, but
# I've been seeing a bit on
# [#python](http://www.python.org/community/irc/) recently:
#
#     15:21 < lieuwe> dash: so how should i call decode on a possibly unsafe string?
#     ...
#     15:25 < kerio> lieuwe: ɯǝןqoɹd ɐ ǝq ןןıʍ sıɥʇ ǝʞıן buıɥʇǝɯos
#
# I thought that was slick, so I looked around a bit today to see what
# people were doing in this regard (see the Wikipedia page for a list).
# Turns out to be a bit more complicated than I'd initially expected.
# The Unicode people apparently didn't see a need to methodically rotate
# characters, so while many have official "turned" forms (e.g. ɐ
# (U+0250) LATIN SMALL LETTER TURNED A), many others do not.  The
# solution seems to be hunting around through the unicode tables looking
# for [homoglyphs](http://en.wikipedia.org/wiki/Homoglyphs) (which turns
# out to be an interesting [phishing
# scheme](http://en.wikipedia.org/wiki/IDN_homograph_attack) in its own
# right).
#
# Anyhow, none of the implementations I found addressed conversion of
# ASCII characters with the scope and formality I felt this important
# topic deserved, so I put together [my own converter](180.py) ;).
# ¡ʎoɾuƎ