Add known_hosts post and related scripts.
author W. Trevor King <wking@drexel.edu>
Wed, 1 Jun 2011 12:08:25 +0000 (08:08 -0400)
committer W. Trevor King <wking@drexel.edu>
Wed, 1 Jun 2011 12:08:25 +0000 (08:08 -0400)
posts/Monkeysphere/unhash-known-hosts.sh [new file with mode: 0755]
posts/SSH/crack_known_hosts.py [new file with mode: 0755]
posts/SSH/unique_known_hosts.py [new file with mode: 0755]
posts/known_hosts.mdwn [new file with mode: 0644]

diff --git a/posts/Monkeysphere/unhash-known-hosts.sh b/posts/Monkeysphere/unhash-known-hosts.sh
new file mode 100755 (executable)
index 0000000..b4654a6
--- /dev/null
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Copyright (C) 2011 W. Trevor King <wking@drexel.edu>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+# Use Monkeysphere IDs in your GnuPG keyring to identify entries in
+# your known_hosts file.
+#
+# usage: unhash-known-hosts.sh path/to/known_hosts
+
+# Path of the known_hosts file to scan; the first argument overrides
+# the default.  The default uses ${HOME} rather than "~" because tilde
+# expansion does not happen inside double quotes.
+KNOWN_HOSTS="${HOME}/.ssh/known_hosts"
+if [ -n "${1}" ]; then
+    KNOWN_HOSTS="${1}"
+fi
+
+# Key IDs taken from the "pub" lines that `gpg --list-keys ssh` prints.
+GPG_IDS=$(gpg --list-keys ssh | grep pub | sed 's/^[^/]*\///' | sed 's/ .*//')
+if [ -z "${GPG_IDS}" ]; then
+    echo 'no SSH IDs found in GnuPG keyring' >&2
+    exit 1
+fi
+
+declare -A GPG_KEY  # openpgp2ssh output ("ALG KEY") -> GnuPG ID
+declare -A GPG_UID  # GnuPG ID -> uid text from `gpg --list-keys`
+
+for GPG_ID in $GPG_IDS; do
+    GPG_ENTRY=$(gpg --export "${GPG_ID}" | openpgp2ssh)
+    # An empty string is not a valid associative-array subscript, so
+    # skip any key that produced no openpgp2ssh output.
+    if [ -z "${GPG_ENTRY}" ]; then
+        continue
+    fi
+    GPG_KEY["${GPG_ENTRY}"]="${GPG_ID}"
+    GPG_UID["${GPG_ID}"]=$(gpg --list-keys "${GPG_ID}" | sed -n 's/^uid *//p')
+done
+
+# Split each line into its first three whitespace-separated fields.
+# `read -r` keeps backslashes literal, and the `|| [ -n ... ]` clause
+# still processes a final line that lacks a trailing newline.
+while read -r HASH ALG KEY REST || [ -n "${HASH}" ]; do
+    # Only hashed entries (host field starting with "|") are reported.
+    if [ "${HASH:0:1}" == "|" ]; then
+        ALG_KEY="${ALG} ${KEY}"
+        GPG_ID="${GPG_KEY[${ALG_KEY}]}"
+        if [ -n "${GPG_ID}" ]; then
+            echo "GnuPG ID ${GPG_ID} (${GPG_UID[$GPG_ID]}) matches ${HASH}"
+        else
+            echo "did not match ${HASH}"
+        fi
+    fi
+done < "${KNOWN_HOSTS}"
diff --git a/posts/SSH/crack_known_hosts.py b/posts/SSH/crack_known_hosts.py
new file mode 100755 (executable)
index 0000000..ca129d4
--- /dev/null
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2011 W. Trevor King <wking@drexel.edu>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+'Check a guess against the hashed entries in a `known_hosts` file.'
+
+import base64
+import hashlib
+import hmac
+import itertools
+import os.path
+import sys
+
+
+VERBOSE = 0
+
+
+def parse_line(line):
+    """Parse the hashed host field of one `known_hosts` line.
+
+    The first whitespace-separated field of `line` must have the form
+    `|1|<base64>|<base64>`.  Returns a dict with the items:
+
+    * `'?'`: the second `|`-separated field as an integer (always 1),
+    * `'key'`: the decoded third field (`match_guess` uses it as the
+      HMAC key),
+    * `'name'`: the decoded fourth field (`match_guess` compares it
+      with the HMAC digest of a guessed name),
+    * `'hashed'`: the raw, undecoded host field.
+
+    Raises `ValueError` if `line` does not start with `|` or if the
+    host field is malformed.
+
+    >>> line = '|1|0YP15ycxoYMonCwcTo+zg9HMR3s=|GapoIyXQk7XJ+j3Kcg6yGC16Y/Y= ssh-rsa ...'
+    >>> info = parse_line(line)
+    >>> info['hashed']
+    '|1|0YP15ycxoYMonCwcTo+zg9HMR3s=|GapoIyXQk7XJ+j3Kcg6yGC16Y/Y='
+    >>> info['?']
+    1
+    >>> base64.b64encode(info['key'])
+    '0YP15ycxoYMonCwcTo+zg9HMR3s='
+    >>> base64.b64encode(info['name'])
+    'GapoIyXQk7XJ+j3Kcg6yGC16Y/Y='
+    >>> parse_line('|1|too-few-fields ssh-rsa key')
+    Traceback (most recent call last):
+      ...
+    ValueError: 3 fields in |1|too-few-fields
+    """
+    if not line.startswith('|'):
+        raise ValueError(line)
+    host = line.split()[0]
+    fields = host.split('|')
+    # Validate with exceptions rather than `assert`: assertions are
+    # stripped under `python -O`, and `read_known_hosts` only skips
+    # lines that raise ValueError.
+    if len(fields) != 4:
+        raise ValueError('%d fields in %s' % (len(fields), host))
+    if fields[1] != '1':
+        raise ValueError(host)
+    q = int(fields[1])
+    try:
+        key = base64.b64decode(fields[2])
+        name = base64.b64decode(fields[3])
+    except (TypeError, ValueError):  # Python 2 raises TypeError here
+        raise ValueError(host)
+    return {'?': q, 'key': key, 'name': name, 'hashed': host}
+
+def read_known_hosts(stream=None):
+    """Collect the hashed entries of a `known_hosts` file.
+
+    `stream` is an iterable of lines.  If it is `None`,
+    `~/.ssh/known_hosts` is opened, read, and closed again before
+    returning.  Lines for which `parse_line` raises `ValueError` are
+    skipped.
+
+    Returns a dict mapping each entry's decoded `'key'` to its
+    `parse_line` dict, extended with a `'line'` item holding the
+    zero-based index of the line in `stream`.  A later line with the
+    same `'key'` replaces an earlier one.
+
+    >>> import StringIO
+    >>> stream = StringIO.StringIO(
+    ...     '|1|0YP15ycxoYMonCwcTo+zg9HMR3s=|GapoIyXQk7XJ+j3Kcg6yGC16Y/Y= ssh-rsa ...\\n')
+    >>> entries = read_known_hosts(stream)
+    >>> for key in entries.keys():
+    ...     print base64.b64encode(key)
+    0YP15ycxoYMonCwcTo+zg9HMR3s=
+    >>> info = entries[base64.b64decode('0YP15ycxoYMonCwcTo+zg9HMR3s=')]
+    >>> info['hashed']
+    '|1|0YP15ycxoYMonCwcTo+zg9HMR3s=|GapoIyXQk7XJ+j3Kcg6yGC16Y/Y='
+    """
+    if stream is None:
+        path = os.path.expanduser(os.path.join('~', '.ssh', 'known_hosts'))
+        # We opened this file ourselves, so make sure it gets closed.
+        with open(path, 'r') as default_stream:
+            return read_known_hosts(default_stream)
+    entries = {}
+    for i,line in enumerate(stream):
+        try:
+            info = parse_line(line)
+        except ValueError:
+            continue
+        info['line'] = i
+        entries[info['key']] = info
+    return entries
+
+def match_guess(name, entries):
+    """Return the entry whose hash matches the guessed host `name`.
+
+    For every value in `entries`, the HMAC-SHA1 digest of `name` keyed
+    with the entry's `'key'` is compared with the entry's `'name'`.
+    The first matching entry is returned, or `None` if none match.
+
+    >>> line = '|1|0YP15ycxoYMonCwcTo+zg9HMR3s=|GapoIyXQk7XJ+j3Kcg6yGC16Y/Y= ssh-rsa ...'
+    >>> info = parse_line(line)
+    >>> entries = {info['key']: info}
+    >>> match_guess('wrong', entries)
+    >>> match = match_guess('einstein', entries)
+    >>> match == info
+    True
+    """
+    for candidate in entries.values():
+        digest = hmac.new(candidate['key'], name, hashlib.sha1).digest()
+        if digest == candidate['name']:
+            return candidate
+    return None
+
+def ip_glob_entries(ip_glob):
+    """Generate every dotted address that matches `ip_glob`.
+
+    `ip_glob` is split on `.`; a `*` field stands for each of `0`
+    through `255`, and any other field is used literally.
+
+    >>> list(ip_glob_entries('192.168.0.*'))  # doctest: +ELLIPSIS
+    ['192.168.0.0', '192.168.0.1', ..., '192.168.0.255']
+    >>> list(ip_glob_entries('192.168.*.*'))  # doctest: +ELLIPSIS
+    ['192.168.0.0', '192.168.0.1', ..., '192.168.255.255']
+    """
+    wildcard = [str(n) for n in range(256)]
+    pools = [wildcard if part == '*' else [part]
+             for part in ip_glob.split('.')]
+    for combo in selections(pools):
+        yield '.'.join(combo)
+
+def alphanum_entries(min_length=1, max_length=8, chars=None):
+    """Generate every string over `chars`, shortest lengths first.
+
+    Lengths run from `min_length` through `max_length` inclusive.  If
+    `chars` is `None`, the alphabet is `a`-`z`, `0`-`9` and `.`.  When
+    `VERBOSE >= 1`, a progress line is written to stderr before each
+    length is generated.
+
+    >>> list(alphanum_entries(max_length=3, chars=['a', 'b']))
+    ... # doctest: +ELLIPSIS
+    ['a', 'b', 'aa', 'ab', 'ba', 'bb', 'aaa', 'aab', ..., 'bbb']
+    >>> list(alphanum_entries(min_length=3, max_length=3))
+    ... # doctest: +ELLIPSIS
+    ['aaa', 'aab', 'aac', ..., '999', '...']
+    """
+    # Identity test: `== None` would call a custom `__eq__` on a
+    # caller-supplied `chars` object.
+    if chars is None:
+        chars = [chr(x) for x in range(ord('a'), ord('z')+1)]
+        chars.extend([chr(x) for x in range(ord('0'), ord('9')+1)])
+        chars.extend(['.'])
+    for length in range(min_length, max_length+1):
+        if VERBOSE >= 1:
+            sys.stderr.write('generate alpha-nums of length %d (max %d)\n' %
+                             (length, max_length))
+            sys.stderr.flush()
+        for selection in selections([chars for i in range(length)]):
+            yield ''.join(selection)
+
+def selections(values):
+    """Generate every way of picking one item from each list in `values`.
+
+    Each result is a list with one item per entry of `values`, in the
+    same order; the last position varies fastest.  An empty `values`
+    yields a single empty selection, and nothing is yielded if any
+    entry of `values` is empty.  When `VERBOSE >= 2`, a progress line
+    is written to stderr each time the second position wraps around.
+
+    >>> list(selections(values=[[0,1,2], [3], [4,5]]))
+    [[0, 3, 4], [0, 3, 5], [1, 3, 4], [1, 3, 5], [2, 3, 4], [2, 3, 5]]
+    >>> list(selections(values=[]))
+    [[]]
+    >>> list(selections(values=[[0, 1], []]))
+    []
+    """
+    # Guard the degenerate inputs, which would otherwise raise
+    # IndexError in the odometer loop below.
+    if not values:
+        yield []
+        return
+    if any(len(pool) == 0 for pool in values):
+        return
+    index = [0]*len(values)
+    while True:
+        yield [values[i][j] for i,j in enumerate(index)]
+        index[-1] += 1
+        for i in range(len(values), 0, -1):  # carry, if necessary
+            i -= 1  # convert to [len(values)-1, ..., 0]
+            j = index[i]
+            if j >= len(values[i]):
+                if i == 0:
+                    return  # the first position overflowed: all done
+                elif i == 1 and VERBOSE >= 2:
+                    sys.stderr.write('selection completed %d of %d\n' %
+                                     (index[0], len(values[0])))
+                    sys.stderr.flush()
+                index[i] = 0
+                index[i-1] += 1
+
+
+if __name__ == '__main__':
+    from argparse import ArgumentParser
+
+    p = ArgumentParser(description=__doc__)
+    p.add_argument('names', metavar='NAME', type=unicode, nargs='*',
+                   help='a guessed host name or IP')
+    p.add_argument('--known-hosts', dest='known_hosts', type=unicode,
+                   help='alternate path to known_hosts file')
+    p.add_argument('--ip', metavar='GLOB', dest='ip_glob', type=unicode,
+                   help="IP glob (e.g. '192.168.*.*')")
+    p.add_argument('--alphanum', metavar='LENGTH', dest='max_an_len', type=int,
+                   help='scan all alpha-numeric names up to this max length')
+    # argparse's built-in 'count' action replaces a hand-written
+    # counting Action; the value stays None if -v is never given.
+    p.add_argument('-v', '--verbose', dest='verbose', action='count',
+                   help='increment verbosity')
+
+    args = p.parse_args()
+
+    VERBOSE = args.verbose or 0
+
+    if args.known_hosts:
+        # `with` closes the file even if reading it raises.
+        with open(args.known_hosts, 'r') as stream:
+            entries = read_known_hosts(stream)
+    else:
+        entries = read_known_hosts()
+
+    # Explicit names are checked first, then any generated guesses.
+    names = args.names
+
+    if args.ip_glob:
+        names = itertools.chain(names, ip_glob_entries(args.ip_glob))
+
+    if args.max_an_len:
+        names = itertools.chain(names, alphanum_entries(
+                max_length=args.max_an_len))
+
+    for name in names:
+        if VERBOSE >= 3:
+            sys.stderr.write('check %s\n' % name)
+            sys.stderr.flush()
+        match = match_guess(name, entries)
+        if match:
+            # 'line' is a zero-based index; report it one-based.
+            print '%s %s (line %d)' % (name, match['hashed'], match['line']+1)
diff --git a/posts/SSH/unique_known_hosts.py b/posts/SSH/unique_known_hosts.py
new file mode 100755 (executable)
index 0000000..54a43a0
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2011 W. Trevor King <wking@drexel.edu>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Sort a `known_hosts` file by public key.
+
+This makes it easier to associate entries in your `known_hosts` file,
+which may have separate, hashed entries for `server`,
+`server.example.net`, and `123.456.789.123`.
+
+usage: unique_known_hosts.py path/to/known_hosts
+"""
+
+import os.path
+import sys
+
+
+# Default to the user's own known_hosts; the first argument overrides it.
+known_hosts = os.path.expanduser(os.path.join('~', '.ssh', 'known_hosts'))
+if len(sys.argv) > 1:
+    known_hosts = sys.argv[1]
+
+# Map each key (the rest of the line after the host field) to the list
+# of host fields that share it.
+keys = {}
+
+with open(known_hosts, 'r') as stream:
+    for line in stream:
+        line = line.strip()
+        # Blank lines and comments have no host/key pair to split.
+        if not line or line.startswith('#'):
+            continue
+        fields = line.split(None, 1)
+        if len(fields) != 2:
+            continue  # a host field with no key after it
+        name,key = fields
+        keys.setdefault(key, []).append(name)
+
+# Print each key on its own line, followed by its indented host fields.
+for key,names in keys.items():
+    print '%s\n  %s' % (key, '\n  '.join(names))
diff --git a/posts/known_hosts.mdwn b/posts/known_hosts.mdwn
new file mode 100644 (file)
index 0000000..c418b3f
--- /dev/null
@@ -0,0 +1,62 @@
+[OpenSSH][] stores lists of [[SSH]] public keys in `known_hosts`
+files, so it can verify that the host you're logging into is the host
+you expect and not a man-in-the-middle attacker.  To reduce the risk
+of [island-hopping attacks][hop], OpenSSH has a `HashKnownHosts yes`
+option to store [HMAC][]-[SHA1][] hashed versions of host names and
+IPs in your `known_hosts` files rather than the clear text.  This
+makes it harder for an attacker to use the information stored in your
+`known_hosts`.  However, it also makes it harder for *you* to use that
+information.
+
+I was digging through my `known_hosts` file yesterday compiling a list
+of servers where I have login accounts.  I keep better track of these
+things recently (using [[GPG|PGP]] to symmetrically encrypt the list),
+but my `known_hosts` file predates my quality-accounting phase.
+Anyhow, I wrote up some simple tools to make reverse-engineering a
+`known_hosts` file a bit less painful.
+
+You can use your [[monkeysphere]] keyring to see if you recognize any
+of the public keys.  This avoids having to deal with the hashed names
+at all, but assumes none of your servers are sharing keys.
+[[Monkeysphere/unhash-known-hosts.sh]] automates this:
+
+    $ unhash-known-hosts.sh path/to/known_hosts
+    GnuPG ID 01234567 (ssh://server.example.net) matches |1|Bvjsg3lqJJ/M9rTYz1HfY+T/RoM=|DhZlGg3GFMWtVcjz4LNfJ8afi7w=
+    did not match |1|vug6FlX6GCaIIzkv3wS3zftQyyw=|PdMYEIaWTzHCv/4ZhNiR2DD6E0A=
+    ...
+
+Once you've got the low-hanging fruit out of the way, you can get a
+list of the high-hanging fruit:
+
+    $ unhash-known-hosts.sh path/to/known_hosts | sed -n 's/^did not match //p' > unknown_hosts
+
+Start guessing with [[SSH/crack_known_hosts.py]]!  IPs are usually a
+good starting point, because any host in your `known_hosts` file must
+have an entry for its IP.
+
+    $ crack_known_hosts.py --known-hosts unknown_hosts --ip '192.168.*.*'
+
+You can also run a full scan of alphanumeric entries up to a specified
+length (this gets slow quickly, which is, after all, why you hashed
+the entries in the first place).
+
+    $ crack_known_hosts.py --known-hosts unknown_hosts --alphanum 16
+
+Removing entries from `unknown_hosts` as you crack them will make
+future `crack_known_hosts.py` attempts on that file faster.
+
+Once you've cracked one name, you can use
+[[SSH/unique_known_hosts.py]] to find other entries that share the
+same key.
+
+    $ unique_known_hosts.py path/to/known_hosts
+
+And there you have it.  Happy cracking! ;).
+
+[OpenSSH]: http://www.openssh.com/
+[hop]: http://itso.iu.edu/Hashing_the_OpenSSH_known__hosts_File
+[HMAC]: http://en.wikipedia.org/wiki/HMAC
+[SHA1]: http://en.wikipedia.org/wiki/SHA-1
+
+[[!tag tags/linux]]
+[[!tag tags/tools]]