posts/Factor_analysis/pca.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright (C) 2013 W. Trevor King <wking@drexel.edu>
   5 #
   6 # This program is free software: you can redistribute it and/or modify
   7 # it under the terms of the GNU Lesser General Public License as
   8 # published by the Free Software Foundation, either version 3 of the
   9 # License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # Lesser General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU Lesser General Public
  17 # License along with this program.  If not, see
  18 # <http://www.gnu.org/licenses/>.
  19
  20 """Understanding principal component analysis and varimax rotation
  21
  22 Based Hervé Abdi's example [1] and Trevor Park's implementation notes
  23 [2] for varimax rotation.
  24
  25 >>> import numpy
  26 >>> scores = numpy.array(  # five wines scored with seven tests
  27 ...     [[14, 7, 8,  7, 7, 13, 7],
  28 ...      [10, 7, 6,  4, 3, 14, 7],
  29 ...      [ 8, 5, 6, 10, 5, 12, 5],
  30 ...      [ 2, 4, 7, 16, 7, 11, 3],
  31 ...      [ 6, 2, 4, 13, 3, 10, 3]])
  32 >>> A = numpy.array(  # original loadings (after PCA)
  33 ...     [[-0.3965,  0.1149],   # hedonic
  34 ...      [-0.4454, -0.1090],   # for meat
  35 ...      [-0.2646, -0.5854],   # for dessert
  36 ...      [ 0.4160, -0.3111],   # price
  37 ...      [-0.0485, -0.7245],   # sugar
  38 ...      [-0.4385,  0.0555],   # alcohol
  39 ...      [-0.4547,  0.0865]])  # acidity
  40 >>> print(varimax_criterion(A).round(5))
  41 0.02112
  42
  43 >>> Ar = varimax(A)
  44 >>> Ar.round(4)
  45 array([[-0.4117,  0.03  ],
  46        [-0.413 , -0.1991],
  47        [-0.1372, -0.6276],
  48        [ 0.4716, -0.2179],
  49        [ 0.1031, -0.7188],
  50        [-0.4405, -0.0368],
  51        [-0.4627, -0.0098]])
  52 >>> print(varimax_criterion(Ar).round(5))
  53 0.02367
  54
  55 This is slightly better than Abdi's rotation (his table 3):
  56
  57 >>> Az = numpy.array(  # original loadings (after PCA)
  58 ...     [[-0.4125,  0.0153],   # hedonic
  59 ...      [-0.4057, -0.2138],   # for meat
  60 ...      [-0.1147, -0.6321],   # for dessert
  61 ...      [ 0.4790, -0.2010],   # price
  62 ...      [ 0.1286, -0.7146],   # sugar
  63 ...      [-0.4389, -0.0525],   # alcohol
  64 ...      [-0.4620, -0.0264]])  # acidity
  65 >>> print(varimax_criterion(Az).round(5))
  66 0.02359
  67
  68 There may be some copy/paste or rounding errors in Abdi's table 3,
  69 because he claims a rotation of -15 degrees in his text (which matches
  70 my calculated value), but his table 3 has only been rotated by -14
  71 degrees.
  72
  73 >>> for i,(a,ar,az) in enumerate(zip(A, Ar, Az)):
  74 ...     theta_a = numpy.arctan2(a[1], a[0])
  75 ...     theta_r = numpy.arctan2(ar[1], ar[0])
  76 ...     theta_z = numpy.arctan2(az[1], az[0])
  77 ...     # (theta_init - theta_final) so we can print *axis* rotation
  78 ...     dr = ((theta_a - theta_r) * 180 / numpy.pi) % 360 - 360
  79 ...     dz = ((theta_a - theta_z) * 180 / numpy.pi) % 360 - 360
  80 ...     print("{}: {:.2f}  (vs Abdi's {:.2f})".format(i, dr, dz))
  81 0: -11.99  (vs Abdi's -14.04)
  82 1: -11.99  (vs Abdi's -14.04)
  83 2: -11.99  (vs Abdi's -14.04)
  84 3: -11.99  (vs Abdi's -14.03)
  85 4: -11.99  (vs Abdi's -14.03)
  86 5: -11.99  (vs Abdi's -14.03)
  87 6: -11.99  (vs Abdi's -14.04)
  88
  89 These loading matricies all have the variance stripped out:
  90
  91 >>> print_row(eigenvalues(A), precision=3)
  92  1.000   1.000
  93 >>> print_row(eigenvalues(Ar), precision=3)
  94  1.000   1.000
  95 >>> print_row(eigenvalues(Az), precision=3)
  96  1.000   1.000
  97
  98 As an additional test, let's try to reproduce the factor loadings from
  99 the raw scores:
 100
 101 >>> evals,Ai = pca(scores, output_dim=2)
 102 >>> print_row(evals, precision=3)
 103  4.799   1.846   0.352   0.003   0.000
 104 >>> varimax(Ai).round(4)
 105 array([[-0.4113, -0.0306],
 106        [-0.4111,  0.1994],
 107        [-0.1435,  0.6274],
 108        [ 0.4723,  0.218 ],
 109        [ 0.1076,  0.7188],
 110        [-0.4396,  0.0372],
 111        [-0.4619,  0.0098]])
 112
 113 This is quite similar (modulo column sign) to our earlier rotation of
 114 Abdi's PCA results, and the eigenvalues are close to his (4.7627,
 115 1.8101, 0.3527, and 0.0744).
 116
 117 >>> Ar.round(4)
 118 array([[-0.4117,  0.03  ],
 119        [-0.413 , -0.1991],
 120        [-0.1372, -0.6276],
 121        [ 0.4716, -0.2179],
 122        [ 0.1031, -0.7188],
 123        [-0.4405, -0.0368],
 124        [-0.4627, -0.0098]])
 125
 126 The fraction of the variance explained by the first two factors is:
 127
 128 >>> print(format_number(explained_variance(evals)[:2].sum()))
 129  0.95
 130
 131 which is reasonably close to Abdi's claimed 94%.  The communalities
 132 will be:
 133
 134 >>> print_row(communalities(Ai))
 135  0.17   0.21   0.41   0.27   0.53   0.19   0.21
 136
 137 These loading matricies all have the variance stripped out:
 138
 139 >>> print_row(eigenvalues(Ai), precision=3)
 140  1.000   1.000
 141 >>> print_row(eigenvalues(Ar), precision=3)
 142  1.000   1.000
 143
 144 Checks against Andrew Wiesner
 145 -----------------------------
 146
 147 Checking against the example loadings given by Andrew Wiesner [3]:
 148
 149 >>> A = numpy.array(
 150 ...     [[0.286,  0.076,  0.841],
 151 ...      [0.698,  0.153,  0.084],
 152 ...      [0.744, -0.410, -0.020],
 153 ...      [0.471,  0.522,  0.135],
 154 ...      [0.681, -0.156, -0.148],
 155 ...      [0.498, -0.498, -0.253],
 156 ...      [0.861, -0.115,  0.011],
 157 ...      [0.642,  0.322,  0.044],
 158 ...      [0.298,  0.595, -0.533]])
 159
 160 Wiesner also gives the eigenvalues [3],
 161
 162 >>> evals = numpy.array(
 163 ...    [3.2978,
 164 ...     1.2136,
 165 ...     1.1055,
 166 ...     0.9073,
 167 ...     0.8606,
 168 ...     0.5622,
 169 ...     0.4838,
 170 ...     0.3181,
 171 ...     0.2511])
 172
 173 communalities [4],
 174
 175 >>> com_z = numpy.array(
 176 ...     [0.79500707,
 177 ...      0.51783185,
 178 ...      0.72230182,
 179 ...      0.51244913,
 180 ...      0.50977159,
 181 ...      0.56073895,
 182 ...      0.75382091,
 183 ...      0.51725940,
 184 ...      0.72770402])
 185
 186 and loadings [4]:
 187
 188 >>> Az = numpy.array(
 189 ...     [[ 0.021,  0.239,  0.859],
 190 ...      [ 0.483,  0.547,  0.166],
 191 ...      [ 0.829,  0.127,  0.137],
 192 ...      [ 0.031,  0.702,  0.139],
 193 ...      [ 0.652,  0.289, -0.028],
 194 ...      [ 0.734, -0.094, -0.117],
 195 ...      [ 0.738,  0.432,  0.150],
 196 ...      [ 0.301,  0.656,  0.099],
 197 ...      [-0.022,  0.651, -0.551]])
 198
 199 Basic stuff matches almost exactly with our calculations [3]:
 200
 201 >>> print_row(explained_variance(evals), precision=3)
 202  0.366   0.135   0.123   0.101   0.096   0.062   0.054   0.035   0.028
 203
 204 (Wiesner has 0.3664, 0.1348, 0.1228, 0.1008, 0.0956, 0.0625, 0.0538,
 205 0.0353, and 0.0279 [3])
 206
 207 >>> print(format_number(explained_variance(evals)[:3].sum()))
 208 ... # fraction of total varianced explained by the first 3 common factors
 209  0.62
 210
 211 (Wiesner has 62% [3])
 212
 213 >>> print_row(communalities(A), precision=3)
 214  0.795   0.518   0.722   0.513   0.510   0.560   0.755   0.518   0.727
 215 >>> print_row(com_z, precision=3)
 216  0.795   0.518   0.722   0.512   0.510   0.561   0.754   0.517   0.728
 217
 218 To get the rotated loading to match, we need to scale by the
 219 eigenvalues:
 220
 221 >>> Ar = varimax(A)
 222 >>> Ar.round(3)
 223 array([[ 0.078, -0.06 ,  0.886],
 224        [ 0.497,  0.423,  0.303],
 225        [ 0.842,  0.006,  0.112],
 226        [ 0.107,  0.611,  0.357],
 227        [ 0.677,  0.228,  0.022],
 228        [ 0.715, -0.11 , -0.193],
 229        [ 0.784,  0.296,  0.229],
 230        [ 0.368,  0.55 ,  0.281],
 231        [ 0.022,  0.795, -0.307]])
 232
 233 but they're in the same ballpark, and we have a higher varimax
 234 criterion:
 235
 236 >>> print(format_number(varimax_criterion(Ar), precision=4))
 237  0.0591
 238 >>> print(format_number(varimax_criterion(Az), precision=4))
 239  0.0552
 240
 241 These loading matricies all have the variance still included:
 242
 243 >>> print_row(evals, precision=3)
 244  3.298   1.214   1.105   0.907   0.861   0.562   0.484   0.318   0.251
 245 >>> print_row(eigenvalues(A), precision=3)
 246  3.298   1.213   1.105
 247 >>> print_row(eigenvalues(Ar), precision=3)
 248  2.693   1.643   1.280
 249 >>> print_row(eigenvalues(Az), precision=3)
 250  2.522   1.998   1.154
 251
 252 Checks against Daniel Denis
 253 ---------------------------
 254
 255 Checking against the example PCA given by Daniel Denis [6]:
 256
 257 >>> scores = numpy.array(
 258 ...     [[32, 64, 65, 67],
 259 ...      [61, 37, 62, 65],
 260 ...      [59, 40, 45, 43],
 261 ...      [36, 62, 34, 35],
 262 ...      [62, 46, 43, 40]])
 263 >>> evals,Ai = pca(scores, output_dim=2)
 264 >>> print_row(eigenvalues(Ai), precision=3)
 265  1.000   1.000
 266 >>> Ai *= numpy.sqrt(evals[:2])  # insert the variance
 267 >>> print_row(eigenvalues(Ai), precision=3)
 268  2.016   1.942
 269 >>> print_row(evals, precision=3)
 270  2.016   1.942   0.038   0.004
 271
 272 (Denis has the exact same values on page 4)
 273
 274 >>> print_row(explained_variance(evals))
 275  0.50   0.49   0.01   0.00
 276
 277 (Denis has 50.408%, 48.538%, 0.945%, and 0.109%)
 278 >>> Ar = varimax(Ai)
 279 >>> Ar.round(4)
 280 array([[ 0.0872, -0.9877],
 281        [ 0.0723,  0.9886],
 282        [-0.9973, -0.0258],
 283        [-0.9976,  0.0401]])
 284
 285 This matches up well (modulo column sign) with Denis' loadings:
 286
 287 >>> A = numpy.array(
 288 ...     [[-0.500,  0.856],
 289 ...      [ 0.357, -0.925],
 290 ...      [ 0.891,  0.449],
 291 ...      [ 0.919,  0.390]])
 292 >>> Az = varimax(A)
 293 >>> Az.round(4)
 294 array([[-0.0867,  0.9875],
 295        [-0.0721, -0.9889],
 296        [ 0.9974,  0.0256],
 297        [ 0.9975, -0.0397]])
 298
 299 Denis' loadings are much more tightly aligned, as you can see from the
 300 varimax criterion:
 301
 302 >>> print(format_number(varimax_criterion(Ar), precision=4))
 303  0.2411
 304 >>> print(format_number(varimax_criterion(Az), precision=4))
 305  0.2411
 306
 307 These loading matricies all have the variance still included (because
 308 we added it to our PCA loadings by hand):
 309
 310 >>> print_row(eigenvalues(A), precision=3)
 311  2.016   1.942
 312 >>> print_row(eigenvalues(Az), precision=3)
 313  2.003   1.955
 314 >>> print_row(eigenvalues(Ai), precision=3)
 315  2.016   1.942
 316 >>> print_row(eigenvalues(Ar), precision=3)
 317  2.003   1.955
 318
 319 Denis also has a separate varimax example (page 7):
 320
 321 >>> A = numpy.array(
 322 ...     [[-0.400,  0.900],
 323 ...      [ 0.251, -0.947],
 324 ...      [ 0.932,  0.348],
 325 ...      [ 0.956,  0.286]])
 326
 327 which he rotates into:
 328
 329 >>> Az = numpy.array(
 330 ...     [[-0.086, -0.981],
 331 ...      [-0.071,  0.977],
 332 ...      [ 0.994, -0.026],
 333 ...      [ 0.997,  0.040]])
 334
 335 This matches up well (modulo column sign) with our:
 336
 337 >>> varimax(A).round(3)
 338 array([[-0.085,  0.981],
 339        [-0.071, -0.977],
 340        [ 0.995,  0.026],
 341        [ 0.997, -0.041]])
 342
 343 Checks against SPSS via Newcastle upon Tyne
 344 -------------------------------------------
 345
 346 Putting everything together for some sample data from an old Newcastle
 347 page [7]:
 348
 349 >>> scores = numpy.array(
 350 ...     [[1, 3, 4, 6, 7, 2, 4, 5],
 351 ...      [2, 3, 4, 3, 4, 6, 7, 6],
 352 ...      [4, 5, 6, 7, 7, 2, 3, 4],
 353 ...      [3, 4, 5, 6, 7, 3, 5, 4],
 354 ...      [2, 5, 5, 5, 6, 2, 4, 5],
 355 ...      [3, 4, 6, 7, 7, 4, 3, 5],
 356 ...      [2, 3, 6, 4, 5, 4, 4, 4],
 357 ...      [1, 3, 4, 5, 6, 3, 3, 4],
 358 ...      [3, 3, 5, 6, 6, 4, 4, 3],
 359 ...      [4, 4, 5, 6, 7, 4, 3, 4],
 360 ...      [2, 3, 6, 7, 5, 4, 4, 4],
 361 ...      [2, 3, 5, 7, 6, 3, 3, 3]])
 362 >>> analyze(scores, output_dim=3, unitary=False, delimiter=' ', missing=' '*6)
 363 #eig prop r-eig r-prop com f-0 f-1 f-2 mean std
 364  3.709  0.464  2.456  0.307  0.844 -0.071 -0.122  0.908  2.417  1.101
 365  1.478  0.185  2.072  0.259  0.918 -0.603  0.310  0.677  3.583  0.877
 366  1.361  0.170  2.020  0.252  0.735  0.075 -0.429  0.738  5.083  0.877
 367  0.600  0.075                0.768 -0.485 -0.659  0.313  5.750  1.424
 368  0.417  0.052                0.814 -0.837 -0.219  0.256  6.083  1.101
 369  0.281  0.035                0.865  0.909  0.170  0.102  3.417  1.287
 370  0.129  0.016                0.800  0.564  0.682 -0.131  3.917  1.287
 371  0.026  0.003                0.804  0.051  0.895 -0.032  4.250  0.957
 372
 373 Which matchesthe values given on the Newcastle page (where I've
 374 unshuffled their rows to match the initial column ordering used for
 375 the scores):
 376
 377  3.709  0.464  2.501  0.313  0.844                0.908  2.42   1.00
 378  1.478  0.185  2.045  0.256  0.918  0.637         0.634  3.58   0.79
 379  1.361  0.170  2.002  0.250  0.735                0.760  5.08   0.79
 380  0.600  0.075                0.768        -0.652         5.75   1.29
 381  0.417  0.052                0.814  0.845                6.08   1.00
 382  0.281  0.035                0.865 -0.901                3.42   1.16
 383  0.129  0.016                0.800 -0.560  0.684         3.92   1.16
 384  0.026  0.003                0.804         0.893         4.25   0.87
 385
 386 Their standard deviations differ from mine because they were using one
 387 for the delta degrees of freedom while I was using the number of
 388 factors:
 389
 390 >>> print_row(scores.std(axis=0, ddof=1))
 391  1.00   0.79   0.79   1.29   1.00   1.16   1.16   0.87
 392
 393 [1]: http://www.utd.edu/~herve/Abdi-rotations-pretty.pdf
 394 [2]: http://www.stat.ufl.edu/~tpark/Research/about_varimax.html
 395 [3]: http://sites.stat.psu.edu/~ajw13/stat505/fa06/17_factor/06_factor_example.html
 396 [4]: http://sites.stat.psu.edu/~ajw13/stat505/fa06/17_factor/07_factor_commun.html
 397 [5]: http://sites.stat.psu.edu/~ajw13/stat505/fa06/17_factor/13_factor_varimax.html
 398 [6]: http://psychweb.psy.umt.edu/denis/datadecision/factor/dd_fa_part_2_aug_2009.pdf
 399 [7]: http://web.archive.org/web/20051125011642/http://www.ncl.ac.uk/iss/statistics/docs/factoranalysis.html
 400 """
 401
 402 import itertools as _itertools
 403
 404 import numpy as _numpy
 405
 406
 407 def format_number(x, precision=2):
 408     if isinstance(x, str):
 409         return x
 410     elif _numpy.isnan(x) or isinstance(x, int):
 411         return str(x)
 412     format_string = '{{: .{}f}}'.format(precision)
 413     return format_string.format(x)
 414
 415
 416 def print_row(row, delimiter='  ', precision=2):
 417     print(delimiter.join(format_number(x, precision=precision) for x in row))
 418
 419
 420 def print_matrix(matrix, delimiter='  ', precision=2):
 421     print('\n'.join(delimiter.join(format_number(x, precision=precision)
 422                                    for x in row)
 423                     for row in matrix))
 424
 425
 426 def varimax_criterion(A):
 427     """Return the varimax criterion of a factor-loading matrix A
 428
 429       V(A) = 1/k ∑ⱼ (1/d ∑ᵢ Aᵢⱼ⁴ - (1/d ∑ᵢ Aᵢⱼ²)²)
 430            = 1/k ∑ⱼ (<Aᵢⱼ⁴>ᵢ - <Aᵢⱼ²>ᵢ²)
 431            = var(A²)
 432     """
 433     return _numpy.var(A**2)
 434
 435
 436 def pca(scores, ddof=0, output_dim=None):
 437     """Principal component analysis via singular value decomposition
 438
 439     Return an array of eigenvalues (variance scaling) and an
 440     orthogonal matrix of unitary eigenvectors (the factor loadings).
 441     """
 442     X = scores
 443     d,k = X.shape
 444     X = (X - X.mean(axis=0)) / X.std(axis=0, ddof=0) / _numpy.sqrt(d-ddof)
 445     U,s,V = _numpy.linalg.svd(X, full_matrices=False)
 446     S = _numpy.diag(s)
 447     G = U.dot(_numpy.diag(S))
 448     ss = _numpy.sort(s)[::-1]
 449     assert (s == ss).all(), (s, ss, s-ss)
 450     if output_dim is None:
 451         output_dim = V.T.shape[1]
 452     eigenvalues = s**2  # see explained_variance.__doc__
 453     return (eigenvalues, V.T[:,:output_dim])
 454
 455
 456 def communalities(A):
 457     """Communalities for each measured attribute
 458
 459     This how well the model fits each attribute.  It's similar to the
 460     coefficient of determination (R²) for linear regressions.
 461
 462     http://sites.stat.psu.edu/~ajw13/stat505/fa06/17_factor/07_factor_commun.html
 463     """
 464     return (A**2).sum(axis=1).T
 465
 466
 467 def eigenvalues(A):
 468     """Extract the measured attribute variance from the loading matrix
 469
 470     Some loading matricies have the measured attribute variance still
 471     built into the loading terms (like covariance), while some have
 472     the variance scaled out of them (i.e. unit eigenvectors, like
 473     correlations).  If the variance is built in, this function find
 474     it.
 475     """
 476     return (A**2).sum(axis=0)
 477
 478
 479 def sort_eigenvalues(A):
 480     """Sort non-unitary eigenvalues in decreasing magnitude
 481     """
 482     evals = eigenvalues(A)
 483     B = 0*A
 484     for i,j in enumerate(reversed(evals.argsort())):
 485         B[:,i] = A[:,j]
 486     return B
 487
 488
 489 def reduce_double_negatives(A):
 490     """Flip eigenvalue sign to make largest component positive
 491     """
 492     for i in range(A.shape[1]):
 493         if abs(A[:,i].min()) > A[:,i].max():
 494             A[:,i] *= -1
 495     return A
 496
 497
 498 def explained_variance(eigenvalues, total=None):
 499     """Fraction of explained variance for each measured attribute
 500
 501     The common factors have unit variance, so the (squared)
 502     eigenvalues of the factors give the variance of the modeled
 503     attributes.  "Squared" is in parenthesis, because you need to
 504     square the eigenvalues calculated using the score SVD (converting
 505     standard deviations to variance), but you don't need to square
 506     eigenvalues calculated from the covariance matrix.
 507     """
 508     if total is None:
 509         total = eigenvalues.sum()
 510     return eigenvalues / total
 511
 512
 513 def varimax(A, max_iterations=None, epsilon=1e-7):
 514     """Return the varimax-rotated version of a factor-loading matrix A
 515
 516       z  = 1/d ∑ⱼ (Aⱼᵤ + iAⱼᵥ)⁴ - (1/d ∑ⱼ (Aⱼᵤ + iAⱼᵥ)²)²
 517       φ* = 1/4 ∠(z)
 518     """
 519     d,k = A.shape
 520     od = 1.0/d
 521     if k == 1:  # only one common factor, so there's nothing to rotate
 522         return A
 523     if max_iterations is None:
 524         if k == 2:  # only one combination, so repetition is redundant
 525             max_iterations = 1
 526         else:
 527             max_iterations = 50
 528     ovc = varimax_criterion(A)
 529     for i in range(max_iterations):
 530         for p,q in _itertools.combinations(range(k), 2):
 531             Aj = _numpy.array([_numpy.complex(Ajp,Ajq)
 532                                for Ajp,Ajq in zip(A[:,p], A[:,q])])
 533             z = od * (Aj**4).sum() - (od * (Aj**2).sum())**2
 534             phi = 0.25 * _numpy.angle(z)
 535             R = _numpy.eye(k)
 536             R[p,p] = R[q,q] = _numpy.cos(phi)
 537             R[q,p] = _numpy.sin(phi)
 538             R[p,q] = - R[q,p]
 539             A = A.dot(R)
 540         vc = varimax_criterion(A)
 541         if (vc - ovc) < epsilon * vc:
 542             return A  # converged  :)
 543         ovc = vc
 544     # didn't converge (if k != 2)  :(
 545     return A
 546
 547
 548 def analyze(scores, output_dim=3, unitary=False, delimiter='\t', missing=''):
 549     d,k = scores.shape
 550     masked_scores = _numpy.ma.array(scores, mask=_numpy.isnan(scores))
 551     mu = masked_scores.mean(axis=0)
 552     std = masked_scores.std(axis=0, ddof=output_dim)
 553     masked_scores = (masked_scores - mu).filled(0)
 554     evals,A = pca(masked_scores, output_dim=output_dim)
 555     if not unitary:
 556         A *= _numpy.sqrt(evals[:output_dim])
 557     A = varimax(A)
 558     if not unitary:
 559         A = sort_eigenvalues(A)
 560     A = reduce_double_negatives(A)
 561     print('#' + delimiter.join(
 562             [
 563                 'eig', 'prop', 'r-eig', 'r-prop', 'com'
 564                 ] + [
 565                 'f-{}'.format(i) for i in range(output_dim)
 566                 ] + [
 567                 'mean', 'std']))
 568     r_evals = eigenvalues(A)
 569     explained = explained_variance(evals)
 570     r_explained = explained_variance(r_evals, total=evals.sum())
 571     com = communalities(A)
 572     for i in range(k):
 573         r_eval = r_exp = missing
 574         if i < len(r_evals):
 575             r_eval = r_evals[i]
 576             r_exp = r_explained[i]
 577         row = [
 578             evals[i],
 579             explained[i],
 580             r_eval,
 581             r_exp,
 582             com[i],
 583             ] + list(A[i,:]) + [
 584             mu[i],
 585             std[i],
 586             ]
 587         print_row(row, delimiter=delimiter, precision=3)
 588
 589
 590 if __name__ == '__main__':
 591     import argparse
 592
 593     parser = argparse.ArgumentParser()
 594     parser.add_argument(
 595         '-n', dest='output_dim', type=int, default=1,
 596         help='number of common factors to use')
 597     parser.add_argument(
 598         '-u', dest='unitary', default=False, action='store_const', const=True,
 599         help='use unitary eigenvalues for factor loading')
 600     parser.add_argument(
 601         '-d', dest='drop', type=int, action='append',
 602         help='drop a column from the analysis')
 603     parser.add_argument(
 604         'path',
 605         help='path to the tab-delimited data file to analyze')
 606
 607     args = parser.parse_args()
 608
 609     scores = _numpy.genfromtxt(fname=args.path, delimiter='\t')
 610     if args.drop:
 611         scores = _numpy.delete(scores, args.drop, axis=1)
 612     analyze(scores=scores, output_dim=args.output_dim, unitary=args.unitary)