git-svn: convert 'set-tree' command to use Git::SVN
authorEric Wong <normalperson@yhbt.net>
Mon, 15 Jan 2007 07:21:16 +0000 (23:21 -0800)
committerEric Wong <normalperson@yhbt.net>
Fri, 23 Feb 2007 08:57:09 +0000 (00:57 -0800)
Signed-off-by: Eric Wong <normalperson@yhbt.net>
git-svn.perl

index bf53b2d69b62048bc22a0b1e731cf60383555d9f..261e33d02302d68c0d10fc3d3ad5e59d13975e01 100755 (executable)
@@ -24,16 +24,6 @@ $ENV{TZ} = 'UTC';
 $ENV{LC_ALL} = 'C';
 $| = 1; # unbuffer STDOUT
 
-# properties that we do not log:
-my %SKIP = ( 'svn:wc:ra_dav:version-url' => 1,
-             'svn:special' => 1,
-             'svn:executable' => 1,
-             'svn:entry:committed-rev' => 1,
-             'svn:entry:last-author' => 1,
-             'svn:entry:uuid' => 1,
-             'svn:entry:committed-date' => 1,
-);
-
 sub fatal (@) { print STDERR @_; exit 1 }
 require SVN::Core; # use()-ing this causes segfaults for me... *shrug*
 require SVN::Ra;
@@ -113,8 +103,9 @@ my %cmd = (
                          'strategy|s=s' => \$_strategy,
                          'dry-run|n' => \$_dry_run,
                        %cmt_opts, %fc_opts } ],
-       'set-tree' => [ \&commit, "Set an SVN repository to a git tree-ish",
-                       {       'stdin|' => \$_stdin, %cmt_opts, %fc_opts, } ],
+       'set-tree' => [ \&cmd_set_tree,
+                       "Set an SVN repository to a git tree-ish",
+                       { 'stdin|' => \$_stdin, %cmt_opts, %fc_opts, } ],
        'show-ignore' => [ \&cmd_show_ignore, "Show svn:ignore listings",
                        { 'revision|r=i' => \$_revision } ],
        rebuild => [ \&cmd_rebuild, "Rebuild git-svn metadata (after git clone)",
@@ -301,94 +292,8 @@ sub cmd_fetch {
        }
 }
 
-sub fetch {
-       check_upgrade_needed();
-       $SVN_URL ||= file_to_s("$GIT_SVN_DIR/info/url");
-       my $ret = fetch_lib(@_);
-       if ($ret->{commit} && !verify_ref('refs/heads/master^0')) {
-               command_noisy(qw(update-ref refs/heads/master),$ret->{commit});
-       }
-       return $ret;
-}
-
-sub fetch_lib {
-       my (@parents) = @_;
-       $SVN_URL ||= file_to_s("$GIT_SVN_DIR/info/url");
-       $SVN ||= Git::SVN::Ra->new($SVN_URL);
-       my ($last_rev, $last_commit) = svn_grab_base_rev();
-       my ($base, $head) = libsvn_parse_revision($last_rev);
-       if ($base > $head) {
-               return { revision => $last_rev, commit => $last_commit }
-       }
-       my $index = set_index($GIT_SVN_INDEX);
-
-       # limit ourselves and also fork() since get_log won't release memory
-       # after processing a revision and SVN stuff seems to leak
-       my $inc = 1000;
-       my ($min, $max) = ($base, $head < $base+$inc ? $head : $base+$inc);
-       if (defined $last_commit) {
-               unless (-e $GIT_SVN_INDEX) {
-                       command_noisy('read-tree', $last_commit);
-               }
-               my $x = command_oneline('write-tree');
-               my ($y) = (command(qw/cat-file commit/, $last_commit)
-                                                       =~ /^tree ($sha1)/m);
-               if ($y ne $x) {
-                       unlink $GIT_SVN_INDEX or croak $!;
-                       command_noisy('read-tree', $last_commit);
-               }
-               $x = command_oneline('write-tree');
-               if ($y ne $x) {
-                       print STDERR "trees ($last_commit) $y != $x\n",
-                                "Something is seriously wrong...\n";
-               }
-       }
-       while (1) {
-               # fork, because using SVN::Pool with get_log() still doesn't
-               # seem to help enough to keep memory usage down.
-               defined(my $pid = fork) or croak $!;
-               if (!$pid) {
-                       $SVN::Error::handler = \&libsvn_skip_unknown_revs;
-
-                       # Yes I'm perfectly aware that the fourth argument
-                       # below is the limit revisions number.  Unfortunately
-                       # performance sucks with it enabled, so it's much
-                       # faster to fetch revision ranges instead of relying
-                       # on the limiter.
-                       $SVN->dup->get_log([''], $min, $max, 0, 1, 1,
-                               sub {
-                                       my $log_entry;
-                                       if ($last_commit) {
-                                               $log_entry = libsvn_fetch(
-                                                       $last_commit, @_);
-                                               $last_commit = git_commit(
-                                                       $log_entry,
-                                                       $last_commit,
-                                                       @parents);
-                                       } else {
-                                               $log_entry = libsvn_new_tree(@_);
-                                               $last_commit = git_commit(
-                                                       $log_entry, @parents);
-                                       }
-                               });
-                       exit 0;
-               }
-               waitpid $pid, 0;
-               croak $? if $?;
-               ($last_rev, $last_commit) = svn_grab_base_rev();
-               last if ($max >= $head);
-               $min = $max + 1;
-               $max += $inc;
-               $max = $head if ($max > $head);
-               $SVN = Git::SVN::Ra->new($SVN_URL);
-       }
-       restore_index($index);
-       return { revision => $last_rev, commit => $last_commit };
-}
-
-sub commit {
+sub cmd_set_tree {
        my (@commits) = @_;
-       check_upgrade_needed();
        if ($_stdin || !@commits) {
                print "Reading from stdin...\n";
                @commits = ();
@@ -406,81 +311,20 @@ sub commit {
                } elsif (scalar @tmp > 1) {
                        push @revs, reverse(command('rev-list',@tmp));
                } else {
-                       die "Failed to rev-parse $c\n";
+                       fatal "Failed to rev-parse $c\n";
                }
        }
-       commit_lib(@revs);
-       print "Done committing ",scalar @revs," revisions to SVN\n";
-}
-
-sub commit_lib {
-       my (@revs) = @_;
-       my ($r_last, $cmt_last) = svn_grab_base_rev();
-       defined $r_last or die "Must have an existing revision to commit\n";
-       my $fetched = fetch();
-       if ($r_last != $fetched->{revision}) {
-               print STDERR "There are new revisions that were fetched ",
-                               "and need to be merged (or acknowledged) ",
-                               "before committing.\n",
-                               "last rev: $r_last\n",
-                               " current: $fetched->{revision}\n";
-               exit 1;
-       }
-       my $commit_msg = "$GIT_SVN_DIR/.svn-commit.tmp.$$";
-
-       my $repo;
-       set_svn_commit_env();
-       foreach my $c (@revs) {
-               my $log_entry = get_commit_entry($c, $commit_msg);
-
-               # fork for each commit because there's a memory leak I
-               # can't track down... (it's probably in the SVN code)
-               defined(my $pid = open my $fh, '-|') or croak $!;
-               if (!$pid) {
-                       my $pool = SVN::Pool->new;
-                       my $ed = SVN::Git::Editor->new(
-                                       {       r => $r_last,
-                                               ra => $SVN->dup,
-                                               svn_path => $SVN->{svn_path},
-                                       },
-                                       $SVN->get_commit_editor(
-                                               $log_entry->{log},
-                                               sub {
-                                                       libsvn_commit_cb(
-                                                               @_, $c,
-                                                               $log_entry->{log},
-                                                               $r_last,
-                                                               $cmt_last)
-                                               }, $pool)
-                                       );
-                       my $mods = $ed->apply_diff($cmt_last, $c);
-                       if (@$mods == 0) {
-                               print "No changes\nr$r_last = $cmt_last\n";
-                       }
-                       $pool->clear;
-                       exit 0;
-               }
-               my ($r_new, $cmt_new, $no);
-               while (<$fh>) {
-                       print $_;
-                       chomp;
-                       if (/^r(\d+) = ($sha1)$/o) {
-                               ($r_new, $cmt_new) = ($1, $2);
-                       } elsif ($_ eq 'No changes') {
-                               $no = 1;
-                       }
-               }
-               close $fh or exit 1;
-               if (! defined $r_new && ! defined $cmt_new) {
-                       unless ($no) {
-                               die "Failed to parse revision information\n";
-                       }
-               } else {
-                       ($r_last, $cmt_last) = ($r_new, $cmt_new);
-               }
+       my $gs = Git::SVN->new;
+       my ($r_last, $cmt_last) = $gs->last_rev_commit;
+       $gs->fetch;
+       if ($r_last != $gs->{last_rev}) {
+               fatal "There are new revisions that were fetched ",
+                     "and need to be merged (or acknowledged) ",
+                     "before committing.\nlast rev: $r_last\n",
+                     " current: $gs->{last_rev}\n";
        }
-       $ENV{LC_ALL} = 'C';
-       unlink $commit_msg;
+       $gs->set_tree($_) foreach @revs;
+       print "Done committing ",scalar @revs," revisions to SVN\n";
 }
 
 sub cmd_dcommit {
@@ -1055,14 +899,6 @@ sub get_commit_entry {
        \%log_entry;
 }
 
-sub set_svn_commit_env {
-       if (defined $LC_ALL) {
-               $ENV{LC_ALL} = $LC_ALL;
-       } else {
-               delete $ENV{LC_ALL};
-       }
-}
-
 sub rev_list_raw {
        my ($fh, $c) = command_output_pipe(qw/rev-list --pretty=raw/, @_);
        return { fh => $fh, ctx => $c, t => { } };
@@ -1109,124 +945,6 @@ sub file_to_s {
        return $ret;
 }
 
-sub assert_revision_unknown {
-       my $r = shift;
-       if (my $c = revdb_get($REVDB, $r)) {
-               croak "$r = $c already exists! Why are we refetching it?";
-       }
-}
-
-sub git_commit {
-       my ($log_entry, @parents) = @_;
-       assert_revision_unknown($log_entry->{revision});
-       map_tree_joins() if (@_branch_from && !%tree_map);
-
-       my (@tmp_parents, @exec_parents, %seen_parent);
-       if (my $lparents = $log_entry->{parents}) {
-               @tmp_parents = @$lparents
-       }
-       # commit parents can be conditionally bound to a particular
-       # svn revision via: "svn_revno=commit_sha1", filter them out here:
-       foreach my $p (@parents) {
-               next unless defined $p;
-               if ($p =~ /^(\d+)=($sha1_short)$/o) {
-                       if ($1 == $log_entry->{revision}) {
-                               push @tmp_parents, $2;
-                       }
-               } else {
-                       push @tmp_parents, $p if $p =~ /$sha1_short/o;
-               }
-       }
-       my $tree = $log_entry->{tree};
-       if (!defined $tree) {
-               my $index = set_index($GIT_SVN_INDEX);
-               $tree = command_oneline('write-tree');
-               croak $? if $?;
-               restore_index($index);
-       }
-       # just in case we clobber the existing ref, we still want that ref
-       # as our parent:
-       if (my $cur = verify_ref("refs/remotes/$GIT_SVN^0")) {
-               chomp $cur;
-               push @tmp_parents, $cur;
-       }
-
-       if (exists $tree_map{$tree}) {
-               foreach my $p (@{$tree_map{$tree}}) {
-                       my $skip;
-                       foreach (@tmp_parents) {
-                               # see if a common parent is found
-                               my $mb = eval { command('merge-base', $_, $p) };
-                               next if ($@ || $?);
-                               $skip = 1;
-                               last;
-                       }
-                       next if $skip;
-                       my ($url_p, $r_p, $uuid_p) = cmt_metadata($p);
-                       next if (($SVN->uuid eq $uuid_p) &&
-                                               ($log_entry->{revision} > $r_p));
-                       next if (defined $url_p && defined $SVN_URL &&
-                                               ($SVN->uuid eq $uuid_p) &&
-                                               ($url_p eq $SVN_URL));
-                       push @tmp_parents, $p;
-               }
-       }
-       foreach (@tmp_parents) {
-               next if $seen_parent{$_};
-               $seen_parent{$_} = 1;
-               push @exec_parents, $_;
-               # MAXPARENT is defined to 16 in commit-tree.c:
-               last if @exec_parents > 16;
-       }
-
-       set_commit_env($log_entry);
-       my @exec = ('git-commit-tree', $tree);
-       push @exec, '-p', $_  foreach @exec_parents;
-       defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec))
-                                                               or croak $!;
-       print $msg_fh $log_entry->{log} or croak $!;
-       unless ($_no_metadata) {
-               print $msg_fh "\ngit-svn-id: $SVN_URL\@$log_entry->{revision} ",
-                                       $SVN->uuid,"\n" or croak $!;
-       }
-       $msg_fh->flush == 0 or croak $!;
-       close $msg_fh or croak $!;
-       chomp(my $commit = do { local $/; <$out_fh> });
-       close $out_fh or croak $!;
-       waitpid $pid, 0;
-       croak $? if $?;
-       if ($commit !~ /^$sha1$/o) {
-               die "Failed to commit, invalid sha1: $commit\n";
-       }
-       command_noisy('update-ref',"refs/remotes/$GIT_SVN",$commit);
-       revdb_set($REVDB, $log_entry->{revision}, $commit);
-
-       # this output is read via pipe, do not change:
-       print "r$log_entry->{revision} = $commit\n";
-       return $commit;
-}
-
-sub check_repack {
-       if ($_repack && (--$_repack_nr == 0)) {
-               $_repack_nr = $_repack;
-               # repack doesn't use any arguments with spaces in them, does it?
-               command_noisy('repack', split(/\s+/, $_repack_flags));
-       }
-}
-
-sub set_commit_env {
-       my ($log_entry) = @_;
-       my $author = $log_entry->{author};
-       if (!defined $author || length $author == 0) {
-               $author = '(no author)';
-       }
-       my ($name,$email) = defined $users{$author} ?  @{$users{$author}}
-                               : ($author,$author . '@' . $SVN->uuid);
-       $ENV{GIT_AUTHOR_NAME} = $ENV{GIT_COMMITTER_NAME} = $name;
-       $ENV{GIT_AUTHOR_EMAIL} = $ENV{GIT_COMMITTER_EMAIL} = $email;
-       $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date};
-}
-
 sub check_upgrade_needed {
        if (!-r $REVDB) {
                -d $GIT_SVN_DIR or mkpath([$GIT_SVN_DIR]);
@@ -1859,7 +1577,7 @@ sub write_untracked {
                foreach my $path (sort keys %$h) {
                        my $ppath = $path eq '' ? '.' : $path;
                        foreach my $prop (sort keys %{$h->{$path}}) {
-                               next if $SKIP{$prop};
+                               next if $SKIP_PROP{$prop};
                                my $v = $h->{$path}->{$prop};
                                if (defined $v) {
                                        print $fh "  +$t: ",
@@ -1975,7 +1693,7 @@ sub set_tree_cb {
 
 sub set_tree {
        my ($self, $tree) = (shift, shift);
-       my $log_entry = get_commit_entry($tree);
+       my $log_entry = ::get_commit_entry($tree);
        unless ($self->{last_rev}) {
                fatal("Must have an existing revision to commit\n");
        }
@@ -2218,118 +1936,6 @@ sub uri_decode {
        $f
 }
 
-sub libsvn_log_entry {
-       my ($rev, $author, $date, $log, $parents, $untracked) = @_;
-       my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
-                                        (\d\d)\:(\d\d)\:(\d\d).\d+Z$/x)
-                               or die "Unable to parse date: $date\n";
-       if (defined $author && length $author > 0 &&
-           defined $_authors && ! defined $users{$author}) {
-               die "Author: $author not defined in $_authors file\n";
-       }
-       $log = '' if ($rev == 0 && !defined $log);
-
-       open my $un, '>>', "$GIT_SVN_DIR/unhandled.log" or croak $!;
-       my $h;
-       print $un "r$rev\n" or croak $!;
-       $h = $untracked->{empty};
-       foreach (sort keys %$h) {
-               my $act = $h->{$_} ? '+empty_dir' : '-empty_dir';
-               print $un "  $act: ", uri_encode($_), "\n" or croak $!;
-               warn "W: $act: $_\n";
-       }
-       foreach my $t (qw/dir_prop file_prop/) {
-               $h = $untracked->{$t} or next;
-               foreach my $path (sort keys %$h) {
-                       my $ppath = $path eq '' ? '.' : $path;
-                       foreach my $prop (sort keys %{$h->{$path}}) {
-                               next if $SKIP{$prop};
-                               my $v = $h->{$path}->{$prop};
-                               if (defined $v) {
-                                       print $un "  +$t: ",
-                                                 uri_encode($ppath), ' ',
-                                                 uri_encode($prop), ' ',
-                                                 uri_encode($v), "\n"
-                                                 or croak $!;
-                               } else {
-                                       print $un "  -$t: ",
-                                                 uri_encode($ppath), ' ',
-                                                 uri_encode($prop), "\n"
-                                                 or croak $!;
-                               }
-                       }
-               }
-       }
-       foreach my $t (qw/absent_file absent_directory/) {
-               $h = $untracked->{$t} or next;
-               foreach my $parent (sort keys %$h) {
-                       foreach my $path (sort @{$h->{$parent}}) {
-                               print $un "  $t: ",
-                                     uri_encode("$parent/$path"), "\n"
-                                     or croak $!;
-                               warn "W: $t: $parent/$path ",
-                                    "Insufficient permissions?\n";
-                       }
-               }
-       }
-
-       # revprops (make this optional? it's an extra network trip...)
-       my $rp = $SVN->rev_proplist($rev);
-       foreach (sort keys %$rp) {
-               next if /^svn:(?:author|date|log)$/;
-               print $un "  rev_prop: ", uri_encode($_), ' ',
-                         uri_encode($rp->{$_}), "\n";
-       }
-       close $un or croak $!;
-
-       { revision => $rev, date => "+0000 $Y-$m-$d $H:$M:$S",
-         author => $author, log => $log."\n", parents => $parents || [],
-         revprops => $rp }
-}
-
-sub libsvn_fetch {
-       my ($last_commit, $paths, $rev, $author, $date, $log) = @_;
-       my $ed = SVN::Git::Fetcher->new({ c => $last_commit, q => $_q });
-       my (undef, $last_rev, undef) = cmt_metadata($last_commit);
-       unless ($SVN->gs_do_update($last_rev, $rev, '', 1, $ed)) {
-               die "SVN connection failed somewhere...\n";
-       }
-       libsvn_log_entry($rev, $author, $date, $log, [$last_commit], $ed);
-}
-
-sub svn_grab_base_rev {
-       my $c = eval { command_oneline([qw/rev-parse --verify/,
-                                       "refs/remotes/$GIT_SVN^0"],
-                                       { STDERR => 0 }) };
-       if (defined $c && length $c) {
-               my ($url, $rev, $uuid) = cmt_metadata($c);
-               return ($rev, $c) if defined $rev;
-       }
-       if ($_no_metadata) {
-               my $offset = -41; # from tail
-               my $rl;
-               open my $fh, '<', $REVDB or
-                       die "--no-metadata specified and $REVDB not readable\n";
-               seek $fh, $offset, 2;
-               $rl = readline $fh;
-               defined $rl or return (undef, undef);
-               chomp $rl;
-               while ($c ne $rl && tell $fh != 0) {
-                       $offset -= 41;
-                       seek $fh, $offset, 2;
-                       $rl = readline $fh;
-                       defined $rl or return (undef, undef);
-                       chomp $rl;
-               }
-               my $rev = tell $fh;
-               croak $! if ($rev < -1);
-               $rev =  ($rev - 41) / 41;
-               close $fh or croak $!;
-               return ($rev, $c);
-       }
-       return (undef, undef);
-}
-
 sub libsvn_parse_revision {
        my $base = shift;
        my $head = $SVN->get_latest_revnum();
@@ -2450,14 +2056,6 @@ sub libsvn_find_parent_branch {
        return undef;
 }
 
-sub libsvn_new_tree {
-       if (my $log_entry = libsvn_find_parent_branch(@_)) {
-               return $log_entry;
-       }
-       my ($paths, $rev, $author, $date, $log) = @_; # $pool is last
-       _libsvn_new_tree($paths, $rev, $author, $date, $log, []);
-}
-
 sub _libsvn_new_tree {
        my ($paths, $rev, $author, $date, $log, $parents) = @_;
        my $ed = SVN::Git::Fetcher->new({q => $_q});
@@ -2513,82 +2111,6 @@ sub libsvn_graft_file_copies {
        }
 }
 
-sub set_index {
-       my $old = $ENV{GIT_INDEX_FILE};
-       $ENV{GIT_INDEX_FILE} = shift;
-       return $old;
-}
-
-sub restore_index {
-       my ($old) = @_;
-       if (defined $old) {
-               $ENV{GIT_INDEX_FILE} = $old;
-       } else {
-               delete $ENV{GIT_INDEX_FILE};
-       }
-}
-
-sub libsvn_commit_cb {
-       my ($rev, $date, $committer, $c, $log, $r_last, $cmt_last) = @_;
-       if ($_optimize_commits && $rev == ($r_last + 1)) {
-               my $log = libsvn_log_entry($rev,$committer,$date,$log);
-               $log->{tree} = get_tree_from_treeish($c);
-               my $cmt = git_commit($log, $cmt_last, $c);
-               my @diff = command('diff-tree', $cmt, $c);
-               if (@diff) {
-                       print STDERR "Trees differ: $cmt $c\n",
-                                       join('',@diff),"\n";
-                       exit 1;
-               }
-       } else {
-               fetch("$rev=$c");
-       }
-}
-
-sub libsvn_skip_unknown_revs {
-       my $err = shift;
-       my $errno = $err->apr_err();
-       # Maybe the branch we're tracking didn't
-       # exist when the repo started, so it's
-       # not an error if it doesn't, just continue
-       #
-       # Wonderfully consistent library, eh?
-       # 160013 - svn:// and file://
-       # 175002 - http(s)://
-       # 175007 - http(s):// (this repo required authorization, too...)
-       #   More codes may be discovered later...
-       if ($errno == 175007 || $errno == 175002 || $errno == 160013) {
-               return;
-       }
-       croak "Error from SVN, ($errno): ", $err->expanded_message,"\n";
-};
-
-# Tie::File seems to be prone to offset errors if revisions get sparse,
-# it's not that fast, either.  Tie::File is also not in Perl 5.6.  So
-# one of my favorite modules is out :<  Next up would be one of the DBM
-# modules, but I'm not sure which is most portable...  So I'll just
-# go with something that's plain-text, but still capable of
-# being randomly accessed.  So here's my ultra-simple fixed-width
-# database.  All records are 40 characters + "\n", so it's easy to seek
-# to a revision: (41 * rev) is the byte offset.
-# A record of 40 0s denotes an empty revision.
-# And yes, it's still pretty fast (faster than Tie::File).
-sub revdb_set {
-       my ($file, $rev, $commit) = @_;
-       length $commit == 40 or croak "arg3 must be a full SHA1 hexsum\n";
-       open my $fh, '+<', $file or croak $!;
-       my $offset = $rev * 41;
-       # assume that append is the common case:
-       seek $fh, 0, 2 or croak $!;
-       my $pos = tell $fh;
-       if ($pos < $offset) {
-               print $fh (('0' x 40),"\n") x (($offset - $pos) / 41);
-       }
-       seek $fh, $offset, 0 or croak $!;
-       print $fh $commit,"\n";
-       close $fh or croak $!;
-}
-
 sub revdb_get {
        my ($file, $rev) = @_;
        my $ret;