From eaf718f3ece277462de4e47391e5a965bbbaa297 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Mon, 10 Oct 2005 11:40:43 +0200 Subject: [PATCH] New: git-svnimport. As the name suggests, this script imports from SVN. Only "normal" SVN repositories (with single trunk/, branches/, and tags/ subdrectories) are supported. Incremental imports require preserving the file .git/svn2git. Signed-Off-by: Matthias Urlichs --- Documentation/git-svnimport.txt | 99 +++++ Makefile | 3 +- git-svnimport.perl | 671 ++++++++++++++++++++++++++++++++ 3 files changed, 772 insertions(+), 1 deletion(-) create mode 100644 Documentation/git-svnimport.txt create mode 100755 git-svnimport.perl diff --git a/Documentation/git-svnimport.txt b/Documentation/git-svnimport.txt new file mode 100644 index 000000000..be03a65f8 --- /dev/null +++ b/Documentation/git-svnimport.txt @@ -0,0 +1,99 @@ +git-svnimport(1) +================ +v0.1, July 2005 + +NAME +---- +git-svnimport - Import a SVN repository into git + + +SYNOPSIS +-------- +'git-svnimport' [ -o ] [ -h ] [ -v ] + [ -C ] [ -i ] [ -u ] + [ -b branch_subdir ] [ -t trunk_subdir ] [ -T tag_subdir ] + [ -m ] [ -M regex ] [ ] + + +DESCRIPTION +----------- +Imports a SVN repository into git. It will either create a new +repository, or incrementally import into an existing one. + +SVN access is done by the SVN:: Perl module. + +git-svnimport assumes that SVN repositories are organized into one +"trunk" directory where the main development happens, "branch/FOO" +directories for branches, and "/tags/FOO" directories for tags. +Other subdirectories are ignored. + +git-svnimport creates a file ".git/svn2git", which is required for +incremental SVN imports. + +OPTIONS +------- +-C :: + The GIT repository to import to. If the directory doesn't + exist, it will be created. Default is the current directory. + +-i:: + Import-only: don't perform a checkout after importing. This option + ensures the working directory and cache remain untouched and will + not create them if they do not exist. + +-t :: + Name the SVN trunk. Default "trunk". + +-T :: + Name the SVN subdirectory for tags. Default "tags". + +-b :: + Name the SVN subdirectory for branches. Default "branches". + +-o :: + The 'trunk' branch from SVN is imported to the 'origin' branch within + the git repository. Use this option if you want to import into a + different branch. + +-m:: + Attempt to detect merges based on the commit message. This option + will enable default regexes that try to capture the name source + branch name from the commit message. + +-M :: + Attempt to detect merges based on the commit message with a custom + regex. It can be used with -m to also see the default regexes. + You must escape forward slashes. + +-v:: + Verbosity: let 'svnimport' report what it is doing. + +:: + The URL of the SVN module you want to import. For local + repositories, use "file:///absolute/path". + +-h:: + Print a short usage message and exit. + +OUTPUT +------ +If '-v' is specified, the script reports what it is doing. + +Otherwise, success is indicated the Unix way, i.e. by simply exiting with +a zero exit status. + +Author +------ +Written by Matthias Urlichs , with help from +various participants of the git-list . + +Based on a cvs2git script by the same author. + +Documentation +-------------- +Documentation by Matthias Urlichs . + +GIT +--- +Part of the gitlink:git[7] suite + diff --git a/Makefile b/Makefile index fd4e163ba..30fda9ec7 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,8 @@ SCRIPT_SH = \ SCRIPT_PERL = \ git-archimport.perl git-cvsimport.perl git-relink.perl \ - git-rename.perl git-shortlog.perl git-fmt-merge-msg.perl + git-rename.perl git-shortlog.perl git-fmt-merge-msg.perl \ + git-svnimport-perl SCRIPT_PYTHON = \ git-merge-recursive.py diff --git a/git-svnimport.perl b/git-svnimport.perl new file mode 100755 index 000000000..08645f7eb --- /dev/null +++ b/git-svnimport.perl @@ -0,0 +1,671 @@ +#!/usr/bin/perl -w + +# This tool is copyright (c) 2005, Matthias Urlichs. +# It is released under the Gnu Public License, version 2. +# +# The basic idea is to pull and analyze SVN changes. +# +# Checking out the files is done by a single long-running CVS connection +# / server process. +# +# The head revision is on branch "origin" by default. +# You can change that with the '-o' option. + +require v5.8.0; # for shell-safe open("-|",LIST) +use strict; +use warnings; +use Getopt::Std; +use File::Spec; +use File::Temp qw(tempfile); +use File::Path qw(mkpath); +use File::Basename qw(basename dirname); +use Time::Local; +use IO::Pipe; +use POSIX qw(strftime dup2); +use IPC::Open2; +use SVN::Core; +use SVN::Ra; + +$SIG{'PIPE'}="IGNORE"; +$ENV{'TZ'}="UTC"; + +our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b); + +sub usage() { + print STDERR <; + chomp $cvs_tree; + close $f; +} else { + usage(); +} + +our @mergerx = (); +if ($opt_m) { + @mergerx = ( qr/\W(?:from|of|merge|merging|merged) (\w+)/i ); +} +if ($opt_M) { + push (@mergerx, qr/$opt_M/); +} + +select(STDERR); $|=1; select(STDOUT); + + +package SVNconn; +# Basic SVN connection. +# We're only interested in connecting and downloading, so ... + +use File::Spec; +use File::Temp qw(tempfile); +use POSIX qw(strftime dup2); + +sub new { + my($what,$repo) = @_; + $what=ref($what) if ref($what); + + my $self = {}; + $self->{'buffer'} = ""; + bless($self,$what); + + $repo =~ s#/+$##; + $self->{'fullrep'} = $repo; + $self->conn(); + + $self->{'lines'} = undef; + + return $self; +} + +sub conn { + my $self = shift; + my $repo = $self->{'fullrep'}; + my $s = SVN::Ra->new($repo); + + die "SVN connection to $repo: $!\n" unless defined $s; + $self->{'svn'} = $s; + $self->{'repo'} = $repo; + $self->{'maxrev'} = $s->get_latest_revnum(); +} + +sub file { + my($self,$path,$rev) = @_; + my $res; + + my ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), UNLINK => 1); + + $self->{'svn'}->get_file($path,$rev,$fh) or do { + # retry + $self->conn(); + $self->{'svn'}->get_file($path,$rev,$fh) + or die "$rev: No file $path at $rev\n"; + }; + close ($fh); + + return ($name, $res); +} + + +package main; + +my $svn = SVNconn->new($cvs_tree); + + +sub pdate($) { + my($d) = @_; + $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)# + or die "Unparseable date: $d\n"; + my $y=$1; $y-=1900 if $y>1900; + return timegm($6||0,$5,$4,$3,$2-1,$y); +} + +sub pmode($) { + my($mode) = @_; + my $m = 0; + my $mm = 0; + my $um = 0; + for my $x(split(//,$mode)) { + if($x eq ",") { + $m |= $mm&$um; + $mm = 0; + $um = 0; + } elsif($x eq "u") { $um |= 0700; + } elsif($x eq "g") { $um |= 0070; + } elsif($x eq "o") { $um |= 0007; + } elsif($x eq "r") { $mm |= 0444; + } elsif($x eq "w") { $mm |= 0222; + } elsif($x eq "x") { $mm |= 0111; + } elsif($x eq "=") { # do nothing + } else { die "Unknown mode: $mode\n"; + } + } + $m |= $mm&$um; + return $m; +} + +sub getwd() { + my $pwd = `pwd`; + chomp $pwd; + return $pwd; +} + + +sub get_headref($$) { + my $name = shift; + my $git_dir = shift; + my $sha; + + if (open(C,"$git_dir/refs/heads/$name")) { + chomp($sha = ); + close(C); + length($sha) == 40 + or die "Cannot get head id for $name ($sha): $!\n"; + } + return $sha; +} + + +-d $git_tree + or mkdir($git_tree,0777) + or die "Could not create $git_tree: $!"; +chdir($git_tree); + +my $orig_branch = ""; +my $forward_master = 0; +my %branches; + +my $git_dir = $ENV{"GIT_DIR"} || ".git"; +$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#; +$ENV{"GIT_DIR"} = $git_dir; +my $orig_git_index; +$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE}; +my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx', + DIR => File::Spec->tmpdir()); +close ($git_ih); +$ENV{GIT_INDEX_FILE} = $git_index; +my $maxnum = 0; +my $last_rev = ""; +my $last_branch; +my $current_rev = 0; +unless(-d $git_dir) { + system("git-init-db"); + die "Cannot init the GIT db at $git_tree: $?\n" if $?; + system("git-read-tree"); + die "Cannot init an empty tree: $?\n" if $?; + + $last_branch = $opt_o; + $orig_branch = ""; +} else { + -f "$git_dir/refs/heads/$opt_o" + or die "Branch '$opt_o' does not exist.\n". + "Either use the correct '-o branch' option,\n". + "or import to a new repository.\n"; + + -f "$git_dir/svn2git" + or die "'$git_dir/svn2git' does not exist.\n". + "You need that file for incremental imports.\n"; + $last_branch = basename(readlink("$git_dir/HEAD")); + unless($last_branch) { + warn "Cannot read the last branch name: $! -- assuming 'master'\n"; + $last_branch = "master"; + } + $orig_branch = $last_branch; + $last_rev = get_headref($orig_branch, $git_dir); + if (-f "$git_dir/SVN2GIT_HEAD") { + die <) { + chomp; + my($num,$branch,$ref) = split; + $branches{$branch}{$num} = $ref; + $branches{$branch}{"LAST"} = $ref; + $current_rev = $num+1 if $current_rev < $num+1; + } + close($B); +} +-d $git_dir + or die "Could not create git subdir ($git_dir).\n"; + +open BRANCHES,">>", "$git_dir/svn2git"; + + +## cvsps output: +#--------------------- +#PatchSet 314 +#Date: 1999/09/18 13:03:59 +#Author: wkoch +#Branch: STABLE-BRANCH-1-0 +#Ancestor branch: HEAD +#Tag: (none) +#Log: +# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch +#Members: +# README:1.57->1.57.2.1 +# VERSION:1.96->1.96.2.1 +# +#--------------------- + +my $state = 0; + +sub get_file($$$) { + my($rev,$branch,$path) = @_; + + # revert split_path(), below + my $svnpath; + $path = "" if $path eq "/"; # this should not happen, but ... + if($branch eq "/") { + $svnpath = "/$trunk_name/$path"; + } elsif($branch =~ m#^/#) { + $svnpath = "/$tag_name$branch/$path"; + } else { + $svnpath = "/$branch_name/$branch/$path"; + } + + # now get it + my ($name, $res) = $svn->file($svnpath,$rev); + + open my $F, '-|', "git-hash-object -w $name" + or die "Cannot create object: $!\n"; + my $sha = <$F>; + chomp $sha; + close $F; + # my $mode = pmode($cvs->{'mode'}); + my $mode = "0644"; # SV does not seem to store any file modes + return [$mode, $sha, $path]; +} + +sub split_path($$) { + my($rev,$path) = @_; + my $branch; + + if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) { + $branch = "/$1"; + } elsif($path =~ s#^/\Q$trunk_name\E/?##) { + $branch = "/"; + } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) { + $branch = $1; + } else { + print STDERR "$rev: Unrecognized path: $path\n"; + return () + } + $path = "/" if $path eq ""; + return ($branch,$path); +} + +sub commit { + my($branch, $changed_paths, $revision, $author, $date, $message) = @_; + my($author_name,$author_email,$dest); + my(@old,@new); + + if ($author =~ /^(.*?)\s+<(.*)>$/) { + ($author_name, $author_email) = ($1, $2); + } else { + $author =~ s/^<(.*)>$/$1/; + $author_name = $author_email = $author; + } + $date = pdate($date); + + my $tag; + my $parent; + if($branch eq "/") { # trunk + $parent = $opt_o; + } elsif($branch =~ m#^/(.+)#) { # tag + $tag = 1; + $parent = $1; + } else { # "normal" branch + # nothing to do + $parent = $branch; + } + $dest = $parent; + + my $prev = $changed_paths->{"/"}; + if($prev and $prev->action eq "A") { + delete $changed_paths->{"/"}; + my $oldpath = $prev->copyfrom_path; + my $rev; + if(defined $oldpath) { + my $p; + ($parent,$p) = split_path($revision,$oldpath); + if($parent eq "/") { + $parent = $opt_o; + } else { + $parent =~ s#^/##; # if it's a tag + } + } else { + $parent = undef; + } + } + + my $rev; + if(defined $parent) { + open(H,"git-rev-parse --verify $parent |"); + $rev = ; + close(H) or do { + print STDERR "$revision: cannot find commit '$parent'!\n"; + return; + }; + chop $rev; + if(length($rev) != 40) { + print STDERR "$revision: cannot find commit '$parent'!\n"; + return; + } + $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"}; + if($revision != 1 and not $rev) { + print STDERR "$revision: do not know ancestor for '$parent'!\n"; + return; + } + } else { + $rev = undef; + } + +# if($prev and $prev->action eq "A") { +# if(not $tag) { +# unless(open(H,"> $git_dir/refs/heads/$branch")) { +# print STDERR "$revision: Could not create branch $branch: $!\n"; +# $state=11; +# next; +# } +# print H "$rev\n" +# or die "Could not write branch $branch: $!"; +# close(H) +# or die "Could not write branch $branch: $!"; +# } +# } + if(not defined $rev) { + unlink($git_index); + } elsif ($rev ne $last_rev) { + print "Switching from $last_rev to $rev ($branch)\n" if $opt_v; + system("git-read-tree", $rev); + die "read-tree failed for $rev: $?\n" if $?; + $last_rev = $rev; + } + + while(my($path,$action) = each %$changed_paths) { + if ($action->action eq "A") { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; + } elsif ($action->action eq "D") { + push(@old,$path); + } elsif ($action->action eq "M") { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; + } elsif ($action->action eq "R") { + # refer to a file/tree in an earlier commit + push(@old,$path); # remove any old stuff + + # ... and add any new stuff + my($b,$p) = split_path($revision,$action->oldpath); + open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->oldrev}, $p; + local $/ = '\0'; + while(<$F>) { + chomp; + my($m,$p) = split(/\t/,$_,2); + my($mode,$type,$sha1) = split(/ /,$m); + next if $type ne "blob"; + push(@new,[$mode,$sha1,$p]); + } + } else { + die "$revision: unknown action '".$action->action."' for $path\n"; + } + } + + if(@old) { + open F, "-│", "git-ls-files", "-z", @old or die $!; + @old = (); + local $/ = '\0'; + while() { + chomp; + push(@old,$_); + } + close(F); + + while(@old) { + my @o2; + if(@old > 55) { + @o2 = splice(@old,0,50); + } else { + @o2 = @old; + @old = (); + } + system("git-update-index","--force-remove","--",@o2); + die "Cannot remove files: $?\n" if $?; + } + } + while(@new) { + my @n2; + if(@new > 12) { + @n2 = splice(@new,0,10); + } else { + @n2 = @new; + @new = (); + } + system("git-update-index","--add", + (map { ('--cacheinfo', @$_) } @n2)); + die "Cannot add files: $?\n" if $?; + } + + my $pid = open(C,"-|"); + die "Cannot fork: $!" unless defined $pid; + unless($pid) { + exec("git-write-tree"); + die "Cannot exec git-write-tree: $!\n"; + } + chomp(my $tree = ); + length($tree) == 40 + or die "Cannot get tree id ($tree): $!\n"; + close(C) + or die "Error running git-write-tree: $?\n"; + print "Tree ID $tree\n" if $opt_v; + + my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n"; + my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n"; + $pid = fork(); + die "Fork: $!\n" unless defined $pid; + unless($pid) { + $pr->writer(); + $pw->reader(); + open(OUT,">&STDOUT"); + dup2($pw->fileno(),0); + dup2($pr->fileno(),1); + $pr->close(); + $pw->close(); + + my @par = (); + @par = ("-p",$rev) if defined $rev; + + # loose detection of merges + # based on the commit msg + foreach my $rx (@mergerx) { + if ($message =~ $rx) { + my $mparent = $1; + if ($mparent eq 'HEAD') { $mparent = $opt_o }; + if ( -e "$git_dir/refs/heads/$mparent") { + $mparent = get_headref($mparent, $git_dir); + push @par, '-p', $mparent; + print OUT "Merge parent branch: $mparent\n" if $opt_v; + } + } + } + + exec("env", + "GIT_AUTHOR_NAME=$author_name", + "GIT_AUTHOR_EMAIL=$author_email", + "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), + "GIT_COMMITTER_NAME=$author_name", + "GIT_COMMITTER_EMAIL=$author_email", + "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), + "git-commit-tree", $tree,@par); + die "Cannot exec git-commit-tree: $!\n"; + } + $pw->writer(); + $pr->reader(); + + $message =~ s/[\s\n]+\z//; + + print $pw "$message\n" + or die "Error writing to git-commit-tree: $!\n"; + $pw->close(); + + print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v; + chomp(my $cid = <$pr>); + length($cid) == 40 + or die "Cannot get commit id ($cid): $!\n"; + print "Commit ID $cid\n" if $opt_v; + $pr->close(); + + waitpid($pid,0); + die "Error running git-commit-tree: $?\n" if $?; + + if(defined $dest) { + print "Writing to refs/heads/$dest\n" if $opt_v; + open(C,">$git_dir/refs/heads/$dest") and + print C ("$cid\n") and + close(C) + or die "Cannot write branch $dest for update: $!\n"; + } else { + print "... no known parent\n" if $opt_v; + } + $branches{$branch}{"LAST"} = $cid; + $branches{$branch}{$revision} = $cid; + $last_rev = $cid; + print BRANCHES "$revision $branch $cid\n"; + print "DONE: $revision $dest $cid\n" if $opt_v; + + if($tag) { + my($in, $out) = ('',''); + $last_rev = "-" if %$changed_paths; + # the tag was 'complex', i.e. did not refer to a "real" revision + + $tag =~ tr/_/\./ if $opt_u; + + my $pid = open2($in, $out, 'git-mktag'); + print $out ("object $cid\n". + "type commit\n". + "tag $tag\n". + "tagger $author_name <$author_email>\n") and + close($out) + or die "Cannot create tag object $tag: $!\n"; + + my $tagobj = <$in>; + chomp $tagobj; + + if ( !close($in) or waitpid($pid, 0) != $pid or + $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) { + die "Cannot create tag object $tag: $!\n"; + } + + + open(C,">$git_dir/refs/tags/$tag") + or die "Cannot create tag $tag: $!\n"; + print C "$tagobj\n" + or die "Cannot write tag $tag: $!\n"; + close(C) + or die "Cannot write tag $tag: $!\n"; + + print "Created tag '$tag' on '$branch'\n" if $opt_v; + } +} + +my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; +sub _commit_all { + ($changed_paths, $revision, $author, $date, $message, $pool) = @_; +} +sub commit_all { + my %done; + my @col; + my $pref; + my $branch; + + while(my($path,$action) = each %$changed_paths) { + ($branch,$path) = split_path($revision,$path); + next if not defined $branch; + $done{$branch}{$path} = $action; + } + while(($branch,$changed_paths) = each %done) { + commit($branch, $changed_paths, $revision, $author, $date, $message); + } +} + +while(++$current_rev < $svn->{'maxrev'}) { + $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,""); + commit_all(); +} + + +unlink($git_index); + +if (defined $orig_git_index) { + $ENV{GIT_INDEX_FILE} = $orig_git_index; +} else { + delete $ENV{GIT_INDEX_FILE}; +} + +# Now switch back to the branch we were in before all of this happened +if($orig_branch) { + print "DONE\n" if $opt_v; + system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") + if $forward_master; + unless ($opt_i) { + system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD'); + die "read-tree failed: $?\n" if $?; + } +} else { + $orig_branch = "master"; + print "DONE; creating $orig_branch branch\n" if $opt_v; + system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") + unless -f "$git_dir/refs/heads/master"; + unlink("$git_dir/HEAD"); + symlink("refs/heads/$orig_branch","$git_dir/HEAD"); + unless ($opt_i) { + system('git checkout'); + die "checkout failed: $?\n" if $?; + } +} +unlink("$git_dir/SVN2GIT_HEAD"); +close(BRANCHES); -- 2.26.2