Most folks using git-gui on internationalized files have complained
that it doesn't recognize UTF-8 correctly. In the past we have just
ignored the problem and showed the file contents as binary/US-ASCII,
which is wrong no matter how you look at it.
This really should be a per-file attribute, managed by .gitattributes,
so we now pull the "encoding" attribute data for the given path from
the .gitattributes (if available) and use that, falling back to UTF-8
if the attributes are unavailable, git-check-attr is broken, or an
encoding for this path not specified.
We apply the encoding anytime we show file content, which currently
is limited to only the diff viewer and the blame viewer.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
--- /dev/null
+* encoding=US-ASCII
+git-gui.sh encoding=UTF-8
+/po/*.po encoding=UTF-8
}
}
+proc gitattr {path attr default} {
+ if {[catch {set r [git check-attr $attr -- $path]}]} {
+ set r unspecified
+ } else {
+ set r [join [lrange [split $r :] 2 end] :]
+ regsub {^ } $r {} r
+ }
+ if {$r eq {unspecified}} {
+ return $default
+ }
+ return $r
+}
+
proc sq {value} {
regsub -all ' $value "'\\''" value
return "'$value'"
} else {
set fd [git_read cat-file blob "$commit:$path"]
}
- fconfigure $fd -blocking 0 -translation lf -encoding binary
+ fconfigure $fd \
+ -blocking 0 \
+ -translation lf \
+ -encoding [tcl_encoding [gitattr $path encoding UTF-8]]
fileevent $fd readable [cb _read_file $fd $jump]
set current_fd $fd
}
set sz [string length $content]
}
file {
+ set enc [gitattr $path encoding UTF-8]
set fd [open $path r]
- fconfigure $fd -eofchar {}
+ fconfigure $fd \
+ -eofchar {} \
+ -encoding [tcl_encoding $enc]
set content [read $fd $max_sz]
close $fd
set sz [file size $path]
set ::current_diff_inheader 1
fconfigure $fd \
-blocking 0 \
- -encoding binary \
- -translation binary
+ -encoding [tcl_encoding [gitattr $path encoding UTF-8]] \
+ -translation lf
fileevent $fd readable [list read_diff $fd $scroll_pos]
}