From f07c7444127f5a73d08b3d18508e6c25a02d109b Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sat, 10 Mar 2012 18:35:09 -0500 Subject: [PATCH] Add linking post. --- posts/Linking.mdwn | 343 +++++++++++++++++++++++++++ posts/Linking/Makefile | 109 +++++++++ posts/Linking/hello_world.cpp | 27 +++ posts/Linking/hello_world_string.cpp | 25 ++ posts/Linking/hello_world_string.h | 25 ++ posts/Linking/print_hello_world.cpp | 30 +++ posts/Linking/print_hello_world.h | 25 ++ posts/Linking/simple.c | 3 + 8 files changed, 587 insertions(+) create mode 100644 posts/Linking.mdwn create mode 100644 posts/Linking/Makefile create mode 100644 posts/Linking/hello_world.cpp create mode 100644 posts/Linking/hello_world_string.cpp create mode 100644 posts/Linking/hello_world_string.h create mode 100644 posts/Linking/print_hello_world.cpp create mode 100644 posts/Linking/print_hello_world.h create mode 100644 posts/Linking/simple.c diff --git a/posts/Linking.mdwn b/posts/Linking.mdwn new file mode 100644 index 0000000..b2df413 --- /dev/null +++ b/posts/Linking.mdwn @@ -0,0 +1,343 @@ +This example shows the details of linking a simple program from three +source files. There are three ways to link: directly from object +files, statically from static libraries, or dynamically from shared +libraries. If you're following along in my [[example +source|linking.tar.gz]], you can compile the three flavors of the +`hello_world` program with: + + $ make + +And then run them with: + + $ make run + +Compiling and linking +===================== + +Here's the general compilation process: + +1. Write code in a human-readable language (C, C++, …). +2. Compile the code to [object files][object-files] (`*.o`) using a + compiler (`gcc`, `g++`, …). +3. Link the code into executables or libraries using a [linker][] + (`ld`, `gcc`, `g++`, …). 
+ +Object files are binary files containing [machine code][machine-code] +versions of the human-readable code, along with some bookkeeping +information for the linker (relocation information, stack unwinding +information, program symbols, …). The machine code is specific to a +particular processor architecture (e.g. [x86-64][]). + +Linking files resolves references to symbols defined in other translation +units, because a single object file will rarely (never?) contain +definitions for all the symbols it requires. It's easy to get +confused about the difference between compiling and linking, because +you often use the same program (e.g. `gcc`) for both steps. In +reality, `gcc` is performing the compilation on its own, but is +using external utilities like `ld` for the linking. To see this in +action, add the `-v` (verbose) option to your `gcc` (or `g++`) +calls. You can do this for all the rules in the `Makefile` with: + + make CC="gcc -v" CXX="g++ -v" + +On my system, that shows `g++` using `/lib64/ld-linux-x86-64.so.2` +for dynamic linking. On my system, C++ seems to require at least some +dynamic linking, but a simple C program like `simple.c` can be +linked statically. For static linking, `gcc` uses `collect2`. + +Symbols in object files +======================= + +Sometimes you'll want to take a look at the symbols exported and +imported by your code, since there can be [subtle bugs][bugs] if you +link two sets of code that use the same symbol for different purposes. +You can use `nm` to inspect the intermediate object files.
I've saved +the command line in the `Makefile`: + + $ make inspect-object-files + nm -Pg hello_world.o print_hello_world.o hello_world_string.o + hello_world.o: + _Z17print_hello_worldv U + main T 0000000000000000 0000000000000010 + print_hello_world.o: + _Z17print_hello_worldv T 0000000000000000 0000000000000027 + _ZNSolsEPFRSoS_E U + _ZNSt8ios_base4InitC1Ev U + _ZNSt8ios_base4InitD1Ev U + _ZSt4cout U + _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_ U + _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc U + __cxa_atexit U + __dso_handle U + hello_world_string U + hello_world_string.o: + hello_world_string R 0000000000000010 0000000000000008 + +The output format for `nm` is described in its man page. With the +`-g` option, output is restricted to globally visible symbols. With +the `-P` option, each symbol line is: + + <symbol> <type> <value> <size> +For example, we see that `hello_world.o` defines a global text +symbol `main` at position 0 with a size of 0x10. This is where +the loader will start execution. + +We also see that `hello_world.o` needs (i.e. “has an undefined symbol +for”) `_Z17print_hello_worldv`. This means that, in order to run, +`hello_world.o` must be linked against something else which provides +that symbol. The symbol is for our `print_hello_world` function. The +`_Z17` prefix and `v` postfix are a result of [name +mangling][mangling], and depend on the compiler used and function +signature. Moving on, we see that `print_hello_world.o` defines +`_Z17print_hello_worldv` at position 0 with a size of 0x27. So +linking `print_hello_world.o` with `hello_world.o` would resolve the +symbols needed by `hello_world.o`. + +`print_hello_world.o` has undefined symbols of its own, so we can't +stop yet. It needs `hello_world_string` (provided by +`hello_world_string.o`), `_ZSt4cout` (provided by `libstdc++`), +…. + +The process of linking involves bundling up enough of these partial +code chunks so that each of them has access to the symbols it needs.
+ +There are a number of other tools that will let you poke into the +innards of object files. If `nm` doesn't scratch your itch, you may +want to look at the more general `objdump`. + +Storage classes +=============== + +In the previous section I mentioned “globally visible symbols”. When +you declare or define a symbol (variable, function, …), you can use +[storage classes][storage-classes] to tell the compiler about your +symbols' *linkage* and *storage duration*. + +For more details, you can read through *§6.2.2 Linkages of +identifiers*, *§6.2.4 Storage durations of objects*, and *§6.7.1 +Storage-class specifiers* in [WG14/N1570][N1570], the last public +draft of [ISO/IEC 9899:2011][9899] (i.e. the C11 standard). + +Since we're just worried about linking, I'll leave the discussion of +storage duration to others. With linkage, you're basically deciding +which of the symbols you define in your translation unit should be +visible from other translation units. For example, in +`print_hello_world.h`, we declare that there is a function +`print_hello_world` (with a particular signature). The `extern` +means that it may be defined in another translation unit. For +file-scope symbols (i.e. things defined in the root level of your +source file, not inside functions and the like), this is the default; +writing `extern` just makes it explicit. When we define the +function in `print_hello_world.cpp`, we also label it as `extern` +(again, this is the default). This means that the defined symbol +should be exported for use by other translation units. + +By way of comparison, the string `secret_string` defined in +`hello_world_string.cpp` is declared `static`. This means that +the symbol should be restricted to that translation unit. In other +words, you won't be able to access the value of `secret_string` from +`print_hello_world.cpp`.
+ +When you're writing a library, it is best to make any functions that +you don't *need* to export `static` and to [avoid global variables +altogether][global]. + +Static libraries +================ + +You never want to code *everything* required by a program on your own. +Because of this, people package related groups of functions into +libraries. Programs can then use functions from the library, and +avoid coding that functionality themselves. For example, you could +consider `print_hello_world.o` and `hello_world_string.o` to be +little libraries used by `hello_world.o`. Because the two object +files are so tightly linked, it would be convenient to bundle them +together in a single file. This is what static libraries are: bundles +of object files. You can create them using `ar` (from “archive”; +`ar` is the ancestor of `tar`, from “tape archive”). + +You can use `nm` to list the symbols for static libraries exactly as +you would for object files: + + $ make inspect-static-library + nm -Pg libhello_world.a + libhello_world.a[print_hello_world.o]: + _Z17print_hello_worldv T 0000000000000000 0000000000000027 + _ZNSolsEPFRSoS_E U + _ZNSt8ios_base4InitC1Ev U + _ZNSt8ios_base4InitD1Ev U + _ZSt4cout U + _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_ U + _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc U + __cxa_atexit U + __dso_handle U + hello_world_string U + libhello_world.a[hello_world_string.o]: + hello_world_string R 0000000000000010 0000000000000008 + +Notice that nothing has changed from the object file output, except +that object file names like `print_hello_world.o` have been replaced +by `libhello_world.a[print_hello_world.o]`. + +Shared libraries +================ + +Library code from static libraries (and object files) is built into +your executable at link time.
This means that when the library is +updated in the future (bug fixes, extended functionality, …), you'll +have to relink your program to take advantage of the new features. +Because nobody wants to recompile an entire system when someone makes +`cout` a bit more efficient, people developed shared libraries. The +code from shared libraries is never built into your executable. +Instead, instructions on how to find the dynamic libraries are built +in. When you run your executable, a loader finds all the shared +libraries your program needs and copies the parts you need from the +libraries into your program's memory. This means that when a system +programmer improves `cout`, your program will use the new version +automatically. This is a Good Thing™. + +You can use `ldd` to list the shared libraries your program needs: + + $ make list-executable-shared-libraries + ldd hello_world + linux-vdso.so.1 => (0x00007fff76fbb000) + libstdc++.so.6 => /usr/lib/gcc/x86_64-pc-linux-gnu/4.5.3/libstdc++.so.6 (0x00007ff7467d8000) + libm.so.6 => /lib64/libm.so.6 (0x00007ff746555000) + libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007ff74633e000) + libc.so.6 => /lib64/libc.so.6 (0x00007ff745fb2000) + /lib64/ld-linux-x86-64.so.2 (0x00007ff746ae7000) + +The format is: + + soname => path (load address) + +You can also use `nm` to list symbols for shared libraries: + + $ make inspect-shared-libary | head + nm -Pg --dynamic libhello_world.so + _Jv_RegisterClasses w + _Z17print_hello_worldv T 000000000000098c 0000000000000034 + _ZNSolsEPFRSoS_E U + _ZNSt8ios_base4InitC1Ev U + _ZNSt8ios_base4InitD1Ev U + _ZSt4cout U + _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_ U + _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc U + __bss_start A 0000000000201030 + __cxa_atexit U + __cxa_finalize w + __gmon_start__ w + _edata A 0000000000201030 + _end A 0000000000201048 + _fini T 0000000000000a58 + _init T 0000000000000810 + hello_world_string D 0000000000200dc8 0000000000000008 + +You
can see our `hello_world_string` and `_Z17print_hello_worldv`, +along with the undefined symbols like `_ZSt4cout` that our code +needs. There are also a number of symbols to help with the shared +library mechanics (e.g. `_init`). + +To illustrate the “link time” vs. “load time” distinction, run: + + $ make run + ./hello_world + Hello, World! + ./hello_world-static + Hello, World! + LD_LIBRARY_PATH=. ./hello_world-dynamic + Hello, World! + +Then switch to the `Goodbye` definition in +`hello_world_string.cpp`: + + //extern const char * const hello_world_string = "Hello, World!"; + extern const char * const hello_world_string = "Goodbye!"; + +Recompile the libraries (but not the executables) and run again: + + $ make libs + … + $ make run + ./hello_world + Hello, World! + ./hello_world-static + Hello, World! + LD_LIBRARY_PATH=. ./hello_world-dynamic + Goodbye! + +Finally, relink the executables and run again: + + $ make + … + $ make run + ./hello_world + Goodbye! + ./hello_world-static + Goodbye! + LD_LIBRARY_PATH=. ./hello_world-dynamic + Goodbye! + +When you have many packages depending on the same low-level libraries, +the savings from avoided rebuilding are large. However, shared libraries +have another benefit over static libraries: shared memory. + +Much of the machine code in shared libraries is static (i.e. it +doesn't change as a program is run). Because of this, several +programs may share the same in-memory version of a library without +stepping on each other's toes. With statically linked code, each +program has its own in-memory version: + + + + + + + + + 
<table><tr><th>Static</th><th>Shared</th></tr>
<tr><td>Program A → Library B</td><td>Program A → Library B</td></tr>
<tr><td>Program C → Library B</td><td>Program C ⎯⎯⎯⎯⬏</td></tr></table>
+ + + +Further reading +=============== + +If you're curious about the details on loading and shared libraries, +[Eli Bendersky][EB] has a nice series of articles on [load time +relocation][relocation], [PIC on x86][PIC-x86], and [PIC on +x86-64][PIC-x86-64]. + +[object-files]: http://en.wikipedia.org/wiki/Object_file +[linker]: http://en.wikipedia.org/wiki/Linker_%28computing%29 +[machine-code]: http://en.wikipedia.org/wiki/Instruction_set_architecture +[x86-64]: http://en.wikipedia.org/wiki/X86-64 +[collect2]: http://gcc.gnu.org/onlinedocs/gccint/Collect2.html +[bugs]: + http://blog.flameeyes.eu/2008/02/09/flex-and-linking-conflicts-or-a-possible-reason-why-php-and-recode-are-so-crashy +[mangling]: http://en.wikipedia.org/wiki/Name_mangling +[storage-classes]: + http://ee.hawaii.edu/~tep/EE160/Book/chap14/section2.1.1.html +[N1570]: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf +[9899]: http://www.open-std.org/jtc1/sc22/wg14/ +[global]: http://c2.com/cgi/wiki?GlobalVariablesAreBad + +[EB]: http://eli.thegreenplace.net/ +[relocation]: + http://eli.thegreenplace.net/2011/08/25/load-time-relocation-of-shared-libraries/ +[PIC-x86]: + http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/ +[PIC-x86-64]: + http://eli.thegreenplace.net/2011/11/11/position-independent-code-pic-in-shared-libraries-on-x64/ + +[[!tag tags/C]] +[[!tag tags/linux]] diff --git a/posts/Linking/Makefile b/posts/Linking/Makefile new file mode 100644 index 0000000..219735a --- /dev/null +++ b/posts/Linking/Makefile @@ -0,0 +1,109 @@ +# Copyright (C) 2012 W. Trevor King +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# The variables CC, CXX, and AR are implicit variables. See +# http://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +SOURCE = Makefile hello_world.cpp hello_world_string.cpp hello_world_string.h \ + print_hello_world.cpp print_hello_world.h simple.c +NM = nm -Pg + +all: hello_world hello_world-static hello_world-dynamic simple + +# directly link object files using $(CXX) +hello_world: hello_world.o print_hello_world.o hello_world_string.o + $(CXX) -o hello_world hello_world.o print_hello_world.o hello_world_string.o + +# static linking using $(CXX) +hello_world-static: hello_world.o libhello_world.a + $(CXX) -static -o hello_world-static hello_world.o -L. -lhello_world + +# dynamic linking using $(CXX) +hello_world-dynamic: hello_world.o libhello_world.so + $(CXX) -o hello_world-dynamic hello_world.o -L. -lhello_world + + +# compile using $(CXX) +hello_world.o: hello_world.cpp print_hello_world.h + $(CXX) -c hello_world.cpp +print_hello_world.o: print_hello_world.cpp print_hello_world.h hello_world_string.h + $(CXX) -c print_hello_world.cpp +hello_world_string.o: hello_world_string.cpp hello_world_string.h + $(CXX) -c hello_world_string.cpp + +# to use them in shared object files, we'll want to compile them with PIC.
+print_hello_world-PIC.o: print_hello_world.cpp print_hello_world.h hello_world_string.h + $(CXX) -fPIC -c -o print_hello_world-PIC.o print_hello_world.cpp +hello_world_string-PIC.o: hello_world_string.cpp hello_world_string.h + $(CXX) -fPIC -c -o hello_world_string-PIC.o hello_world_string.cpp + + +# create a static library with $(AR) +libhello_world.a: print_hello_world.o hello_world_string.o + $(AR) -cvr libhello_world.a print_hello_world.o hello_world_string.o + +# create a dynamic library with $(CXX) +libhello_world.so: print_hello_world-PIC.o hello_world_string-PIC.o + $(CXX) -shared -Wl,-soname,libhello_world.so \ + -o libhello_world.so \ + print_hello_world-PIC.o hello_world_string-PIC.o + + +# optional stuff +# ============== + +run: # no prerequisites, so 'make libs' followed by 'make run' does not relink the executables + ./hello_world + ./hello_world-static + LD_LIBRARY_PATH=. ./hello_world-dynamic + +libs: libhello_world.a libhello_world.so + +inspect-object-files: hello_world.o print_hello_world.o hello_world_string.o + $(NM) hello_world.o print_hello_world.o hello_world_string.o + +inspect-executable: hello_world + $(NM) hello_world + +inspect-static-library: libhello_world.a + $(NM) libhello_world.a + +inspect-shared-libary: libhello_world.so + $(NM) --dynamic libhello_world.so + +list-executable-shared-libraries: hello_world + ldd hello_world + +# compile simple.c using $(CC) +simple.o: simple.c + $(CC) -c simple.c +# statically link simple +simple: simple.o + $(CC) -static -o simple simple.o + +# convert the README to HTML using reStructuredText +README.html: README + rst2html.py README > README.html + +# distribution +linking.tar.gz: $(SOURCE) + mkdir linking + cp -r $^ linking/ + tar -czf $@ linking/ + rm -rf linking/ + +# remove temporary files +clean: + rm -f *.o *.a *.so hello_world hello_world-* simple README.html diff --git a/posts/Linking/hello_world.cpp b/posts/Linking/hello_world.cpp new file mode 100644 index 0000000..046deb8 --- /dev/null +++ b/posts/Linking/hello_world.cpp @@ -0,0 +1,27 @@ +/* +A simple "hello world" example in C++ to
demonstrate linking. + +Copyright (C) 2012 W. Trevor King + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "print_hello_world.h" + +using namespace std; + +int main() { /* entry point: calls print_hello_world(), declared in print_hello_world.h, then returns 0 */ + print_hello_world(); + return 0; +} diff --git a/posts/Linking/hello_world_string.cpp b/posts/Linking/hello_world_string.cpp new file mode 100644 index 0000000..673c57d --- /dev/null +++ b/posts/Linking/hello_world_string.cpp @@ -0,0 +1,25 @@ +/* +A simple "hello world" example in C++ to demonstrate linking. + +Copyright (C) 2012 W. Trevor King + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include "hello_world_string.h" + +extern const char * const hello_world_string = "Hello, World!"; /* 'extern' is required here: a namespace-scope const object has internal linkage by default in C++ */ +//extern const char * const hello_world_string = "Goodbye!"; + +static const char * const secret_string = "You can't find me!"; /* 'static': internal linkage, not visible to other translation units */ diff --git a/posts/Linking/hello_world_string.h b/posts/Linking/hello_world_string.h new file mode 100644 index 0000000..b01a1de --- /dev/null +++ b/posts/Linking/hello_world_string.h @@ -0,0 +1,25 @@ +/* +A simple "hello world" example in C++ to demonstrate linking. + +Copyright (C) 2012 W. Trevor King + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef HELLO_WORLD_STRING_H_ +#define HELLO_WORLD_STRING_H_ + +extern const char * const hello_world_string; + +#endif // HELLO_WORLD_STRING_H_ diff --git a/posts/Linking/print_hello_world.cpp b/posts/Linking/print_hello_world.cpp new file mode 100644 index 0000000..b8a60d2 --- /dev/null +++ b/posts/Linking/print_hello_world.cpp @@ -0,0 +1,30 @@ +/* +A simple "hello world" example in C++ to demonstrate linking. + +Copyright (C) 2012 W. Trevor King + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version.
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +#include +#include "print_hello_world.h" +#include "hello_world_string.h" + +using namespace std; + +extern void print_hello_world() +{ + cout << hello_world_string << endl; + return; +} diff --git a/posts/Linking/print_hello_world.h b/posts/Linking/print_hello_world.h new file mode 100644 index 0000000..a76ca88 --- /dev/null +++ b/posts/Linking/print_hello_world.h @@ -0,0 +1,25 @@ +/* +A simple "hello world" example in C++ to demonstrate linking. + +Copyright (C) 2012 W. Trevor King + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +#ifndef _PRINT_HELLO_WORLD_H_ +#define _PRINT_HELLO_WORLD_H_ + +extern void print_hello_world(); + +#endif // _PRINT_HELLO_WORLD_H_ diff --git a/posts/Linking/simple.c b/posts/Linking/simple.c new file mode 100644 index 0000000..19bfd16 --- /dev/null +++ b/posts/Linking/simple.c @@ -0,0 +1,3 @@ +int main(int argc, char **argv) { + return 1; +} -- 2.26.2