1 # Copyright 1999-2014 Gentoo Foundation
2 # Distributed under the terms of the GNU General Public License v2
4 # @ECLASS: multiprocessing.eclass
6 # base-system@gentoo.org
8 # Brian Harring <ferringb@gentoo.org>
9 # Mike Frysinger <vapier@gentoo.org>
10 # @BLURB: parallelization with bash (wtf?)
12 # The multiprocessing eclass contains a suite of functions that allow ebuilds
13 # to quickly run things in parallel using shell code.
15 # It has two modes: pre-fork and post-fork. If you don't want to dive into any
16 # more nuts & bolts, just use the pre-fork mode. For main threads that mostly
17 # spawn children and then wait for them to finish, use the pre-fork mode. For
18 # main threads that do a bit of processing themselves, use the post-fork mode.
19 # You may mix & match them for longer computation loops.
23 # # First initialize things:
26 # # Then hash a bunch of files in parallel:
27 # for n in {0..20} ; do
28 # multijob_child_init md5sum data.${n} > data.${n}
31 # # Then wait for all the children to finish:
35 if [[ -z ${_MULTIPROCESSING_ECLASS} ]]; then
36 _MULTIPROCESSING_ECLASS=1
40 # Return the process id of the current sub shell. This is to support bash
41 # versions older than 4.0 that lack $BASHPID support natively. Simply do:
42 # echo ${BASHPID:-$(bashpid)}
44 # Note: Using this func in any other way than the one above is not supported.
46 # Running bashpid plainly will return incorrect results. This func must
47 # be run in a subshell of the current subshell to get the right pid.
48 # i.e. This will show the wrong value:
50 # But this will show the right value:
55 # @FUNCTION: get_nproc
56 # @USAGE: [${fallback:-1}]
58 # Attempt to figure out the number of processing units available.
59 # If the value cannot be determined, prints the provided fallback
60 # instead. If no fallback is provided, defaults to 1.
65 if type -P nproc &>/dev/null; then
70 if [[ -z ${nproc} ]] && type -P sysctl &>/dev/null; then
71 nproc=$(sysctl -n hw.ncpu 2>/dev/null)
74 # fallback to python2.6+
75 # note: this may fail (raise NotImplementedError)
76 if [[ -z ${nproc} ]] && type -P python &>/dev/null; then
77 nproc=$(python -c 'import multiprocessing; print(multiprocessing.cpu_count());' 2>/dev/null)
80 if [[ -n ${nproc} ]]; then
87 # @FUNCTION: makeopts_jobs
88 # @USAGE: [${MAKEOPTS}] [${inf:-999}]
90 # Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
91 # specified therein. Useful for running non-make tools in parallel too.
92 # i.e. if the user has MAKEOPTS=-j9, this will echo "9" -- we can't return the
93 # number as bash normalizes it to [0, 255]. If the flags haven't specified a
94 # -j flag, then "1" is shown as that is the default `make` uses. Since there's
95 # no way to represent infinity, we return ${inf} (defaults to 999) if the user
96 # has -j without a number.
98 [[ $# -eq 0 ]] && set -- "${MAKEOPTS}"
99 # This assumes the first .* will be more greedy than the second .*
100 # since POSIX doesn't specify a non-greedy match (i.e. ".*?").
101 local jobs=$(echo " $* " | sed -r -n \
102 -e 's:.*[[:space:]](-[a-z]*j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
103 -e "s:.*[[:space:]](-[a-z]*j|--jobs)[[:space:]].*:${2:-999}:p")
107 # @FUNCTION: makeopts_loadavg
108 # @USAGE: [${MAKEOPTS}] [${inf:-999}]
110 # Searches the arguments (defaults to ${MAKEOPTS}) and extracts the value set
111 # for load-average. For make and ninja based builds this will mean new jobs are
112 # not only limited by the jobs-value, but also by the current load - which might
113 # get excessive due to I/O and not just due to CPU load.
114 # Be aware that the returned number might be a floating-point number. Test
115 # whether your software supports that.
116 # If no limit is specified or --load-average is used without a number, ${inf}
117 # (defaults to 999) is returned.
119 [[ $# -eq 0 ]] && set -- "${MAKEOPTS}"
120 # This assumes the first .* will be more greedy than the second .*
121 # since POSIX doesn't specify a non-greedy match (i.e. ".*?").
122 local lavg=$(echo " $* " | sed -r -n \
123 -e 's:.*[[:space:]](-[a-z]*l|--(load-average|max-load)[=[:space:]])[[:space:]]*([0-9]+|[0-9]+\.[0-9]+).*:\3:p' \
124 -e "s:.*[[:space:]](-[a-z]*l|--(load-average|max-load))[[:space:]].*:${2:-999}:p")
125 # Default to ${inf} since the default is to not use a load limit.
126 echo ${lavg:-${2:-999}}
129 # @FUNCTION: multijob_init
130 # @USAGE: [${MAKEOPTS}]
132 # Setup the environment for executing code in parallel.
133 # You must call this before any other multijob function.
135 # When something goes wrong, try to wait for all the children so we
136 # don't leave any zombies around.
137 has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait "
139 # Setup a pipe for children to write their pids to when they finish.
140 # We have to allocate two fd's because POSIX has undefined behavior
141 # when using one single fd for both read and write. #487056
142 # However, opening an fd for read or write only will block until the
143 # opposite end is opened as well. Thus we open the first fd for both
144 # read and write to not block ourselves, but use it for reading only.
145 # The second fd really is opened for write only, as Cygwin supports
146 # just one single read fd per FIFO. #583962
147 local pipe="${T}/multijob.pipe"
148 mkfifo -m 600 "${pipe}"
149 redirect_alloc_fd mj_read_fd "${pipe}"
150 redirect_alloc_fd mj_write_fd "${pipe}" '>'
153 # See how many children we can fork based on the user's settings.
154 mj_max_jobs=$(makeopts_jobs "$@")
158 # @FUNCTION: multijob_child_init
159 # @USAGE: [--pre|--post] [command to run in background]
161 # This function has two forms. You can use it to execute a simple command
162 # in the background (and it takes care of everything else), or you must
163 # call this first thing in your forked child process.
165 # The --pre/--post options allow you to select the child generation mode.
168 # # 1st form: pass the command line as arguments:
169 # multijob_child_init ls /dev
170 # # Or if you want to use pre/post fork modes:
171 # multijob_child_init --pre ls /dev
172 # multijob_child_init --post ls /dev
174 # # 2nd form: execute multiple stuff in the background (post fork):
176 # multijob_child_init
178 # if echo "${out}" | grep foo ; then
184 # # 2nd form: execute multiple stuff in the background (pre fork):
187 # multijob_child_init
189 # if echo "${out}" | grep foo ; then
194 multijob_child_init() {
197 --pre) mode="pre" ; shift ;;
198 --post) mode="post"; shift ;;
201 if [[ $# -eq 0 ]] ; then
202 trap 'echo ${BASHPID:-$(bashpid)} $? >&'${mj_write_fd} EXIT
203 trap 'exit 1' INT TERM
206 [[ ${mode} == "pre" ]] && { multijob_pre_fork; ret=$?; }
207 ( multijob_child_init ; "$@" ) &
208 [[ ${mode} == "post" ]] && { multijob_post_fork; ret=$?; }
213 # @FUNCTION: _multijob_fork
216 # Do the actual book keeping.
218 [[ $# -eq 1 ]] || die "incorrect number of arguments"
221 [[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
222 if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
226 [[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
230 # @FUNCTION: multijob_pre_fork
232 # You must call this in the parent process before forking a child process.
233 # If the parallel limit has been hit, it will wait for one child to finish
234 # and return its exit status.
multijob_pre_fork() {
	# Hand off to the shared fork helper, selecting its "pre" mode and
	# forwarding any caller-supplied arguments unchanged. The helper's
	# exit status is the last command here, so it becomes this function's
	# exit status.
	_multijob_fork pre "$@"
}
237 # @FUNCTION: multijob_post_fork
239 # You must call this in the parent process after forking a child process.
240 # If the parallel limit has been hit, it will wait for one child to finish
241 # and return its exit status.
multijob_post_fork() {
	# Hand off to the shared fork helper, selecting its "post" mode and
	# forwarding any caller-supplied arguments unchanged. The helper's
	# exit status is the last command here, so it becomes this function's
	# exit status.
	_multijob_fork post "$@"
}
244 # @FUNCTION: multijob_finish_one
246 # Wait for a single process to exit and return its exit code.
247 multijob_finish_one() {
248 [[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
251 read -r -u ${mj_read_fd} pid ret || die
252 : $(( --mj_num_jobs ))
256 # @FUNCTION: multijob_finish
258 # Wait for all pending processes to exit and return the bitwise OR
259 # of all their exit codes.
262 while [[ ${mj_num_jobs} -gt 0 ]] ; do
266 # Let bash clean up its internal child tracking state.
269 # Do this after reaping all the children.
270 [[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
272 # No need to hook anymore.
273 EBUILD_DEATH_HOOKS=${EBUILD_DEATH_HOOKS/ wait / }
278 # @FUNCTION: redirect_alloc_fd
279 # @USAGE: <var> <file> [redirection]
281 # Find a free fd and redirect the specified file via it. Store the new
282 # fd in the specified variable. Useful for the cases where we don't care
283 # about the exact fd #.
284 redirect_alloc_fd() {
285 local var=$1 file=$2 redir=${3:-"<>"}
287 # Make sure /dev/fd is sane on Linux hosts. #479656
288 if [[ ! -L /dev/fd && ${CBUILD} == *linux* ]] ; then
289 eerror "You're missing a /dev/fd symlink to /proc/self/fd."
290 eerror "Please fix the symlink and check your boot scripts (udev/etc...)."
291 die "/dev/fd is broken"
294 if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
295 # Newer bash provides this functionality.
296 eval "exec {${var}}${redir}'${file}'"
298 # Need to provide the functionality ourselves.
301 # Make sure the fd isn't open. It could be a char device,
302 # or a symlink (possibly broken) to something else.
303 if [[ ! -e /dev/fd/${fd} ]] && [[ ! -L /dev/fd/${fd} ]] ; then
304 eval "exec ${fd}${redir}'${file}'" && break
306 [[ ${fd} -gt 1024 ]] && die 'could not locate a free temp fd !?'