812 lines
		
	
	
	
		
			33 KiB
		
	
	
	
		
			Scheme
		
	
	
	
	
	
			
		
		
	
	
			812 lines
		
	
	
	
		
			33 KiB
		
	
	
	
		
			Scheme
		
	
	
	
	
	
;;; GNU Guix --- Functional package management for GNU
 | 
						|
;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net>
 | 
						|
;;;
 | 
						|
;;; This file is part of GNU Guix.
 | 
						|
;;;
 | 
						|
;;; GNU Guix is free software; you can redistribute it and/or modify it
 | 
						|
;;; under the terms of the GNU General Public License as published by
 | 
						|
;;; the Free Software Foundation; either version 3 of the License, or (at
 | 
						|
;;; your option) any later version.
 | 
						|
;;;
 | 
						|
;;; GNU Guix is distributed in the hope that it will be useful, but
 | 
						|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
;;; GNU General Public License for more details.
 | 
						|
;;;
 | 
						|
;;; You should have received a copy of the GNU General Public License
 | 
						|
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
 | 
						|
(define-module (gnu packages bioinformatics)
 | 
						|
  #:use-module ((guix licenses) #:prefix license:)
 | 
						|
  #:use-module (guix packages)
 | 
						|
  #:use-module (guix download)
 | 
						|
  #:use-module (guix build-system gnu)
 | 
						|
  #:use-module (guix build-system cmake)
 | 
						|
  #:use-module (guix build-system python)
 | 
						|
  #:use-module (guix build-system trivial)
 | 
						|
  #:use-module (gnu packages)
 | 
						|
  #:use-module (gnu packages base)
 | 
						|
  #:use-module (gnu packages compression)
 | 
						|
  #:use-module (gnu packages java)
 | 
						|
  #:use-module (gnu packages ncurses)
 | 
						|
  #:use-module (gnu packages perl)
 | 
						|
  #:use-module (gnu packages pkg-config)
 | 
						|
  #:use-module (gnu packages python)
 | 
						|
  #:use-module (gnu packages tbb)
 | 
						|
  #:use-module (gnu packages vim)
 | 
						|
  #:use-module (gnu packages zip))
 | 
						|
 | 
						|
;; BEDOPS is built straight from upstream's Makefile: the `configure' phase
;; is removed and an extra phase prepares the bundled third-party libraries.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wmg6j0icimlrnsidaxrzf3hfgjvlkkcwvpdg7n4gg7hdv2m9ni5"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       ;; Install the programs into the "bin" directory of the output.
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (alist-cons-after
         'unpack 'unpack-tarballs
         (lambda _
           ;; FIXME: Bedops includes tarballs of minimally patched upstream
           ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
           ;; libraries because at least one of the libraries (zlib) is
           ;; patched to add a C++ function definition (deflateInit2cpp).
           ;; Until the Bedops developers offer a way to link against system
           ;; libraries we have to build the in-tree copies of these three
           ;; libraries.

           ;; See upstream discussion:
           ;; https://github.com/bedops/bedops/issues/124

           ;; The phase succeeds only if all three tarballs were extracted;
           ;; the exit status of "tar" must not be thrown away, otherwise a
           ;; failed extraction would only surface later as an obscure build
           ;; error.
           (and
            ;; Unpack the tarballs to benefit from shebang patching.
            (with-directory-excursion "third-party"
              (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                   (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                   (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
            (begin
              ;; Disable unpacking of tarballs in Makefile.
              (substitute* "system.mk/Makefile.linux"
                (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                (("\\./configure") "CONFIG_SHELL=bash ./configure"))
              ;; Make the bundled zlib build use "bash" as its shell.
              (substitute* "third-party/zlib-1.2.7/Makefile.in"
                (("^SHELL=.*$") "SHELL=bash\n"))
              #t)))
         (alist-delete 'configure %standard-phases))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets.  It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale.  Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
 | 
						|
 | 
						|
;; bedtools ships a plain Makefile: the `configure' phase is removed, the
;; Makefile's SHELL definition is rewritten, and installation is done by
;; copying the contents of the "bin" directory.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "16aq0w3dmbd0853j32xk9jin4vb6v6fgakfyvrsmsjizzbn3fpfl"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (let* ((fix-shell
               ;; The stock patch-makefile-SHELL helper does not yet handle
               ;; definitions written with `:=', and changing that helper
               ;; would trigger a full rebuild, so its effect is
               ;; reimplemented here for this one Makefile.
               (lambda _
                 (substitute* "Makefile"
                   (("^SHELL := .*$")
                    (string-append "SHELL := " (which "bash") " -e \n")))))
              (install-programs
               ;; Copy every file found under "bin" into the output's
               ;; "bin" directory, keeping only the base name.
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((target (string-append (assoc-ref outputs "out")
                                              "/bin/")))
                   (mkdir-p target)
                   (for-each
                    (lambda (program)
                      (copy-file program
                                 (string-append target (basename program))))
                    (find-files "bin" ".*")))))
              (phases (alist-replace 'install install-programs
                                     %standard-phases))
              (phases (alist-delete 'configure phases)))
         (alist-cons-after 'unpack 'patch-makefile-SHELL-definition
                           fix-shell
                           phases))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks.  The most widely-used tools enable
genome arithmetic: that is, set theory on the genome.  For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
 | 
						|
 | 
						|
;; Python 2 bindings for bedtools.  bedtools and samtools are propagated
;; inputs, so they are installed alongside this package.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    ;; The final sentence used to read "from with Python"; the missing "in"
    ;; has been restored.
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
 | 
						|
 | 
						|
;; Bowtie 2 is built from its Makefile (the `configure' phase is removed),
;; installed by copying the "bowtie2*" files, and tested with upstream's Perl
;; test script.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "15dnbqippwvhyh9zqjhaxkabk7lm1xbh1nvar1x4b5kwm117zijn"))
              (modules '((guix build utils)))
              (snippet
               ;; NOTE(review): the CC/CPP replacements carry no trailing
               ;; newline, unlike the SHELL substitutions used for other
               ;; packages in this module -- confirm that the line break
               ;; after each definition survives the substitution.
               '(substitute* "Makefile"
                  (("^CC = .*$") "CC = gcc")
                  (("^CPP = .*$") "CPP = g++")
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
              (patches (list (search-patch "bowtie-fix-makefile.patch")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)))
    (arguments
     '(#:make-flags '("allall")
       #:phases
       (alist-delete
        'configure
        (alist-replace
         'install
         ;; Copy every file whose name matches "bowtie2.*" into the output's
         ;; "bin" directory.
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (mkdir-p bin)
             (for-each (lambda (file)
                         (copy-file file (string-append bin file)))
                       (find-files "." "bowtie2.*"))))
         (alist-replace
          'check
          (lambda* (#:key outputs #:allow-other-keys)
            ;; `system*' returns the exit status as an integer, and every
            ;; integer counts as true in Scheme.  Convert it to a boolean so
            ;; that a failing test run actually fails the phase.
            (zero? (system* "perl"
                            "scripts/test/simple_tests.pl"
                            "--bowtie2=./bowtie2"
                            "--bowtie2-build=./bowtie2-build")))
          %standard-phases)))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences.  It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes.  Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB.  Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
 | 
						|
 | 
						|
;; BWA has neither a "configure" script nor a "check" target, and its
;; Makefile is not used for installation: the program, README and manual
;; page are copied into place by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                           version ".tar.bz2"))
       (sha256
        (base32
         "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     '(#:tests? #f ;no "check" target
       #:phases
       (alist-replace
        'install
        (lambda* (#:key outputs #:allow-other-keys)
          (let* ((out     (assoc-ref outputs "out"))
                 (bin-dir (string-append out "/bin"))
                 (doc-dir (string-append out "/share/doc/bwa"))
                 (man-dir (string-append out "/share/man/man1")))
            ;; Create the three target directories, then populate them.
            (for-each mkdir-p (list bin-dir doc-dir man-dir))
            (copy-file "bwa" (string-append bin-dir "/bwa"))
            (copy-file "README.md" (string-append doc-dir "/README.md"))
            (copy-file "bwa.1" (string-append man-dir "/bwa.1"))))
        ;; no "configure" script
        (alist-delete 'configure %standard-phases))))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome.  It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM.  The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp.  BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate.  BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
 | 
						|
 | 
						|
;; CLIPper is a Python 2 program; the `setup_requires' line is stripped from
;; "setup.py" when the source is prepared.
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/YeoLab/clipper/archive/"
                           version ".tar.gz"))
       (sha256
        (base32
         "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       ;; Drop the setup-time dependency declaration; it is not needed.
       (snippet
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
 | 
						|
 | 
						|
;; CrossMap is built against the system pysam: the bundled copy is deleted
;; from the source and an environment variable is set before the build.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.1.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
       ;; This patch has already been submitted upstream.
       (patches
        (list (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; Get rid of the pysam copy shipped in the tarball.
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (let ((use-system-pysam
              ;; Set right after unpacking, so the variable is present in
              ;; the environment of all later phases.
              (lambda _
                (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1"))))
         (alist-cons-after 'unpack 'set-env
                           use-system-pysam
                           %standard-phases))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies.  It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
 | 
						|
 | 
						|
;; Flexbar is configured with CMake; the binary directory is pointed at the
;; output's "bin" directory and the `install' phase is removed.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/flexbar/"
                           version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (arguments
     `(;; Upstream ships a directory with test data and scripts (driven by
       ;; flexbar_validate.sh), but provides no test target.
       #:tests? #f
       #:configure-flags
       (let ((bin-dir (string-append (assoc-ref %outputs "out") "/bin/")))
         (list (string-append "-DFLEXBAR_BINARY_DIR=" bin-dir)))
       #:phases
       (alist-delete 'install %standard-phases)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently.  It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided.  Flexbar increases
read mapping rates and improves genome and transcriptome assemblies.  It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
 | 
						|
 | 
						|
;; HISAT is distributed as a zip archive and built from its Makefile: the
;; sources are patched in a build phase, the `configure' phase is removed and
;; the programs are installed by copying them.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ;no check target
       #:make-flags
       '("allall"
         ;; The `popcnt' instructions are only supported on x86_64; switch
         ;; them off when building for any other architecture.
         ,@(let ((system (or (%current-target-system)
                             (%current-system))))
             (if (string-prefix? "x86_64" system)
                 '()
                 '("POPCNT_CAPABILITY=0"))))
       #:phases
       (let* ((patch-sources
               (lambda _
                 ;; XXX A source snippet cannot be used for this because zip
                 ;; files are not supported there.
                 (substitute* "Makefile"
                   (("^CC = .*$") "CC = gcc")
                   (("^CPP = .*$") "CPP = g++")
                   ;; Use fixed BUILD_HOST and BUILD_TIME values so that the
                   ;; build is deterministic.
                   (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                   (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                 (substitute* '("hisat-build" "hisat-inspect")
                   (("/usr/bin/env") (which "env")))))
              (install-programs
               ;; Copy every file matching the hisat program-name pattern
               ;; into the output's "bin" directory.
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((target (string-append (assoc-ref outputs "out")
                                              "/bin/")))
                   (mkdir-p target)
                   (for-each
                    (lambda (program)
                      (copy-file program (string-append target program)))
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))))
              (phases (alist-delete 'configure %standard-phases))
              (phases (alist-replace 'install install-programs phases)))
         (alist-cons-after 'unpack 'patch-sources
                           patch-sources
                           phases))))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads.  In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome.  These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
 | 
						|
 | 
						|
;; HTSeq is fetched from PyPI and built with the Python build system using
;; Python 2.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)))
    (arguments
     `(#:python ,python-2))             ; only Python 2 is supported
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (license license:gpl3+)))
 | 
						|
 | 
						|
;; HTSJDK: Java library for high-throughput sequencing file formats.  It is
;; built from the GitHub release archive with Ant rather than with the
;; standard configure/make/install sequence.
(define-public htsjdk
  (package
    (name "htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              ;; The upstream archive is named after the version only, so
              ;; store it under a name that includes the package name.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; Remove the build dependency on git: make a failure to
              ;; execute the external command in build.xml non-fatal.
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((srfi srfi-1)         ; provides 'fold'
                  (guix build gnu-build-system)
                  (guix build utils))
       #:phases (alist-replace
                 'build
                 ;; Replace the default build phase with an Ant invocation.
                 ;; JAVA_HOME points at the "jdk" input, and the "dist"
                 ;; property is set to a directory inside the output
                 ;; (presumably where build.xml places the built jars).
                 (lambda _
                   (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
                   (zero? (system* "ant" "all"
                                   (string-append "-Ddist="
                                                  (assoc-ref %outputs "out")
                                                  "/share/java/htsjdk/"))))
                 ;; The configure, install and check phases are dropped;
                 ;; only the Ant build above remains of the build steps.
                 (fold alist-delete %standard-phases
                       '(configure install check)))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea6 "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data.  There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
 | 
						|
 | 
						|
;; MACS (version 2): ChIP-Seq peak analysis tool, fetched from PyPI under
;; its upstream project name "MACS2" and built with Python 2.
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20140616")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/M/MACS2/MACS2-"
                    version ".tar.gz"))
              (sha256
               (base32
                "11lmiw6avqhwn75sn59g4lfkrr2kk20r3rgfbx9xfqb8rg9mi2n6"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; only compatible with Python 2.7
       #:tests? #f)) ; no test target
    (inputs
     `(("python-numpy" ,python2-numpy)))
    ;; setuptools is listed as a native input, i.e. a build-time tool.
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcription factor binding sites named Model-based Analysis of
ChIP-Seq (MACS).  MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
 | 
						|
 | 
						|
;; MISO: probabilistic isoform quantitation for RNA-Seq data.  The package
;; is published on PyPI as "misopy" and is built with Python 2.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source (origin
              (method url-fetch)
              ;; Use HTTPS, like the other PyPI downloads in this file.
              (uri (string-append
                    "https://pypi.python.org/packages/source/m/misopy/misopy-"
                    version ".tar.gz"))
              (sha256
               (base32
                "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
              (modules '((guix build utils)))
              ;; Use "gcc" instead of "cc" for compilation: a call to
              ;; cc.set_executables is inserted in front of the line in
              ;; setup.py that starts with "defines".
              (snippet
               '(substitute* "setup.py"
                  (("^defines")
                   "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; only Python 2 is supported
       #:tests? #f)) ; no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    ;; setuptools is listed as a native input, i.e. a build-time tool.
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples.  By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
 | 
						|
 | 
						|
;; RSeQC: quality-control modules for RNA-seq data.  The source snippet
;; strips the bundled pysam copy and the "distribute" bootstrap so that the
;; pysam and setuptools inputs listed below are used instead.
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/rseqc/"
                                  version "/RSeQC-" version ".tar.gz"))
              (sha256
               (base32
                "09rf0x9d6apjja5l01cgprj7vigpw6kiqhy34ibwwlxil0db0ri4"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Drop the pysam sources shipped inside the tarball.
                  (delete-file-recursively "lib/pysam")
                  (substitute* "setup.py"
                    ;; The outdated "distribute" module is not needed:
                    ;; blank out both its import and its invocation.
                    (("^from distribute_setup import use_setuptools") "")
                    (("^use_setuptools\\(\\)") "")
                    ;; Tell setup.py that pysam is already available so it
                    ;; does not look for the bundled copy.
                    (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    ;; Build with the Python 2 interpreter.
    (arguments (list #:python python-2))
    (inputs
     (list (list "python-cython" python2-cython)
           (list "python-pysam" python2-pysam)
           (list "python-numpy" python2-numpy)
           (list "python-setuptools" python2-setuptools)
           (list "zlib" zlib)))
    (native-inputs
     (list (list "python-nose" python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data.  Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
 | 
						|
 | 
						|
;; Samtools: utilities for SAM/BAM/CRAM alignment files.  The package has no
;; configure step; the installation prefix is passed to make directly.
(define-public samtools
  (package
    (name "samtools")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1y5p2hs4gif891b4ik20275a8xf3qrr1zh9wpysp4g8m0g1jckf2"))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are 87 test failures when building on non-64-bit architectures
       ;; due to invalid test data.  This has since been fixed upstream (see
       ;; <https://github.com/samtools/samtools/pull/307>), but as there has
       ;; not been a new release we only run the tests on x86_64-linux.
       ;;
       ;; The target system must be consulted first: '%current-system' is
       ;; always a string, so placing it first in the 'or' would hide the
       ;; target and leave the tests enabled when cross-compiling from
       ;; x86_64-linux.
       #:tests? ,(string=? (or (%current-target-system) (%current-system))
                           "x86_64-linux")
       ;; There is no configure script to record the prefix, so hand it to
       ;; make on the command line.
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (alist-cons-after
        'unpack
        'patch-makefile-curses
        ;; Link against ncurses (the library provided as an input) instead
        ;; of plain curses.
        (lambda _
          (substitute* "Makefile"
            (("-lcurses") "-lncurses")))
        (alist-cons-after
         'unpack
         'patch-tests
         (lambda* (#:key inputs #:allow-other-keys)
           (let ((bash (assoc-ref inputs "bash")))
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash")
                (string-append bash "/bin/bash"))
               ;; There are two failing tests upstream relating to the "stats"
               ;; subcommand in test_usage_subcommand ("did not have Usage"
               ;; and "usage did not mention samtools stats"), so we disable
               ;; them.
               (("(test_usage_subcommand\\(.*\\);)" cmd)
                (string-append "unless ($subcommand eq 'stats') {" cmd "};")))))
         (alist-delete
          'configure
          %standard-phases)))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
 | 
						|
 | 
						|
;; SeqAn: C++ sequence-analysis library.  Nothing is compiled here; the
;; builder unpacks the release tarball and copies its "include" and "share"
;; directories into the "out" and "doc" outputs respectively.
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32 "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; Splitting the outputs is worthwhile: the documentation weighs 7.8MB
    ;; and the includes 3.6MB.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball     (assoc-ref %build-inputs "source"))
                (tar-dir     (assoc-ref %build-inputs "tar"))
                (bzip2-dir   (assoc-ref %build-inputs "bzip2"))
                (out         (assoc-ref %outputs "out"))
                (doc         (assoc-ref %outputs "doc"))
                ;; Only tar and bzip2 are put on PATH for the unpack step.
                (search-path (string-append tar-dir "/bin:" bzip2-dir "/bin")))
           (setenv "PATH" search-path)
           (system* "tar" "xvf" tarball)
           ;; The tarball unpacks into a directory named after the version.
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))))))
    ;; The source is listed explicitly so that the builder above can look
    ;; it up in %build-inputs under the "source" label.
    (native-inputs
     (list (list "source" source)
           (list "tar" tar)
           (list "bzip2" bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data.  It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
 | 
						|
 | 
						|
;; STAR: RNA-seq aligner.  The configure phase is dropped, the build runs
;; inside the "source" subdirectory of the unpacked tarball, and the "STAR"
;; file is copied into the output's bin directory by a custom install phase.
(define-public star
  (package
    (name "star")
    (version "2.4.0j")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/alexdobin/STAR/archive/STAR_"
             version ".tar.gz"))
       (sha256
        (base32 "1y3bciych1aw6s7k8sy1saj23dcan9wk4d4f96an499slkxwz712"))
       (modules '((guix build utils)))
       ;; Refer to "rm" by name instead of by its absolute /bin path.
       (snippet
        '(substitute* "source/Makefile"
           (("/bin/rm") "rm")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; upstream provides no check target
       #:make-flags '("STAR")           ; passed to make on its command line
       #:phases
       (alist-cons-after
        'unpack 'enter-source-dir
        ;; Later phases operate from within the "source" directory.
        (lambda _
          (chdir "source"))
        (alist-replace
         'install
         ;; Manual installation: create bin/ in the output and copy the
         ;; "STAR" file into it.
         (lambda* (#:key outputs #:allow-other-keys)
           (let* ((out    (assoc-ref outputs "out"))
                  (bindir (string-append out "/bin/")))
             (mkdir-p bindir)
             (copy-file "STAR" (string-append bindir "STAR"))))
         (alist-delete 'configure %standard-phases)))))
    (native-inputs
     (list (list "vim" vim)))           ; vim provides xxd
    (inputs
     (list (list "zlib" zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure.  In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR itself is GPLv3 or later; htslib is under the MIT license.
    (license license:gpl3+)))
 |