From ebad9d6c0ebb02ca5825a5984ad2bd795d1cbf59 Mon Sep 17 00:00:00 2001
From: Ricardo Wurmus
Date: Tue, 23 Jan 2024 21:26:50 +0100
Subject: [PATCH] gnu: Add python-pyfasta.

* gnu/packages/bioinformatics.scm (python-pyfasta): New variable.

Change-Id: Ib2ab0a6eb250309633d5be33c18409227bce84ba
---
NOTE(review): everything between the "---" line and "diff --git" is ignored
by `git am`.

This patch was recovered from a whitespace-collapsed copy.  Line breaks and
indentation were rebuilt so that the hunk matches its header (11 context,
78 added and 6 removed lines).  The exact original indentation -- in
particular that of the removed python-schema-salad lines -- is an assumption;
confirm it against the target tree or apply with
`git am --ignore-whitespace`.

What the patch does:
  - Adds python-pyfasta, built from a git checkout because the PyPI release
    lacks the test data files.
  - The 'python3.10-compat phase rewrites Python 2 constructs in the
    upstream sources: implicit relative imports, iteritems/iterkeys,
    cPickle, cStringIO, long, sys.maxint, buffer and collections.Mapping.
    The buffer() replacement is flagged as uncertain by the author (XXX).
  - Re-indents the origin form of python-schema-salad (whitespace only);
    this is not mentioned in the commit log.

 gnu/packages/bioinformatics.scm | 84 ++++++++++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 6 deletions(-)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index ec076b783e..5e30cf1ec7 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -4820,17 +4820,89 @@ gkm-SVM.")
 accessing bigWig files.")
     (license license:expat)))
 
+(define-public python-pyfasta
+  ;; The release on pypi does not contain the test data files.
+  (let ((commit "c2f0611c5311f1b1466f2d56560447898b4a8b03")
+        (revision "1"))
+    (package
+      (name "python-pyfasta")
+      (version (git-version "0.5.2" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/brentp/pyfasta")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "0a189id3fbv88gssyk6adbmz2ll1mqpmyw8vxmx3fi955gvaq9j7"))))
+      (build-system pyproject-build-system)
+      (arguments
+       (list
+        #:phases
+        '(modify-phases %standard-phases
+           (add-after 'unpack 'python3.10-compat
+             (lambda _
+               (substitute* "pyfasta/__init__.py"
+                 (("from fasta import")
+                  "from pyfasta.fasta import")
+                 (("from records import")
+                  "from pyfasta.records import")
+                 (("from split_fasta import")
+                  "from pyfasta.split_fasta import")
+                 (("in f.iteritems")
+                  "in f.items"))
+               (substitute* "pyfasta/fasta.py"
+                 (("from collections import Mapping")
+                  "from collections.abc import Mapping")
+                 (("from records import")
+                  "from pyfasta.records import"))
+               (substitute* "pyfasta/records.py"
+                 (("cPickle") "pickle")
+                 (("\\(int, long\\)")
+                  "(int, int)")
+                 ;; XXX: it's not clear if this is really correct.
+                 (("buffer\\(self\\)")
+                  "memoryview(bytes(str(self), encoding='utf-8'))")
+                 (("sys.maxint") "sys.maxsize"))
+               (substitute* "pyfasta/split_fasta.py"
+                 (("from cStringIO import")
+                  "from io import")
+                 (("in lens.iteritems") "in lens.items"))
+               (substitute* "tests/test_all.py"
+                 (("f.keys\\(\\)\\) == \\['a-extra'")
+                  "list(f.keys())) == ['a-extra'")
+                 (("f.iterkeys\\(\\)") "iter(f.keys())")
+                 (("tests/data/" m)
+                  (string-append (getcwd) "/" m))))))))
+      (propagated-inputs (list python-numpy))
+      (native-inputs (list python-nose))
+      (home-page "https://github.com/brentp/pyfasta/")
+      (synopsis "Pythonic access to fasta sequence files")
+      (description
+       "This library provides fast, memory-efficient, pythonic (and
+command-line) access to fasta sequence files.  It stores a flattened version
+of a fasta sequence file without spaces or headers and uses either a
+@code{mmap} in numpy binary format or @code{fseek}/@code{fread} so the
+sequence data is never read into memory.  It saves a pickle (@code{.gdx}) of
+the start and stop (for @code{fseek}/@code{mmap}) locations of each header in
+the fasta file for internal use.
+
+Note that this package has been deprecated in favor of @code{pyfaidx}.")
+      (license license:expat))))
+
 (define-public python-schema-salad
   (package
     (name "python-schema-salad")
     (version "8.2.20211116214159")
     (source
-      (origin
-        (method url-fetch)
-        (uri (pypi-uri "schema-salad" version))
-        (sha256
-         (base32
-          "005dh2y45x92zl8sf2sqjmfvcqr4hrz8dfckgkckv87003v7lwqc"))))
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "schema-salad" version))
+       (sha256
+        (base32
+         "005dh2y45x92zl8sf2sqjmfvcqr4hrz8dfckgkckv87003v7lwqc"))))
     (build-system pyproject-build-system)
     (arguments
      `(#:phases