* gnu/packages/patches/borg-fix-hard-link-preloading.patch: New file.
* gnu/local.mk (dist_patch_DATA): Add it.
* gnu/packages/backup.scm (borg)[source]: Use it.
		
			
				
	
	
		
			157 lines
		
	
	
	
		
			7.4 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			157 lines
		
	
	
	
		
			7.4 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| Fix a bug that would cause the test suite to hang:
 | |
| 
 | |
| https://github.com/borgbackup/borg/issues/4350
 | |
| 
 | |
| Patch copied from upstream source repository:
 | |
| 
 | |
| https://github.com/borgbackup/borg/commit/18242ab9e2f26c450b8507aa1d5eceadab8ad027
 | |
| 
 | |
| From 18242ab9e2f26c450b8507aa1d5eceadab8ad027 Mon Sep 17 00:00:00 2001
 | |
| From: Thomas Waldmann <tw@waldmann-edv.de>
 | |
| Date: Thu, 2 May 2019 21:02:26 +0200
 | |
| Subject: [PATCH] preload chunks for hardlink slaves w/o preloaded master,
 | |
|  fixes #4350
 | |
| 
 | |
| also split the hardlink extraction test into 2 tests.
 | |
| 
 | |
| (cherry picked from commit f33f318d816505161d1449a02ddfdeb97d6fe80a)
 | |
| ---
 | |
|  src/borg/archive.py            | 42 +++++++++++++++++++++++++++++-----
 | |
|  src/borg/archiver.py           |  5 ++--
 | |
|  src/borg/testsuite/archiver.py | 20 +++++++++-------
 | |
|  3 files changed, 51 insertions(+), 16 deletions(-)
 | |
| 
 | |
| diff --git a/src/borg/archive.py b/src/borg/archive.py
 | |
| index adc1f42c..0793672a 100644
 | |
| --- a/src/borg/archive.py
 | |
| +++ b/src/borg/archive.py
 | |
| @@ -192,7 +192,7 @@ def __init__(self, repository, key):
 | |
|          self.repository = repository
 | |
|          self.key = key
 | |
|  
 | |
| -    def unpack_many(self, ids, filter=None, preload=False):
 | |
| +    def unpack_many(self, ids, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
 | |
|          """
 | |
|          Return iterator of items.
 | |
|  
 | |
| @@ -209,12 +209,40 @@ def unpack_many(self, ids, filter=None, preload=False):
 | |
|              for item in items:
 | |
|                  if 'chunks' in item:
 | |
|                      item.chunks = [ChunkListEntry(*e) for e in item.chunks]
 | |
| +
 | |
| +            def preload(chunks):
 | |
| +                self.repository.preload([c.id for c in chunks])
 | |
| +
 | |
|              if filter:
 | |
|                  items = [item for item in items if filter(item)]
 | |
| +
 | |
|              if preload:
 | |
| -                for item in items:
 | |
| -                    if 'chunks' in item:
 | |
| -                        self.repository.preload([c.id for c in item.chunks])
 | |
| +                if filter and partial_extract:
 | |
| +                    # if we do only a partial extraction, it gets a bit
 | |
| +                    # complicated with computing the preload items: if a hardlink master item is not
 | |
| +                    # selected (== not extracted), we will still need to preload its chunks if a
 | |
| +                    # corresponding hardlink slave is selected (== is extracted).
 | |
| +                    # due to a side effect of the filter() call, we now have hardlink_masters dict populated.
 | |
| +                    masters_preloaded = set()
 | |
| +                    for item in items:
 | |
| +                        if 'chunks' in item:  # regular file, maybe a hardlink master
 | |
| +                            preload(item.chunks)
 | |
| +                            # if this is a hardlink master, remember that we already preloaded it:
 | |
| +                            if 'source' not in item and hardlinkable(item.mode) and item.get('hardlink_master', True):
 | |
| +                                masters_preloaded.add(item.path)
 | |
| +                        elif 'source' in item and hardlinkable(item.mode):  # hardlink slave
 | |
| +                            source = item.source
 | |
| +                            if source not in masters_preloaded:
 | |
| +                                # we only need to preload *once* (for the 1st selected slave)
 | |
| +                                chunks, _ = hardlink_masters[source]
 | |
| +                                preload(chunks)
 | |
| +                                masters_preloaded.add(source)
 | |
| +                else:
 | |
| +                    # easy: we do not have a filter, thus all items are selected, thus we need to preload all chunks.
 | |
| +                    for item in items:
 | |
| +                        if 'chunks' in item:
 | |
| +                            preload(item.chunks)
 | |
| +
 | |
|              for item in items:
 | |
|                  yield item
 | |
|  
 | |
| @@ -433,8 +461,10 @@ def item_filter(self, item, filter=None):
 | |
|              return False
 | |
|          return filter(item) if filter else True
 | |
|  
 | |
| -    def iter_items(self, filter=None, preload=False):
 | |
| -        for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
 | |
| +    def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
 | |
| +        assert not (filter and partial_extract and preload) or hardlink_masters is not None
 | |
| +        for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
 | |
| +                                              preload=preload, hardlink_masters=hardlink_masters,
 | |
|                                                filter=lambda item: self.item_filter(item, filter)):
 | |
|              yield item
 | |
|  
 | |
| diff --git a/src/borg/archiver.py b/src/borg/archiver.py
 | |
| index 957959d6..dcc20455 100644
 | |
| --- a/src/borg/archiver.py
 | |
| +++ b/src/borg/archiver.py
 | |
| @@ -755,7 +755,8 @@ def peek_and_store_hardlink_masters(item, matched):
 | |
|          else:
 | |
|              pi = None
 | |
|  
 | |
| -        for item in archive.iter_items(filter, preload=True):
 | |
| +        for item in archive.iter_items(filter, partial_extract=partial_extract,
 | |
| +                                       preload=True, hardlink_masters=hardlink_masters):
 | |
|              orig_path = item.path
 | |
|              if strip_components:
 | |
|                  item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
 | |
| @@ -997,7 +998,7 @@ def item_to_tarinfo(item, original_path):
 | |
|                  return None, stream
 | |
|              return tarinfo, stream
 | |
|  
 | |
| -        for item in archive.iter_items(filter, preload=True):
 | |
| +        for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
 | |
|              orig_path = item.path
 | |
|              if strip_components:
 | |
|                  item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
 | |
| diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
 | |
| index c35ad800..935b3d79 100644
 | |
| --- a/src/borg/testsuite/archiver.py
 | |
| +++ b/src/borg/testsuite/archiver.py
 | |
| @@ -823,7 +823,18 @@ def test_mount_hardlinks(self):
 | |
|              assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 | |
|  
 | |
|      @requires_hardlinks
 | |
| -    def test_extract_hardlinks(self):
 | |
| +    def test_extract_hardlinks1(self):
 | |
| +        self._extract_hardlinks_setup()
 | |
| +        with changedir('output'):
 | |
| +            self.cmd('extract', self.repository_location + '::test')
 | |
| +            assert os.stat('input/source').st_nlink == 4
 | |
| +            assert os.stat('input/abba').st_nlink == 4
 | |
| +            assert os.stat('input/dir1/hardlink').st_nlink == 4
 | |
| +            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
 | |
| +            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 | |
| +
 | |
| +    @requires_hardlinks
 | |
| +    def test_extract_hardlinks2(self):
 | |
|          self._extract_hardlinks_setup()
 | |
|          with changedir('output'):
 | |
|              self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
 | |
| @@ -839,13 +850,6 @@ def test_extract_hardlinks(self):
 | |
|              assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 | |
|              assert os.stat('input/dir1/aaaa').st_nlink == 2
 | |
|              assert os.stat('input/dir1/source2').st_nlink == 2
 | |
| -        with changedir('output'):
 | |
| -            self.cmd('extract', self.repository_location + '::test')
 | |
| -            assert os.stat('input/source').st_nlink == 4
 | |
| -            assert os.stat('input/abba').st_nlink == 4
 | |
| -            assert os.stat('input/dir1/hardlink').st_nlink == 4
 | |
| -            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
 | |
| -            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 | |
|  
 | |
|      def test_extract_include_exclude(self):
 | |
|          self.cmd('init', '--encryption=repokey', self.repository_location)
 | |
| -- 
 | |
| 2.21.0
 | |
| 
 |