s***@osuosl.org
2011-09-09 21:12:20 UTC
createrepo/__init__.py | 44 +++++++++++++++++++++++++++++---------------
createrepo/utils.py | 22 ++++++++++++++++++++++
docs/createrepo.8 | 10 +++++++++-
genpkgmetadata.py | 4 +++-
mergerepo.py | 5 +++++
5 files changed, 68 insertions(+), 17 deletions(-)
New commits:
commit 773b3dd40ee7ec77b74ea3269cb0b27deafe29c0
Author: Seth Vidal <***@fedoraproject.org>
Date: Fri Sep 9 17:11:27 2011 -0400
add xz support for making repodata - only use it on the sqlite dbs and on xml data other than primary/filelists/other
leave primary/filelists/other xml files as gz for compatibility
add xz support to mergerepo
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 0fbbff3..7d147a9 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -34,7 +34,7 @@ from yum.packageSack import MetaSack
from yum.packages import YumAvailablePackage
import rpmUtils.transaction
-from utils import _, errorprint, MDError
+from utils import _, errorprint, MDError, lzma
import readMetadata
try:
import sqlite3 as sqlite
@@ -46,7 +46,7 @@ try:
except ImportError:
pass
-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, bzipFile, xzFile, checkAndMakeDir, GzipFile, \
checksum_and_rename, split_list_into_equal_chunks
import deltarpms
@@ -108,9 +108,9 @@ class MetaDataConfig(object):
self.collapse_glibc_requires = True
self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
self.worker_cmd = '/usr/share/createrepo/worker.py'
-
#self.worker_cmd = './worker.py' # helpful when testing
self.retain_old_md = 0
+ self.xz = False # use xz for compression
class SimpleMDCallBack(object):
def errorlog(self, thing):
@@ -146,6 +146,9 @@ class MetaDataGenerator:
if not self.conf.directory and not self.conf.directories:
raise MDError, "No directory given on which to run."
+ if self.conf.xz and not utils.lzma:
+ raise MDError, "XZ compression requested but lzma/xz module not available."
+
if not self.conf.directories: # just makes things easier later
self.conf.directories = [self.conf.directory]
if not self.conf.directory: # ensure we have both in the config object
@@ -844,6 +847,11 @@ class MetaDataGenerator:
sfile = '%s.bz2' % sfile
outfn = os.path.join(outdir, sfile)
output = BZ2File(filename = outfn, mode='wb')
+ elif compress_type == 'xz':
+ sfile = '%s.xz' % sfile
+ outfn = os.path.join(outdir, sfile)
+ output = utils.lzma.LZMAFile(outfn, mode='wb')
+
else:
outfn = os.path.join(outdir, sfile)
output = open(outfn, 'w')
@@ -959,12 +967,17 @@ class MetaDataGenerator:
# rename from silly name to not silly name
os.rename(tmp_result_path, resultpath)
- compressed_name = '%s.bz2' % good_name
+ ext = 'bz2'
+ compress_func = bzipFile
+ if self.conf.xz:
+ ext = 'xz'
+ compress_func = xzFile
+ compressed_name = '%s.%s' % (good_name, ext)
result_compressed = os.path.join(repopath, compressed_name)
db_csums[ftype] = misc.checksum(sumtype, resultpath)
# compress the files
- bzipFile(resultpath, result_compressed)
+ compress_func(resultpath, result_compressed)
# csum the compressed file
db_compressed_sums[ftype] = misc.checksum(sumtype,
result_compressed)
@@ -974,8 +987,8 @@ class MetaDataGenerator:
os.unlink(resultpath)
if self.conf.unique_md_filenames:
- csum_compressed_name = '%s-%s.bz2' % (
- db_compressed_sums[ftype], good_name)
+ csum_compressed_name = '%s-%s.%s' % (
+ db_compressed_sums[ftype], good_name, ext)
csum_result_compressed = os.path.join(repopath,
csum_compressed_name)
os.rename(result_compressed, csum_result_compressed)
@@ -1038,7 +1051,7 @@ class MetaDataGenerator:
if self.conf.additional_metadata:
for md_type, md_file in self.conf.additional_metadata.items():
- mdcontent = self._createRepoDataObject(md_file, md_type)
+ mdcontent = self._createRepoDataObject(md_file, md_type, compress_type='xz')
repomd.repoData[mdcontent.type] = mdcontent
@@ -1112,10 +1125,11 @@ class MetaDataGenerator:
oldfile = os.path.join(output_old_dir, f)
finalfile = os.path.join(output_final_dir, f)
- for (end,lst) in (('-primary.sqlite.bz2', old_pr_db), ('-primary.xml.gz', old_pr),
- ('-filelists.sqlite.bz2', old_fl_db), ('-filelists.xml.gz', old_fl),
- ('-other.sqlite.bz2', old_ot_db), ('-other.xml.gz', old_ot)):
- if f.endswith(end):
+ for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
+ ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
+ ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
+ fn = '.'.join(f.split('.')[:-1])
+ if fn.endswith(end):
lst.append(oldfile)
break
@@ -1129,9 +1143,9 @@ class MetaDataGenerator:
for f in os.listdir(output_old_dir):
oldfile = os.path.join(output_old_dir, f)
finalfile = os.path.join(output_final_dir, f)
-
- if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
- 'primary.sqlite.bz2') or oldfile in old_to_remove:
+ fn = '.'.join(f.split('.')[:-1])
+ if fn in ('filelists.sqlite', 'other.sqlite.',
+ 'primary.sqlite') or oldfile in old_to_remove:
try:
os.remove(oldfile)
except (OSError, IOError), e:
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 995c3b9..655083d 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -23,6 +23,12 @@ import bz2
import gzip
from gzip import write32u, FNAME
from yum import misc
+_available_compression = ['gz', 'bz2']
+try:
+ import lzma
+ _available_compression.append('xz')
+except ImportError:
+ lzma = None
def errorprint(stuff):
print >> sys.stderr, stuff
@@ -69,6 +75,22 @@ def bzipFile(source, dest):
s_fn.close()
+def xzFile(source, dest):
+ if not 'xz' in _available_compression:
+ raise MDError, "Cannot use xz for compression, library/module is not available"
+
+ s_fn = open(source, 'rb')
+ destination = lzma.LZMAFile(dest, 'w')
+
+ while True:
+ data = s_fn.read(1024000)
+
+ if not data: break
+ destination.write(data)
+
+ destination.close()
+ s_fn.close()
+
def returnFD(filename):
try:
fdno = os.open(filename, os.O_RDONLY)
diff --git a/docs/createrepo.8 b/docs/createrepo.8
index 96b5bf8..4734392 100644
--- a/docs/createrepo.8
+++ b/docs/createrepo.8
@@ -105,7 +105,15 @@ Tells createrepo to generate deltarpms and the delta metadata
paths to look for older pkgs to delta against. Can be specified multiple times
.IP "\fB\--num-deltas\fP int"
the number of older versions to make deltas against. Defaults to 1
-
+.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST
+output the paths to the pkgs actually read useful with --update
+.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE
+max size of an rpm that to run deltarpm against (in bytes)
+.IP "\fB\--workers\fP WORKERS
+number of workers to spawn to read rpms
+.IP "\fB\--xz\fP
+use xz for repodata compression
+.IP
.SH "EXAMPLES"
Here is an example of a repository with a groups file. Note that the
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index 512420b..4ba445e 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -121,10 +121,12 @@ def parse_args(args, conf):
parser.add_option("--max-delta-rpm-size", default=100000000,
dest='max_delta_rpm_size', type='int',
help="max size of an rpm that to run deltarpm against (in bytes)")
-
parser.add_option("--workers", default=1,
dest='workers', type='int',
help="number of workers to spawn to read rpms")
+ parser.add_option("--xz", default=False,
+ action="store_true",
+ help="use xz for repodata compression")
(opts, argsleft) = parser.parse_args(args)
if len(argsleft) > 1 and not opts.split:
diff --git a/mergerepo.py b/mergerepo.py
index 069a70b..882395a 100755
--- a/mergerepo.py
+++ b/mergerepo.py
@@ -48,6 +48,9 @@ def parse_args(args):
help="Do not merge group(comps) metadata")
parser.add_option("", "--noupdateinfo", default=False, action="store_true",
help="Do not merge updateinfo metadata")
+ parser.add_option("", "--xz", default=False, action="store_true",
+ help="Use xz for repodata compression")
+
(opts, argsleft) = parser.parse_args(args)
if len(opts.repos) < 2:
@@ -78,6 +81,8 @@ def main(args):
rmbase.groups = False
if opts.noupdateinfo:
rmbase.updateinfo = False
+ if opts.xz:
+ rmbase.mdconf.xz = True
try:
rmbase.merge_repos()
rmbase.write_metadata()
createrepo/utils.py | 22 ++++++++++++++++++++++
docs/createrepo.8 | 10 +++++++++-
genpkgmetadata.py | 4 +++-
mergerepo.py | 5 +++++
5 files changed, 68 insertions(+), 17 deletions(-)
New commits:
commit 773b3dd40ee7ec77b74ea3269cb0b27deafe29c0
Author: Seth Vidal <***@fedoraproject.org>
Date: Fri Sep 9 17:11:27 2011 -0400
add xz support for making repodata - only use it on the sqlite dbs and on xml data other than primary/filelists/other
leave primary/filelists/other xml files as gz for compatibility
add xz support to mergerepo
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 0fbbff3..7d147a9 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -34,7 +34,7 @@ from yum.packageSack import MetaSack
from yum.packages import YumAvailablePackage
import rpmUtils.transaction
-from utils import _, errorprint, MDError
+from utils import _, errorprint, MDError, lzma
import readMetadata
try:
import sqlite3 as sqlite
@@ -46,7 +46,7 @@ try:
except ImportError:
pass
-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, bzipFile, xzFile, checkAndMakeDir, GzipFile, \
checksum_and_rename, split_list_into_equal_chunks
import deltarpms
@@ -108,9 +108,9 @@ class MetaDataConfig(object):
self.collapse_glibc_requires = True
self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
self.worker_cmd = '/usr/share/createrepo/worker.py'
-
#self.worker_cmd = './worker.py' # helpful when testing
self.retain_old_md = 0
+ self.xz = False # use xz for compression
class SimpleMDCallBack(object):
def errorlog(self, thing):
@@ -146,6 +146,9 @@ class MetaDataGenerator:
if not self.conf.directory and not self.conf.directories:
raise MDError, "No directory given on which to run."
+ if self.conf.xz and not utils.lzma:
+ raise MDError, "XZ compression requested but lzma/xz module not available."
+
if not self.conf.directories: # just makes things easier later
self.conf.directories = [self.conf.directory]
if not self.conf.directory: # ensure we have both in the config object
@@ -844,6 +847,11 @@ class MetaDataGenerator:
sfile = '%s.bz2' % sfile
outfn = os.path.join(outdir, sfile)
output = BZ2File(filename = outfn, mode='wb')
+ elif compress_type == 'xz':
+ sfile = '%s.xz' % sfile
+ outfn = os.path.join(outdir, sfile)
+ output = utils.lzma.LZMAFile(outfn, mode='wb')
+
else:
outfn = os.path.join(outdir, sfile)
output = open(outfn, 'w')
@@ -959,12 +967,17 @@ class MetaDataGenerator:
# rename from silly name to not silly name
os.rename(tmp_result_path, resultpath)
- compressed_name = '%s.bz2' % good_name
+ ext = 'bz2'
+ compress_func = bzipFile
+ if self.conf.xz:
+ ext = 'xz'
+ compress_func = xzFile
+ compressed_name = '%s.%s' % (good_name, ext)
result_compressed = os.path.join(repopath, compressed_name)
db_csums[ftype] = misc.checksum(sumtype, resultpath)
# compress the files
- bzipFile(resultpath, result_compressed)
+ compress_func(resultpath, result_compressed)
# csum the compressed file
db_compressed_sums[ftype] = misc.checksum(sumtype,
result_compressed)
@@ -974,8 +987,8 @@ class MetaDataGenerator:
os.unlink(resultpath)
if self.conf.unique_md_filenames:
- csum_compressed_name = '%s-%s.bz2' % (
- db_compressed_sums[ftype], good_name)
+ csum_compressed_name = '%s-%s.%s' % (
+ db_compressed_sums[ftype], good_name, ext)
csum_result_compressed = os.path.join(repopath,
csum_compressed_name)
os.rename(result_compressed, csum_result_compressed)
@@ -1038,7 +1051,7 @@ class MetaDataGenerator:
if self.conf.additional_metadata:
for md_type, md_file in self.conf.additional_metadata.items():
- mdcontent = self._createRepoDataObject(md_file, md_type)
+ mdcontent = self._createRepoDataObject(md_file, md_type, compress_type='xz')
repomd.repoData[mdcontent.type] = mdcontent
@@ -1112,10 +1125,11 @@ class MetaDataGenerator:
oldfile = os.path.join(output_old_dir, f)
finalfile = os.path.join(output_final_dir, f)
- for (end,lst) in (('-primary.sqlite.bz2', old_pr_db), ('-primary.xml.gz', old_pr),
- ('-filelists.sqlite.bz2', old_fl_db), ('-filelists.xml.gz', old_fl),
- ('-other.sqlite.bz2', old_ot_db), ('-other.xml.gz', old_ot)):
- if f.endswith(end):
+ for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
+ ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
+ ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
+ fn = '.'.join(f.split('.')[:-1])
+ if fn.endswith(end):
lst.append(oldfile)
break
@@ -1129,9 +1143,9 @@ class MetaDataGenerator:
for f in os.listdir(output_old_dir):
oldfile = os.path.join(output_old_dir, f)
finalfile = os.path.join(output_final_dir, f)
-
- if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
- 'primary.sqlite.bz2') or oldfile in old_to_remove:
+ fn = '.'.join(f.split('.')[:-1])
+ if fn in ('filelists.sqlite', 'other.sqlite.',
+ 'primary.sqlite') or oldfile in old_to_remove:
try:
os.remove(oldfile)
except (OSError, IOError), e:
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 995c3b9..655083d 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -23,6 +23,12 @@ import bz2
import gzip
from gzip import write32u, FNAME
from yum import misc
+_available_compression = ['gz', 'bz2']
+try:
+ import lzma
+ _available_compression.append('xz')
+except ImportError:
+ lzma = None
def errorprint(stuff):
print >> sys.stderr, stuff
@@ -69,6 +75,22 @@ def bzipFile(source, dest):
s_fn.close()
+def xzFile(source, dest):
+ if not 'xz' in _available_compression:
+ raise MDError, "Cannot use xz for compression, library/module is not available"
+
+ s_fn = open(source, 'rb')
+ destination = lzma.LZMAFile(dest, 'w')
+
+ while True:
+ data = s_fn.read(1024000)
+
+ if not data: break
+ destination.write(data)
+
+ destination.close()
+ s_fn.close()
+
def returnFD(filename):
try:
fdno = os.open(filename, os.O_RDONLY)
diff --git a/docs/createrepo.8 b/docs/createrepo.8
index 96b5bf8..4734392 100644
--- a/docs/createrepo.8
+++ b/docs/createrepo.8
@@ -105,7 +105,15 @@ Tells createrepo to generate deltarpms and the delta metadata
paths to look for older pkgs to delta against. Can be specified multiple times
.IP "\fB\--num-deltas\fP int"
the number of older versions to make deltas against. Defaults to 1
-
+.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST
+output the paths to the pkgs actually read useful with --update
+.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE
+max size of an rpm that to run deltarpm against (in bytes)
+.IP "\fB\--workers\fP WORKERS
+number of workers to spawn to read rpms
+.IP "\fB\--xz\fP
+use xz for repodata compression
+.IP
.SH "EXAMPLES"
Here is an example of a repository with a groups file. Note that the
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index 512420b..4ba445e 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -121,10 +121,12 @@ def parse_args(args, conf):
parser.add_option("--max-delta-rpm-size", default=100000000,
dest='max_delta_rpm_size', type='int',
help="max size of an rpm that to run deltarpm against (in bytes)")
-
parser.add_option("--workers", default=1,
dest='workers', type='int',
help="number of workers to spawn to read rpms")
+ parser.add_option("--xz", default=False,
+ action="store_true",
+ help="use xz for repodata compression")
(opts, argsleft) = parser.parse_args(args)
if len(argsleft) > 1 and not opts.split:
diff --git a/mergerepo.py b/mergerepo.py
index 069a70b..882395a 100755
--- a/mergerepo.py
+++ b/mergerepo.py
@@ -48,6 +48,9 @@ def parse_args(args):
help="Do not merge group(comps) metadata")
parser.add_option("", "--noupdateinfo", default=False, action="store_true",
help="Do not merge updateinfo metadata")
+ parser.add_option("", "--xz", default=False, action="store_true",
+ help="Use xz for repodata compression")
+
(opts, argsleft) = parser.parse_args(args)
if len(opts.repos) < 2:
@@ -78,6 +81,8 @@ def main(args):
rmbase.groups = False
if opts.noupdateinfo:
rmbase.updateinfo = False
+ if opts.xz:
+ rmbase.mdconf.xz = True
try:
rmbase.merge_repos()
rmbase.write_metadata()