Discussion:
[PATCH] switch --update to use the sqlite dbs instead of the xml files. Should massively reduce the memory footprint and hopefully only marginally affect performance.
Seth Vidal
2011-07-15 21:53:43 UTC
Permalink
---
createrepo/__init__.py | 40 ++------
createrepo/readMetadata.py | 240 +++++++++++++------------------------------
2 files changed, 83 insertions(+), 197 deletions(-)

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 44035cc..8549188 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -530,39 +530,19 @@ class MetaDataGenerator:
old_pkg = pkg
if pkg.find("://") != -1:
old_pkg = os.path.basename(pkg)
- nodes = self.oldData.getNodes(old_pkg)
- if nodes is not None: # we have a match in the old metadata
+ old_po = self.oldData.getNodes(old_pkg)
+ if old_po: # we have a match in the old metadata
if self.conf.verbose:
self.callback.log(_("Using data from old metadata for %s")
% pkg)
- (primarynode, filenode, othernode) = nodes
-
- for node, outfile in ((primarynode, self.primaryfile),
- (filenode, self.flfile),
- (othernode, self.otherfile)):
- if node is None:
- break
-
- if self.conf.baseurl:
- anode = node.children
- while anode is not None:
- if anode.type != "element":
- anode = anode.next
- continue
- if anode.name == "location":
- anode.setProp('xml:base', self.conf.baseurl)
- anode = anode.next
-
- output = node.serialize('UTF-8', self.conf.pretty)
- if output:
- outfile.write(output)
- else:
- if self.conf.verbose:
- self.callback.log(_("empty serialize on write to" \
- "%s in %s") % (outfile, pkg))
- outfile.write('\n')
-
- self.oldData.freeNodes(pkg)
+
+ if self.conf.baseurl: # if we have a baseurl set, reset the one
+ # in the old pkg
+ old_po.basepath = self.conf.baseurl
+ self.primaryfile.write(old_po.xml_dump_primary_metadata())
+ self.flfile.write(old_po.xml_dump_filelists_metadata())
+ self.otherfile.write(old_po.xml_dump_other_metadata())
+
#FIXME - if we're in update and we have deltas enabled
# check the presto data for this pkg and write its info back out
# to our deltafile
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index 27d3690..a449e68 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -16,11 +16,25 @@
# Copyright 2006 Red Hat

import os
-import libxml2
import stat
from utils import errorprint, _

-from yum import repoMDObject
+import yum
+from yum import misc
+
+
+class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite):
+ # special for special people like us.
+ def _return_remote_location(self):
+
+ if self.basepath:
+ msg = """<location xml:base="%s" href="%s"/>\n""" % (
+ misc.to_xml(self.basepath, attrib=True),
+ misc.to_xml(self.relativepath, attrib=True))
+ else:
+ msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
+
+ return msg


class MetadataIndex(object):
@@ -30,178 +44,70 @@ class MetadataIndex(object):
opts = {}
self.opts = opts
self.outputdir = outputdir
+ realpath = os.path.realpath(outputdir)
repodatadir = self.outputdir + '/repodata'
- myrepomdxml = repodatadir + '/repomd.xml'
- if os.path.exists(myrepomdxml):
- repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
- b = repomd.getData('primary').location[1]
- f = repomd.getData('filelists').location[1]
- o = repomd.getData('other').location[1]
- basefile = os.path.join(self.outputdir, b)
- filelistfile = os.path.join(self.outputdir, f)
- otherfile = os.path.join(self.outputdir, o)
- else:
- basefile = filelistfile = otherfile = ""
-
- self.files = {'base' : basefile,
- 'filelist' : filelistfile,
- 'other' : otherfile}
+ self._repo = yum.yumRepo.YumRepository('garbageid')
+ self._repo.baseurl = 'file://' + realpath
+ self._repo.basecachedir = misc.getCacheDir()
+ self._repo.metadata_expire = 1
+ self._repo.gpgcheck = 0
+ self._repo.repo_gpgcheck = 0
+ self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
+ self.pkg_tups_by_path = {}
self.scan()
+

def scan(self):
- """Read in and index old repo data"""
- self.basenodes = {}
- self.filesnodes = {}
- self.othernodes = {}
- self.pkg_ids = {}
+ """Read in old repodata"""
if self.opts.get('verbose'):
print _("Scanning old repo data")
- for fn in self.files.values():
- if not os.path.exists(fn):
- #cannot scan
- errorprint(_("Warning: Old repodata file missing: %s") % fn)
- return
- root = libxml2.parseFile(self.files['base']).getRootElement()
- self._scanPackageNodes(root, self._handleBase)
- if self.opts.get('verbose'):
- print _("Indexed %i base nodes" % len(self.basenodes))
- root = libxml2.parseFile(self.files['filelist']).getRootElement()
- self._scanPackageNodes(root, self._handleFiles)
- if self.opts.get('verbose'):
- print _("Indexed %i filelist nodes" % len(self.filesnodes))
- root = libxml2.parseFile(self.files['other']).getRootElement()
- self._scanPackageNodes(root, self._handleOther)
- if self.opts.get('verbose'):
- print _("Indexed %i other nodes" % len(self.othernodes))
- #reverse index pkg ids to track references
- self.pkgrefs = {}
- for relpath, pkgid in self.pkg_ids.iteritems():
- self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
- def _scanPackageNodes(self, root, handler):
- node = root.children
- while node is not None:
- if node.type != "element":
- node = node.next
+ self._repo.sack.populate(self._repo, 'all', None, False)
+ for thispo in self._repo.sack:
+ mtime = thispo.filetime
+ size = thispo.size
+ relpath = thispo.relativepath
+ do_stat = self.opts.get('do_stat', True)
+ if mtime is None:
+ print _("mtime missing for %s") % relpath
continue
- if node.name == "package":
- handler(node)
- node = node.next
-
- def _handleBase(self, node):
- top = node
- node = node.children
- pkgid = None
- mtime = None
- size = None
- relpath = None
- do_stat = self.opts.get('do_stat', True)
- while node is not None:
- if node.type != "element":
- node = node.next
+ if size is None:
+ print _("size missing for %s") % relpath
continue
- if node.name == "checksum":
- pkgid = node.content
- elif node.name == "time":
- mtime = int(node.prop('file'))
- elif node.name == "size":
- size = int(node.prop('package'))
- elif node.name == "location":
- relpath = node.prop('href')
- node = node.next
- if relpath is None:
- print _("Incomplete data for node")
- return
- if pkgid is None:
- print _("pkgid missing for %s") % relpath
- return
- if mtime is None:
- print _("mtime missing for %s") % relpath
- return
- if size is None:
- print _("size missing for %s") % relpath
- return
- if do_stat:
- filepath = os.path.join(self.opts['pkgdir'], relpath)
- try:
- st = os.stat(filepath)
- except OSError:
- #file missing -- ignore
- return
- if not stat.S_ISREG(st.st_mode):
- #ignore non files
- return
- #check size and mtime
- if st.st_size != size:
- if self.opts.get('verbose'):
- print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
- return
- if int(st.st_mtime) != mtime:
- if self.opts.get('verbose'):
- print _("Modification time changed for %s") % filepath
- return
- #otherwise we index
- self.basenodes[relpath] = top
- self.pkg_ids[relpath] = pkgid
-
- def _handleFiles(self, node):
- pkgid = node.prop('pkgid')
- if pkgid:
- self.filesnodes[pkgid] = node
-
- def _handleOther(self, node):
- pkgid = node.prop('pkgid')
- if pkgid:
- self.othernodes[pkgid] = node
+ if do_stat:
+ filepath = os.path.join(self.opts['pkgdir'], relpath)
+ try:
+ st = os.stat(filepath)
+ except OSError:
+ #file missing -- ignore
+ continue
+ if not stat.S_ISREG(st.st_mode):
+ #ignore non files
+ continue
+ #check size and mtime
+ if st.st_size != size:
+ if self.opts.get('verbose'):
+ print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+ continue
+ if int(st.st_mtime) != mtime:
+ if self.opts.get('verbose'):
+ print _("Modification time changed for %s") % filepath
+ continue
+
+ self.pkg_tups_by_path[relpath] = thispo.pkgtup
+

- def getNodes(self, relpath):
- """Return base, filelist, and other nodes for file, if they exist

- Returns a tuple of nodes, or None if not found
+ def getNodes(self, relpath):
+ """return a package object based on relative path of pkg
"""
- bnode = self.basenodes.get(relpath,None)
- if bnode is None:
- return None
- pkgid = self.pkg_ids.get(relpath,None)
- if pkgid is None:
- print _("No pkgid found for: %s") % relpath
- return None
- fnode = self.filesnodes.get(pkgid,None)
- if fnode is None:
- return None
- onode = self.othernodes.get(pkgid,None)
- if onode is None:
- return None
- return bnode, fnode, onode
-
- def freeNodes(self,relpath):
- #causing problems
- """Free up nodes corresponding to file, if possible"""
- bnode = self.basenodes.get(relpath,None)
- if bnode is None:
- print "Missing node for %s" % relpath
- return
- bnode.unlinkNode()
- bnode.freeNode()
- del self.basenodes[relpath]
- pkgid = self.pkg_ids.get(relpath,None)
- if pkgid is None:
- print _("No pkgid found for: %s") % relpath
+ if relpath in self.pkg_tups_by_path:
+ pkgtup = self.pkg_tups_by_path[relpath]
+ return self._repo.sack.searchPkgTuple(pkgtup)[0]
+ else:
+ print _("No pkg found for: %s") % relpath
return None
- del self.pkg_ids[relpath]
- dups = self.pkgrefs.get(pkgid)
- dups.remove(relpath)
- if len(dups):
- #still referenced
- return
- del self.pkgrefs[pkgid]
- for nodes in self.filesnodes, self.othernodes:
- node = nodes.get(pkgid)
- if node is not None:
- node.unlinkNode()
- node.freeNode()
- del nodes[pkgid]

+

if __name__ == "__main__":
cwd = os.getcwd()
@@ -209,9 +115,9 @@ if __name__ == "__main__":
'pkgdir': cwd}

idx = MetadataIndex(cwd, opts)
- for fn in idx.basenodes.keys():
- a,b,c, = idx.getNodes(fn)
- a.serialize()
- b.serialize()
- c.serialize()
- idx.freeNodes(fn)
+ for fn in idx.pkg_tups_by_path:
+ po = idx.getNodes(fn)
+ print po.xml_dump_primary_metadata()
+ print po.xml_dump_filelists_metadata()
+ print po.xml_dump_other_metadata()
+
--
1.7.4.4
t***@gmail.com
2011-07-17 09:20:55 UTC
Permalink
---
 createrepo/__init__.py     |   40 ++------
 createrepo/readMetadata.py |  240 +++++++++++++------------------------------
 2 files changed, 83 insertions(+), 197 deletions(-)
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 44035cc..8549188 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
                old_pkg = pkg
                    old_pkg = os.path.basename(pkg)
-                nodes = self.oldData.getNodes(old_pkg)
-                if nodes is not None: # we have a match in the old metadata
+                old_po = self.oldData.getNodes(old_pkg)
+                if old_po: # we have a match in the old metadata
                        self.callback.log(_("Using data from old metadata for %s")
                                            % pkg)
-                    (primarynode, filenode, othernode) = nodes
-
-                    for node, outfile in ((primarynode, self.primaryfile),
-                                          (filenode, self.flfile),
-                            break
-
-                            anode = node.children
-                                    anode = anode.next
-                                    continue
-                                    anode.setProp('xml:base', self.conf.baseurl)
-                                anode = anode.next
-
-                        output = node.serialize('UTF-8', self.conf.pretty)
-                            outfile.write(output)
-                                self.callback.log(_("empty serialize on write to" \
-                                                    "%s in %s") % (outfile, pkg))
-                        outfile.write('\n')
-
-                    self.oldData.freeNodes(pkg)
+
+                    if self.conf.baseurl: # if we have a baseurl set, reset the one
+                                          # in the old pkg
+                        old_po.basepath = self.conf.baseurl
+                    self.primaryfile.write(old_po.xml_dump_primary_metadata())
+                    self.flfile.write(old_po.xml_dump_filelists_metadata())
+                    self.otherfile.write(old_po.xml_dump_other_metadata())
+
                    #FIXME - if we're in update and we have deltas enabled
                    # check the presto data for this pkg and write its info back out
                    # to our deltafile
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index 27d3690..a449e68 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -16,11 +16,25 @@
 # Copyright 2006 Red Hat
 import os
-import libxml2
 import stat
 from utils import errorprint, _
-from yum import repoMDObject
+import yum
+from yum import misc
+
+
+    # special for special people like us.
+
+            msg = """<location xml:base="%s" href="%s"/>\n""" % (
+                                     misc.to_xml(self.basepath, attrib=True),
+                                     misc.to_xml(self.relativepath, attrib=True))
+            msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
+
+        return msg
            opts = {}
        self.opts = opts
        self.outputdir = outputdir
+        realpath = os.path.realpath(outputdir)
        repodatadir = self.outputdir + '/repodata'
-        myrepomdxml = repodatadir + '/repomd.xml'
-            repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
-            b = repomd.getData('primary').location[1]
-            f = repomd.getData('filelists').location[1]
-            o = repomd.getData('other').location[1]
-            basefile = os.path.join(self.outputdir, b)
-            filelistfile = os.path.join(self.outputdir, f)
-            otherfile = os.path.join(self.outputdir, o)
-            basefile = filelistfile = otherfile = ""
-
-        self.files = {'base' : basefile,
-                      'filelist' : filelistfile,
-                      'other' : otherfile}
+        self._repo = yum.yumRepo.YumRepository('garbageid')
+        self._repo.baseurl = 'file://' + realpath
+        self._repo.basecachedir = misc.getCacheDir()
+        self._repo.metadata_expire = 1
+        self._repo.gpgcheck = 0
+        self._repo.repo_gpgcheck = 0
+        self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
+        self.pkg_tups_by_path = {}
        self.scan()
+
-        """Read in and index old repo data"""
-        self.basenodes = {}
-        self.filesnodes = {}
-        self.othernodes = {}
-        self.pkg_ids = {}
+        """Read in old repodata"""
            print _("Scanning old repo data")
-                #cannot scan
-                errorprint(_("Warning: Old repodata file missing: %s") % fn)
-                return
-        root = libxml2.parseFile(self.files['base']).getRootElement()
-        self._scanPackageNodes(root, self._handleBase)
-            print _("Indexed %i base nodes" % len(self.basenodes))
-        root = libxml2.parseFile(self.files['filelist']).getRootElement()
-        self._scanPackageNodes(root, self._handleFiles)
-            print _("Indexed %i filelist nodes" % len(self.filesnodes))
-        root = libxml2.parseFile(self.files['other']).getRootElement()
-        self._scanPackageNodes(root, self._handleOther)
-            print _("Indexed %i other nodes" % len(self.othernodes))
-        #reverse index pkg ids to track references
-        self.pkgrefs = {}
-            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
-        node = root.children
-                node = node.next
+        self._repo.sack.populate(self._repo, 'all', None, False)
+            mtime = thispo.filetime
+            size = thispo.size
+            relpath = thispo.relativepath
+            do_stat = self.opts.get('do_stat', True)
+                print _("mtime missing for %s") % relpath
                continue
-                handler(node)
-            node = node.next
-
-        top = node
-        node = node.children
-        pkgid = None
-        mtime = None
-        size = None
-        relpath = None
-        do_stat = self.opts.get('do_stat', True)
-                node = node.next
+                print _("size missing for %s") % relpath
                continue
-                pkgid = node.content
-                mtime = int(node.prop('file'))
-                size = int(node.prop('package'))
-                relpath = node.prop('href')
-            node = node.next
-            print _("Incomplete data for node")
-            return
-            print _("pkgid missing for %s") % relpath
-            return
-            print _("mtime missing for %s") % relpath
-            return
-            print _("size missing for %s") % relpath
-            return
-            filepath = os.path.join(self.opts['pkgdir'], relpath)
-                st = os.stat(filepath)
-                #file missing -- ignore
-                return
-                #ignore non files
-                return
-            #check size and mtime
-                    print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
-                return
-                    print _("Modification time changed for %s") % filepath
-                return
-        #otherwise we index
-        self.basenodes[relpath] = top
-        self.pkg_ids[relpath] = pkgid
-
-        pkgid = node.prop('pkgid')
-            self.filesnodes[pkgid] = node
-
-        pkgid = node.prop('pkgid')
-            self.othernodes[pkgid] = node
+                filepath = os.path.join(self.opts['pkgdir'], relpath)
+                    st = os.stat(filepath)
+                    #file missing -- ignore
+                    continue
+                    #ignore non files
+                    continue
+                #check size and mtime
+                        print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+                    continue
+                        print _("Modification time changed for %s") % filepath
+                    continue
+
+            self.pkg_tups_by_path[relpath] = thispo.pkgtup
+
-        """Return base, filelist, and other nodes for file, if they exist
-        Returns a tuple of nodes, or None if not found
+        """return a package object based on relative path of pkg
        """
-        bnode = self.basenodes.get(relpath,None)
-            return None
-        pkgid = self.pkg_ids.get(relpath,None)
-            print _("No pkgid found for: %s") % relpath
-            return None
-        fnode = self.filesnodes.get(pkgid,None)
-            return None
-        onode = self.othernodes.get(pkgid,None)
-            return None
-        return bnode, fnode, onode
-
-        #causing problems
-        """Free up nodes corresponding to file, if possible"""
-        bnode = self.basenodes.get(relpath,None)
-            print "Missing node for %s" % relpath
-            return
-        bnode.unlinkNode()
-        bnode.freeNode()
-        del self.basenodes[relpath]
-        pkgid = self.pkg_ids.get(relpath,None)
-            print _("No pkgid found for: %s") % relpath
+            pkgtup = self.pkg_tups_by_path[relpath]
+            return self._repo.sack.searchPkgTuple(pkgtup)[0]
+            print _("No pkg found for: %s") % relpath
            return None
-        del self.pkg_ids[relpath]
-        dups = self.pkgrefs.get(pkgid)
-        dups.remove(relpath)
-            #still referenced
-            return
-        del self.pkgrefs[pkgid]
-            node = nodes.get(pkgid)
-                node.unlinkNode()
-                node.freeNode()
-                del nodes[pkgid]
+
    cwd = os.getcwd()
            'pkgdir': cwd}
    idx = MetadataIndex(cwd, opts)
-        a,b,c, = idx.getNodes(fn)
-        a.serialize()
-        b.serialize()
-        c.serialize()
-        idx.freeNodes(fn)
+        po = idx.getNodes(fn)
+        print po.xml_dump_primary_metadata()
+        print po.xml_dump_filelists_metadata()
+        print po.xml_dump_other_metadata()
+
--
1.7.4.4
_______________________________________________
Rpm-metadata mailing list
http://lists.baseurl.org/mailman/listinfo/rpm-metadata
ACK, Looks good to me

Tim
seth vidal
2011-07-18 05:35:38 UTC
Permalink
Post by t***@gmail.com
_______________________________________________
Rpm-metadata mailing list
http://lists.baseurl.org/mailman/listinfo/rpm-metadata
ACK, Looks good to me
I'm going to do some more testing on it today to make sure the memory
footprint is sane before I commit it.

-sv
t***@gmail.com
2011-07-18 15:14:24 UTC
Permalink
Post by seth vidal
Post by t***@gmail.com
_______________________________________________
Rpm-metadata mailing list
http://lists.baseurl.org/mailman/listinfo/rpm-metadata
ACK, Looks good to me
I'm going to do some more testing on it today to make sure the memory
footprint is sane before I commit it.
Sounds like a good idea :)

Tim
seth vidal
2011-07-18 19:59:15 UTC
Permalink
Post by Seth Vidal
---
createrepo/__init__.py | 40 ++------
createrepo/readMetadata.py | 240 +++++++++++++------------------------------
2 files changed, 83 insertions(+), 197 deletions(-)
Tested locally on repodata of 9000 pkgs.

Memory in use goes from 1.8-2GB with the old createrepo code down to
325MB for the same operation. Performance-wise it is not
considerably different. More testing will bear that out, though.

I think I'll push this

-sv
t***@gmail.com
2011-07-19 05:13:16 UTC
Permalink
Post by seth vidal
---
 createrepo/__init__.py     |   40 ++------
 createrepo/readMetadata.py |  240 +++++++++++++------------------------------
 2 files changed, 83 insertions(+), 197 deletions(-)
Tested locally on repodata of 9000 pkgs.
Goes from 1.8-> 2GB of  memory in use with the old createrepo code to
325MB of memory in use - same operation - performance-wise it is not
considerably different. More testing will bear that out, though.
1.8 GB -> 325 MB sounds like a winner :)

Tim
Dennis Gregorovic
2011-09-07 16:18:48 UTC
Permalink
Post by Seth Vidal
- """Return base, filelist, and other nodes for file, if they exist
- Returns a tuple of nodes, or None if not found
+ """return a package object based on relative path of pkg
"""
- bnode = self.basenodes.get(relpath,None)
- return None
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
- return None
- fnode = self.filesnodes.get(pkgid,None)
- return None
- onode = self.othernodes.get(pkgid,None)
- return None
- return bnode, fnode, onode
-
- #causing problems
- """Free up nodes corresponding to file, if possible"""
- bnode = self.basenodes.get(relpath,None)
- print "Missing node for %s" % relpath
- return
- bnode.unlinkNode()
- bnode.freeNode()
- del self.basenodes[relpath]
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
+ pkgtup = self.pkg_tups_by_path[relpath]
+ return self._repo.sack.searchPkgTuple(pkgtup)[0]
+ print _("No pkg found for: %s") % relpath
return None
Seth,

Thank you very much for this patch. I've locally backported it to RHEL
6 and the memory savings are tremendous. However, I just noticed one
small issue. Before, the getNodes call would only print "No pkgid found
for: %s" when a package was found in the basenodes cache but not the
pkg_ids cache. That was an edge condition that never actually happened
as far as I can tell, so the net effect was that the message did not
get printed. After the code change, "No pkg found for: %s" is now printed
every time getNodes is called with a package that isn't cached.

-- Dennis
seth vidal
2011-09-07 16:58:10 UTC
Permalink
Post by Dennis Gregorovic
Post by Seth Vidal
- """Return base, filelist, and other nodes for file, if they exist
- Returns a tuple of nodes, or None if not found
+ """return a package object based on relative path of pkg
"""
- bnode = self.basenodes.get(relpath,None)
- return None
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
- return None
- fnode = self.filesnodes.get(pkgid,None)
- return None
- onode = self.othernodes.get(pkgid,None)
- return None
- return bnode, fnode, onode
-
- #causing problems
- """Free up nodes corresponding to file, if possible"""
- bnode = self.basenodes.get(relpath,None)
- print "Missing node for %s" % relpath
- return
- bnode.unlinkNode()
- bnode.freeNode()
- del self.basenodes[relpath]
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
+ pkgtup = self.pkg_tups_by_path[relpath]
+ return self._repo.sack.searchPkgTuple(pkgtup)[0]
+ print _("No pkg found for: %s") % relpath
return None
Seth,
Thank you very much for this patch. I've locally backported it to RHEL
6 and the memory savings are tremendous. However, I just noticed one
small issue. Before, the getNodes call would only print "No pkg found
for: %" when a package was found in the basenodes cache but not the
pkg_ids cache. That was an edge condition that never actually happened
as far as I can tell, so the net effect was that "No pkg found" did not
get printed. After the code change, "No pkg found" is now printed every
time getNodes is called with a package that isn't cached.
Yah - I think someone else noticed that. Dgilmore, I think.

I was a bit torn about it - since it is quasi-useful to know which ones
went walk-about and which ones did not. However, it isn't helpful for
the general case of 'yes, I know I removed pkgs from the repo, just
rebuild it'.

I suspect it is really only useful when debugging something ridiculous.

Anyone object to me just pulling it?
-sv
Dennis Gregorovic
2011-09-07 17:53:03 UTC
Permalink
Post by seth vidal
Post by Dennis Gregorovic
Post by Seth Vidal
- """Return base, filelist, and other nodes for file, if they exist
- Returns a tuple of nodes, or None if not found
+ """return a package object based on relative path of pkg
"""
- bnode = self.basenodes.get(relpath,None)
- return None
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
- return None
- fnode = self.filesnodes.get(pkgid,None)
- return None
- onode = self.othernodes.get(pkgid,None)
- return None
- return bnode, fnode, onode
-
- #causing problems
- """Free up nodes corresponding to file, if possible"""
- bnode = self.basenodes.get(relpath,None)
- print "Missing node for %s" % relpath
- return
- bnode.unlinkNode()
- bnode.freeNode()
- del self.basenodes[relpath]
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
+ pkgtup = self.pkg_tups_by_path[relpath]
+ return self._repo.sack.searchPkgTuple(pkgtup)[0]
+ print _("No pkg found for: %s") % relpath
return None
Seth,
Thank you very much for this patch. I've locally backported it to RHEL
6 and the memory savings are tremendous. However, I just noticed one
small issue. Before, the getNodes call would only print "No pkg found
for: %" when a package was found in the basenodes cache but not the
pkg_ids cache. That was an edge condition that never actually happened
as far as I can tell, so the net effect was that "No pkg found" did not
get printed. After the code change, "No pkg found" is now printed every
time getNodes is called with a package that isn't cached.
Yah - I think someone else noticed that. Dgilmore, I think.
I was a bit torn about it - since it is quasi-useful to know which ones
went walk-about and which ones did not. However, it isn't helpful for
the general case of 'yes, I know I removed pkgs from the repo, just
rebuild it'.
I suspect it is really only useful when debugging something ridiculous.
Anyone object to me just pulling it?
-sv
Maybe only print out the message if run with --verbose?
seth vidal
2011-09-07 19:29:57 UTC
Permalink
Post by Dennis Gregorovic
Post by seth vidal
Post by Dennis Gregorovic
Post by Seth Vidal
- """Return base, filelist, and other nodes for file, if they exist
- Returns a tuple of nodes, or None if not found
+ """return a package object based on relative path of pkg
"""
- bnode = self.basenodes.get(relpath,None)
- return None
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
- return None
- fnode = self.filesnodes.get(pkgid,None)
- return None
- onode = self.othernodes.get(pkgid,None)
- return None
- return bnode, fnode, onode
-
- #causing problems
- """Free up nodes corresponding to file, if possible"""
- bnode = self.basenodes.get(relpath,None)
- print "Missing node for %s" % relpath
- return
- bnode.unlinkNode()
- bnode.freeNode()
- del self.basenodes[relpath]
- pkgid = self.pkg_ids.get(relpath,None)
- print _("No pkgid found for: %s") % relpath
+ pkgtup = self.pkg_tups_by_path[relpath]
+ return self._repo.sack.searchPkgTuple(pkgtup)[0]
+ print _("No pkg found for: %s") % relpath
return None
Seth,
Thank you very much for this patch. I've locally backported it to RHEL
6 and the memory savings are tremendous. However, I just noticed one
small issue. Before, the getNodes call would only print "No pkg found
for: %" when a package was found in the basenodes cache but not the
pkg_ids cache. That was an edge condition that never actually happened
as far as I can tell, so the net effect was that "No pkg found" did not
get printed. After the code change, "No pkg found" is now printed every
time getNodes is called with a package that isn't cached.
Yah - I think someone else noticed that. Dgilmore, I think.
I was a bit torn about it - since it is quasi-useful to know which ones
went walk-about and which ones did not. However, it isn't helpful for
the general case of 'yes, I know I removed pkgs from the repo, just
rebuild it'.
I suspect it is really only useful when debugging something ridiculous.
Anyone object to me just pulling it?
-sv
Maybe only print out the message if run with --verbose?
I just yanked it - the only person who will probably need this debug
info is me and it is easy to do it manually (ls -lR on the dir and a
sqlite dump of pkg filenames)

thanks
-sv

Loading...