ipkg 0.99.163: Apply patches to optimize internal algorithms.
authorPaul Sokolovsky <pmiscml@gmail.com>
Fri, 11 May 2007 11:40:00 +0000 (11:40 +0000)
committerPaul Sokolovsky <pmiscml@gmail.com>
Fri, 11 May 2007 11:40:00 +0000 (11:40 +0000)
* Leading to 10x+ speedup on typical OE feeds (thousands of packages)
* Still needs more testing, though developers' testing didn't show any issues.
* Read patch comments if in doubt.
* Closes #2244.

packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch [new file with mode: 0644]
packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch [new file with mode: 0644]
packages/ipkg/ipkg_0.99.163.bb

diff --git a/packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch b/packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch
new file mode 100644 (file)
index 0000000..e7632b1
--- /dev/null
@@ -0,0 +1,227 @@
+# HG changeset patch
+# User pfalcon@localhost
+# Date 1178334771 0
+# Node ID e4c99830ba0d55813e1774bd6d41039ca640d6a6
+# Parent  d324eba24e79a0a86df7978f0511e4632a2732c7
+pkg_parse: Optimize inefficient parsing.
+
+Instead of expensively probing all fields in row, dispatch based on the
+first letter of the field. Tests show ~12 times reduction in number of calls
+to low-level parsing functions.
+
+diff -r d324eba24e79 -r e4c99830ba0d pkg_parse.c
+--- a/pkg_parse.c      Fri May 04 23:22:33 2007 +0000
++++ b/pkg_parse.c      Sat May 05 03:12:51 2007 +0000
+@@ -241,87 +241,116 @@ int pkg_parse_raw(pkg_t *pkg, char ***ra
+     for (lines = *raw; *lines; lines++) {
+       /*      fprintf(stderr, "PARSING %s\n", *lines);*/
+-      if(isGenericFieldType("Package:", *lines)) 
+-          pkg->name = parseGenericFieldType("Package", *lines);
+-      else if(isGenericFieldType("Architecture:", *lines))
+-          pkg->architecture = parseGenericFieldType("Architecture", *lines);
+-      else if(isGenericFieldType("Filename:", *lines))
+-          pkg->filename = parseGenericFieldType("Filename", *lines);
+-      else if(isGenericFieldType("Section:", *lines))
+-          pkg->section = parseGenericFieldType("Section", *lines);
+-      else if(isGenericFieldType("MD5sum:", *lines))
+-          pkg->md5sum = parseGenericFieldType("MD5sum", *lines);
+-      /* The old ipkg wrote out status files with the wrong case for MD5sum,
+-         let's parse it either way */
+-      else if(isGenericFieldType("MD5Sum:", *lines))
+-          pkg->md5sum = parseGenericFieldType("MD5Sum", *lines);
+-      else if(isGenericFieldType("Size:", *lines))
+-          pkg->size = parseGenericFieldType("Size", *lines);
+-      else if(isGenericFieldType("Source:", *lines))
+-          pkg->source = parseGenericFieldType("Source", *lines);
+-      else if(isGenericFieldType("Installed-Size:", *lines))
+-          pkg->installed_size = parseGenericFieldType("Installed-Size", *lines);
+-      else if(isGenericFieldType("Installed-Time:", *lines)) {
+-           char *time_str = parseGenericFieldType("Installed-Time", *lines);
+-           pkg->installed_time = strtoul(time_str, NULL, 0);
+-      } else if(isGenericFieldType("Priority:", *lines))
+-          pkg->priority = parseGenericFieldType("Priority", *lines);
+-      else if(isGenericFieldType("Essential:", *lines)) {
+-          char *essential_value;
+-          essential_value = parseGenericFieldType("Essential", *lines);
+-          if (strcmp(essential_value, "yes") == 0) {
+-              pkg->essential = 1;
+-          }
+-          free(essential_value);
+-      }
+-      else if(isGenericFieldType("Status", *lines))
+-          parseStatus(pkg, *lines);
+-      else if(isGenericFieldType("Version", *lines))
+-          parseVersion(pkg, *lines);
+-      else if(isGenericFieldType("Maintainer", *lines))
+-          pkg->maintainer = parseGenericFieldType("Maintainer", *lines);
+-      else if(isGenericFieldType("Conffiles", *lines)){
+-          parseConffiles(pkg, *lines);
+-          reading_conffiles = 1;
+-      }
+-      else if(isGenericFieldType("Description", *lines)) {
+-          pkg->description = parseGenericFieldType("Description", *lines);
+-          reading_conffiles = 0;
+-          reading_description = 1;
+-      }
+-
+-      else if(isGenericFieldType("Provides", *lines)){
++      switch (**lines) {
++      case 'P':
++          if(isGenericFieldType("Package:", *lines)) 
++              pkg->name = parseGenericFieldType("Package", *lines);
++          else if(isGenericFieldType("Priority:", *lines))
++              pkg->priority = parseGenericFieldType("Priority", *lines);
++          else if(isGenericFieldType("Provides", *lines)){
+ /* Here we add the internal_use to align the off by one problem between provides_str and provides */
+-            provide = (char * ) malloc(strlen(*lines)+ 35 ); /* Preparing the space for the new ipkg_internal_use_only */
+-            if ( alterProvidesLine(*lines,provide) ){
+-               return EINVAL;
+-            }
+-          pkg->provides_str = parseDependsString( provide, &pkg->provides_count);
++              provide = (char * ) malloc(strlen(*lines)+ 35 ); /* Preparing the space for the new ipkg_internal_use_only */
++              if ( alterProvidesLine(*lines,provide) ){
++                  return EINVAL;
++              }
++              pkg->provides_str = parseDependsString( provide, &pkg->provides_count);
+ /* Let's try to hack a bit here.
+    The idea is that if a package has no Provides, we would add one generic, to permit the check of dependencies
+    in alot of other places. We will remove it before writing down the status database */
+-            pkg_false_provides=0;
+-            free(provide);
+-        } 
+-
+-      else if(isGenericFieldType("Depends", *lines))
+-          pkg->depends_str = parseDependsString(*lines, &pkg->depends_count);
+-      else if(isGenericFieldType("Pre-Depends", *lines))
+-          pkg->pre_depends_str = parseDependsString(*lines, &pkg->pre_depends_count);
+-      else if(isGenericFieldType("Recommends", *lines))
+-          pkg->recommends_str = parseDependsString(*lines, &pkg->recommends_count);
+-      else if(isGenericFieldType("Suggests", *lines))
+-          pkg->suggests_str = parseDependsString(*lines, &pkg->suggests_count);
+-      /* Abhaya: support for conflicts */
+-      else if(isGenericFieldType("Conflicts", *lines))
+-          pkg->conflicts_str = parseDependsString(*lines, &pkg->conflicts_count);
+-      else if(isGenericFieldType("Replaces", *lines))
+-          pkg->replaces_str = parseDependsString(*lines, &pkg->replaces_count);
+-      else if(line_is_blank(*lines)) {
+-          lines++;
+-          break;
+-      }
+-      else if(**lines == ' '){
++              pkg_false_provides=0;
++              free(provide);
++          } 
++          else if(isGenericFieldType("Pre-Depends", *lines))
++              pkg->pre_depends_str = parseDependsString(*lines, &pkg->pre_depends_count);
++          break;
++
++      case 'A':
++          if(isGenericFieldType("Architecture:", *lines))
++              pkg->architecture = parseGenericFieldType("Architecture", *lines);
++          break;
++
++      case 'F':
++          if(isGenericFieldType("Filename:", *lines))
++              pkg->filename = parseGenericFieldType("Filename", *lines);
++          break;
++
++      case 'S':
++          if(isGenericFieldType("Section:", *lines))
++              pkg->section = parseGenericFieldType("Section", *lines);
++          else if(isGenericFieldType("Size:", *lines))
++              pkg->size = parseGenericFieldType("Size", *lines);
++          else if(isGenericFieldType("Source:", *lines))
++              pkg->source = parseGenericFieldType("Source", *lines);
++          else if(isGenericFieldType("Status", *lines))
++              parseStatus(pkg, *lines);
++          else if(isGenericFieldType("Suggests", *lines))
++              pkg->suggests_str = parseDependsString(*lines, &pkg->suggests_count);
++          break;
++
++      case 'M':
++          if(isGenericFieldType("MD5sum:", *lines))
++              pkg->md5sum = parseGenericFieldType("MD5sum", *lines);
++          /* The old ipkg wrote out status files with the wrong case for MD5sum,
++              let's parse it either way */
++          else if(isGenericFieldType("MD5Sum:", *lines))
++              pkg->md5sum = parseGenericFieldType("MD5Sum", *lines);
++          else if(isGenericFieldType("Maintainer", *lines))
++              pkg->maintainer = parseGenericFieldType("Maintainer", *lines);
++          break;
++
++      case 'I':
++          if(isGenericFieldType("Installed-Size:", *lines))
++              pkg->installed_size = parseGenericFieldType("Installed-Size", *lines);
++          else if(isGenericFieldType("Installed-Time:", *lines)) {
++              char *time_str = parseGenericFieldType("Installed-Time", *lines);
++              pkg->installed_time = strtoul(time_str, NULL, 0);
++          }       
++          break;
++
++      case 'E':
++          if(isGenericFieldType("Essential:", *lines)) {
++              char *essential_value;
++              essential_value = parseGenericFieldType("Essential", *lines);
++              if (strcmp(essential_value, "yes") == 0) {
++                  pkg->essential = 1;
++              }
++              free(essential_value);
++          }
++          break;
++
++      case 'V':
++          if(isGenericFieldType("Version", *lines))
++              parseVersion(pkg, *lines);
++          break;
++
++      case 'C':
++          if(isGenericFieldType("Conffiles", *lines)){
++              parseConffiles(pkg, *lines);
++              reading_conffiles = 1;
++          }
++          else if(isGenericFieldType("Conflicts", *lines))
++              pkg->conflicts_str = parseDependsString(*lines, &pkg->conflicts_count);
++          break;
++
++      case 'D':
++          if(isGenericFieldType("Description", *lines)) {
++              pkg->description = parseGenericFieldType("Description", *lines);
++              reading_conffiles = 0;
++              reading_description = 1;
++          }
++          else if(isGenericFieldType("Depends", *lines))
++              pkg->depends_str = parseDependsString(*lines, &pkg->depends_count);
++          break;
++
++      case 'R':
++          if(isGenericFieldType("Recommends", *lines))
++              pkg->recommends_str = parseDependsString(*lines, &pkg->recommends_count);
++          else if(isGenericFieldType("Replaces", *lines))
++              pkg->replaces_str = parseDependsString(*lines, &pkg->replaces_count);
++          
++          break;
++
++      case ' ':
+           if(reading_description) {
+               /* we already know it's not blank, so the rest of description */      
+               pkg->description = realloc(pkg->description,
+@@ -332,8 +361,18 @@ int pkg_parse_raw(pkg_t *pkg, char ***ra
+           }
+           else if(reading_conffiles)
+               parseConffiles(pkg, *lines);
++              
++          break;
++
++      default:
++          if(line_is_blank(*lines)) {
++              lines++;
++              goto out;
++          }
+       }
+     }
++out:;
++    
+     *raw = lines;
+ /* If the ipk has not a Provides line, we insert our false line */ 
+     if ( pkg_false_provides==1)
diff --git a/packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch b/packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch
new file mode 100644 (file)
index 0000000..3c5cac4
--- /dev/null
@@ -0,0 +1,65 @@
+# HG changeset patch
+# User pfalcon@localhost
+# Date 1178812057 0
+# Node ID eff4450fdea2f8210a4fc927bd35ae2562d2eced
+# Parent  e4c99830ba0d55813e1774bd6d41039ca640d6a6
+pkg_vec: Optimize gross inefficiency.
+
+This module tries to implement *unique* vector (without duplicating
+objects), and does this by iterating thru all already existing elements.
+Thus, complexity of adding N elements was O(N^2). However, there're no grave
+reasons to do uniqueness at all:
+1. First of all, if feeds are correct, there won't be duplicates.
+2. Then, even if there will be, there won't be serious problems like
+segfaults.
+3. Finally, for quite a few operations vectors is constructed from a
+hashtable, thus uniqueness is guaranteed (which reduces possible cases of
+non-uniqueness to values of Depends: and friends).
+
+All an all, remove dup check, and make ipkg work order of magnitude faster
+on a feed with few thousands of packages.
+
+diff -r e4c99830ba0d -r eff4450fdea2 pkg_vec.c
+--- a/pkg_vec.c        Sat May 05 03:12:51 2007 +0000
++++ b/pkg_vec.c        Thu May 10 15:47:37 2007 +0000
+@@ -104,6 +104,7 @@ void pkg_vec_insert(pkg_vec_t *vec, cons
+      int i;
+      int found = 0;
++#if 0
+      /* look for a duplicate pkg by name, version, and architecture */
+      for (i = 0; i < vec->len; i++)
+         if ((strcmp(pkg->name, vec->pkgs[i]->name) == 0)
+@@ -112,6 +113,7 @@ void pkg_vec_insert(pkg_vec_t *vec, cons
+              found = 1;
+              break;
+         }
++#endif
+      /* we didn't find one, add it */
+      if(!found){   
+@@ -191,6 +193,7 @@ void abstract_pkg_vec_insert(abstract_pk
+ {
+     int i;
++#if 0
+     /* look for a duplicate pkg by name */
+     for(i = 0; i < vec->len; i++)
+       if (strcmp(pkg->name, vec->pkgs[i]->name) == 0)
+@@ -198,12 +201,15 @@ void abstract_pkg_vec_insert(abstract_pk
+     /* we didn't find one, add it */
+     if(i == vec->len){   
++#endif
+       vec->pkgs = 
+         (abstract_pkg_t **)
+           realloc(vec->pkgs, (vec->len + 1) * sizeof(abstract_pkg_t *));
+       vec->pkgs[vec->len] = pkg;
+       vec->len++;
+-    }
++#if 0
++    }
++#endif
+ }
+ abstract_pkg_t * abstract_pkg_vec_get(abstract_pkg_vec_t *vec, int i)
index 627a1fb..c5972a7 100644 (file)
@@ -1,11 +1,14 @@
 include ipkg.inc
-PR = "r3"
+PR = "r4"
 
 S = "${WORKDIR}/ipkg-${PV}"
 
 SRC_URI = "http://www.handhelds.org/pub/packages/ipkg/ipkg-${PV}.tar.gz \
        file://terse.patch;patch=1 \
-       file://is-processing.patch;patch=1"
+       file://is-processing.patch;patch=1 \
+       file://1-pkg-parse--Optimize-inefficient-parsing.patch;patch=1 \
+       file://2-pkg-vec--Optimize-gross-inefficiency.patch;patch=1 \
+       "
 
 do_stage() {
        oe_libinstall -so libipkg ${STAGING_LIBDIR}