Breedlove: Python TypeError: 'NoneType' object is not iterable -

Monday 15 September 2014

Python TypeError: 'NoneType' object is not iterable -

i have looked @ other posts seem have same problem me, issue has not been resolved yet ...

i trying extract google page rank list of domain names, in case "domain_list". below code using. maintain getting error , cannot figure out underlying reason it.

import struct import sys import urllib import urllib2 import httplib import re import xml.etree.elementtree   domain_list = open('/data/personal/samaneh/test.txt','r')  class rankprovider(object):     """abstract class obtaining page rank (popularity)         provider such google or alexa.         """     def __init__(self, host, proxy=none, timeout=30):         """keyword arguments:             host -- toolbar host address             proxy -- address of proxy server. default: none             timeout -- how long wait response server.             default: 30 (seconds)             """         self._opener = urllib2.build_opener()         if proxy:             self._opener.add_handler(urllib2.proxyhandler({"http": proxy}))          self._host = host         self._timeout = timeout      def get_rank(self, url):         """get page rank specified url             keyword arguments:             url -- page rank url             """         raise notimplementederror("you must override get_rank()")   class alexatrafficrank(rankprovider):     """ alexa traffic rank url         """     def __init__(self, host="xml.alexa.com", proxy=none, timeout=30):         """keyword arguments:             host -- toolbar host address: default: joolbarqueries.google.com             proxy -- address of proxy server (if required). default: none             timeout -- how long wait response server.             default: 30 (seconds)             """         super(alexatrafficrank, self).__init__(host, proxy, timeout)      def get_rank(self, url):         """get page rank specified url             keyword arguments:             url -- page rank url             """         query = "http://%s/data?%s" % (self._host, urllib.urlencode((                                                                      ("cli", 10),                                                                      ("dat", "nsa"),                                                                      ("ver", "quirk-searchstatus"),                                                                      ("uid", "20120730094100"),                                                                      ("userip", "192.168.0.1"),                                                                      ("url", url))))          response = self._opener.open(query, timeout=self._timeout)         if response.getcode() == httplib.ok:               info = response.read()              element = xml.etree.elementtree.fromstring(data)             e in element.find("sd"):                 popularity = e.find("popularity")                 if popularity not none:                      homecoming int(popularity.get("text"))   class googlepagerank(rankprovider):     """ google page rank figure using toolbar api.         credits author of www::google::pagerank cpan  bundle         ported code python.         """     def __init__(self, host="toolbarqueries.google.com", proxy=none, timeout=30):         """keyword arguments:             host -- toolbar host address: default: toolbarqueries.google.com             proxy -- address of proxy server (if required). default: none             timeout -- how long wait response server.             default: 30 (seconds)             """         super(googlepagerank, self).__init__(host, proxy, timeout)         self._opener.addheaders = [("user-agent", "mozilla/4.0 (compatible; \                                     googletoolbar 2.0.111-big; windows xp 5.1)")]      def get_rank(self, url):         # calculate hash required part of         # request sent toolbarqueries url.         ch = '6' + str(self._compute_ch_new("info:%s" % (url)))          query = "http://%s/tbr?%s" % (self._host, urllib.urlencode((                                                                     ("client", "navclient-auto"),                                                                     ("ch", ch),                                                                     ("ie", "utf-8"),                                                                     ("oe", "utf-8"),                                                                     ("features", "rank"),                                                                     ("q", "info:%s" % (url)))))          response = self._opener.open(query, timeout=self._timeout)         if response.getcode() == httplib.ok:               info = response.read()             match = re.match("rank_\d+:\d+:(\d+)", data)             if match:                 rank = match.group(1)                  homecoming int(rank)      @classmethod     def _compute_ch_new(cls, url):         ch = cls._compute_ch(url)         ch = ((ch % 0x0d) & 7) | ((ch / 7) << 2);           homecoming cls._compute_ch(struct.pack("<20l", *(cls._wsub(ch, * 9) in range(20))))      @classmethod     def _compute_ch(cls, url):         url = struct.unpack("%db" % (len(url)), url)         = 0x9e3779b9         b = 0x9e3779b9         c = 0xe6359a60         k = 0          length = len(url)          while length >= 12:             = cls._wadd(a, url[k+0] | (url[k+1] << 8) | (url[k+2] << 16) | (url[k+3] << 24));             b = cls._wadd(b, url[k+4] | (url[k+5] << 8) | (url[k+6] << 16) | (url[k+7] << 24));             c = cls._wadd(c, url[k+8] | (url[k+9] << 8) | (url[k+10] << 16) | (url[k+11] << 24));              a, b, c = cls._mix(a, b, c)              k += 12             length -= 12          c = cls._wadd(c, len(url));          if length > 10: c = cls._wadd(c, url[k+10] << 24)         if length > 9: c = cls._wadd(c, url[k+9] << 16)         if length > 8: c = cls._wadd(c, url[k+8] << 8)         if length > 7: b = cls._wadd(b, url[k+7] << 24)         if length > 6: b = cls._wadd(b, url[k+6] << 16)         if length > 5: b = cls._wadd(b, url[k+5] << 8)         if length > 4: b = cls._wadd(b, url[k+4])         if length > 3: = cls._wadd(a, url[k+3] << 24)         if length > 2: = cls._wadd(a, url[k+2] << 16)         if length > 1: = cls._wadd(a, url[k+1] << 8)         if length > 0: = cls._wadd(a, url[k])          a, b, c = cls._mix(a, b, c);          # integer positive          homecoming c      @classmethod     def _mix(cls, a, b, c):         = cls._wsub(a, b); = cls._wsub(a, c); ^= c >> 13;         b = cls._wsub(b, c); b = cls._wsub(b, a); b ^= (a << 8) % 4294967296;         c = cls._wsub(c, a); c = cls._wsub(c, b); c ^= b >>13;         = cls._wsub(a, b); = cls._wsub(a, c); ^= c >> 12;         b = cls._wsub(b, c); b = cls._wsub(b, a); b ^= (a << 16) % 4294967296;         c = cls._wsub(c, a); c = cls._wsub(c, b); c ^= b >> 5;         = cls._wsub(a, b); = cls._wsub(a, c); ^= c >> 3;         b = cls._wsub(b, c); b = cls._wsub(b, a); b ^= (a << 10) % 4294967296;         c = cls._wsub(c, a); c = cls._wsub(c, b); c ^= b >> 15;           homecoming a, b, c      @staticmethod     def _wadd(a, b):          homecoming (a + b) % 4294967296      @staticmethod     def _wsub(a, b):          homecoming (a - b) % 4294967296   if __name__ == "__main__":     url in domain_list: #    url = "http://www.archlinux.org"         providers = (alexatrafficrank(), googlepagerank(),)         print("traffic stats for: %s" % (url))         p in providers:             print("%s:%s" % (p.__class__.__name__, p.get_rank(url)))

and here finish error get:

traceback (most recent  phone call last):   file "test-alexa-ranking.py", line 187, in <module>     print("%s:%s" % (p.__class__.__name__, p.get_rank(url)))   file "test-alexa-ranking.py", line 69, in get_rank     e in element.find("sd"): typeerror: 'nonetype' object not iterable

can help me that?

element.find() returns one match, if there one, otherwise returns none.

you either want utilize element.find_all(), homecoming list of matches (which can empty), or test first if there match:

match = element.find('sd') if not match: homecoming none

seeing looking contained element, utilize xpath expression here:

# find first popularity tag  straight under sd tag text attribute pop = element.find('.//sd/popularity[@text]') if pop not none:      homecoming int(pop.attrib['text'])

this looks work test xml document.

python typeerror

Breedlove

Monday 15 September 2014

Python TypeError: 'NoneType' object is not iterable -

No comments:

Post a Comment