こちらを参考に、APIのレート制限に引っかからないよう、1回のリクエストにつき200人ずつ取得する。
フォローしている人(friends)とフォローされている人(followers)は、それぞれ別個に取得できる。有名人ならたいていfollowersが万人規模になるので、取得するだけでもかなり時間がかかる。
# Show the waiting time as a progress bar.
import sys
import time

rng_time = 60*15
wait_time = 1


def LimitError(rng_time=60*15, wait_time=1):
    """Block for ``rng_time`` ticks while drawing a countdown bar on stdout.

    Each tick redraws a 71-character bar on the current line (via a leading
    carriage return), followed by the number of ticks remaining, and then
    sleeps for ``wait_time`` seconds.

    Args:
        rng_time: Number of ticks to wait. Truncated to an integer so that a
            float such as ``15*60/180.0`` is accepted. Zero or a negative
            value draws nothing and returns after emitting the newlines.
        wait_time: Seconds slept per tick, passed straight to ``time.sleep``.
    """
    bar_width = 71
    ticks = int(rng_time)
    for i in range(1, ticks + 1):
        # Floor division keeps the fill width an int on Python 3 as well;
        # with true division `"=" * rate` raises TypeError.
        rate = i * bar_width // ticks
        meter = "\r|%s%s| %d秒" % ("=" * rate, ' ' * (bar_width - rate), (ticks - i))
        sys.stdout.write(meter)
        sys.stdout.flush()
        time.sleep(wait_time)
    # The bar is drawn on stdout but the terminating blank lines go to
    # stderr, exactly as in the original.
    # NOTE(review): confirm the stdout/stderr split is intentional.
    sys.stderr.write('\n')
    sys.stderr.write('\n')
# Download the friends (or followers) list of every account in `cv` and write
# each account's screen names, tab-separated on one line, to
# <wd_out><account>.txt.
#
# Names used here but defined elsewhere in the file: `twitter` (presumably a
# Twython client -- verify), `cv`, `LimitError`, and the Twython exception
# classes TwythonError / TwythonRateLimitError / TwythonAuthError.

fnum = 200  # number of users requested per call
query_per_sec = 65  # seconds to wait after each successful API call (earlier value: 15*60/180.0)
func = "friends"  # which list to fetch: "friends" or "followers"
wd_out = "/" + func + "/"  # output directory prefix

# cv is the list of account names.


def _call_api(endpoint, **params):
    """Call ``endpoint(**params)``, waiting out rate limits and retrying.

    On ``TwythonRateLimitError`` the 15-minute countdown is shown and the
    same call is issued again, so a rate-limited request is never silently
    dropped. After a successful call the regular ``query_per_sec`` pause is
    taken before the result is returned. Every other exception propagates.
    """
    while True:
        try:
            result = endpoint(**params)
        except TwythonRateLimitError:
            print("TwythonRateLimitError")
            LimitError(rng_time=60*15, wait_time=1)
            continue
        LimitError(rng_time=query_per_sec, wait_time=1)
        return result


def _fetch_screen_names(user, page_count):
    """Return the screen names in ``user``'s list, or None on an auth error.

    At most ``page_count`` pages of ``fnum`` users are requested, following
    each response's ``next_cursor``. Paging stops early when the cursor
    comes back as 0 (taken to mean "no more pages" -- see the Twitter
    cursoring documentation).
    """
    fetch_page = getattr(twitter, "get_" + func + "_list")
    names = []
    cursor = -1  # -1 requests the first page
    for _ in range(page_count):
        try:
            page = _call_api(fetch_page, screen_name=user, count=fnum,
                             skip_status=1, cursor=cursor)
        except TwythonAuthError:
            print("TwythonAuthError")
            LimitError(rng_time=query_per_sec, wait_time=1)
            # Without this page there is no valid cursor to continue from.
            return None
        names.extend(member["screen_name"] for member in page["users"])
        cursor = page["next_cursor"]
        if cursor == 0:
            break
    return names


for position, user in enumerate(cv, 1):
    try:
        suser = _call_api(twitter.show_user, screen_name=user)
    except TwythonError:
        # The lookup failed (unknown, suspended, protected, ...): report the
        # account and move on rather than continuing with stale profile data.
        print(user)
        continue

    # Number of pages needed, i.e. ceil(count / fnum) in integer arithmetic.
    pnum = (int(suser[func + "_count"]) + fnum - 1) // fnum
    print("Processing " + user + " (" + str(position) + "/" + str(len(cv)) + ") " + str(pnum))

    # friends / followers retrieval
    names = _fetch_screen_names(user, pnum)
    if names is None:
        # Auth failure: write nothing, so the account cannot be mistaken for
        # one that genuinely has an empty list.
        continue

    with open(wd_out + user + ".txt", "w") as w0:
        # An account with zero users gets an empty file, as before.
        if pnum > 0:
            w0.write("\t".join(names) + "\n")