[search engine] Nova2 multiprocessing

This commit is contained in:
DoumanAsh 2015-04-06 08:35:29 +03:00
parent e502ce38ec
commit bef8106d0f
2 changed files with 239 additions and 204 deletions

View File

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.32 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,16 +37,15 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib import urllib
from os import path
import fix_encoding from glob import glob
from sys import argv
from multiprocessing import Pool, cpu_count
from fix_encoding import fix_encoding
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -56,108 +55,125 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar # As a convention, try to list results by decreasing number of seeds or similar
################################################################################ ################################################################################
supported_engines = [] def initialize_engines():
""" Import available engines
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) Return list of available engines
for engine in engines: """
e = engine.split(os.sep)[-1][:-3] supported_engines = []
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec "from engines.%s import %s"%(e,e)
supported_engines.append(e)
except:
pass
def engineToXml(short_name): engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
xml = "<%s>\n"%short_name for engine in engines:
exec "engine = %s()"%short_name engi = path.basename(engine).split('.')[0].strip()
xml += "<name>%s</name>\n"%engine.name if len(engi) == 0 or engi.startswith('_'):
xml += "<url>%s</url>\n"%engine.url continue
xml += "<categories>" try:
if hasattr(engine, 'supported_categories'): #import engines.[engine]
supported_categories = engine.supported_categories.keys() engine_module = __import__(".".join(("engines", engi)))
supported_categories.remove('all') #get low-level module
xml += " ".join(supported_categories) engine_module = getattr(engine_module, engi)
xml += "</categories>\n" #bind class name
xml += "</%s>\n"%short_name globals()[engi] = getattr(engine_module, engi)
return xml supported_engines.append(engi)
except:
pass
def displayCapabilities(): return supported_engines
"""
Display capabilities in XML format
<capabilities>
<engine_short_name>
<name>long name</name>
<url>http://example.com</url>
<categories>movies music games</categories>
</engine_short_name>
</capabilities>
"""
xml = "<capabilities>"
for short_name in supported_engines:
xml += engineToXml(short_name)
xml += "</capabilities>"
print xml
class EngineLauncher(threading.Thread): def engines_to_xml(supported_engines):
def __init__(self, engine, what, cat='all'): """ Generates xml for supported engines """
threading.Thread.__init__(self) tab = " " * 4
self.engine = engine
self.what = what
self.cat = cat
def run(self):
if hasattr(self.engine, 'supported_categories'):
if self.cat == 'all' or self.cat in self.engine.supported_categories.keys():
self.engine.search(self.what, self.cat)
elif self.cat == 'all':
self.engine.search(self.what)
if __name__ == '__main__': for short_name in supported_engines:
# Make sure we enforce utf-8 encoding search_engine = globals()[short_name]()
fix_encoding.fix_encoding()
if len(sys.argv) < 2: supported_categories = ""
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% if hasattr(search_engine, "supported_categories"):
(','.join(supported_engines))) supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
if key is not "all"))
if len(sys.argv) == 2: yield "".join((tab, "<", short_name, ">\n",
if sys.argv[1] == "--capabilities": tab, tab, "<name>", search_engine.name, "</name>\n",
displayCapabilities() tab, tab, "<url>", search_engine.url, "</url>\n",
sys.exit(0) tab, tab, "<categories>", supported_categories, "</categories>\n",
else: tab, "</", short_name, ">\n"))
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] def displayCapabilities(supported_engines):
"""
Display capabilities in XML format
<capabilities>
<engine_short_name>
<name>long name</name>
<url>http://example.com</url>
<categories>movies music games</categories>
</engine_short_name>
</capabilities>
"""
xml = "".join(("<capabilities>\n",
"".join(engines_to_xml(supported_engines)),
"</capabilities>"))
print(xml)
if 'all' in engines_list: def run_search(engine_list):
engines_list = supported_engines """ Run search in engine
cat = sys.argv[2].lower() @retval False if any exceptions occurred
@retval True otherwise
if cat not in CATEGORIES: """
raise SystemExit('Invalid category!') engine, what, cat = engine_list
try:
what = urllib.quote(' '.join(sys.argv[3:])) engine = engine()
#avoid exceptions due to invalid category
threads = [] if hasattr(engine, 'supported_categories'):
for engine in engines_list: cat = cat if cat in engine.supported_categories else "all"
try: engine.search(what, cat)
if THREADED: else:
exec "l = EngineLauncher(%s(), what, cat)"%engine engine.search(what)
threads.append(l) return True
l.start() except:
else: return False
exec "e = %s()"%engine
if hasattr(engine, 'supported_categories'): def main(args):
if cat == 'all' or cat in e.supported_categories.keys(): fix_encoding()
e.search(what, cat) supported_engines = initialize_engines()
elif self.cat == 'all':
e.search(what) if not args:
engine().search(what, cat) raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
except: "available engines: %s" % (','.join(supported_engines)))
pass
if THREADED: elif args[0] == "--capabilities":
for t in threads: displayCapabilities(supported_engines)
t.join() return
elif len(args) < 3:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
engines_list = set(e.lower() for e in args[0].strip().split(','))
if 'all' in engines_list:
engines_list = supported_engines
else:
#discard un-supported engines
engines_list = [engine for engine in engines_list
if engine in supported_engines]
if not engines_list:
#engine list is empty. Nothing to do here
return
cat = args[1].lower()
if cat not in CATEGORIES:
raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.quote(' '.join(args[2:]))
if THREADED:
pool = Pool(min(len(engines_list), cpu_count()))
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
else:
_ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
if __name__ == "__main__":
main(argv[1:])

View File

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.24 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,14 +37,14 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib.parse import urllib.parse
from os import path, cpu_count
from glob import glob
from sys import argv
from multiprocessing import Pool
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -54,105 +54,124 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar # As a convention, try to list results by decreasing number of seeds or similar
################################################################################ ################################################################################
supported_engines = [] def initialize_engines():
""" Import available engines
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) Return list of available engines
for engine in engines: """
e = engine.split(os.sep)[-1][:-3] supported_engines = []
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec("from engines.%s import %s"%(e,e))
supported_engines.append(e)
except:
pass
def engineToXml(short_name): engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
xml = "<%s>\n"%short_name for engine in engines:
exec("search_engine = %s()"%short_name, globals()) engi = path.basename(engine).split('.')[0].strip()
xml += "<name>%s</name>\n"%search_engine.name if len(engi) == 0 or engi.startswith('_'):
xml += "<url>%s</url>\n"%search_engine.url continue
xml += "<categories>" try:
if hasattr(search_engine, 'supported_categories'): #import engines.[engine]
supported_categories = list(search_engine.supported_categories.keys()) engine_module = __import__(".".join(("engines", engi)))
supported_categories.remove('all') #get low-level module
xml += " ".join(supported_categories) engine_module = getattr(engine_module, engi)
xml += "</categories>\n" #bind class name
xml += "</%s>\n"%short_name globals()[engi] = getattr(engine_module, engi)
return xml supported_engines.append(engi)
except:
pass
def displayCapabilities(): return supported_engines
"""
Display capabilities in XML format
<capabilities>
<engine_short_name>
<name>long name</name>
<url>http://example.com</url>
<categories>movies music games</categories>
</engine_short_name>
</capabilities>
"""
xml = "<capabilities>"
for short_name in supported_engines:
xml += engineToXml(short_name)
xml += "</capabilities>"
print(xml)
class EngineLauncher(threading.Thread): def engines_to_xml(supported_engines):
def __init__(self, engine, what, cat='all'): """ Generates xml for supported engines """
threading.Thread.__init__(self) tab = " " * 4
self.engine = engine
self.what = what
self.cat = cat
def run(self):
if hasattr(self.engine, 'supported_categories'):
if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()):
self.engine.search(self.what, self.cat)
elif self.cat == 'all':
self.engine.search(self.what)
if __name__ == '__main__': for short_name in supported_engines:
if len(sys.argv) < 2: search_engine = globals()[short_name]()
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2: supported_categories = ""
if sys.argv[1] == "--capabilities": if hasattr(search_engine, "supported_categories"):
displayCapabilities() supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
sys.exit(0) if key is not "all"))
else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] yield "".join((tab, "<", short_name, ">\n",
tab, tab, "<name>", search_engine.name, "</name>\n",
tab, tab, "<url>", search_engine.url, "</url>\n",
tab, tab, "<categories>", supported_categories, "</categories>\n",
tab, "</", short_name, ">\n"))
if 'all' in engines_list: def displayCapabilities(supported_engines):
engines_list = supported_engines """
Display capabilities in XML format
cat = sys.argv[2].lower() <capabilities>
<engine_short_name>
if cat not in CATEGORIES: <name>long name</name>
raise SystemExit('Invalid category!') <url>http://example.com</url>
<categories>movies music games</categories>
what = urllib.parse.quote(' '.join(sys.argv[3:])) </engine_short_name>
</capabilities>
threads = [] """
for engine in engines_list: xml = "".join(("<capabilities>\n",
try: "".join(engines_to_xml(supported_engines)),
if THREADED: "</capabilities>"))
exec("l = EngineLauncher(%s(), what, cat)"%engine) print(xml)
threads.append(l)
l.start() def run_search(engine_list):
else: """ Run search in engine
exec("e = %s()"%engine)
if hasattr(engine, 'supported_categories'): @retval False if any exceptions occurred
if cat == 'all' or cat in list(e.supported_categories.keys()): @retval True otherwise
e.search(what, cat) """
elif self.cat == 'all': engine, what, cat = engine_list
e.search(what) try:
engine().search(what, cat) engine = engine()
except: #avoid exceptions due to invalid category
pass if hasattr(engine, 'supported_categories'):
if THREADED: cat = cat if cat in engine.supported_categories else "all"
for t in threads: engine.search(what, cat)
t.join() else:
engine.search(what)
return True
except:
return False
def main(args):
supported_engines = initialize_engines()
if not args:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
elif args[0] == "--capabilities":
displayCapabilities(supported_engines)
return
elif len(args) < 3:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
engines_list = set(e.lower() for e in args[0].strip().split(','))
if 'all' in engines_list:
engines_list = supported_engines
else:
#discard un-supported engines
engines_list = [engine for engine in engines_list
if engine in supported_engines]
if not engines_list:
#engine list is empty. Nothing to do here
return
cat = args[1].lower()
if cat not in CATEGORIES:
raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.parse.quote(' '.join(args[2:]))
if THREADED:
with Pool(min(len(engines_list), cpu_count())) as pool:
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
else:
_ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
if __name__ == "__main__":
main(argv[1:])