mirror of
https://github.com/qbittorrent/qBittorrent.git
synced 2025-01-24 18:44:52 +08:00
[search engine] Update Legit Torrent to remove sgmllib
This commit is contained in:
parent
f3dd93a42c
commit
033817f70b
@ -1,5 +1,6 @@
|
|||||||
#VERSION: 1.05
|
#VERSION: 2.00
|
||||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||||
|
# Douman (custparasite@gmx.se)
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
# modification, are permitted provided that the following conditions are met:
|
# modification, are permitted provided that the following conditions are met:
|
||||||
@ -28,78 +29,73 @@
|
|||||||
|
|
||||||
from novaprinter import prettyPrinter
|
from novaprinter import prettyPrinter
|
||||||
from helpers import retrieve_url, download_file
|
from helpers import retrieve_url, download_file
|
||||||
import sgmllib
|
from HTMLParser import HTMLParser
|
||||||
import re
|
from re import compile as re_compile
|
||||||
|
|
||||||
class legittorrents(object):
|
class legittorrents(object):
|
||||||
url = 'http://www.legittorrents.info'
|
url = 'http://www.legittorrents.info'
|
||||||
name = 'Legit Torrents'
|
name = 'Legit Torrents'
|
||||||
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
|
supported_categories = {'all': '0', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
print download_file(info)
|
print(download_file(info))
|
||||||
|
|
||||||
class SimpleSGMLParser(sgmllib.SGMLParser):
|
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||||
def __init__(self, results, url, *args):
|
""" Parser class """
|
||||||
sgmllib.SGMLParser.__init__(self)
|
def __init__(self, url):
|
||||||
|
HTMLParser.__init__(self)
|
||||||
self.url = url
|
self.url = url
|
||||||
self.td_counter = None
|
|
||||||
self.current_item = None
|
self.current_item = None
|
||||||
self.start_name = False
|
self.save_item_key = None
|
||||||
self.results = results
|
|
||||||
|
|
||||||
def start_a(self, attr):
|
def handle_starttag(self, tag, attrs):
|
||||||
params = dict(attr)
|
""" Parser's start tag handler """
|
||||||
if params.has_key('href') and params['href'].startswith('download.php?'):
|
if self.current_item:
|
||||||
self.current_item['link'] = self.url + '/' + params['href'].strip()
|
params = dict(attrs)
|
||||||
elif params.has_key('href') and params['href'].startswith('index.php?page=torrent-details'):
|
if tag == "a":
|
||||||
|
link = params["href"]
|
||||||
|
if link.startswith("index") and "title" in params:
|
||||||
|
#description link
|
||||||
|
self.current_item["name"] = params["title"][14:]
|
||||||
|
self.current_item["desc_link"] = "/".join((self.url, link))
|
||||||
|
elif link.startswith("download"):
|
||||||
|
self.current_item["link"] = "/".join((self.url, link))
|
||||||
|
elif tag == "td":
|
||||||
|
if "class" in params and params["class"].startswith("#FF"):
|
||||||
|
self.save_item_key = "leech" if "seeds" in self.current_item else "seeds"
|
||||||
|
|
||||||
|
elif tag == "tr":
|
||||||
self.current_item = {}
|
self.current_item = {}
|
||||||
self.td_counter = 0
|
self.current_item["size"] = ""
|
||||||
self.current_item['desc_link'] = self.url + '/' + params['href'].strip()
|
self.current_item["engine_url"] = self.url
|
||||||
|
|
||||||
|
def handle_endtag(self, tag):
|
||||||
|
""" Parser's end tag handler """
|
||||||
|
if self.current_item and tag == "tr":
|
||||||
|
if len(self.current_item) > 4:
|
||||||
|
prettyPrinter(self.current_item)
|
||||||
|
self.current_item = None
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
if self.td_counter == 0:
|
""" Parser's data handler """
|
||||||
if not self.current_item.has_key('name'):
|
if self.save_item_key:
|
||||||
self.current_item['name'] = data.strip()
|
self.current_item[self.save_item_key] = data.strip()
|
||||||
elif self.td_counter == 3:
|
self.save_item_key = None
|
||||||
if not self.current_item.has_key('seeds'):
|
|
||||||
self.current_item['seeds'] = ''
|
|
||||||
self.current_item['seeds']+= data.strip()
|
|
||||||
elif self.td_counter == 4:
|
|
||||||
if not self.current_item.has_key('leech'):
|
|
||||||
self.current_item['leech'] = ''
|
|
||||||
self.current_item['leech']+= data.strip()
|
|
||||||
|
|
||||||
def start_td(self,attr):
|
|
||||||
if isinstance(self.td_counter,int):
|
|
||||||
self.td_counter += 1
|
|
||||||
if self.td_counter > 5:
|
|
||||||
self.td_counter = None
|
|
||||||
# Display item
|
|
||||||
if self.current_item:
|
|
||||||
self.current_item['engine_url'] = self.url
|
|
||||||
if not self.current_item['seeds'].isdigit():
|
|
||||||
self.current_item['seeds'] = 0
|
|
||||||
if not self.current_item['leech'].isdigit():
|
|
||||||
self.current_item['leech'] = 0
|
|
||||||
self.current_item['size'] = ''
|
|
||||||
prettyPrinter(self.current_item)
|
|
||||||
self.results.append('a')
|
|
||||||
|
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
ret = []
|
""" Performs search """
|
||||||
i = 1
|
query = "".join((self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, '0'), "&active=1"))
|
||||||
while True and i<11:
|
|
||||||
results = []
|
|
||||||
parser = self.SimpleSGMLParser(results, self.url)
|
|
||||||
dat = retrieve_url(self.url+'/index.php?page=torrents&search=%s&category=%s&active=1&order=3&by=2&pages=%d'%(what, self.supported_categories[cat], i))
|
|
||||||
results_re = re.compile('(?s)<table width="100%" class="lista">.*')
|
|
||||||
for match in results_re.finditer(dat):
|
|
||||||
res_tab = match.group(0)
|
|
||||||
parser.feed(res_tab)
|
|
||||||
parser.close()
|
|
||||||
break
|
|
||||||
if len(results) <= 0:
|
|
||||||
break
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
|
get_table = re_compile('(?s)<table\sclass="lista".*>(.*)</table>')
|
||||||
|
data = get_table.search(retrieve_url(query)).group(0)
|
||||||
|
#extract first ten pages of next results
|
||||||
|
next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>')
|
||||||
|
next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]]
|
||||||
|
|
||||||
|
parser = self.MyHtmlParseWithBlackJack(self.url)
|
||||||
|
parser.feed(data)
|
||||||
|
parser.close()
|
||||||
|
|
||||||
|
for page in next_pages:
|
||||||
|
parser.feed(get_table.search(retrieve_url(page)).group(0))
|
||||||
|
parser.close()
|
||||||
|
@ -2,7 +2,7 @@ btdigg: 1.25
|
|||||||
demonoid: 1.1
|
demonoid: 1.1
|
||||||
extratorrent: 2.0
|
extratorrent: 2.0
|
||||||
kickasstorrents: 1.27
|
kickasstorrents: 1.27
|
||||||
legittorrents: 1.05
|
legittorrents: 2.00
|
||||||
mininova: 2.00
|
mininova: 2.00
|
||||||
piratebay: 2.11
|
piratebay: 2.11
|
||||||
torrentreactor: 1.36
|
torrentreactor: 1.36
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#VERSION: 1.05
|
#VERSION: 2.00
|
||||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||||
|
# Douman (custparasite@gmx.se)
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
# modification, are permitted provided that the following conditions are met:
|
# modification, are permitted provided that the following conditions are met:
|
||||||
@ -28,78 +29,73 @@
|
|||||||
|
|
||||||
from novaprinter import prettyPrinter
|
from novaprinter import prettyPrinter
|
||||||
from helpers import retrieve_url, download_file
|
from helpers import retrieve_url, download_file
|
||||||
import sgmllib3 as sgmllib
|
from html.parser import HTMLParser
|
||||||
import re
|
from re import compile as re_compile
|
||||||
|
|
||||||
class legittorrents(object):
|
class legittorrents(object):
|
||||||
url = 'http://www.legittorrents.info'
|
url = 'http://www.legittorrents.info'
|
||||||
name = 'Legit Torrents'
|
name = 'Legit Torrents'
|
||||||
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
|
supported_categories = {'all': '0', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
print(download_file(info))
|
print(download_file(info))
|
||||||
|
|
||||||
class SimpleSGMLParser(sgmllib.SGMLParser):
|
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||||
def __init__(self, results, url, *args):
|
""" Parser class """
|
||||||
sgmllib.SGMLParser.__init__(self)
|
def __init__(self, url):
|
||||||
|
HTMLParser.__init__(self)
|
||||||
self.url = url
|
self.url = url
|
||||||
self.td_counter = None
|
|
||||||
self.current_item = None
|
self.current_item = None
|
||||||
self.start_name = False
|
self.save_item_key = None
|
||||||
self.results = results
|
|
||||||
|
|
||||||
def start_a(self, attr):
|
def handle_starttag(self, tag, attrs):
|
||||||
params = dict(attr)
|
""" Parser's start tag handler """
|
||||||
if 'href' in params and params['href'].startswith('download.php?'):
|
if self.current_item:
|
||||||
self.current_item['link'] = self.url + '/' + params['href'].strip()
|
params = dict(attrs)
|
||||||
elif 'href' in params and params['href'].startswith('index.php?page=torrent-details'):
|
if tag == "a":
|
||||||
|
link = params["href"]
|
||||||
|
if link.startswith("index") and "title" in params:
|
||||||
|
#description link
|
||||||
|
self.current_item["name"] = params["title"][14:]
|
||||||
|
self.current_item["desc_link"] = "/".join((self.url, link))
|
||||||
|
elif link.startswith("download"):
|
||||||
|
self.current_item["link"] = "/".join((self.url, link))
|
||||||
|
elif tag == "td":
|
||||||
|
if "class" in params and params["class"].startswith("#FF"):
|
||||||
|
self.save_item_key = "leech" if "seeds" in self.current_item else "seeds"
|
||||||
|
|
||||||
|
elif tag == "tr":
|
||||||
self.current_item = {}
|
self.current_item = {}
|
||||||
self.td_counter = 0
|
self.current_item["size"] = ""
|
||||||
self.current_item['desc_link'] = self.url + '/' + params['href'].strip()
|
self.current_item["engine_url"] = self.url
|
||||||
|
|
||||||
|
def handle_endtag(self, tag):
|
||||||
|
""" Parser's end tag handler """
|
||||||
|
if self.current_item and tag == "tr":
|
||||||
|
if len(self.current_item) > 4:
|
||||||
|
prettyPrinter(self.current_item)
|
||||||
|
self.current_item = None
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
if self.td_counter == 0:
|
""" Parser's data handler """
|
||||||
if 'name' not in self.current_item:
|
if self.save_item_key:
|
||||||
self.current_item['name'] = data.strip()
|
self.current_item[self.save_item_key] = data.strip()
|
||||||
elif self.td_counter == 3:
|
self.save_item_key = None
|
||||||
if 'seeds' not in self.current_item:
|
|
||||||
self.current_item['seeds'] = ''
|
|
||||||
self.current_item['seeds']+= data.strip()
|
|
||||||
elif self.td_counter == 4:
|
|
||||||
if 'leech' not in self.current_item:
|
|
||||||
self.current_item['leech'] = ''
|
|
||||||
self.current_item['leech']+= data.strip()
|
|
||||||
|
|
||||||
def start_td(self,attr):
|
|
||||||
if isinstance(self.td_counter,int):
|
|
||||||
self.td_counter += 1
|
|
||||||
if self.td_counter > 5:
|
|
||||||
self.td_counter = None
|
|
||||||
# Display item
|
|
||||||
if self.current_item:
|
|
||||||
self.current_item['engine_url'] = self.url
|
|
||||||
if not self.current_item['seeds'].isdigit():
|
|
||||||
self.current_item['seeds'] = 0
|
|
||||||
if not self.current_item['leech'].isdigit():
|
|
||||||
self.current_item['leech'] = 0
|
|
||||||
self.current_item['size'] = ''
|
|
||||||
prettyPrinter(self.current_item)
|
|
||||||
self.results.append('a')
|
|
||||||
|
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
ret = []
|
""" Performs search """
|
||||||
i = 1
|
query = "".join((self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, '0'), "&active=1"))
|
||||||
while True and i<11:
|
|
||||||
results = []
|
|
||||||
parser = self.SimpleSGMLParser(results, self.url)
|
|
||||||
dat = retrieve_url(self.url+'/index.php?page=torrents&search=%s&category=%s&active=1&order=3&by=2&pages=%d'%(what, self.supported_categories[cat], i))
|
|
||||||
results_re = re.compile('(?s)<table width="100%" class="lista">.*')
|
|
||||||
for match in results_re.finditer(dat):
|
|
||||||
res_tab = match.group(0)
|
|
||||||
parser.feed(res_tab)
|
|
||||||
parser.close()
|
|
||||||
break
|
|
||||||
if len(results) <= 0:
|
|
||||||
break
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
|
get_table = re_compile('(?s)<table\sclass="lista".*>(.*)</table>')
|
||||||
|
data = get_table.search(retrieve_url(query)).group(0)
|
||||||
|
#extract first ten pages of next results
|
||||||
|
next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>')
|
||||||
|
next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]]
|
||||||
|
|
||||||
|
parser = self.MyHtmlParseWithBlackJack(self.url)
|
||||||
|
parser.feed(data)
|
||||||
|
parser.close()
|
||||||
|
|
||||||
|
for page in next_pages:
|
||||||
|
parser.feed(get_table.search(retrieve_url(page)).group(0))
|
||||||
|
parser.close()
|
||||||
|
@ -2,7 +2,7 @@ btdigg: 1.25
|
|||||||
demonoid: 1.1
|
demonoid: 1.1
|
||||||
extratorrent: 2.0
|
extratorrent: 2.0
|
||||||
kickasstorrents: 1.27
|
kickasstorrents: 1.27
|
||||||
legittorrents: 1.05
|
legittorrents: 2.00
|
||||||
mininova: 2.00
|
mininova: 2.00
|
||||||
piratebay: 2.11
|
piratebay: 2.11
|
||||||
torrentreactor: 1.36
|
torrentreactor: 1.36
|
||||||
|
Loading…
Reference in New Issue
Block a user