"""Search the mlib.yznu.cn library catalogue and print the newest matches.

Recovered from a mail-formatted git patch (commit
b57fdf33d6e278b207a6f34c27e6aceb66c49b88, subject "cjsftsg") that creates
``cjsf.py``.  The patch envelope and the leading ``+`` diff markers have been
dropped so that the file is plain, importable Python.
"""
import gzip
import re
import urllib.request as ur
import zlib
from operator import itemgetter
from urllib.parse import urlencode

SEARCH_URL = "http://mlib.yznu.cn:8089/search/searchList"

# NOTE(review): in the recovered source each of the next three patterns
# contained a raw line break at the positions written as ``\n`` below.  That
# may be an artefact of markup (for example an HTML tag) having been stripped
# from the patch text -- confirm against the live page before relying on them.
_PUBLISH_TIME_RE = re.compile(r'\n出版时间: (.*?)\n')
_AUTHOR_RE = re.compile(r'作者:(.*?)\n')
_PUBLISHER_RE = re.compile(r'出版社:(.*?)\n')

_BOOK_ID_RE = re.compile(r'"bookid":"(\d*?)"')
_TITLE_RE = re.compile(r'"title":"(.*?)"')
# ``\d+`` rather than a trailing lazy ``\d*?``: a lazy quantifier at the very
# end of a pattern always captures the empty string, which made ``int()`` fail.
_PAGE_COUNT_RE = re.compile(r'selected="selected">1/(\d+)')


def _inflate(raw, content_encoding):
    """Undo the HTTP ``Content-Encoding`` applied to *raw*, if any."""
    if content_encoding in ("gzip", "x-gzip"):
        return gzip.decompress(raw)
    if content_encoding == "deflate":
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Some servers send a bare deflate stream without the zlib header.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def geturl(url, headears, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request.
        headears: Mapping of HTTP request headers to send; ``None`` or an
            empty mapping sends no extra headers.  (The spelling of the
            parameter name is kept for existing callers.)
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = ur.Request(url, headers=dict(headears or {}))
    with ur.urlopen(request, timeout=timeout) as response:
        raw = response.read()
        encoding = (response.headers.get("Content-Encoding") or "").lower()
    # Callers may advertise "Accept-Encoding: gzip, deflate", so the body has
    # to be decompressed before it can be decoded.
    return _inflate(raw, encoding).decode('utf-8')


def _parse_search_page(html):
    """Extract book records and the page count from one result page.

    Returns:
        ``(books, page_count)`` where *books* is a list of
        ``[bookid, title, author, publisher, publish_time]`` lists and
        *page_count* is an ``int``, or ``None`` when the page does not
        contain the ``selected="selected">1/N`` marker.
    """
    # Imported here so the rest of the module stays importable on machines
    # without the third-party parser installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'lxml')

    book_ids, titles = [], []
    for item in soup.find_all("li"):
        field = item.input
        payload = field.get('value') if field is not None else None
        if not payload:
            # A list item without an <input value=...> carries no book data.
            continue
        id_match = _BOOK_ID_RE.search(payload)
        title_match = _TITLE_RE.search(payload)
        if id_match is None or title_match is None:
            continue
        # Append both or neither so the two columns stay aligned.
        book_ids.append(id_match.group(1))
        titles.append(title_match.group(1))

    authors = _AUTHOR_RE.findall(html)
    publishers = _PUBLISHER_RE.findall(html)
    publish_times = _PUBLISH_TIME_RE.findall(html)

    # zip() stops at the shortest column, so only complete records are kept.
    books = [
        list(record)
        for record in zip(book_ids, titles, authors, publishers, publish_times)
    ]

    page = _PAGE_COUNT_RE.search(html)
    return books, (int(page.group(1)) if page is not None else None)


def chuli(html):
    """Parse one search-result page.

    Args:
        html: Page source as returned by :func:`geturl`.

    Returns:
        ``(books, page_count)`` when the page contains the
        ``selected="selected">1/N`` marker, otherwise just ``books``.  Each
        book is ``[bookid, title, author, publisher, publish_time]``.  The
        two return shapes are kept for backward compatibility.
    """
    books, page_count = _parse_search_page(html)
    if page_count is None:
        return books
    return books, page_count


def _search_url(keyword, page=1):
    """Build the search URL for *keyword*; pages after the first add pageIndex."""
    params = {"kw": keyword}
    if page > 1:
        params["pageIndex"] = page
    return SEARCH_URL + "?" + urlencode(params)


def main(keyword="python", limit=10):
    """Search for *keyword* on every result page and print the newest books.

    Args:
        keyword: Search term; it is URL-encoded before being sent.
        limit: Maximum number of records to print.
    """
    # NOTE(review): the session cookie is a fixed value carried over from the
    # original script; presumably it expires -- confirm whether it is needed.
    headears = {"User-Agent":" Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0",
                "Accept-Encoding":" gzip, deflate","Connection":" keep-alive",
                "Cookie":" JSESSIONID=CA421982C51F9A429FA243EC72491D6A"}

    booklist, pageint = _parse_search_page(geturl(_search_url(keyword), headears))
    if pageint is None:
        # No page selector found: treat the result as a single page.
        pageint = 1

    for index in range(2, pageint + 1):
        html = geturl(_search_url(keyword, index), headears)
        booklist.extend(_parse_search_page(html)[0])

    # Field 4 is the publication time string; newest first.
    newest_first = sorted(booklist, key=itemgetter(4), reverse=True)
    # Slicing tolerates result sets shorter than *limit*.
    for book in newest_first[:limit]:
        print(book)


if __name__=='__main__':
    main()