BLACK CAT PROGRAMMER

python request

金田一出了新的故事,上網睇固然開心,不用擔心存放問題,但網頁的設計真是很爛,要 scroll up and down 去看一整頁,又多廣告,索性把它下載下來好一點。


import urllib.error
import urllib.request

# http://cache.someou.com/Uploads/files/2020/66134/001.jpg

# Number of pages in each chapter, in chapter order (first entry = chapter 1).
PAGES = [25, 23, 22, 22, 22, 24, 21, 23, 26, 23, 23, 21, 21, 22, 25, 22, 23, 21, 23, 24, 23, 23, 22, 23, 26, 23, 22, 24, 24, 22]
# Image URL template: first field is the site's numeric chapter id, second is
# the one-based page number zero-padded to three digits.
BASE_URL = 'http://cache.someou.com/Uploads/files/2020/{:d}/{:03d}.jpg'
# Site id used for chapter 1; each following chapter uses the next integer.
FIRST_CHAPTER_ID = 66134
# Seconds to wait on a stalled connection before giving up on a page.
REQUEST_TIMEOUT = 30
# Browser-like headers sent with every request.
# NOTE(review): the Cookie value looks like a captured session cookie and
# may need refreshing -- confirm if downloads start failing.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Cookie': '__cfduid=d28d5f69ff39f960a0571508905d2e3b21556458631',
}


# Download every page of every chapter into the current directory as
# "<chapter>_<page>.jpg" (both one-based, three digits). A page that fails is
# reported and skipped so one bad page does not abort the whole run.
for chp, page_count in enumerate(PAGES, start=1):
    for page in range(1, page_count + 1):
        link = BASE_URL.format(FIRST_CHAPTER_ID + chp - 1, page)
        # Chapter/page are logged one-based so they match the saved file name.
        print("donwloading {} chapter {} page {}".format(link, chp, page))

        req = urllib.request.Request(link, headers=REQUEST_HEADERS)
        try:
            with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
                status, reason = response.status, response.reason
                data = response.read() if status == 200 else None
        except urllib.error.HTTPError as err:
            # urlopen raises for 4xx/5xx instead of returning the response,
            # so the failure details have to be read off the exception.
            status, reason, data = err.code, err.reason, None
            err.close()
        except OSError as err:
            # Covers URLError (DNS, refused connection) and socket timeouts.
            status, reason, data = None, err, None

        if data is None:
            print("Failed in download chp {} page {}".format(chp, page))
            print("status {} reason {}".format(status, reason))
            continue

        with open('{:03d}_{:03d}.jpg'.format(chp, page), 'wb') as f:
            f.write(data)

print("done")

小小玩具,自己做一個記錄

Posted in notes | Tagged