diff options
author | Unit 193 <unit193@ubuntu.com> | 2020-04-14 18:18:40 -0400 |
---|---|---|
committer | Unit 193 <unit193@ubuntu.com> | 2020-04-14 18:18:40 -0400 |
commit | cf188f30e1c27bdb900fa2623a9ff91b944633b2 (patch) | |
tree | 94803cd79aa8aaefd09d9bbc7b9c8029b415c885 | |
parent | e4887ae6b00c50fbbde531cc274c77b076bd821d (diff) | |
download | gallery-dl-cf188f30e1c27bdb900fa2623a9ff91b944633b2.tar.bz2 gallery-dl-cf188f30e1c27bdb900fa2623a9ff91b944633b2.tar.xz gallery-dl-cf188f30e1c27bdb900fa2623a9ff91b944633b2.tar.zst |
New upstream version 1.13.4upstream/1.13.4
31 files changed, 690 insertions, 360 deletions
@@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.13.3 +Version: 1.13.4 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -247,7 +247,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -263,8 +263,8 @@ Description: ========== .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ - .. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master - :target: https://travis-ci.org/mikf/gallery-dl + .. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master + :target: https://travis-ci.com/mikf/gallery-dl .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg :target: https://gitter.im/gallery-dl/main @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -236,7 +236,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -252,8 +252,8 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ -.. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master - :target: https://travis-ci.org/mikf/gallery-dl +.. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master + :target: https://travis-ci.com/mikf/gallery-dl .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg :target: https://gitter.im/gallery-dl/main diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index af9ac7d..e7e1566 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-03-28" "1.13.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-04-12" "1.13.4" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 9a374da..2fd4dba 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-03-28" "1.13.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-04-12" "1.13.4" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index c9ca17b..45381a6 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.13.3 +Version: 1.13.4 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -247,7 +247,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -263,8 +263,8 @@ Description: ========== .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ - .. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master - :target: https://travis-ci.org/mikf/gallery-dl + .. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master + :target: https://travis-ci.com/mikf/gallery-dl .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg :target: https://gitter.im/gallery-dl/main diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index ecb052c..1df3675 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -39,6 +39,7 @@ gallery_dl/extractor/8muses.py gallery_dl/extractor/__init__.py gallery_dl/extractor/adultempire.py gallery_dl/extractor/artstation.py +gallery_dl/extractor/aryion.py gallery_dl/extractor/bcy.py gallery_dl/extractor/behance.py gallery_dl/extractor/blogger.py diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 785ffc3..c2787ad 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -35,6 +35,14 @@ else: ] +if getattr(sys, "frozen", False): + # look for config file in PyInstaller executable directory (#682) + _default_configs.append(os.path.join( + os.path.dirname(sys.executable), + "gallery-dl.conf", + )) + + # -------------------------------------------------------------------- # public interface diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 844e422..64a2978 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -164,7 +164,11 @@ class HttpDownloader(DownloaderBase): self.downloading = False if self.mtime: - pathfmt.kwdict["_mtime"] = response.headers.get("Last-Modified") + pathfmt.kwdict.setdefault( + "_mtime", response.headers.get("Last-Modified")) + else: + pathfmt.kwdict["_mtime"] = None + return True def receive(self, response, file): diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 74c553d..2c87eb3 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -19,6 +19,7 @@ modules = [ "8muses", "adultempire", "artstation", + "aryion", "bcy", "behance", "blogger", diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py new file mode 100644 index 0000000..d8f55bd --- /dev/null +++ b/gallery_dl/extractor/aryion.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://aryion.com/""" + +from .common import Extractor, Message +from .. import text, util + + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4" + + +class AryionExtractor(Extractor): + """Base class for aryion extractors""" + category = "aryion" + directory_fmt = ("{category}", "{user!l}", "{path:J - }") + filename_fmt = "{id} {title}.{extension}" + archive_fmt = "{id}" + root = "https://aryion.com" + + def __init__(self, match): + Extractor.__init__(self, match) + self.user = match.group(1) + self.offset = 0 + + def items(self): + for post_id in util.advance(self.posts(), self.offset): + post = self._parse_post(post_id) + if post: + yield Message.Directory, post + yield Message.Url, post["url"], post + + def posts(self): + return () + + def skip(self, num): + self.offset += num + return num + + def _parse_post(self, post_id): + url = "{}/g4/data.php?id={}".format(self.root, post_id) + with self.request(url, method="HEAD", fatal=False) as response: + + if response.status_code >= 400: + return None + headers = response.headers + + # ignore folders + if headers["content-type"] == "application/x-folder": + return None + + # get filename from 'content-disposition' header + cdis = headers["content-disposition"] + fname, _, ext = text.extract( + cdis, 'filename="', '"')[0].rpartition(".") + if not fname: + fname, ext = ext, fname + + # fix 'last-modified' header + lmod = headers["last-modified"] + if lmod[22] != ":": + lmod = "{}:{} GMT".format(lmod[:22], lmod[22:24]) + + post_url = "{}/g4/view/{}".format(self.root, post_id) + extr = text.extract_from(self.request(post_url).text) + + title, _, artist = text.unescape(extr( + "<title>g4 :: ", "<")).rpartition(" by ") + data = { + "id" : text.parse_int(post_id), + "url" : url, + "user" : self.user or artist, + "title" : title, + "artist": artist, + "path" : text.split_html(extr("cookiecrumb'>", '</span'))[4:-1:2], + "date" : extr("class='pretty-date' title='", "'"), + "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")), + "size" : text.parse_bytes(extr("File size</b>:", "<")[:-2]), + "width" : text.parse_int(extr("Resolution</b>:", "x")), + "height": text.parse_int(extr("", "<")), + "comments" : text.parse_int(extr("Comments</b>:", "<")), + "favorites": text.parse_int(extr("Favorites</b>:", "<")), + "tags" : text.split_html(extr("class='taglist'>", "</span>")), + "description": text.unescape(text.remove_html(extr( + "<p>", "</p>"), "", "")), + "filename" : fname, + "extension" : ext, + "_mtime" : lmod, + } + + d1, _, d2 = data["date"].partition(",") + data["date"] = text.parse_datetime( + d1[:-2] + d2, "%b %d %Y %I:%M %p", -5) + + return data + + +class AryionGalleryExtractor(AryionExtractor): + """Extractor for a user's gallery on eka's portal""" + subcategory = "gallery" + pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)" + test = ( + ("https://aryion.com/g4/gallery/jameshoward", { + "pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$", + "range": "48-52", + "count": 5, + }), + ("https://aryion.com/g4/user/jameshoward"), + ("https://aryion.com/g4/latest.php?name=jameshoward"), + ) + + def posts(self): + url = "{}/g4/latest.php?name={}".format(self.root, self.user) + + while True: + page = self.request(url).text + yield from text.extract_iter( + page, "class='thumb' href='/g4/view/", "'") + + pos = page.find("Next >>") + if pos < 0: + return + url = self.root + text.rextract(page, "href='", "'", pos)[0] + + +class AryionPostExtractor(AryionExtractor): + """Extractor for individual posts on eka's portal""" + subcategory = "post" + pattern = BASE_PATTERN + r"/view/(\d+)" + test = ("https://aryion.com/g4/view/510079", { + "url": "f233286fa5558c07ae500f7f2d5cb0799881450e", + "keyword": { + "artist" : "jameshoward", + "user" : "jameshoward", + "filename" : "jameshoward-510079-subscribestar_150", + "extension": "jpg", + "id" : 510079, + "width" : 1665, + "height" : 1619, + "size" : 784241, + "title" : "I'm on subscribestar now too!", + "description": r"re:Doesn't hurt to have a backup, right\?", + "tags" : ["Non-Vore", "subscribestar"], + "date" : "dt:2019-02-16 19:30:00", + "path" : [], + "views" : int, + "favorites": int, + "comments" : int, + "_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT", + }, + }) + + def posts(self): + post_id = self.user + self.user = None + return (post_id,) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 19ee182..8986c99 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -122,23 +122,33 @@ class Extractor(): raise exception.HttpError(msg) - def wait(self, *, seconds=None, until=None, reason=None, adjust=1): - now = datetime.datetime.now() + def wait(self, *, seconds=None, until=None, adjust=1.0, + reason="rate limit reset"): + now = time.time() if seconds: seconds = float(seconds) - until = now + datetime.timedelta(seconds=seconds) + until = now + seconds elif until: - until = datetime.datetime.fromtimestamp(float(until)) - seconds = (until - now).total_seconds() + if isinstance(until, datetime.datetime): + # convert to UTC timestamp + epoch = datetime.datetime(1970, 1, 1) + until = (until - epoch) / datetime.timedelta(0, 1) + else: + until = float(until) + seconds = until - now else: raise ValueError("Either 'seconds' or 'until' is required") + seconds += adjust + if seconds <= 0.0: + return + if reason: - t = until.time() + t = datetime.datetime.fromtimestamp(until).time() isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second) self.log.info("Waiting until %s for %s.", isotime, reason) - time.sleep(seconds + adjust) + time.sleep(seconds) def _get_auth_info(self): """Return authentication information as (username, password) tuple""" diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d6669d1..2dcf0b7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -27,14 +27,15 @@ BASE_PATTERN = ( class DeviantartExtractor(Extractor): - """Base class for deviantart extractors using the OAuth API""" + """Base class for deviantart extractors""" category = "deviantart" directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" cookiedomain = None root = "https://www.deviantart.com" + _last_request = 0 - def __init__(self, match=None): + def __init__(self, match): Extractor.__init__(self, match) self.offset = 0 self.flat = self.config("flat", True) @@ -43,10 +44,10 @@ class DeviantartExtractor(Extractor): self.original = self.config("original", True) self.user = match.group(1) or match.group(2) self.group = False - self.api = DeviantartAPI(self) + self.api = None if self.quality: - self.quality = "q_{}".format(self.quality) + self.quality = ",q_{}".format(self.quality) if self.original != "image": self._update_content = self._update_content_default @@ -64,6 +65,8 @@ class DeviantartExtractor(Extractor): return num def items(self): + self.api = DeviantartOAuthAPI(self) + if self.user: profile = self.api.user_profile(self.user) self.group = not profile @@ -95,12 +98,12 @@ class DeviantartExtractor(Extractor): # https://github.com/r888888888/danbooru/issues/4069 intermediary, count = re.subn( r"(/f/[^/]+/[^/]+)/v\d+/.*", - r"/intermediary\1", content["src"]) + r"/intermediary\1", content["src"], 1) if count and self._check_url(intermediary): content["src"] = intermediary if self.quality: content["src"] = re.sub( - r"q_\d+", self.quality, content["src"]) + r",q_\d+", self.quality, content["src"], 1) yield self.commit(deviation, content) @@ -266,6 +269,23 @@ class DeviantartExtractor(Extractor): def _check_url(self, url): return self.request(url, method="HEAD", fatal=False).status_code < 400 + def _limited_request(self, url, **kwargs): + """Limits HTTP requests to one every 2 seconds""" + kwargs["fatal"] = None + diff = time.time() - DeviantartExtractor._last_request + if diff < 2.0: + delay = 2.0 - diff + self.log.debug("Sleeping %.2f seconds", delay) + time.sleep(delay) + + while True: + response = self.request(url, **kwargs) + if response.status_code != 403 or \ + b"Request blocked." not in response.content: + DeviantartExtractor._last_request = time.time() + return response + self.wait(seconds=180) + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" @@ -293,6 +313,9 @@ class DeviantartUserExtractor(DeviantartExtractor): ), ("gallery",)) +############################################################################### +# OAuth ####################################################################### + class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" subcategory = "gallery" @@ -439,7 +462,7 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # multiple stash items ("https://sta.sh/21jf51j7pzl2", { - "pattern": pattern, + "options": (("original", False),), "count": 4, }), # downloadable, but no "content" field (#307) @@ -447,8 +470,13 @@ class DeviantartStashExtractor(DeviantartExtractor): "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "count": 1, }), + # mixed folders and images (#659) + ("https://sta.sh/215twi387vfj", { + "options": (("original", False),), + "count": 4, + }), ("https://sta.sh/abcdefghijkl", { - "exception": exception.HttpError, + "count": 0, }), ) @@ -459,21 +487,31 @@ class DeviantartStashExtractor(DeviantartExtractor): self.user = None self.stash_id = match.group(1) - def deviations(self): - url = "https://sta.sh/" + self.stash_id - page = self.request(url).text - deviation_id = text.extract(page, '//deviation/', '"')[0] + def deviations(self, stash_id=None): + if stash_id is None: + stash_id = self.stash_id + url = "https://sta.sh/" + stash_id + page = self._limited_request(url).text - if deviation_id: - return (self.api.deviation(deviation_id),) + if stash_id[0] == "0": + uuid = text.extract(page, '//deviation/', '"')[0] + if uuid: + yield self.api.deviation(uuid) + return - else: - data = {"_extractor": DeviantartStashExtractor} - page = text.extract(page, 'id="stash-body"', 'class="footer"')[0] - return [ - (url, data) - for url in text.extract_iter(page, '<a href="', '"') - ] + for item in text.extract_iter( + page, 'class="stash-thumb-container', '</div>'): + url = text.extract(item, '<a href="', '"')[0] + + if url: + stash_id = url.rpartition("/")[2] + else: + stash_id = text.extract(item, 'gmi-stashid="', '"')[0] + stash_id = "2" + util.bencode(text.parse_int( + stash_id), "0123456789abcdefghijklmnopqrstuvwxyz") + + if len(stash_id) > 2: + yield from self.deviations(stash_id) class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -635,148 +673,10 @@ class DeviantartPopularExtractor(DeviantartExtractor): deviation["popular"] = self.popular -class DeviantartExtractorV2(DeviantartExtractor): - """Base class for deviantart extractors using the NAPI""" - cookiedomain = ".deviantart.com" - cookienames = ("auth", "auth_secure", "userinfo") - _warning = True - - def items(self): - if self.original and not self._check_cookies(self.cookienames): - self.original = False - if self._warning: - DeviantartExtractorV2._warning = False - self.log.warning("No session cookies set: " - "Disabling original file downloads.") - - yield Message.Version, 1 - for deviation in self.deviations(): - data = self.api.deviation_extended_fetch( - deviation["deviationId"], - deviation["author"]["username"], - "journal" if deviation["isJournal"] else "art", - ) - - if "deviation" not in data: - self.log.warning("Unable to fetch deviation ID %s", - deviation["deviationId"]) - self.log.debug("Server response: %s", data) - continue - - deviation = self._extract(data) - if not deviation: - continue - - yield Message.Directory, deviation - yield Message.Url, deviation["target"]["src"], deviation - if self.extra: - for match in DeviantartStashExtractor.pattern.finditer( - deviation["description"]): - deviation["_extractor"] = DeviantartStashExtractor - yield Message.Queue, match.group(0), deviation - - def _extract(self, data): - deviation = data["deviation"] - extended = deviation["extended"] - media = deviation["media"] - del deviation["extended"] - del deviation["media"] - - # prepare deviation metadata - deviation["description"] = extended.get("description", "") - deviation["username"] = deviation["author"]["username"] - deviation["_username"] = deviation["username"].lower() - deviation["stats"] = extended["stats"] - deviation["stats"]["comments"] = data["comments"]["total"] - deviation["index"] = deviation["deviationId"] - deviation["tags"] = [t["name"] for t in extended.get("tags") or ()] - deviation["date"] = text.parse_datetime( - deviation["publishedTime"]) - deviation["category_path"] = "/".join( - extended[key]["displayNameEn"] - for key in ("typeFacet", "contentFacet", "categoryFacet") - if key in extended - ) - - # extract download target - target = media["types"][-1] - src = token = None - - if "textContent" in deviation: - if not self.commit_journal: - return None - journal = deviation["textContent"] - journal["html"] = journal["html"]["markup"] - src = self.commit_journal(deviation, journal)[1] - - elif target["t"] == "gif": - src = target["b"] - token = media["token"][0] - - elif "download" in extended and self.original: - target = extended["download"] - src = target["url"] - del target["url"] - - elif target["t"] == "video": - # select largest video - target = max(media["types"], - key=lambda x: text.parse_int(x.get("q", "")[:-1])) - src = target["b"] - - elif target["t"] == "flash": - src = target["s"] - if src.startswith("https://sandbox.deviantart.com"): - # extract SWF file from "sandbox" - src = text.extract( - self.request(src).text, 'id="sandboxembed" src="', '"')[0] - - else: - src = media["baseUri"] - if "token" in media: - token = media["token"][0] - - if "c" in target: - src += "/" + target["c"].replace( - "<prettyName>", media["prettyName"]) - if src.startswith("https://images-wixmp-"): - if deviation["index"] <= 790677560: - # https://github.com/r888888888/danbooru/issues/4069 - intermediary, count = re.subn( - r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src) - if count and self._check_url(intermediary): - src = intermediary - if self.quality: - src = re.sub(r"q_\d+", self.quality, src) - - # filename and extension metadata - alphabet = "0123456789abcdefghijklmnopqrstuvwxyz" - sub = re.compile(r"\W").sub - deviation["filename"] = "".join(( - sub("_", deviation["title"].lower()), "_by_", - sub("_", deviation["author"]["username"].lower()), "-d", - util.bencode(deviation["index"], alphabet), - )) - if "extension" not in deviation: - deviation["extension"] = text.ext_from_url(src) - - if token: - src = src + "?token=" + token - target["src"] = src - deviation["target"] = target - return deviation - - def _pagination(self, url, params, headers=None): - while True: - data = self.request(url, params=params, headers=headers).json() - yield from data["results"] +############################################################################### +# Eclipse ##################################################################### - if not data["hasMore"]: - return - params["offset"] = data["nextOffset"] - - -class DeviantartDeviationExtractor(DeviantartExtractorV2): +class DeviantartDeviationExtractor(DeviantartExtractor): """Extractor for single deviations""" subcategory = "deviation" archive_fmt = "{index}.{extension}" @@ -784,16 +684,13 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): test = ( (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), { "options": (("original", 0),), - # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", }), ("https://www.deviantart.com/zzz/art/zzz-1234567890", { - "count": 0, + "exception": exception.NotFoundError, }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { - # "pattern": (r"https://www.deviantart.com/download/261986576" - # r"/[\w-]+\.jpg\?token=\w+&ts=\d+"), - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/intermediary/f/[^/]+/[^.]+\.jpg") + "pattern": r"https://api-da\.wixmp\.com/_api/download/file", }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { @@ -809,10 +706,10 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" r"/f/[^/]+/[^.]+\.gif\?token="), }), - # external URLs from description (#302) + # sta.sh URLs from description (#302) (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), { "options": (("extra", 1), ("original", 0)), - "pattern": r"https?://sta\.sh/\w+$", + "pattern": DeviantartStashExtractor.pattern, "range": "2-", "count": 4, }), @@ -823,33 +720,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5", "extension": "mp4", "target": { - "d": 306, - "f": 19367585, - "h": 720, - "q": "720p", - "t": "video", - "w": 1364, + "duration": 306, + "filesize": 19367585, + "quality": "720p", "src": str, }, } }), - # archive - ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", { - # "pattern": r"https://.+deviantart.com/download/763300948/.*rar", - "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png" - }), - # swf - ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", { - "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf", - }), # journal ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", { - "url": "f33f8127ab71819be7de849175b6d5f8b37bb629", + "url": "d34b2c9f873423e665a1b8ced20fcb75951694a3", "pattern": "text:<!DOCTYPE html>\n", }), # journal-like post with isJournal == False (#419) ("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", { - "url": "1534d6ea0561247ab921d07505e57a9d663a833b", + "url": "e2e0044bd255304412179b6118536dbd9bb3bb0e", "pattern": "text:<!DOCTYPE html>\n", }), # old-style URLs @@ -863,19 +748,20 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): skip = Extractor.skip def __init__(self, match): - DeviantartExtractorV2.__init__(self, match) + DeviantartExtractor.__init__(self, match) self.type = match.group(3) self.deviation_id = match.group(4) def deviations(self): - return ({ - "deviationId": self.deviation_id, - "author" : {"username": self.user}, - "isJournal" : self.type == "journal", - },) + deviation = DeviantartEclipseAPI(self).deviation_extended_fetch( + self.deviation_id, self.user, self.type) + if "error" in deviation: + raise exception.NotFoundError("deviation") + return (self.api.deviation( + deviation["deviation"]["extended"]["deviationUuid"]),) -class DeviantartScrapsExtractor(DeviantartExtractorV2): +class DeviantartScrapsExtractor(DeviantartExtractor): """Extractor for an artist's scraps""" subcategory = "scraps" directory_fmt = ("{category}", "{username}", "Scraps") @@ -888,24 +774,31 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"), ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), ) + cookiedomain = ".deviantart.com" + cookienames = ("auth", "auth_secure", "userinfo") + _warning = True def deviations(self): - url = self.root + "/_napi/da-user-profile/api/gallery/contents" - params = { - "username" : self.user, - "offset" : self.offset, - "limit" : "24", - "scraps_folder": "true", - } - headers = { - "Referer": "{}/{}/gallery/scraps".format(self.root, self.user), - } + eclipse_api = DeviantartEclipseAPI(self) + if self._warning: + DeviantartScrapsExtractor._warning = False + if not self._check_cookies(self.cookienames): + self.log.warning( + "No session cookies set: Unable to fetch mature scraps.") + + for obj in eclipse_api.gallery_scraps(self.user, self.offset): + deviation = obj["deviation"] + deviation_uuid = eclipse_api.deviation_extended_fetch( + deviation["deviationId"], + deviation["author"]["username"], + "journal" if deviation["isJournal"] else "art", + )["deviation"]["extended"]["deviationUuid"] - for obj in self._pagination(url, params, headers): - yield obj["deviation"] + yield self.api.deviation(deviation_uuid) -class DeviantartFollowingExtractor(DeviantartExtractorV2): +class DeviantartFollowingExtractor(DeviantartExtractor): + """Extractor for user's watched users""" subcategory = "following" pattern = BASE_PATTERN + "/about#watching$" test = ("https://www.deviantart.com/shimoda7/about#watching", { @@ -915,30 +808,19 @@ class DeviantartFollowingExtractor(DeviantartExtractorV2): }) def items(self): - url = "{}/_napi/da-user-profile/api/module/watching".format(self.root) - params = { - "username": self.user, - "moduleid": self._module_id(self.user), - "offset" : "0", - "limit" : "24", - } + eclipse_api = DeviantartEclipseAPI(self) yield Message.Version, 1 - for user in self._pagination(url, params): + for user in eclipse_api.user_watching(self.user, self.offset): url = "{}/{}".format(self.root, user["username"]) yield Message.Queue, url, user - def _module_id(self, username): - url = "{}/{}/about".format(self.root, username) - page = self.request(url).text - pos = page.find('\\"type\\":\\"watching\\"') - if pos < 0: - raise exception.NotFoundError("module") - return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') +############################################################################### +# API Interfaces ############################################################## -class DeviantartAPI(): - """Minimal interface for the DeviantArt API +class DeviantartOAuthAPI(): + """Interface for the DeviantArt OAuth API Ref: https://www.deviantart.com/developers/http/v1/20160316 """ @@ -1029,31 +911,6 @@ class DeviantartAPI(): params = {"mature_content": self.mature} return self._call(endpoint, params) - def deviation_extended_fetch(self, deviation_id, user, kind): - url = ("https://www.deviantart.com/_napi/da-browse/shared_api" - "/deviation/extended_fetch") - headers = {"Referer": "https://www.deviantart.com/"} - params = { - "deviationid" : deviation_id, - "username" : user, - "type" : kind, - "include_session": "false", - } - response = self.extractor.request( - url, headers=headers, params=params, fatal=None) - code = response.status_code - - if code == 404: - raise exception.StopExtraction( - "Your account must use the Eclipse interface.") - elif code == 403 and b"Request blocked." in response.content: - raise exception.StopExtraction( - "Requests to deviantart.com blocked due to too much traffic.") - try: - return response.json() - except Exception: - return {"error": response.text} - def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" if not deviations: @@ -1225,11 +1082,84 @@ class DeviantartAPI(): return dmap +class DeviantartEclipseAPI(): + """Interface to the DeviantArt Eclipse API""" + + def __init__(self, extractor): + self.extractor = extractor + self.log = extractor.log + + def deviation_extended_fetch(self, deviation_id, user=None, kind=None): + endpoint = "da-browse/shared_api/deviation/extended_fetch" + params = { + "deviationid" : deviation_id, + "username" : user, + "type" : kind, + "include_session": "false", + } + return self._call(endpoint, params) + + def gallery_scraps(self, user, offset=None): + endpoint = "da-user-profile/api/gallery/contents" + params = { + "username" : user, + "offset" : offset, + "limit" : "24", + "scraps_folder": "true", + } + return self._pagination(endpoint, params) + + def user_watching(self, user, offset=None): + endpoint = "da-user-profile/api/module/watching" + params = { + "username": user, + "moduleid": self._module_id_watching(user), + "offset" : None, + "limit" : "24", + } + return self._pagination(endpoint, params) + + def _call(self, endpoint, params=None): + url = "https://www.deviantart.com/_napi/" + endpoint + headers = {"Referer": "https://www.deviantart.com/"} + + response = self.extractor._limited_request( + url, params=params, headers=headers, fatal=None) + + if response.status_code == 404: + raise exception.StopExtraction( + "Your account must use the Eclipse interface.") + try: + return response.json() + except Exception: + return {"error": response.text} + + def _pagination(self, endpoint, params=None): + while True: + data = self._call(endpoint, params) + yield from data["results"] + + if not data["hasMore"]: + return + params["offset"] = data["nextOffset"] + + def _module_id_watching(self, user): + url = "{}/{}/about".format(self.extractor.root, user) + page = self.extractor._limited_request(url).text + pos = page.find('\\"type\\":\\"watching\\"') + if pos < 0: + raise exception.NotFoundError("module") + return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') + + @cache(maxage=10*365*24*3600, keyarg=0) def _refresh_token_cache(original_token, new_token=None): return new_token or original_token +############################################################################### +# Journal Formats ############################################################# + SHADOW_TEMPLATE = """ <span class="shadow"> <img src="{src}" class="smshadow" width="{width}" height="{height}"> diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py index ad97eba..ef64942 100644 --- a/gallery_dl/extractor/hentainexus.py +++ b/gallery_dl/extractor/hentainexus.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor): test = ( ("https://hentainexus.com/view/5688", { "url": "746d0043e20030f1171aae5ea113176607302517", - "keyword": "77702b42f8f76ecfe5d8a14cfbbcbd855eb14d7f", + "keyword": "5e5bb4b1553b1c6e126b198f9ae017a1a5d0a5ad", }), ("https://hentainexus.com/read/5688"), ) @@ -42,6 +42,8 @@ class HentainexusGalleryExtractor(GalleryExtractor): "title" : extr('<h1 class="title">', '</h1>'), "artist" : rmve(extr('viewcolumn">Artist</td>' , '</td>')), "book" : rmve(extr('viewcolumn">Book</td>' , '</td>')), + "circle" : rmve(extr('viewcolumn">Circle</td>' , '</td>')), + "event" : rmve(extr('viewcolumn">Event</td>' , '</td>')), "language" : rmve(extr('viewcolumn">Language</td>' , '</td>')), "magazine" : rmve(extr('viewcolumn">Magazine</td>' , '</td>')), "parody" : rmve(extr('viewcolumn">Parody</td>' , '</td>')), @@ -49,8 +51,36 @@ class HentainexusGalleryExtractor(GalleryExtractor): "description": rmve(extr('viewcolumn">Description</td>', '</td>')), } data["lang"] = util.language_to_code(data["language"]) + data["type"] = "Doujinshi" if 'doujin' in data["tags"] else "Manga" + data["title_conventional"] = self.join_title( + data["event"], + data["circle"], + data["artist"], + data["title"], + data["parody"], + data["book"], + data["magazine"], + ) return data + @staticmethod + def join_title(event, circle, artist, title, parody, book, magazine): + jt = '' + if event: + jt += '({}) '.format(event) + if circle: + jt += '[{} ({})] '.format(circle, artist) + else: + jt += '[{}] '.format(artist) + jt += title + if parody.lower() != 'original work': + jt += ' ({})'.format(parody) + if book: + jt += ' ({})'.format(book) + if magazine: + jt += ' ({})'.format(magazine) + return jt + def images(self, page): url = "{}/read/{}".format(self.root, self.gallery_id) extr = text.extract_from(self.request(url).text) diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index e0b0f50..85cfe49 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -64,7 +64,9 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com" r"(/manga/([^/?&#]+)/([^/?&#]+))") test = ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { - "url": "111bc3ee14ce91d78c275770ef63b56c9ac15d8d", + "pattern": r"https://hiperdex.com/wp-content/uploads" + r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", + "count": 9, "keyword": { "artist" : "Sasuga Kei", "author" : "Sasuga Kei", @@ -89,7 +91,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): def images(self, page): return [ (url.strip(), None) - for url in re.findall(r'id="image-\d+"\s+src="([^"]+)', page) + for url in re.findall( + r'id="image-\d+"\s+(?:data-)?src="([^"]+)', page) ] @@ -122,16 +125,44 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): def chapters(self, page): self.manga_data(self.manga, page) results = [] - last = None - - page = text.extract(page, 'class="page-content-listing', '</ul>')[0] - for match in HiperdexChapterExtractor.pattern.finditer(page): - path = match.group(1) - if last != path: - last = path - results.append(( - self.root + path, - self.chapter_data(path.rpartition("/")[2]), - )) + shortlink = text.extract(page, "rel='shortlink' href='", "'")[0] + data = { + "action": "manga_get_chapters", + "manga" : shortlink.rpartition("=")[2], + } + url = self.root + "/wp-admin/admin-ajax.php" + page = self.request(url, method="POST", data=data).text + + for url in text.extract_iter(page, 'href="', '"', 320): + chapter = url.rpartition("/")[2] + results.append((url, self.chapter_data(chapter))) + + return results + + +class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): + """Extractor for an artists's manga on hiperdex.com""" + subcategory = "artist" + categorytransfer = False + chapterclass = HiperdexMangaExtractor + reverse = False + pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com" + r"(/manga-a(?:rtist|uthor)/([^/?&#]+))") + test = ( + ("https://hiperdex.com/manga-artist/beck-ho-an/"), + ("https://hiperdex.com/manga-author/viagra/", { + "pattern": HiperdexMangaExtractor.pattern, + "count": ">= 6", + }), + ) + + def __init__(self, match): + MangaExtractor.__init__(self, match, self.root + match.group(1) + "/") + + def chapters(self, page): + results = [] + for info in text.extract_iter(page, 'id="manga-item-', '<img'): + url = text.extract(info, 'href="', '"')[0] + results.append((url, {})) return results diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index c31de1c..7561c64 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -61,7 +61,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "created_by" : "NTRshouldbeillegal", "date" : "dt:2016-11-20 07:10:53", "description" : "Enjoy.", - "download_url": "/download/824778/277031/", + "download_url": "re:/download/(r/)?824778/277031/", "genres" : list, "id" : 277031, "is_manga" : True, @@ -72,7 +72,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "permissions" : list, "rating" : float, "slug" : "okinami-no-koigokoro", - "status" : "not_moderated", + "status" : str, "tags" : list, "title" : "Okinami no Koigokoro", "url" : "/albums/okinami-no-koigokoro_277031/", @@ -92,7 +92,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "like_status" : "none", "position" : int, "resolution" : r"re:\d+x\d+", - "status" : "not_moderated", + "status" : str, "tags" : list, "thumbnail" : str, "title" : str, diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 36e0b62..3f07d21 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -9,7 +9,7 @@ """Extractors for mastodon instances""" from .common import Extractor, Message -from .. import text, config, exception +from .. import text, util, config, exception import re @@ -108,7 +108,7 @@ class MastodonAPI(): def account_search(self, query, limit=40): """Search for content""" params = {"q": query, "limit": limit} - return self._call("accounts/search", params) + return self._call("accounts/search", params).json() def account_statuses(self, account_id): """Get an account's statuses""" @@ -118,28 +118,38 @@ class MastodonAPI(): def status(self, status_id): """Fetch a Status""" - return self._call("statuses/" + status_id) + return self._call("statuses/" + status_id).json() def _call(self, endpoint, params=None): url = "{}/api/v1/{}".format(self.root, endpoint) - response = self.extractor.request( - url, params=params, headers=self.headers) - return self._parse(response) + + while True: + response = self.extractor.request( + url, params=params, headers=self.headers, fatal=None) + code = response.status_code + + if code < 400: + return response + if code == 404: + raise exception.NotFoundError() + if code == 429: + self.extractor.wait(until=text.parse_datetime( + response.headers["x-ratelimit-reset"], + "%Y-%m-%dT%H:%M:%S.%fZ", + )) + continue + raise exception.StopExtraction(response.json().get("error")) def _pagination(self, endpoint, params): url = "{}/api/v1/{}".format(self.root, endpoint) while url: - response = self.extractor.request( - url, params=params, headers=self.headers) - yield from self._parse(response) - url = response.links.get("next", {}).get("url") + response = self._call(endpoint, params) + yield from response.json() - @staticmethod - def _parse(response): - """Parse an API response""" - if response.status_code == 404: - raise exception.NotFoundError() - return response.json() + url = response.links.get("next") + if not url: + return + url = url["url"] def generate_extractors(): @@ -148,7 +158,7 @@ def generate_extractors(): symtable = globals() extractors = config.get(("extractor",), "mastodon") if extractors: - EXTRACTORS.update(extractors) + util.combine_dict(EXTRACTORS, extractors) config.set(("extractor",), "mastodon", EXTRACTORS) for instance, info in EXTRACTORS.items(): @@ -189,14 +199,26 @@ def generate_extractors(): EXTRACTORS = { + "mastodon.social": { + "category" : "mastodon.social", + "access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48", + "client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo", + "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI", + }, "pawoo.net": { "category" : "pawoo", - "access-token" : "286462927198d0cf3e24683e91c8259a" - "ac4367233064e0570ca18df2ac65b226", - "client-id" : "97b142b6904abf97a1068d51a7bc2f2f" - "cf9323cef81f13cb505415716dba7dac", - "client-secret": "e45bef4bad45b38abf7d9ef88a646b73" - "75e7fb2532c31a026327a93549236481", + "access-token" : "c12c9d275050bce0dc92169a28db09d7" + "0d62d0a75a8525953098c167eacd3668", + "client-id" : "978a25f843ec01e53d09be2c290cd75c" + "782bc3b7fdbd7ea4164b9f3c3780c8ff", + "client-secret": "9208e3d4a7997032cf4f1b0e12e5df38" + "8428ef1fadb446dcfeb4f5ed6872d97b", + }, + "baraag.net": { + "category" : "baraag", + "access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0", + "client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o", + "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY", }, } diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py index 95799cf..51b314a 100644 --- a/gallery_dl/extractor/myportfolio.py +++ b/gallery_dl/extractor/myportfolio.py @@ -23,18 +23,24 @@ class MyportfolioGalleryExtractor(Extractor): r"(?:https?://)?([^.]+\.myportfolio\.com))" r"(/[^/?&#]+)?") test = ( - ("https://hannahcosgrove.myportfolio.com/niamh-1", { - "url": "8cbd73a73e5bf3b4f5d1b1d4a1eb114c01a72a66", - "keyword": "7a460bb5641e648ae70702ff91c2fb11054b0e0b", + ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", { + "url": "acea0690c76db0e5cf267648cefd86e921bc3499", + "keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d", }), - ("https://hannahcosgrove.myportfolio.com/lfw", { - "pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$", - "count": ">= 8", + ("https://andrewling.myportfolio.com/", { + "pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$", + "count": ">= 6", }), + # no explicit title + ("https://stevenilousphotography.myportfolio.com/society", { + "keyword": "49e7ff6322645c22b409280656202c2736a380c9", + }), + # custom domain ("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", { "count": 3, }), ("myportfolio:https://tooco.com.ar/", { + "pattern": pattern, "count": ">= 40", }), ) @@ -80,8 +86,11 @@ class MyportfolioGalleryExtractor(Extractor): title, pos = text.extract( page, '<h1 ', '</h1>', pos) - title = title.partition(">")[2] - user = user[:-len(title)-3] + if title: + title = title.partition(">")[2] + user = user[:-len(title)-3] + else: + user, _, title = user.partition(" - ") return { "user": text.unescape(user), diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 2f5b429..c06721c 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -178,9 +178,9 @@ class OAuthDeviantart(OAuthBase): self._oauth2_authorization_code_grant( self.oauth_config( - "client-id", deviantart.DeviantartAPI.CLIENT_ID), + "client-id", deviantart.DeviantartOAuthAPI.CLIENT_ID), self.oauth_config( - "client-secret", deviantart.DeviantartAPI.CLIENT_SECRET), + "client-secret", deviantart.DeviantartOAuthAPI.CLIENT_SECRET), "https://www.deviantart.com/oauth2/authorize", "https://www.deviantart.com/oauth2/token", scope="browse", diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index 41b1039..064967d 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -10,7 +10,6 @@ from .common import Extractor, Message from .. import text -import json class PiczelExtractor(Extractor): @@ -137,8 +136,5 @@ class PiczelImageExtractor(PiczelExtractor): self.image_id = match.group(1) def posts(self): - url = "{}/gallery/image/{}".format(self.root, self.image_id) - page = self.request(url).text - data = json.loads(text.extract( - page, 'window.__PRELOADED_STATE__ =', '</script>')[0]) - return (data["gallery"]["images"]["byId"][self.image_id],) + url = "{}/api/gallery/{}".format(self.root, self.image_id) + return (self.request(url).json(),) diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py index 70b4833..f6bb4df 100644 --- a/gallery_dl/extractor/realbooru.py +++ b/gallery_dl/extractor/realbooru.py @@ -53,7 +53,7 @@ class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor): "options": (("tags", True),), "keyword": { "tags_general" : str, - "tags_metadata": "tagme", + "tags_metadata": "cute tagme", "tags_model" : "jennifer_lawrence", }, }) diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index a312c1c..d0232cc 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -313,8 +313,7 @@ class RedditAPI(): remaining = response.headers.get("x-ratelimit-remaining") if remaining and float(remaining) < 2: - reset = response.headers["x-ratelimit-reset"] - self.extractor.wait(seconds=reset, reason="rate limit reset") + self.extractor.wait(seconds=response.headers["x-ratelimit-reset"]) return self._call(endpoint, params) data = response.json() diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 0505fa9..7e99823 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -418,7 +418,7 @@ class TumblrAPI(oauth.OAuth1API): reset = response.headers.get("x-ratelimit-perhour-reset") if reset: self.log.info("Hourly API rate limit exceeded") - self.extractor.wait(seconds=reset, reason="rate limit reset") + self.extractor.wait(seconds=reset) return self._call(blog, endpoint, params) raise exception.StopExtraction(data) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index cbb075c..03ce3dd 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -224,8 +224,7 @@ class TwitterExtractor(Extractor): if response.status_code == 429 or \ response.headers.get("x-rate-limit-remaining") == "0": if self.logged_in: - reset = response.headers.get("x-rate-limit-reset") - self.wait(until=reset, reason="rate limit reset") + self.wait(until=response.headers.get("x-rate-limit-reset")) else: _guest_token.invalidate() return self._video_from_tweet(tweet_id) diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index a020064..0306112 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -172,7 +172,7 @@ class VscoCollectionExtractor(VscoExtractor): url = "{}/api/2.0/collections/{}/medias".format(self.root, cid) params = {"page": 2, "size": "20"} return self._pagination(url, params, tkn, "medias", ( - data["medias"]["byId"][mid]["media"] + data["medias"]["byId"][mid["id"]]["media"] for mid in data ["collections"]["byCollectionId"][cid]["byPage"]["1"]["collection"] )) diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 6a779d9..9539c2f 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -118,7 +118,7 @@ class WeiboStatusExtractor(WeiboExtractor): """Extractor for images from a status on weibo.cn""" subcategory = "status" pattern = (r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)" - r"/(?:detail|status|\d+)/(\d+)") + r"/(?:detail|status|\d+)/(\w+)") test = ( ("https://m.weibo.cn/detail/4323047042991618", { "pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg", @@ -130,6 +130,10 @@ class WeiboStatusExtractor(WeiboExtractor): ("https://m.weibo.cn/status/4268682979207023", { "exception": exception.NotFoundError, }), + # non-numeric status ID (#664) + ("https://weibo.com/3314883543/Iy7fj4qVg", { + "pattern": r"https?://f.video.weibocdn.com/\w+\.mp4\?label=mp4_hd", + }), ("https://m.weibo.cn/status/4339748116375525"), ("https://m.weibo.cn/5746766133/4339748116375525"), ) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index a3f4e0a..3bb6390 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -233,7 +233,7 @@ def parse_timestamp(ts, default=None): return default -def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"): +def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0): """Create a datetime object by parsing 'date_string'""" try: if format.endswith("%z") and date_string[-3] == ":": @@ -244,7 +244,11 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"): d = datetime.datetime.strptime(ds, format) o = d.utcoffset() if o is not None: - d = d.replace(tzinfo=None) - o # convert to naive UTC + # convert to naive UTC + d = d.replace(tzinfo=None) - o + elif utcoffset: + # apply manual UTC offset + d += datetime.timedelta(0, utcoffset * -3600) return d except (TypeError, IndexError, KeyError): return None diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 47fad9e..83cf84b 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -840,16 +840,15 @@ class PathFormat(): shutil.copyfile(self.temppath, self.realpath) os.unlink(self.temppath) - if "_mtime" in self.kwdict: + mtime = self.kwdict.get("_mtime") + if mtime: # Set file modification time - mtime = self.kwdict["_mtime"] - if mtime: - try: - if isinstance(mtime, str): - mtime = mktime_tz(parsedate_tz(mtime)) - os.utime(self.realpath, (time.time(), mtime)) - except Exception: - pass + try: + if isinstance(mtime, str): + mtime = mktime_tz(parsedate_tz(mtime)) + os.utime(self.realpath, (time.time(), mtime)) + except Exception: + pass class DownloadArchive(): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 37d133e..7905500 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.13.3" +__version__ = "1.13.4" diff --git a/test/test_extractor.py b/test/test_extractor.py index 2555b58..e6f4963 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -1,17 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. import sys -import unittest +import time import string +from datetime import datetime, timedelta + +import unittest +from unittest.mock import patch from gallery_dl import extractor +from gallery_dl.extractor import mastodon from gallery_dl.extractor.common import Extractor, Message from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor @@ -26,7 +31,7 @@ class FakeExtractor(Extractor): yield Message.Url, "text:foobar", {} -class TestExtractor(unittest.TestCase): +class TestExtractorModule(unittest.TestCase): VALID_URIS = ( "https://example.org/file.jpg", "tumblr:foobar", @@ -170,5 +175,116 @@ class TestExtractor(unittest.TestCase): self.assertEqual(expected, extr.__name__) +class TestExtractorWait(unittest.TestCase): + + def test_wait_seconds(self): + extr = extractor.find("test:") + seconds = 5 + until = time.time() + seconds + + with patch("time.sleep") as sleep, patch.object(extr, "log") as log: + extr.wait(seconds=seconds) + + sleep.assert_called_once_with(6.0) + + calls = log.info.mock_calls + self.assertEqual(len(calls), 1) + self._assert_isotime(calls[0][1][1], until) + + def test_wait_until(self): + extr = extractor.find("test:") + until = time.time() + 5 + + with patch("time.sleep") as sleep, patch.object(extr, "log") as log: + extr.wait(until=until) + + calls = sleep.mock_calls + self.assertEqual(len(calls), 1) + self.assertAlmostEqual(calls[0][1][0], 6.0, places=1) + + calls = log.info.mock_calls + self.assertEqual(len(calls), 1) + self._assert_isotime(calls[0][1][1], until) + + def test_wait_until_datetime(self): + extr = extractor.find("test:") + until = datetime.utcnow() + timedelta(seconds=5) + until_local = datetime.now() + timedelta(seconds=5) + + with patch("time.sleep") as sleep, patch.object(extr, "log") as log: + extr.wait(until=until) + + calls = sleep.mock_calls + self.assertEqual(len(calls), 1) + self.assertAlmostEqual(calls[0][1][0], 6.0, places=1) + + calls = log.info.mock_calls + self.assertEqual(len(calls), 1) + self._assert_isotime(calls[0][1][1], until_local) + + def _assert_isotime(self, output, until): + if not isinstance(until, datetime): + until = datetime.fromtimestamp(until) + o = self._isotime_to_seconds(output) + u = self._isotime_to_seconds(until.time().isoformat()[:8]) + self.assertLess(o-u, 1.0) + + @staticmethod + def _isotime_to_seconds(isotime): + parts = isotime.split(":") + return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2]) + + +class TextExtractorOAuth(unittest.TestCase): + + @classmethod + def setUpClass(cls): + mastodon.generate_extractors() + + def test_oauth1(self): + for category in ("flickr", "smugmug", "tumblr"): + extr = extractor.find("oauth:" + category) + + with patch.object(extr, "_oauth1_authorization_flow") as m: + for msg in extr: + pass + self.assertEqual(len(m.mock_calls), 1) + + def test_oauth2(self): + for category in ("deviantart", "reddit"): + extr = extractor.find("oauth:" + category) + + with patch.object(extr, "_oauth2_authorization_code_grant") as m: + for msg in extr: + pass + self.assertEqual(len(m.mock_calls), 1) + + def test_oauth2_mastodon(self): + extr = extractor.find("oauth:mastodon:pawoo.net") + + with patch.object(extr, "_oauth2_authorization_code_grant") as m, \ + patch.object(extr, "_register") as r: + for msg in extr: + pass + self.assertEqual(len(r.mock_calls), 0) + self.assertEqual(len(m.mock_calls), 1) + + def test_oauth2_mastodon_unknown(self): + extr = extractor.find("oauth:mastodon:example.com") + + with patch.object(extr, "_oauth2_authorization_code_grant") as m, \ + patch.object(extr, "_register") as r: + r.return_value = { + "client-id" : "foo", + "client-secret": "bar", + } + + for msg in extr: + pass + + self.assertEqual(len(r.mock_calls), 1) + self.assertEqual(len(m.mock_calls), 1) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_results.py b/test/test_results.py index b697d15..9064810 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -27,7 +27,9 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "myportfolio", + "dokireader", + "mangafox", + "mangahere", "photobucket", "worldthree", } diff --git a/test/test_text.py b/test/test_text.py index 6a6d83a..0390823 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2018 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -376,6 +376,10 @@ class TestText(unittest.TestCase): datetime.datetime(2019, 5, 7, 12, 25, 2), ) self.assertEqual( + f("2019-05-07T21:25:02", "%Y-%m-%dT%H:%M:%S", utcoffset=9), + datetime.datetime(2019, 5, 7, 12, 25, 2), + ) + self.assertEqual( f("2019-05-07 21:25:02"), "2019-05-07 21:25:02", ) |