author     Unit 193 <unit193@unit193.net>  2020-08-15 17:48:11 -0400
committer  Unit 193 <unit193@unit193.net>  2020-08-15 17:48:11 -0400
commit     7cf59dc17c3607e096292462ed15d391be4e3dfd (patch)
tree       50d2750e958f43271dc6cc5310211cf8f8bbd9d0
parent     ba039cfb2e1ba2522ee0a0fa2a84a1a6579e4877 (diff)
download   gallery-dl-7cf59dc17c3607e096292462ed15d391be4e3dfd.tar.bz2
           gallery-dl-7cf59dc17c3607e096292462ed15d391be4e3dfd.tar.xz
           gallery-dl-7cf59dc17c3607e096292462ed15d391be4e3dfd.tar.zst

New upstream version 1.14.4 (tag: upstream/1.14.4)
-rw-r--r--  CHANGELOG.md                            27
-rw-r--r--  PKG-INFO                                15
-rw-r--r--  README.rst                              13
-rw-r--r--  data/man/gallery-dl.1                    2
-rw-r--r--  data/man/gallery-dl.conf.5              19
-rw-r--r--  gallery_dl.egg-info/PKG-INFO            15
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt          2
-rw-r--r--  gallery_dl/exception.py                  5
-rw-r--r--  gallery_dl/extractor/__init__.py         2
-rw-r--r--  gallery_dl/extractor/blogger.py         50
-rw-r--r--  gallery_dl/extractor/bobx.py           135
-rw-r--r--  gallery_dl/extractor/common.py           3
-rw-r--r--  gallery_dl/extractor/exhentai.py         6
-rw-r--r--  gallery_dl/extractor/gfycat.py          10
-rw-r--r--  gallery_dl/extractor/hentainexus.py     31
-rw-r--r--  gallery_dl/extractor/imgbb.py            6
-rw-r--r--  gallery_dl/extractor/imgur.py           12
-rw-r--r--  gallery_dl/extractor/inkbunny.py       251
-rw-r--r--  gallery_dl/extractor/instagram.py       13
-rw-r--r--  gallery_dl/extractor/mangapanda.py      92
-rw-r--r--  gallery_dl/extractor/mangareader.py    122
-rw-r--r--  gallery_dl/extractor/mangoxo.py          2
-rw-r--r--  gallery_dl/extractor/myportfolio.py      9
-rw-r--r--  gallery_dl/extractor/paheal.py           4
-rw-r--r--  gallery_dl/extractor/pinterest.py        4
-rw-r--r--  gallery_dl/extractor/pixnet.py          10
-rw-r--r--  gallery_dl/extractor/reactor.py          9
-rw-r--r--  gallery_dl/extractor/reddit.py          23
-rw-r--r--  gallery_dl/extractor/shopify.py         43
-rw-r--r--  gallery_dl/extractor/simplyhentai.py     3
-rw-r--r--  gallery_dl/extractor/smugmug.py          4
-rw-r--r--  gallery_dl/extractor/subscribestar.py   43
-rw-r--r--  gallery_dl/extractor/twitter.py          5
-rw-r--r--  gallery_dl/extractor/vsco.py             2
-rw-r--r--  gallery_dl/extractor/xhamster.py         2
-rw-r--r--  gallery_dl/job.py                        3
-rw-r--r--  gallery_dl/version.py                    2
-rw-r--r--  test/test_oauth.py                       8
-rw-r--r--  test/test_results.py                     6
39 files changed, 668 insertions, 345 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ac09ee7..fa9f17c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,32 @@
# Changelog
+## 1.14.4 - 2020-08-15
+### Additions
+- [blogger] add `search` extractor ([#925](https://github.com/mikf/gallery-dl/issues/925))
+- [blogger] support searching posts by labels ([#925](https://github.com/mikf/gallery-dl/issues/925))
+- [inkbunny] add `user` and `post` extractors ([#283](https://github.com/mikf/gallery-dl/issues/283))
+- [instagram] support `/reel/` URLs
+- [pinterest] support `pinterest.co.uk` URLs ([#914](https://github.com/mikf/gallery-dl/issues/914))
+- [reddit] support gallery posts ([#920](https://github.com/mikf/gallery-dl/issues/920))
+- [subscribestar] extract attached media files ([#852](https://github.com/mikf/gallery-dl/issues/852))
+### Fixes
+- [blogger] improve error messages for missing posts/blogs ([#903](https://github.com/mikf/gallery-dl/issues/903))
+- [exhentai] adjust image limit costs ([#940](https://github.com/mikf/gallery-dl/issues/940))
+- [gfycat] skip malformed gfycat responses ([#902](https://github.com/mikf/gallery-dl/issues/902))
+- [imgur] handle 403 overcapacity responses ([#910](https://github.com/mikf/gallery-dl/issues/910))
+- [instagram] wait before GraphQL requests ([#901](https://github.com/mikf/gallery-dl/issues/901))
+- [mangareader] fix extraction
+- [mangoxo] fix login
+- [pixnet] detect password-protected albums ([#177](https://github.com/mikf/gallery-dl/issues/177))
+- [simplyhentai] fix `gallery_id` extraction
+- [subscribestar] update `date` parsing
+- [vsco] handle missing `description` fields
+- [xhamster] fix extraction ([#917](https://github.com/mikf/gallery-dl/issues/917))
+- allow `parent-directory` to work recursively ([#905](https://github.com/mikf/gallery-dl/issues/905))
+- skip external OAuth tests ([#908](https://github.com/mikf/gallery-dl/issues/908))
+### Removals
+- [bobx] remove module
+
## 1.14.3 - 2020-07-18
### Additions
- [8muses] support `comics.8muses.com` URLs
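
As a quick smoke test of this release's additions, the new extractors can be exercised directly; the URLs below are taken from the test cases further down in this diff:

    $ gallery-dl "https://inkbunny.net/s/2044094"
    $ gallery-dl "https://www.reddit.com/gallery/hrrh23"
    $ gallery-dl "https://julianbphotography.blogspot.com/search?q=400mm"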
diff --git a/PKG-INFO b/PKG-INFO
index ab22502..afc4636 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.14.3
+Version: 1.14.4
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.4/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -165,7 +165,7 @@ Description: ==========
$ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
- If a site's address is nonstandard for its extractor, you can prefix the URL with the
+ If a site's address is nonstandard for its extractor, you can prefix the URL with the
extractor's name to force the use of a specific extractor:
.. code:: bash
@@ -216,8 +216,9 @@ Description: ==========
a username & password pair. This is necessary for
``pixiv``, ``nijie``, and ``seiga``
and optional for
- ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``instagram``,
- ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``, and ``twitter``.
+ ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``,
+ ``instagram``, ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``,
+ and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -310,7 +311,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.3.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 360c02b..2148c42 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.4/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -154,7 +154,7 @@ Filter manga chapters by language and chapter number:
$ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
-If a site's address is nonstandard for its extractor, you can prefix the URL with the
+If a site's address is nonstandard for its extractor, you can prefix the URL with the
extractor's name to force the use of a specific extractor:
.. code:: bash
@@ -205,8 +205,9 @@ Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
``pixiv``, ``nijie``, and ``seiga``
and optional for
-``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``instagram``,
-``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``, and ``twitter``.
+``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``,
+``instagram``, ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``,
+and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -299,7 +300,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.3.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
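
Note on the inkbunny addition above: like the other optional logins, credentials belong in the extractor block of gallery-dl.conf. A minimal sketch with placeholder values:

    {
        "extractor": {
            "inkbunny": {
                "username": "<your-username>",
                "password": "<your-password>"
            }
        }
    }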
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index f05f2e8..e554159 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-07-18" "1.14.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-08-15" "1.14.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 039e750..67e51d4 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-07-18" "1.14.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-08-15" "1.14.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -111,7 +111,7 @@ subcategory
image
Note: Even if the value of the \f[I]extension\f[] key is missing or
-\f[I]None\f[], it will filled in later when the file download is
+\f[I]None\f[], it will be filled in later when the file download is
starting. This key is therefore always available to provide
a valid filename extension.
@@ -284,6 +284,8 @@ and optional for
.br
* \f[I]idolcomplex\f[]
.br
+* \f[I]inkbunny\f[]
+.br
* \f[I]instagram\f[]
.br
* \f[I]luscious\f[]
@@ -1006,6 +1008,19 @@ Controls whether to choose the GIF or MP4 version of an animation.
.br
* \f[I]"always"\f[]: Always choose MP4.
+.SS extractor.inkbunny.orderby
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"create_datetime"\f[]
+
+.IP "Description:" 4
+Value of the \f[I]orderby\f[] parameter for submission searches.
+
+(See \f[I]API#Search <https://wiki.inkbunny.net/wiki/API#Search>\f[]
+for details)
+
.SS extractor.instagram.highlights
.IP "Type:" 6
\f[I]bool\f[]
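
The same config block also takes the new orderby option documented above; a sketch using the documented default (other values per the linked API#Search wiki page):

    {
        "extractor": {
            "inkbunny": {
                "orderby": "create_datetime"
            }
        }
    }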
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 0007699..8f6f112 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.14.3
+Version: 1.14.4
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.4/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -165,7 +165,7 @@ Description: ==========
$ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
- If a site's address is nonstandard for its extractor, you can prefix the URL with the
+ If a site's address is nonstandard for its extractor, you can prefix the URL with the
extractor's name to force the use of a specific extractor:
.. code:: bash
@@ -216,8 +216,9 @@ Description: ==========
a username & password pair. This is necessary for
``pixiv``, ``nijie``, and ``seiga``
and optional for
- ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``instagram``,
- ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``, and ``twitter``.
+ ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``,
+ ``instagram``, ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``,
+ and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -310,7 +311,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.3.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 80c9f4f..56c9245 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -49,7 +49,6 @@ gallery_dl/extractor/aryion.py
gallery_dl/extractor/bcy.py
gallery_dl/extractor/behance.py
gallery_dl/extractor/blogger.py
-gallery_dl/extractor/bobx.py
gallery_dl/extractor/booru.py
gallery_dl/extractor/common.py
gallery_dl/extractor/danbooru.py
@@ -86,6 +85,7 @@ gallery_dl/extractor/imgbb.py
gallery_dl/extractor/imgbox.py
gallery_dl/extractor/imgth.py
gallery_dl/extractor/imgur.py
+gallery_dl/extractor/inkbunny.py
gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
gallery_dl/extractor/kabeuchi.py
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index 783e2b2..f553d41 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -51,6 +51,11 @@ class HttpError(ExtractionError):
default = "HTTP request failed"
code = 4
+ def __init__(self, message, response=None):
+ ExtractionError.__init__(self, message)
+ self.response = response
+ self.status = response.status_code if response else 0
+
class NotFoundError(ExtractionError):
"""Requested resource (gallery/image) could not be found"""
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3184663..6f8867c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -23,7 +23,6 @@ modules = [
"bcy",
"behance",
"blogger",
- "bobx",
"danbooru",
"deviantart",
"dynastyscans",
@@ -54,6 +53,7 @@ modules = [
"imgbox",
"imgth",
"imgur",
+ "inkbunny",
"instagram",
"issuu",
"kabeuchi",
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 331cfc2..9c18e0e 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -147,7 +147,7 @@ class BloggerPostExtractor(BloggerExtractor):
class BloggerBlogExtractor(BloggerExtractor):
"""Extractor for an entire Blogger blog"""
subcategory = "blog"
- pattern = BASE_PATTERN + "/?$"
+ pattern = BASE_PATTERN + r"/?$"
test = (
("https://julianbphotography.blogspot.com/", {
"range": "1-25",
@@ -164,6 +164,34 @@ class BloggerBlogExtractor(BloggerExtractor):
return self.api.blog_posts(blog["id"])
+class BloggerSearchExtractor(BloggerExtractor):
+ """Extractor for search resuls and labels"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?&#]+)|/label/([^/?&#]+))"
+ test = (
+ ("https://julianbphotography.blogspot.com/search?q=400mm", {
+ "count": "< 10"
+ }),
+ ("https://dmmagazine.blogspot.com/search/label/D%26D", {
+ "range": "1-25",
+ "count": 25,
+ }),
+ )
+
+ def __init__(self, match):
+ BloggerExtractor.__init__(self, match)
+ query = match.group(3)
+ if query:
+ self.query, self.label = query, None
+ else:
+ self.query, self.label = None, match.group(4)
+
+ def posts(self, blog):
+ if self.query:
+ return self.api.blog_search(blog["id"], text.unquote(self.query))
+ return self.api.blog_posts(blog["id"], text.unquote(self.label))
+
+
class BloggerAPI():
"""Minimal interface for the Blogger v3 API
@@ -176,19 +204,27 @@ class BloggerAPI():
self.api_key = extractor.config("api-key", self.API_KEY)
def blog_by_url(self, url):
- return self._call("blogs/byurl", {"url": url})
+ return self._call("blogs/byurl", {"url": url}, "blog")
+
+ def blog_posts(self, blog_id, label=None):
+ endpoint = "blogs/{}/posts".format(blog_id)
+ params = {"labels": label}
+ return self._pagination(endpoint, params)
- def blog_posts(self, blog_id):
- return self._pagination("blogs/{}/posts".format(blog_id), {})
+ def blog_search(self, blog_id, query):
+ endpoint = "blogs/{}/posts/search".format(blog_id)
+ params = {"q": query}
+ return self._pagination(endpoint, params)
def post_by_path(self, blog_id, path):
endpoint = "blogs/{}/posts/bypath".format(blog_id)
- return self._call(endpoint, {"path": path})
+ return self._call(endpoint, {"path": path}, "post")
- def _call(self, endpoint, params):
+ def _call(self, endpoint, params, notfound=None):
url = "https://www.googleapis.com/blogger/v3/" + endpoint
params["key"] = self.api_key
- return self.extractor.request(url, params=params).json()
+ return self.extractor.request(
+ url, params=params, notfound=notfound).json()
def _pagination(self, endpoint, params):
while True:
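
The new blog_search maps straight onto the public Blogger v3 search endpoint; a rough standalone equivalent (assuming a valid API key and blog id):

    import requests

    def blog_search(blog_id, query, api_key):
        url = ("https://www.googleapis.com/blogger/v3"
               "/blogs/{}/posts/search".format(blog_id))
        params = {"q": query, "key": api_key}
        return requests.get(url, params=params).json()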
diff --git a/gallery_dl/extractor/bobx.py b/gallery_dl/extractor/bobx.py
deleted file mode 100644
index 94a2840..0000000
--- a/gallery_dl/extractor/bobx.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2018-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from http://www.bobx.com/dark/"""
-
-from .common import Extractor, Message
-from .. import text
-from ..cache import memcache
-import random
-import time
-
-
-class BobxExtractor(Extractor):
- """Base class for bobx extractors"""
- category = "bobx"
- root = "http://www.bobx.com"
- cookiedomain = ".bobx.com"
- per_page = 80
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.path = match.group(1)
-
- def login(self):
- if not self._check_cookies(("BobXUser",)):
- self._update_cookies(self._login_impl())
-
- @memcache()
- def _login_impl(self):
- """Generate a randomized 'BobXUser' cookie"""
- rand = random.randrange
- tnow = time.time() - rand(60, 3600)
-
- return {"BobXUser": "{}.{}.{}.{}.{}.{}".format(
- int(tnow),
- rand(128, 192), rand(0, 256), rand(0, 256), rand(0, 256),
- tnow + 622080000, # timestamp in 7200 days
- )}
-
-
-class BobxGalleryExtractor(BobxExtractor):
- """Extractor for individual image galleries on bobx.com"""
- subcategory = "gallery"
- directory_fmt = ("{category}", "{model}", "{title}")
- filename_fmt = "{model}_{image_id}_{num:>03}.{extension}"
- archive_fmt = "{image_id}"
- pattern = (r"(?:https?://)?(?:www\.)?bobx\.com"
- r"/([^/]+/[^/]+/photoset/[\w-]+)-\d+-\d+-\d+\.html")
- test = (
- (("http://www.bobx.com/idol/mikoto-hibi"
- "/photoset/wpb-2018-_11-0-2-8.html"), {
- "url": "93972d6a661f6627e963d62c9d15531e6b36a389",
- "keyword": "6c620862db494ed05e69356ba30e604b167b0670",
- "content": "3f176b7fe752524cec21a763aa55567e41181e07",
- }),
- (("http://www.bobx.com/idol/nashiko-momotsuki"
- "/photoset/wpb-net-_221---2018-08---magic-of-summer-0-10-10.html"), {
- "url": "f5d6c0cd0881ae6f504c21a90d86e3464dc54e8e",
- "keyword": "f4819c75f494044348889ecd27771508464c0f5f",
- }),
- )
-
- def items(self):
- self.login()
-
- num = 0
- while True:
- url = "{}/{}-{}-10-8.html".format(self.root, self.path, num)
- page = self.request(url, encoding="utf-8").text
-
- if num == 0:
- data = self.metadata(page)
- yield Message.Version, 1
- yield Message.Directory, data
- data["num"] = 0
-
- for url in self.images(page):
- url = text.urljoin(self.root, url.replace("-preview-", "-"))
- data = text.nameext_from_url(url, data)
- data["image_id"] = text.parse_int(
- data["filename"].rpartition("-")[2])
- data["num"] += 1
- yield Message.Url, url, data
-
- num += self.per_page
- if num >= data["count"]:
- return
-
- @staticmethod
- def metadata(page):
- """Collect metadata for extractor-job"""
- info = text.extract(page, "<title>", "</title>")[0]
- model, _, info = info.partition(" in ")
- info, _, count = info.rpartition(" of ")
- title = info.rpartition(" - @")[0]
- return {
- "title": text.unquote(title),
- "model": text.unquote(model),
- "count": text.parse_int(count),
- }
-
- @staticmethod
- def images(page):
- """Extract all image-urls"""
- page = text.extract(page, "<table CELLPADDING=", "<script ")[0]
- return text.extract_iter(page, '<img src="/thumbnail', '"')
-
-
-class BobxIdolExtractor(BobxExtractor):
- """Extractor for an idol's image galleries on bobx.com"""
- subcategory = "idol"
- pattern = r"(?:https?://)?(?:www\.)?bobx\.com/([^/]+/[^/?&#]+)/?$"
- test = ("http://www.bobx.com/idol/rin-okabe/", {
- "pattern": BobxGalleryExtractor.pattern,
- "count": ">= 6",
- })
-
- def items(self):
- self.login()
- url = "{}/{}/".format(self.root, self.path)
- data = {"_extractor": BobxGalleryExtractor}
- page = self.request(url).text
- skip = True
-
- yield Message.Version, 1
- for part in text.extract_iter(page, '="photoset/', '"'):
- # skip every other entry
- skip = not skip
- if not skip:
- yield Message.Queue, "{}photoset/{}".format(url, part), data
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index bbbd8a6..e6c0968 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -79,6 +79,7 @@ class Extractor():
session = self.session if session is None else session
kwargs.setdefault("timeout", self._timeout)
kwargs.setdefault("verify", self._verify)
+ response = None
while True:
try:
@@ -125,7 +126,7 @@ class Extractor():
time.sleep(min(2 ** (tries-1), 1800))
tries += 1
- raise exception.HttpError(msg)
+ raise exception.HttpError(msg, response)
def wait(self, *, seconds=None, until=None, adjust=1.0,
reason="rate limit reset"):
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index bf310ec..4cb10b4 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -123,7 +123,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/1200119/d55c44d3d0/", {
- "keyword": "3eeae7bde70dd992402d4cc0230ea0f2c4af46c5",
+ "keyword": "199db053b4ccab94463b459e1cfe079df8cdcdd1",
"content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
@@ -353,7 +353,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"height": text.parse_int(parts[2]),
"size": size,
# 1 initial point + 1 per 0.1 MB
- "cost": 1 + math.ceil(size / 104857.6)
+ "cost": 1 + math.ceil(size / 100000)
}
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index b4b0e49..ac1bca3 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -28,6 +28,9 @@ class GfycatExtractor(Extractor):
def items(self):
metadata = self.metadata()
for gfycat in self.gfycats():
+ if "gfyName" not in gfycat:
+ self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
+ continue
url = self._select_format(gfycat)
gfycat.update(metadata)
yield Message.Directory, gfycat
@@ -118,6 +121,10 @@ class GfycatImageExtractor(GfycatExtractor):
("https://www.gfycat.com/foolishforkedabyssiniancat", {
"pattern": "https://redgifs.com/watch/foolishforkedabyssiniancat",
}),
+ # malformed API response (#902)
+ ("https://gfycat.com/illexcitablehairstreak", {
+ "count": 0,
+ }),
("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
("https://gfycat.com/ifr/UnequaledHastyAnkole"),
("https://gfycat.com/ru/UnequaledHastyAnkole"),
@@ -132,6 +139,9 @@ class GfycatImageExtractor(GfycatExtractor):
data = {"_extractor": RedgifsImageExtractor}
yield Message.Queue, url, data
else:
+ if "gfyName" not in gfycat:
+ self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
+ return
url = self._select_format(gfycat)
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index aa41836..49c1a98 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -36,21 +36,17 @@ class HentainexusGalleryExtractor(GalleryExtractor):
rmve = text.remove_html
extr = text.extract_from(page)
data = {
- "gallery_id" : text.parse_int(self.gallery_id),
- "tags" : extr('"og:description" content="', '"').split(", "),
- "thumbnail" : extr('"og:image" content="', '"'),
- "title" : extr('<h1 class="title">', '</h1>'),
- "artist" : rmve(extr('viewcolumn">Artist</td>' , '</td>')),
- "book" : rmve(extr('viewcolumn">Book</td>' , '</td>')),
- "circle" : rmve(extr('viewcolumn">Circle</td>' , '</td>')),
- "event" : rmve(extr('viewcolumn">Event</td>' , '</td>')),
- "language" : rmve(extr('viewcolumn">Language</td>' , '</td>')),
- "magazine" : rmve(extr('viewcolumn">Magazine</td>' , '</td>')),
- "parody" : rmve(extr('viewcolumn">Parody</td>' , '</td>')),
- "publisher" : rmve(extr('viewcolumn">Publisher</td>' , '</td>')),
- "description": rmve(extr('viewcolumn">Description</td>', '</td>')),
+ "gallery_id": text.parse_int(self.gallery_id),
+ "tags" : extr('"og:description" content="', '"').split(", "),
+ "thumbnail" : extr('"og:image" content="', '"'),
+ "title" : extr('<h1 class="title">', '</h1>'),
}
+ for key in ("Artist", "Book", "Circle", "Event", "Language",
+ "Magazine", "Parody", "Publisher", "Description"):
+ data[key.lower()] = rmve(extr(
+ 'viewcolumn">' + key + '</td>', '</td>'))
data["lang"] = util.language_to_code(data["language"])
+
if 'doujin' in data['tags']:
data['type'] = 'Doujinshi'
elif 'illustration' in data['tags']:
@@ -60,10 +56,10 @@ class HentainexusGalleryExtractor(GalleryExtractor):
data["title_conventional"] = self._join_title(data)
return data
- def images(self, page):
+ def images(self, _):
url = "{}/read/{}".format(self.root, self.gallery_id)
- extr = text.extract_from(self.request(url).text)
- urls = extr("initReader(", "]") + "]"
+ page = self.request(url).text
+ urls = text.extract(page, "initReader(", "]")[0] + "]"
return [(url, None) for url in json.loads(urls)]
@staticmethod
@@ -120,14 +116,13 @@ class HentainexusSearchExtractor(Extractor):
self.params = text.parse_query(match.group(1))
def items(self):
- yield Message.Version, 1
params = self.params
path = "/"
+ data = {"_extractor": HentainexusGalleryExtractor}
while path:
page = self.request(self.root + path, params=params).text
extr = text.extract_from(page)
- data = {"_extractor": HentainexusGalleryExtractor}
while True:
gallery_id = extr('<a href="/view/', '"')
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 3882a92..2a69fb1 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -132,14 +132,10 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
"url": "ac0abcfcb89f4df6adc2f7e4ff872f3b03ef1bc7",
"keyword": {"user": ""},
}),
- # deleted
- ("https://ibb.co/album/fDArrF", {
- "exception": exception.NotFoundError,
- }),
# private
("https://ibb.co/album/hqgWrF", {
"exception": exception.HttpError,
- })
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index a617975..25328ab 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -332,9 +332,15 @@ class ImgurAPI():
return self._call("image/" + image_hash)
def _call(self, endpoint):
- return self.extractor.request(
- "https://api.imgur.com/3/" + endpoint, headers=self.headers,
- ).json()["data"]
+ try:
+ return self.extractor.request(
+ "https://api.imgur.com/3/" + endpoint, headers=self.headers,
+ ).json()["data"]
+ except exception.HttpError as exc:
+ if exc.status != 403 or b"capacity" not in exc.response.content:
+ raise
+ self.extractor.sleep(seconds=600)
+ return self._call(endpoint)
def _pagination(self, endpoint):
num = 0
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
new file mode 100644
index 0000000..ff8318c
--- /dev/null
+++ b/gallery_dl/extractor/inkbunny.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://inkbunny.net/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?inkbunny\.net"
+
+
+class InkbunnyExtractor(Extractor):
+ """Base class for inkbunny extractors"""
+ category = "inkbunny"
+ directory_fmt = ("{category}", "{username!l}")
+ filename_fmt = "{submission_id} {file_id} {title}.{extension}"
+ archive_fmt = "{file_id}"
+ root = "https://inkbunny.net"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.api = InkbunnyAPI(self)
+
+ def items(self):
+ self.api.authenticate()
+ to_bool = ("deleted", "digitalsales", "favorite", "forsale",
+ "friends_only", "guest_block", "hidden", "printsales",
+ "public", "scraps")
+
+ for post in self.posts():
+ post["date"] = text.parse_datetime(
+ post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
+ post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
+ post["ratings"] = [r["name"] for r in post["ratings"]]
+ files = post["files"]
+
+ for key in to_bool:
+ post[key] = (post[key] == "t")
+
+ del post["keywords"]
+ del post["files"]
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ post["deleted"] = (file["deleted"] == "t")
+ post["date"] = text.parse_datetime(
+ file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
+ text.nameext_from_url(file["file_name"], post)
+ yield Message.Url, file["file_url_full"], post
+
+
+class InkbunnyUserExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny user profiles"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?&#]+)"
+ test = (
+ ("https://inkbunny.net/soina", {
+ "pattern": r"https://[\w.]+\.metapix\.net/files/full"
+ r"/\d+/\d+_soina_.+",
+ "range": "20-50",
+ "keyword": {
+ "date" : "type:datetime",
+ "deleted" : bool,
+ "file_id" : "re:[0-9]+",
+ "filename" : r"re:[0-9]+_soina_\w+",
+ "full_file_md5": "re:[0-9a-f]{32}",
+ "mimetype" : str,
+ "submission_id": "re:[0-9]+",
+ "user_id" : "20969",
+ "comments_count" : "re:[0-9]+",
+ "deleted" : bool,
+ "digitalsales" : bool,
+ "favorite" : bool,
+ "favorites_count": "re:[0-9]+",
+ "forsale" : bool,
+ "friends_only" : bool,
+ "guest_block" : bool,
+ "hidden" : bool,
+ "pagecount" : "re:[0-9]+",
+ "pools" : list,
+ "pools_count" : int,
+ "printsales" : bool,
+ "public" : bool,
+ "rating_id" : "re:[0-9]+",
+ "rating_name" : str,
+ "ratings" : list,
+ "scraps" : bool,
+ "tags" : list,
+ "title" : str,
+ "type_name" : str,
+ "username" : "soina",
+ "views" : str,
+ },
+ }),
+ ("https://inkbunny.net/gallery/soina", {
+ "range": "1-25",
+ "keyword": {"scraps": False},
+ }),
+ ("https://inkbunny.net/scraps/soina", {
+ "range": "1-25",
+ "keyword": {"scraps": True},
+ }),
+ )
+
+ def __init__(self, match):
+ kind, self.user = match.groups()
+ if not kind:
+ self.scraps = None
+ elif kind[0] == "g":
+ self.subcategory = "gallery"
+ self.scraps = "no"
+ else:
+ self.subcategory = "scraps"
+ self.scraps = "only"
+ InkbunnyExtractor.__init__(self, match)
+
+ def posts(self):
+ orderby = self.config("orderby")
+ params = {
+ "username": self.user,
+ "scraps" : self.scraps,
+ "orderby" : orderby,
+ }
+ if orderby and orderby.startswith("unread_"):
+ params["unread_submissions"] = "yes"
+ return self.api.search(params)
+
+
+class InkbunnyPostExtractor(InkbunnyExtractor):
+ """Extractor for individual Inkbunny posts"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/s/(\d+)"
+ test = (
+ ("https://inkbunny.net/s/1829715", {
+ "pattern": r"https://[\w.]+\.metapix\.net/files/full"
+ r"/2626/2626843_soina_dscn2296\.jpg",
+ "content": "cf69d8dddf0822a12b4eef1f4b2258bd600b36c8",
+ }),
+ ("https://inkbunny.net/s/2044094", {
+ "count": 4,
+ }),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ self.submission_id = match.group(1)
+
+ def posts(self):
+ return self.api.detail(({"submission_id": self.submission_id},))
+
+
+class InkbunnyAPI():
+ """Interface for the Inkunny API
+
+ Ref: https://wiki.inkbunny.net/wiki/API
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.session_id = None
+
+ def detail(self, submissions):
+ """Get full details about submissions with the given IDs"""
+ ids = {
+ sub["submission_id"]: idx
+ for idx, sub in enumerate(submissions)
+ }
+ params = {
+ "submission_ids": ",".join(ids),
+ "show_description": "yes",
+ }
+
+ submissions = [None] * len(ids)
+ for sub in self._call("submissions", params)["submissions"]:
+ submissions[ids[sub["submission_id"]]] = sub
+ return submissions
+
+ def search(self, params):
+ """Perform a search"""
+ return self._pagination_search(params)
+
+ def set_allowed_ratings(self, nudity=True, sexual=True,
+ violence=True, strong_violence=True):
+ """Change allowed submission ratings"""
+ params = {
+ "tag[2]": "yes" if nudity else "no",
+ "tag[3]": "yes" if violence else "no",
+ "tag[4]": "yes" if sexual else "no",
+ "tag[5]": "yes" if strong_violence else "no",
+ }
+ self._call("userrating", params)
+
+ def authenticate(self, invalidate=False):
+ username, password = self.extractor._get_auth_info()
+ if invalidate:
+ _authenticate_impl.invalidate(username or "guest")
+ if username:
+ self.session_id = _authenticate_impl(self, username, password)
+ else:
+ self.session_id = _authenticate_impl(self, "guest", "")
+ self.set_allowed_ratings()
+
+ def _call(self, endpoint, params):
+ url = "https://inkbunny.net/api_" + endpoint + ".php"
+ params["sid"] = self.session_id
+ data = self.extractor.request(url, params=params).json()
+
+ if "error_code" in data:
+ if str(data["error_code"]) == "2":
+ self.authenticate(invalidate=True)
+ return self._call(endpoint, params)
+ raise exception.StopExtraction(data.get("error_message"))
+
+ return data
+
+ def _pagination_search(self, params):
+ params["page"] = 1
+ params["get_rid"] = "yes"
+ params["submission_ids_only"] = "yes"
+
+ while True:
+ data = self._call("search", params)
+ yield from self.detail(data["submissions"])
+
+ if data["page"] >= data["pages_count"]:
+ return
+ if "get_rid" in params:
+ del params["get_rid"]
+ params["rid"] = data["rid"]
+ params["page"] += 1
+
+
+@cache(maxage=360*24*3600, keyarg=1)
+def _authenticate_impl(api, username, password):
+ api.extractor.log.info("Logging in as %s", username)
+
+ url = "https://inkbunny.net/api_login.php"
+ data = {"username": username, "password": password}
+ data = api.extractor.request(url, method="POST", data=data).json()
+
+ if "sid" not in data:
+ raise exception.AuthenticationError(data.get("error_message"))
+ return data["sid"]
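
The cached login above boils down to a single POST; a standalone sketch (guest session, endpoint and field names taken from the code):

    import requests

    def inkbunny_login(username="guest", password=""):
        url = "https://inkbunny.net/api_login.php"
        data = requests.post(
            url, data={"username": username, "password": password}).json()
        if "sid" not in data:
            raise RuntimeError(data.get("error_message"))
        return data["sid"]  # passed as 'sid' to subsequent api_*.php calls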
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index bf6b10f..639f272 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -315,7 +315,7 @@ class InstagramExtractor(Extractor):
if not has_next_page:
break
-
+ time.sleep(3)
end_cursor = medias['page_info']['end_cursor']
variables = '{{"{}":"{}","first":12,"after":"{}"}}'.format(
psdf['variables_id'],
@@ -342,7 +342,8 @@ class InstagramExtractor(Extractor):
class InstagramImageExtractor(InstagramExtractor):
"""Extractor for PostPage"""
subcategory = "image"
- pattern = r"(?:https?://)?(?:www\.)?instagram\.com/(?:p|tv)/([^/?&#]+)"
+ pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+ r"/(?:p|tv|reel)/([^/?&#]+)")
test = (
# GraphImage
("https://www.instagram.com/p/BqvsDleB3lV/", {
@@ -440,6 +441,8 @@ class InstagramImageExtractor(InstagramExtractor):
}]
}
}),
+
+ ("https://www.instagram.com/reel/CDg_6Y1pxWu/"),
)
def __init__(self, match):
@@ -500,7 +503,7 @@ class InstagramUserExtractor(InstagramExtractor):
"""Extractor for ProfilePage"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
+ r"/(?!(?:p|explore|directory|accounts|stories|tv|reel)/)"
r"([^/?&#]+)/?(?:$|[?#])")
test = (
("https://www.instagram.com/instagram/", {
@@ -530,7 +533,7 @@ class InstagramUserExtractor(InstagramExtractor):
'node_id': 'id',
'variables_id': 'id',
'edge_to_medias': 'edge_owner_to_timeline_media',
- 'query_hash': '44efc15d3c13342d02df0b5a9fa3d33f',
+ 'query_hash': '15bf78a4ad24e33cbd838fdb31353ac1',
})
if self.config('highlights'):
@@ -599,5 +602,5 @@ class InstagramTagExtractor(InstagramExtractor):
'node_id': 'name',
'variables_id': 'tag_name',
'edge_to_medias': 'edge_hashtag_to_media',
- 'query_hash': '7dabc71d3e758b1ec19ffb85639e427b',
+ 'query_hash': 'c769cb6c71b24c8a86590b22402fda50',
})
diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py
index 18ef005..a4b8340 100644
--- a/gallery_dl/extractor/mangapanda.py
+++ b/gallery_dl/extractor/mangapanda.py
@@ -1,14 +1,15 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://www.mangapanda.com/"""
+"""Extractors for https://www.mangapanda.com/"""
-from .mangareader import MangareaderMangaExtractor, MangareaderChapterExtractor
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
class MangapandaBase():
@@ -16,21 +17,102 @@ class MangapandaBase():
category = "mangapanda"
root = "https://www.mangapanda.com"
+ @staticmethod
+ def parse_page(page, data):
+ """Parse metadata on 'page' and add it to 'data'"""
+ text.extract_all(page, (
+ ("manga" , '<h2 class="aname">', '</h2>'),
+ ("release", '>Year of Release:</td>\n<td>', '</td>'),
+ ('author' , '>Author:</td>\n<td>', '</td>'),
+ ('artist' , '>Artist:</td>\n<td>', '</td>'),
+ ), values=data)
+ data["manga"] = data["manga"].strip()
+ data["author"] = text.unescape(data["author"])
+ data["artist"] = text.unescape(data["artist"])
+ return data
-class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
+
+class MangapandaChapterExtractor(MangapandaBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapanda.com"""
+ archive_fmt = "{manga}_{chapter}_{page}"
pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"
test = ("https://www.mangapanda.com/red-storm/2", {
"url": "1f633f776e950531ba9b1e81965316458e785261",
"keyword": "b24df4b9cc36383fb6a44e06d32a3884a4dcb5fb",
})
+ def __init__(self, match):
+ path, self.url_title, self.chapter = match.groups()
+ ChapterExtractor.__init__(self, match, self.root + path)
+
+ def metadata(self, chapter_page):
+ page = self.request(self.root + self.url_title).text
+ data = self.parse_page(page, {
+ "chapter": text.parse_int(self.chapter),
+ "lang": "en",
+ "language": "English",
+ })
+ text.extract_all(page, (
+ ('title', ' ' + self.chapter + '</a> : ', '</td>'),
+ ('date', '<td>', '</td>'),
+ ), page.index('<div id="chapterlist">'), data)
+ data["count"] = text.parse_int(text.extract(
+ chapter_page, '</select> of ', '<')[0]
+ )
+ return data
+
+ def images(self, page):
+ while True:
+ next_url, image_url, image_data = self.get_image_metadata(page)
+ yield image_url, image_data
+
+ if not next_url:
+ return
+ page = self.request(next_url).text
-class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
+ def get_image_metadata(self, page):
+ """Collect next url, image-url and metadata for one manga-page"""
+ extr = text.extract
+ width = None
+ test , pos = extr(page, "document['pu']", '')
+ if test is None:
+ return None, None, None
+ if page.find("document['imgwidth']", pos, pos+200) != -1:
+ width , pos = extr(page, "document['imgwidth'] = ", ";", pos)
+ height, pos = extr(page, "document['imgheight'] = ", ";", pos)
+ _ , pos = extr(page, '<div id="imgholder">', '')
+ url, pos = extr(page, ' href="', '"', pos)
+ if width is None:
+ width , pos = extr(page, '<img id="img" width="', '"', pos)
+ height, pos = extr(page, ' height="', '"', pos)
+ image, pos = extr(page, ' src="', '"', pos)
+ return self.root + url, image, {
+ "width": text.parse_int(width),
+ "height": text.parse_int(height),
+ }
+
+
+class MangapandaMangaExtractor(MangapandaBase, MangaExtractor):
"""Extractor for manga from mangapanda.com"""
chapterclass = MangapandaChapterExtractor
+ reverse = False
pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?&#]+)/?$"
test = ("https://www.mangapanda.com/mushishi", {
"url": "357f965732371cac1990fee8b480f62e29141a42",
"keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
})
+
+ def chapters(self, page):
+ results = []
+ data = self.parse_page(page, {"lang": "en", "language": "English"})
+
+ needle = '<div class="chico_manga"></div>\n<a href="'
+ pos = page.index('<div id="chapterlist">')
+ while True:
+ url, pos = text.extract(page, needle, '"', pos)
+ if not url:
+ return results
+ data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
+ data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
+ data["chapter"] = text.parse_int(url.rpartition("/")[2])
+ results.append((self.root + url, data.copy()))
diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py
index 31083dc..fd9c7ac 100644
--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@@ -6,10 +6,12 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://www.mangareader.net/"""
+"""Extractors for https://www.mangareader.net/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
+from ..cache import memcache
+import json
class MangareaderBase():
@@ -17,19 +19,35 @@ class MangareaderBase():
category = "mangareader"
root = "https://www.mangareader.net"
- @staticmethod
- def parse_page(page, data):
- """Parse metadata on 'page' and add it to 'data'"""
- text.extract_all(page, (
- ("manga" , '<h2 class="aname">', '</h2>'),
- ("release", '>Year of Release:</td>\n<td>', '</td>'),
- ('author' , '>Author:</td>\n<td>', '</td>'),
- ('artist' , '>Artist:</td>\n<td>', '</td>'),
- ), values=data)
- data["manga"] = data["manga"].strip()
- data["author"] = text.unescape(data["author"])
- data["artist"] = text.unescape(data["artist"])
- return data
+ @memcache(keyarg=1)
+ def _manga_info(self, path, page=None):
+ if not page:
+ page = self.request(self.root + path).text
+ extr = text.extract_from(page)
+ data = {
+ "manga" : text.unescape(extr('class="name">', '<')),
+ "release" : text.unescape(extr('Year of Release :</td><td>', '<')),
+ "author" : text.unescape(text.unescape(extr(
+ 'Author :</td><td>', '<'))),
+ "artist" : text.unescape(text.unescape(extr(
+ 'Artist :</td><td>', '<'))),
+ "lang" : "en",
+ "language": "English",
+ }
+
+ extr('<table', '>')
+ chapters = []
+ while True:
+ url = extr('</i> <a href="', '"')
+ if not url:
+ return chapters
+ chapter = {
+ "chapter": text.parse_int(url.rpartition("/")[2]),
+ "title" : text.unescape(extr("</a> : ", "<")),
+ "date" : extr("<td>", "<"),
+ }
+ chapter.update(data)
+ chapters.append((self.root + url, chapter))
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
@@ -38,59 +56,28 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
pattern = r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"
test = (("https://www.mangareader.net"
"/karate-shoukoushi-kohinata-minoru/11"), {
- "url": "3d8a5b900856d59b8d8e83908d0df392be92c0f4",
+ "url": "45ece5668d1e9f65cf2225237d78de58660b54e4",
"keyword": "133e3e2f7c0529a35bbb16149e34c40546f8dfd6",
})
def __init__(self, match):
- path, self.url_title, self.chapter = match.groups()
- ChapterExtractor.__init__(self, match, self.root + path)
+ ChapterExtractor.__init__(self, match)
+ _, self.path, self.chapter = match.groups()
- def metadata(self, chapter_page):
- page = self.request(self.root + self.url_title).text
- data = self.parse_page(page, {
- "chapter": text.parse_int(self.chapter),
- "lang": "en",
- "language": "English",
- })
- text.extract_all(page, (
- ('title', ' ' + self.chapter + '</a> : ', '</td>'),
- ('date', '<td>', '</td>'),
- ), page.index('<div id="chapterlist">'), data)
- data["count"] = text.parse_int(text.extract(
- chapter_page, '</select> of ', '<')[0]
- )
- return data
+ def metadata(self, page):
+ chapter = text.parse_int(self.chapter)
+ return self._manga_info(self.path)[chapter-1][1]
def images(self, page):
- while True:
- next_url, image_url, image_data = self.get_image_metadata(page)
- yield image_url, image_data
-
- if not next_url:
- return
- page = self.request(next_url).text
-
- def get_image_metadata(self, page):
- """Collect next url, image-url and metadata for one manga-page"""
- extr = text.extract
- width = None
- test , pos = extr(page, "document['pu']", '')
- if test is None:
- return None, None, None
- if page.find("document['imgwidth']", pos, pos+200) != -1:
- width , pos = extr(page, "document['imgwidth'] = ", ";", pos)
- height, pos = extr(page, "document['imgheight'] = ", ";", pos)
- _ , pos = extr(page, '<div id="imgholder">', '')
- url, pos = extr(page, ' href="', '"', pos)
- if width is None:
- width , pos = extr(page, '<img id="img" width="', '"', pos)
- height, pos = extr(page, ' height="', '"', pos)
- image, pos = extr(page, ' src="', '"', pos)
- return self.root + url, image, {
- "width": text.parse_int(width),
- "height": text.parse_int(height),
- }
+ data = json.loads(text.extract(
+ page, 'document["mj"]=', '</script>')[0])
+ return [
+ (text.ensure_http_scheme(img["u"]), {
+ "width" : text.parse_int(img["w"]),
+ "height": text.parse_int(img["h"]),
+ })
+ for img in data["im"]
+ ]
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
@@ -104,16 +91,5 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
})
def chapters(self, page):
- results = []
- data = self.parse_page(page, {"lang": "en", "language": "English"})
-
- needle = '<div class="chico_manga"></div>\n<a href="'
- pos = page.index('<div id="chapterlist">')
- while True:
- url, pos = text.extract(page, needle, '"', pos)
- if not url:
- return results
- data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
- data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
- data["chapter"] = text.parse_int(url.rpartition("/")[2])
- results.append((self.root + url, data.copy()))
+ path = self.manga_url[len(self.root):]
+ return self._manga_info(path, page)
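
The rewritten images() reads everything from a JSON object the reader page embeds as document["mj"]; it has roughly this shape (abridged, keys as accessed above), with text.ensure_http_scheme() fixing up the protocol-relative "u" values:

    {"im": [
        {"u": "//i5.mangareader.net/<path>/0001.jpg", "w": 800, "h": 1153}
    ]}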
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index 25fba70..0e04f97 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -58,7 +58,7 @@ class MangoxoExtractor(Extractor):
("timestamp", str(int(time.time()))),
]
query = "&".join("=".join(item) for item in sorted(params))
- query += "&secretKey=996293536"
+ query += "&secretKey=340836904"
sign = hashlib.md5(query.encode()).hexdigest()
params.append(("sign", sign.upper()))
return params
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index 51b314a..e2e163a 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -9,7 +9,7 @@
"""Extract images from https://www.myportfolio.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
class MyportfolioGalleryExtractor(Extractor):
@@ -31,9 +31,8 @@ class MyportfolioGalleryExtractor(Extractor):
"pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$",
"count": ">= 6",
}),
- # no explicit title
("https://stevenilousphotography.myportfolio.com/society", {
- "keyword": "49e7ff6322645c22b409280656202c2736a380c9",
+ "exception": exception.NotFoundError,
}),
# custom domain
("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", {
@@ -89,8 +88,10 @@ class MyportfolioGalleryExtractor(Extractor):
if title:
title = title.partition(">")[2]
user = user[:-len(title)-3]
- else:
+ elif user:
user, _, title = user.partition(" - ")
+ else:
+ raise exception.NotFoundError()
return {
"user": text.unescape(user),
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 931fb13..8f2d633 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -95,8 +95,8 @@ class PahealPostExtractor(PahealExtractor):
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)")
test = ("https://rule34.paheal.net/post/view/481609", {
- "url": "1142779378f655ec0497d4c301836aa667f788b1",
- "keyword": "34e9e93d4fa6fa06fac1a56e78c9a52e8cd7b271",
+ "url": "d3fd0f82762716fe3fb03c9c923e61c13ce22204",
+ "keyword": "35748081bfeaab48f909f4b097a4d79b2be12538",
"content": "7b924bcf150b352ac75c9d281d061e174c851a11",
})
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 3bbe06a..cc89ac5 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -14,7 +14,7 @@ import itertools
import json
-BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.\w+"
+BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
class PinterestExtractor(Extractor):
@@ -101,6 +101,8 @@ class PinterestBoardExtractor(PinterestExtractor):
("https://www.pinterest.com/g1952848/test/", {
"exception": exception.GalleryDLException,
}),
+ # .co.uk TLD (#914)
+ ("https://www.pinterest.co.uk/hextra7519/based-animals/"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py
index 9cada6b..d8ac9f6 100644
--- a/gallery_dl/extractor/pixnet.py
+++ b/gallery_dl/extractor/pixnet.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
"""Extractors for https://www.pixnet.net/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
BASE_PATTERN = r"(?:https?://)?(?!www\.)([^.]+)\.pixnet.net"
@@ -53,6 +53,9 @@ class PixnetExtractor(Extractor):
yield from text.extract_iter(page, '<li id="', '</li>')
pnext = text.extract(page, 'class="nextBtn"', '>')[0]
+ if pnext is None and 'name="albumpass">' in page:
+ raise exception.StopExtraction(
+ "Album %s is password-protected.", self.item_id)
if "href" not in pnext:
return
url = self.root + text.extract(pnext, 'href="', '"')[0]
@@ -107,6 +110,9 @@ class PixnetSetExtractor(PixnetExtractor):
"url": "b3eb6431aea0bcf5003432a4a0f3a3232084fc13",
"keyword": "bf7004faa1cea18cf9bd856f0955a69be51b1ec6",
}),
+ ("https://sky92100.pixnet.net/album/set/17492544", {
+ "count": 0, # password-protected
+ }),
)
def items(self):
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index f97454b..8290d2d 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -112,6 +112,7 @@ class ReactorExtractor(SharedConfigMixin, Extractor):
if not tags:
title, tags = tags, title
tags = tags.split(" :: ")
+ tags.sort()
for image in images:
url = text.extract(image, ' src="', '"')[0]
@@ -259,19 +260,19 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
test = (
("http://joyreactor.com/post/3721876", { # single image
"url": "6ce09f239d8b7fdf6dd1664c2afc39618cc87663",
- "keyword": "966d2acd462732a9ed823a9db5ed19f95734fd10",
+ "keyword": "147ed5b9799ba43cbd16168450afcfae5ddedbf3",
}),
("http://joyreactor.com/post/3713804", { # 4 images
"url": "f08ac8493ca0619a3e3c6bedb8d8374af3eec304",
- "keyword": "84e34d402342607045a65fab6d4d593d146c238a",
+ "keyword": "f12c6f3c2f298fed9b12bd3e70fb823870aa9b93",
}),
("http://joyreactor.com/post/3726210", { # gif / video
"url": "33a48e1eca6cb2d298fbbb6536b3283799d6515b",
- "keyword": "dbe148d576f2fc9431020c557ddb78f449e48c47",
+ "keyword": "d173cc6e88f02a63904e475eacd7050304eb1967",
}),
("http://joyreactor.com/post/3668724", { # youtube embed
"url": "bf1666eddcff10c9b58f6be63fa94e4e13074214",
- "keyword": "989112c7888e9cc80fd35870180c6c98165d953b",
+ "keyword": "e18b1ffbd79d76f9a0e90b6d474cc2499e343f0b",
}),
("http://joyreactor.cc/post/1299", { # "malformed" JSON
"url": "ac900743ed7cf1baf3db3b531c3bc414bf1ffcde",
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 679059c..cb70fe5 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -17,7 +17,7 @@ class RedditExtractor(Extractor):
"""Base class for reddit extractors"""
category = "reddit"
directory_fmt = ("{category}", "{subreddit}")
- filename_fmt = "{id} {title[:220]}.{extension}"
+ filename_fmt = "{id}{num:? //>02} {title[:220]}.{extension}"
archive_fmt = "{filename}"
cookiedomain = None
@@ -50,11 +50,22 @@ class RedditExtractor(Extractor):
yield Message.Directory, submission
visited.add(submission["id"])
url = submission["url"]
+ submission["num"] = 0
if url.startswith("https://i.redd.it/"):
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
+ elif "gallery_data" in submission:
+ meta = submission["media_metadata"]
+ items = submission["gallery_data"]["items"]
+ for submission["num"], item in enumerate(items, 1):
+ url = meta[item["media_id"]]["s"]["u"]
+ url = url.partition("?")[0]
+ url = url.replace("/preview.", "/i.", 1)
+ text.nameext_from_url(url, submission)
+ yield Message.Url, url, submission
+
elif submission["is_video"]:
if videos:
text.nameext_from_url(url, submission)
@@ -160,9 +171,8 @@ class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for URLs from a submission on reddit.com"""
subcategory = "submission"
pattern = (r"(?:https?://)?(?:"
- r"(?:\w+\.)?reddit\.com/r/[^/?&#]+/comments|"
- r"redd\.it"
- r")/([a-z0-9]+)")
+ r"(?:\w+\.)?reddit\.com/(?:r/[^/?&#]+/comments|gallery)"
+ r"|redd\.it)/([a-z0-9]+)")
test = (
("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
"pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg",
@@ -173,6 +183,11 @@ class RedditSubmissionExtractor(RedditExtractor):
"pattern": r"https://",
"count": 3,
}),
+ ("https://www.reddit.com/gallery/hrrh23", {
+ "url": "25b91ede15459470274dd17291424b037ed8b0ae",
+ "content": "1e7dde4ee7d5f4c4b45749abfd15b2dbfa27df3f",
+ "count": 3,
+ }),
("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 28ee46c..9d1df18 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -74,21 +74,33 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
def products(self):
params = text.parse_query(self.params)
params["page"] = text.parse_int(params.get("page"), 1)
- search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+")
-
- while True:
- page = self.request(self.item_url, params=params).text
- urls = search_re.findall(page)
- last = None
-
- if not urls:
- return
- for path in urls:
- if last == path:
- continue
- last = path
- yield self.root + path
- params["page"] += 1
+ fetch = True
+ last = None
+
+ for pattern in (
+ r"/collections/[\w-]+/products/[\w-]+",
+ r"href=[\"'](/products/[\w-]+)",
+ ):
+ search_re = re.compile(pattern)
+
+ while True:
+ if fetch:
+ page = self.request(self.item_url, params=params).text
+ urls = search_re.findall(page)
+
+ if len(urls) < 3:
+ if last:
+ return
+ fetch = False
+ break
+ fetch = True
+
+ for path in urls:
+ if last == path:
+ continue
+ last = path
+ yield self.root + path
+ params["page"] += 1
class ShopifyProductExtractor(ShopifyExtractor):
@@ -121,7 +133,6 @@ EXTRACTORS = {
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
),
-
},
}
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index abf9995..a0d34d1 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -45,11 +45,12 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
self.gallery_url = extr('<link rel="canonical" href="', '"')
title = extr('<meta property="og:title" content="', '"')
+ image = extr('<meta property="og:image" content="', '"')
if not title:
raise exception.NotFoundError("gallery")
data = {
"title" : text.unescape(title),
- "gallery_id": text.parse_int(extr('/Album/', '/')),
+ "gallery_id": text.parse_int(image.split("/")[-2]),
"parody" : split(extr('box-title">Series</div>', '</div>')),
"language" : text.remove_html(extr(
'box-title">Language</div>', '</div>')) or None,
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 44a0a84..163102d 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -112,13 +112,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
test = (
("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
"url": "f624ad7293afd6412a7d34e3950a118596c36c85",
- "keyword": "085861b5935e3cd96ad15954039bc2419cdf1c27",
+ "keyword": "d69c69c1517b8ea77bc763cffc4d0a4002dfee3f",
"content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
}),
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "e0927fda7b1c39c19974625270102ad7e72b9d6f",
+ "keyword": "720da317232504f05099da37802ed3c3ce3cd310",
}),
)
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 08d8850..076d0c0 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
-import datetime
import json
@@ -35,8 +34,6 @@ class SubscribestarExtractor(Extractor):
self.cookiedomain = "subscribestar.adult"
self.subcategory += "-adult"
Extractor.__init__(self, match)
- self.metadata = self.config("metadata", False)
- self._year = " " + str(datetime.date.today().year)
def items(self):
self.login()
@@ -92,38 +89,46 @@ class SubscribestarExtractor(Extractor):
@staticmethod
def _media_from_post(html):
+ media = []
+
gallery = text.extract(html, 'data-gallery="', '"')[0]
if gallery:
- return [
+ media.extend(
item for item in json.loads(text.unescape(gallery))
if "/previews/" not in item["url"]
- ]
- return ()
+ )
+
+ attachments = text.extract(
+ html, 'class="uploads-docs"', 'data-role="post-edit_form"')[0]
+ if attachments:
+ for att in attachments.split('class="doc_preview"')[1:]:
+ media.append({
+ "id" : text.parse_int(text.extract(
+ att, 'data-upload-id="', '"')[0]),
+ "url" : text.extract(att, 'href="', '"')[0],
+ "type": "attachment",
+ })
+
+ return media
def _data_from_post(self, html):
extr = text.extract_from(html)
- data = {
+ return {
"post_id" : text.parse_int(extr('data-id="', '"')),
"author_id" : text.parse_int(extr('data-user-id="', '"')),
"author_name": text.unescape(extr('href="/', '"')),
"author_nick": text.unescape(extr('>', '<')),
+ "date" : self._parse_datetime(text.remove_html(extr(
+ 'class="post-date">', '</'))),
"content" : (extr(
'<div class="post-content', '<div class="post-uploads')
.partition(">")[2]),
}
- if self.metadata:
- url = "{}/posts/{}".format(self.root, data["post_id"])
- page = self.request(url).text
- data["date"] = self._parse_datetime(text.extract(
- page, 'class="section-subtitle">', '<')[0])
-
- return data
-
def _parse_datetime(self, dt):
- date = text.parse_datetime(dt, "%B %d, %Y %H:%M")
+ date = text.parse_datetime(dt, "%b %d, %Y %I:%M %p")
if date is dt:
- date = text.parse_datetime(dt + self._year, "%d %b %H:%M %Y")
+ date = text.parse_datetime(dt, "%B %d, %Y %I:%M %p")
return date
@@ -141,6 +146,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor):
"author_name": "subscribestar",
"author_nick": "SubscribeStar",
"content": str,
+ "date" : "type:datetime",
"height" : int,
"id" : int,
"pinned" : bool,
@@ -209,8 +215,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
def posts(self):
url = "{}/posts/{}".format(self.root, self.item)
- self._page = self.request(url).text
- return (self._page,)
+ return (self.request(url).text,)
def _data_from_post(self, html):
extr = text.extract_from(html)
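
Two changes in this file: `_media_from_post()` now also collects attached documents from the `uploads-docs` block (emitted with `"type": "attachment"`), and the post date is parsed straight from the listing markup, which makes the old `metadata` option and its extra per-post request unnecessary. `_parse_datetime()` tries the abbreviated month name first and falls back to the full one; `text.parse_datetime()` returns its input unchanged on failure, which is what the `date is dt` identity check detects. A standalone sketch of that fallback (the sample strings are invented but match the two formats):

    from datetime import datetime

    def parse_post_date(value):
        # e.g. "Aug 15, 2020 05:30 PM" vs. "August 15, 2020 05:30 PM"
        for fmt in ("%b %d, %Y %I:%M %p", "%B %d, %Y %I:%M %p"):
            try:
                return datetime.strptime(value, fmt)
            except ValueError:
                continue
        return value  # mirror text.parse_datetime(): input on failure
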
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2530040..71f14dc 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -503,8 +503,9 @@ class TwitterAPI():
if response.status_code < 400:
return response.json()
if response.status_code == 429:
- self.extractor.wait(until=response.headers["x-rate-limit-reset"])
- return self._call(endpoint, params)
+ until = response.headers.get("x-rate-limit-reset")
+ self.extractor.wait(until=until, seconds=(None if until else 60))
+ return self._call(endpoint, params, method)
try:
msg = ", ".join(
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index c9f0ec3..76e4e3d 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -61,7 +61,7 @@ class VscoExtractor(Extractor):
"video" : img["is_video"],
"width" : img["width"],
"height": img["height"],
- "description": img["description"],
+ "description": img.get("description") or "",
})
yield Message.Url, url, data
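
The old `img["description"]` presumably failed on media without that field, and `img.get("description")` alone would still pass `None` through when the API sends an explicit null; the added `or ""` normalizes both cases to an empty string:

    img = {"description": None}                   # explicit null from the API
    description = img.get("description") or ""    # -> "" rather than None
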
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index a338216..5f11df3 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -146,7 +146,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
def _data(self, url):
page = self.request(url).text
return json.loads(text.extract(
- page, "window.initials =", "</script>")[0].rstrip("\n\r;"))
+ page, "window.initials=", "</script>")[0].rstrip("\n\r;"))
class XhamsterUserExtractor(XhamsterExtractor):
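
The site minified its embedded state assignment, dropping the spaces around `=`, so the extraction marker changes from `"window.initials ="` to `"window.initials="` (since `text.extract()` does plain substring search, the old marker no longer matches). A runnable sketch of the same extraction against an invented minimal page:

    import json

    page = '<script>window.initials={"pageURL":"/photos/gallery/x"};</script>'
    raw = page.partition("window.initials=")[2].partition("</script>")[0]
    initials = json.loads(raw.rstrip("\n\r;"))  # -> {'pageURL': '/photos/gallery/x'}
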
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 4c18e4d..163c3c6 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -48,6 +48,9 @@ class Job():
extr.category = pextr.category
extr.subcategory = pextr.subcategory
+ # transfer parent directory
+ extr._parentdir = pextr._parentdir
+
# reuse connection adapters
extr.session.adapters = pextr.session.adapters
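
Child jobs already inherit the parent's category names and connection adapters; copying `_parentdir` as well appears to keep the `parent-directory` extractor option working across nested jobs, so extractors spawned from another extractor's results keep saving into the parent's download directory. A hedged example of setting that option programmatically, using the same `config.set()` call the test suite uses:

    from gallery_dl import config

    # assumption: "parent-directory" is the option _parentdir backs
    config.set(("extractor", "reddit"), "parent-directory", True)
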
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index fd52077..b2b59e0 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.14.3"
+__version__ = "1.14.4"
diff --git a/test/test_oauth.py b/test/test_oauth.py
index e4664e4..7455928 100644
--- a/test/test_oauth.py
+++ b/test/test_oauth.py
@@ -15,7 +15,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import oauth, text # noqa E402
TESTSERVER = "http://term.ie/oauth/example"
-TESTSERVER = "http://term.ie/oauth/example"
CONSUMER_KEY = "key"
CONSUMER_SECRET = "secret"
REQUEST_TOKEN = "requestkey"
@@ -96,12 +95,17 @@ class TestOAuthSession(unittest.TestCase):
def _oauth_request(self, endpoint, params=None,
oauth_token=None, oauth_token_secret=None):
+ # the test server at 'term.ie' is unreachable
+ raise unittest.SkipTest()
+
session = oauth.OAuth1Session(
CONSUMER_KEY, CONSUMER_SECRET,
oauth_token, oauth_token_secret,
)
try:
- return session.get(TESTSERVER + endpoint, params=params).text
+ response = session.get(TESTSERVER + endpoint, params=params)
+ response.raise_for_status()
+ return response.text
except OSError:
raise unittest.SkipTest()
diff --git a/test/test_results.py b/test/test_results.py
index dd1ed1d..1f2f699 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -31,8 +31,10 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "bobx",
+ "hentaihand",
"imagevenue",
+ "mangapark",
+ "ngomik",
"photobucket",
"worldthree",
}
@@ -317,7 +319,7 @@ def setup_test_config():
config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
for category in ("danbooru", "instagram", "twitter", "subscribestar",
- "e621"):
+ "e621", "inkbunny"):
config.set(("extractor", category), "username", None)
config.set(("extractor", "mastodon.social"), "access-token",