author    Unit 193 <unit193@unit193.net>  2021-11-10 01:26:06 -0500
committer Unit 193 <unit193@unit193.net>  2021-11-10 01:26:06 -0500
commit    fc8c5e642017e2b4e5299e2093e72b316479690d (patch)
tree      41119c71e8d86755f4c945d8c2ccf5a7000f546e
parent    4a965d875415907cc1a016b428ae305a964f9228 (diff)
download  gallery-dl-fc8c5e642017e2b4e5299e2093e72b316479690d.tar.bz2
          gallery-dl-fc8c5e642017e2b4e5299e2093e72b316479690d.tar.xz
          gallery-dl-fc8c5e642017e2b4e5299e2093e72b316479690d.tar.zst

New upstream version 1.19.2 (tag: upstream/1.19.2)
-rw-r--r--  CHANGELOG.md                          21
-rw-r--r--  PKG-INFO                               6
-rw-r--r--  README.rst                             4
-rw-r--r--  data/man/gallery-dl.1                  2
-rw-r--r--  data/man/gallery-dl.conf.5            32
-rw-r--r--  docs/gallery-dl.conf                   3
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        1
-rw-r--r--  gallery_dl/extractor/8muses.py         2
-rw-r--r--  gallery_dl/extractor/__init__.py       1
-rw-r--r--  gallery_dl/extractor/bcy.py            5
-rw-r--r--  gallery_dl/extractor/common.py         3
-rw-r--r--  gallery_dl/extractor/cyberdrop.py     40
-rw-r--r--  gallery_dl/extractor/deviantart.py    16
-rw-r--r--  gallery_dl/extractor/foolfuuka.py      2
-rw-r--r--  gallery_dl/extractor/gfycat.py        15
-rw-r--r--  gallery_dl/extractor/hentaihand.py     4
-rw-r--r--  gallery_dl/extractor/hitomi.py         6
-rw-r--r--  gallery_dl/extractor/instagram.py     31
-rw-r--r--  gallery_dl/extractor/kemonoparty.py   21
-rw-r--r--  gallery_dl/extractor/luscious.py       2
-rw-r--r--  gallery_dl/extractor/mangakakalot.py  91
-rw-r--r--  gallery_dl/extractor/mangapark.py      2
-rw-r--r--  gallery_dl/extractor/mangoxo.py       37
-rw-r--r--  gallery_dl/extractor/paheal.py         6
-rw-r--r--  gallery_dl/extractor/patreon.py        2
-rw-r--r--  gallery_dl/extractor/philomena.py      4
-rw-r--r--  gallery_dl/extractor/reddit.py        16
-rw-r--r--  gallery_dl/extractor/redgifs.py      123
-rw-r--r--  gallery_dl/extractor/sankaku.py        6
-rw-r--r--  gallery_dl/extractor/skeb.py         141
-rw-r--r--  gallery_dl/extractor/slideshare.py     2
-rw-r--r--  gallery_dl/extractor/smugmug.py        4
-rw-r--r--  gallery_dl/extractor/tapas.py          2
-rw-r--r--  gallery_dl/extractor/twitter.py        3
-rw-r--r--  gallery_dl/extractor/xvideos.py        5
-rw-r--r--  gallery_dl/formatter.py               23
-rw-r--r--  gallery_dl/version.py                  2
-rw-r--r--  test/test_formatter.py                91
39 files changed, 612 insertions, 171 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f6b60f0..9fa1540 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog
+## 1.19.2 - 2021-11-05
+### Additions
+- [kemonoparty] add `comments` option ([#1980](https://github.com/mikf/gallery-dl/issues/1980))
+- [skeb] add `user` and `post` extractors ([#1031](https://github.com/mikf/gallery-dl/issues/1031), [#1971](https://github.com/mikf/gallery-dl/issues/1971))
+- [twitter] add `pinned` option
+- support accessing environment variables and the current local datetime in format strings ([#1968](https://github.com/mikf/gallery-dl/issues/1968))
+- add special type format strings to docs ([#1987](https://github.com/mikf/gallery-dl/issues/1987))
+### Fixes
+- [cyberdrop] fix video extraction ([#1993](https://github.com/mikf/gallery-dl/issues/1993))
+- [deviantart] fix `index` values for stashed deviations
+- [gfycat] provide consistent `userName` values for `user` downloads ([#1962](https://github.com/mikf/gallery-dl/issues/1962))
+- [gfycat] show warning when there are no available formats
+- [hitomi] fix image URLs ([#1975](https://github.com/mikf/gallery-dl/issues/1975), [#1982](https://github.com/mikf/gallery-dl/issues/1982), [#1988](https://github.com/mikf/gallery-dl/issues/1988))
+- [instagram] update query hashes
+- [mangakakalot] update domain and fix extraction
+- [mangoxo] fix login and extraction
+- [reddit] prevent crash for galleries with no `media_metadata` ([#2001](https://github.com/mikf/gallery-dl/issues/2001))
+- [redgifs] update to API v2 ([#1984](https://github.com/mikf/gallery-dl/issues/1984))
+- fix calculating retry sleep times ([#1990](https://github.com/mikf/gallery-dl/issues/1990))
+
## 1.19.1 - 2021-10-24
### Additions
- [inkbunny] add `following` extractor ([#515](https://github.com/mikf/gallery-dl/issues/515))
@@ -24,6 +44,7 @@
- [patreon] better filenames for `content` images ([#1954](https://github.com/mikf/gallery-dl/issues/1954))
- [redgifs][gfycat] provide fallback URLs ([#1962](https://github.com/mikf/gallery-dl/issues/1962))
- [downloader:ytdl] prevent crash in `_progress_hook()`
+- restore SOCKS support for Windows executables
## 1.19.0 - 2021-10-01
### Additions
diff --git a/PKG-INFO b/PKG-INFO
index ca59c0c..e40d119 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.19.1
+Version: 1.19.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index e5befc8..5586fda 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index b893a3e..ba0aa8d 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-10-24" "1.19.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-11-05" "1.19.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index cdedaee..dc097d2 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-10-24" "1.19.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-11-05" "1.19.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1518,6 +1518,17 @@ You can use \f[I]"all"\f[] instead of listing all values separately.
Download video files.
+.SS extractor.kemonoparty.comments
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract \f[I]comments\f[] metadata.
+
+
.SS extractor.kemonoparty.max-posts
.IP "Type:" 6
\f[I]integer\f[]
@@ -2107,17 +2118,17 @@ video extraction and download
* \f[I]string\f[]
.IP "Default:" 9
-\f[I]["mp4", "webm", "mobile", "gif"]\f[]
+\f[I]["hd", "sd", "gif"]\f[]
.IP "Description:" 4
List of names of the preferred animation format, which can be
-\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[], \f[I]"mobile"\f[], or \f[I]"mini"\f[].
+\f[I]"hd"\f[], \f[I]"sd"\f[], "gif", "vthumbnail"`, "thumbnail"\f[I], or \f[]"poster"\f[I].
If a selected format is not available, the next one in the list will be
tried until an available format is found.
-If the format is given as \f[I]string\f[], it will be extended with
-\f[I]["mp4", "webm", "mobile", "gif"]\f[]. Use a list with one element to
+If the format is given as \f[I]string\f[], it will be extended with
+\f[I]["hd", "sd", "gif"]\f[]. Use a list with one element to
restrict it to only one possible format.
@@ -2285,6 +2296,17 @@ Known available sizes are
Logout and retry as guest when access to another user's Tweets is blocked.
+.SS extractor.twitter.pinned
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch media from pinned Tweets.
+
+
.SS extractor.twitter.quoted
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index c89f4b9..5e2628f 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -221,7 +221,7 @@
},
"redgifs":
{
- "format": ["mp4", "webm", "mobile", "gif"]
+ "format": ["hd", "sd", "gif"]
},
"sankakucomplex":
{
@@ -266,6 +266,7 @@
"password": null,
"cards": false,
"conversations": false,
+ "pinned": false,
"quoted": false,
"replies": true,
"retweets": false,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 5f9b4b8..7e6d632 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.19.1
+Version: 1.19.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 6dd43c8..409b8e7 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -154,6 +154,7 @@ gallery_dl/extractor/senmanga.py
gallery_dl/extractor/sexcom.py
gallery_dl/extractor/shopify.py
gallery_dl/extractor/simplyhentai.py
+gallery_dl/extractor/skeb.py
gallery_dl/extractor/slickpic.py
gallery_dl/extractor/slideshare.py
gallery_dl/extractor/smugmug.py
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index c961ded..3cd9c3a 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -55,7 +55,7 @@ class _8musesAlbumExtractor(Extractor):
},
}),
# custom sorting
- ("https://www.8muses.com/comics/album/Fakku-Comics/8?sort=az", {
+ ("https://www.8muses.com/comics/album/Fakku-Comics/9?sort=az", {
"count": ">= 70",
"keyword": {"name": r"re:^[R-Zr-z]"},
}),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 93702ab..79fe971 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -115,6 +115,7 @@ modules = [
"senmanga",
"sexcom",
"simplyhentai",
+ "skeb",
"slickpic",
"slideshare",
"smugmug",
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py
index f867bd9..47e51b3 100644
--- a/gallery_dl/extractor/bcy.py
+++ b/gallery_dl/extractor/bcy.py
@@ -9,7 +9,7 @@
"""Extractors for https://bcy.net/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
import json
import re
@@ -93,7 +93,7 @@ class BcyExtractor(Extractor):
def _data_from_post(self, post_id):
url = "{}/item/detail/{}".format(self.root, post_id)
- page = self.request(url).text
+ page = self.request(url, notfound="post").text
return json.loads(
text.extract(page, 'JSON.parse("', '");')[0]
.replace('\\\\u002F', '/')
@@ -178,6 +178,7 @@ class BcyPostExtractor(BcyExtractor):
}),
# deleted
("https://bcy.net/item/detail/6780546160802143236", {
+ "exception": exception.NotFoundError,
"count": 0,
}),
# only visible to logged in users
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 4f42477..e80366e 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -164,7 +164,8 @@ class Extractor():
self.log.debug("%s (%s/%s)", msg, tries, retries+1)
if tries > retries:
break
- time.sleep(max(tries, self.request_interval))
+ time.sleep(
+ max(tries, self._interval()) if self._interval else tries)
tries += 1
raise exception.HttpError(msg, response)
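
The retry fix (#1990) makes the sleep depend on the configured request interval only when one is actually set, falling back to a plain linear backoff otherwise. A standalone sketch of that policy, with the interval modelled as an optional callable the way the hunk's self._interval reads:

import time

def sleep_before_retry(tries, interval=None):
    # Linear backoff of 'tries' seconds (1, 2, 3, ...), or the configured
    # request interval when one is set and longer.
    time.sleep(max(tries, interval()) if interval else tries)

sleep_before_retry(1)                        # sleeps 1 second
sleep_before_retry(1, interval=lambda: 5.0)  # sleeps 5 seconds
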
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index d1b1b25..dbaa97e 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -17,18 +17,32 @@ class CyberdropAlbumExtractor(Extractor):
directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
- test = ("https://cyberdrop.me/a/keKRjm4t", {
- "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.[a-z]+$",
- "keyword": {
- "album_id": "keKRjm4t",
- "album_name": "Fate (SFW)",
- "album_size": 150069254,
- "count": 62,
- "date": "dt:2020-06-18 13:14:20",
- "description": "",
- "id": r"re:\w{8}",
- },
- })
+ test = (
+ # images
+ ("https://cyberdrop.me/a/keKRjm4t", {
+ "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.(jpg|png|webp)$",
+ "keyword": {
+ "album_id": "keKRjm4t",
+ "album_name": "Fate (SFW)",
+ "album_size": 150069254,
+ "count": 62,
+ "date": "dt:2020-06-18 13:14:20",
+ "description": "",
+ "id": r"re:\w{8}",
+ },
+ }),
+ # videos
+ ("https://cyberdrop.me/a/l8gIAXVD", {
+ "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$",
+ "count": 31,
+ "keyword": {
+ "album_id": "l8gIAXVD",
+ "album_name": "Achelois17 videos",
+ "album_size": 652037121,
+ "date": "dt:2020-06-16 15:40:44",
+ },
+ }),
+ )
def __init__(self, match):
Extractor.__init__(self, match)
@@ -41,7 +55,7 @@ class CyberdropAlbumExtractor(Extractor):
files = []
append = files.append
while True:
- url = extr('downloadUrl: "', '"')
+ url = extr('id="file" href="', '"')
if not url:
break
append(text.unescape(url))
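
The video fix (#1993) stops reading a downloadUrl script variable and instead collects the href of every id="file" anchor on the album page. A rough standard-library equivalent of that loop, run against made-up sample markup:

import re
from html import unescape

page = '''<a id="file" href="https://fs-01.cyberdrop.to/a.mp4">a</a>
<a id="file" href="https://fs-02.cyberdrop.to/b.jpg">b</a>'''

# Equivalent of the repeated extr('id="file" href="', '"') calls:
files = [unescape(m.group(1))
         for m in re.finditer(r'id="file" href="([^"]+)"', page)]
print(files)
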
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 4604d39..61affb5 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -137,11 +137,12 @@ class DeviantartExtractor(Extractor):
def prepare(self, deviation):
"""Adjust the contents of a Deviation-object"""
- try:
- deviation["index"] = text.parse_int(
- deviation["url"].rpartition("-")[2])
- except KeyError:
- deviation["index"] = 0
+ if "index" not in deviation:
+ try:
+ deviation["index"] = text.parse_int(
+ deviation["url"].rpartition("-")[2])
+ except KeyError:
+ deviation["index"] = 0
if self.user:
deviation["username"] = self.user
@@ -602,7 +603,10 @@ class DeviantartStashExtractor(DeviantartExtractor):
if stash_id[0] == "0":
uuid = text.extract(page, '//deviation/', '"')[0]
if uuid:
- yield self.api.deviation(uuid)
+ deviation = self.api.deviation(uuid)
+ deviation["index"] = text.parse_int(text.extract(
+ page, 'gmi-deviationid="', '"')[0])
+ yield deviation
return
for item in text.extract_iter(
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 9b89999..d2c5e8f 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -125,7 +125,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
}),
("https://boards.fireden.net/sci/thread/11264294/", {
- "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
+ "url": "61cab625c95584a12a30049d054931d64f8d20aa",
}),
("https://archive.nyafuu.org/c/thread/2849220/", {
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 9b4d5ee..501d114 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -36,7 +36,13 @@ class GfycatExtractor(Extractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
continue
+
url = self._process(gfycat)
+ if not url:
+ self.log.warning("Skipping '%s' (format not available)",
+ gfycat["gfyId"])
+ continue
+
gfycat.update(metadata)
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
@@ -44,7 +50,7 @@ class GfycatExtractor(Extractor):
def _process(self, gfycat):
gfycat["_fallback"] = formats = self._formats(gfycat)
gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
- return next(formats, "")
+ return next(formats, None)
def _formats(self, gfycat):
for fmt in self.formats:
@@ -73,6 +79,9 @@ class GfycatUserExtractor(GfycatExtractor):
"count": ">= 100",
})
+ def metadata(self):
+ return {"userName": self.key}
+
def gfycats(self):
return GfycatAPI(self).user(self.key)
@@ -155,6 +164,10 @@ class GfycatImageExtractor(GfycatExtractor):
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
return
url = self._process(gfycat)
+ if not url:
+ self.log.warning("Skipping '%s' (format not available)",
+ gfycat["gfyId"])
+ return
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
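
_formats() yields a URL per preferred format the item provides; _process() takes the first with next(formats, None) and leaves the rest as _fallback, and both call sites now warn and skip on None instead of emitting an empty URL. A minimal sketch of the pattern; the mp4Url/webmUrl key naming is an assumption about the gfycat item shape:

def _formats(item, preferred=("mp4", "webm", "mobile", "gif")):
    # Yield a URL for each preferred format the item actually has.
    for fmt in preferred:
        url = item.get(fmt + "Url")    # assumed key naming, e.g. "mp4Url"
        if url:
            yield url

item = {"gfyId": "example", "webmUrl": "https://example.org/e.webm"}
formats = _formats(item)
url = next(formats, None)              # first available format, else None
if url is None:
    print("Skipping '%s' (format not available)" % item["gfyId"])
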
diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py
index fa8e98b..c423b18 100644
--- a/gallery_dl/extractor/hentaihand.py
+++ b/gallery_dl/extractor/hentaihand.py
@@ -22,12 +22,12 @@ class HentaihandGalleryExtractor(GalleryExtractor):
(("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-"
"no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-"
"no-railgun-english"), {
- "pattern": r"https://cdn.hentaihand.com/.*/images/5669/\d+.jpg$",
+ "pattern": r"https://cdn.hentaihand.com/.*/images/360468/\d+.jpg$",
"count": 50,
"keyword": {
"artists" : ["Takumi Na Muchi"],
"date" : "dt:2014-06-28 00:00:00",
- "gallery_id": 5669,
+ "gallery_id": 360468,
"lang" : "en",
"language" : "English",
"parodies" : ["Toaru Kagaku No Railgun"],
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 01e5629..a4ce925 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "e057652b40629d3d72b0ef059c6ec7556417139c",
+ "url": "0cb629ab2bfe93d994a7972f68ad2a5a64ecc161",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "779b94b47d5f0c2341db03499270d2b5370196f6",
+ "url": "b420755d56a1135104ca8ca0765f44e290db70c3",
"count": 1413,
}),
# gallery with "broken" redirect
@@ -140,7 +140,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/common.js
inum = int(ihash[-3:-1], 16)
- offset = 2 if inum < 0x44 else 1 if inum < 0x88 else 0
+ offset = 1 if inum < 0x7c else 0
url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
chr(97 + offset),
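
The URL fix (#1975) narrows the subdomain choice from a three-way 0x44/0x88 split to a two-way split at 0x7c, keyed off two hex digits of the image hash. A worked example of the new calculation (the threshold mirrors ltn.hitomi.la/common.js and changes whenever the site does):

def subdomain(ihash):
    inum = int(ihash[-3:-1], 16)          # '...def' -> int('de', 16) = 222
    offset = 1 if inum < 0x7c else 0      # previously: 2 / 1 / 0 at 0x44 / 0x88
    return chr(97 + offset) + "b"         # 'ab' or 'bb'

print(subdomain("0123456789abcdef" * 4))  # 222 >= 0x7c -> 'ab'
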
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 983ae37..bf479ab 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -426,7 +426,7 @@ class InstagramPostsExtractor(InstagramExtractor):
url = "{}/{}/".format(self.root, self.item)
user = self._extract_profile_page(url)
- query_hash = "7ea6ae3cf6fb05e73fcbe1732b1d2a42"
+ query_hash = "8c2a529969ee035a5063f2fc8602a0fd"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
return self._pagination_graphql(query_hash, variables, edge)
@@ -655,7 +655,7 @@ class InstagramPostExtractor(InstagramExtractor):
)
def posts(self):
- query_hash = "1f950d414a6e11c98c556aa007b3157d"
+ query_hash = "2efa04f61586458cef44441f474eee7c"
variables = {
"shortcode" : self.item,
"child_comment_count" : 3,
@@ -713,25 +713,14 @@ class InstagramHighlightsExtractor(InstagramExtractor):
url = "{}/{}/".format(self.root, self.item)
user = self._extract_profile_page(url)
- query_hash = "d4d88dc1500312af6f937f7b804c68c3"
- variables = {
- "user_id": user["id"],
- "include_chaining": False,
- "include_reel": True,
- "include_suggested_users": False,
- "include_logged_out_extras": False,
- "include_highlight_reels": True,
- "include_live_status": True,
- }
- data = self._request_graphql(query_hash, variables)
- edges = data["user"]["edge_highlight_reels"]["edges"]
- if not edges:
- return ()
-
- reel_ids = ["highlight:" + edge["node"]["id"] for edge in edges]
- endpoint = "/v1/feed/reels_media/?reel_ids=" + \
- "&reel_ids=".join(text.quote(rid) for rid in reel_ids)
- reels = self._request_api(endpoint)["reels"]
+ endpoint = "/v1/highlights/{}/highlights_tray/".format(user["id"])
+ tray = self._request_api(endpoint)["tray"]
+
+ reel_ids = [highlight["id"] for highlight in tray]
+ endpoint = "/v1/feed/reels_media/"
+ params = {"reel_ids": reel_ids}
+ reels = self._request_api(endpoint, params=params)["reels"]
+
return [reels[rid] for rid in reel_ids]
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index d5aad67..2e1d0b2 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -35,6 +35,7 @@ class KemonopartyExtractor(Extractor):
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
skip_service = \
"patreon" if self.config("patreon-skip-file", True) else None
+ comments = self.config("comments")
if self.config("metadata"):
username = text.unescape(text.extract(
@@ -68,6 +69,8 @@ class KemonopartyExtractor(Extractor):
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
if username:
post["username"] = username
+ if comments:
+ post["comments"] = self._extract_comments(post)
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
@@ -100,6 +103,24 @@ class KemonopartyExtractor(Extractor):
return {c.name: c.value for c in response.history[0].cookies}
+ def _extract_comments(self, post):
+ url = "{}/{}/user/{}/post/{}".format(
+ self.root, post["service"], post["user"], post["id"])
+ page = self.request(url).text
+
+ comments = []
+ for comment in text.extract_iter(page, "<article", "</article>"):
+ extr = text.extract_from(comment)
+ cid = extr('id="', '"')
+ comments.append({
+ "id" : cid,
+ "user": extr('href="#' + cid + '"', '</').strip(" \n\r>"),
+ "body": extr(
+ '<section class="comment__body">', '</section>').strip(),
+ "date": extr('datetime="', '"'),
+ })
+ return comments
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 665d257..6761b55 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -105,7 +105,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"exception": exception.NotFoundError,
}),
("https://members.luscious.net/albums/login-required_323871/", {
- "count": 78,
+ "count": 64,
}),
("https://www.luscious.net/albums/okinami_277031/"),
("https://members.luscious.net/albums/okinami_277031/"),
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
index cab866a..c71b003 100644
--- a/gallery_dl/extractor/mangakakalot.py
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Jake Mannens
+# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://mangakakalot.com/"""
+"""Extractors for https://mangakakalot.tv/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -16,34 +17,24 @@ import re
class MangakakalotBase():
"""Base class for mangakakalot extractors"""
category = "mangakakalot"
- root = "https://mangakakalot.com"
-
- @staticmethod
- def parse_page(page, data):
- """Parse metadata on 'page' and add it to 'data'"""
- text.extract_all(page, (
- ("manga" , '<h1>', '</h1>'),
- ('author' , '<li>Author(s) :\n', '</a>'),
- ), values=data)
- data["author"] = text.remove_html(data["author"])
- return data
+ root = "https://ww.mangakakalot.tv"
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
- """Extractor for manga-chapters from mangakakalot.com"""
- pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com"
- r"(/chapter/\w+/chapter_[^/?#]+)")
+ """Extractor for manga chapters from mangakakalot.tv"""
+ pattern = (r"(?:https?://)?(?:www?\.)?mangakakalot\.tv"
+ r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)")
test = (
- ("https://mangakakalot.com/chapter/rx922077/chapter_6", {
- "pattern": r"https://s\d+\.\w+\.com/mangakakalot/r\d+/rx922077/"
- r"chapter_6_master_help_me_out/\d+\.jpg",
- "keyword": "80fde46d2210a6c17f0b2f7c1c89f0f56b65e157",
+ ("https://ww.mangakakalot.tv/chapter/manga-hl984546/chapter-6", {
+ "pattern": r"https://cm\.blazefast\.co"
+ r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
+ "keyword": "e9646a76a210f1eb4a71b4134664814c99d65d48",
"count": 14,
}),
- (("https://mangakakalot.com/chapter"
+ (("https://mangakakalot.tv/chapter"
"/hatarakanai_futari_the_jobless_siblings/chapter_20.1"), {
- "keyword": "6b24349bb16f41ef1c4350200c1ccda5f09ae136",
- "content": "7196aed8bb1536806bf55033ed1f2ed172c86f9a",
+ "keyword": "14c430737ff600b26a3811815905f34dd6a6c8c6",
+ "content": "b3eb1f139caef98d9dcd8ba6a5ee146a13deebc4",
"count": 2,
}),
)
@@ -54,10 +45,10 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
self.session.headers['Referer'] = self.root
def metadata(self, page):
- _ , pos = text.extract(page, '<span itemprop="name">', '<')
- manga , pos = text.extract(page, '<span itemprop="name">', '<', pos)
- info , pos = text.extract(page, '<span itemprop="name">', '<', pos)
- author, pos = text.extract(page, '. Author: ', ' already has ', pos)
+ _ , pos = text.extract(page, '<span itemprop="title">', '<')
+ manga , pos = text.extract(page, '<span itemprop="title">', '<', pos)
+ info , pos = text.extract(page, '<span itemprop="title">', '<', pos)
+ author, pos = text.extract(page, '. Author:', ' already has ', pos)
match = re.match(
r"(?:[Vv]ol\. *(\d+) )?"
@@ -69,7 +60,7 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
return {
"manga" : text.unescape(manga),
"title" : text.unescape(title) if title else "",
- "author" : text.unescape(author) if author else "",
+ "author" : text.unescape(author).strip() if author else "",
"volume" : text.parse_int(volume),
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
@@ -78,42 +69,46 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
}
def images(self, page):
- page = text.extract(
- page, 'class="container-chapter-reader', '\n<div')[0]
return [
(url, None)
- for url in text.extract_iter(page, '<img src="', '"')
+ for url in text.extract_iter(page, '<img data-src="', '"')
]
class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor):
- """Extractor for manga from mangakakalot.com"""
+ """Extractor for manga from mangakakalot.tv"""
chapterclass = MangakakalotChapterExtractor
- pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com"
- r"(/(?:manga/|read-)\w+)")
+ pattern = (r"(?:https?://)?(?:www?\.)?mangakakalot\.tv"
+ r"(/manga/[^/?#]+)")
test = (
- ("https://mangakakalot.com/manga/lk921810", {
- "url": "d262134b65993b031406f7b9d9442c9afd321a27",
+ ("https://ww.mangakakalot.tv/manga/lk921810", {
+ "url": "654d040c17728c9c8756fce7092b084e8dcf67d2",
}),
- ("https://mangakakalot.com/read-ry3sw158504884246", {
+ ("https://mangakakalot.tv/manga/manga-jk986845", {
"pattern": MangakakalotChapterExtractor.pattern,
- "count": ">= 40"
+ "count": ">= 30",
}),
)
def chapters(self, page):
+ data = {"lang": "en", "language": "English"}
+ data["manga"], pos = text.extract(page, "<h1>", "<")
+ author, pos = text.extract(page, "<li>Author(s) :", "</a>", pos)
+ data["author"] = text.remove_html(author)
+
results = []
- data = self.parse_page(page, {"lang": "en", "language": "English"})
-
- needle = '<div class="row">\n<span><a href="'
- pos = page.index('<div class="chapter-list">')
- while True:
- url, pos = text.extract(page, needle, '"', pos)
- if not url:
- return results
- data["title"], pos = text.extract(page, '>', '</a>', pos)
- data["date"] , pos = text.extract(page, '<span title="', '">', pos)
- chapter, sep, minor = url.rpartition("/chapter_")[2].partition(".")
+ for chapter in text.extract_iter(page, '<div class="row">', '</div>'):
+ url, pos = text.extract(chapter, '<a href="', '"')
+ title, pos = text.extract(chapter, '>', '</a>', pos)
+ data["title"] = title.partition(": ")[2]
+ data["date"] , pos = text.extract(
+ chapter, '<span title=" ', '"', pos)
+
+ chapter, sep, minor = url.rpartition("/chapter-")[2].partition(".")
data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor
+
+ if url.startswith("/"):
+ url = self.root + url
results.append((url, data.copy()))
+ return results
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 4bd5572..a883b91 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -66,7 +66,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
(("https://mangapark.net/manga"
"/gekkan-shoujo-nozaki-kun/i2067426/v7/c70/1"), {
"count": 15,
- "keyword": "8f18f1c977ebe049ef35e3a877eaaab97fb25274",
+ "keyword": "edc14993c4752cee3a76e09b2f024d40d854bfd1",
}),
("https://mangapark.me/manga/gosu/i811615/c55/1"),
("https://mangapark.com/manga/gosu/i811615/c55/1"),
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index 5d240d4..d45fbc9 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -36,12 +36,16 @@ class MangoxoExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
+ url = self.root + "/login"
+ page = self.request(url).text
+ token = text.extract(page, 'id="loginToken" value="', '"')[0]
+
url = self.root + "/api/login"
headers = {
"X-Requested-With": "XMLHttpRequest",
"Referer": self.root + "/login",
}
- data = self._sign_by_md5(username, password)
+ data = self._sign_by_md5(username, password, token)
response = self.request(url, method="POST", headers=headers, data=data)
data = response.json()
@@ -50,11 +54,12 @@ class MangoxoExtractor(Extractor):
return {"SESSION": self.session.cookies.get("SESSION")}
@staticmethod
- def _sign_by_md5(username, password):
+ def _sign_by_md5(username, password, token):
# https://dns.mangoxo.com/libs/plugins/phoenix-ui/js/phoenix-ui.js
params = [
("username" , username),
("password" , password),
+ ("token" , token),
("timestamp", str(int(time.time()))),
]
query = "&".join("=".join(item) for item in sorted(params))
@@ -79,8 +84,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d",
"keyword": {
"channel": {
- "id": "Jpw9ywQ4",
- "name": "绘画艺术赏析",
+ "id": "gaxO16d8",
+ "name": "Phoenix",
"cover": str,
},
"album": {
@@ -116,14 +121,14 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
def metadata(self, page):
"""Return general metadata"""
- title, pos = text.extract(page, '<title>', '</title>')
- _ , pos = text.extract(page, 'class="desc"', '', pos)
- cid , pos = text.extract(page, '//www.mangoxo.com/channel/', '"', pos)
- cname, pos = text.extract(page, '>', '<', pos)
- count, pos = text.extract(page, 'id="pic-count">', '<', pos)
- cover, pos = text.extract(page, ' src="', '"', pos)
- date , pos = text.extract(page, '</i>', '<', pos)
- descr, pos = text.extract(page, '<pre>', '</pre>', pos)
+ extr = text.extract_from(page)
+ title = extr('<title>', '</title>')
+ count = extr('id="pic-count">', '<')
+ cid = extr('<img alt="', '"')
+ cover = extr(' src="', '"')
+ cname = extr('target="_blank">', '<')
+ date = extr('</i>', '<')
+ descr = extr('<pre>', '</pre>')
return {
"channel": {
@@ -157,8 +162,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
class MangoxoChannelExtractor(MangoxoExtractor):
"""Extractor for all albums on a mangoxo channel"""
subcategory = "channel"
- pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/channel/(\w+)"
- test = ("https://www.mangoxo.com/channel/QeYKRkO0", {
+ pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/(\w+)/album"
+ test = ("https://www.mangoxo.com/phoenix/album", {
"pattern": MangoxoAlbumExtractor.pattern,
"range": "1-30",
"count": "> 20",
@@ -166,12 +171,12 @@ class MangoxoChannelExtractor(MangoxoExtractor):
def __init__(self, match):
MangoxoExtractor.__init__(self, match)
- self.channel_id = match.group(1)
+ self.user = match.group(1)
def items(self):
self.login()
num = total = 1
- url = "{}/channel/{}/album/".format(self.root, self.channel_id)
+ url = "{}/{}/album/".format(self.root, self.user)
data = {"_extractor": MangoxoAlbumExtractor}
while True:
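
The login fix scrapes a one-time token from the /login page (the loginToken input) and includes it in the signed form data. The hunk shows the parameters being sorted and joined into a query string; the actual hashing sits outside the hunk, so this sketch assumes, going by the _sign_by_md5 name, that the sign is the query's hex md5:

import hashlib
import time

def sign_by_md5(username, password, token):
    params = [
        ("username" , username),
        ("password" , password),
        ("token"    , token),
        ("timestamp", str(int(time.time()))),
    ]
    query = "&".join("=".join(item) for item in sorted(params))
    sign = hashlib.md5(query.encode()).hexdigest()   # assumed final step
    return dict(params, sign=sign)

print(sign_by_md5("user", "pass", "token123"))
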
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 05cbcdf..ad9f620 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -99,8 +99,10 @@ class PahealPostExtractor(PahealExtractor):
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)")
test = ("https://rule34.paheal.net/post/view/481609", {
- "url": "a91d579be030753282f55b8cb4eeaa89c45a9116",
- "keyword": "e02e4dcf8cdf4e9c206e695253c9024d79a2e20a",
+ "pattern": r"https://tulip\.paheal\.net/_images"
+ r"/bbdc1c33410c2cdce7556c7990be26b7/481609%20-%20"
+ r"Azumanga_Daioh%20Osaka%20Vuvuzela%20inanimate\.jpg",
+ "keyword": "abe7c1220ba5601f9639aa79fbb9689674ec8f5c",
"content": "7b924bcf150b352ac75c9d281d061e174c851a11",
})
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index c7df089..62e4f58 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -151,7 +151,7 @@ class PatreonExtractor(Extractor):
included[file["type"]][file["id"]]
for file in files["data"]
]
- return ()
+ return []
@memcache(keyarg=1)
def _user(self, url):
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index c6c885c..d3b3bb1 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -107,11 +107,11 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
"source_url": "https://www.deviantart.com/speccysy/art"
"/Afternoon-Flight-215193985",
"spoilered": False,
- "tag_count": 39,
+ "tag_count": 42,
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2021-07-13T14:22:40Z",
+ "updated_at": "2021-09-30T20:04:01Z",
"uploader": "Clover the Clever",
"uploader_id": 211188,
"upvotes": int,
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 43c7e50..55c963d 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -112,12 +112,18 @@ class RedditExtractor(Extractor):
"""Return an iterable containing all (submission, comments) tuples"""
def _extract_gallery(self, submission):
- if submission["gallery_data"] is None:
+ gallery = submission["gallery_data"]
+ if gallery is None:
self.log.warning("gallery %s: deleted", submission["id"])
return
- meta = submission["media_metadata"]
- for item in submission["gallery_data"]["items"]:
+ meta = submission.get("media_metadata")
+ if meta is None:
+ self.log.warning("gallery %s: missing 'media_metadata'",
+ submission["id"])
+ return
+
+ for item in gallery["items"]:
data = meta[item["media_id"]]
if data["status"] != "valid" or "s" not in data:
self.log.warning(
@@ -221,6 +227,10 @@ class RedditSubmissionExtractor(RedditExtractor):
("https://www.reddit.com/r/cosplay/comments/jvwaqr", {
"count": 1,
}),
+ # gallery with no 'media_metadata' (#2001)
+ ("https://www.reddit.com/r/kpopfap/comments/qjj04q/", {
+ "count": 0,
+ }),
("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index e078bef..df50f70 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -8,15 +8,60 @@
"""Extractors for https://redgifs.com/"""
-from .gfycat import GfycatExtractor, GfycatAPI
+from .common import Extractor, Message
from .. import text
-class RedgifsExtractor(GfycatExtractor):
+class RedgifsExtractor(Extractor):
"""Base class for redgifs extractors"""
category = "redgifs"
+ filename_fmt = "{category}_{id}.{extension}"
+ archive_fmt = "{id}"
root = "https://www.redgifs.com"
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.key = match.group(1)
+
+ formats = self.config("format")
+ if formats is None:
+ formats = ("hd", "sd", "gif")
+ elif isinstance(formats, str):
+ formats = (formats, "hd", "sd", "gif")
+ self.formats = formats
+
+ def items(self):
+ metadata = self.metadata()
+ for gif in self.gifs():
+ url = self._process(gif)
+ if not url:
+ self.log.warning("Skipping '%s' (format not available)",
+ gif["id"])
+ continue
+
+ gif.update(metadata)
+ yield Message.Directory, gif
+ yield Message.Url, url, gif
+
+ def _process(self, gif):
+ gif["_fallback"] = formats = self._formats(gif)
+ gif["date"] = text.parse_timestamp(gif.get("createDate"))
+ return next(formats, None)
+
+ def _formats(self, gif):
+ urls = gif["urls"]
+ for fmt in self.formats:
+ url = urls.get(fmt)
+ if url:
+ text.nameext_from_url(url, gif)
+ yield url
+
+ def metadata(self):
+ return {}
+
+ def gifs(self):
+ return ()
+
class RedgifsUserExtractor(RedgifsExtractor):
"""Extractor for redgifs user profiles"""
@@ -24,11 +69,14 @@ class RedgifsUserExtractor(RedgifsExtractor):
directory_fmt = ("{category}", "{userName}")
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)"
test = ("https://www.redgifs.com/users/Natalifiction", {
- "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
- "count": ">= 100",
+ "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4",
+ "count": ">= 120",
})
- def gfycats(self):
+ def metadata(self):
+ return {"userName": self.key}
+
+ def gifs(self):
return RedgifsAPI(self).user(self.key)
@@ -36,19 +84,23 @@ class RedgifsSearchExtractor(RedgifsExtractor):
"""Extractor for redgifs search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?#]+)"
- test = ("https://www.redgifs.com/gifs/browse/jav", {
- "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
- "range": "1-10",
- "count": 10,
- })
+ pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)"
+ test = (
+ ("https://www.redgifs.com/browse?tags=JAV", {
+ "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4",
+ "range": "1-10",
+ "count": 10,
+ }),
+ ("https://www.redgifs.com/browse?type=i&verified=y&order=top7"),
+ )
def metadata(self):
- self.key = text.unquote(self.key).replace("-", " ")
- return {"search": self.key}
+ self.params = params = text.parse_query(self.key)
+ search = params.get("tags") or params.get("order") or "trending"
+ return {"search": search}
- def gfycats(self):
- return RedgifsAPI(self).search(self.key)
+ def gifs(self):
+ return RedgifsAPI(self).search(self.params)
class RedgifsImageExtractor(RedgifsExtractor):
@@ -58,7 +110,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
r"|gifdeliverynetwork.com)/([A-Za-z]+)")
test = (
("https://redgifs.com/watch/foolishforkedabyssiniancat", {
- "pattern": r"https://\w+\.(redgifs|gfycat)\.com"
+ "pattern": r"https://\w+\.redgifs\.com"
r"/FoolishForkedAbyssiniancat\.mp4",
"content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
}),
@@ -66,9 +118,42 @@ class RedgifsImageExtractor(RedgifsExtractor):
("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
)
- def gfycats(self):
- return (RedgifsAPI(self).gfycat(self.key),)
+ def gifs(self):
+ return (RedgifsAPI(self).gif(self.key),)
-class RedgifsAPI(GfycatAPI):
+class RedgifsAPI():
API_ROOT = "https://api.redgifs.com"
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+
+ def gif(self, gif_id):
+ endpoint = "/v2/gifs/" + gif_id.lower()
+ return self._call(endpoint)["gif"]
+
+ def user(self, user, order="best"):
+ endpoint = "/v2/users/{}/search".format(user.lower())
+ params = {"order": order}
+ return self._pagination(endpoint, params)
+
+ def search(self, params):
+ endpoint = "/v2/gifs/search"
+ params["search_text"] = params.pop("tags", None)
+ params.pop("needSendGtm", None)
+ return self._pagination(endpoint, params)
+
+ def _call(self, endpoint, params=None):
+ url = self.API_ROOT + endpoint
+ return self.extractor.request(url, params=params).json()
+
+ def _pagination(self, endpoint, params):
+ params["page"] = 1
+
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["gifs"]
+
+ if params["page"] >= data["pages"]:
+ return
+ params["page"] += 1
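
The v2 API (#1984) paginates by page number: each response reports the total pages, and iteration stops once the current page reaches it. A bare-bones standalone client mirroring the hunk (requests assumed; the live API may additionally require auth headers these days):

import requests

API_ROOT = "https://api.redgifs.com"

def search(params):
    # Walk /v2/gifs/search page by page until data["pages"] is exhausted.
    params["page"] = 1
    while True:
        data = requests.get(API_ROOT + "/v2/gifs/search",
                            params=params, timeout=30).json()
        yield from data["gifs"]
        if params["page"] >= data["pages"]:
            return
        params["page"] += 1

# for gif in search({"search_text": "example"}):
#     print(gif["id"], gif["urls"].get("hd") or gif["urls"].get("sd"))
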
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 2ea6f57..59e8be6 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -78,8 +78,8 @@ class SankakuTagExtractor(SankakuExtractor):
test = (
("https://sankaku.app/?tags=bonocho", {
"count": 5,
- "pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
- r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
+ "pattern": r"https://v\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
+ r"/[^/]{32}\.\w+\?e=\d+&expires=\d+&m=[^&#]+",
}),
("https://beta.sankakucomplex.com/?tags=bonocho"),
("https://chan.sankakucomplex.com/?tags=bonocho"),
@@ -159,7 +159,7 @@ class SankakuPostExtractor(SankakuExtractor):
}),
# 'contentious_content'
("https://sankaku.app/post/show/21418978", {
- "pattern": r"https://s\.sankakucomplex\.com"
+ "pattern": r"https://v\.sankakucomplex\.com"
r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
}),
# empty tags (#1617)
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
new file mode 100644
index 0000000..c1a8878
--- /dev/null
+++ b/gallery_dl/extractor/skeb.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://skeb.jp/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class SkebExtractor(Extractor):
+ """Base class for skeb extractors"""
+ category = "skeb"
+ directory_fmt = ("{category}", "{creator[screen_name]}")
+ filename_fmt = "{post_num}_{file_id}.{extension}"
+ archive_fmt = "{post_num}_{file_id}_{content_category}"
+ root = "https://skeb.jp"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user_name = match.group(1)
+
+ def items(self):
+ for post_num in self.posts():
+ response, post = self._get_post_data(post_num)
+ yield Message.Directory, post
+ for data in self._get_urls_from_post(response, post):
+ url = data["file_url"]
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+ def posts(self):
+ """Return post number"""
+
+ def _pagination(self):
+ url = "{}/api/users/{}/works".format(self.root, self.user_name)
+ params = {"role": "creator", "sort": "date", "offset": 0}
+ headers = {"Referer": self.root, "Authorization": "Bearer null"}
+
+ while True:
+ posts = self.request(url, params=params, headers=headers).json()
+
+ for post in posts:
+ post_num = post["path"].rpartition("/")[2]
+ if post["private"]:
+ self.log.debug("Skipping %s (private)", post_num)
+ continue
+ yield post_num
+
+ if len(posts) < 30:
+ return
+ params["offset"] += 30
+
+ def _get_post_data(self, post_num):
+ url = "{}/api/users/{}/works/{}".format(
+ self.root, self.user_name, post_num)
+ headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ resp = self.request(url, headers=headers).json()
+ creator = resp["creator"]
+ post = {
+ "post_num" : post_num,
+ "post_url" : self.root + resp["path"],
+ "body" : resp["body"],
+ "source_body" : resp["source_body"],
+ "translated_body" : resp["translated"],
+ "completed_at" : resp["completed_at"],
+ "date" : text.parse_datetime(
+ resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
+ "nsfw" : resp["nsfw"],
+ "anonymous" : resp["anonymous"],
+ "tags" : resp["tag_list"],
+ "genre" : resp["genre"],
+ "thanks" : resp["thanks"],
+ "source_thanks" : resp["source_thanks"],
+ "translated_thanks": resp["translated_thanks"],
+ "creator": {
+ "id" : creator["id"],
+ "name" : creator["name"],
+ "screen_name" : creator["screen_name"],
+ "avatar_url" : creator["avatar_url"],
+ "header_url" : creator["header_url"],
+ }
+ }
+ if not resp["anonymous"] and "client" in resp:
+ client = resp["client"]
+ post["client"] = {
+ "id" : client["id"],
+ "name" : client["name"],
+ "screen_name" : client["screen_name"],
+ "avatar_url" : client["avatar_url"],
+ "header_url" : client["header_url"],
+ }
+ return resp, post
+
+ def _get_urls_from_post(self, resp, post):
+ if "og_image_url" in resp:
+ post["content_category"] = "thumb"
+ post["file_id"] = "thumb"
+ post["file_url"] = resp["og_image_url"]
+ yield post
+
+ for preview in resp["previews"]:
+ post["content_category"] = "preview"
+ post["file_id"] = preview["id"]
+ post["file_url"] = preview["url"]
+ info = preview["information"]
+ post["original"] = {
+ "width" : info["width"],
+ "height" : info["height"],
+ "byte_size" : info["byte_size"],
+ "duration" : info["duration"],
+ "frame_rate": info["frame_rate"],
+ "software" : info["software"],
+ "extension" : info["extension"],
+ "is_movie" : info["is_movie"],
+ "transcoder": info["transcoder"],
+ }
+ yield post
+
+
+class SkebPostExtractor(SkebExtractor):
+ """Extractor for a single skeb post"""
+ subcategory = "post"
+ pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
+
+ def __init__(self, match):
+ SkebExtractor.__init__(self, match)
+ self.post_num = match.group(2)
+
+ def posts(self):
+ return (self.post_num,)
+
+
+class SkebUserExtractor(SkebExtractor):
+ """Extractor for all posts from a skeb user"""
+ subcategory = "user"
+ pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"
+
+ def posts(self):
+ return self._pagination()
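
The user extractor pages through /api/users/{user}/works in blocks of 30 via an offset parameter, sending "Authorization: Bearer null" for an anonymous session; a page shorter than 30 entries marks the end. A minimal sketch of that loop outside the extractor framework (requests assumed, user name hypothetical):

import requests

def skeb_works(user, root="https://skeb.jp"):
    url = "{}/api/users/{}/works".format(root, user)
    params = {"role": "creator", "sort": "date", "offset": 0}
    headers = {"Referer": root, "Authorization": "Bearer null"}
    while True:
        posts = requests.get(url, params=params, headers=headers,
                             timeout=30).json()
        for post in posts:
            if not post["private"]:
                yield post["path"].rpartition("/")[2]   # the post number
        if len(posts) < 30:        # short page: no further results
            return
        params["offset"] += 30

# for post_num in skeb_works("someuser"):
#     print(post_num)
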
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index a3c77e8..91386e8 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -25,7 +25,7 @@ class SlidesharePresentationExtractor(Extractor):
(("https://www.slideshare.net"
"/Slideshare/get-started-with-slide-share"), {
"url": "23685fb9b94b32c77a547d45dc3a82fe7579ea18",
- "content": "ee54e54898778e92696a7afec3ffabdbd98eb0cc",
+ "content": "2e90a01c6ca225579ebf8f98ab46f97a28a5e45c",
}),
# long title
(("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren"
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index f7408e6..5d582b5 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
test = (
("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
"url": "e6408fd2c64e721fd146130dceb56a971ceb4259",
- "keyword": "05c8d50aa6ea08d458f83c38d7f9e92148362f0e",
+ "keyword": "b15af021186b7234cebcac758d2a4fd8462f9912",
"content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
}),
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "720da317232504f05099da37802ed3c3ce3cd310",
+ "keyword": "f6967cc5a46c3e130a4f8de7c5c971f72e07fe61",
}),
)
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index ec1e044..9b06f92 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -150,7 +150,7 @@ class TapasEpisodeExtractor(TapasExtractor):
subcategory = "episode"
pattern = BASE_PATTERN + r"/episode/(\d+)"
test = ("https://tapas.io/episode/2068651", {
- "url": "0e536117dfaa17972e83d2e0141e6f9e91a33611",
+ "url": "f122b05648a9f53c2ddb2f6854a7a80ab946e9e8",
"pattern": "^text:",
"keyword": {
"book": True,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 568ee2e..00f3b04 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -36,6 +36,7 @@ class TwitterExtractor(Extractor):
self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
+ self.pinned = self.config("pinned", False)
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
@@ -838,7 +839,7 @@ class TwitterAPI():
if params is None:
params = self.params.copy()
original_retweets = (self.extractor.retweets == "original")
- pinned_tweet = True
+ pinned_tweet = self.extractor.pinned
while True:
cursor = tweet = None
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 59649a0..0922c7c 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -31,7 +31,10 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
r"/([^/?#]+)/photos/(\d+)")
test = (
("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
- "url": "cb4657a37eea5ab6b1d333491cee7eeb529b0645",
+ "count": 8,
+ "pattern": r"https://profile-pics-l3\.xvideos-cdn\.com"
+ r"/[0-9a-f]{40}-\d+/videos/profiles/galleries/84/ca/37"
+ r"/pervertedcouple/gal751031/pic_\d+_big\.jpg",
"keyword": {
"gallery": {
"id" : 751031,
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 654e5d0..f5d961a 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -8,14 +8,20 @@
"""String formatters"""
+import os
import json
import string
import _string
+import datetime
import operator
from . import text, util
_CACHE = {}
_CONVERSIONS = None
+_GLOBALS = {
+ "_env": lambda: os.environ,
+ "_now": datetime.datetime.now,
+}
def parse(format_string, default=None):
@@ -58,7 +64,7 @@ class StringFormatter():
- "j". calls json.dumps
- "t": calls str.strip
- "d": calls text.parse_timestamp
- - "U": calls urllib.parse.unquote
+    - "U": calls text.unescape
- "S": calls util.to_string()
- "T": calls util.to_timestamü()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
@@ -125,6 +131,8 @@ class StringFormatter():
], fmt)
else:
key, funcs = parse_field_name(field_name)
+ if key in _GLOBALS:
+ return self._apply_globals(_GLOBALS[key], funcs, fmt)
if funcs:
return self._apply(key, funcs, fmt)
return self._apply_simple(key, fmt)
@@ -140,6 +148,17 @@ class StringFormatter():
return fmt(obj)
return wrap
+ def _apply_globals(self, gobj, funcs, fmt):
+ def wrap(_):
+ try:
+ obj = gobj()
+ for func in funcs:
+ obj = func(obj)
+ except Exception:
+ obj = self.default
+ return fmt(obj)
+ return wrap
+
def _apply_simple(self, key, fmt):
def wrap(kwdict):
return fmt(kwdict[key] if key in kwdict else self.default)
@@ -149,7 +168,7 @@ class StringFormatter():
def wrap(kwdict):
for key, funcs in lst:
try:
- obj = kwdict[key]
+ obj = _GLOBALS[key]() if key in _GLOBALS else kwdict[key]
for func in funcs:
obj = func(obj)
if obj:
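
The new _GLOBALS fields (#1968) resolve at format time rather than from the metadata dict: _env maps to os.environ and _now to datetime.datetime.now(), and both run through the normal conversion/format-spec pipeline, including | alternatives. Usage mirroring the new tests (assuming gallery_dl is importable):

import os
from gallery_dl import formatter

os.environ["FORMATTER_TEST"] = "Value"

print(formatter.parse("{_env[FORMATTER_TEST]!l}").format_map({}))  # "value"
print(formatter.parse("{_now:%Y-%m-%d}").format_map({}))           # today's date
print(formatter.parse("{z|_now:%Y}").format_map({}))               # falls back to _now
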
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index ee01549..48817be 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.19.1"
+__version__ = "1.19.2"
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 70201f3..088b45b 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -11,6 +11,7 @@ import os
import sys
import unittest
import datetime
+import tempfile
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import formatter # noqa E402
@@ -173,6 +174,96 @@ class TestFormatter(unittest.TestCase):
self._run_test("{d[a]:?</>/L1/too long/}", "<too long>")
self._run_test("{d[c]:?</>/L5/too long/}", "")
+ def test_globals_env(self):
+ os.environ["FORMATTER_TEST"] = value = self.kwdict["a"]
+
+ self._run_test("{_env[FORMATTER_TEST]}" , value)
+ self._run_test("{_env[FORMATTER_TEST]!l}", value.lower())
+ self._run_test("{z|_env[FORMATTER_TEST]}", value)
+
+ def test_globals_now(self):
+ fmt = formatter.parse("{_now}")
+ out1 = fmt.format_map(self.kwdict)
+ self.assertRegex(out1, r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d+)?$")
+
+ out = formatter.parse("{_now:%Y%m%d}").format_map(self.kwdict)
+ now = datetime.datetime.now()
+ self.assertRegex(out, r"^\d{8}$")
+ self.assertEqual(out, format(now, "%Y%m%d"))
+
+ out = formatter.parse("{z|_now:%Y}").format_map(self.kwdict)
+ self.assertRegex(out, r"^\d{4}$")
+ self.assertEqual(out, format(now, "%Y"))
+
+ out2 = fmt.format_map(self.kwdict)
+ self.assertRegex(out2, r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d+)?$")
+ self.assertNotEqual(out1, out2)
+
+ def test_template(self):
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ path1 = os.path.join(tmpdirname, "tpl1")
+ path2 = os.path.join(tmpdirname, "tpl2")
+
+ with open(path1, "w") as fp:
+ fp.write("{a}")
+ fmt1 = formatter.parse("\fT " + path1)
+
+ with open(path2, "w") as fp:
+ fp.write("{a!u:Rh/C/}\nFooBar")
+ fmt2 = formatter.parse("\fT " + path2)
+
+ self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"])
+ self.assertEqual(fmt2.format_map(self.kwdict), "HELLO WORLD\nFooBar")
+
+ with self.assertRaises(OSError):
+ formatter.parse("\fT /")
+
+ def test_expression(self):
+ self._run_test("\fE a", self.kwdict["a"])
+ self._run_test("\fE name * 2 + ' ' + a", "{}{} {}".format(
+ self.kwdict["name"], self.kwdict["name"], self.kwdict["a"]))
+
+ def test_module(self):
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ path = os.path.join(tmpdirname, "testmod.py")
+
+ with open(path, "w") as fp:
+ fp.write("""
+def gentext(kwdict):
+ name = kwdict.get("Name") or kwdict.get("name") or "foo"
+ return "'{title1}' by {}".format(name, **kwdict)
+
+def lengths(kwdict):
+ a = 0
+ for k, v in kwdict.items():
+ try:
+ a += len(v)
+ except TypeError:
+ pass
+ return format(a)
+
+def noarg():
+ return ""
+""")
+ sys.path.insert(0, tmpdirname)
+ try:
+ fmt1 = formatter.parse("\fM testmod:gentext")
+ fmt2 = formatter.parse("\fM testmod:lengths")
+ fmt3 = formatter.parse("\fM testmod:noarg")
+
+ with self.assertRaises(AttributeError):
+ formatter.parse("\fM testmod:missing")
+ with self.assertRaises(ImportError):
+ formatter.parse("\fM missing:missing")
+ finally:
+ sys.path.pop(0)
+
+ self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
+ self.assertEqual(fmt2.format_map(self.kwdict), "65")
+
+ with self.assertRaises(TypeError):
+ self.assertEqual(fmt3.format_map(self.kwdict), "")
+
def _run_test(self, format_string, result, default=None):
fmt = formatter.parse(format_string, default)
output = fmt.format_map(self.kwdict)