author     Unit 193 <unit193@unit193.net>    2021-11-10 01:26:06 -0500
committer  Unit 193 <unit193@unit193.net>    2021-11-10 01:26:06 -0500
commit     fc8c5e642017e2b4e5299e2093e72b316479690d (patch)
tree       41119c71e8d86755f4c945d8c2ccf5a7000f546e
parent     4a965d875415907cc1a016b428ae305a964f9228 (diff)
download   gallery-dl-fc8c5e642017e2b4e5299e2093e72b316479690d.tar.bz2
           gallery-dl-fc8c5e642017e2b4e5299e2093e72b316479690d.tar.xz
           gallery-dl-fc8c5e642017e2b4e5299e2093e72b316479690d.tar.zst
New upstream version 1.19.2 (upstream/1.19.2)
39 files changed, 612 insertions, 171 deletions
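The headline addition in 1.19.2 is support for environment variables and the current local datetime in format strings, exposed through the new `_env` and `_now` globals (see the gallery_dl/formatter.py and test/test_formatter.py hunks below). A minimal sketch of how a format string could use them, assuming the formatter API as shown in this diff; the MY_DIR variable and the kwdict values are illustrative:

    # sketch based on the formatter changes in this release
    import os
    from gallery_dl import formatter

    os.environ["MY_DIR"] = "downloads"   # illustrative environment variable

    # "_env" exposes environment variables, "_now" the current local datetime
    fmt = formatter.parse("{_env[MY_DIR]}/{_now:%Y-%m-%d}/{category}_{id}")
    print(fmt.format_map({"category": "skeb", "id": 1}))
    # e.g. downloads/2021-11-05/skeb_1 (the date depends on when it runs)
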
diff --git a/CHANGELOG.md b/CHANGELOG.md index f6b60f0..9fa1540 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## 1.19.2 - 2021-11-05 +### Additions +- [kemonoparty] add `comments` option ([#1980](https://github.com/mikf/gallery-dl/issues/1980)) +- [skeb] add `user` and `post` extractors ([#1031](https://github.com/mikf/gallery-dl/issues/1031), [#1971](https://github.com/mikf/gallery-dl/issues/1971)) +- [twitter] add `pinned` option +- support accessing environment variables and the current local datetime in format strings ([#1968](https://github.com/mikf/gallery-dl/issues/1968)) +- add special type format strings to docs ([#1987](https://github.com/mikf/gallery-dl/issues/1987)) +### Fixes +- [cyberdrop] fix video extraction ([#1993](https://github.com/mikf/gallery-dl/issues/1993)) +- [deviantart] fix `index` values for stashed deviations +- [gfycat] provide consistent `userName` values for `user` downloads ([#1962](https://github.com/mikf/gallery-dl/issues/1962)) +- [gfycat] show warning when there are no available formats +- [hitomi] fix image URLs ([#1975](https://github.com/mikf/gallery-dl/issues/1975), [#1982](https://github.com/mikf/gallery-dl/issues/1982), [#1988](https://github.com/mikf/gallery-dl/issues/1988)) +- [instagram] update query hashes +- [mangakakalot] update domain and fix extraction +- [mangoxo] fix login and extraction +- [reddit] prevent crash for galleries with no `media_metadata` ([#2001](https://github.com/mikf/gallery-dl/issues/2001)) +- [redgifs] update to API v2 ([#1984](https://github.com/mikf/gallery-dl/issues/1984)) +- fix calculating retry sleep times ([#1990](https://github.com/mikf/gallery-dl/issues/1990)) + ## 1.19.1 - 2021-10-24 ### Additions - [inkbunny] add `following` extractor ([#515](https://github.com/mikf/gallery-dl/issues/515)) @@ -24,6 +44,7 @@ - [patreon] better filenames for `content` images ([#1954](https://github.com/mikf/gallery-dl/issues/1954)) - [redgifs][gfycat] provide fallback URLs ([#1962](https://github.com/mikf/gallery-dl/issues/1962)) - [downloader:ytdl] prevent crash in `_progress_hook()` +- restore SOCKS support for Windows executables ## 1.19.0 - 2021-10-01 ### Additions @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.19.1 +Version: 1.19.2 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -75,8 +75,8 @@ Description: ========== Prebuilt executable files with a Python interpreter and required Python packages included are available for - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -64,8 +64,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__ +- `Linux 
<https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index b893a3e..ba0aa8d 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-10-24" "1.19.1" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-11-05" "1.19.2" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index cdedaee..dc097d2 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-10-24" "1.19.1" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-11-05" "1.19.2" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1518,6 +1518,17 @@ You can use \f[I]"all"\f[] instead of listing all values separately. Download video files. +.SS extractor.kemonoparty.comments +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract \f[I]comments\f[] metadata. + + .SS extractor.kemonoparty.max-posts .IP "Type:" 6 \f[I]integer\f[] @@ -2107,17 +2118,17 @@ video extraction and download * \f[I]string\f[] .IP "Default:" 9 -\f[I]["mp4", "webm", "mobile", "gif"]\f[] +\f[I]["hd", "sd", "gif"]\f[] .IP "Description:" 4 List of names of the preferred animation format, which can be -\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[], \f[I]"mobile"\f[], or \f[I]"mini"\f[]. +\f[I]"hd"\f[], \f[I]"sd"\f[], "gif", "vthumbnail"`, "thumbnail"\f[I], or \f[]"poster"\f[I]. If a selected format is not available, the next one in the list will be tried until an available format is found. -If the format is given as \f[I]string\f[], it will be extended with -\f[I]["mp4", "webm", "mobile", "gif"]\f[]. Use a list with one element to +If the format is given as \f[]string\f[I], it will be extended with +\f[]["hd", "sd", "gif"]``. Use a list with one element to restrict it to only one possible format. @@ -2285,6 +2296,17 @@ Known available sizes are Logout and retry as guest when access to another user's Tweets is blocked. +.SS extractor.twitter.pinned +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Fetch media from pinned Tweets. 
+ + .SS extractor.twitter.quoted .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index c89f4b9..5e2628f 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -221,7 +221,7 @@ }, "redgifs": { - "format": ["mp4", "webm", "mobile", "gif"] + "format": ["hd", "sd", "gif"] }, "sankakucomplex": { @@ -266,6 +266,7 @@ "password": null, "cards": false, "conversations": false, + "pinned": false, "quoted": false, "replies": true, "retweets": false, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 5f9b4b8..7e6d632 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.19.1 +Version: 1.19.2 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -75,8 +75,8 @@ Description: ========== Prebuilt executable files with a Python interpreter and required Python packages included are available for - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 6dd43c8..409b8e7 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -154,6 +154,7 @@ gallery_dl/extractor/senmanga.py gallery_dl/extractor/sexcom.py gallery_dl/extractor/shopify.py gallery_dl/extractor/simplyhentai.py +gallery_dl/extractor/skeb.py gallery_dl/extractor/slickpic.py gallery_dl/extractor/slideshare.py gallery_dl/extractor/smugmug.py diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index c961ded..3cd9c3a 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -55,7 +55,7 @@ class _8musesAlbumExtractor(Extractor): }, }), # custom sorting - ("https://www.8muses.com/comics/album/Fakku-Comics/8?sort=az", { + ("https://www.8muses.com/comics/album/Fakku-Comics/9?sort=az", { "count": ">= 70", "keyword": {"name": r"re:^[R-Zr-z]"}, }), diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 93702ab..79fe971 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -115,6 +115,7 @@ modules = [ "senmanga", "sexcom", "simplyhentai", + "skeb", "slickpic", "slideshare", "smugmug", diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py index f867bd9..47e51b3 100644 --- a/gallery_dl/extractor/bcy.py +++ b/gallery_dl/extractor/bcy.py @@ -9,7 +9,7 @@ """Extractors for https://bcy.net/""" from .common import Extractor, Message -from .. import text +from .. 
import text, exception import json import re @@ -93,7 +93,7 @@ class BcyExtractor(Extractor): def _data_from_post(self, post_id): url = "{}/item/detail/{}".format(self.root, post_id) - page = self.request(url).text + page = self.request(url, notfound="post").text return json.loads( text.extract(page, 'JSON.parse("', '");')[0] .replace('\\\\u002F', '/') @@ -178,6 +178,7 @@ class BcyPostExtractor(BcyExtractor): }), # deleted ("https://bcy.net/item/detail/6780546160802143236", { + "exception": exception.NotFoundError, "count": 0, }), # only visible to logged in users diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 4f42477..e80366e 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -164,7 +164,8 @@ class Extractor(): self.log.debug("%s (%s/%s)", msg, tries, retries+1) if tries > retries: break - time.sleep(max(tries, self.request_interval)) + time.sleep( + max(tries, self._interval()) if self._interval else tries) tries += 1 raise exception.HttpError(msg, response) diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index d1b1b25..dbaa97e 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -17,18 +17,32 @@ class CyberdropAlbumExtractor(Extractor): directory_fmt = ("{category}", "{album_name} ({album_id})") archive_fmt = "{album_id}_{id}" pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" - test = ("https://cyberdrop.me/a/keKRjm4t", { - "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.[a-z]+$", - "keyword": { - "album_id": "keKRjm4t", - "album_name": "Fate (SFW)", - "album_size": 150069254, - "count": 62, - "date": "dt:2020-06-18 13:14:20", - "description": "", - "id": r"re:\w{8}", - }, - }) + test = ( + # images + ("https://cyberdrop.me/a/keKRjm4t", { + "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.(jpg|png|webp)$", + "keyword": { + "album_id": "keKRjm4t", + "album_name": "Fate (SFW)", + "album_size": 150069254, + "count": 62, + "date": "dt:2020-06-18 13:14:20", + "description": "", + "id": r"re:\w{8}", + }, + }), + # videos + ("https://cyberdrop.me/a/l8gIAXVD", { + "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$", + "count": 31, + "keyword": { + "album_id": "l8gIAXVD", + "album_name": "Achelois17 videos", + "album_size": 652037121, + "date": "dt:2020-06-16 15:40:44", + }, + }), + ) def __init__(self, match): Extractor.__init__(self, match) @@ -41,7 +55,7 @@ class CyberdropAlbumExtractor(Extractor): files = [] append = files.append while True: - url = extr('downloadUrl: "', '"') + url = extr('id="file" href="', '"') if not url: break append(text.unescape(url)) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 4604d39..61affb5 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -137,11 +137,12 @@ class DeviantartExtractor(Extractor): def prepare(self, deviation): """Adjust the contents of a Deviation-object""" - try: - deviation["index"] = text.parse_int( - deviation["url"].rpartition("-")[2]) - except KeyError: - deviation["index"] = 0 + if "index" not in deviation: + try: + deviation["index"] = text.parse_int( + deviation["url"].rpartition("-")[2]) + except KeyError: + deviation["index"] = 0 if self.user: deviation["username"] = self.user @@ -602,7 +603,10 @@ class DeviantartStashExtractor(DeviantartExtractor): if stash_id[0] == "0": uuid = text.extract(page, '//deviation/', '"')[0] if uuid: - yield self.api.deviation(uuid) + deviation = self.api.deviation(uuid) + 
deviation["index"] = text.parse_int(text.extract( + page, 'gmi-deviationid="', '"')[0]) + yield deviation return for item in text.extract_iter( diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 9b89999..d2c5e8f 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -125,7 +125,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406", }), ("https://boards.fireden.net/sci/thread/11264294/", { - "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43", + "url": "61cab625c95584a12a30049d054931d64f8d20aa", }), ("https://archive.nyafuu.org/c/thread/2849220/", { "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index 9b4d5ee..501d114 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -36,7 +36,13 @@ class GfycatExtractor(Extractor): if "gfyName" not in gfycat: self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"]) continue + url = self._process(gfycat) + if not url: + self.log.warning("Skipping '%s' (format not available)", + gfycat["gfyId"]) + continue + gfycat.update(metadata) yield Message.Directory, gfycat yield Message.Url, url, gfycat @@ -44,7 +50,7 @@ class GfycatExtractor(Extractor): def _process(self, gfycat): gfycat["_fallback"] = formats = self._formats(gfycat) gfycat["date"] = text.parse_timestamp(gfycat.get("createDate")) - return next(formats, "") + return next(formats, None) def _formats(self, gfycat): for fmt in self.formats: @@ -73,6 +79,9 @@ class GfycatUserExtractor(GfycatExtractor): "count": ">= 100", }) + def metadata(self): + return {"userName": self.key} + def gfycats(self): return GfycatAPI(self).user(self.key) @@ -155,6 +164,10 @@ class GfycatImageExtractor(GfycatExtractor): self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"]) return url = self._process(gfycat) + if not url: + self.log.warning("Skipping '%s' (format not available)", + gfycat["gfyId"]) + return yield Message.Directory, gfycat yield Message.Url, url, gfycat diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py index fa8e98b..c423b18 100644 --- a/gallery_dl/extractor/hentaihand.py +++ b/gallery_dl/extractor/hentaihand.py @@ -22,12 +22,12 @@ class HentaihandGalleryExtractor(GalleryExtractor): (("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-" "no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-" "no-railgun-english"), { - "pattern": r"https://cdn.hentaihand.com/.*/images/5669/\d+.jpg$", + "pattern": r"https://cdn.hentaihand.com/.*/images/360468/\d+.jpg$", "count": 50, "keyword": { "artists" : ["Takumi Na Muchi"], "date" : "dt:2014-06-28 00:00:00", - "gallery_id": 5669, + "gallery_id": 360468, "lang" : "en", "language" : "English", "parodies" : ["Toaru Kagaku No Railgun"], diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 01e5629..a4ce925 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "e057652b40629d3d72b0ef059c6ec7556417139c", + "url": "0cb629ab2bfe93d994a7972f68ad2a5a64ecc161", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "779b94b47d5f0c2341db03499270d2b5370196f6", + "url": 
"b420755d56a1135104ca8ca0765f44e290db70c3", "count": 1413, }), # gallery with "broken" redirect @@ -140,7 +140,7 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js inum = int(ihash[-3:-1], 16) - offset = 2 if inum < 0x44 else 1 if inum < 0x88 else 0 + offset = 1 if inum < 0x7c else 0 url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( chr(97 + offset), diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 983ae37..bf479ab 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -426,7 +426,7 @@ class InstagramPostsExtractor(InstagramExtractor): url = "{}/{}/".format(self.root, self.item) user = self._extract_profile_page(url) - query_hash = "7ea6ae3cf6fb05e73fcbe1732b1d2a42" + query_hash = "8c2a529969ee035a5063f2fc8602a0fd" variables = {"id": user["id"], "first": 50} edge = self._get_edge_data(user, "edge_owner_to_timeline_media") return self._pagination_graphql(query_hash, variables, edge) @@ -655,7 +655,7 @@ class InstagramPostExtractor(InstagramExtractor): ) def posts(self): - query_hash = "1f950d414a6e11c98c556aa007b3157d" + query_hash = "2efa04f61586458cef44441f474eee7c" variables = { "shortcode" : self.item, "child_comment_count" : 3, @@ -713,25 +713,14 @@ class InstagramHighlightsExtractor(InstagramExtractor): url = "{}/{}/".format(self.root, self.item) user = self._extract_profile_page(url) - query_hash = "d4d88dc1500312af6f937f7b804c68c3" - variables = { - "user_id": user["id"], - "include_chaining": False, - "include_reel": True, - "include_suggested_users": False, - "include_logged_out_extras": False, - "include_highlight_reels": True, - "include_live_status": True, - } - data = self._request_graphql(query_hash, variables) - edges = data["user"]["edge_highlight_reels"]["edges"] - if not edges: - return () - - reel_ids = ["highlight:" + edge["node"]["id"] for edge in edges] - endpoint = "/v1/feed/reels_media/?reel_ids=" + \ - "&reel_ids=".join(text.quote(rid) for rid in reel_ids) - reels = self._request_api(endpoint)["reels"] + endpoint = "/v1/highlights/{}/highlights_tray/".format(user["id"]) + tray = self._request_api(endpoint)["tray"] + + reel_ids = [highlight["id"] for highlight in tray] + endpoint = "/v1/feed/reels_media/" + params = {"reel_ids": reel_ids} + reels = self._request_api(endpoint, params=params)["reels"] + return [reels[rid] for rid in reel_ids] diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index d5aad67..2e1d0b2 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -35,6 +35,7 @@ class KemonopartyExtractor(Extractor): r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall skip_service = \ "patreon" if self.config("patreon-skip-file", True) else None + comments = self.config("comments") if self.config("metadata"): username = text.unescape(text.extract( @@ -68,6 +69,8 @@ class KemonopartyExtractor(Extractor): post["published"], "%a, %d %b %Y %H:%M:%S %Z") if username: post["username"] = username + if comments: + post["comments"] = self._extract_comments(post) yield Message.Directory, post for post["num"], file in enumerate(files, 1): @@ -100,6 +103,24 @@ class KemonopartyExtractor(Extractor): return {c.name: c.value for c in response.history[0].cookies} + def _extract_comments(self, post): + url = "{}/{}/user/{}/post/{}".format( + self.root, post["service"], post["user"], post["id"]) + page = self.request(url).text + + comments = [] + for comment in 
text.extract_iter(page, "<article", "</article>"): + extr = text.extract_from(comment) + cid = extr('id="', '"') + comments.append({ + "id" : cid, + "user": extr('href="#' + cid + '"', '</').strip(" \n\r>"), + "body": extr( + '<section class="comment__body">', '</section>').strip(), + "date": extr('datetime="', '"'), + }) + return comments + class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 665d257..6761b55 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -105,7 +105,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "exception": exception.NotFoundError, }), ("https://members.luscious.net/albums/login-required_323871/", { - "count": 78, + "count": 64, }), ("https://www.luscious.net/albums/okinami_277031/"), ("https://members.luscious.net/albums/okinami_277031/"), diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py index cab866a..c71b003 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -1,12 +1,13 @@ # -*- coding: utf-8 -*- # Copyright 2020 Jake Mannens +# Copyright 2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://mangakakalot.com/""" +"""Extractors for https://mangakakalot.tv/""" from .common import ChapterExtractor, MangaExtractor from .. import text @@ -16,34 +17,24 @@ import re class MangakakalotBase(): """Base class for mangakakalot extractors""" category = "mangakakalot" - root = "https://mangakakalot.com" - - @staticmethod - def parse_page(page, data): - """Parse metadata on 'page' and add it to 'data'""" - text.extract_all(page, ( - ("manga" , '<h1>', '</h1>'), - ('author' , '<li>Author(s) :\n', '</a>'), - ), values=data) - data["author"] = text.remove_html(data["author"]) - return data + root = "https://ww.mangakakalot.tv" class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): - """Extractor for manga-chapters from mangakakalot.com""" - pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com" - r"(/chapter/\w+/chapter_[^/?#]+)") + """Extractor for manga chapters from mangakakalot.tv""" + pattern = (r"(?:https?://)?(?:www?\.)?mangakakalot\.tv" + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)") test = ( - ("https://mangakakalot.com/chapter/rx922077/chapter_6", { - "pattern": r"https://s\d+\.\w+\.com/mangakakalot/r\d+/rx922077/" - r"chapter_6_master_help_me_out/\d+\.jpg", - "keyword": "80fde46d2210a6c17f0b2f7c1c89f0f56b65e157", + ("https://ww.mangakakalot.tv/chapter/manga-hl984546/chapter-6", { + "pattern": r"https://cm\.blazefast\.co" + r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg", + "keyword": "e9646a76a210f1eb4a71b4134664814c99d65d48", "count": 14, }), - (("https://mangakakalot.com/chapter" + (("https://mangakakalot.tv/chapter" "/hatarakanai_futari_the_jobless_siblings/chapter_20.1"), { - "keyword": "6b24349bb16f41ef1c4350200c1ccda5f09ae136", - "content": "7196aed8bb1536806bf55033ed1f2ed172c86f9a", + "keyword": "14c430737ff600b26a3811815905f34dd6a6c8c6", + "content": "b3eb1f139caef98d9dcd8ba6a5ee146a13deebc4", "count": 2, }), ) @@ -54,10 +45,10 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): self.session.headers['Referer'] = self.root def metadata(self, page): - 
_ , pos = text.extract(page, '<span itemprop="name">', '<') - manga , pos = text.extract(page, '<span itemprop="name">', '<', pos) - info , pos = text.extract(page, '<span itemprop="name">', '<', pos) - author, pos = text.extract(page, '. Author: ', ' already has ', pos) + _ , pos = text.extract(page, '<span itemprop="title">', '<') + manga , pos = text.extract(page, '<span itemprop="title">', '<', pos) + info , pos = text.extract(page, '<span itemprop="title">', '<', pos) + author, pos = text.extract(page, '. Author:', ' already has ', pos) match = re.match( r"(?:[Vv]ol\. *(\d+) )?" @@ -69,7 +60,7 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): return { "manga" : text.unescape(manga), "title" : text.unescape(title) if title else "", - "author" : text.unescape(author) if author else "", + "author" : text.unescape(author).strip() if author else "", "volume" : text.parse_int(volume), "chapter" : text.parse_int(chapter), "chapter_minor": sep + minor, @@ -78,42 +69,46 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): } def images(self, page): - page = text.extract( - page, 'class="container-chapter-reader', '\n<div')[0] return [ (url, None) - for url in text.extract_iter(page, '<img src="', '"') + for url in text.extract_iter(page, '<img data-src="', '"') ] class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor): - """Extractor for manga from mangakakalot.com""" + """Extractor for manga from mangakakalot.tv""" chapterclass = MangakakalotChapterExtractor - pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com" - r"(/(?:manga/|read-)\w+)") + pattern = (r"(?:https?://)?(?:www?\.)?mangakakalot\.tv" + r"(/manga/[^/?#]+)") test = ( - ("https://mangakakalot.com/manga/lk921810", { - "url": "d262134b65993b031406f7b9d9442c9afd321a27", + ("https://ww.mangakakalot.tv/manga/lk921810", { + "url": "654d040c17728c9c8756fce7092b084e8dcf67d2", }), - ("https://mangakakalot.com/read-ry3sw158504884246", { + ("https://mangakakalot.tv/manga/manga-jk986845", { "pattern": MangakakalotChapterExtractor.pattern, - "count": ">= 40" + "count": ">= 30", }), ) def chapters(self, page): + data = {"lang": "en", "language": "English"} + data["manga"], pos = text.extract(page, "<h1>", "<") + author, pos = text.extract(page, "<li>Author(s) :", "</a>", pos) + data["author"] = text.remove_html(author) + results = [] - data = self.parse_page(page, {"lang": "en", "language": "English"}) - - needle = '<div class="row">\n<span><a href="' - pos = page.index('<div class="chapter-list">') - while True: - url, pos = text.extract(page, needle, '"', pos) - if not url: - return results - data["title"], pos = text.extract(page, '>', '</a>', pos) - data["date"] , pos = text.extract(page, '<span title="', '">', pos) - chapter, sep, minor = url.rpartition("/chapter_")[2].partition(".") + for chapter in text.extract_iter(page, '<div class="row">', '</div>'): + url, pos = text.extract(chapter, '<a href="', '"') + title, pos = text.extract(chapter, '>', '</a>', pos) + data["title"] = title.partition(": ")[2] + data["date"] , pos = text.extract( + chapter, '<span title=" ', '"', pos) + + chapter, sep, minor = url.rpartition("/chapter-")[2].partition(".") data["chapter"] = text.parse_int(chapter) data["chapter_minor"] = sep + minor + + if url.startswith("/"): + url = self.root + url results.append((url, data.copy())) + return results diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index 4bd5572..a883b91 100644 --- a/gallery_dl/extractor/mangapark.py +++ 
b/gallery_dl/extractor/mangapark.py @@ -66,7 +66,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): (("https://mangapark.net/manga" "/gekkan-shoujo-nozaki-kun/i2067426/v7/c70/1"), { "count": 15, - "keyword": "8f18f1c977ebe049ef35e3a877eaaab97fb25274", + "keyword": "edc14993c4752cee3a76e09b2f024d40d854bfd1", }), ("https://mangapark.me/manga/gosu/i811615/c55/1"), ("https://mangapark.com/manga/gosu/i811615/c55/1"), diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py index 5d240d4..d45fbc9 100644 --- a/gallery_dl/extractor/mangoxo.py +++ b/gallery_dl/extractor/mangoxo.py @@ -36,12 +36,16 @@ class MangoxoExtractor(Extractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) + url = self.root + "/login" + page = self.request(url).text + token = text.extract(page, 'id="loginToken" value="', '"')[0] + url = self.root + "/api/login" headers = { "X-Requested-With": "XMLHttpRequest", "Referer": self.root + "/login", } - data = self._sign_by_md5(username, password) + data = self._sign_by_md5(username, password, token) response = self.request(url, method="POST", headers=headers, data=data) data = response.json() @@ -50,11 +54,12 @@ class MangoxoExtractor(Extractor): return {"SESSION": self.session.cookies.get("SESSION")} @staticmethod - def _sign_by_md5(username, password): + def _sign_by_md5(username, password, token): # https://dns.mangoxo.com/libs/plugins/phoenix-ui/js/phoenix-ui.js params = [ ("username" , username), ("password" , password), + ("token" , token), ("timestamp", str(int(time.time()))), ] query = "&".join("=".join(item) for item in sorted(params)) @@ -79,8 +84,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor): "url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d", "keyword": { "channel": { - "id": "Jpw9ywQ4", - "name": "绘画艺术赏析", + "id": "gaxO16d8", + "name": "Phoenix", "cover": str, }, "album": { @@ -116,14 +121,14 @@ class MangoxoAlbumExtractor(MangoxoExtractor): def metadata(self, page): """Return general metadata""" - title, pos = text.extract(page, '<title>', '</title>') - _ , pos = text.extract(page, 'class="desc"', '', pos) - cid , pos = text.extract(page, '//www.mangoxo.com/channel/', '"', pos) - cname, pos = text.extract(page, '>', '<', pos) - count, pos = text.extract(page, 'id="pic-count">', '<', pos) - cover, pos = text.extract(page, ' src="', '"', pos) - date , pos = text.extract(page, '</i>', '<', pos) - descr, pos = text.extract(page, '<pre>', '</pre>', pos) + extr = text.extract_from(page) + title = extr('<title>', '</title>') + count = extr('id="pic-count">', '<') + cid = extr('<img alt="', '"') + cover = extr(' src="', '"') + cname = extr('target="_blank">', '<') + date = extr('</i>', '<') + descr = extr('<pre>', '</pre>') return { "channel": { @@ -157,8 +162,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor): class MangoxoChannelExtractor(MangoxoExtractor): """Extractor for all albums on a mangoxo channel""" subcategory = "channel" - pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/channel/(\w+)" - test = ("https://www.mangoxo.com/channel/QeYKRkO0", { + pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/(\w+)/album" + test = ("https://www.mangoxo.com/phoenix/album", { "pattern": MangoxoAlbumExtractor.pattern, "range": "1-30", "count": "> 20", @@ -166,12 +171,12 @@ class MangoxoChannelExtractor(MangoxoExtractor): def __init__(self, match): MangoxoExtractor.__init__(self, match) - self.channel_id = match.group(1) + self.user = match.group(1) def items(self): self.login() num = total = 1 - 
url = "{}/channel/{}/album/".format(self.root, self.channel_id) + url = "{}/{}/album/".format(self.root, self.user) data = {"_extractor": MangoxoAlbumExtractor} while True: diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index 05cbcdf..ad9f620 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -99,8 +99,10 @@ class PahealPostExtractor(PahealExtractor): pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net" r"/post/view/(\d+)") test = ("https://rule34.paheal.net/post/view/481609", { - "url": "a91d579be030753282f55b8cb4eeaa89c45a9116", - "keyword": "e02e4dcf8cdf4e9c206e695253c9024d79a2e20a", + "pattern": r"https://tulip\.paheal\.net/_images" + r"/bbdc1c33410c2cdce7556c7990be26b7/481609%20-%20" + r"Azumanga_Daioh%20Osaka%20Vuvuzela%20inanimate\.jpg", + "keyword": "abe7c1220ba5601f9639aa79fbb9689674ec8f5c", "content": "7b924bcf150b352ac75c9d281d061e174c851a11", }) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index c7df089..62e4f58 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -151,7 +151,7 @@ class PatreonExtractor(Extractor): included[file["type"]][file["id"]] for file in files["data"] ] - return () + return [] @memcache(keyarg=1) def _user(self, url): diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index c6c885c..d3b3bb1 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -107,11 +107,11 @@ class PhilomenaPostExtractor(PhilomenaExtractor): "source_url": "https://www.deviantart.com/speccysy/art" "/Afternoon-Flight-215193985", "spoilered": False, - "tag_count": 39, + "tag_count": 42, "tag_ids": list, "tags": list, "thumbnails_generated": True, - "updated_at": "2021-07-13T14:22:40Z", + "updated_at": "2021-09-30T20:04:01Z", "uploader": "Clover the Clever", "uploader_id": 211188, "upvotes": int, diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 43c7e50..55c963d 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -112,12 +112,18 @@ class RedditExtractor(Extractor): """Return an iterable containing all (submission, comments) tuples""" def _extract_gallery(self, submission): - if submission["gallery_data"] is None: + gallery = submission["gallery_data"] + if gallery is None: self.log.warning("gallery %s: deleted", submission["id"]) return - meta = submission["media_metadata"] - for item in submission["gallery_data"]["items"]: + meta = submission.get("media_metadata") + if meta is None: + self.log.warning("gallery %s: missing 'media_metadata'", + submission["id"]) + return + + for item in gallery["items"]: data = meta[item["media_id"]] if data["status"] != "valid" or "s" not in data: self.log.warning( @@ -221,6 +227,10 @@ class RedditSubmissionExtractor(RedditExtractor): ("https://www.reddit.com/r/cosplay/comments/jvwaqr", { "count": 1, }), + # gallery with no 'media_metadata' (#2001) + ("https://www.reddit.com/r/kpopfap/comments/qjj04q/", { + "count": 0, + }), ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), ("https://m.reddit.com/r/lavaporn/comments/2a00np/"), diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index e078bef..df50f70 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -8,15 +8,60 @@ """Extractors for https://redgifs.com/""" -from .gfycat import GfycatExtractor, GfycatAPI +from 
.common import Extractor, Message from .. import text -class RedgifsExtractor(GfycatExtractor): +class RedgifsExtractor(Extractor): """Base class for redgifs extractors""" category = "redgifs" + filename_fmt = "{category}_{id}.{extension}" + archive_fmt = "{id}" root = "https://www.redgifs.com" + def __init__(self, match): + Extractor.__init__(self, match) + self.key = match.group(1) + + formats = self.config("format") + if formats is None: + formats = ("hd", "sd", "gif") + elif isinstance(formats, str): + formats = (formats, "hd", "sd", "gif") + self.formats = formats + + def items(self): + metadata = self.metadata() + for gif in self.gifs(): + url = self._process(gif) + if not url: + self.log.warning("Skipping '%s' (format not available)", + gif["id"]) + continue + + gif.update(metadata) + yield Message.Directory, gif + yield Message.Url, url, gif + + def _process(self, gif): + gif["_fallback"] = formats = self._formats(gif) + gif["date"] = text.parse_timestamp(gif.get("createDate")) + return next(formats, None) + + def _formats(self, gif): + urls = gif["urls"] + for fmt in self.formats: + url = urls.get(fmt) + if url: + text.nameext_from_url(url, gif) + yield url + + def metadata(self): + return {} + + def gifs(self): + return () + class RedgifsUserExtractor(RedgifsExtractor): """Extractor for redgifs user profiles""" @@ -24,11 +69,14 @@ class RedgifsUserExtractor(RedgifsExtractor): directory_fmt = ("{category}", "{userName}") pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)" test = ("https://www.redgifs.com/users/Natalifiction", { - "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4", - "count": ">= 100", + "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4", + "count": ">= 120", }) - def gfycats(self): + def metadata(self): + return {"userName": self.key} + + def gifs(self): return RedgifsAPI(self).user(self.key) @@ -36,19 +84,23 @@ class RedgifsSearchExtractor(RedgifsExtractor): """Extractor for redgifs search results""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?#]+)" - test = ("https://www.redgifs.com/gifs/browse/jav", { - "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4", - "range": "1-10", - "count": 10, - }) + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)" + test = ( + ("https://www.redgifs.com/browse?tags=JAV", { + "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4", + "range": "1-10", + "count": 10, + }), + ("https://www.redgifs.com/browse?type=i&verified=y&order=top7"), + ) def metadata(self): - self.key = text.unquote(self.key).replace("-", " ") - return {"search": self.key} + self.params = params = text.parse_query(self.key) + search = params.get("tags") or params.get("order") or "trending" + return {"search": search} - def gfycats(self): - return RedgifsAPI(self).search(self.key) + def gifs(self): + return RedgifsAPI(self).search(self.params) class RedgifsImageExtractor(RedgifsExtractor): @@ -58,7 +110,7 @@ class RedgifsImageExtractor(RedgifsExtractor): r"|gifdeliverynetwork.com)/([A-Za-z]+)") test = ( ("https://redgifs.com/watch/foolishforkedabyssiniancat", { - "pattern": r"https://\w+\.(redgifs|gfycat)\.com" + "pattern": r"https://\w+\.redgifs\.com" r"/FoolishForkedAbyssiniancat\.mp4", "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533", }), @@ -66,9 +118,42 @@ class RedgifsImageExtractor(RedgifsExtractor): ("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"), ) - def gfycats(self): - 
return (RedgifsAPI(self).gfycat(self.key),) + def gifs(self): + return (RedgifsAPI(self).gif(self.key),) -class RedgifsAPI(GfycatAPI): +class RedgifsAPI(): API_ROOT = "https://api.redgifs.com" + + def __init__(self, extractor): + self.extractor = extractor + + def gif(self, gif_id): + endpoint = "/v2/gifs/" + gif_id.lower() + return self._call(endpoint)["gif"] + + def user(self, user, order="best"): + endpoint = "/v2/users/{}/search".format(user.lower()) + params = {"order": order} + return self._pagination(endpoint, params) + + def search(self, params): + endpoint = "/v2/gifs/search" + params["search_text"] = params.pop("tags", None) + params.pop("needSendGtm", None) + return self._pagination(endpoint, params) + + def _call(self, endpoint, params=None): + url = self.API_ROOT + endpoint + return self.extractor.request(url, params=params).json() + + def _pagination(self, endpoint, params): + params["page"] = 1 + + while True: + data = self._call(endpoint, params) + yield from data["gifs"] + + if params["page"] >= data["pages"]: + return + params["page"] += 1 diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 2ea6f57..59e8be6 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -78,8 +78,8 @@ class SankakuTagExtractor(SankakuExtractor): test = ( ("https://sankaku.app/?tags=bonocho", { "count": 5, - "pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" - r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+", + "pattern": r"https://v\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" + r"/[^/]{32}\.\w+\?e=\d+&expires=\d+&m=[^&#]+", }), ("https://beta.sankakucomplex.com/?tags=bonocho"), ("https://chan.sankakucomplex.com/?tags=bonocho"), @@ -159,7 +159,7 @@ class SankakuPostExtractor(SankakuExtractor): }), # 'contentious_content' ("https://sankaku.app/post/show/21418978", { - "pattern": r"https://s\.sankakucomplex\.com" + "pattern": r"https://v\.sankakucomplex\.com" r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg", }), # empty tags (#1617) diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py new file mode 100644 index 0000000..c1a8878 --- /dev/null +++ b/gallery_dl/extractor/skeb.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://skeb.jp/""" + +from .common import Extractor, Message +from .. 
import text + + +class SkebExtractor(Extractor): + """Base class for skeb extractors""" + category = "skeb" + directory_fmt = ("{category}", "{creator[screen_name]}") + filename_fmt = "{post_num}_{file_id}.{extension}" + archive_fmt = "{post_num}_{file_id}_{content_category}" + root = "https://skeb.jp" + + def __init__(self, match): + Extractor.__init__(self, match) + self.user_name = match.group(1) + + def items(self): + for post_num in self.posts(): + response, post = self._get_post_data(post_num) + yield Message.Directory, post + for data in self._get_urls_from_post(response, post): + url = data["file_url"] + yield Message.Url, url, text.nameext_from_url(url, data) + + def posts(self): + """Return post number""" + + def _pagination(self): + url = "{}/api/users/{}/works".format(self.root, self.user_name) + params = {"role": "creator", "sort": "date", "offset": 0} + headers = {"Referer": self.root, "Authorization": "Bearer null"} + + while True: + posts = self.request(url, params=params, headers=headers).json() + + for post in posts: + post_num = post["path"].rpartition("/")[2] + if post["private"]: + self.log.debug("Skipping %s (private)", post_num) + continue + yield post_num + + if len(posts) < 30: + return + params["offset"] += 30 + + def _get_post_data(self, post_num): + url = "{}/api/users/{}/works/{}".format( + self.root, self.user_name, post_num) + headers = {"Referer": self.root, "Authorization": "Bearer null"} + resp = self.request(url, headers=headers).json() + creator = resp["creator"] + post = { + "post_num" : post_num, + "post_url" : self.root + resp["path"], + "body" : resp["body"], + "source_body" : resp["source_body"], + "translated_body" : resp["translated"], + "completed_at" : resp["completed_at"], + "date" : text.parse_datetime( + resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"), + "nsfw" : resp["nsfw"], + "anonymous" : resp["anonymous"], + "tags" : resp["tag_list"], + "genre" : resp["genre"], + "thanks" : resp["thanks"], + "source_thanks" : resp["source_thanks"], + "translated_thanks": resp["translated_thanks"], + "creator": { + "id" : creator["id"], + "name" : creator["name"], + "screen_name" : creator["screen_name"], + "avatar_url" : creator["avatar_url"], + "header_url" : creator["header_url"], + } + } + if not resp["anonymous"] and "client" in resp: + client = resp["client"] + post["client"] = { + "id" : client["id"], + "name" : client["name"], + "screen_name" : client["screen_name"], + "avatar_url" : client["avatar_url"], + "header_url" : client["header_url"], + } + return resp, post + + def _get_urls_from_post(self, resp, post): + if "og_image_url" in resp: + post["content_category"] = "thumb" + post["file_id"] = "thumb" + post["file_url"] = resp["og_image_url"] + yield post + + for preview in resp["previews"]: + post["content_category"] = "preview" + post["file_id"] = preview["id"] + post["file_url"] = preview["url"] + info = preview["information"] + post["original"] = { + "width" : info["width"], + "height" : info["height"], + "byte_size" : info["byte_size"], + "duration" : info["duration"], + "frame_rate": info["frame_rate"], + "software" : info["software"], + "extension" : info["extension"], + "is_movie" : info["is_movie"], + "transcoder": info["transcoder"], + } + yield post + + +class SkebPostExtractor(SkebExtractor): + """Extractor for a single skeb post""" + subcategory = "post" + pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)" + + def __init__(self, match): + SkebExtractor.__init__(self, match) + self.post_num = match.group(2) + + def 
posts(self): + return (self.post_num,) + + +class SkebUserExtractor(SkebExtractor): + """Extractor for all posts from a skeb user""" + subcategory = "user" + pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)" + + def posts(self): + return self._pagination() diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index a3c77e8..91386e8 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -25,7 +25,7 @@ class SlidesharePresentationExtractor(Extractor): (("https://www.slideshare.net" "/Slideshare/get-started-with-slide-share"), { "url": "23685fb9b94b32c77a547d45dc3a82fe7579ea18", - "content": "ee54e54898778e92696a7afec3ffabdbd98eb0cc", + "content": "2e90a01c6ca225579ebf8f98ab46f97a28a5e45c", }), # long title (("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren" diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index f7408e6..5d582b5 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor): test = ( ("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", { "url": "e6408fd2c64e721fd146130dceb56a971ceb4259", - "keyword": "05c8d50aa6ea08d458f83c38d7f9e92148362f0e", + "keyword": "b15af021186b7234cebcac758d2a4fd8462f9912", "content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0", }), # video ("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", { "url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee", - "keyword": "720da317232504f05099da37802ed3c3ce3cd310", + "keyword": "f6967cc5a46c3e130a4f8de7c5c971f72e07fe61", }), ) diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py index ec1e044..9b06f92 100644 --- a/gallery_dl/extractor/tapas.py +++ b/gallery_dl/extractor/tapas.py @@ -150,7 +150,7 @@ class TapasEpisodeExtractor(TapasExtractor): subcategory = "episode" pattern = BASE_PATTERN + r"/episode/(\d+)" test = ("https://tapas.io/episode/2068651", { - "url": "0e536117dfaa17972e83d2e0141e6f9e91a33611", + "url": "f122b05648a9f53c2ddb2f6854a7a80ab946e9e8", "pattern": "^text:", "keyword": { "book": True, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 568ee2e..00f3b04 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -36,6 +36,7 @@ class TwitterExtractor(Extractor): self.retweets = self.config("retweets", False) self.replies = self.config("replies", True) self.twitpic = self.config("twitpic", False) + self.pinned = self.config("pinned", False) self.quoted = self.config("quoted", False) self.videos = self.config("videos", True) self.cards = self.config("cards", False) @@ -838,7 +839,7 @@ class TwitterAPI(): if params is None: params = self.params.copy() original_retweets = (self.extractor.retweets == "original") - pinned_tweet = True + pinned_tweet = self.extractor.pinned while True: cursor = tweet = None diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 59649a0..0922c7c 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -31,7 +31,10 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): r"/([^/?#]+)/photos/(\d+)") test = ( ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", { - "url": "cb4657a37eea5ab6b1d333491cee7eeb529b0645", + "count": 8, + "pattern": r"https://profile-pics-l3\.xvideos-cdn\.com" + r"/[0-9a-f]{40}-\d+/videos/profiles/galleries/84/ca/37" + r"/pervertedcouple/gal751031/pic_\d+_big\.jpg", "keyword": { 
"gallery": { "id" : 751031, diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 654e5d0..f5d961a 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -8,14 +8,20 @@ """String formatters""" +import os import json import string import _string +import datetime import operator from . import text, util _CACHE = {} _CONVERSIONS = None +_GLOBALS = { + "_env": lambda: os.environ, + "_now": datetime.datetime.now, +} def parse(format_string, default=None): @@ -58,7 +64,7 @@ class StringFormatter(): - "j". calls json.dumps - "t": calls str.strip - "d": calls text.parse_timestamp - - "U": calls urllib.parse.unquote + - "U": calls urllib.parse.unescape - "S": calls util.to_string() - "T": calls util.to_timestamü() - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE" @@ -125,6 +131,8 @@ class StringFormatter(): ], fmt) else: key, funcs = parse_field_name(field_name) + if key in _GLOBALS: + return self._apply_globals(_GLOBALS[key], funcs, fmt) if funcs: return self._apply(key, funcs, fmt) return self._apply_simple(key, fmt) @@ -140,6 +148,17 @@ class StringFormatter(): return fmt(obj) return wrap + def _apply_globals(self, gobj, funcs, fmt): + def wrap(_): + try: + obj = gobj() + for func in funcs: + obj = func(obj) + except Exception: + obj = self.default + return fmt(obj) + return wrap + def _apply_simple(self, key, fmt): def wrap(kwdict): return fmt(kwdict[key] if key in kwdict else self.default) @@ -149,7 +168,7 @@ class StringFormatter(): def wrap(kwdict): for key, funcs in lst: try: - obj = kwdict[key] + obj = _GLOBALS[key]() if key in _GLOBALS else kwdict[key] for func in funcs: obj = func(obj) if obj: diff --git a/gallery_dl/version.py b/gallery_dl/version.py index ee01549..48817be 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.19.1" +__version__ = "1.19.2" diff --git a/test/test_formatter.py b/test/test_formatter.py index 70201f3..088b45b 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -11,6 +11,7 @@ import os import sys import unittest import datetime +import tempfile sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from gallery_dl import formatter # noqa E402 @@ -173,6 +174,96 @@ class TestFormatter(unittest.TestCase): self._run_test("{d[a]:?</>/L1/too long/}", "<too long>") self._run_test("{d[c]:?</>/L5/too long/}", "") + def test_globals_env(self): + os.environ["FORMATTER_TEST"] = value = self.kwdict["a"] + + self._run_test("{_env[FORMATTER_TEST]}" , value) + self._run_test("{_env[FORMATTER_TEST]!l}", value.lower()) + self._run_test("{z|_env[FORMATTER_TEST]}", value) + + def test_globals_now(self): + fmt = formatter.parse("{_now}") + out1 = fmt.format_map(self.kwdict) + self.assertRegex(out1, r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d+)?$") + + out = formatter.parse("{_now:%Y%m%d}").format_map(self.kwdict) + now = datetime.datetime.now() + self.assertRegex(out, r"^\d{8}$") + self.assertEqual(out, format(now, "%Y%m%d")) + + out = formatter.parse("{z|_now:%Y}").format_map(self.kwdict) + self.assertRegex(out, r"^\d{4}$") + self.assertEqual(out, format(now, "%Y")) + + out2 = fmt.format_map(self.kwdict) + self.assertRegex(out1, r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d+)?$") + self.assertNotEqual(out1, out2) + + def test_template(self): + with tempfile.TemporaryDirectory() as tmpdirname: + path1 = os.path.join(tmpdirname, "tpl1") + path2 = os.path.join(tmpdirname, "tpl2") + + with open(path1, "w") as fp: + fp.write("{a}") + fmt1 = formatter.parse("\fT " + path1) + + with open(path2, "w") as fp: + fp.write("{a!u:Rh/C/}\nFooBar") + fmt2 = formatter.parse("\fT " + path2) + + self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"]) + self.assertEqual(fmt2.format_map(self.kwdict), "HELLO WORLD\nFooBar") + + with self.assertRaises(OSError): + formatter.parse("\fT /") + + def test_expression(self): + self._run_test("\fE a", self.kwdict["a"]) + self._run_test("\fE name * 2 + ' ' + a", "{}{} {}".format( + self.kwdict["name"], self.kwdict["name"], self.kwdict["a"])) + + def test_module(self): + with tempfile.TemporaryDirectory() as tmpdirname: + path = os.path.join(tmpdirname, "testmod.py") + + with open(path, "w") as fp: + fp.write(""" +def gentext(kwdict): + name = kwdict.get("Name") or kwdict.get("name") or "foo" + return "'{title1}' by {}".format(name, **kwdict) + +def lengths(kwdict): + a = 0 + for k, v in kwdict.items(): + try: + a += len(v) + except TypeError: + pass + return format(a) + +def noarg(): + return "" +""") + sys.path.insert(0, tmpdirname) + try: + fmt1 = formatter.parse("\fM testmod:gentext") + fmt2 = formatter.parse("\fM testmod:lengths") + fmt3 = formatter.parse("\fM testmod:noarg") + + with self.assertRaises(AttributeError): + formatter.parse("\fM testmod:missing") + with self.assertRaises(ImportError): + formatter.parse("\fM missing:missing") + finally: + sys.path.pop(0) + + self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name") + self.assertEqual(fmt2.format_map(self.kwdict), "65") + + with self.assertRaises(TypeError): + self.assertEqual(fmt3.format_map(self.kwdict), "") + def _run_test(self, format_string, result, default=None): fmt = formatter.parse(format_string, default) output = fmt.format_map(self.kwdict) |