author    Unit 193 <unit193@unit193.net>  2021-01-11 03:25:49 -0500
committer Unit 193 <unit193@unit193.net>  2021-01-11 03:25:49 -0500
commit    99541ac9da24070cd241149ee54db4bf81d564b7 (patch)
tree      073fefa95f7ddedf777db89bb237b51860c1f32f
parent    bc18b55e1dd21a4528241926df6302182404fb5a (diff)
parent    6335711bbe769b6b9301a88d88790d7a2f8aa82e (diff)
Update upstream source from tag 'upstream/1.16.3'
Update to upstream version '1.16.3' with Debian dir 9f836edb40c3d6f8084f75a5c1e53b6a52613e4f
 CHANGELOG.md                          |  21
 PKG-INFO                              |   8
 README.rst                            |   6
 data/completion/_gallery-dl           |   1
 data/man/gallery-dl.1                 |   2
 data/man/gallery-dl.conf.5            |  35
 gallery_dl.egg-info/PKG-INFO          |   8
 gallery_dl.egg-info/SOURCES.txt       |   1
 gallery_dl/downloader/http.py         |  12
 gallery_dl/extractor/__init__.py      |   1
 gallery_dl/extractor/derpibooru.py    | 187
 gallery_dl/extractor/deviantart.py    |   5
 gallery_dl/extractor/foolfuuka.py     | 167
 gallery_dl/extractor/gfycat.py        |  16
 gallery_dl/extractor/hentainexus.py   |   9
 gallery_dl/extractor/instagram.py     |   4
 gallery_dl/extractor/pinterest.py     |  39
 gallery_dl/extractor/redgifs.py       |  13
 gallery_dl/extractor/sankaku.py       |  29
 gallery_dl/extractor/twitter.py       | 167
 gallery_dl/extractor/wikiart.py       |  48
 gallery_dl/option.py                  |   5
 gallery_dl/postprocessor/metadata.py  |   5
 gallery_dl/util.py                    |   5
 gallery_dl/version.py                 |   2
 test/test_postprocessor.py            |   4
 test/test_util.py                     |   5
 27 files changed, 660 insertions(+), 145 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3531352..3b3060a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
# Changelog
+## 1.16.3 - 2021-01-10
+
+## 1.16.2 - 2021-01-09
+### Additions
+- [derpibooru] add `search` and `gallery` extractors ([#862](https://github.com/mikf/gallery-dl/issues/862))
+- [foolfuuka] add `board` and `search` extractors ([#1044](https://github.com/mikf/gallery-dl/issues/1044), [#1174](https://github.com/mikf/gallery-dl/issues/1174))
+- [gfycat] add `date` metadata field ([#1138](https://github.com/mikf/gallery-dl/issues/1138))
+- [pinterest] add support for getting all boards of a user ([#1205](https://github.com/mikf/gallery-dl/issues/1205))
+- [sankaku] add support for book searches ([#1204](https://github.com/mikf/gallery-dl/issues/1204))
+- [twitter] fetch media from pinned tweets ([#1203](https://github.com/mikf/gallery-dl/issues/1203))
+- [wikiart] add extractor for single paintings ([#1233](https://github.com/mikf/gallery-dl/issues/1233))
+- [downloader:http] add MIME type and signature for `.ico` files ([#1211](https://github.com/mikf/gallery-dl/issues/1211))
+- add a `d` format string conversion for timestamp values
+- add `"ascii"` as a special `path-restrict` value
+### Fixes
+- [hentainexus] fix extraction ([#1234](https://github.com/mikf/gallery-dl/issues/1234))
+- [instagram] categorize single highlight URLs as `highlights` ([#1222](https://github.com/mikf/gallery-dl/issues/1222))
+- [redgifs] fix search results
+- [twitter] fix login with username & password
+- [twitter] fetch tweets from `homeConversation` entries
+
## 1.16.1 - 2020-12-27
### Additions
- [instagram] add `include` option ([#1180](https://github.com/mikf/gallery-dl/issues/1180))
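
Note (not part of the patch): the new `d` format-string conversion from 1.16.2 is easiest to see in code. A minimal sketch, assuming a gallery-dl 1.16.2+ checkout is importable; `!d` converts a Unix timestamp to a datetime, which the format spec then renders (the same behavior the test_util.py changes below exercise):

    from gallery_dl.util import Formatter

    # "!d" runs text.parse_timestamp on the value before formatting
    fmt = Formatter("{created!d:%Y-%m-%d %H:%M}")
    print(fmt.format_map({"created": 1262304000}))  # 2010-01-01 00:00
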
diff --git a/PKG-INFO b/PKG-INFO
index ea38c4b..6b9724a 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.16.1
+Version: 1.16.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -332,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 03d5367..4982919 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -321,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.1.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 5194312..f134f63 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -15,6 +15,7 @@ _arguments -C -S \
{-q,--quiet}'[Activate quiet mode]' \
{-v,--verbose}'[Print various debugging information]' \
{-g,--get-urls}'[Print URLs instead of downloading]' \
+-G'[==SUPPRESS==]' \
{-j,--dump-json}'[Print JSON information]' \
{-s,--simulate}'[Simulate data extraction; do not download anything]' \
{-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index c3df997..a260907 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-12-27" "1.16.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-01-10" "1.16.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 40efa15..609d1de 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-12-27" "1.16.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-01-10" "1.16.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -186,6 +186,8 @@ depending on the local operating system
* \f[I]"unix"\f[]: \f[I]"/"\f[]
.br
* \f[I]"windows"\f[]: \f[I]"\\\\\\\\|/<>:\\"?*"\f[]
+.br
+* \f[I]"ascii"\f[]: \f[I]"^0-9A-Za-z_."\f[]
Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be
escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
@@ -803,6 +805,37 @@ Controls the download target for Ugoira posts.
* \f[I]false\f[]: Converted video files
+.SS extractor.derpibooru.api-key
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Your \f[I]Derpibooru API Key\f[],
+to use your account's browsing settings and filters.
+
+
+.SS extractor.derpibooru.filter
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Example:" 4
+56027 (\f[I]Everything\f[] filter)
+
+.IP "Description:" 4
+The content filter ID to use.
+
+Setting an explicit filter ID overrides any default filters and can be used
+to access 18+ content without \f[I]API Key\f[].
+
+See \f[I]Filters\f[] for details.
+
+
.SS extractor.deviantart.extra
.IP "Type:" 6
\f[I]bool\f[]
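
As a usage note for the two new derpibooru options documented above, here is a hedged sketch of the matching gallery-dl.conf fragment, rendered as JSON from Python; the filter ID 56027 ("Everything") comes from the man page, and the api-key value is a placeholder, not a real credential:

    import json

    config = {
        "extractor": {
            "derpibooru": {
                "api-key": None,    # your account's API key, or null
                "filter" : 56027,   # explicit filter ID; overrides account filters
            }
        }
    }
    print(json.dumps(config, indent=4))
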
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 7b2006e..2ce1d97 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.16.1
+Version: 1.16.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -332,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 3b28345..cb025ff 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -51,6 +51,7 @@ gallery_dl/extractor/blogger.py
gallery_dl/extractor/booru.py
gallery_dl/extractor/common.py
gallery_dl/extractor/danbooru.py
+gallery_dl/extractor/derpibooru.py
gallery_dl/extractor/deviantart.py
gallery_dl/extractor/directlink.py
gallery_dl/extractor/dynastyscans.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index b8546a8..179a552 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -283,7 +283,10 @@ MIME_TYPES = {
"image/x-ms-bmp": "bmp",
"image/webp" : "webp",
"image/svg+xml" : "svg",
-
+ "image/ico" : "ico",
+ "image/icon" : "ico",
+ "image/x-icon" : "ico",
+ "image/vnd.microsoft.icon" : "ico",
"image/x-photoshop" : "psd",
"application/x-photoshop" : "psd",
"image/vnd.adobe.photoshop": "psd",
@@ -314,7 +317,7 @@ MIME_TYPES = {
"application/octet-stream": "bin",
}
-# taken from https://en.wikipedia.org/wiki/List_of_file_signatures
+# https://en.wikipedia.org/wiki/List_of_file_signatures
FILE_SIGNATURES = {
"jpg" : b"\xFF\xD8\xFF",
"png" : b"\x89PNG\r\n\x1A\n",
@@ -322,6 +325,8 @@ FILE_SIGNATURES = {
"bmp" : b"BM",
"webp": b"RIFF",
"svg" : b"<?xml",
+ "ico" : b"\x00\x00\x01\x00",
+ "cur" : b"\x00\x00\x02\x00",
"psd" : b"8BPS",
"webm": b"\x1A\x45\xDF\xA3",
"ogg" : b"OggS",
@@ -333,8 +338,7 @@ FILE_SIGNATURES = {
"pdf" : b"%PDF-",
"swf" : (b"CWS", b"FWS"),
# check 'bin' files against all other file signatures
- "bin" : b"\x00\x00\x00\x00",
+ "bin" : b"\x00\x00\x00\x00\x00\x00\x00\x00",
}
-
__downloader__ = HttpDownloader
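
A self-contained sketch of the detection this hunk extends: the downloader compares a file's leading bytes against FILE_SIGNATURES to pick an extension when the server's MIME type is missing or wrong. The table below is a trimmed copy for illustration, not an import; bytes.startswith also accepts tuples, which covers entries like "swf":

    FILE_SIGNATURES = {
        "jpg": b"\xFF\xD8\xFF",
        "png": b"\x89PNG\r\n\x1A\n",
        "ico": b"\x00\x00\x01\x00",
        "cur": b"\x00\x00\x02\x00",
        "swf": (b"CWS", b"FWS"),
    }

    def guess_extension(header, default="bin"):
        # 'header' holds the first few bytes of the downloaded file
        for ext, sig in FILE_SIGNATURES.items():
            if header.startswith(sig):
                return ext
        return default

    print(guess_extension(b"\x00\x00\x01\x00\x10\x10"))  # ico
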
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b38cddc..36107d9 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
"behance",
"blogger",
"danbooru",
+ "derpibooru",
"deviantart",
"dynastyscans",
"e621",
diff --git a/gallery_dl/extractor/derpibooru.py b/gallery_dl/extractor/derpibooru.py
new file mode 100644
index 0000000..3b20fa5
--- /dev/null
+++ b/gallery_dl/extractor/derpibooru.py
@@ -0,0 +1,187 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://derpibooru.org/"""
+
+from .booru import BooruExtractor
+from .. import text, exception
+import operator
+
+BASE_PATTERN = r"(?:https?://)?derpibooru\.org"
+
+
+class DerpibooruExtractor(BooruExtractor):
+ """Base class for derpibooru extractors"""
+ category = "derpibooru"
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{id}"
+ root = "https://derpibooru.org"
+ request_interval = 1.0
+ per_page = 50
+
+ _file_url = operator.itemgetter("view_url")
+
+ @staticmethod
+ def _prepare(post):
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S")
+
+ @staticmethod
+ def _extended_tags(post):
+ pass
+
+ def _pagination(self, url, params):
+ params["page"] = 1
+ params["per_page"] = self.per_page
+
+ api_key = self.config("api-key")
+ if api_key:
+ params["key"] = api_key
+
+ filter_id = self.config("filter")
+ if filter_id:
+ params["filter_id"] = filter_id
+
+ while True:
+ data = self.request(url, params=params).json()
+ yield from data["images"]
+
+ if len(data["images"]) < self.per_page:
+ return
+ params["page"] += 1
+
+
+class DerpibooruPostExtractor(DerpibooruExtractor):
+ """Extractor for single posts from derpibooru.org"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/images/(\d+)"
+ test = ("https://derpibooru.org/images/1", {
+ "content": "88449eeb0c4fa5d3583d0b794f6bc1d70bf7f889",
+ "count": 1,
+ "keyword": {
+ "animated": False,
+ "aspect_ratio": 1.0,
+ "comment_count": int,
+ "created_at": "2012-01-02T03:12:33",
+ "date": "dt:2012-01-02 03:12:33",
+ "deletion_reason": None,
+ "description": "",
+ "downvotes": int,
+ "duplicate_of": None,
+ "duration": 0.04,
+ "extension": "png",
+ "faves": int,
+ "first_seen_at": "2012-01-02T03:12:33",
+ "format": "png",
+ "height": 900,
+ "hidden_from_users": False,
+ "id": 1,
+ "mime_type": "image/png",
+ "name": "1__safe_fluttershy_solo_cloud_happy_flying_upvotes+galore"
+ "_artist-colon-speccysy_get_sunshine",
+ "orig_sha512_hash": None,
+ "processed": True,
+ "representations": dict,
+ "score": int,
+ "sha512_hash": "f16c98e2848c2f1bfff3985e8f1a54375cc49f78125391aeb8"
+ "0534ce011ead14e3e452a5c4bc98a66f56bdfcd07ef7800663"
+ "b994f3f343c572da5ecc22a9660f",
+ "size": 860914,
+ "source_url": "https://www.deviantart.com/speccysy/art"
+ "/Afternoon-Flight-215193985",
+ "spoilered": False,
+ "tag_count": 36,
+ "tag_ids": list,
+ "tags": list,
+ "thumbnails_generated": True,
+ "updated_at": "2020-05-28T13:14:07",
+ "uploader": "Clover the Clever",
+ "uploader_id": 211188,
+ "upvotes": int,
+ "view_url": str,
+ "width": 900,
+ "wilson_score": float,
+ },
+ })
+
+ def __init__(self, match):
+ DerpibooruExtractor.__init__(self, match)
+ self.image_id = match.group(1)
+
+ def posts(self):
+ url = self.root + "/api/v1/json/images/" + self.image_id
+ return (self.request(url).json()["image"],)
+
+
+class DerpibooruSearchExtractor(DerpibooruExtractor):
+ """Extractor for search results on derpibooru.org"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
+ test = (
+ ("https://derpibooru.org/search?q=cute", {
+ "range": "40-60",
+ "count": 21,
+ }),
+ ("https://derpibooru.org/tags/cute", {
+ "range": "40-60",
+ "count": 21,
+ }),
+ )
+
+ def __init__(self, match):
+ DerpibooruExtractor.__init__(self, match)
+ query, tags = match.groups()
+ self.params = text.parse_query(query) if query else {"q": tags}
+
+ def metadata(self):
+ return {"search_tags": self.params.get("q", "")}
+
+ def posts(self):
+ url = self.root + "/api/v1/json/search/images"
+ return self._pagination(url, self.params)
+
+
+class DerpibooruGalleryExtractor(DerpibooruExtractor):
+ """Extractor for galleries on derpibooru.org"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "galleries",
+ "{gallery[id]} {gallery[title]}")
+ pattern = BASE_PATTERN + r"/galleries/(\d+)"
+ test = ("https://derpibooru.org/galleries/1", {
+ "pattern": r"https://derpicdn\.net/img/view/\d+/\d+/\d+/\d+[^/]+$",
+ "keyword": {
+ "gallery": {
+ "description": "Indexes start at 1 :P",
+ "id": 1,
+ "spoiler_warning": "",
+ "thumbnail_id": 1,
+ "title": "The Very First Gallery",
+ "user": "DeliciousBlackInk",
+ "user_id": 365446,
+ },
+ },
+ })
+
+ def __init__(self, match):
+ DerpibooruExtractor.__init__(self, match)
+ self.gallery_id = match.group(1)
+
+ def metadata(self):
+ url = self.root + "/api/v1/json/search/galleries"
+ params = {"q": "id:" + self.gallery_id}
+ galleries = self.request(url, params=params).json()["galleries"]
+ if not galleries:
+ raise exception.NotFoundError("gallery")
+ return {"gallery": galleries[0]}
+
+ def posts(self):
+ gallery_id = "gallery_id:" + self.gallery_id
+ url = self.root + "/api/v1/json/search/images"
+ params = {"sd": "desc", "sf": gallery_id, "q" : gallery_id}
+ return self._pagination(url, params)
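
A standalone sketch of the pagination loop the new extractor is built on, using only the public Derpibooru JSON API and the requests package; like _pagination() above, it stops once a page comes back shorter than per_page (the key/filter_id options would be added to params the same way):

    import requests

    def search_images(query, per_page=50):
        url = "https://derpibooru.org/api/v1/json/search/images"
        params = {"q": query, "page": 1, "per_page": per_page}
        while True:
            data = requests.get(url, params=params, timeout=30).json()
            yield from data["images"]
            if len(data["images"]) < per_page:
                return
            params["page"] += 1

    for image in search_images("cute"):
        print(image["id"], image["view_url"])
        break
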
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 456a173..a9c63a9 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -588,10 +588,13 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
def deviations(self):
folders = self.api.collections_folders(self.user)
if self.flat:
- return itertools.chain.from_iterable(
+ deviations = itertools.chain.from_iterable(
self.api.collections(self.user, folder["folderid"])
for folder in folders
)
+ if self.offset:
+ deviations = util.advance(deviations, self.offset)
+ return deviations
return self._folder_urls(
folders, "favourites", DeviantartCollectionExtractor)
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 8a03dc9..81f2bc2 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,37 +11,26 @@
from .common import Extractor, Message, generate_extractors
from .. import text
import itertools
-import operator
-class FoolfuukaThreadExtractor(Extractor):
+class FoolfuukaExtractor(Extractor):
"""Base extractor for FoolFuuka based boards/archives"""
basecategory = "foolfuuka"
- subcategory = "thread"
- directory_fmt = ("{category}", "{board[shortname]}",
- "{thread_num}{title:? - //}")
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
- pattern_fmt = r"/([^/]+)/thread/(\d+)"
external = "default"
def __init__(self, match):
Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
self.session.headers["Referer"] = self.root
if self.external == "direct":
self.remote = self._remote_direct
def items(self):
- op = True
- yield Message.Version, 1
+ yield Message.Directory, self.metadata()
for post in self.posts():
- if op:
- yield Message.Directory, post
- op = False
- if not post["media"]:
- continue
-
media = post["media"]
+ if not media:
+ continue
url = media["media_link"]
if not url and "remote_media_link" in media:
@@ -53,17 +42,11 @@ class FoolfuukaThreadExtractor(Extractor):
media["media"].rpartition(".")
yield Message.Url, url, post
- def posts(self):
- """Return an iterable with all posts in this thread"""
- url = self.root + "/_/api/chan/thread/"
- params = {"board": self.board, "num": self.thread}
- data = self.request(url, params=params).json()[self.thread]
-
- # sort post-objects by key
- posts = sorted(data.get("posts", {}).items())
- posts = map(operator.itemgetter(1), posts)
+ def metadata(self):
+ """ """
- return itertools.chain((data["op"],), posts)
+ def posts(self):
+ """Return an iterable with all relevant posts"""
def remote(self, media):
"""Resolve a remote media link"""
@@ -76,6 +59,117 @@ class FoolfuukaThreadExtractor(Extractor):
return media["remote_media_link"]
+class FoolfuukaThreadExtractor(FoolfuukaExtractor):
+ """Base extractor for threads on FoolFuuka based boards/archives"""
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board[shortname]}",
+ "{thread_num}{title:? - //}")
+ pattern_fmt = r"/([^/?#]+)/thread/(\d+)"
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+ self.data = None
+
+ def metadata(self):
+ url = self.root + "/_/api/chan/thread/"
+ params = {"board": self.board, "num": self.thread}
+ self.data = self.request(url, params=params).json()[self.thread]
+ return self.data["op"]
+
+ def posts(self):
+ posts = self.data.get("posts")
+ if posts:
+ posts = list(posts.values())
+ posts.sort(key=lambda p: p["timestamp"])
+ else:
+ posts = ()
+ return itertools.chain((self.data["op"],), posts)
+
+
+class FoolfuukaBoardExtractor(FoolfuukaExtractor):
+ """Base extractor for FoolFuuka based boards/archives"""
+ subcategory = "board"
+ pattern_fmt = r"/([^/?#]+)/\d*$"
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ index_base = "{}/_/api/chan/index/?board={}&page=".format(
+ self.root, self.board)
+ thread_base = "{}/{}/thread/".format(self.root, self.board)
+
+ for page in itertools.count(1):
+ with self.request(index_base + format(page)) as response:
+ try:
+ threads = response.json()
+ except ValueError:
+ threads = None
+
+ if not threads:
+ return
+
+ for num, thread in threads.items():
+ thread["url"] = thread_base + format(num)
+ thread["_extractor"] = self.childclass
+ yield Message.Queue, thread["url"], thread
+
+
+class FoolfuukaSearchExtractor(FoolfuukaExtractor):
+ """Base extractor for search results on FoolFuuka based boards/archives"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "search", "{search}")
+ pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
+ request_interval = 1.0
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+ board, search = match.groups()
+
+ self.params = params = {}
+ args = search.split("/")
+ key = None
+
+ for arg in args:
+ if key:
+ params[key] = text.unescape(arg)
+ key = None
+ else:
+ key = arg
+ if board != "_":
+ params["boards"] = board
+
+ def metadata(self):
+ return {"search": self.params.get("text", "")}
+
+ def posts(self):
+ url = self.root + "/_/api/chan/search/"
+ params = self.params.copy()
+ params["page"] = text.parse_int(params.get("page"), 1)
+ if "filter" not in params:
+ params["filter"] = "text"
+
+ while True:
+ try:
+ data = self.request(url, params=params).json()
+ except ValueError:
+ return
+
+ if isinstance(data, dict):
+ if data.get("error"):
+ return
+ posts = data["0"]["posts"]
+ elif isinstance(data, list):
+ posts = data[0]["posts"]
+ else:
+ return
+
+ yield from posts
+ params["page"] += 1
+
+
EXTRACTORS = {
"4plebs": {
"name": "_4plebs",
@@ -84,6 +178,8 @@ EXTRACTORS = {
"test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
"url": "07452944164b602502b02b24521f8cee5c484d2a",
}),
+ "test-board": ("https://archive.4plebs.org/tg/",),
+ "test-search": ("https://archive.4plebs.org/_/search/text/test/",),
},
"archivedmoe": {
"root": "https://archived.moe",
@@ -96,6 +192,8 @@ EXTRACTORS = {
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}),
),
+ "test-board": ("https://archived.moe/gd/",),
+ "test-search": ("https://archived.moe/_/search/text/test/",),
},
"archiveofsins": {
"root": "https://archiveofsins.com",
@@ -104,6 +202,8 @@ EXTRACTORS = {
"url": "f612d287087e10a228ef69517cf811539db9a102",
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
}),
+ "test-board": ("https://archiveofsins.com/h/",),
+ "test-search": ("https://archiveofsins.com/_/search/text/test/",),
},
"b4k": {
"root": "https://arch.b4k.co",
@@ -111,18 +211,24 @@ EXTRACTORS = {
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
+ "test-board": ("https://arch.b4k.co/meta/",),
+ "test-search": ("https://arch.b4k.co/_/search/text/test/",),
},
"desuarchive": {
"root": "https://desuarchive.org",
"test-thread": ("https://desuarchive.org/a/thread/159542679/", {
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
}),
+ "test-board": ("https://desuarchive.org/a/",),
+ "test-search": ("https://desuarchive.org/_/search/text/test/",),
},
"fireden": {
"root": "https://boards.fireden.net",
"test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
}),
+ "test-board": ("https://boards.fireden.net/sci/",),
+ "test-search": ("https://boards.fireden.net/_/search/text/test/",),
},
"nyafuu": {
"root": "https://archive.nyafuu.org",
@@ -130,6 +236,8 @@ EXTRACTORS = {
"test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", {
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
}),
+ "test-board": ("https://archive.nyafuu.org/c/",),
+ "test-search": ("https://archive.nyafuu.org/_/search/text/test/",),
},
"rbt": {
"root": "https://rbt.asia",
@@ -142,6 +250,8 @@ EXTRACTORS = {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
),
+ "test-board": ("https://rbt.asia/g/",),
+ "test-search": ("https://rbt.asia/_/search/text/test/",),
},
"thebarchive": {
"root": "https://thebarchive.com",
@@ -149,9 +259,14 @@ EXTRACTORS = {
"test-thread": ("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
}),
+ "test-board": ("https://thebarchive.com/b/",),
+ "test-search": ("https://thebarchive.com/_/search/text/test/",),
},
+ "_ckey": "childclass",
}
generate_extractors(EXTRACTORS, globals(), (
FoolfuukaThreadExtractor,
+ FoolfuukaBoardExtractor,
+ FoolfuukaSearchExtractor,
))
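
A sketch of how the new search extractor's __init__ turns a FoolFuuka search path such as "_/search/text/test/page/2" into API parameters: the segments after "search" alternate between key and value, and "_" means all boards. This uses dict(zip(...)) instead of the key-toggle loop above, but the result is the same:

    def parse_search_path(path):
        board, _, rest = path.partition("/search/")
        segments = rest.strip("/").split("/")
        params = dict(zip(segments[::2], segments[1::2]))
        if board != "_":
            params["boards"] = board
        return params

    print(parse_search_path("_/search/text/test/page/2"))
    # {'text': 'test', 'page': '2'}
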
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index f878dbd..4e62165 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2020 Mike Fährmann
+# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,7 +22,7 @@ class GfycatExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.key = match.group(1)
+ self.key = match.group(1).lower()
self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif")
def items(self):
@@ -33,6 +33,7 @@ class GfycatExtractor(Extractor):
continue
url = self._select_format(gfycat)
gfycat.update(metadata)
+ gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
@@ -41,8 +42,11 @@ class GfycatExtractor(Extractor):
key = fmt + "Url"
if key in gfyitem:
url = gfyitem[key]
+ if url.startswith("http:"):
+ url = "https" + url[4:]
gfyitem["extension"] = url.rpartition(".")[2]
return url
+ gfyitem["extension"] = ""
return ""
def metadata(self):
@@ -102,6 +106,7 @@ class GfycatImageExtractor(GfycatExtractor):
"title": "Bottom's up",
"username": "jackson3oh3",
"createDate": 1495884169,
+ "date": "dt:2017-05-27 11:22:49",
"md5": "a4796e05b0db9ba9ce5140145cd318aa",
"width": 400,
"height": 224,
@@ -143,6 +148,7 @@ class GfycatImageExtractor(GfycatExtractor):
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
return
url = self._select_format(gfycat)
+ gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
@@ -190,7 +196,11 @@ class GfycatAPI():
while True:
data = self._call(endpoint, params)
gfycats = data["gfycats"]
- yield from gfycats
+
+ for gfycat in gfycats:
+ if "gfyName" not in gfycat:
+ gfycat.update(self.gfycat(gfycat["gfyId"]))
+ yield gfycat
if "found" not in data and len(gfycats) < params["count"] or \
not data["gfycats"]:
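
The gfycat hunks derive the new "date" field from the createDate timestamp via text.parse_timestamp. A sketch of that conversion, assuming it behaves like a None-safe UTC fromtimestamp; the value below matches the test data in the hunk above:

    from datetime import datetime, timezone

    def parse_timestamp(ts, default=None):
        try:
            return datetime.fromtimestamp(
                int(ts), timezone.utc).replace(tzinfo=None)
        except (TypeError, ValueError):
            return default

    print(parse_timestamp(1495884169))  # 2017-05-27 11:22:49
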
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index 519453b..6d3ed74 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
r"/(?:view|read)/(\d+)")
test = (
("https://hentainexus.com/view/5688", {
- "url": "746d0043e20030f1171aae5ea113176607302517",
+ "url": "f1761895fb7aca2f6ff9e09f839c0ee2fa7a5e54",
"keyword": "5e5bb4b1553b1c6e126b198f9ae017a1a5d0a5ad",
}),
("https://hentainexus.com/read/5688"),
@@ -60,12 +60,15 @@ class HentainexusGalleryExtractor(GalleryExtractor):
def images(self, _):
url = "{}/read/{}".format(self.root, self.gallery_id)
page = self.request(url).text
-
data = json.loads(self._decode(text.extract(
page, 'initReader("', '"')[0]))
+
+ pages = data.get("pages")
+ if pages:
+ return [(page, None) for page in pages]
+
base = data["b"] + data["r"]
gid = data["i"]
-
return [
("{}{}/{}/{}".format(base, page["h"], gid, page["p"]), None)
for page in data["f"]
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 930c8b4..c3e7fe4 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -609,8 +609,10 @@ class InstagramStoriesExtractor(InstagramExtractor):
request_interval = 1.0
def __init__(self, match):
- InstagramExtractor.__init__(self, match)
self.highlight_id, self.user = match.groups()
+ if self.highlight_id:
+ self.subcategory = InstagramHighlightsExtractor.subcategory
+ InstagramExtractor.__init__(self, match)
def posts(self):
if self.highlight_id:
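
The instagram fix works because class-level defaults consulted during the base-class __init__ must be overridden on the instance before calling it, which is why the subcategory swap now precedes Extractor.__init__. A toy illustration of the pattern (the class names here are illustrative, not gallery-dl's real classes):

    class Extractor:
        subcategory = "stories"

        def __init__(self):
            # e.g. config lookups keyed on self.subcategory happen here
            print("configured as:", self.subcategory)

    class StoriesExtractor(Extractor):
        def __init__(self, highlight_id):
            if highlight_id:
                self.subcategory = "highlights"
            super().__init__()

    StoriesExtractor("123")  # configured as: highlights
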
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 739e67e..334412d 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -22,6 +22,7 @@ class PinterestExtractor(Extractor):
category = "pinterest"
filename_fmt = "{category}_{id}.{extension}"
archive_fmt = "{id}"
+ root = "https://www.pinterest.com"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -123,7 +124,7 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board"
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?$"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/(?!_saved)([^/?#&]+)/?$"
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
@@ -167,6 +168,30 @@ class PinterestBoardExtractor(PinterestExtractor):
return self.api.board_pins(board["id"])
+class PinterestUserExtractor(PinterestExtractor):
+ """Extractor for a user's boards"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$"
+ test = (
+ ("https://www.pinterest.de/g1952849/", {
+ "pattern": PinterestBoardExtractor.pattern,
+ "count": ">= 2",
+ }),
+ ("https://www.pinterest.de/g1952849/_saved/"),
+ )
+
+ def __init__(self, match):
+ PinterestExtractor.__init__(self, match)
+ self.user = text.unquote(match.group(1))
+
+ def items(self):
+ for board in self.api.boards(self.user):
+ url = board.get("url")
+ if url:
+ board["_extractor"] = PinterestBoardExtractor
+ yield Message.Queue, self.root + url, board
+
+
class PinterestSectionExtractor(PinterestExtractor):
"""Extractor for board sections on pinterest.com"""
subcategory = "section"
@@ -301,6 +326,18 @@ class PinterestAPI():
"field_set_key": "detailed"}
return self._call("Board", options)["resource_response"]["data"]
+ def boards(self, user):
+ """Yield all boards from 'user'"""
+ options = {
+ "sort" : "last_pinned_to",
+ "field_set_key" : "profile_grid_item",
+ "filter_stories" : False,
+ "username" : user,
+ "page_size" : 25,
+ "include_archived": True,
+ }
+ return self._pagination("Boards", options)
+
def board_pins(self, board_id):
"""Yield all pins of a specific board"""
options = {"board_id": board_id}
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 0a85be6..8611dcb 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -15,7 +15,7 @@ from .. import text
class RedgifsExtractor(GfycatExtractor):
"""Base class for redgifs extractors"""
category = "redgifs"
- root = "https://www.redgifs.com/"
+ root = "https://www.redgifs.com"
class RedgifsUserExtractor(RedgifsExtractor):
@@ -39,8 +39,8 @@ class RedgifsSearchExtractor(RedgifsExtractor):
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?#]+)"
test = ("https://www.redgifs.com/gifs/browse/jav", {
"pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
- "range": "100-300",
- "count": "> 200",
+ "range": "1-10",
+ "count": 10,
})
def metadata(self):
@@ -54,7 +54,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
class RedgifsImageExtractor(RedgifsExtractor):
"""Extractor for individual gifs from redgifs.com"""
subcategory = "image"
- pattern = (r"(?:https?://)?(?:www\.)?(?:redgifs\.com/watch"
+ pattern = (r"(?:https?://)?(?:www\.)?(?:redgifs\.com/(?:watch|ifr)"
r"|gifdeliverynetwork.com)/([A-Za-z]+)")
test = (
("https://redgifs.com/watch/foolishforkedabyssiniancat", {
@@ -62,6 +62,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
r"/FoolishForkedAbyssiniancat\.mp4",
"content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
}),
+ ("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"),
("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
)
@@ -70,6 +71,6 @@ class RedgifsImageExtractor(RedgifsExtractor):
class RedgifsAPI(GfycatAPI):
- API_ROOT = "https://napi.redgifs.com/"
+ API_ROOT = "https://napi.redgifs.com"
ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
"AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 9e64eac..6a499a3 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -9,6 +9,7 @@
"""Extractors for https://sankaku.app/"""
from .booru import BooruExtractor
+from .common import Message
from .. import text, exception
from ..cache import cache
import collections
@@ -163,6 +164,31 @@ class SankakuPostExtractor(SankakuExtractor):
return SankakuAPI(self).posts(self.post_id)
+class SankakuBooksExtractor(SankakuExtractor):
+ """Extractor for books by tag search on sankaku.app"""
+ subcategory = "books"
+ pattern = BASE_PATTERN + r"/books/?\?([^#]*)"
+ test = (
+ ("https://sankaku.app/books?tags=aiue_oka", {
+ "range": "1-20",
+ "count": 20,
+ }),
+ ("https://beta.sankakucomplex.com/books?tags=aiue_oka"),
+ )
+
+ def __init__(self, match):
+ SankakuExtractor.__init__(self, match)
+ query = text.parse_query(match.group(1))
+ self.tags = text.unquote(query.get("tags", "").replace("+", " "))
+
+ def items(self):
+ params = {"tags": self.tags, "pool_type": "0"}
+ for pool in SankakuAPI(self).pools_keyset(params):
+ pool["_extractor"] = SankakuPoolExtractor
+ url = "https://sankaku.app/books/{}".format(pool["id"])
+ yield Message.Queue, url, pool
+
+
class SankakuAPI():
"""Interface for the sankaku.app API"""
@@ -178,6 +204,9 @@ class SankakuAPI():
params = {"lang": "en"}
return self._call("/pools/" + pool_id, params)
+ def pools_keyset(self, params):
+ return self._pagination("/pools/keyset", params)
+
def posts(self, post_id):
params = {
"lang" : "en",
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index a77ea06..b769912 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -239,30 +239,29 @@ class TwitterExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- url = "https://mobile.twitter.com/i/nojs_router"
- params = {"path": "/login"}
- headers = {"Referer": self.root + "/", "Origin": self.root}
- page = self.request(
- url, method="POST", params=params, headers=headers, data={}).text
+ token = util.generate_csrf_token()
+ self.session.cookies.clear()
+ self.request(self.root + "/login")
- pos = page.index('name="authenticity_token"')
- token = text.extract(page, 'value="', '"', pos)[0]
-
- url = "https://mobile.twitter.com/sessions"
+ url = self.root + "/sessions"
+ cookies = {
+ "_mb_tk": token,
+ }
data = {
+ "redirect_after_login" : "/",
+ "remember_me" : "1",
"authenticity_token" : token,
+ "wfa" : "1",
+ "ui_metrics" : "{}",
"session[username_or_email]": username,
"session[password]" : password,
- "remember_me" : "1",
- "wfa" : "1",
- "commit" : "+Log+in+",
- "ui_metrics" : "",
}
- response = self.request(url, method="POST", data=data)
+ response = self.request(
+ url, method="POST", cookies=cookies, data=data)
+
cookies = {
cookie.name: cookie.value
for cookie in self.session.cookies
- if cookie.domain == self.cookiedomain
}
if "/error" in response.url or "auth_token" not in cookies:
@@ -464,15 +463,17 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
+
+ self.root = "https://twitter.com/i/api"
self.headers = {
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
"4FA33AGWWjCpTnA",
"x-guest-token": None,
+ "x-twitter-auth-type": None,
"x-twitter-client-language": "en",
"x-twitter-active-user": "yes",
"x-csrf-token": None,
- "Origin": "https://twitter.com",
"Referer": "https://twitter.com/",
}
self.params = {
@@ -487,8 +488,8 @@ class TwitterAPI():
"skip_status": "1",
"cards_platform": "Web-12",
"include_cards": "1",
- "include_composer_source": "true",
"include_ext_alt_text": "true",
+ "include_quote_count": "true",
"include_reply_count": "1",
"tweet_mode": "extended",
"include_entities": "true",
@@ -497,11 +498,9 @@ class TwitterAPI():
"include_ext_media_availability": "true",
"send_error_codes": "true",
"simple_quoted_tweet": "true",
- # "count": "20",
"count": "100",
"cursor": None,
- "ext": "mediaStats,highlightedLabel,cameraMoment",
- "include_quote_count": "true",
+ "ext": "mediaStats,highlightedLabel",
}
cookies = self.extractor.session.cookies
@@ -516,17 +515,15 @@ class TwitterAPI():
if cookies.get("auth_token", domain=cookiedomain):
# logged in
- self.root = "https://twitter.com/i/api/"
self.headers["x-twitter-auth-type"] = "OAuth2Session"
else:
# guest
- self.root = "https://api.twitter.com/"
guest_token = self._guest_token()
cookies.set("gt", guest_token, domain=cookiedomain)
self.headers["x-guest-token"] = guest_token
def tweet(self, tweet_id):
- endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
+ endpoint = "/2/timeline/conversation/{}.json".format(tweet_id)
tweets = []
for tweet in self._pagination(endpoint):
if tweet["id_str"] == tweet_id or \
@@ -540,43 +537,46 @@ class TwitterAPI():
def timeline_profile(self, screen_name):
user_id = self._user_id_by_screen_name(screen_name)
- endpoint = "2/timeline/profile/{}.json".format(user_id)
- return self._pagination(endpoint)
+ endpoint = "/2/timeline/profile/{}.json".format(user_id)
+ params = self.params.copy()
+ params["include_tweet_replies"] = "false"
+ return self._pagination(endpoint, params)
def timeline_media(self, screen_name):
user_id = self._user_id_by_screen_name(screen_name)
- endpoint = "2/timeline/media/{}.json".format(user_id)
+ endpoint = "/2/timeline/media/{}.json".format(user_id)
return self._pagination(endpoint)
def timeline_favorites(self, screen_name):
user_id = self._user_id_by_screen_name(screen_name)
- endpoint = "2/timeline/favorites/{}.json".format(user_id)
+ endpoint = "/2/timeline/favorites/{}.json".format(user_id)
+ params = self.params.copy()
+ params["sorted_by_time"] = "true"
return self._pagination(endpoint)
def timeline_bookmark(self):
- endpoint = "2/timeline/bookmark.json"
+ endpoint = "/2/timeline/bookmark.json"
return self._pagination(endpoint)
def timeline_list(self, list_id):
- endpoint = "2/timeline/list.json"
+ endpoint = "/2/timeline/list.json"
params = self.params.copy()
params["list_id"] = list_id
params["ranking_mode"] = "reverse_chronological"
return self._pagination(endpoint, params)
def search(self, query):
- endpoint = "2/search/adaptive.json"
+ endpoint = "/2/search/adaptive.json"
params = self.params.copy()
params["q"] = query
params["tweet_search_mode"] = "live"
params["query_source"] = "typed_query"
params["pc"] = "1"
params["spelling_corrections"] = "1"
- return self._pagination(
- endpoint, params, "sq-I-t-", "sq-cursor-bottom")
+ return self._pagination(endpoint, params)
def list_members(self, list_id):
- endpoint = "graphql/M74V2EwlxxVYGB4DbyAphQ/ListMembers"
+ endpoint = "/graphql/3pV4YlpljXUTFAa1jVNWQw/ListMembers"
variables = {
"listId": list_id,
"count" : 20,
@@ -586,7 +586,7 @@ class TwitterAPI():
return self._pagination_members(endpoint, variables)
def list_by_rest_id(self, list_id):
- endpoint = "graphql/LXXTUytSX1QY-2p8Xp9BFA/ListByRestId"
+ endpoint = "/graphql/EhaI2uiCBJI97e28GN8WjQ/ListByRestId"
params = {"variables": '{"listId":"' + list_id + '"'
',"withUserResult":false}'}
try:
@@ -595,7 +595,7 @@ class TwitterAPI():
raise exception.NotFoundError("list")
def user_by_screen_name(self, screen_name):
- endpoint = "graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName"
+ endpoint = "/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName"
params = {"variables": '{"screen_name":"' + screen_name + '"'
',"withHighlightedLabel":true}'}
try:
@@ -610,14 +610,16 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
- endpoint = "1.1/guest/activate.json"
- return self._call(endpoint, None, "POST")["guest_token"]
+ root = "https://api.twitter.com"
+ endpoint = "/1.1/guest/activate.json"
+ return self._call(endpoint, None, root, "POST")["guest_token"]
- def _call(self, endpoint, params, method="GET"):
- url = self.root + endpoint
+ def _call(self, endpoint, params, root=None, method="GET"):
+ if root is None:
+ root = self.root
response = self.extractor.request(
- url, method=method, params=params, headers=self.headers,
- fatal=None)
+ root + endpoint, method=method, params=params,
+ headers=self.headers, fatal=None)
# update 'x-csrf-token' header (#1170)
csrf_token = response.cookies.get("ct0")
@@ -641,11 +643,11 @@ class TwitterAPI():
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, msg)
- def _pagination(self, endpoint, params=None,
- entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
+ def _pagination(self, endpoint, params=None):
if params is None:
params = self.params.copy()
original_retweets = (self.extractor.retweets == "original")
+ pinned_tweet = True
while True:
cursor = tweet = None
@@ -654,48 +656,65 @@ class TwitterAPI():
instr = data["timeline"]["instructions"]
if not instr:
return
+ tweet_ids = []
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
+ if pinned_tweet:
+ if "pinEntry" in instr[-1]:
+ tweet_ids.append(instr[-1]["pinEntry"]["entry"]["content"]
+ ["item"]["content"]["tweet"]["id"])
+ pinned_tweet = False
+
+ # collect tweet IDs and cursor value
for entry in instr[0]["addEntries"]["entries"]:
+ entry_startswith = entry["entryId"].startswith
+
+ if entry_startswith(("tweet-", "sq-I-t-")):
+ tweet_ids.append(
+ entry["content"]["item"]["content"]["tweet"]["id"])
- if entry["entryId"].startswith(entry_tweet):
- try:
- tweet = tweets[
- entry["content"]["item"]["content"]["tweet"]["id"]]
- except KeyError:
- self.extractor.log.debug(
- "Skipping %s (deleted)",
- entry["entryId"][len(entry_tweet):])
- continue
-
- if "retweeted_status_id_str" in tweet:
- retweet = tweets.get(tweet["retweeted_status_id_str"])
- if original_retweets:
- if not retweet:
- continue
- retweet["_retweet_id_str"] = tweet["id_str"]
- tweet = retweet
- elif retweet:
- tweet["author"] = users[retweet["user_id_str"]]
- tweet["user"] = users[tweet["user_id_str"]]
- yield tweet
-
- if "quoted_status_id_str" in tweet:
- quoted = tweets.get(tweet["quoted_status_id_str"])
- if quoted:
- quoted["author"] = users[quoted["user_id_str"]]
- quoted["user"] = tweet["user"]
- quoted["quoted"] = True
- yield quoted
-
- elif entry["entryId"].startswith(entry_cursor):
+ elif entry_startswith("homeConversation-"):
+ tweet_ids.extend(
+ entry["content"]["timelineModule"]["metadata"]
+ ["conversationMetadata"]["allTweetIds"][::-1])
+
+ elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")):
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse"):
# keep going even if there are no tweets
tweet = True
cursor = cursor["value"]
+ # process tweets
+ for tweet_id in tweet_ids:
+ try:
+ tweet = tweets[tweet_id]
+ except KeyError:
+ self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
+ continue
+
+ if "retweeted_status_id_str" in tweet:
+ retweet = tweets.get(tweet["retweeted_status_id_str"])
+ if original_retweets:
+ if not retweet:
+ continue
+ retweet["_retweet_id_str"] = tweet["id_str"]
+ tweet = retweet
+ elif retweet:
+ tweet["author"] = users[retweet["user_id_str"]]
+ tweet["user"] = users[tweet["user_id_str"]]
+ yield tweet
+
+ if "quoted_status_id_str" in tweet:
+ quoted = tweets.get(tweet["quoted_status_id_str"])
+ if quoted:
+ quoted["author"] = users[quoted["user_id_str"]]
+ quoted["user"] = tweet["user"]
+ quoted["quoted"] = True
+ yield quoted
+
+ # update cursor value
if "replaceEntry" in instr[-1] :
cursor = (instr[-1]["replaceEntry"]["entry"]
["content"]["operation"]["cursor"]["value"])
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 9238590..428c6b5 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -40,7 +40,7 @@ class WikiartExtractor(Extractor):
def paintings(self):
"""Return an iterable containing all relevant 'painting' objects"""
- def _pagination(self, url, extra_params=None, key="Paintings"):
+ def _pagination(self, url, extra_params=None, key="Paintings", stop=False):
headers = {
"X-Requested-With": "XMLHttpRequest",
"Referer": url,
@@ -60,6 +60,8 @@ class WikiartExtractor(Extractor):
if not items:
return
yield from items
+ if stop:
+ return
params["page"] += 1
@@ -67,7 +69,7 @@ class WikiartArtistExtractor(WikiartExtractor):
"""Extractor for an artist's paintings on wikiart.org"""
subcategory = "artist"
directory_fmt = ("{category}", "{artist[artistName]}")
- pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)"
+ pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
test = ("https://www.wikiart.org/en/thomas-cole", {
"url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98",
"keyword": "6d92913c55675e05553f000cfee5daff0b4107cf",
@@ -75,18 +77,50 @@ class WikiartArtistExtractor(WikiartExtractor):
def __init__(self, match):
WikiartExtractor.__init__(self, match)
- self.artist = match.group(2)
+ self.artist_name = match.group(2)
+ self.artist = None
def metadata(self):
- url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist)
- return {"artist": self.request(url).json()}
+ url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist_name)
+ self.artist = self.request(url).json()
+ return {"artist": self.artist}
def paintings(self):
url = "{}/{}/{}/mode/all-paintings".format(
- self.root, self.lang, self.artist)
+ self.root, self.lang, self.artist_name)
return self._pagination(url)
+class WikiartImageExtractor(WikiartArtistExtractor):
+ """Extractor for individual paintings on wikiart.org"""
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
+ test = (
+ ("https://www.wikiart.org/en/thomas-cole/the-departure-1838", {
+ "url": "4d9fd87680a2620eaeaf1f13e3273475dec93231",
+ "keyword": "a1b083d500ce2fd364128e35b026e4ca526000cc",
+ }),
+ # no year or '-' in slug
+ ("https://www.wikiart.org/en/huang-shen/summer", {
+ "url": "d7f60118c34067b2b37d9577e412dc1477b94207",
+ }),
+ )
+
+ def __init__(self, match):
+ WikiartArtistExtractor.__init__(self, match)
+ self.title = match.group(3)
+
+ def paintings(self):
+ title, sep, year = self.title.rpartition("-")
+ if not sep or not year.isdecimal():
+ title = self.title
+ url = "{}/{}/Search/{} {}".format(
+ self.root, self.lang,
+ self.artist.get("artistName") or self.artist_name, title,
+ )
+ return self._pagination(url, stop=True)
+
+
class WikiartArtworksExtractor(WikiartExtractor):
"""Extractor for artwork collections on wikiart.org"""
subcategory = "artworks"
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 01537d6..367b934 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -136,6 +136,11 @@ def build_parser():
help="Print URLs instead of downloading",
)
output.add_argument(
+ "-G",
+ dest="list_urls", action="store_const", const=128,
+ help=argparse.SUPPRESS,
+ )
+ output.add_argument(
"-j", "--dump-json",
dest="jobtype", action="store_const", const=job.DataJob,
help="Print JSON information",
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 27f9c03..71a67c1 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -86,7 +86,8 @@ class MetadataPP(PostProcessor):
return (pathfmt.filename or "metadata") + "." + self.extension
def _filename_custom(self, pathfmt):
- return self._filename_fmt(pathfmt.kwdict)
+ return pathfmt.clean_path(pathfmt.clean_segment(
+ self._filename_fmt(pathfmt.kwdict)))
def _filename_extfmt(self, pathfmt):
kwdict = pathfmt.kwdict
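
The metadata fix runs user-supplied filename formats through the same sanitizing as regular path segments, since a format string can expand to text containing separators or control characters. A sketch of the two clean-up steps, assuming unix defaults (path-remove strips control characters, path-restrict replaces "/"); the input mirrors the test_postprocessor.py change below:

    import re

    def clean(name):
        name = re.sub(r"[\x00-\x1f\x7f]", "", name)  # path-remove
        return name.replace("/", "_")                # path-restrict (unix)

    print(clean("test_file_/meta/\n\r.data"))  # test_file__meta_.data
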
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index d91d29a..ffd686e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -506,6 +506,7 @@ class Formatter():
- "c": calls str.capitalize
- "C": calls string.capwords
- "t": calls str.strip
+ - "d": calls text.parse_timestamp
- "U": calls urllib.parse.unquote
- "S": calls util.to_string()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
@@ -537,6 +538,7 @@ class Formatter():
"c": str.capitalize,
"C": string.capwords,
"t": str.strip,
+ "d": text.parse_timestamp,
"U": urllib.parse.unquote,
"S": to_string,
"s": str,
@@ -767,13 +769,14 @@ class PathFormat():
restrict = extractor.config("path-restrict", "auto")
replace = extractor.config("path-replace", "_")
-
if restrict == "auto":
restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
elif restrict == "unix":
restrict = "/"
elif restrict == "windows":
restrict = "\\\\|/<>:\"?*"
+ elif restrict == "ascii":
+ restrict = "^0-9A-Za-z_."
self.clean_segment = self._build_cleanfunc(restrict, replace)
remove = extractor.config("path-remove", "\x00-\x1f\x7f")
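
For reference, the new "ascii" value expands to the character class shown above, replacing everything outside 0-9, A-Z, a-z, '_' and '.'. A sketch of the resulting clean function, assuming _build_cleanfunc compiles the class into a re.sub as in previous releases:

    import re

    clean = re.compile("[^0-9A-Za-z_.]").sub
    print(clean("_", "fäv/photo 01.jpg"))  # f_v_photo_01.jpg
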
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 21541be..601eeed 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.16.1"
+__version__ = "1.16.3"
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 74e8742..4e98a97 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -306,14 +306,14 @@ class MetadataTest(BasePostprocessorTest):
def test_metadata_filename(self):
self._create({
- "filename" : "{category}_{filename}_meta.data",
+ "filename" : "{category}_{filename}_/meta/\n\r.data",
"extension-format": "json",
})
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "test_file_meta.data"
+ path = self.pathfmt.realdirectory + "test_file__meta_.data"
m.assert_called_once_with(path, "w", encoding="utf-8")
@staticmethod
diff --git a/test/test_util.py b/test/test_util.py
index fd659a0..159c4bc 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -14,6 +14,7 @@ import unittest
import io
import random
import string
+import datetime
import http.cookiejar
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -267,6 +268,7 @@ class TestFormatter(unittest.TestCase):
"n": None,
"s": " \n\r\tSPACE ",
"u": "%27%3C%20/%20%3E%27",
+ "t": 1262304000,
"name": "Name",
"title1": "Title",
"title2": "",
@@ -289,6 +291,9 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a!S}", self.kwdict["a"])
self._run_test("{l!S}", "a, b, c")
self._run_test("{n!S}", "")
+ self._run_test("{t!d}", datetime.datetime(2010, 1, 1))
+ self._run_test("{t!d:%Y-%m-%d}", "2010-01-01")
+
with self.assertRaises(KeyError):
self._run_test("{a!q}", "hello world")