author     Unit 193 <unit193@unit193.net>  2021-08-13 17:45:31 -0400
committer  Unit 193 <unit193@unit193.net>  2021-08-13 17:45:31 -0400
commit    d50ba9cfe80f00e02ca9a4714f75699c00e67128 (patch)
tree      01fe7b46370d5068b8c692ae5ea95cab4d734bd8
parent    873d9a628e9412a79bdc64cd962470749de3425b (diff)
download  gallery-dl-d50ba9cfe80f00e02ca9a4714f75699c00e67128.tar.bz2
          gallery-dl-d50ba9cfe80f00e02ca9a4714f75699c00e67128.tar.xz
          gallery-dl-d50ba9cfe80f00e02ca9a4714f75699c00e67128.tar.zst

New upstream version 1.18.3 (upstream/1.18.3)
-rw-r--r--  CHANGELOG.md                          28
-rw-r--r--  PKG-INFO                               6
-rw-r--r--  README.rst                             4
-rw-r--r--  data/man/gallery-dl.1                  2
-rw-r--r--  data/man/gallery-dl.conf.5           101
-rw-r--r--  docs/gallery-dl.conf                  15
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        1
-rw-r--r--  gallery_dl/downloader/http.py         19
-rw-r--r--  gallery_dl/extractor/__init__.py       1
-rw-r--r--  gallery_dl/extractor/bbc.py           53
-rw-r--r--  gallery_dl/extractor/behance.py      381
-rw-r--r--  gallery_dl/extractor/danbooru.py      10
-rw-r--r--  gallery_dl/extractor/deviantart.py    14
-rw-r--r--  gallery_dl/extractor/furaffinity.py   33
-rw-r--r--  gallery_dl/extractor/hitomi.py         2
-rw-r--r--  gallery_dl/extractor/instagram.py      5
-rw-r--r--  gallery_dl/extractor/kemonoparty.py   14
-rw-r--r--  gallery_dl/extractor/luscious.py       6
-rw-r--r--  gallery_dl/extractor/newgrounds.py    55
-rw-r--r--  gallery_dl/extractor/nsfwalbum.py     24
-rw-r--r--  gallery_dl/extractor/reactor.py        7
-rw-r--r--  gallery_dl/extractor/twitter.py       65
-rw-r--r--  gallery_dl/extractor/vk.py             1
-rw-r--r--  gallery_dl/extractor/wikieat.py       96
-rw-r--r--  gallery_dl/util.py                    11
-rw-r--r--  gallery_dl/version.py                  2
-rw-r--r--  test/test_util.py                     16
28 files changed, 699 insertions(+), 279 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72f9c42..1cfd97d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
# Changelog
+## 1.18.3 - 2021-08-13
+### Additions
+- [bbc] add `width` option ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
+- [danbooru] add `external` option ([#1747](https://github.com/mikf/gallery-dl/issues/1747))
+- [furaffinity] add `external` option ([#1492](https://github.com/mikf/gallery-dl/issues/1492))
+- [luscious] add `gif` option ([#1701](https://github.com/mikf/gallery-dl/issues/1701))
+- [newgrounds] add `format` option ([#1729](https://github.com/mikf/gallery-dl/issues/1729))
+- [reactor] add `gif` option ([#1701](https://github.com/mikf/gallery-dl/issues/1701))
+- [twitter] warn about suspended accounts ([#1759](https://github.com/mikf/gallery-dl/issues/1759))
+- [twitter] extend `replies` option ([#1254](https://github.com/mikf/gallery-dl/issues/1254))
+- [twitter] add option to log out and retry when blocked ([#1719](https://github.com/mikf/gallery-dl/issues/1719))
+- [wikieat] add `thread` and `board` extractors ([#1699](https://github.com/mikf/gallery-dl/issues/1699), [#1607](https://github.com/mikf/gallery-dl/issues/1607))
+### Changes
+- [instagram] increase default delay between HTTP requests from 5s to 8s ([#1732](https://github.com/mikf/gallery-dl/issues/1732))
+### Fixes
+- [bbc] improve image dimensions ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
+- [bbc] support multi-page gallery listings ([#1730](https://github.com/mikf/gallery-dl/issues/1730))
+- [behance] fix `collection` extraction
+- [deviantart] get original files for GIF previews ([#1731](https://github.com/mikf/gallery-dl/issues/1731))
+- [furaffinity] fix errors when using `category-transfer` ([#1274](https://github.com/mikf/gallery-dl/issues/1274))
+- [hitomi] fix image URLs ([#1765](https://github.com/mikf/gallery-dl/issues/1765))
+- [instagram] use custom User-Agent header for video downloads ([#1682](https://github.com/mikf/gallery-dl/issues/1682), [#1623](https://github.com/mikf/gallery-dl/issues/1623), [#1580](https://github.com/mikf/gallery-dl/issues/1580))
+- [kemonoparty] fix username extraction ([#1750](https://github.com/mikf/gallery-dl/issues/1750))
+- [kemonoparty] update file server domain ([#1764](https://github.com/mikf/gallery-dl/issues/1764))
+- [newgrounds] fix errors when using `category-transfer` ([#1274](https://github.com/mikf/gallery-dl/issues/1274))
+- [nsfwalbum] retry backend requests when extracting image URLs ([#1733](https://github.com/mikf/gallery-dl/issues/1733), [#1271](https://github.com/mikf/gallery-dl/issues/1271))
+- [vk] prevent exception for empty/private profiles ([#1742](https://github.com/mikf/gallery-dl/issues/1742))
+
## 1.18.2 - 2021-07-23
### Additions
- [bbc] add `gallery` and `programme` extractors ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
diff --git a/PKG-INFO b/PKG-INFO
index fa33df0..58f985a 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.18.2
+Version: 1.18.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index cbdb93e..dcf0337 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index ee57b4b..363ff0a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-07-23" "1.18.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-08-13" "1.18.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 91101d1..008129f 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-07-23" "1.18.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-08-13" "1.18.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -903,6 +903,20 @@ descend into subfolders
* \f[I]false\f[]: Get posts from "Latest Updates" pages
+.SS extractor.bbc.width
+.IP "Type:" 6
+\f[I]int\f[]
+
+.IP "Default:" 9
+\f[I]1920\f[]
+
+.IP "Description:" 4
+Specifies the requested image width.
+
+This value must be divisible by 16 and is rounded down otherwise.
+The maximum possible value appears to be \f[I]1920\f[].
+
+
.SS extractor.blogger.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -914,7 +928,7 @@ descend into subfolders
Download embedded videos hosted on https://www.blogger.com/
-.SS extractor.danbooru.ugoira
+.SS extractor.danbooru.external
.IP "Type:" 6
\f[I]bool\f[]
@@ -922,12 +936,8 @@ Download embedded videos hosted on https://www.blogger.com/
\f[I]false\f[]
.IP "Description:" 4
-Controls the download target for Ugoira posts.
-
-.br
-* \f[I]true\f[]: Original ZIP archives
-.br
-* \f[I]false\f[]: Converted video files
+For unavailable or restricted posts,
+follow the \f[I]source\f[] and download from there if possible.
.SS extractor.danbooru.metadata
@@ -943,6 +953,22 @@ Extract additional metadata (notes, artist commentary, parent, children)
Note: This requires 1 additional HTTP request for each post.
+.SS extractor.danbooru.ugoira
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls the download target for Ugoira posts.
+
+.br
+* \f[I]true\f[]: Original ZIP archives
+.br
+* \f[I]false\f[]: Converted video files
+
+
.SS extractor.derpibooru.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -1278,6 +1304,17 @@ Controls the format of \f[I]description\f[] metadata fields.
* \f[I]"html"\f[]: Raw HTML content
+.SS extractor.furaffinity.external
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Follow external URLs linked in descriptions.
+
+
.SS extractor.furaffinity.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1472,6 +1509,20 @@ If the selected format is not available,
the first in the list gets chosen (usually mp3).
+.SS extractor.luscious.gif
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Format in which to download animated images.
+
+Use \f[I]true\f[] to download animated images as gifs and \f[I]false\f[]
+to download as mp4 videos.
+
+
.SS extractor.mangadex.api-server
.IP "Type:" 6
\f[I]string\f[]
@@ -1550,6 +1601,23 @@ Also emit metadata for text-only posts without media content.
Download original Adobe Flash animations instead of pre-rendered videos.
+.SS extractor.newgrounds.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"original"\f[]
+
+.IP "Example:" 4
+"720p"
+
+.IP "Description:" 4
+Selects the preferred format for video downloads.
+
+If the selected format is not available,
+the next smaller one gets chosen.
+
+
.SS extractor.newgrounds.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1789,6 +1857,20 @@ A value of \f[I]0\f[] means no limit.
Also search Plurk comments for URLs.
+.SS extractor.reactor.gif
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Format in which to download animated images.
+
+Use \f[I]true\f[] to download animated images as gifs and \f[I]false\f[]
+to download as mp4 videos.
+
+
.SS extractor.readcomiconline.captcha
.IP "Type:" 6
\f[I]string\f[]
@@ -2094,6 +2176,9 @@ Fetch media from quoted Tweets.
.IP "Description:" 4
Fetch media from replies to other Tweets.
+If this value is \f[I]"self"\f[], only consider replies where
+reply and original Tweet are from the same user.
+
.SS extractor.twitter.retweets
.IP "Type:" 6
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index ffbed52..020b802 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -41,6 +41,9 @@
"password": null,
"recursive": true
},
+ "bbc": {
+ "width": 1920
+ },
"blogger":
{
"videos": true
@@ -49,8 +52,9 @@
{
"username": null,
"password": null,
- "ugoira": false,
- "metadata": false
+ "external": false,
+ "metadata": false,
+ "ugoira": false
},
"derpibooru":
{
@@ -95,6 +99,7 @@
"furaffinity":
{
"descriptions": "text",
+ "external": false,
"include": "gallery"
},
"gfycat":
@@ -142,6 +147,10 @@
{
"format": "mp3"
},
+ "luscious":
+ {
+ "gif": false
+ },
"mangadex":
{
"api-server": "https://api.mangadex.org",
@@ -158,6 +167,7 @@
"username": null,
"password": null,
"flash": true,
+ "format": "original",
"include": "art"
},
"nijie":
@@ -192,6 +202,7 @@
},
"reactor":
{
+ "gif": false,
"sleep-request": 5.0
},
"reddit":
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index c8f8dec..3e6ca0e 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.18.2
+Version: 1.18.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index f8a3c2c..c10b36d 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -170,6 +170,7 @@ gallery_dl/extractor/weasyl.py
gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
+gallery_dl/extractor/wikieat.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 76ec46f..78d8d34 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -98,20 +98,21 @@ class HttpDownloader(DownloaderBase):
time.sleep(tries)
tries += 1
- headers = {"Accept": "*/*"}
file_header = None
- # check for .part file
- file_size = pathfmt.part_size()
- if file_size:
- headers["Range"] = "bytes={}-".format(file_size)
- # general headers
- if self.headers:
- headers.update(self.headers)
- # file-specific headers
+ # collect HTTP headers
+ headers = {"Accept": "*/*"}
+ # file-specific headers
extra = kwdict.get("_http_headers")
if extra:
headers.update(extra)
+ # general headers
+ if self.headers:
+ headers.update(self.headers)
+ # partial content
+ file_size = pathfmt.part_size()
+ if file_size:
+ headers["Range"] = "bytes={}-".format(file_size)
# connect to (remote) source
try:
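
The reordering above also changes which value wins when the same header appears twice: dict.update() lets later entries override earlier ones, so general config headers now take precedence over per-file "_http_headers", and the Range header for resuming .part files is applied last so neither source can clobber it. A minimal sketch of the merge order (header values are made up):

    headers = {"Accept": "*/*"}
    headers.update({"User-Agent": "per-file value"})    # kwdict["_http_headers"]
    headers.update({"User-Agent": "configured value"})  # self.headers (general)
    headers["Range"] = "bytes=2048-"                    # resume offset, set last
    print(headers["User-Agent"])  # -> "configured value"
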
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 1a6a899..e130db2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -132,6 +132,7 @@ modules = [
"webtoons",
"weibo",
"wikiart",
+ "wikieat",
"xhamster",
"xvideos",
"booru",
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index ace8a28..17b5f52 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -27,7 +27,7 @@ class BbcGalleryExtractor(GalleryExtractor):
test = (
("https://www.bbc.co.uk/programmes/p084qtzs/p085g9kg", {
"pattern": r"https://ichef\.bbci\.co\.uk"
- r"/images/ic/976x549_b/\w+\.jpg",
+ r"/images/ic/1920xn/\w+\.jpg",
"count": 37,
"keyword": {
"programme": "p084qtzs",
@@ -49,32 +49,57 @@ class BbcGalleryExtractor(GalleryExtractor):
}
def images(self, page):
+ width = self.config("width")
+ width = width - width % 16 if width else 1920
+ dimensions = "/{}xn/".format(width)
+
return [
- (imgset.rpartition(", ")[2].partition(" ")[0], None)
- for imgset in text.extract_iter(page, 'data-image-src-sets="', '"')
+ (src.replace("/320x180_b/", dimensions),
+ {"_fallback": self._fallback_urls(src, width)})
+ for src in text.extract_iter(page, 'data-image-src="', '"')
]
+ @staticmethod
+ def _fallback_urls(src, max_width):
+ front, _, back = src.partition("/320x180_b/")
+ for width in (1920, 1600, 1280, 976):
+ if width < max_width:
+ yield "{}/{}xn/{}".format(front, width, back)
+
class BbcProgrammeExtractor(Extractor):
"""Extractor for all galleries of a bbc programme"""
category = "bbc"
subcategory = "programme"
root = "https://www.bbc.co.uk"
- pattern = BASE_PATTERN + r"[^/?#]+/galleries)"
- test = ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
- "pattern": BbcGalleryExtractor.pattern,
- "count": ">= 24",
- })
+ pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
+ test = (
+ ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
+ "pattern": BbcGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": ">= 50",
+ }),
+ ("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", {
+ "pattern": BbcGalleryExtractor.pattern,
+ "count": ">= 100",
+ }),
+ )
def __init__(self, match):
Extractor.__init__(self, match)
- self.galleries_url = self.root + match.group(1)
+ self.path, self.page = match.groups()
def items(self):
- page = self.request(self.galleries_url).text
data = {"_extractor": BbcGalleryExtractor}
+ params = {"page": text.parse_int(self.page, 1)}
+ galleries_url = self.root + self.path
- for programme_id in text.extract_iter(
- page, '<a href="https://www.bbc.co.uk/programmes/', '"'):
- url = "https://www.bbc.co.uk/programmes/" + programme_id
- yield Message.Queue, url, data
+ while True:
+ page = self.request(galleries_url, params=params).text
+ for programme_id in text.extract_iter(
+ page, '<a href="https://www.bbc.co.uk/programmes/', '"'):
+ url = "https://www.bbc.co.uk/programmes/" + programme_id
+ yield Message.Queue, url, data
+ if 'rel="next"' not in page:
+ return
+ params["page"] += 1
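
The width handling above rounds any configured extractor.bbc.width down to a multiple of 16 before building the "/{width}xn/" URL segment, and _fallback_urls() yields every smaller standard width as a backup. A standalone sketch of that arithmetic, using 1000 as an example setting:

    width = 1000                                  # example extractor.bbc.width
    width = width - width % 16 if width else 1920
    print("/{}xn/".format(width))                 # -> "/992xn/"
    print([w for w in (1920, 1600, 1280, 976) if w < width])  # -> [976]
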
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index b081cc9..f13edf7 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -217,236 +217,253 @@ class BehanceCollectionExtractor(BehanceExtractor):
query = """
query GetMoodboardItemsAndRecommendations(
- $id: Int!
- $firstItem: Int!
- $afterItem: String
- $shouldGetRecommendations: Boolean!
- $shouldGetItems: Boolean!
- $shouldGetMoodboardFields: Boolean!
- ) {
- viewer @include(if: $shouldGetMoodboardFields) {
- isOptedOutOfRecommendations
- }
- moodboard(id: $id) {
- ...moodboardFields @include(if: $shouldGetMoodboardFields)
-
- items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems)
- {
- pageInfo {
- endCursor
- hasNextPage
- }
- nodes {
- ...nodesFields
- }
- }
+ $id: Int!
+ $firstItem: Int!
+ $afterItem: String
+ $shouldGetRecommendations: Boolean!
+ $shouldGetItems: Boolean!
+ $shouldGetMoodboardFields: Boolean!
+) {
+ viewer @include(if: $shouldGetMoodboardFields) {
+ isOptedOutOfRecommendations
+ isAdmin
+ }
+ moodboard(id: $id) {
+ ...moodboardFields @include(if: $shouldGetMoodboardFields)
- recommendedItems(first: 80) @include(if: $shouldGetRecommendations) {
- nodes {
- ...nodesFields
- fetchSource
- }
+ items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems) {
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ nodes {
+ ...nodesFields
}
}
- }
- fragment moodboardFields on Moodboard {
- id
- label
- privacy
- followerCount
- isFollowing
- projectCount
- url
- isOwner
- owners {
- id
- displayName
- url
- firstName
- location
- locationUrl
- images {
- size_50 {
- url
- }
- size_100 {
- url
- }
- size_115 {
- url
- }
- size_230 {
- url
- }
- size_138 {
- url
- }
- size_276 {
- url
- }
+ recommendedItems(first: 80) @include(if: $shouldGetRecommendations) {
+ nodes {
+ ...nodesFields
+ fetchSource
}
}
}
-
- fragment projectFields on Project {
+}
+
+fragment moodboardFields on Moodboard {
+ id
+ label
+ privacy
+ followerCount
+ isFollowing
+ projectCount
+ url
+ isOwner
+ owners {
id
- isOwner
- publishedOn
- matureAccess
- hasMatureContent
- modifiedOn
- name
+ displayName
url
- isPrivate
- slug
- fields {
- label
- }
- colors {
- r
- g
- b
- }
- owners {
- url
- displayName
- id
- location
- locationUrl
- isProfileOwner
- images {
- size_50 {
- url
- }
- size_100 {
- url
- }
- size_115 {
- url
- }
- size_230 {
- url
- }
- size_138 {
- url
- }
- size_276 {
- url
- }
+ firstName
+ location
+ locationUrl
+ isFollowing
+ images {
+ size_50 {
+ url
}
- }
- covers {
- size_original {
+ size_100 {
url
}
- size_max_808 {
+ size_115 {
url
}
- size_808 {
+ size_230 {
url
}
- size_404 {
+ size_138 {
url
}
- size_202 {
+ size_276 {
url
}
- size_230 {
+ }
+ }
+}
+
+fragment projectFields on Project {
+ id
+ isOwner
+ publishedOn
+ matureAccess
+ hasMatureContent
+ modifiedOn
+ name
+ url
+ isPrivate
+ slug
+ license {
+ license
+ description
+ id
+ label
+ url
+ text
+ images
+ }
+ fields {
+ label
+ }
+ colors {
+ r
+ g
+ b
+ }
+ owners {
+ url
+ displayName
+ id
+ location
+ locationUrl
+ isProfileOwner
+ isFollowing
+ images {
+ size_50 {
+ url
+ }
+ size_100 {
url
}
size_115 {
url
}
- }
- stats {
- views {
- all
+ size_230 {
+ url
}
- appreciations {
- all
+ size_138 {
+ url
}
- comments {
- all
+ size_276 {
+ url
}
}
}
-
- fragment exifDataValueFields on exifDataValue {
- id
- label
- value
- searchValue
+ covers {
+ size_original {
+ url
+ }
+ size_max_808 {
+ url
+ }
+ size_808 {
+ url
+ }
+ size_404 {
+ url
+ }
+ size_202 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_115 {
+ url
+ }
+ }
+ stats {
+ views {
+ all
+ }
+ appreciations {
+ all
+ }
+ comments {
+ all
+ }
+ }
+}
+
+fragment exifDataValueFields on exifDataValue {
+ id
+ label
+ value
+ searchValue
+}
+
+fragment nodesFields on MoodboardItem {
+ id
+ entityType
+ width
+ height
+ flexWidth
+ flexHeight
+ images {
+ size
+ url
}
- fragment nodesFields on MoodboardItem {
- id
- entityType
- width
- height
- flexWidth
- flexHeight
- images {
- size
- url
+ entity {
+ ... on Project {
+ ...projectFields
}
- entity {
- ... on Project {
+ ... on ImageModule {
+ project {
...projectFields
}
- ... on ImageModule {
- project {
- ...projectFields
- }
+ colors {
+ r
+ g
+ b
+ }
- exifData {
- lens {
- ...exifDataValueFields
- }
- software {
- ...exifDataValueFields
- }
- makeAndModel {
- ...exifDataValueFields
- }
- focalLength {
- ...exifDataValueFields
- }
- iso {
- ...exifDataValueFields
- }
- location {
- ...exifDataValueFields
- }
- flash {
- ...exifDataValueFields
- }
- exposureMode {
- ...exifDataValueFields
- }
- shutterSpeed {
- ...exifDataValueFields
- }
- aperture {
- ...exifDataValueFields
- }
+ exifData {
+ lens {
+ ...exifDataValueFields
+ }
+ software {
+ ...exifDataValueFields
+ }
+ makeAndModel {
+ ...exifDataValueFields
+ }
+ focalLength {
+ ...exifDataValueFields
+ }
+ iso {
+ ...exifDataValueFields
+ }
+ location {
+ ...exifDataValueFields
+ }
+ flash {
+ ...exifDataValueFields
+ }
+ exposureMode {
+ ...exifDataValueFields
+ }
+ shutterSpeed {
+ ...exifDataValueFields
+ }
+ aperture {
+ ...exifDataValueFields
}
}
+ }
- ... on MediaCollectionComponent {
- project {
- ...projectFields
- }
+ ... on MediaCollectionComponent {
+ project {
+ ...projectFields
}
}
}
+}
"""
variables = {
"afterItem": "MAo=",
"firstItem": 40,
- "id" : self.collection_id,
+ "id" : int(self.collection_id),
"shouldGetItems" : True,
"shouldGetMoodboardFields": False,
"shouldGetRecommendations": False,
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 3b96a4e..c6c33b4 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -32,6 +32,7 @@ class DanbooruExtractor(Extractor):
super().__init__(match)
self.root = "https://{}.donmai.us".format(match.group(1))
self.ugoira = self.config("ugoira", False)
+ self.external = self.config("external", False)
self.extended_metadata = self.config("metadata", False)
username, api_key = self._get_auth_info()
@@ -52,6 +53,10 @@ class DanbooruExtractor(Extractor):
try:
url = post["file_url"]
except KeyError:
+ if self.external and post["source"]:
+ post.update(data)
+ yield Message.Directory, post
+ yield Message.Queue, post["source"], post
continue
text.nameext_from_url(url, post)
@@ -126,6 +131,11 @@ class DanbooruTagExtractor(DanbooruExtractor):
("https://danbooru.donmai.us/posts?tags=mushishi", {
"count": ">= 300",
}),
+ # 'external' option (#1747)
+ ("https://danbooru.donmai.us/posts?tags=pixiv_id%3A1476533", {
+ "options": (("external", True),),
+ "pattern": r"http://img16.pixiv.net/img/takaraakihito/1476533.jpg",
+ }),
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 163d7ba..900fde8 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -95,8 +95,7 @@ class DeviantartExtractor(Extractor):
if "content" in deviation:
content = deviation["content"]
- if self.original and deviation["is_downloadable"] and \
- text.ext_from_url(content["src"]) != "gif":
+ if self.original and deviation["is_downloadable"]:
self._update_content(deviation, content)
if content["src"].startswith("https://images-wixmp-"):
@@ -807,11 +806,20 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
(("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
"pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
}),
- # non-download URL for GIFs (#242)
+ # GIF (#242)
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
r"/f/[^/]+/[^.]+\.gif\?token="),
}),
+ # Flash animation with GIF preview (#1731)
+ ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", {
+ "pattern": r"https://api-da\.wixmp\.com/_api/download"
+ r"/file\?downloadToken=.+",
+ "keyword": {
+ "filename": "flash_comic_tutorial_by_yuumei-d3juatd",
+ "extension": "swf",
+ },
+ }),
# sta.sh URLs from description (#302)
(("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
"options": (("extra", 1), ("original", 0)),
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 86e1678..8c2887e 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -29,9 +29,10 @@ class FuraffinityExtractor(Extractor):
self.offset = 0
if self.config("descriptions") == "html":
- self._process_description = lambda x: x.strip()
+ self._process_description = str.strip
def items(self):
+ external = self.config("external", False)
metadata = self.metadata()
for post_id in util.advance(self.posts(), self.offset):
post = self._parse_post(post_id)
@@ -41,8 +42,10 @@ class FuraffinityExtractor(Extractor):
yield Message.Directory, post
yield Message.Url, post["url"], post
- def posts(self):
- return self._pagination()
+ if external:
+ for url in text.extract_iter(
+ post["_description"], 'href="http', '"'):
+ yield Message.Queue, "http" + url, post
def metadata(self):
return None
@@ -80,8 +83,7 @@ class FuraffinityExtractor(Extractor):
data["tags"] = text.split_html(tags)
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
data["artist"] = extr("<strong>", "<")
- data["description"] = self._process_description(extr(
- 'class="section-body">', '</div>'))
+ data["_description"] = extr('class="section-body">', '</div>')
data["views"] = pi(rh(extr('class="views">', '</span>')))
data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
data["comments"] = pi(rh(extr('class="comments">', '</span>')))
@@ -108,12 +110,12 @@ class FuraffinityExtractor(Extractor):
data["tags"] = text.split_html(extr(
'id="keywords">', '</div>'))[::2]
data["rating"] = extr('<img alt="', ' ')
- data["description"] = self._process_description(extr(
- "</table>", "</table>"))
+ data["_description"] = extr("</table>", "</table>")
data["artist_url"] = data["artist"].replace("_", "").lower()
data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
+ data["description"] = self._process_description(data["_description"])
return data
@@ -121,12 +123,12 @@ class FuraffinityExtractor(Extractor):
def _process_description(description):
return text.unescape(text.remove_html(description, "", ""))
- def _pagination(self):
+ def _pagination(self, path):
num = 1
while True:
url = "{}/{}/{}/{}/".format(
- self.root, self.subcategory, self.user, num)
+ self.root, path, self.user, num)
page = self.request(url).text
post_id = None
@@ -191,6 +193,9 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor):
"count": 6,
})
+ def posts(self):
+ return self._pagination("gallery")
+
class FuraffinityScrapsExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's scraps"""
@@ -203,6 +208,9 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
"count": ">= 3",
})
+ def posts(self):
+ return self._pagination("scraps")
+
class FuraffinityFavoriteExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's favorites"""
@@ -273,6 +281,13 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
"height" : 120,
},
}),
+ # 'external' option (#1492)
+ ("https://www.furaffinity.net/view/42166511/", {
+ "options": (("external", True),),
+ "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/"
+ r"|http://www\.postybirb\.com",
+ "count": 2,
+ }),
("https://furaffinity.net/view/21835115/"),
("https://sfw.furaffinity.net/view/21835115/"),
("https://www.furaffinity.net/full/21835115/"),
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 2ea5dfa..7c338a8 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -140,7 +140,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/common.js
inum = int(ihash[-3:-1], 16)
- offset = 2 if inum < 0x40 else 1 if inum < 0x80 else 0
+ offset = 2 if inum < 0x44 else 1 if inum < 0x88 else 0
url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
chr(97 + offset),
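
The threshold change mirrors ltn.hitomi.la/common.js: the second-to-last hex byte of the image hash picks one of three image subdomains, and the cutoffs moved from 0x40/0x80 to 0x44/0x88. A standalone sketch with a hypothetical hash:

    ihash = "b7f8aa1"                   # hypothetical image hash (shortened)
    inum = int(ihash[-3:-1], 16)        # "aa" -> 170
    offset = 2 if inum < 0x44 else 1 if inum < 0x88 else 0
    print(chr(97 + offset) + "b.hitomi.la")  # -> "ab.hitomi.la" (170 >= 0x88)
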
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 28b5506..3590e17 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,7 +29,7 @@ class InstagramExtractor(Extractor):
root = "https://www.instagram.com"
cookiedomain = ".instagram.com"
cookienames = ("sessionid",)
- request_interval = 5.0
+ request_interval = 8.0
def __init__(self, match):
Extractor.__init__(self, match)
@@ -43,6 +43,7 @@ class InstagramExtractor(Extractor):
self.login()
data = self.metadata()
videos = self.config("videos", True)
+ video_headers = {"User-Agent": "Mozilla/5.0"}
for post in self.posts():
@@ -60,6 +61,8 @@ class InstagramExtractor(Extractor):
url = file["display_url"]
elif not videos:
continue
+ else:
+ file["_http_headers"] = video_headers
file.update(post)
yield Message.Url, url, text.nameext_from_url(url, file)
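
The "_http_headers" entry set here is consumed by the downloader change earlier in this commit: gallery_dl/downloader/http.py reads kwdict.get("_http_headers") and merges those headers into the request, which is how every Instagram video download picks up the custom User-Agent. Per file dict, roughly (the URL is a placeholder):

    file = {"display_url": "https://scontent.example/video.mp4"}
    file["_http_headers"] = {"User-Agent": "Mozilla/5.0"}
    # in http.py: extra = kwdict.get("_http_headers"); headers.update(extra)
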
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 7218488..972316b 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -38,8 +38,8 @@ class KemonopartyExtractor(Extractor):
if self.config("metadata"):
username = text.unescape(text.extract(
- self.request(self.user_url).text, "<title>", " | Kemono"
- )[0]).lstrip()
+ self.request(self.user_url).text,
+ '<meta name="artist_name" content="', '"')[0])
else:
username = None
@@ -74,9 +74,7 @@ class KemonopartyExtractor(Extractor):
post["type"] = file["type"]
url = file["path"]
if url[0] == "/":
- url = "https://data.kemono.party" + url
- elif url.startswith("https://kemono.party/"):
- url = "https://data.kemono.party" + url[20:]
+ url = self.root + url
text.nameext_from_url(file["name"], post)
yield Message.Url, url, post
@@ -125,7 +123,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
pattern = BASE_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://data\.kemono\.party/files/fanbox"
+ "pattern": r"https://kemono\.party/files/fanbox"
r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -148,12 +146,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://data\.kemono\.party/inline/fanbox"
+ "pattern": r"https://kemono\.party/inline/fanbox"
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://data\.kemono\.party/(file|attachment)s"
+ "pattern": r"https://kemono\.party/(file|attachment)s"
r"/gumroad/trylsc/IURjT/",
}),
# username (#1548, #1652)
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 852c49f..c296102 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -116,6 +116,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
def __init__(self, match):
LusciousExtractor.__init__(self, match)
self.album_id = match.group(1)
+ self.gif = self.config("gif", False)
def items(self):
album = self.metadata()
@@ -130,7 +131,10 @@ class LusciousAlbumExtractor(LusciousExtractor):
image["date"] = text.parse_timestamp(image["created"])
image["id"] = text.parse_int(image["id"])
- url = image["url_to_video"] or image["url_to_original"]
+ url = (image["url_to_original"] or image["url_to_video"]
+ if self.gif else
+ image["url_to_video"] or image["url_to_original"])
+
yield Message.Url, url, text.nameext_from_url(url, image)
def metadata(self):
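
Note how the conditional expression above parses: "X if C else Y" binds more loosely than "or", so each branch is a complete preference order. Spelled out with sample field values:

    gif = True
    url_to_video, url_to_original = "a.mp4", "a.gif"   # sample values
    url = ((url_to_original or url_to_video) if gif
           else (url_to_video or url_to_original))
    print(url)  # -> "a.gif" with gif enabled, "a.mp4" otherwise
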
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 4fdfac9..a699401 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,9 +31,13 @@ class NewgroundsExtractor(Extractor):
self.user_root = "https://{}.newgrounds.com".format(self.user)
self.flash = self.config("flash", True)
+ fmt = self.config("format", "original")
+ self.format = (True if not fmt or fmt == "original" else
+ fmt if isinstance(fmt, int) else
+ text.parse_int(fmt.rstrip("p")))
+
def items(self):
self.login()
- yield Message.Version, 1
for post_url in self.posts():
try:
@@ -59,7 +63,7 @@ class NewgroundsExtractor(Extractor):
def posts(self):
"""Return urls of all relevant image pages"""
- return self._pagination(self.subcategory)
+ return self._pagination(self._path)
def login(self):
username, password = self._get_auth_info()
@@ -176,8 +180,23 @@ class NewgroundsExtractor(Extractor):
"Referer": self.root,
}
sources = self.request(url, headers=headers).json()["sources"]
- src = sources["360p"][0]["src"].replace(".360p.", ".")
- fallback = self._video_fallback(sources)
+
+ if self.format is True:
+ src = sources["360p"][0]["src"].replace(".360p.", ".")
+ formats = sources
+ else:
+ formats = []
+ for fmt, src in sources.items():
+ width = text.parse_int(fmt.rstrip("p"))
+ if width <= self.format:
+ formats.append((width, src))
+ if formats:
+ formats.sort(reverse=True)
+ src, formats = formats[0][1][0]["src"], formats[1:]
+ else:
+ src = ""
+
+ fallback = self._video_fallback(formats)
date = text.parse_timestamp(src.rpartition("?")[2])
return {
@@ -193,11 +212,13 @@ class NewgroundsExtractor(Extractor):
}
@staticmethod
- def _video_fallback(sources):
- sources = list(sources.items())
- sources.sort(key=lambda src: text.parse_int(src[0][:-1]), reverse=True)
- for src in sources:
- yield src[1][0]["src"]
+ def _video_fallback(formats):
+ if isinstance(formats, dict):
+ formats = list(formats.items())
+ formats.sort(key=lambda fmt: text.parse_int(fmt[0].rstrip("p")),
+ reverse=True)
+ for fmt in formats:
+ yield fmt[1][0]["src"]
def _pagination(self, kind):
root = self.user_root
@@ -322,7 +343,13 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
("https://www.newgrounds.com/portal/view/161181/format/flash", {
"pattern": r"https://uploads\.ungrounded\.net/161000"
r"/161181_ddautta_mask__550x281_\.swf\?f1081628129",
- })
+ }),
+ # format selection (#1729)
+ ("https://www.newgrounds.com/portal/view/758545", {
+ "options": (("format", "720p"),),
+ "pattern": r"https://uploads\.ungrounded\.net/alternate/1482000"
+ r"/1482860_alternate_102516\.720p\.mp4\?\d+",
+ }),
)
def __init__(self, match):
@@ -336,7 +363,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
- subcategory = "art"
+ subcategory = _path = "art"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
test = ("https://tomfulp.newgrounds.com/art", {
"pattern": NewgroundsImageExtractor.pattern,
@@ -346,7 +373,7 @@ class NewgroundsArtExtractor(NewgroundsExtractor):
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
- subcategory = "audio"
+ subcategory = _path = "audio"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
test = ("https://tomfulp.newgrounds.com/audio", {
"pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3",
@@ -356,7 +383,7 @@ class NewgroundsAudioExtractor(NewgroundsExtractor):
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
- subcategory = "movies"
+ subcategory = _path = "movies"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
test = ("https://tomfulp.newgrounds.com/movies", {
"pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py
index 9c4d686..be736d1 100644
--- a/gallery_dl/extractor/nsfwalbum.py
+++ b/gallery_dl/extractor/nsfwalbum.py
@@ -43,11 +43,27 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
def images(self, page):
iframe = self.root + "/iframe_image.php?id="
backend = self.root + "/backend.php"
+ retries = self._retries
+
for image_id in text.extract_iter(page, 'data-img-id="', '"'):
- spirit = self._annihilate(text.extract(self.request(
- iframe + image_id).text, 'giraffe.annihilate("', '"')[0])
- params = {"spirit": spirit, "photo": image_id}
- data = self.request(backend, params=params).json()
+ spirit = None
+ tries = 0
+
+ while tries <= retries:
+ try:
+ if not spirit:
+ spirit = self._annihilate(text.extract(
+ self.request(iframe + image_id).text,
+ 'giraffe.annihilate("', '"')[0])
+ params = {"spirit": spirit, "photo": image_id}
+ data = self.request(backend, params=params).json()
+ break
+ except Exception:
+ tries += 1
+ else:
+ self.log.warning("Unable to fetch image %s", image_id)
+ continue
+
yield data[0], {
"id" : text.parse_int(image_id),
"width" : text.parse_int(data[1]),
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 104dc23..bbbdd3f 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -28,6 +28,7 @@ class ReactorExtractor(Extractor):
Extractor.__init__(self, match)
self.root = "http://" + match.group(1)
self.session.headers["Referer"] = self.root
+ self.gif = self.config("gif", False)
if not self.category:
# set category based on domain name
@@ -124,6 +125,12 @@ class ReactorExtractor(Extractor):
elif "/post/webm/" not in url and "/post/mp4/" not in url:
url = url.replace("/post/", "/post/full/")
+ if self.gif and ("/post/webm/" in url or "/post/mp4/" in url):
+ gif_url = text.extract(image, '<a href="', '"')[0]
+ if not gif_url:
+ continue
+ url = gif_url
+
yield {
"url": url,
"post_id": post_id,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index fd0140d..7e78941 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -50,12 +50,17 @@ class TwitterExtractor(Extractor):
if not self.retweets and "retweeted_status_id_str" in tweet:
self.log.debug("Skipping %s (retweet)", tweet["id_str"])
continue
- if not self.replies and "in_reply_to_user_id_str" in tweet:
- self.log.debug("Skipping %s (reply)", tweet["id_str"])
- continue
if not self.quoted and "quoted" in tweet:
self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
+ if "in_reply_to_user_id_str" in tweet and (
+ not self.replies or (
+ self.replies == "self" and
+ tweet["in_reply_to_user_id_str"] != tweet["user_id_str"]
+ )
+ ):
+ self.log.debug("Skipping %s (reply)", tweet["id_str"])
+ continue
files = []
if "extended_entities" in tweet:
@@ -452,6 +457,15 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("replies", False),),
"count": 0,
}),
+ # 'replies' to self (#1254)
+ ("https://twitter.com/i/web/status/1424882930803908612", {
+ "options": (("replies", "self"),),
+ "count": 4,
+ }),
+ ("https://twitter.com/i/web/status/1424898916156284928", {
+ "options": (("replies", "self"),),
+ "count": 0,
+ }),
# "quoted" option (#854)
("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
"options": (("quoted", True),),
@@ -582,8 +596,8 @@ class TwitterAPI():
"ext": "mediaStats,highlightedLabel",
}
- cookies = self.extractor.session.cookies
- cookiedomain = ".twitter.com"
+ cookies = extractor.session.cookies
+ cookiedomain = extractor.cookiedomain
# CSRF
csrf_token = cookies.get("ct0", domain=cookiedomain)
@@ -726,21 +740,46 @@ class TwitterAPI():
if csrf_token:
self.headers["x-csrf-token"] = csrf_token
+ data = response.json()
+ if "errors" in data:
+ try:
+ msg = ", ".join(
+ '"' + error["message"] + '"'
+ for error in data["errors"]
+ )
+ except Exception:
+ msg = data["errors"]
+ if response.status_code < 400:
+ self.extractor.log.warning(msg)
+ else:
+ msg = ""
+
if response.status_code < 400:
- return response.json()
+ # success
+ return data
+
if response.status_code == 429:
+ # rate limit exceeded
until = response.headers.get("x-rate-limit-reset")
seconds = None if until else 60
self.extractor.wait(until=until, seconds=seconds)
continue
- try:
- msg = ", ".join(
- '"' + error["message"] + '"'
- for error in response.json()["errors"]
- )
- except Exception:
- msg = response.text
+ if response.status_code == 401 and \
+ "have been blocked from viewing" in msg:
+ # account blocked
+ extr = self.extractor

+ if self.headers["x-twitter-auth-type"] and \
+ extr.config("logout"):
+ guest_token = self._guest_token()
+ extr.session.cookies.set(
+ "gt", guest_token, domain=extr.cookiedomain)
+ self.headers["x-guest-token"] = guest_token
+ self.headers["x-twitter-auth-type"] = None
+ extr.log.info("Retrying API request as guest")
+ continue
+
+ # error
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, msg)
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 2178641..9dd2d47 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -88,6 +88,7 @@ class VkPhotosExtractor(Extractor):
yield Message.Directory, data
sub = re.compile(r"/imp[fg]/").sub
needle = 'data-id="{}_'.format(user_id)
+ cnt = 0
while True:
offset, html = self.request(
diff --git a/gallery_dl/extractor/wikieat.py b/gallery_dl/extractor/wikieat.py
new file mode 100644
index 0000000..f544bcb
--- /dev/null
+++ b/gallery_dl/extractor/wikieat.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://wikieat.club/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WikieatThreadExtractor(Extractor):
+ """Extractor for Wikieat threads"""
+ category = "wikieat"
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ filename_fmt = "{time}{num:?-//} {filename}.{extension}"
+ archive_fmt = "{board}_{thread}_{tim}"
+ pattern = r"(?:https?://)?wikieat\.club/([^/]+)/res/(\d+)"
+ test = ("https://wikieat.club/cel/res/25321.html", {
+ "pattern": r"https://wikieat\.club/cel/src/\d+(-\d)?\.\w+",
+ "count": ">= 200",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "https://wikieat.club/{}/res/{}.json".format(
+ self.board, self.thread)
+ posts = self.request(url).json()["posts"]
+ title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
+ process = self._process
+
+ data = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : text.unescape(title)[:50],
+ "num" : 0,
+ }
+
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for post in posts:
+ if "filename" in post:
+ yield process(post, data)
+ if "extra_files" in post:
+ for post["num"], filedata in enumerate(
+ post["extra_files"], 1):
+ yield process(post, filedata)
+
+ @staticmethod
+ def _process(post, data):
+ post.update(data)
+ post["extension"] = post["ext"][1:]
+ tim = post["tim"]
+ url = ("https://wikieat.club/" +
+ post["board"] + "/src/" +
+ tim + post["ext"])
+ return Message.Url, url, post
+
+
+class WikieatBoardExtractor(Extractor):
+ """Extractor for Wikieat boards"""
+ category = "wikieat"
+ subcategory = "board"
+ pattern = (r"(?:https?://)?wikieat\.club"
+ r"/([^/?#]+)/(?:index|catalog|\d+)\.html")
+ test = (
+ ("https://wikieat.club/cel/index.html", {
+ "pattern": WikieatThreadExtractor.pattern,
+ "count": ">= 100",
+ }),
+ ("https://wikieat.club/cel/catalog.html"),
+ ("https://wikieat.club/cel/2.html")
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ url = "https://wikieat.club/{}/threads.json".format(self.board)
+ threads = self.request(url).json()
+
+ for page in threads:
+ for thread in page["threads"]:
+ url = "https://wikieat.club/{}/res/{}.html".format(
+ self.board, thread["no"])
+ thread["page"] = page["page"]
+ thread["_extractor"] = WikieatThreadExtractor
+ yield Message.Queue, url, thread
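
One subtlety in the thread extractor above: the primary file of a post is yielded with the shared "num" of 0, while each entry in "extra_files" re-yields the same post with "num" counting from 1; the "{num:?-//}" in filename_fmt then appends a "-N" suffix only for those extras. Sketched with a dummy post:

    post = {"tim": "1618", "ext": ".jpg",
            "extra_files": [{"tim": "1619"}, {"tim": "1620"}]}  # dummy data
    post["num"] = 0                     # primary file keeps num == 0
    for post["num"], filedata in enumerate(post["extra_files"], 1):
        print(post["num"], filedata["tim"])   # 1 1619 / 2 1620
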
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 3462138..2c0fae6 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -97,6 +97,17 @@ def generate_token(size=16):
return binascii.hexlify(data).decode()
+def format_value(value, unit="B", suffixes="kMGTPEZY"):
+ value = format(value)
+ value_len = len(value)
+ index = value_len - 4
+ if index >= 0:
+ offset = (value_len - 1) % 3 + 1
+ return (value[:offset] + "." + value[offset:offset+2] +
+ suffixes[index // 3] + unit)
+ return value + unit
+
+
def combine_dict(a, b):
"""Recursively combine the contents of 'b' into 'a'"""
for key, value in b.items():
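
format_value() works by digit arithmetic instead of repeated division: for an n-digit number, index = n - 4 selects the suffix (k for 4-6 digits, M for 7-9, and so on), and offset = (n - 1) % 3 + 1 places the decimal point after the leading group. Tracing the test value 1234567:

    value = format(1234567)             # "1234567", 7 digits
    index = len(value) - 4              # 3 -> "kMGTPEZY"[3 // 3] == "M"
    offset = (len(value) - 1) % 3 + 1   # 1 -> "1" + "." + "23"
    print(value[:offset] + "." + value[offset:offset + 2]
          + "kMGTPEZY"[index // 3] + "B")   # -> "1.23MB"
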
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index fbb4e5b..566159d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.18.2"
+__version__ = "1.18.3"
diff --git a/test/test_util.py b/test/test_util.py
index 2d574da..7a31ebb 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -532,6 +532,22 @@ class TestOther(unittest.TestCase):
self.assertEqual(len(token), 80 * 2)
self.assertRegex(token, r"^[0-9a-f]+$")
+ def test_format_value(self):
+ self.assertEqual(util.format_value(0) , "0B")
+ self.assertEqual(util.format_value(1) , "1B")
+ self.assertEqual(util.format_value(12) , "12B")
+ self.assertEqual(util.format_value(123) , "123B")
+ self.assertEqual(util.format_value(1234) , "1.23kB")
+ self.assertEqual(util.format_value(12345) , "12.34kB")
+ self.assertEqual(util.format_value(123456) , "123.45kB")
+ self.assertEqual(util.format_value(1234567) , "1.23MB")
+ self.assertEqual(util.format_value(12345678) , "12.34MB")
+ self.assertEqual(util.format_value(123456789) , "123.45MB")
+ self.assertEqual(util.format_value(1234567890), "1.23GB")
+
+ self.assertEqual(util.format_value(123 , "B/s"), "123B/s")
+ self.assertEqual(util.format_value(123456, "B/s"), "123.45kB/s")
+
def test_combine_dict(self):
self.assertEqual(
util.combine_dict({}, {}),