author      Unit 193 <unit193@unit193.net>    2021-08-13 17:45:41 -0400
committer   Unit 193 <unit193@unit193.net>    2021-08-13 17:45:41 -0400
commit      8479f38b9345855a8f59543cfa283372bad90edd (patch)
tree        d5995df6def9f62a5f091b9ba2da37ed00503ec0
parent      7461397a2974ba9d1cbd9921f7f41677e1f53aa1 (diff)
parent      d50ba9cfe80f00e02ca9a4714f75699c00e67128 (diff)
Update upstream source from tag 'upstream/1.18.3'
Update to upstream version '1.18.3'
with Debian dir ee2931f43a63c78afa8e9359bd29ee3343026bdb
28 files changed, 699 insertions, 279 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72f9c42..1cfd97d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
 # Changelog
 
+## 1.18.3 - 2021-08-13
+### Additions
+- [bbc] add `width` option ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
+- [danbooru] add `external` option ([#1747](https://github.com/mikf/gallery-dl/issues/1747))
+- [furaffinity] add `external` option ([#1492](https://github.com/mikf/gallery-dl/issues/1492))
+- [luscious] add `gif` option ([#1701](https://github.com/mikf/gallery-dl/issues/1701))
+- [newgrounds] add `format` option ([#1729](https://github.com/mikf/gallery-dl/issues/1729))
+- [reactor] add `gif` option ([#1701](https://github.com/mikf/gallery-dl/issues/1701))
+- [twitter] warn about suspended accounts ([#1759](https://github.com/mikf/gallery-dl/issues/1759))
+- [twitter] extend `replies` option ([#1254](https://github.com/mikf/gallery-dl/issues/1254))
+- [twitter] add option to log out and retry when blocked ([#1719](https://github.com/mikf/gallery-dl/issues/1719))
+- [wikieat] add `thread` and `board` extractors ([#1699](https://github.com/mikf/gallery-dl/issues/1699), [#1607](https://github.com/mikf/gallery-dl/issues/1607))
+### Changes
+- [instagram] increase default delay between HTTP requests from 5s to 8s ([#1732](https://github.com/mikf/gallery-dl/issues/1732))
+### Fixes
+- [bbc] improve image dimensions ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
+- [bbc] support multi-page gallery listings ([#1730](https://github.com/mikf/gallery-dl/issues/1730))
+- [behance] fix `collection` extraction
+- [deviantart] get original files for GIF previews ([#1731](https://github.com/mikf/gallery-dl/issues/1731))
+- [furaffinity] fix errors when using `category-transfer` ([#1274](https://github.com/mikf/gallery-dl/issues/1274))
+- [hitomi] fix image URLs ([#1765](https://github.com/mikf/gallery-dl/issues/1765))
+- [instagram] use custom User-Agent header for video downloads ([#1682](https://github.com/mikf/gallery-dl/issues/1682), [#1623](https://github.com/mikf/gallery-dl/issues/1623), [#1580](https://github.com/mikf/gallery-dl/issues/1580))
+- [kemonoparty] fix username extraction ([#1750](https://github.com/mikf/gallery-dl/issues/1750))
+- [kemonoparty] update file server domain ([#1764](https://github.com/mikf/gallery-dl/issues/1764))
+- [newgrounds] fix errors when using `category-transfer` ([#1274](https://github.com/mikf/gallery-dl/issues/1274))
+- [nsfwalbum] retry backend requests when extracting image URLs ([#1733](https://github.com/mikf/gallery-dl/issues/1733), [#1271](https://github.com/mikf/gallery-dl/issues/1271))
+- [vk] prevent exception for empty/private profiles ([#1742](https://github.com/mikf/gallery-dl/issues/1742))
+
 ## 1.18.2 - 2021-07-23
 ### Additions
 - [bbc] add `gallery` and `programme` extractors ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.18.2
+Version: 1.18.3
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
 
 | Executables build from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
 
 | Executables build from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index ee57b4b..363ff0a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-07-23" "1.18.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-08-13" "1.18.3" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 91101d1..008129f 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-07-23" "1.18.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-08-13" "1.18.3" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -903,6 +903,20 @@ descend into subfolders
 * \f[I]false\f[]: Get posts from "Latest Updates" pages
 
 
+.SS extractor.bbc.width
+.IP "Type:" 6
+\f[I]int\f[]
+
+.IP "Default:" 9
+\f[I]1920\f[]
+
+.IP "Description:" 4
+Specifies the requested image width.
+
+This value must be divisible by 16 and gets rounded down otherwise.
+The maximum possible value appears to be \f[I]1920\f[].
+
+
 .SS extractor.blogger.videos
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -914,7 +928,7 @@ descend into subfolders
 Download embedded videos hosted on https://www.blogger.com/
 
 
-.SS extractor.danbooru.ugoira
+.SS extractor.danbooru.external
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -922,12 +936,8 @@ Download embedded videos hosted on https://www.blogger.com/
 \f[I]false\f[]
 
 .IP "Description:" 4
-Controls the download target for Ugoira posts.
-
-.br
-* \f[I]true\f[]: Original ZIP archives
-.br
-* \f[I]false\f[]: Converted video files
+For unavailable or restricted posts,
+follow the \f[I]source\f[] and download from there if possible.
 
 
 .SS extractor.danbooru.metadata
@@ -943,6 +953,22 @@ Extract additional metadata (notes, artist commentary, parent, children)
 
 Note: This requires 1 additional HTTP request for each post.
 
+
+.SS extractor.danbooru.ugoira
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls the download target for Ugoira posts.
+
+.br
+* \f[I]true\f[]: Original ZIP archives
+.br
+* \f[I]false\f[]: Converted video files
+
 
 .SS extractor.derpibooru.api-key
 .IP "Type:" 6
 \f[I]string\f[]
@@ -1278,6 +1304,17 @@ Controls the format of \f[I]description\f[] metadata fields.
 * \f[I]"html"\f[]: Raw HTML content
 
 
+.SS extractor.furaffinity.external
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Follow external URLs linked in descriptions.
+
+
 .SS extractor.furaffinity.include
 .IP "Type:" 6
 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1472,6 +1509,20 @@ If the selected format is not available,
 the first in the list gets chosen (usually mp3).
 
 
+.SS extractor.luscious.gif
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Format in which to download animated images.
+
+Use \f[I]true\f[] to download animated images as gifs and \f[I]false\f[]
+to download as mp4 videos.
+
+
 .SS extractor.mangadex.api-server
 .IP "Type:" 6
 \f[I]string\f[]
@@ -1550,6 +1601,23 @@ Also emit metadata for text-only posts without media content.
 Download original Adobe Flash animations instead of pre-rendered videos.
 
 
+.SS extractor.newgrounds.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"original"\f[]
+
+.IP "Example:" 4
+"720p"
+
+.IP "Description:" 4
+Selects the preferred format for video downloads.
+
+If the selected format is not available,
+the next smaller one gets chosen.
+
+
 .SS extractor.newgrounds.include
 .IP "Type:" 6
 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1789,6 +1857,20 @@ A value of \f[I]0\f[] means no limit.
 Also search Plurk comments for URLs.
 
 
+.SS extractor.reactor.gif
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Format in which to download animated images.
+
+Use \f[I]true\f[] to download animated images as gifs and \f[I]false\f[]
+to download as mp4 videos.
+
+
 .SS extractor.readcomiconline.captcha
 .IP "Type:" 6
 \f[I]string\f[]
@@ -2094,6 +2176,9 @@ Fetch media from quoted Tweets.
 .IP "Description:" 4
 Fetch media from replies to other Tweets.
 
+If this value is \f[I]"self"\f[], only consider replies where
+reply and original Tweet are from the same user.
+
 
 .SS extractor.twitter.retweets
 .IP "Type:" 6
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index ffbed52..020b802 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -41,6 +41,9 @@
         "password": null,
         "recursive": true
     },
+    "bbc": {
+        "width": 1920
+    },
     "blogger":
     {
         "videos": true
@@ -49,8 +52,9 @@
     {
         "username": null,
         "password": null,
-        "ugoira": false,
-        "metadata": false
+        "external": false,
+        "metadata": false,
+        "ugoira": false
     },
     "derpibooru":
     {
@@ -95,6 +99,7 @@
     "furaffinity":
     {
         "descriptions": "text",
+        "external": false,
         "include": "gallery"
     },
     "gfycat":
@@ -142,6 +147,10 @@
     {
         "format": "mp3"
     },
+    "luscious":
+    {
+        "gif": false
+    },
     "mangadex":
     {
         "api-server": "https://api.mangadex.org",
@@ -158,6 +167,7 @@
         "username": null,
         "password": null,
         "flash": true,
+        "format": "original",
         "include": "art"
     },
     "nijie":
@@ -192,6 +202,7 @@
     },
     "reactor":
     {
+        "gif": false,
         "sleep-request": 5.0
     },
     "reddit":
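Taken together, the options introduced in this release can be combined in a user configuration. A minimal sketch (not part of the commit; option names are taken from the docs/gallery-dl.conf hunk above, the values are merely illustrative):

    {
        "extractor": {
            "bbc":         {"width": 1920},
            "danbooru":    {"external": true},
            "furaffinity": {"external": true},
            "luscious":    {"gif": true},
            "newgrounds":  {"format": "720p"},
            "reactor":     {"gif": true},
            "twitter":     {"replies": "self"}
        }
    }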
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index c8f8dec..3e6ca0e 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.18.2
+Version: 1.18.3
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
 
 | Executables build from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index f8a3c2c..c10b36d 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -170,6 +170,7 @@ gallery_dl/extractor/weasyl.py
 gallery_dl/extractor/webtoons.py
 gallery_dl/extractor/weibo.py
 gallery_dl/extractor/wikiart.py
+gallery_dl/extractor/wikieat.py
 gallery_dl/extractor/xhamster.py
 gallery_dl/extractor/xvideos.py
 gallery_dl/extractor/ytdl.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 76ec46f..78d8d34 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -98,20 +98,21 @@ class HttpDownloader(DownloaderBase):
                     time.sleep(tries)
                 tries += 1
 
-            headers = {"Accept": "*/*"}
             file_header = None
 
-            # check for .part file
-            file_size = pathfmt.part_size()
-            if file_size:
-                headers["Range"] = "bytes={}-".format(file_size)
-            # general headers
-            if self.headers:
-                headers.update(self.headers)
-            # file-specific headers
+            # collect HTTP headers
+            headers = {"Accept": "*/*"}
+            # file-specific headers
             extra = kwdict.get("_http_headers")
             if extra:
                 headers.update(extra)
+            # general headers
+            if self.headers:
+                headers.update(self.headers)
+            # partial content
+            file_size = pathfmt.part_size()
+            if file_size:
+                headers["Range"] = "bytes={}-".format(file_size)
 
             # connect to (remote) source
             try:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 1a6a899..e130db2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -132,6 +132,7 @@ modules = [
     "webtoons",
     "weibo",
     "wikiart",
+    "wikieat",
     "xhamster",
     "xvideos",
     "booru",
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index ace8a28..17b5f52 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -27,7 +27,7 @@ class BbcGalleryExtractor(GalleryExtractor):
     test = (
         ("https://www.bbc.co.uk/programmes/p084qtzs/p085g9kg", {
             "pattern": r"https://ichef\.bbci\.co\.uk"
-                       r"/images/ic/976x549_b/\w+\.jpg",
+                       r"/images/ic/1920xn/\w+\.jpg",
             "count": 37,
             "keyword": {
                 "programme": "p084qtzs",
@@ -49,32 +49,57 @@ class BbcGalleryExtractor(GalleryExtractor):
         }
 
     def images(self, page):
+        width = self.config("width")
+        width = width - width % 16 if width else 1920
+        dimensions = "/{}xn/".format(width)
+
         return [
-            (imgset.rpartition(", ")[2].partition(" ")[0], None)
-            for imgset in text.extract_iter(page, 'data-image-src-sets="', '"')
+            (src.replace("/320x180_b/", dimensions),
+             {"_fallback": self._fallback_urls(src, width)})
+            for src in text.extract_iter(page, 'data-image-src="', '"')
         ]
 
+    @staticmethod
+    def _fallback_urls(src, max_width):
+        front, _, back = src.partition("/320x180_b/")
+        for width in (1920, 1600, 1280, 976):
+            if width < max_width:
+                yield "{}/{}xn/{}".format(front, width, back)
+
 
 class BbcProgrammeExtractor(Extractor):
     """Extractor for all galleries of a bbc programme"""
     category = "bbc"
     subcategory = "programme"
     root = "https://www.bbc.co.uk"
-    pattern = BASE_PATTERN + r"[^/?#]+/galleries)"
-    test = ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
-        "pattern": BbcGalleryExtractor.pattern,
-        "count": ">= 24",
-    })
+    pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
+    test = (
+        ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
+            "pattern": BbcGalleryExtractor.pattern,
+            "range": "1-50",
+            "count": ">= 50",
+        }),
+        ("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", {
+            "pattern": BbcGalleryExtractor.pattern,
+            "count": ">= 100",
+        }),
+    )
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.galleries_url = self.root + match.group(1)
+        self.path, self.page = match.groups()
 
     def items(self):
-        page = self.request(self.galleries_url).text
         data = {"_extractor": BbcGalleryExtractor}
+        params = {"page": text.parse_int(self.page, 1)}
+        galleries_url = self.root + self.path
 
-        for programme_id in text.extract_iter(
-                page, '<a href="https://www.bbc.co.uk/programmes/', '"'):
-            url = "https://www.bbc.co.uk/programmes/" + programme_id
-            yield Message.Queue, url, data
+        while True:
+            page = self.request(galleries_url, params=params).text
+            for programme_id in text.extract_iter(
+                    page, '<a href="https://www.bbc.co.uk/programmes/', '"'):
+                url = "https://www.bbc.co.uk/programmes/" + programme_id
+                yield Message.Queue, url, data
+            if 'rel="next"' not in page:
+                return
+            params["page"] += 1
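The new images() code rounds the configured width down to a multiple of 16 before substituting it into the ichef.bbci.co.uk URL, matching the man page text above. A quick illustrative check of that expression (plain Python, outside of gallery-dl):

    # width - width % 16 rounds down to the nearest multiple of 16;
    # a falsy value falls back to the default of 1920
    for width in (1920, 1000, 150):
        print(width, "->", width - width % 16 if width else 1920)
    # 1920 -> 1920
    # 1000 -> 992
    # 150  -> 144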
r"[^/?#]+/galleries)(?:/?\?page=(\d+))?" + test = ( + ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", { + "pattern": BbcGalleryExtractor.pattern, + "range": "1-50", + "count": ">= 50", + }), + ("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", { + "pattern": BbcGalleryExtractor.pattern, + "count": ">= 100", + }), + ) def __init__(self, match): Extractor.__init__(self, match) - self.galleries_url = self.root + match.group(1) + self.path, self.page = match.groups() def items(self): - page = self.request(self.galleries_url).text data = {"_extractor": BbcGalleryExtractor} + params = {"page": text.parse_int(self.page, 1)} + galleries_url = self.root + self.path - for programme_id in text.extract_iter( - page, '<a href="https://www.bbc.co.uk/programmes/', '"'): - url = "https://www.bbc.co.uk/programmes/" + programme_id - yield Message.Queue, url, data + while True: + page = self.request(galleries_url, params=params).text + for programme_id in text.extract_iter( + page, '<a href="https://www.bbc.co.uk/programmes/', '"'): + url = "https://www.bbc.co.uk/programmes/" + programme_id + yield Message.Queue, url, data + if 'rel="next"' not in page: + return + params["page"] += 1 diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index b081cc9..f13edf7 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -217,236 +217,253 @@ class BehanceCollectionExtractor(BehanceExtractor): query = """ query GetMoodboardItemsAndRecommendations( - $id: Int! - $firstItem: Int! - $afterItem: String - $shouldGetRecommendations: Boolean! - $shouldGetItems: Boolean! - $shouldGetMoodboardFields: Boolean! - ) { - viewer @include(if: $shouldGetMoodboardFields) { - isOptedOutOfRecommendations - } - moodboard(id: $id) { - ...moodboardFields @include(if: $shouldGetMoodboardFields) - - items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems) - { - pageInfo { - endCursor - hasNextPage - } - nodes { - ...nodesFields - } - } + $id: Int! + $firstItem: Int! + $afterItem: String + $shouldGetRecommendations: Boolean! + $shouldGetItems: Boolean! + $shouldGetMoodboardFields: Boolean! 
+) { + viewer @include(if: $shouldGetMoodboardFields) { + isOptedOutOfRecommendations + isAdmin + } + moodboard(id: $id) { + ...moodboardFields @include(if: $shouldGetMoodboardFields) - recommendedItems(first: 80) @include(if: $shouldGetRecommendations) { - nodes { - ...nodesFields - fetchSource - } + items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems) { + pageInfo { + endCursor + hasNextPage + } + nodes { + ...nodesFields } } - } - fragment moodboardFields on Moodboard { - id - label - privacy - followerCount - isFollowing - projectCount - url - isOwner - owners { - id - displayName - url - firstName - location - locationUrl - images { - size_50 { - url - } - size_100 { - url - } - size_115 { - url - } - size_230 { - url - } - size_138 { - url - } - size_276 { - url - } + recommendedItems(first: 80) @include(if: $shouldGetRecommendations) { + nodes { + ...nodesFields + fetchSource } } } - - fragment projectFields on Project { +} + +fragment moodboardFields on Moodboard { + id + label + privacy + followerCount + isFollowing + projectCount + url + isOwner + owners { id - isOwner - publishedOn - matureAccess - hasMatureContent - modifiedOn - name + displayName url - isPrivate - slug - fields { - label - } - colors { - r - g - b - } - owners { - url - displayName - id - location - locationUrl - isProfileOwner - images { - size_50 { - url - } - size_100 { - url - } - size_115 { - url - } - size_230 { - url - } - size_138 { - url - } - size_276 { - url - } + firstName + location + locationUrl + isFollowing + images { + size_50 { + url } - } - covers { - size_original { + size_100 { url } - size_max_808 { + size_115 { url } - size_808 { + size_230 { url } - size_404 { + size_138 { url } - size_202 { + size_276 { url } - size_230 { + } + } +} + +fragment projectFields on Project { + id + isOwner + publishedOn + matureAccess + hasMatureContent + modifiedOn + name + url + isPrivate + slug + license { + license + description + id + label + url + text + images + } + fields { + label + } + colors { + r + g + b + } + owners { + url + displayName + id + location + locationUrl + isProfileOwner + isFollowing + images { + size_50 { + url + } + size_100 { url } size_115 { url } - } - stats { - views { - all + size_230 { + url } - appreciations { - all + size_138 { + url } - comments { - all + size_276 { + url } } } - - fragment exifDataValueFields on exifDataValue { - id - label - value - searchValue + covers { + size_original { + url + } + size_max_808 { + url + } + size_808 { + url + } + size_404 { + url + } + size_202 { + url + } + size_230 { + url + } + size_115 { + url + } + } + stats { + views { + all + } + appreciations { + all + } + comments { + all + } + } +} + +fragment exifDataValueFields on exifDataValue { + id + label + value + searchValue +} + +fragment nodesFields on MoodboardItem { + id + entityType + width + height + flexWidth + flexHeight + images { + size + url } - fragment nodesFields on MoodboardItem { - id - entityType - width - height - flexWidth - flexHeight - images { - size - url + entity { + ... on Project { + ...projectFields } - entity { - ... on Project { + ... on ImageModule { + project { ...projectFields } - ... 
on ImageModule { - project { - ...projectFields - } + colors { + r + g + b + } - exifData { - lens { - ...exifDataValueFields - } - software { - ...exifDataValueFields - } - makeAndModel { - ...exifDataValueFields - } - focalLength { - ...exifDataValueFields - } - iso { - ...exifDataValueFields - } - location { - ...exifDataValueFields - } - flash { - ...exifDataValueFields - } - exposureMode { - ...exifDataValueFields - } - shutterSpeed { - ...exifDataValueFields - } - aperture { - ...exifDataValueFields - } + exifData { + lens { + ...exifDataValueFields + } + software { + ...exifDataValueFields + } + makeAndModel { + ...exifDataValueFields + } + focalLength { + ...exifDataValueFields + } + iso { + ...exifDataValueFields + } + location { + ...exifDataValueFields + } + flash { + ...exifDataValueFields + } + exposureMode { + ...exifDataValueFields + } + shutterSpeed { + ...exifDataValueFields + } + aperture { + ...exifDataValueFields } } + } - ... on MediaCollectionComponent { - project { - ...projectFields - } + ... on MediaCollectionComponent { + project { + ...projectFields } } } +} """ variables = { "afterItem": "MAo=", "firstItem": 40, - "id" : self.collection_id, + "id" : int(self.collection_id), "shouldGetItems" : True, "shouldGetMoodboardFields": False, "shouldGetRecommendations": False, diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 3b96a4e..c6c33b4 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -32,6 +32,7 @@ class DanbooruExtractor(Extractor): super().__init__(match) self.root = "https://{}.donmai.us".format(match.group(1)) self.ugoira = self.config("ugoira", False) + self.external = self.config("external", False) self.extended_metadata = self.config("metadata", False) username, api_key = self._get_auth_info() @@ -52,6 +53,10 @@ class DanbooruExtractor(Extractor): try: url = post["file_url"] except KeyError: + if self.external and post["source"]: + post.update(data) + yield Message.Directory, post + yield Message.Queue, post["source"], post continue text.nameext_from_url(url, post) @@ -126,6 +131,11 @@ class DanbooruTagExtractor(DanbooruExtractor): ("https://danbooru.donmai.us/posts?tags=mushishi", { "count": ">= 300", }), + # 'external' option (#1747) + ("https://danbooru.donmai.us/posts?tags=pixiv_id%3A1476533", { + "options": (("external", True),), + "pattern": r"http://img16.pixiv.net/img/takaraakihito/1476533.jpg", + }), ("https://hijiribe.donmai.us/posts?tags=bonocho"), ("https://sonohara.donmai.us/posts?tags=bonocho"), ("https://safebooru.donmai.us/posts?tags=bonocho"), diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 163d7ba..900fde8 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -95,8 +95,7 @@ class DeviantartExtractor(Extractor): if "content" in deviation: content = deviation["content"] - if self.original and deviation["is_downloadable"] and \ - text.ext_from_url(content["src"]) != "gif": + if self.original and deviation["is_downloadable"]: self._update_content(deviation, content) if content["src"].startswith("https://images-wixmp-"): @@ -807,11 +806,20 @@ class DeviantartDeviationExtractor(DeviantartExtractor): (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), { "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100," }), - # non-download URL for GIFs (#242) + # GIF (#242) (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), { "pattern": 
(r"https://images-wixmp-\w+\.wixmp\.com" r"/f/[^/]+/[^.]+\.gif\?token="), }), + # Flash animation with GIF preview (#1731) + ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", { + "pattern": r"https://api-da\.wixmp\.com/_api/download" + r"/file\?downloadToken=.+", + "keyword": { + "filename": "flash_comic_tutorial_by_yuumei-d3juatd", + "extension": "swf", + }, + }), # sta.sh URLs from description (#302) (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), { "options": (("extra", 1), ("original", 0)), diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 86e1678..8c2887e 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -29,9 +29,10 @@ class FuraffinityExtractor(Extractor): self.offset = 0 if self.config("descriptions") == "html": - self._process_description = lambda x: x.strip() + self._process_description = str.strip def items(self): + external = self.config("external", False) metadata = self.metadata() for post_id in util.advance(self.posts(), self.offset): post = self._parse_post(post_id) @@ -41,8 +42,10 @@ class FuraffinityExtractor(Extractor): yield Message.Directory, post yield Message.Url, post["url"], post - def posts(self): - return self._pagination() + if external: + for url in text.extract_iter( + post["_description"], 'href="http', '"'): + yield Message.Queue, "http" + url, post def metadata(self): return None @@ -80,8 +83,7 @@ class FuraffinityExtractor(Extractor): data["tags"] = text.split_html(tags) data["title"] = text.unescape(extr("<h2><p>", "</p></h2>")) data["artist"] = extr("<strong>", "<") - data["description"] = self._process_description(extr( - 'class="section-body">', '</div>')) + data["_description"] = extr('class="section-body">', '</div>') data["views"] = pi(rh(extr('class="views">', '</span>'))) data["favorites"] = pi(rh(extr('class="favorites">', '</span>'))) data["comments"] = pi(rh(extr('class="comments">', '</span>'))) @@ -108,12 +110,12 @@ class FuraffinityExtractor(Extractor): data["tags"] = text.split_html(extr( 'id="keywords">', '</div>'))[::2] data["rating"] = extr('<img alt="', ' ') - data["description"] = self._process_description(extr( - "</table>", "</table>")) + data["_description"] = extr("</table>", "</table>") data["artist_url"] = data["artist"].replace("_", "").lower() data["user"] = self.user or data["artist_url"] data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) + data["description"] = self._process_description(data["_description"]) return data @@ -121,12 +123,12 @@ class FuraffinityExtractor(Extractor): def _process_description(description): return text.unescape(text.remove_html(description, "", "")) - def _pagination(self): + def _pagination(self, path): num = 1 while True: url = "{}/{}/{}/{}/".format( - self.root, self.subcategory, self.user, num) + self.root, path, self.user, num) page = self.request(url).text post_id = None @@ -191,6 +193,9 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor): "count": 6, }) + def posts(self): + return self._pagination("gallery") + class FuraffinityScrapsExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's scraps""" @@ -203,6 +208,9 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor): "count": ">= 3", }) + def posts(self): + return self._pagination("scraps") + class FuraffinityFavoriteExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's favorites""" @@ -273,6 +281,13 @@ class FuraffinityPostExtractor(FuraffinityExtractor): 
"height" : 120, }, }), + # 'external' option (#1492) + ("https://www.furaffinity.net/view/42166511/", { + "options": (("external", True),), + "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/" + r"|http://www\.postybirb\.com", + "count": 2, + }), ("https://furaffinity.net/view/21835115/"), ("https://sfw.furaffinity.net/view/21835115/"), ("https://www.furaffinity.net/full/21835115/"), diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 2ea5dfa..7c338a8 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -140,7 +140,7 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js inum = int(ihash[-3:-1], 16) - offset = 2 if inum < 0x40 else 1 if inum < 0x80 else 0 + offset = 2 if inum < 0x44 else 1 if inum < 0x88 else 0 url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( chr(97 + offset), diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 28b5506..3590e17 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -29,7 +29,7 @@ class InstagramExtractor(Extractor): root = "https://www.instagram.com" cookiedomain = ".instagram.com" cookienames = ("sessionid",) - request_interval = 5.0 + request_interval = 8.0 def __init__(self, match): Extractor.__init__(self, match) @@ -43,6 +43,7 @@ class InstagramExtractor(Extractor): self.login() data = self.metadata() videos = self.config("videos", True) + video_headers = {"User-Agent": "Mozilla/5.0"} for post in self.posts(): @@ -60,6 +61,8 @@ class InstagramExtractor(Extractor): url = file["display_url"] elif not videos: continue + else: + file["_http_headers"] = video_headers file.update(post) yield Message.Url, url, text.nameext_from_url(url, file) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 7218488..972316b 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -38,8 +38,8 @@ class KemonopartyExtractor(Extractor): if self.config("metadata"): username = text.unescape(text.extract( - self.request(self.user_url).text, "<title>", " | Kemono" - )[0]).lstrip() + self.request(self.user_url).text, + '<meta name="artist_name" content="', '"')[0]) else: username = None @@ -74,9 +74,7 @@ class KemonopartyExtractor(Extractor): post["type"] = file["type"] url = file["path"] if url[0] == "/": - url = "https://data.kemono.party" + url - elif url.startswith("https://kemono.party/"): - url = "https://data.kemono.party" + url[20:] + url = self.root + url text.nameext_from_url(file["name"], post) yield Message.Url, url, post @@ -125,7 +123,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor): pattern = BASE_PATTERN + r"/post/([^/?#]+)" test = ( ("https://kemono.party/fanbox/user/6993449/post/506575", { - "pattern": r"https://data\.kemono\.party/files/fanbox" + "pattern": r"https://kemono\.party/files/fanbox" r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg", "keyword": { "added": "Wed, 06 May 2020 20:28:02 GMT", @@ -148,12 +146,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor): }), # inline image (#1286) ("https://kemono.party/fanbox/user/7356311/post/802343", { - "pattern": r"https://data\.kemono\.party/inline/fanbox" + "pattern": r"https://kemono\.party/inline/fanbox" r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg", }), # kemono.party -> data.kemono.party ("https://kemono.party/gumroad/user/trylsc/post/IURjT", { - "pattern": r"https://data\.kemono\.party/(file|attachment)s" + "pattern": 
r"https://kemono\.party/(file|attachment)s" r"/gumroad/trylsc/IURjT/", }), # username (#1548, #1652) diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 852c49f..c296102 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -116,6 +116,7 @@ class LusciousAlbumExtractor(LusciousExtractor): def __init__(self, match): LusciousExtractor.__init__(self, match) self.album_id = match.group(1) + self.gif = self.config("gif", False) def items(self): album = self.metadata() @@ -130,7 +131,10 @@ class LusciousAlbumExtractor(LusciousExtractor): image["date"] = text.parse_timestamp(image["created"]) image["id"] = text.parse_int(image["id"]) - url = image["url_to_video"] or image["url_to_original"] + url = (image["url_to_original"] or image["url_to_video"] + if self.gif else + image["url_to_video"] or image["url_to_original"]) + yield Message.Url, url, text.nameext_from_url(url, image) def metadata(self): diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 4fdfac9..a699401 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -31,9 +31,13 @@ class NewgroundsExtractor(Extractor): self.user_root = "https://{}.newgrounds.com".format(self.user) self.flash = self.config("flash", True) + fmt = self.config("format", "original") + self.format = (True if not fmt or fmt == "original" else + fmt if isinstance(fmt, int) else + text.parse_int(fmt.rstrip("p"))) + def items(self): self.login() - yield Message.Version, 1 for post_url in self.posts(): try: @@ -59,7 +63,7 @@ class NewgroundsExtractor(Extractor): def posts(self): """Return urls of all relevant image pages""" - return self._pagination(self.subcategory) + return self._pagination(self._path) def login(self): username, password = self._get_auth_info() @@ -176,8 +180,23 @@ class NewgroundsExtractor(Extractor): "Referer": self.root, } sources = self.request(url, headers=headers).json()["sources"] - src = sources["360p"][0]["src"].replace(".360p.", ".") - fallback = self._video_fallback(sources) + + if self.format is True: + src = sources["360p"][0]["src"].replace(".360p.", ".") + formats = sources + else: + formats = [] + for fmt, src in sources.items(): + width = text.parse_int(fmt.rstrip("p")) + if width <= self.format: + formats.append((width, src)) + if formats: + formats.sort(reverse=True) + src, formats = formats[0][1][0]["src"], formats[1:] + else: + src = "" + + fallback = self._video_fallback(formats) date = text.parse_timestamp(src.rpartition("?")[2]) return { @@ -193,11 +212,13 @@ class NewgroundsExtractor(Extractor): } @staticmethod - def _video_fallback(sources): - sources = list(sources.items()) - sources.sort(key=lambda src: text.parse_int(src[0][:-1]), reverse=True) - for src in sources: - yield src[1][0]["src"] + def _video_fallback(formats): + if isinstance(formats, dict): + formats = list(formats.items()) + formats.sort(key=lambda fmt: text.parse_int(fmt[0].rstrip("p")), + reverse=True) + for fmt in formats: + yield fmt[1][0]["src"] def _pagination(self, kind): root = self.user_root @@ -322,7 +343,13 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): ("https://www.newgrounds.com/portal/view/161181/format/flash", { "pattern": 
r"https://uploads\.ungrounded\.net/161000" r"/161181_ddautta_mask__550x281_\.swf\?f1081628129", - }) + }), + # format selection (#1729) + ("https://www.newgrounds.com/portal/view/758545", { + "options": (("format", "720p"),), + "pattern": r"https://uploads\.ungrounded\.net/alternate/1482000" + r"/1482860_alternate_102516\.720p\.mp4\?\d+", + }), ) def __init__(self, match): @@ -336,7 +363,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): class NewgroundsArtExtractor(NewgroundsExtractor): """Extractor for all images of a newgrounds user""" - subcategory = "art" + subcategory = _path = "art" pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$" test = ("https://tomfulp.newgrounds.com/art", { "pattern": NewgroundsImageExtractor.pattern, @@ -346,7 +373,7 @@ class NewgroundsArtExtractor(NewgroundsExtractor): class NewgroundsAudioExtractor(NewgroundsExtractor): """Extractor for all audio submissions of a newgrounds user""" - subcategory = "audio" + subcategory = _path = "audio" pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$" test = ("https://tomfulp.newgrounds.com/audio", { "pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3", @@ -356,7 +383,7 @@ class NewgroundsAudioExtractor(NewgroundsExtractor): class NewgroundsMoviesExtractor(NewgroundsExtractor): """Extractor for all movies of a newgrounds user""" - subcategory = "movies" + subcategory = _path = "movies" pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$" test = ("https://tomfulp.newgrounds.com/movies", { "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+", diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py index 9c4d686..be736d1 100644 --- a/gallery_dl/extractor/nsfwalbum.py +++ b/gallery_dl/extractor/nsfwalbum.py @@ -43,11 +43,27 @@ class NsfwalbumAlbumExtractor(GalleryExtractor): def images(self, page): iframe = self.root + "/iframe_image.php?id=" backend = self.root + "/backend.php" + retries = self._retries + for image_id in text.extract_iter(page, 'data-img-id="', '"'): - spirit = self._annihilate(text.extract(self.request( - iframe + image_id).text, 'giraffe.annihilate("', '"')[0]) - params = {"spirit": spirit, "photo": image_id} - data = self.request(backend, params=params).json() + spirit = None + tries = 0 + + while tries <= retries: + try: + if not spirit: + spirit = self._annihilate(text.extract( + self.request(iframe + image_id).text, + 'giraffe.annihilate("', '"')[0]) + params = {"spirit": spirit, "photo": image_id} + data = self.request(backend, params=params).json() + break + except Exception: + tries += 1 + else: + self.log.warning("Unable to fetch image %s", image_id) + continue + yield data[0], { "id" : text.parse_int(image_id), "width" : text.parse_int(data[1]), diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 104dc23..bbbdd3f 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -28,6 +28,7 @@ class ReactorExtractor(Extractor): Extractor.__init__(self, match) self.root = "http://" + match.group(1) self.session.headers["Referer"] = self.root + self.gif = self.config("gif", False) if not self.category: # set category based on domain name @@ -124,6 +125,12 @@ class ReactorExtractor(Extractor): elif "/post/webm/" not in url and "/post/mp4/" not in url: url = url.replace("/post/", "/post/full/") + if self.gif and ("/post/webm/" in url or "/post/mp4/" in url): + gif_url = text.extract(image, '<a href="', '"')[0] + if not gif_url: + continue + url = gif_url + yield { 
"url": url, "post_id": post_id, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index fd0140d..7e78941 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -50,12 +50,17 @@ class TwitterExtractor(Extractor): if not self.retweets and "retweeted_status_id_str" in tweet: self.log.debug("Skipping %s (retweet)", tweet["id_str"]) continue - if not self.replies and "in_reply_to_user_id_str" in tweet: - self.log.debug("Skipping %s (reply)", tweet["id_str"]) - continue if not self.quoted and "quoted" in tweet: self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"]) continue + if "in_reply_to_user_id_str" in tweet and ( + not self.replies or ( + self.replies == "self" and + tweet["in_reply_to_user_id_str"] != tweet["user_id_str"] + ) + ): + self.log.debug("Skipping %s (reply)", tweet["id_str"]) + continue files = [] if "extended_entities" in tweet: @@ -452,6 +457,15 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("replies", False),), "count": 0, }), + # 'replies' to self (#1254) + ("https://twitter.com/i/web/status/1424882930803908612", { + "options": (("replies", "self"),), + "count": 4, + }), + ("https://twitter.com/i/web/status/1424898916156284928", { + "options": (("replies", "self"),), + "count": 0, + }), # "quoted" option (#854) ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { "options": (("quoted", True),), @@ -582,8 +596,8 @@ class TwitterAPI(): "ext": "mediaStats,highlightedLabel", } - cookies = self.extractor.session.cookies - cookiedomain = ".twitter.com" + cookies = extractor.session.cookies + cookiedomain = extractor.cookiedomain # CSRF csrf_token = cookies.get("ct0", domain=cookiedomain) @@ -726,21 +740,46 @@ class TwitterAPI(): if csrf_token: self.headers["x-csrf-token"] = csrf_token + data = response.json() + if "errors" in data: + try: + msg = ", ".join( + '"' + error["message"] + '"' + for error in data["errors"] + ) + except Exception: + msg = data["errors"] + if response.status_code < 400: + self.extractor.log.warning(msg) + else: + msg = "" + if response.status_code < 400: - return response.json() + # success + return data + if response.status_code == 429: + # rate limit exceeded until = response.headers.get("x-rate-limit-reset") seconds = None if until else 60 self.extractor.wait(until=until, seconds=seconds) continue - try: - msg = ", ".join( - '"' + error["message"] + '"' - for error in response.json()["errors"] - ) - except Exception: - msg = response.text + if response.status_code == 401 and \ + "have been blocked from viewing" in msg: + # account blocked + extr = extr = self.extractor + if self.headers["x-twitter-auth-type"] and \ + extr.config("logout"): + guest_token = self._guest_token() + extr.session.cookies.set( + "gt", guest_token, domain=extr.cookiedomain) + self.headers["x-guest-token"] = guest_token + self.headers["x-twitter-auth-type"] = None + extr.log.info("Retrying API request as guest") + continue + + # error raise exception.StopExtraction( "%s %s (%s)", response.status_code, response.reason, msg) diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index 2178641..9dd2d47 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -88,6 +88,7 @@ class VkPhotosExtractor(Extractor): yield Message.Directory, data sub = re.compile(r"/imp[fg]/").sub needle = 'data-id="{}_'.format(user_id) + cnt = 0 while True: offset, html = self.request( diff --git a/gallery_dl/extractor/wikieat.py b/gallery_dl/extractor/wikieat.py new file mode 
diff --git a/gallery_dl/extractor/wikieat.py b/gallery_dl/extractor/wikieat.py
new file mode 100644
index 0000000..f544bcb
--- /dev/null
+++ b/gallery_dl/extractor/wikieat.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://wikieat.club/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WikieatThreadExtractor(Extractor):
+    """Extractor for Wikieat threads"""
+    category = "wikieat"
+    subcategory = "thread"
+    directory_fmt = ("{category}", "{board}", "{thread} {title}")
+    filename_fmt = "{time}{num:?-//} {filename}.{extension}"
+    archive_fmt = "{board}_{thread}_{tim}"
+    pattern = r"(?:https?://)?wikieat\.club/([^/]+)/res/(\d+)"
+    test = ("https://wikieat.club/cel/res/25321.html", {
+        "pattern": r"https://wikieat\.club/cel/src/\d+(-\d)?\.\w+",
+        "count": ">= 200",
+    })
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board, self.thread = match.groups()
+
+    def items(self):
+        url = "https://wikieat.club/{}/res/{}.json".format(
+            self.board, self.thread)
+        posts = self.request(url).json()["posts"]
+        title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
+        process = self._process
+
+        data = {
+            "board" : self.board,
+            "thread": self.thread,
+            "title" : text.unescape(title)[:50],
+            "num"   : 0,
+        }
+
+        yield Message.Version, 1
+        yield Message.Directory, data
+        for post in posts:
+            if "filename" in post:
+                yield process(post, data)
+                if "extra_files" in post:
+                    for post["num"], filedata in enumerate(
+                            post["extra_files"], 1):
+                        yield process(post, filedata)
+
+    @staticmethod
+    def _process(post, data):
+        post.update(data)
+        post["extension"] = post["ext"][1:]
+        tim = post["tim"]
+        url = ("https://wikieat.club/"
+               + post["board"] + "/src/"
+               + tim + post["ext"])
+        return Message.Url, url, post
+
+
+class WikieatBoardExtractor(Extractor):
+    """Extractor for Wikieat boards"""
+    category = "wikieat"
+    subcategory = "board"
+    pattern = (r"(?:https?://)?wikieat\.club"
+               r"/([^/?#]+)/(?:index|catalog|\d+)\.html")
+    test = (
+        ("https://wikieat.club/cel/index.html", {
+            "pattern": WikieatThreadExtractor.pattern,
+            "count": ">= 100",
+        }),
+        ("https://wikieat.club/cel/catalog.html"),
+        ("https://wikieat.club/cel/2.html")
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board = match.group(1)
+
+    def items(self):
+        url = "https://wikieat.club/{}/threads.json".format(self.board)
+        threads = self.request(url).json()
+
+        for page in threads:
+            for thread in page["threads"]:
+                url = "https://wikieat.club/{}/res/{}.html".format(
+                    self.board, thread["no"])
+                thread["page"] = page["page"]
+                thread["_extractor"] = WikieatThreadExtractor
+                yield Message.Queue, url, thread
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 3462138..2c0fae6 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -97,6 +97,17 @@ def generate_token(size=16):
     return binascii.hexlify(data).decode()
 
 
+def format_value(value, unit="B", suffixes="kMGTPEZY"):
+    value = format(value)
+    value_len = len(value)
+    index = value_len - 4
+    if index >= 0:
+        offset = (value_len - 1) % 3 + 1
+        return (value[:offset] + "." + value[offset:offset+2] +
+                suffixes[index // 3] + unit)
+    return value + unit
+
+
 def combine_dict(a, b):
     """Recursively combine the contents of 'b' into 'a'"""
     for key, value in b.items():
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index fbb4e5b..566159d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.18.2"
+__version__ = "1.18.3"
diff --git a/test/test_util.py b/test/test_util.py
index 2d574da..7a31ebb 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -532,6 +532,22 @@ class TestOther(unittest.TestCase):
         self.assertEqual(len(token), 80 * 2)
         self.assertRegex(token, r"^[0-9a-f]+$")
 
+    def test_format_value(self):
+        self.assertEqual(util.format_value(0)         , "0B")
+        self.assertEqual(util.format_value(1)         , "1B")
+        self.assertEqual(util.format_value(12)        , "12B")
+        self.assertEqual(util.format_value(123)       , "123B")
+        self.assertEqual(util.format_value(1234)      , "1.23kB")
+        self.assertEqual(util.format_value(12345)     , "12.34kB")
+        self.assertEqual(util.format_value(123456)    , "123.45kB")
+        self.assertEqual(util.format_value(1234567)   , "1.23MB")
+        self.assertEqual(util.format_value(12345678)  , "12.34MB")
+        self.assertEqual(util.format_value(123456789) , "123.45MB")
+        self.assertEqual(util.format_value(1234567890), "1.23GB")
+
+        self.assertEqual(util.format_value(123   , "B/s"), "123B/s")
+        self.assertEqual(util.format_value(123456, "B/s"), "123.45kB/s")
+
     def test_combine_dict(self):
         self.assertEqual(
             util.combine_dict({}, {}),
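The new test cases pin down util.format_value(): the numeric part is truncated, not rounded, to two digits after the point and joined with a size suffix. Expected behaviour, shown as an interpreter session (assuming gallery_dl is importable):

    >>> from gallery_dl import util
    >>> util.format_value(1234)
    '1.23kB'
    >>> util.format_value(123456789)
    '123.45MB'
    >>> util.format_value(123456, "B/s")
    '123.45kB/s'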