author    Unit 193 <unit193@unit193.net>  2021-10-05 23:30:05 -0400
committer Unit 193 <unit193@unit193.net>  2021-10-05 23:30:05 -0400
commit    34ba2951b8c523713425c98addb9256ea05c946f (patch)
tree      6ec7e96d0c6e6f6e94b6b97ecd8c0a414ceef93d
parent    3f5483df9075ae526f4c54f4cbe80edeabf6d4cc (diff)
New upstream version 1.19.0 (tag: upstream/1.19.0)
-rw-r--r--  CHANGELOG.md                             |  40
-rw-r--r--  PKG-INFO                                 |   7
-rw-r--r--  README.rst                               |   5
-rw-r--r--  data/completion/_gallery-dl              |   1
-rw-r--r--  data/completion/gallery-dl               |   2
-rw-r--r--  data/man/gallery-dl.1                    |   5
-rw-r--r--  data/man/gallery-dl.conf.5               | 132
-rw-r--r--  docs/gallery-dl.conf                     |   1
-rw-r--r--  gallery_dl.egg-info/PKG-INFO             |   7
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt          |   5
-rw-r--r--  gallery_dl/downloader/http.py            |  46
-rw-r--r--  gallery_dl/downloader/ytdl.py            |  18
-rw-r--r--  gallery_dl/extractor/__init__.py         |   1
-rw-r--r--  gallery_dl/extractor/artstation.py       |   3
-rw-r--r--  gallery_dl/extractor/aryion.py           |  57
-rw-r--r--  gallery_dl/extractor/common.py           |  44
-rw-r--r--  gallery_dl/extractor/cyberdrop.py        |   2
-rw-r--r--  gallery_dl/extractor/desktopography.py   |  95
-rw-r--r--  gallery_dl/extractor/deviantart.py       |  63
-rw-r--r--  gallery_dl/extractor/erome.py            |   7
-rw-r--r--  gallery_dl/extractor/fantia.py           |   2
-rw-r--r--  gallery_dl/extractor/foolslide.py        |   4
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py     |  52
-rw-r--r--  gallery_dl/extractor/gfycat.py           |  17
-rw-r--r--  gallery_dl/extractor/hiperdex.py         |  10
-rw-r--r--  gallery_dl/extractor/imagehosts.py       |  20
-rw-r--r--  gallery_dl/extractor/instagram.py        |   3
-rw-r--r--  gallery_dl/extractor/kemonoparty.py      |  54
-rw-r--r--  gallery_dl/extractor/mangadex.py         |  38
-rw-r--r--  gallery_dl/extractor/mastodon.py         |  52
-rw-r--r--  gallery_dl/extractor/nozomi.py           |   4
-rw-r--r--  gallery_dl/extractor/oauth.py            |   2
-rw-r--r--  gallery_dl/extractor/reddit.py           |  16
-rw-r--r--  gallery_dl/extractor/redgifs.py          |   2
-rw-r--r--  gallery_dl/extractor/twitter.py          |  37
-rw-r--r--  gallery_dl/formatter.py                  | 306
-rw-r--r--  gallery_dl/job.py                        |  35
-rw-r--r--  gallery_dl/option.py                     |  10
-rw-r--r--  gallery_dl/output.py                     |  89
-rw-r--r--  gallery_dl/path.py                       | 332
-rw-r--r--  gallery_dl/postprocessor/compare.py      |  29
-rw-r--r--  gallery_dl/postprocessor/exec.py         |   4
-rw-r--r--  gallery_dl/postprocessor/metadata.py     |   8
-rw-r--r--  gallery_dl/util.py                       | 592
-rw-r--r--  gallery_dl/version.py                    |   2
-rw-r--r--  setup.py                                 |  45
-rw-r--r--  test/test_downloader.py                  |   4
-rw-r--r--  test/test_formatter.py                   | 183
-rw-r--r--  test/test_output.py                      | 156
-rw-r--r--  test/test_postprocessor.py               |   4
-rw-r--r--  test/test_results.py                     |  11
-rw-r--r--  test/test_util.py                        | 188
52 files changed, 1887 insertions(+), 965 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84ffc1e..87dd18f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,45 @@
# Changelog
+## 1.19.0 - 2021-10-01
+### Additions
+- [aryion] add `tag` extractor ([#1849](https://github.com/mikf/gallery-dl/issues/1849))
+- [desktopography] implement desktopography extractors ([#1740](https://github.com/mikf/gallery-dl/issues/1740))
+- [deviantart] implement `auto-unwatch` option ([#1466](https://github.com/mikf/gallery-dl/issues/1466), [#1757](https://github.com/mikf/gallery-dl/issues/1757))
+- [fantia] add `date` metadata field ([#1853](https://github.com/mikf/gallery-dl/issues/1853))
+- [fappic] add `image` extractor ([#1898](https://github.com/mikf/gallery-dl/issues/1898))
+- [gelbooru_v02] add `favorite` extractor ([#1834](https://github.com/mikf/gallery-dl/issues/1834))
+- [kemonoparty] add `favorite` extractor ([#1824](https://github.com/mikf/gallery-dl/issues/1824))
+- [kemonoparty] implement login with username & password ([#1824](https://github.com/mikf/gallery-dl/issues/1824))
+- [mastodon] add `following` extractor ([#1891](https://github.com/mikf/gallery-dl/issues/1891))
+- [mastodon] support specifying accounts by ID
+- [twitter] support `/with_replies` URLs ([#1833](https://github.com/mikf/gallery-dl/issues/1833))
+- [twitter] add `quote_by` metadata field ([#1481](https://github.com/mikf/gallery-dl/issues/1481))
+- [postprocessor:compare] extend `action` option ([#1592](https://github.com/mikf/gallery-dl/issues/1592))
+- implement a download progress indicator ([#1519](https://github.com/mikf/gallery-dl/issues/1519))
+- implement a `page-reverse` option ([#1854](https://github.com/mikf/gallery-dl/issues/1854))
+- implement a way to specify extended format strings
+- allow specifying a minimum/maximum for `sleep-*` options ([#1835](https://github.com/mikf/gallery-dl/issues/1835))
+- add a `--write-infojson` command-line option
+### Changes
+- [cyberdrop] change directory name format ([#1871](https://github.com/mikf/gallery-dl/issues/1871))
+- [instagram] update default delay to 6-12 seconds ([#1835](https://github.com/mikf/gallery-dl/issues/1835))
+- [reddit] extend subcategory depending on input URL ([#1836](https://github.com/mikf/gallery-dl/issues/1836))
+- move util.Formatter and util.PathFormat into their own modules
+### Fixes
+- [artstation] use `/album/all` view for user portfolios ([#1826](https://github.com/mikf/gallery-dl/issues/1826))
+- [aryion] update/improve pagination ([#1849](https://github.com/mikf/gallery-dl/issues/1849))
+- [deviantart] fix bug with fetching premium content ([#1879](https://github.com/mikf/gallery-dl/issues/1879))
+- [deviantart] update default archive_fmt for single deviations ([#1874](https://github.com/mikf/gallery-dl/issues/1874))
+- [erome] send Referer header for file downloads ([#1829](https://github.com/mikf/gallery-dl/issues/1829))
+- [hiperdex] fix extraction
+- [kemonoparty] update file download URLs ([#1902](https://github.com/mikf/gallery-dl/issues/1902), [#1903](https://github.com/mikf/gallery-dl/issues/1903))
+- [mangadex] fix extraction ([#1852](https://github.com/mikf/gallery-dl/issues/1852))
+- [mangadex] fix retrieving chapters from "pornographic" titles ([#1908](https://github.com/mikf/gallery-dl/issues/1908))
+- [nozomi] preserve case of search tags ([#1860](https://github.com/mikf/gallery-dl/issues/1860))
+- [redgifs][gfycat] remove webtoken code ([#1907](https://github.com/mikf/gallery-dl/issues/1907))
+- [twitter] ensure card entries have a `url` ([#1868](https://github.com/mikf/gallery-dl/issues/1868))
+- implement a way to correctly shorten displayed filenames containing east-asian characters ([#1377](https://github.com/mikf/gallery-dl/issues/1377))
+
## 1.18.4 - 2021-09-04
### Additions
- [420chan] add `thread` and `board` extractors ([#1773](https://github.com/mikf/gallery-dl/issues/1773))
diff --git a/PKG-INFO b/PKG-INFO
index 1dfa877..b101649 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.18.4
+Version: 1.19.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -223,7 +223,6 @@ Description: ==========
``mangadex``,
``mangoxo``,
``pillowfort``,
- ``pinterest``,
``sankaku``,
``subscribestar``,
``tapas``,
diff --git a/README.rst b/README.rst
index 5c09275..bd79958 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -212,7 +212,6 @@ and optional for
``mangadex``,
``mangoxo``,
``pillowfort``,
-``pinterest``,
``sankaku``,
``subscribestar``,
``tapas``,
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 15806e8..22a5f25 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -54,6 +54,7 @@ _arguments -C -S \
--ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \
--ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \
--write-metadata'[Write metadata to separate JSON files]' \
+--write-infojson'[Write gallery metadata to an info.json file]' \
--write-tags'[Write image tags to separate text files]' \
--mtime-from-date'[Set file modification times according to "date" metadata]' \
--exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index f3d1100..c2ef896 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
fi
}
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index edf1068..29621b0 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-09-04" "1.18.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-10-01" "1.19.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -161,6 +161,9 @@ Convert Pixiv Ugoira to WebM in VP9 lossless mode
.B "\-\-write\-metadata"
Write metadata to separate JSON files
.TP
+.B "\-\-write\-infojson"
+Write gallery metadata to an info.json file
+.TP
.B "\-\-write\-tags"
Write image tags to separate text files
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 869d605..a5c0970 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-09-04" "1.18.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-10-01" "1.19.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -371,7 +371,7 @@ filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.)
.SS extractor.*.sleep
.IP "Type:" 6
-\f[I]float\f[]
+\f[I]Duration\f[]
.IP "Default:" 9
\f[I]0\f[]
@@ -382,7 +382,7 @@ Number of seconds to sleep before each download.
.SS extractor.*.sleep-extractor
.IP "Type:" 6
-\f[I]float\f[]
+\f[I]Duration\f[]
.IP "Default:" 9
\f[I]0\f[]
@@ -394,7 +394,7 @@ i.e. before starting a new extractor.
.SS extractor.*.sleep-request
.IP "Type:" 6
-\f[I]float\f[]
+\f[I]Duration\f[]
.IP "Default:" 9
\f[I]0\f[]
@@ -441,14 +441,14 @@ and optional for
.br
* \f[I]instagram\f[]
.br
+* \f[I]kemonoparty\f[]
+.br
* \f[I]mangadex\f[]
.br
* \f[I]mangoxo\f[]
.br
* \f[I]pillowfort\f[]
.br
-* \f[I]pinterest\f[]
-.br
* \f[I]sankaku\f[]
.br
* \f[I]subscribestar\f[]
@@ -620,6 +620,12 @@ Default value used for missing or undefined keyword names in
.IP "Description:" 4
Insert a file's download URL into its metadata dictionary as the given name.
+For example, setting this option to \f[I]"gdl_file_url"\f[] will cause a new
+metadata field with name \f[I]gdl_file_url\f[] to appear, which contains the
+current file's download URL.
+This can then be used in \f[I]filenames\f[],
+with a \f[I]metadata\f[] post processor, etc.
+
.SS extractor.*.category-transfer
.IP "Type:" 6
@@ -1030,6 +1036,30 @@ to access 18+ content without \f[I]API Key\f[].
See \f[I]Filters\f[] for details.
+.SS extractor.deviantart.auto-watch
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Automatically watch users when encountering "Watchers-Only Deviations"
+(requires a \f[I]refresh-token\f[]).
+
+
+.SS extractor.deviantart.auto-unwatch
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+After watching a user through \f[I]auto-watch\f[],
+unwatch that user at the end of the current extractor run.
+
+
.SS extractor.deviantart.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -1204,18 +1234,6 @@ Note: The \f[I]refresh-token\f[] becomes invalid
or whenever your \f[I]cache file\f[] is deleted or cleared.
-.SS extractor.deviantart.auto-watch
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Automatically watch users when encountering "Watchers-Only Deviations"
-(requires a \f[I]refresh-token\f[]).
-
-
.SS extractor.deviantart.wait-min
.IP "Type:" 6
\f[I]integer\f[]
@@ -2538,6 +2556,17 @@ Reverse the order of chapter URLs extracted from manga pages.
* \f[I]false\f[]: Start with the first chapter
+.SS extractor.[manga-extractor].page-reverse
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download manga chapter pages in reverse order.
+
+
.SH DOWNLOADER OPTIONS
.SS downloader.*.enabled
.IP "Type:" 6
@@ -2615,6 +2644,20 @@ If this value is \f[I]null\f[], \f[I].part\f[] files are going to be stored
alongside the actual output files.
+.SS downloader.*.progress
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]3.0\f[]
+
+.IP "Description:" 4
+Number of seconds until a download progress indicator
+for the current download is displayed.
+
+Set this option to \f[I]null\f[] to disable this indicator.
+
+
.SS downloader.*.rate
.IP "Type:" 6
\f[I]string\f[]
@@ -2829,6 +2872,9 @@ Controls the output string format and status indicators.
Controls whether the output strings should be shortened to fit
on one console line.
+Set this option to \f[I]"eaw"\f[] to also handle East Asian characters
+with a display width greater than 1.
+
.SS output.skip
.IP "Type:" 6
@@ -2949,6 +2995,20 @@ The action to take when files do not compare as equal.
.br
* \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one
+
+.br
+* \f[I]"abort:N"\f[]: Same as \f[I]"replace"\f[] and stop the current extractor run
+after \f[I]N\f[] consecutive files compared as equal.
+
+.br
+* \f[I]"terminate:N"\f[]: Same as \f[I]"replace"\f[]
+and stop the current extractor run, including parent extractors,
+after \f[I]N\f[] consecutive files compared as equal.
+
+.br
+* \f[I]"exit:N"\f[]: Same as \f[I]"replace"\f[] and exit the program
+after \f[I]N\f[] consecutive files compared as equal.
+
.br
* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new
version like \f[I]skip = "enumerate"\f[]
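
As a rough illustration of the counted actions described above, a "compare" post processor entry could look like the following (shown as a Python dict mirroring the JSON layout of gallery-dl.conf; the chosen action value is an assumption for the example):

    # Hypothetical "compare" post processor configuration.
    # "terminate:3" is assumed per the description above: replace old
    # files, but stop the current extractor run (including parents)
    # after 3 consecutive files compare as equal.
    compare_postprocessor = {
        "name"  : "compare",
        "action": "terminate:3",
    }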
@@ -3409,7 +3469,7 @@ application and put them in your configuration file
as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[]
.br
* clear your \f[I]cache\f[] to delete any remaining
-\f[I]access-token\f[] entries. (\f[I]gallery-dl --clear-cache\f[])
+\f[I]access-token\f[] entries. (\f[I]gallery-dl --clear-cache deviantart\f[])
.br
* get a new \f[I]refresh-token\f[] for the
new \f[I]client-id\f[] (\f[I]gallery-dl oauth:deviantart\f[])
@@ -3501,7 +3561,10 @@ and put them in your configuration file
.SH CUSTOM TYPES
.SS Date
.IP "Type:" 6
-\f[I]string\f[] or \f[I]integer\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]integer\f[]
.IP "Example:" 4
.br
@@ -3520,9 +3583,36 @@ A \f[I]Date\f[] value represents a specific point in time.
* If given as \f[I]integer\f[], it is interpreted as UTC timestamp.
+.SS Duration
+.IP "Type:" 6
+.br
+* \f[I]float\f[]
+.br
+* \f[I]list\f[] with 2 \f[I]floats\f[]
+
+.IP "Example:" 4
+.br
+* 2.85
+.br
+* [1.5, 3.0]
+
+.IP "Description:" 4
+A \f[I]Duration\f[] represents a span of time in seconds.
+
+.br
+* If given as a single \f[I]float\f[], it will be used as that exact value.
+.br
+* If given as a \f[I]list\f[] with 2 floating-point numbers \f[I]a\f[] & \f[I]b\f[],
+a value \f[I]N\f[] will be chosen at random with uniform distribution such that \f[I]a <= N <= b\f[].
+(see \f[I]random.uniform()\f[])
+
+
.SS Path
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Example:" 4
.br
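
A minimal sketch of how a Duration value might be turned into a callable, assuming the semantics documented above (a bare float is used as-is, a two-element list selects a uniformly distributed value). The helper name mirrors util.build_duration_func used later in this diff, but its exact signature here is an assumption:

    import random

    def build_duration_func(duration, minimum=0.0):
        # None/0 disables sleeping entirely
        if not duration:
            return None
        if isinstance(duration, (list, tuple)):
            lo, hi = duration
            # pick a fresh uniform value on every call
            return lambda: max(random.uniform(lo, hi), minimum)
        return lambda: max(duration, minimum)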
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index f8b6c36..b998597 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -319,6 +319,7 @@
"mtime": true,
"part": true,
"part-directory": null,
+ "progress": 3.0,
"rate": null,
"retries": 4,
"timeout": 30.0,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 30c9c75..63101a1 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.18.4
+Version: 1.19.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -223,7 +223,6 @@ Description: ==========
``mangadex``,
``mangoxo``,
``pillowfort``,
- ``pinterest``,
``sankaku``,
``subscribestar``,
``tapas``,
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 5c427d6..b662b96 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -15,10 +15,12 @@ gallery_dl/__main__.py
gallery_dl/cache.py
gallery_dl/config.py
gallery_dl/exception.py
+gallery_dl/formatter.py
gallery_dl/job.py
gallery_dl/oauth.py
gallery_dl/option.py
gallery_dl/output.py
+gallery_dl/path.py
gallery_dl/text.py
gallery_dl/util.py
gallery_dl/version.py
@@ -55,6 +57,7 @@ gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
gallery_dl/extractor/cyberdrop.py
gallery_dl/extractor/danbooru.py
+gallery_dl/extractor/desktopography.py
gallery_dl/extractor/deviantart.py
gallery_dl/extractor/directlink.py
gallery_dl/extractor/dynastyscans.py
@@ -189,8 +192,10 @@ test/test_config.py
test/test_cookies.py
test/test_downloader.py
test/test_extractor.py
+test/test_formatter.py
test/test_job.py
test/test_oauth.py
+test/test_output.py
test/test_postprocessor.py
test/test_results.py
test/test_text.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 78d8d34..d2efd3f 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -31,6 +31,7 @@ class HttpDownloader(DownloaderBase):
self.downloading = False
self.adjust_extension = self.config("adjust-extensions", True)
+ self.progress = self.config("progress", 3.0)
self.headers = self.config("headers")
self.minsize = self.config("filesize-min")
self.maxsize = self.config("filesize-max")
@@ -63,6 +64,8 @@ class HttpDownloader(DownloaderBase):
self.receive = self._receive_rate
else:
self.log.warning("Invalid rate limit (%r)", self.rate)
+ if self.progress is not None:
+ self.receive = self._receive_rate
def download(self, url, pathfmt):
try:
@@ -202,6 +205,7 @@ class HttpDownloader(DownloaderBase):
with pathfmt.open(mode) as fp:
if file_header:
fp.write(file_header)
+ offset += len(file_header)
elif offset:
if adjust_extension and \
pathfmt.extension in FILE_SIGNATURES:
@@ -210,7 +214,7 @@ class HttpDownloader(DownloaderBase):
self.out.start(pathfmt.path)
try:
- self.receive(fp, content)
+ self.receive(fp, content, size, offset)
except (RequestException, SSLError, OpenSSLError) as exc:
msg = str(exc)
print()
@@ -234,28 +238,42 @@ class HttpDownloader(DownloaderBase):
return True
@staticmethod
- def receive(fp, content):
+ def receive(fp, content, bytes_total, bytes_downloaded):
write = fp.write
for data in content:
write(data)
- def _receive_rate(self, fp, content):
- rt = self.rate
- t1 = time.time()
+ def _receive_rate(self, fp, content, bytes_total, bytes_downloaded):
+ rate = self.rate
+ progress = self.progress
+ bytes_start = bytes_downloaded
+ write = fp.write
+ t1 = tstart = time.time()
for data in content:
- fp.write(data)
+ write(data)
t2 = time.time() # current time
- actual = t2 - t1 # actual elapsed time
- expected = len(data) / rt # expected elapsed time
+ elapsed = t2 - t1 # elapsed time
+ num_bytes = len(data)
+
+ if progress is not None:
+ bytes_downloaded += num_bytes
+ tdiff = t2 - tstart
+ if tdiff >= progress:
+ self.out.progress(
+ bytes_total, bytes_downloaded,
+ int((bytes_downloaded - bytes_start) / tdiff),
+ )
- if actual < expected:
- # sleep if less time elapsed than expected
- time.sleep(expected - actual)
- t1 = time.time()
- else:
- t1 = t2
+ if rate:
+ expected = num_bytes / rate # expected elapsed time
+ if elapsed < expected:
+ # sleep if less time elapsed than expected
+ time.sleep(expected - elapsed)
+ t2 = time.time()
+
+ t1 = t2
def _find_extension(self, response):
"""Get filename extension from MIME type"""
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index b1e1d58..86e247b 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -42,6 +42,10 @@ class YoutubeDLDownloader(DownloaderBase):
if raw_options:
options.update(raw_options)
+ self.progress = self.config("progress", 3.0)
+ if self.progress is not None:
+ options["progress_hooks"] = (self._progress_hook,)
+
if self.config("logging", True):
options["logger"] = self.log
self.forward_cookies = self.config("forward-cookies", False)
@@ -56,7 +60,10 @@ class YoutubeDLDownloader(DownloaderBase):
kwdict = pathfmt.kwdict
ytdl = kwdict.pop("_ytdl_instance", None)
- if not ytdl:
+ if ytdl:
+ if self.progress is not None and not ytdl._progress_hooks:
+ ytdl.add_progress_hook(self._progress_hook)
+ else:
ytdl = self.ytdl
if self.forward_cookies:
set_cookie = ytdl.cookiejar.set_cookie
@@ -126,6 +133,15 @@ class YoutubeDLDownloader(DownloaderBase):
ytdl.process_info(entry)
return True
+ def _progress_hook(self, info):
+ if info["status"] == "downloading" and \
+ info["elapsed"] >= self.progress:
+ self.out.progress(
+ info["total_bytes"],
+ info["downloaded_bytes"],
+ int(info["speed"]),
+ )
+
@staticmethod
def _set_outtmpl(ytdl, outtmpl):
try:
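
For context, youtube-dl style progress hooks are plain callables receiving a status dict; a minimal standalone sketch of wiring one up outside gallery-dl (the dict keys follow what youtube-dl passes to hooks; "total_bytes" may be absent for some downloads):

    from youtube_dl import YoutubeDL  # yt-dlp exposes the same interface

    def progress_hook(info):
        # Called repeatedly while downloading; "status" is one of
        # "downloading", "finished", or "error".
        if info["status"] == "downloading":
            print(info.get("downloaded_bytes"), "of",
                  info.get("total_bytes"), "bytes")

    ydl = YoutubeDL({"progress_hooks": [progress_hook]})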
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f68ea9f..c512548 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
"comicvine",
"cyberdrop",
"danbooru",
+ "desktopography",
"deviantart",
"dynastyscans",
"e621",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f2ad0ab..f687ff8 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -158,7 +158,8 @@ class ArtstationUserExtractor(ArtstationExtractor):
def projects(self):
url = "{}/users/{}/projects.json".format(self.root, self.user)
- return self._pagination(url)
+ params = {"album_id": "all"}
+ return self._pagination(url, params)
class ArtstationAlbumExtractor(ArtstationExtractor):
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 0d0ad70..06ec571 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.recursive = True
- self._needle = "class='gallery-item' id='"
def login(self):
if self._check_cookies(self.cookienames):
@@ -56,25 +55,50 @@ class AryionExtractor(Extractor):
def items(self):
self.login()
+ data = self.metadata()
for post_id in self.posts():
post = self._parse_post(post_id)
if post:
+ if data:
+ post.update(data)
yield Message.Directory, post
yield Message.Url, post["url"], post
elif post is False and self.recursive:
base = self.root + "/g4/view/"
data = {"_extractor": AryionPostExtractor}
- for post_id in self._pagination(base + post_id):
+ for post_id in self._pagination_params(base + post_id):
yield Message.Queue, base + post_id, data
def posts(self):
"""Yield relevant post IDs"""
- def _pagination(self, url):
+ def metadata(self):
+ """Return general metadata"""
+
+ def _pagination_params(self, url, params=None):
+ if params is None:
+ params = {"p": 1}
+ else:
+ params["p"] = text.parse_int(params.get("p"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for post_id in text.extract_iter(
+ page, "class='gallery-item' id='", "'"):
+ cnt += 1
+ yield post_id
+
+ if cnt < 40:
+ return
+ params["p"] += 1
+
+ def _pagination_next(self, url):
while True:
page = self.request(url).text
- yield from text.extract_iter(page, self._needle, "'")
+ yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
pos = page.find("Next &gt;&gt;")
if pos < 0:
@@ -180,11 +204,30 @@ class AryionGalleryExtractor(AryionExtractor):
def posts(self):
if self.recursive:
url = "{}/g4/gallery/{}".format(self.root, self.user)
- return self._pagination(url)
+ return self._pagination_params(url)
else:
- self._needle = "thumb' href='/g4/view/"
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
- return util.advance(self._pagination(url), self.offset)
+ return util.advance(self._pagination_next(url), self.offset)
+
+
+class AryionTagExtractor(AryionExtractor):
+ """Extractor for tag searches on eka's portal"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "tags", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
+ test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
+ "count": ">= 5",
+ })
+
+ def metadata(self):
+ self.params = text.parse_query(self.user)
+ self.user = None
+ return {"search_tags": self.params.get("tag")}
+
+ def posts(self):
+ url = self.root + "/g4/tags.php"
+ return self._pagination_params(url, self.params)
class AryionPostExtractor(AryionExtractor):
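
The new _pagination_params helper above pages through listings with a "p" query parameter and stops once a page yields fewer than 40 items (evidently the page size). A self-contained sketch of the same idea, with requests standing in for the extractor's session:

    import re
    import requests

    def paginate_gallery_items(url, params=None, page_size=40):
        params = dict(params or {})
        params["p"] = int(params.get("p") or 1)
        while True:
            page = requests.get(url, params=params).text
            ids = re.findall(r"class='gallery-item' id='([^']*)'", page)
            yield from ids
            if len(ids) < page_size:
                return  # short page: no further results
            params["p"] += 1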
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d9f69ab..4f42477 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -42,6 +42,7 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
+ self.finalize = None
if self.basecategory:
self.config = self._config_shared
@@ -53,13 +54,13 @@ class Extractor():
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
- self.request_interval = self.config(
- "sleep-request", self.request_interval)
+ self._interval = util.build_duration_func(
+ self.config("sleep-request", self.request_interval),
+ self.request_interval_min,
+ )
if self._retries < 0:
self._retries = float("inf")
- if self.request_interval < self.request_interval_min:
- self.request_interval = self.request_interval_min
self._init_session()
self._init_cookies()
@@ -102,15 +103,19 @@ class Extractor():
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
- tries = 1
- retries = self._retries if retries is None else retries
- session = self.session if session is None else session
- kwargs.setdefault("timeout", self._timeout)
- kwargs.setdefault("verify", self._verify)
+ if retries is None:
+ retries = self._retries
+ if session is None:
+ session = self.session
+ if "timeout" not in kwargs:
+ kwargs["timeout"] = self._timeout
+ if "verify" not in kwargs:
+ kwargs["verify"] = self._verify
response = None
+ tries = 1
- if self.request_interval:
- seconds = (self.request_interval -
+ if self._interval:
+ seconds = (self._interval() -
(time.time() - Extractor.request_timestamp))
if seconds > 0.0:
self.log.debug("Sleeping for %.5s seconds", seconds)
@@ -442,16 +447,23 @@ class GalleryExtractor(Extractor):
imgs = self.images(page)
if "count" in data:
- images = zip(
- range(1, data["count"]+1),
- imgs,
- )
+ if self.config("page-reverse"):
+ images = util.enumerate_reversed(imgs, 1, data["count"])
+ else:
+ images = zip(
+ range(1, data["count"]+1),
+ imgs,
+ )
else:
+ enum = enumerate
try:
data["count"] = len(imgs)
except TypeError:
pass
- images = enumerate(imgs, 1)
+ else:
+ if self.config("page-reverse"):
+ enum = util.enumerate_reversed
+ images = enum(imgs, 1)
yield Message.Directory, data
for data[self.enum], (url, imgdata) in images:
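
util.enumerate_reversed, used above for the new page-reverse option, is not shown in this diff; a hypothetical implementation consistent with its call sites (enumerate_reversed(imgs, 1) and enumerate_reversed(imgs, 1, count)) could look like:

    def enumerate_reversed(iterable, start=0, length=None):
        # Yield (index, item) pairs with the items in reverse order but
        # their original indices preserved, so pages keep their numbers
        # while downloading back to front.
        items = list(iterable)
        if length is None:
            length = len(items)
        return zip(range(start + length - 1, start - 1, -1), reversed(items))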
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index e354cb7..2004921 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -15,7 +15,7 @@ class CyberdropAlbumExtractor(Extractor):
category = "cyberdrop"
subcategory = "album"
root = "https://cyberdrop.me"
- directory_fmt = ("{category}", "{album_id} {album_name}")
+ directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = ("https://cyberdrop.me/a/keKRjm4t", {
diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py
new file mode 100644
index 0000000..363341a
--- /dev/null
+++ b/gallery_dl/extractor/desktopography.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://desktopography.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?desktopography\.net"
+
+
+class DesktopographyExtractor(Extractor):
+ """Base class for desktopography extractors"""
+ category = "desktopography"
+ archive_fmt = "{filename}"
+ root = "https://desktopography.net"
+
+
+class DesktopographySiteExtractor(DesktopographyExtractor):
+ """Extractor for all desktopography exhibitions """
+ subcategory = "site"
+ pattern = BASE_PATTERN + r"/$"
+ test = ("https://desktopography.net/",)
+
+ def items(self):
+ page = self.request(self.root).text
+ data = {"_extractor": DesktopographyExhibitionExtractor}
+
+ for exhibition_year in text.extract_iter(
+ page,
+ '<a href="https://desktopography.net/exhibition-',
+ '/">'):
+
+ url = self.root + "/exhibition-" + exhibition_year + "/"
+ yield Message.Queue, url, data
+
+
+class DesktopographyExhibitionExtractor(DesktopographyExtractor):
+ """Extractor for a yearly desktopography exhibition"""
+ subcategory = "exhibition"
+ pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
+ test = ("https://desktopography.net/exhibition-2020/",)
+
+ def __init__(self, match):
+ DesktopographyExtractor.__init__(self, match)
+ self.year = match.group(1)
+
+ def items(self):
+ url = "{}/exhibition-{}/".format(self.root, self.year)
+ base_entry_url = "https://desktopography.net/portfolios/"
+ page = self.request(url).text
+
+ data = {
+ "_extractor": DesktopographyEntryExtractor,
+ "year": self.year,
+ }
+
+ for entry_url in text.extract_iter(
+ page,
+ '<a class="overlay-background" href="' + base_entry_url,
+ '">'):
+
+ url = base_entry_url + entry_url
+ yield Message.Queue, url, data
+
+
+class DesktopographyEntryExtractor(DesktopographyExtractor):
+ """Extractor for all resolutions of a desktopography wallpaper"""
+ subcategory = "entry"
+ pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
+ test = ("https://desktopography.net/portfolios/new-era/",)
+
+ def __init__(self, match):
+ DesktopographyExtractor.__init__(self, match)
+ self.entry = match.group(1)
+
+ def items(self):
+ url = "{}/portfolios/{}".format(self.root, self.entry)
+ page = self.request(url).text
+
+ entry_data = {"entry": self.entry}
+ yield Message.Directory, entry_data
+
+ for image_data in text.extract_iter(
+ page,
+ '<a target="_blank" href="https://desktopography.net',
+ '">'):
+
+ path, _, filename = image_data.partition(
+ '" class="wallpaper-button" download="')
+ text.nameext_from_url(filename, entry_data)
+ yield Message.Url, self.root + path, entry_data
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index b4ac742..7dac770 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -46,6 +46,13 @@ class DeviantartExtractor(Extractor):
self.group = False
self.api = None
+ unwatch = self.config("auto-unwatch")
+ if unwatch:
+ self.unwatch = []
+ self.finalize = self._unwatch_premium
+ else:
+ self.unwatch = None
+
if self.quality:
self.quality = ",q_{}".format(self.quality)
@@ -318,44 +325,48 @@ class DeviantartExtractor(Extractor):
except KeyError:
pass
- # check accessibility
- if self.api.refresh_token_key:
- dev = self.api.deviation(deviation["deviationid"], False)
- has_access = dev["premium_folder_data"]["has_access"]
- username = dev["author"]["username"]
- folder = dev["premium_folder_data"]
-
- if not has_access and folder["type"] == "watchers" and \
- self.config("auto-watch"):
- if self.api.user_friends_watch(username):
- has_access = True
- self.log.info(
- "Watching %s for premium folder access", username)
- else:
- self.log.warning(
- "Error when trying to watch %s. "
- "Try again with a new refresh-token", username)
- else:
+ if not self.api.refresh_token_key:
self.log.warning(
"Unable to access premium content (no refresh-token)")
self._fetch_premium = lambda _: None
return None
+ dev = self.api.deviation(deviation["deviationid"], False)
+ folder = dev["premium_folder_data"]
+ username = dev["author"]["username"]
+ has_access = folder["has_access"]
+
+ if not has_access and folder["type"] == "watchers" and \
+ self.config("auto-watch"):
+ if self.unwatch is not None:
+ self.unwatch.append(username)
+ if self.api.user_friends_watch(username):
+ has_access = True
+ self.log.info(
+ "Watching %s for premium folder access", username)
+ else:
+ self.log.warning(
+ "Error when trying to watch %s. "
+ "Try again with a new refresh-token", username)
+
if has_access:
self.log.info("Fetching premium folder data")
else:
self.log.warning("Unable to access premium content (type: %s)",
folder["type"])
- self._fetch_premium = lambda _: None
- return None
- # fill cache
cache = self._premium_cache
for dev in self.api.gallery(
username, folder["gallery_id"], public=False):
- cache[dev["deviationid"]] = dev
+ cache[dev["deviationid"]] = dev if has_access else None
+
return cache[deviation["deviationid"]]
+ def _unwatch_premium(self):
+ for username in self.unwatch:
+ self.log.info("Unwatching %s", username)
+ self.api.user_friends_unwatch(username)
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -823,7 +834,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
- archive_fmt = "{index}.{extension}"
+ archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
@@ -1153,13 +1164,15 @@ class DeviantartOAuthAPI():
"mature_content" : self.mature,
}
return self._call(
- endpoint, method="POST", data=data, public=False, fatal=False)
+ endpoint, method="POST", data=data, public=False, fatal=False,
+ ).get("success")
def user_friends_unwatch(self, username):
"""Unwatch a user"""
endpoint = "user/friends/unwatch/" + username
return self._call(
- endpoint, method="POST", public=False, fatal=False)
+ endpoint, method="POST", public=False, fatal=False,
+ ).get("success")
def authenticate(self, refresh_token_key):
"""Authenticate the application by requesting an access token"""
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index d4fd826..992db97 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -46,9 +46,10 @@ class EromeExtractor(Extractor):
user, pos = text.extract(
page, 'href="https://www.erome.com/', '"', pos)
data = {
- "album_id": album_id,
- "title" : text.unescape(title),
- "user" : text.unquote(user),
+ "album_id" : album_id,
+ "title" : text.unescape(title),
+ "user" : text.unquote(user),
+ "_http_headers": {"Referer": url},
}
yield Message.Directory, data
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 9df2bef..62f7429 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -66,6 +66,8 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"],
"rating": resp["rating"],
"posted_at": resp["posted_at"],
+ "date": text.parse_datetime(
+ resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"],
"fanclub_user_name": resp["fanclub"]["user"]["name"],
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index a1470dc..c09eb96 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -88,7 +88,9 @@ class FoolslideChapterExtractor(FoolslideExtractor):
data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
yield Message.Directory, data
- for data["page"], image in enumerate(imgs, 1):
+ enum = util.enumerate_reversed if self.config(
+ "page-reverse") else enumerate
+ for data["page"], image in enum(imgs, 1):
try:
url = image["url"]
del image["url"]
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 1b877b3..e09e190 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -176,6 +176,58 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
yield post.attrib
+class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "favorites", "{favorite_id}")
+ archive_fmt = "f_{favorite_id}_{id}"
+ per_page = 50
+ pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ test = (
+ ("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
+ "count": 3,
+ }),
+ ("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
+ "count": 2,
+ }),
+ ("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
+ "count": 4,
+ }),
+ ("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
+ "count": 3,
+ }),
+ )
+
+ def __init__(self, match):
+ GelbooruV02Extractor.__init__(self, match)
+ self.favorite_id = match.group(match.lastindex)
+
+ def metadata(self):
+ return {"favorite_id": text.parse_int(self.favorite_id)}
+
+ def posts(self):
+ url = self.root + "/index.php"
+ params = {
+ "page": "favorites",
+ "s" : "view",
+ "id" : self.favorite_id,
+ "pid" : self.page_start * self.per_page,
+ }
+
+ data = {}
+ while True:
+ num_ids = 0
+ page = self.request(url, params=params).text
+
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ for post in self._api_request(data):
+ yield post.attrib
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+
+
class GelbooruV02PostExtractor(GelbooruV02Extractor):
subcategory = "post"
archive_fmt = "{id}"
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 6d31f7d..2757852 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
class GfycatExtractor(Extractor):
@@ -155,7 +154,6 @@ class GfycatImageExtractor(GfycatExtractor):
class GfycatAPI():
API_ROOT = "https://api.gfycat.com"
- ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"
def __init__(self, extractor):
self.extractor = extractor
@@ -175,23 +173,8 @@ class GfycatAPI():
params = {"search_text": query, "count": 150}
return self._pagination(endpoint, params)
- @cache(keyarg=1, maxage=3600)
- def _authenticate_impl(self, category):
- if category == "redgifs":
- url = "https://api.redgifs.com/v1/oauth/webtoken"
- else:
- url = "https://weblogin." + category + ".com/oauth/webtoken"
- data = {"access_key": self.ACCESS_KEY}
- headers = {"Referer": self.extractor.root + "/",
- "Origin" : self.extractor.root}
- response = self.extractor.request(
- url, method="POST", headers=headers, json=data)
- return "Bearer " + response.json()["access_token"]
-
def _call(self, endpoint, params=None):
url = self.API_ROOT + endpoint
- self.headers["Authorization"] = self._authenticate_impl(
- self.extractor.category)
return self.extractor.request(
url, params=params, headers=self.headers).json()
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index a40d631..201ffdd 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -141,13 +141,17 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
shortlink = text.extract(page, "rel='shortlink' href='", "'")[0]
data = {
- "action": "manga_get_chapters",
- "manga" : shortlink.rpartition("=")[2],
+ "action" : "manga_get_reading_nav",
+ "manga" : shortlink.rpartition("=")[2],
+ "chapter" : "",
+ "volume_id": "",
+ "style" : "list",
+ "type" : "manga",
}
url = self.root + "/wp-admin/admin-ajax.php"
page = self.request(url, method="POST", data=data).text
- for url in text.extract_iter(page, 'href="', '"', 320):
+ for url in text.extract_iter(page, 'data-redirect="', '"'):
chapter = url.rpartition("/")[2]
results.append((url, self.chapter_data(chapter)))
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 13996d0..d699f07 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -279,3 +279,23 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
url , pos = text.extract(page, '<br><img src="', '"')
filename, pos = text.extract(page, 'alt="', '"', pos)
return url, filename
+
+
+class FappicImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from fappic.com"""
+ category = "fappic"
+ pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
+ test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
+ "pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
+ "keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ })
+
+ def get_info(self, page):
+ url , pos = text.extract(page, '<a href="/?click"><img src="', '"')
+ filename, pos = text.extract(page, 'alt="', '"', pos)
+
+ if filename.startswith("Porn-Picture-"):
+ filename = filename[13:]
+
+ return url, filename
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 3590e17..983ae37 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,7 +29,7 @@ class InstagramExtractor(Extractor):
root = "https://www.instagram.com"
cookiedomain = ".instagram.com"
cookienames = ("sessionid",)
- request_interval = 8.0
+ request_interval = (6.0, 12.0)
def __init__(self, match):
Extractor.__init__(self, match)
@@ -679,7 +679,6 @@ class InstagramStoriesExtractor(InstagramExtractor):
("https://www.instagram.com/stories/instagram/"),
("https://www.instagram.com/stories/highlights/18042509488170095/"),
)
- request_interval = 1.0
def __init__(self, match):
self.highlight_id, self.user = match.groups()
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a911d35..c5f5ae7 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -9,7 +9,8 @@
"""Extractors for https://kemono.party/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import itertools
import re
@@ -70,11 +71,32 @@ class KemonopartyExtractor(Extractor):
post["type"] = file["type"]
url = file["path"]
if url[0] == "/":
- url = self.root + url
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
text.nameext_from_url(file["name"], post)
yield Message.Url, url, post
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/account/login"
+ data = {"username": username, "password": password}
+
+ response = self.request(url, method="POST", data=data)
+ if response.url.endswith("/account/login") and \
+ "Username or password is incorrect" in response.text:
+ raise exception.AuthenticationError()
+
+ return {c.name: c.value for c in response.history[0].cookies}
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
@@ -119,7 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
pattern = BASE_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/files/fanbox"
+ "pattern": r"https://kemono\.party/data/files/fanbox"
r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -142,12 +164,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/inline/fanbox"
+ "pattern": r"https://kemono\.party/data/inline/fanbox"
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://kemono\.party/(file|attachment)s"
+ "pattern": r"https://kemono\.party/data/(file|attachment)s"
r"/gumroad/trylsc/IURjT/",
}),
# username (#1548, #1652)
@@ -173,3 +195,25 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
def posts(self):
posts = self.request(self.api_url).json()
return (posts[0],) if len(posts) > 1 else posts
+
+
+class KemonopartyFavoriteExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?kemono\.party/favorites"
+ test = ("https://kemono.party/favorites", {
+ "pattern": KemonopartyUserExtractor.pattern,
+ "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+ "count": 3,
+ })
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+ self.login()
+
+ users = self.request(self.root + "/api/favorites").json()
+ for user in users:
+ user["_extractor"] = KemonopartyUserExtractor
+ url = "{}/{}/user/{}".format(
+ self.root, user["service"], user["id"])
+ yield Message.Queue, url, user
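
A standalone sketch of the login flow added above, using requests in place of the extractor's session (the form field names, error check, and the use of the first redirect response's cookies as session credentials all follow the diff):

    import requests

    def kemonoparty_login(username, password):
        session = requests.Session()
        response = session.post(
            "https://kemono.party/account/login",
            data={"username": username, "password": password},
        )
        if response.url.endswith("/account/login") and \
                "Username or password is incorrect" in response.text:
            raise RuntimeError("authentication failed")
        # cookies set by the redirect after a successful login
        return {c.name: c.value for c in response.history[0].cookies}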
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 53ae76a..634a92d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -37,7 +37,7 @@ class MangadexExtractor(Extractor):
def items(self):
for chapter in self.chapters():
- uuid = chapter["data"]["id"]
+ uuid = chapter["id"]
data = self._transform(chapter)
data["_extractor"] = MangadexChapterExtractor
self._cache[uuid] = (chapter, data)
@@ -51,8 +51,8 @@ class MangadexExtractor(Extractor):
for item in manga["relationships"]:
relationships[item["type"]].append(item["id"])
- cattributes = chapter["data"]["attributes"]
- mattributes = manga["data"]["attributes"]
+ cattributes = chapter["attributes"]
+ mattributes = manga["attributes"]
lang = cattributes["translatedLanguage"].partition("-")[0]
if cattributes["chapter"]:
@@ -63,12 +63,12 @@ class MangadexExtractor(Extractor):
data = {
"manga" : (mattributes["title"].get("en") or
next(iter(mattributes["title"].values()))),
- "manga_id": manga["data"]["id"],
+ "manga_id": manga["id"],
"title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]),
"chapter" : text.parse_int(chnum),
"chapter_minor": sep + minor,
- "chapter_id": chapter["data"]["id"],
+ "chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]),
"lang" : lang,
"language": util.code_to_language(lang),
@@ -77,13 +77,13 @@ class MangadexExtractor(Extractor):
if self.config("metadata"):
data["artist"] = [
- self.api.author(uuid)["data"]["attributes"]["name"]
+ self.api.author(uuid)["attributes"]["name"]
for uuid in relationships["artist"]]
data["author"] = [
- self.api.author(uuid)["data"]["attributes"]["name"]
+ self.api.author(uuid)["attributes"]["name"]
for uuid in relationships["author"]]
data["group"] = [
- self.api.group(uuid)["data"]["attributes"]["name"]
+ self.api.group(uuid)["attributes"]["name"]
for uuid in relationships["scanlation_group"]]
return data
@@ -118,11 +118,14 @@ class MangadexChapterExtractor(MangadexExtractor):
data = self._transform(chapter)
yield Message.Directory, data
- cattributes = chapter["data"]["attributes"]
+ cattributes = chapter["attributes"]
data["_http_headers"] = self._headers
base = "{}/data/{}/".format(
self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
- for data["page"], page in enumerate(cattributes["data"], 1):
+
+ enum = util.enumerate_reversed if self.config(
+ "page-reverse") else enumerate
+ for data["page"], page in enum(cattributes["data"], 1):
text.nameext_from_url(page, data)
yield Message.Url, base + page, data
@@ -153,6 +156,9 @@ class MangadexMangaExtractor(MangadexExtractor):
("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
"count": 1,
}),
+ ("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
+ "count": ">= 20",
+ })
)
def chapters(self):
@@ -189,18 +195,18 @@ class MangadexAPI():
@memcache(keyarg=1)
def author(self, uuid):
- return self._call("/author/" + uuid)
+ return self._call("/author/" + uuid)["data"]
def chapter(self, uuid):
- return self._call("/chapter/" + uuid)
+ return self._call("/chapter/" + uuid)["data"]
@memcache(keyarg=1)
def group(self, uuid):
- return self._call("/group/" + uuid)
+ return self._call("/group/" + uuid)["data"]
@memcache(keyarg=1)
def manga(self, uuid):
- return self._call("/manga/" + uuid)
+ return self._call("/manga/" + uuid)["data"]
def manga_feed(self, uuid):
config = self.extractor.config
@@ -209,6 +215,8 @@ class MangadexAPI():
"order[volume]" : order,
"order[chapter]" : order,
"translatedLanguage[]": config("lang"),
+ "contentRating[]" : [
+ "safe", "suggestive", "erotica", "pornographic"],
}
return self._pagination("/manga/" + uuid + "/feed", params)
@@ -271,7 +279,7 @@ class MangadexAPI():
while True:
data = self._call(endpoint, params)
- yield from data["results"]
+ yield from data["data"]
params["offset"] = data["offset"] + data["limit"]
if params["offset"] >= data["total"]:
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index ff0bfc3..cd7cabb 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -87,7 +87,7 @@ BASE_PATTERN = MastodonExtractor.update(INSTANCES)
class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
+ pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
test = (
("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
@@ -100,26 +100,44 @@ class MastodonUserExtractor(MastodonExtractor):
"count": 60,
}),
("https://baraag.net/@pumpkinnsfw"),
+ ("https://mastodon.social/@id:10843"),
+ ("https://mastodon.social/users/id:10843"),
+ ("https://mastodon.social/users/jk"),
)
def statuses(self):
api = MastodonAPI(self)
- username = self.item
- handle = "@{}@{}".format(username, self.instance)
- for account in api.account_search(handle, 1):
- if account["username"] == username:
- break
- else:
- raise exception.NotFoundError("account")
-
return api.account_statuses(
- account["id"],
+ api.account_id_by_username(self.item),
only_media=not self.config("text-posts", False),
exclude_replies=not self.replies,
)
+class MastodonFollowingExtractor(MastodonExtractor):
+ """Extractor for followed mastodon users"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/users/([^/?#]+)/following"
+ test = (
+ ("https://mastodon.social/users/0x4f/following", {
+ "extractor": False,
+ "count": ">= 20",
+ }),
+ ("https://mastodon.social/users/id:10843/following"),
+ ("https://pawoo.net/users/yoru_nine/following"),
+ ("https://baraag.net/users/pumpkinnsfw/following"),
+ )
+
+ def items(self):
+ api = MastodonAPI(self)
+ account_id = api.account_id_by_username(self.item)
+
+ for account in api.account_following(account_id):
+ account["_extractor"] = MastodonUserExtractor
+ yield Message.Queue, account["url"], account
+
+
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
@@ -165,6 +183,20 @@ class MastodonAPI():
self.headers = {"Authorization": "Bearer " + access_token}
+ def account_id_by_username(self, username):
+ if username.startswith("id:"):
+ return username[3:]
+
+ handle = "@{}@{}".format(username, self.extractor.instance)
+ for account in self.account_search(handle, 1):
+ if account["username"] == username:
+ return account["id"]
+ raise exception.NotFoundError("account")
+
+ def account_following(self, account_id):
+ endpoint = "/v1/accounts/{}/following".format(account_id)
+ return self._pagination(endpoint, None)
+
def account_search(self, query, limit=40):
"""Search for accounts"""
endpoint = "/v1/accounts/search"
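
A self-contained sketch of the account-ID resolution added above, against the standard Mastodon REST API (requests stands in for the extractor's session; the token parameter is an assumption, as some instances require authentication for search):

    import requests

    def account_id_by_username(instance, username, token):
        # "id:12345" bypasses the lookup entirely; anything else is
        # resolved through the accounts/search endpoint.
        if username.startswith("id:"):
            return username[3:]
        handle = "@{}@{}".format(username, instance)
        accounts = requests.get(
            "https://{}/api/v1/accounts/search".format(instance),
            params={"q": handle, "limit": 1},
            headers={"Authorization": "Bearer " + token},
        ).json()
        for account in accounts:
            if account["username"] == username:
                return account["id"]
        raise ValueError("account not found")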
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 44411c8..4dc880f 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -168,7 +168,7 @@ class NozomiTagExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
tags, self.pnum = match.groups()
- self.tags = text.unquote(tags).lower()
+ self.tags = text.unquote(tags)
self.nozomi = "/nozomi/{}.nozomi".format(self.tags)
def metadata(self):
@@ -187,7 +187,7 @@ class NozomiSearchExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
- self.tags = text.unquote(match.group(1)).lower().split()
+ self.tags = text.unquote(match.group(1)).split()
def metadata(self):
return {"search_tags": self.tags}
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 4dc1e43..6812f35 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -415,7 +415,7 @@ class OAuthPixiv(OAuthBase):
print("""
1) Open your browser's Developer Tools (F12) and switch to the Network tab
2) Login
-4) Select the last network monitor entry ('callback?state=...')
+3) Select the last network monitor entry ('callback?state=...')
4) Copy its 'code' query parameter, paste it below, and press Enter
""")
code = input("code: ")
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8953edd..43c7e50 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -139,7 +139,7 @@ class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""
subcategory = "subreddit"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/"
- r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
test = (
("https://www.reddit.com/r/lavaporn/", {
"range": "1-20",
@@ -152,9 +152,11 @@ class RedditSubredditExtractor(RedditExtractor):
)
def __init__(self, match):
+        self.subreddit, sub, params = match.groups()
+        self.params = text.parse_query(params)
+        if sub:
+            # e.g. "/r/name/top" -> subcategory "subreddit-top"
+            self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
- self.subreddit = match.group(1)
- self.params = text.parse_query(match.group(2))
def submissions(self):
return self.api.submissions_subreddit(self.subreddit, self.params)
@@ -164,7 +166,7 @@ class RedditUserExtractor(RedditExtractor):
"""Extractor for URLs from posts by a reddit user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
- r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?")
test = (
("https://www.reddit.com/user/username/", {
"count": ">= 2",
@@ -175,9 +177,11 @@ class RedditUserExtractor(RedditExtractor):
)
def __init__(self, match):
+ self.user, sub, params = match.groups()
+ self.params = text.parse_query(params)
+ if sub:
+ self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
- self.user = match.group(1)
- self.params = text.parse_query(match.group(2))
def submissions(self):
return self.api.submissions_user(self.user, self.params)
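
The widened inner group is what feeds the new subcategory suffix; a minimal trace of the subreddit pattern (the URL reuses a documented test case, with a sort segment added for illustration):

    import re

    pattern = re.compile(
        r"(?:https?://)?(?:\w+\.)?reddit\.com/r/"
        r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
    m = pattern.match("https://www.reddit.com/r/lavaporn/top/?t=month")
    m.group(1)  # "lavaporn/top" -> passed to the API as-is
    m.group(2)  # "top"          -> subcategory becomes "subreddit-top"
    m.group(3)  # "t=month"      -> parsed with text.parse_query()
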
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 576564c..e078bef 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,5 +72,3 @@ class RedgifsImageExtractor(RedgifsExtractor):
class RedgifsAPI(GfycatAPI):
API_ROOT = "https://api.redgifs.com"
- ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
- "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2dfcb55..4a3f6cd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -50,7 +50,7 @@ class TwitterExtractor(Extractor):
if not self.retweets and "retweeted_status_id_str" in tweet:
self.log.debug("Skipping %s (retweet)", tweet["id_str"])
continue
- if not self.quoted and "quoted" in tweet:
+ if not self.quoted and "quoted_by_id_str" in tweet:
self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
if "in_reply_to_user_id_str" in tweet and (
@@ -139,8 +139,10 @@ class TwitterExtractor(Extractor):
for size in ("original", "x_large", "large", "small"):
key = prefix + size
if key in bvals:
- files.append(bvals[key]["image_value"])
- return
+ value = bvals[key].get("image_value")
+ if value and "url" in value:
+ files.append(value)
+ return
elif self.videos:
url = "ytdl:{}/i/web/status/{}".format(self.root, tweet["id_str"])
files.append({"url": url})
@@ -199,6 +201,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
+ if "quoted_by_id_str" in tweet:
+ tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
if "author" in tweet:
tdata["author"] = self._transform_user(tweet["author"])
@@ -316,7 +320,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
- """Extractor for all images from a user's timeline"""
+ """Extractor for Tweets from a user's timeline"""
subcategory = "timeline"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@@ -341,8 +345,25 @@ class TwitterTimelineExtractor(TwitterExtractor):
return TwitterAPI(self).timeline_profile(self.user)
+class TwitterRepliesExtractor(TwitterExtractor):
+ """Extractor for Tweets from a user's timeline including replies"""
+ subcategory = "replies"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
+ test = (
+ ("https://twitter.com/supernaturepics/with_replies", {
+ "range": "1-40",
+ "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
+ }),
+ ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
+ ("https://www.twitter.com/id:2976459548/with_replies"),
+ )
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_profile(self.user, replies=True)
+
+
class TwitterMediaExtractor(TwitterExtractor):
- """Extractor for all images from a user's Media Tweets"""
+ """Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
test = (
@@ -652,11 +673,11 @@ class TwitterAPI():
endpoint = "/2/timeline/conversation/{}.json".format(conversation_id)
return self._pagination(endpoint)
- def timeline_profile(self, screen_name):
+ def timeline_profile(self, screen_name, replies=False):
user_id = self._user_id_by_screen_name(screen_name)
endpoint = "/2/timeline/profile/{}.json".format(user_id)
params = self.params.copy()
- params["include_tweet_replies"] = "false"
+ params["include_tweet_replies"] = "true" if replies else "false"
return self._pagination(endpoint, params)
def timeline_media(self, screen_name):
@@ -886,7 +907,7 @@ class TwitterAPI():
quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
quoted["user"] = tweet["user"]
- quoted["quoted"] = True
+ quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
# update cursor value
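
A sketch of how the replaced boolean flag surfaces in metadata (ids hypothetical): the extractor skips such Tweets when the 'quoted' option is disabled, and otherwise exposes the quoting Tweet's id as 'quote_by' (via text.parse_int in the actual code).

    tweet = {"id_str": "100", "quoted_by_id_str": "200"}
    if "quoted_by_id_str" in tweet:
        quote_by = int(tweet["quoted_by_id_str"])  # -> 200
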
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
new file mode 100644
index 0000000..654e5d0
--- /dev/null
+++ b/gallery_dl/formatter.py
@@ -0,0 +1,306 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""String formatters"""
+
+import json
+import string
+import _string
+import operator
+from . import text, util
+
+_CACHE = {}
+_CONVERSIONS = None
+
+
+def parse(format_string, default=None):
+ key = format_string, default
+
+ try:
+ return _CACHE[key]
+ except KeyError:
+ pass
+
+ cls = StringFormatter
+ if format_string.startswith("\f"):
+ kind, _, format_string = format_string.partition(" ")
+ kind = kind[1:]
+
+ if kind == "T":
+ cls = TemplateFormatter
+ elif kind == "E":
+ cls = ExpressionFormatter
+ elif kind == "M":
+ cls = ModuleFormatter
+
+ formatter = _CACHE[key] = cls(format_string, default)
+ return formatter
+
+
+class StringFormatter():
+ """Custom, extended version of string.Formatter
+
+ This string formatter implementation is a mostly performance-optimized
+ variant of the original string.Formatter class. Unnecessary features have
+ been removed (positional arguments, unused argument check) and new
+ formatting options have been added.
+
+ Extra Conversions:
+ - "l": calls str.lower on the target value
+ - "u": calls str.upper
+ - "c": calls str.capitalize
+ - "C": calls string.capwords
+ - "j". calls json.dumps
+ - "t": calls str.strip
+ - "d": calls text.parse_timestamp
+ - "U": calls urllib.parse.unquote
+ - "S": calls util.to_string()
+ - "T": calls util.to_timestamü()
+ - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
+
+ Extra Format Specifiers:
+ - "?<before>/<after>/":
+ Adds <before> and <after> to the actual value if it evaluates to True.
+ Otherwise the whole replacement field becomes an empty string.
+ Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
+ -> "" (if "f" is None, 0, "")
+
+ - "L<maxlen>/<replacement>/":
+ Replaces the output with <replacement> if its length (in characters)
+ exceeds <maxlen>. Otherwise everything is left as is.
+ Example: {f:L5/too long/} -> "foo" (if "f" is "foo")
+ -> "too long" (if "f" is "foobar")
+
+ - "J<separator>/":
+ Joins elements of a list (or string) using <separator>
+ Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])
+
+ - "R<old>/<new>/":
+ Replaces all occurrences of <old> with <new>
+ Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
+ """
+
+ def __init__(self, format_string, default=None):
+ self.default = default
+ self.result = []
+ self.fields = []
+
+ for literal_text, field_name, format_spec, conv in \
+ _string.formatter_parser(format_string):
+ if literal_text:
+ self.result.append(literal_text)
+ if field_name:
+ self.fields.append((
+ len(self.result),
+ self._field_access(field_name, format_spec, conv),
+ ))
+ self.result.append("")
+
+ if len(self.result) == 1:
+ if self.fields:
+ self.format_map = self.fields[0][1]
+ else:
+ self.format_map = lambda _: format_string
+ del self.result, self.fields
+
+ def format_map(self, kwdict):
+ """Apply 'kwdict' to the initial format_string and return its result"""
+ result = self.result
+ for index, func in self.fields:
+ result[index] = func(kwdict)
+ return "".join(result)
+
+ def _field_access(self, field_name, format_spec, conversion):
+ fmt = parse_format_spec(format_spec, conversion)
+
+ if "|" in field_name:
+ return self._apply_list([
+ parse_field_name(fn)
+ for fn in field_name.split("|")
+ ], fmt)
+ else:
+ key, funcs = parse_field_name(field_name)
+ if funcs:
+ return self._apply(key, funcs, fmt)
+ return self._apply_simple(key, fmt)
+
+ def _apply(self, key, funcs, fmt):
+ def wrap(kwdict):
+ try:
+ obj = kwdict[key]
+ for func in funcs:
+ obj = func(obj)
+ except Exception:
+ obj = self.default
+ return fmt(obj)
+ return wrap
+
+ def _apply_simple(self, key, fmt):
+ def wrap(kwdict):
+ return fmt(kwdict[key] if key in kwdict else self.default)
+ return wrap
+
+ def _apply_list(self, lst, fmt):
+ def wrap(kwdict):
+ for key, funcs in lst:
+ try:
+ obj = kwdict[key]
+ for func in funcs:
+ obj = func(obj)
+ if obj:
+ break
+ except Exception:
+ pass
+ else:
+ obj = self.default
+ return fmt(obj)
+ return wrap
+
+
+class TemplateFormatter(StringFormatter):
+ """Read format_string from file"""
+
+ def __init__(self, path, default=None):
+ with open(util.expand_path(path)) as fp:
+ format_string = fp.read()
+ StringFormatter.__init__(self, format_string, default)
+
+
+class ExpressionFormatter():
+ """Generate text by evaluating a Python expression"""
+
+ def __init__(self, expression, default=None):
+ self.format_map = util.compile_expression(expression)
+
+
+class ModuleFormatter():
+ """Generate text by calling an external function"""
+
+ def __init__(self, function_spec, default=None):
+ module_name, _, function_name = function_spec.partition(":")
+ module = __import__(module_name)
+ self.format_map = getattr(module, function_name)
+
+
+def parse_field_name(field_name):
+ first, rest = _string.formatter_field_name_split(field_name)
+ funcs = []
+
+ for is_attr, key in rest:
+ if is_attr:
+ func = operator.attrgetter
+ else:
+ func = operator.itemgetter
+ try:
+ if ":" in key:
+ start, _, stop = key.partition(":")
+ stop, _, step = stop.partition(":")
+ start = int(start) if start else None
+ stop = int(stop) if stop else None
+ step = int(step) if step else None
+ key = slice(start, stop, step)
+ except TypeError:
+ pass # key is an integer
+
+ funcs.append(func(key))
+
+ return first, funcs
+
+
+def parse_format_spec(format_spec, conversion):
+ fmt = build_format_func(format_spec)
+ if not conversion:
+ return fmt
+
+ global _CONVERSIONS
+ if _CONVERSIONS is None:
+ _CONVERSIONS = {
+ "l": str.lower,
+ "u": str.upper,
+ "c": str.capitalize,
+ "C": string.capwords,
+ "j": json.dumps,
+ "t": str.strip,
+ "T": util.to_timestamp,
+ "d": text.parse_timestamp,
+ "U": text.unescape,
+ "S": util.to_string,
+ "s": str,
+ "r": repr,
+ "a": ascii,
+ }
+
+ conversion = _CONVERSIONS[conversion]
+ if fmt is format:
+ return conversion
+ else:
+ def chain(obj):
+ return fmt(conversion(obj))
+ return chain
+
+
+def build_format_func(format_spec):
+ if format_spec:
+ fmt = format_spec[0]
+ if fmt == "?":
+ return _parse_optional(format_spec)
+ if fmt == "L":
+ return _parse_maxlen(format_spec)
+ if fmt == "J":
+ return _parse_join(format_spec)
+ if fmt == "R":
+ return _parse_replace(format_spec)
+ return _default_format(format_spec)
+ return format
+
+
+def _parse_optional(format_spec):
+ before, after, format_spec = format_spec.split("/", 2)
+ before = before[1:]
+ fmt = build_format_func(format_spec)
+
+ def optional(obj):
+ return before + fmt(obj) + after if obj else ""
+ return optional
+
+
+def _parse_maxlen(format_spec):
+ maxlen, replacement, format_spec = format_spec.split("/", 2)
+ maxlen = text.parse_int(maxlen[1:])
+ fmt = build_format_func(format_spec)
+
+ def mlen(obj):
+ obj = fmt(obj)
+ return obj if len(obj) <= maxlen else replacement
+ return mlen
+
+
+def _parse_join(format_spec):
+ separator, _, format_spec = format_spec.partition("/")
+ separator = separator[1:]
+ fmt = build_format_func(format_spec)
+
+ def join(obj):
+ return fmt(separator.join(obj))
+ return join
+
+
+def _parse_replace(format_spec):
+ old, new, format_spec = format_spec.split("/", 2)
+ old = old[1:]
+ fmt = build_format_func(format_spec)
+
+ def replace(obj):
+ return fmt(obj.replace(old, new))
+ return replace
+
+
+def _default_format(format_spec):
+ def wrap(obj):
+ return format(obj, format_spec)
+ return wrap
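
A brief usage sketch of the new module (the kwdict values are illustrative; the outputs follow the conversions and format specifiers documented above):

    from gallery_dl import formatter

    fmt = formatter.parse("{title!l:L15/too long/}")
    fmt.format_map({"title": "HELLO WORLD"})   # -> "hello world"

    fmt = formatter.parse("{tags:J, /}")
    fmt.format_map({"tags": ["a", "b"]})       # -> "a, b"

    # a "\f" prefix selects one of the alternative formatter classes:
    formatter.parse("\fE title.upper()").format_map({"title": "hi"})  # -> "HI"
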
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 32e9bb5..4e185d0 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -15,7 +15,7 @@ import operator
import functools
import collections
from . import extractor, downloader, postprocessor
-from . import config, text, util, output, exception
+from . import config, text, util, path, formatter, output, exception
from .extractor.message import Message
@@ -72,9 +72,9 @@ class Job():
log = extractor.log
msg = None
- sleep = extractor.config("sleep-extractor")
+ sleep = util.build_duration_func(extractor.config("sleep-extractor"))
if sleep:
- time.sleep(sleep)
+ time.sleep(sleep())
try:
for msg in extractor:
@@ -109,6 +109,8 @@ class Job():
log.info("No results for %s", extractor.url)
finally:
self.handle_finalize()
+ if extractor.finalize:
+ extractor.finalize()
return self.status
@@ -234,7 +236,7 @@ class DownloadJob(Job):
return
if self.sleep:
- time.sleep(self.sleep)
+ time.sleep(self.sleep())
# download from URL
if not self.download(url):
@@ -392,11 +394,11 @@ class DownloadJob(Job):
def initialize(self, kwdict=None):
"""Delayed initialization of PathFormat, etc."""
cfg = self.extractor.config
- pathfmt = self.pathfmt = util.PathFormat(self.extractor)
+ pathfmt = self.pathfmt = path.PathFormat(self.extractor)
if kwdict:
pathfmt.set_directory(kwdict)
- self.sleep = cfg("sleep")
+ self.sleep = util.build_duration_func(cfg("sleep"))
self.fallback = cfg("fallback", True)
if not cfg("download", True):
# monkey-patch method to do nothing and always return True
@@ -404,17 +406,18 @@ class DownloadJob(Job):
archive = cfg("archive")
if archive:
- path = util.expand_path(archive)
+ archive = util.expand_path(archive)
try:
- if "{" in path:
- path = util.Formatter(path).format_map(kwdict)
- self.archive = util.DownloadArchive(path, self.extractor)
+ if "{" in archive:
+ archive = formatter.parse(archive).format_map(kwdict)
+ self.archive = util.DownloadArchive(archive, self.extractor)
except Exception as exc:
self.extractor.log.warning(
"Failed to open download archive at '%s' ('%s: %s')",
- path, exc.__class__.__name__, exc)
+ archive, exc.__class__.__name__, exc)
else:
- self.extractor.log.debug("Using download archive '%s'", path)
+ self.extractor.log.debug(
+ "Using download archive '%s'", archive)
skip = cfg("skip", True)
if skip:
@@ -469,6 +472,7 @@ class DownloadJob(Job):
except Exception as exc:
pp_log.error("'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
+ pp_log.debug("", exc_info=True)
else:
pp_list.append(pp_obj)
@@ -539,7 +543,7 @@ class SimulationJob(DownloadJob):
self.pathfmt.set_filename(kwdict)
self.out.skip(self.pathfmt.path)
if self.sleep:
- time.sleep(self.sleep)
+ time.sleep(self.sleep())
if self.archive:
self.archive.add(kwdict)
@@ -693,9 +697,10 @@ class DataJob(Job):
self.filter = util.identity if private else util.filter_dict
def run(self):
- sleep = self.extractor.config("sleep-extractor")
+ sleep = util.build_duration_func(
+ self.extractor.config("sleep-extractor"))
if sleep:
- time.sleep(sleep)
+ time.sleep(sleep())
# collect data
try:
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index a046a27..5f7b281 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -372,6 +372,16 @@ def build_parser():
help="Write metadata to separate JSON files",
)
postprocessor.add_argument(
+ "--write-infojson",
+ dest="postprocessors",
+ action="append_const", const={
+ "name" : "metadata",
+ "event" : "init",
+ "filename": "info.json",
+ },
+ help="Write gallery metadata to a info.json file",
+ )
+ postprocessor.add_argument(
"--write-tags",
dest="postprocessors",
action="append_const", const={"name": "metadata", "mode": "tags"},
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 7e1f8c1..d4d295f 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -10,7 +10,8 @@ import os
import sys
import shutil
import logging
-from . import config, util
+import unicodedata
+from . import config, util, formatter
# --------------------------------------------------------------------
@@ -91,13 +92,13 @@ class Formatter(logging.Formatter):
if isinstance(fmt, dict):
for key in ("debug", "info", "warning", "error"):
value = fmt[key] if key in fmt else LOG_FORMAT
- fmt[key] = (util.Formatter(value).format_map,
+ fmt[key] = (formatter.parse(value).format_map,
"{asctime" in value)
else:
if fmt == LOG_FORMAT:
fmt = (fmt.format_map, False)
else:
- fmt = (util.Formatter(fmt).format_map, "{asctime" in fmt)
+ fmt = (formatter.parse(fmt).format_map, "{asctime" in fmt)
fmt = {"debug": fmt, "info": fmt, "warning": fmt, "error": fmt}
self.formats = fmt
@@ -257,6 +258,9 @@ class NullOutput():
def success(self, path, tries):
"""Print a message indicating the completion of a download"""
+ def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
+ """Display download progress"""
+
class PipeOutput(NullOutput):
@@ -270,9 +274,14 @@ class PipeOutput(NullOutput):
class TerminalOutput(NullOutput):
def __init__(self):
- self.short = config.get(("output",), "shorten", True)
- if self.short:
- self.width = shutil.get_terminal_size().columns - OFFSET
+ shorten = config.get(("output",), "shorten", True)
+ if shorten:
+ func = shorten_string_eaw if shorten == "eaw" else shorten_string
+ limit = shutil.get_terminal_size().columns - OFFSET
+ sep = CHAR_ELLIPSIES
+ self.shorten = lambda txt: func(txt, limit, sep)
+ else:
+ self.shorten = util.identity
def start(self, path):
print(self.shorten(" " + path), end="", flush=True)
@@ -283,16 +292,14 @@ class TerminalOutput(NullOutput):
def success(self, path, tries):
print("\r", self.shorten(CHAR_SUCCESS + path), sep="")
- def shorten(self, txt):
- """Reduce the length of 'txt' to the width of the terminal"""
- if self.short and len(txt) > self.width:
- hwidth = self.width // 2 - OFFSET
- return "".join((
- txt[:hwidth-1],
- CHAR_ELLIPSIES,
- txt[-hwidth-(self.width % 2):]
- ))
- return txt
+ def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
+ bdl = util.format_value(bytes_downloaded)
+ bps = util.format_value(bytes_per_second)
+ if bytes_total is None:
+ print("\r{:>7}B {:>7}B/s ".format(bdl, bps), end="")
+ else:
+ print("\r{:>3}% {:>7}B {:>7}B/s ".format(
+ bytes_downloaded * 100 // bytes_total, bdl, bps), end="")
class ColorOutput(TerminalOutput):
@@ -307,6 +314,56 @@ class ColorOutput(TerminalOutput):
print("\r\033[1;32m", self.shorten(path), "\033[0m", sep="")
+class EAWCache(dict):
+
+ def __missing__(self, key):
+ width = self[key] = \
+ 2 if unicodedata.east_asian_width(key) in "WF" else 1
+ return width
+
+
+def shorten_string(txt, limit, sep="…"):
+ """Limit width of 'txt'; assume all characters have a width of 1"""
+ if len(txt) <= limit:
+ return txt
+ limit -= len(sep)
+ return txt[:limit // 2] + sep + txt[-((limit+1) // 2):]
+
+
+def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
+ """Limit width of 'txt'; check for east-asian characters with width > 1"""
+ char_widths = [cache[c] for c in txt]
+ text_width = sum(char_widths)
+
+ if text_width <= limit:
+ # no shortening required
+ return txt
+
+ limit -= len(sep)
+ if text_width == len(txt):
+ # all characters have a width of 1
+ return txt[:limit // 2] + sep + txt[-((limit+1) // 2):]
+
+ # wide characters
+ left = 0
+ lwidth = limit // 2
+ while True:
+ lwidth -= char_widths[left]
+ if lwidth < 0:
+ break
+ left += 1
+
+ right = -1
+ rwidth = (limit+1) // 2 + (lwidth + char_widths[left])
+ while True:
+ rwidth -= char_widths[right]
+ if rwidth < 0:
+ break
+ right -= 1
+
+ return txt[:left] + sep + txt[right+1:]
+
+
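
Expected behaviour of the two helpers (the ASCII case matches test_output.py further below; the wide-character result is hand-traced, with each Wide/Fullwidth character counting as 2 columns):

    from gallery_dl import output

    output.shorten_string("01234567890123456789", 10)
    # -> "0123…56789"
    output.shorten_string_eaw("テストテストテストテスト", 10)
    # -> "テス…スト" (display width 9 <= 10)
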
if util.WINDOWS:
ANSI = os.environ.get("TERM") == "ANSI"
OFFSET = 1
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
new file mode 100644
index 0000000..12ce8ad
--- /dev/null
+++ b/gallery_dl/path.py
@@ -0,0 +1,332 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Filesystem path handling"""
+
+import os
+import re
+import time
+import shutil
+import functools
+from email.utils import mktime_tz, parsedate_tz
+from . import util, formatter, exception
+
+WINDOWS = util.WINDOWS
+
+
+class PathFormat():
+ EXTENSION_MAP = {
+ "jpeg": "jpg",
+ "jpe" : "jpg",
+ "jfif": "jpg",
+ "jif" : "jpg",
+ "jfi" : "jpg",
+ }
+
+ def __init__(self, extractor):
+ config = extractor.config
+ kwdefault = config("keywords-default")
+
+ filename_fmt = config("filename")
+ try:
+ if filename_fmt is None:
+ filename_fmt = extractor.filename_fmt
+ elif isinstance(filename_fmt, dict):
+ self.filename_conditions = [
+ (util.compile_expression(expr),
+ formatter.parse(fmt, kwdefault).format_map)
+ for expr, fmt in filename_fmt.items() if expr
+ ]
+ self.build_filename = self.build_filename_conditional
+ filename_fmt = filename_fmt.get("", extractor.filename_fmt)
+
+ self.filename_formatter = formatter.parse(
+ filename_fmt, kwdefault).format_map
+ except Exception as exc:
+ raise exception.FilenameFormatError(exc)
+
+ directory_fmt = config("directory")
+ try:
+ if directory_fmt is None:
+ directory_fmt = extractor.directory_fmt
+ elif isinstance(directory_fmt, dict):
+ self.directory_conditions = [
+ (util.compile_expression(expr), [
+ formatter.parse(fmt, kwdefault).format_map
+ for fmt in fmts
+ ])
+ for expr, fmts in directory_fmt.items() if expr
+ ]
+ self.build_directory = self.build_directory_conditional
+ directory_fmt = directory_fmt.get("", extractor.directory_fmt)
+
+ self.directory_formatters = [
+ formatter.parse(dirfmt, kwdefault).format_map
+ for dirfmt in directory_fmt
+ ]
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
+ self.kwdict = {}
+ self.directory = self.realdirectory = \
+ self.filename = self.extension = self.prefix = \
+ self.path = self.realpath = self.temppath = ""
+ self.delete = self._create_directory = False
+
+ extension_map = config("extension-map")
+ if extension_map is None:
+ extension_map = self.EXTENSION_MAP
+ self.extension_map = extension_map.get
+
+ restrict = config("path-restrict", "auto")
+ replace = config("path-replace", "_")
+ if restrict == "auto":
+ restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
+ elif restrict == "unix":
+ restrict = "/"
+ elif restrict == "windows":
+ restrict = "\\\\|/<>:\"?*"
+ elif restrict == "ascii":
+ restrict = "^0-9A-Za-z_."
+ self.clean_segment = self._build_cleanfunc(restrict, replace)
+
+ remove = config("path-remove", "\x00-\x1f\x7f")
+ self.clean_path = self._build_cleanfunc(remove, "")
+
+ strip = config("path-strip", "auto")
+ if strip == "auto":
+ strip = ". " if WINDOWS else ""
+ elif strip == "unix":
+ strip = ""
+ elif strip == "windows":
+ strip = ". "
+ self.strip = strip
+
+ basedir = extractor._parentdir
+ if not basedir:
+ basedir = config("base-directory")
+ sep = os.sep
+ if basedir is None:
+ basedir = "." + sep + "gallery-dl" + sep
+ elif basedir:
+ basedir = util.expand_path(basedir)
+ altsep = os.altsep
+ if altsep and altsep in basedir:
+ basedir = basedir.replace(altsep, sep)
+ if basedir[-1] != sep:
+ basedir += sep
+ basedir = self.clean_path(basedir)
+ self.basedirectory = basedir
+
+ @staticmethod
+ def _build_cleanfunc(chars, repl):
+ if not chars:
+ return util.identity
+ elif isinstance(chars, dict):
+ def func(x, table=str.maketrans(chars)):
+ return x.translate(table)
+ elif len(chars) == 1:
+ def func(x, c=chars, r=repl):
+ return x.replace(c, r)
+ else:
+ return functools.partial(
+ re.compile("[" + chars + "]").sub, repl)
+ return func
+
+ def open(self, mode="wb"):
+ """Open file and return a corresponding file object"""
+ return open(self.temppath, mode)
+
+ def exists(self):
+ """Return True if the file exists on disk"""
+ if self.extension and os.path.exists(self.realpath):
+ return self.check_file()
+ return False
+
+ @staticmethod
+ def check_file():
+ return True
+
+ def _enum_file(self):
+ num = 1
+ try:
+ while True:
+ self.prefix = str(num) + "."
+ self.set_extension(self.extension, False)
+ os.stat(self.realpath) # raises OSError if file doesn't exist
+ num += 1
+ except OSError:
+ pass
+ return False
+
+ def set_directory(self, kwdict):
+ """Build directory path and create it if necessary"""
+ self.kwdict = kwdict
+ sep = os.sep
+
+ segments = self.build_directory(kwdict)
+ if segments:
+ self.directory = directory = self.basedirectory + self.clean_path(
+ sep.join(segments) + sep)
+ else:
+ self.directory = directory = self.basedirectory
+
+ if WINDOWS:
+ # Enable longer-than-260-character paths on Windows
+ directory = "\\\\?\\" + os.path.abspath(directory)
+
+ # abspath() in Python 3.7+ removes trailing path separators (#402)
+ if directory[-1] != sep:
+ directory += sep
+
+ self.realdirectory = directory
+ self._create_directory = True
+
+ def set_filename(self, kwdict):
+ """Set general filename data"""
+ self.kwdict = kwdict
+ self.temppath = self.prefix = ""
+
+ ext = kwdict["extension"]
+ kwdict["extension"] = self.extension = self.extension_map(ext, ext)
+
+ if self.extension:
+ self.build_path()
+ else:
+ self.filename = ""
+
+ def set_extension(self, extension, real=True):
+ """Set filename extension"""
+ extension = self.extension_map(extension, extension)
+ if real:
+ self.extension = extension
+ self.kwdict["extension"] = self.prefix + extension
+ self.build_path()
+
+ def fix_extension(self, _=None):
+ """Fix filenames without a given filename extension"""
+ if not self.extension:
+ self.set_extension("", False)
+ if self.path[-1] == ".":
+ self.path = self.path[:-1]
+ self.temppath = self.realpath = self.realpath[:-1]
+ return True
+
+ def build_filename(self, kwdict):
+ """Apply 'kwdict' to filename format string"""
+ try:
+ return self.clean_path(self.clean_segment(
+ self.filename_formatter(kwdict)))
+ except Exception as exc:
+ raise exception.FilenameFormatError(exc)
+
+ def build_filename_conditional(self, kwdict):
+ try:
+ for condition, fmt in self.filename_conditions:
+ if condition(kwdict):
+ break
+ else:
+ fmt = self.filename_formatter
+ return self.clean_path(self.clean_segment(fmt(kwdict)))
+ except Exception as exc:
+ raise exception.FilenameFormatError(exc)
+
+ def build_directory(self, kwdict):
+ """Apply 'kwdict' to directory format strings"""
+ segments = []
+ append = segments.append
+ strip = self.strip
+
+ try:
+ for fmt in self.directory_formatters:
+ segment = fmt(kwdict).strip()
+ if strip:
+ # remove trailing dots and spaces (#647)
+ segment = segment.rstrip(strip)
+ if segment:
+ append(self.clean_segment(segment))
+ return segments
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
+ def build_directory_conditional(self, kwdict):
+ segments = []
+ append = segments.append
+ strip = self.strip
+
+ try:
+ for condition, formatters in self.directory_conditions:
+ if condition(kwdict):
+ break
+ else:
+ formatters = self.directory_formatters
+ for fmt in formatters:
+ segment = fmt(kwdict).strip()
+ if strip:
+ segment = segment.rstrip(strip)
+ if segment:
+ append(self.clean_segment(segment))
+ return segments
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
+ def build_path(self):
+ """Combine directory and filename to full paths"""
+ if self._create_directory:
+ os.makedirs(self.realdirectory, exist_ok=True)
+ self._create_directory = False
+ self.filename = filename = self.build_filename(self.kwdict)
+ self.path = self.directory + filename
+ self.realpath = self.realdirectory + filename
+ if not self.temppath:
+ self.temppath = self.realpath
+
+ def part_enable(self, part_directory=None):
+ """Enable .part file usage"""
+ if self.extension:
+ self.temppath += ".part"
+ else:
+ self.set_extension("part", False)
+ if part_directory:
+ self.temppath = os.path.join(
+ part_directory,
+ os.path.basename(self.temppath),
+ )
+
+ def part_size(self):
+ """Return size of .part file"""
+ try:
+ return os.stat(self.temppath).st_size
+ except OSError:
+ pass
+ return 0
+
+ def finalize(self):
+ """Move tempfile to its target location"""
+ if self.delete:
+ self.delete = False
+ os.unlink(self.temppath)
+ return
+
+ if self.temppath != self.realpath:
+ # Move temp file to its actual location
+ try:
+ os.replace(self.temppath, self.realpath)
+ except OSError:
+ shutil.copyfile(self.temppath, self.realpath)
+ os.unlink(self.temppath)
+
+ mtime = self.kwdict.get("_mtime")
+ if mtime:
+ # Set file modification time
+ try:
+ if isinstance(mtime, str):
+ mtime = mktime_tz(parsedate_tz(mtime))
+ os.utime(self.realpath, (time.time(), mtime))
+ except Exception:
+ pass
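
A sketch of the conditional formats this class now supports (the key expressions and format strings are illustrative): keys are Python expressions evaluated against each file's kwdict, the first truthy condition wins, and the empty key supplies the fallback format.

    filename_config = {
        "extension == 'mp4'": "{id}_video.{extension}",
        "": "{id}.{extension}",
    }
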
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index 1bca593..a08cdc4 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -9,6 +9,8 @@
"""Compare versions of the same file and replace/enumerate them on mismatch"""
from .common import PostProcessor
+from .. import text, util, exception
+import sys
import os
@@ -19,16 +21,33 @@ class ComparePP(PostProcessor):
if options.get("shallow"):
self._compare = self._compare_size
- job.register_hooks({"file": (
- self.enumerate
- if options.get("action") == "enumerate" else
- self.compare
- )}, options)
+ action = options.get("action")
+ if action == "enumerate":
+ job.register_hooks({"file": self.enumerate}, options)
+ else:
+ job.register_hooks({"file": self.compare}, options)
+            action, _, smax = (action or "").partition(":")  # 'action' may be unset
+ self._skipmax = text.parse_int(smax)
+ self._skipexc = self._skipcnt = 0
+ if action == "abort":
+ self._skipexc = exception.StopExtraction
+ elif action == "terminate":
+ self._skipexc = exception.TerminateExtraction
+ elif action == "exit":
+ self._skipexc = sys.exit
def compare(self, pathfmt):
try:
if self._compare(pathfmt.realpath, pathfmt.temppath):
+ if self._skipexc:
+ self._skipcnt += 1
+ if self._skipcnt >= self._skipmax:
+ util.remove_file(pathfmt.temppath)
+ print()
+ raise self._skipexc()
pathfmt.delete = True
+ else:
+ self._skipcnt = 0
except OSError:
pass
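
The extended "action" syntax handled above, in sketch form (the count is illustrative):

    {"name": "compare", "action": "abort:3"}
    # -> after 3 consecutive equal (already-downloaded) files: remove the
    #    .part file and raise exception.StopExtraction
    # "terminate:N" raises TerminateExtraction instead; "exit:N" calls sys.exit()
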
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index 8fed723..cc217c3 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -9,7 +9,7 @@
"""Execute processes"""
from .common import PostProcessor
-from .. import util
+from .. import util, formatter
import subprocess
@@ -33,7 +33,7 @@ class ExecPP(PostProcessor):
self.args = args
execute = self.exec_string
else:
- self.args = [util.Formatter(arg) for arg in args]
+ self.args = [formatter.parse(arg) for arg in args]
execute = self.exec_list
events = options.get("event")
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index c721612..fe65c88 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -9,7 +9,7 @@
"""Write metadata to external files"""
from .common import PostProcessor
-from .. import util
+from .. import util, formatter
import os
@@ -24,7 +24,7 @@ class MetadataPP(PostProcessor):
cfmt = options.get("content-format") or options.get("format")
if isinstance(cfmt, list):
cfmt = "\n".join(cfmt) + "\n"
- self._content_fmt = util.Formatter(cfmt).format_map
+ self._content_fmt = formatter.parse(cfmt).format_map
ext = "txt"
elif mode == "tags":
self.write = self._write_tags
@@ -45,10 +45,10 @@ class MetadataPP(PostProcessor):
extfmt = options.get("extension-format")
if filename:
self._filename = self._filename_custom
- self._filename_fmt = util.Formatter(filename).format_map
+ self._filename_fmt = formatter.parse(filename).format_map
elif extfmt:
self._filename = self._filename_extfmt
- self._extension_fmt = util.Formatter(extfmt).format_map
+ self._extension_fmt = formatter.parse(extfmt).format_map
else:
self.extension = options.get("extension", ext)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 935bf99..4a7fdbf 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -12,20 +12,14 @@ import re
import os
import sys
import json
-import time
import random
-import shutil
-import string
-import _string
import sqlite3
import binascii
import datetime
-import operator
import functools
import itertools
import urllib.parse
from http.cookiejar import Cookie
-from email.utils import mktime_tz, parsedate_tz
from . import text, exception
@@ -97,15 +91,15 @@ def generate_token(size=16):
return binascii.hexlify(data).decode()
-def format_value(value, unit="B", suffixes="kMGTPEZY"):
+def format_value(value, suffixes="kMGTPEZY"):
value = format(value)
value_len = len(value)
index = value_len - 4
if index >= 0:
offset = (value_len - 1) % 3 + 1
return (value[:offset] + "." + value[offset:offset+2] +
- suffixes[index // 3] + unit)
- return value + unit
+ suffixes[index // 3])
+ return value
def combine_dict(a, b):
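
format_value() now returns a bare SI-prefixed number so callers can append their own unit, as TerminalOutput.progress() in output.py does:

    from gallery_dl import util

    util.format_value(999)    # -> "999"
    util.format_value(1000)   # -> "1.00k"
    util.format_value(12345)  # -> "12.34k"
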
@@ -139,6 +133,17 @@ def delete_items(obj, keys):
del obj[key]
+def enumerate_reversed(iterable, start=0, length=None):
+ """Enumerate 'iterable' and return its elements in reverse order"""
+ start -= 1
+ if length is None:
+ length = len(iterable)
+ return zip(
+        range(length + start, start, -1),  # start+length-1 .. start ('start' was decremented above)
+ reversed(iterable),
+ )
+
+
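
With the index range running from start+length-1 down to start, the helper mirrors enumerate() in reverse order:

    from gallery_dl import util

    list(util.enumerate_reversed(["a", "b", "c"]))
    # -> [(2, 'c'), (1, 'b'), (0, 'a')]
    list(util.enumerate_reversed(["a", "b"], start=1))
    # -> [(2, 'b'), (1, 'a')]
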
def number_to_string(value, numbers=(int, float)):
"""Convert numbers (int, float) to string; Return everything else as is."""
return str(value) if value.__class__ in numbers else value
@@ -409,6 +414,24 @@ def compile_expression(expr, name="<expr>", globals=GLOBALS):
return functools.partial(eval, code_object, globals)
+def build_duration_func(duration, min=0.0):
+ if not duration:
+ return None
+
+ try:
+ lower, upper = duration
+ except TypeError:
+ pass
+ else:
+ return functools.partial(
+ random.uniform,
+ lower if lower > min else min,
+ upper if upper > min else min,
+ )
+
+ return functools.partial(identity, duration if duration > min else min)
+
+
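
This is what the sleep/sleep-extractor handling in job.py above relies on: the options accept either a fixed number or a [min, max] range, and the returned callable produces one duration per call.

    from gallery_dl import util

    sleep = util.build_duration_func(2.5)
    sleep()                          # -> 2.5 every call
    sleep = util.build_duration_func([1.0, 3.0])
    sleep()                          # -> random.uniform(1.0, 3.0)
    util.build_duration_func(None)   # -> None (callers skip sleeping entirely)
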
def build_predicate(predicates):
if not predicates:
return lambda url, kwdict: True
@@ -534,557 +557,6 @@ class ExtendedUrl():
return self.value
-class Formatter():
- """Custom, extended version of string.Formatter
-
- This string formatter implementation is a mostly performance-optimized
- variant of the original string.Formatter class. Unnecessary features have
- been removed (positional arguments, unused argument check) and new
- formatting options have been added.
-
- Extra Conversions:
- - "l": calls str.lower on the target value
- - "u": calls str.upper
- - "c": calls str.capitalize
- - "C": calls string.capwords
- - "j". calls json.dumps
- - "t": calls str.strip
- - "d": calls text.parse_timestamp
- - "U": calls urllib.parse.unquote
- - "S": calls util.to_string()
- - "T": calls util.to_timestamü()
- - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
-
- Extra Format Specifiers:
- - "?<before>/<after>/":
- Adds <before> and <after> to the actual value if it evaluates to True.
- Otherwise the whole replacement field becomes an empty string.
- Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
- -> "" (if "f" is None, 0, "")
-
- - "L<maxlen>/<replacement>/":
- Replaces the output with <replacement> if its length (in characters)
- exceeds <maxlen>. Otherwise everything is left as is.
- Example: {f:L5/too long/} -> "foo" (if "f" is "foo")
- -> "too long" (if "f" is "foobar")
-
- - "J<separator>/":
- Joins elements of a list (or string) using <separator>
- Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])
-
- - "R<old>/<new>/":
- Replaces all occurrences of <old> with <new>
- Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
- """
- CACHE = {}
- CONVERSIONS = {
- "l": str.lower,
- "u": str.upper,
- "c": str.capitalize,
- "C": string.capwords,
- "j": json.dumps,
- "t": str.strip,
- "T": to_timestamp,
- "d": text.parse_timestamp,
- "U": urllib.parse.unquote,
- "S": to_string,
- "s": str,
- "r": repr,
- "a": ascii,
- }
-
- def __init__(self, format_string, default=None):
- self.default = default
- key = (format_string, default)
-
- try:
- self.result, self.fields = self.CACHE[key]
- except KeyError:
- self.result = []
- self.fields = []
-
- for literal_text, field_name, format_spec, conv in \
- _string.formatter_parser(format_string):
- if literal_text:
- self.result.append(literal_text)
- if field_name:
- self.fields.append((
- len(self.result),
- self._field_access(field_name, format_spec, conv),
- ))
- self.result.append("")
-
- self.CACHE[key] = (self.result, self.fields)
-
- if len(self.result) == 1:
- if self.fields:
- self.format_map = self.fields[0][1]
- else:
- self.format_map = lambda _: format_string
- del self.result, self.fields
-
- def format_map(self, kwdict):
- """Apply 'kwdict' to the initial format_string and return its result"""
- result = self.result
- for index, func in self.fields:
- result[index] = func(kwdict)
- return "".join(result)
-
- def _field_access(self, field_name, format_spec, conversion):
- fmt = self._parse_format_spec(format_spec, conversion)
-
- if "|" in field_name:
- return self._apply_list([
- self._parse_field_name(fn)
- for fn in field_name.split("|")
- ], fmt)
- else:
- key, funcs = self._parse_field_name(field_name)
- if funcs:
- return self._apply(key, funcs, fmt)
- return self._apply_simple(key, fmt)
-
- @staticmethod
- def _parse_field_name(field_name):
- first, rest = _string.formatter_field_name_split(field_name)
- funcs = []
-
- for is_attr, key in rest:
- if is_attr:
- func = operator.attrgetter
- else:
- func = operator.itemgetter
- try:
- if ":" in key:
- start, _, stop = key.partition(":")
- stop, _, step = stop.partition(":")
- start = int(start) if start else None
- stop = int(stop) if stop else None
- step = int(step) if step else None
- key = slice(start, stop, step)
- except TypeError:
- pass # key is an integer
-
- funcs.append(func(key))
-
- return first, funcs
-
- def _parse_format_spec(self, format_spec, conversion):
- fmt = self._build_format_func(format_spec)
- if not conversion:
- return fmt
-
- conversion = self.CONVERSIONS[conversion]
- if fmt is format:
- return conversion
- else:
- def chain(obj):
- return fmt(conversion(obj))
- return chain
-
- def _build_format_func(self, format_spec):
- if format_spec:
- fmt = format_spec[0]
- if fmt == "?":
- return self._parse_optional(format_spec)
- if fmt == "L":
- return self._parse_maxlen(format_spec)
- if fmt == "J":
- return self._parse_join(format_spec)
- if fmt == "R":
- return self._parse_replace(format_spec)
- return self._default_format(format_spec)
- return format
-
- def _apply(self, key, funcs, fmt):
- def wrap(kwdict):
- try:
- obj = kwdict[key]
- for func in funcs:
- obj = func(obj)
- except Exception:
- obj = self.default
- return fmt(obj)
- return wrap
-
- def _apply_simple(self, key, fmt):
- def wrap(kwdict):
- return fmt(kwdict[key] if key in kwdict else self.default)
- return wrap
-
- def _apply_list(self, lst, fmt):
- def wrap(kwdict):
- for key, funcs in lst:
- try:
- obj = kwdict[key]
- for func in funcs:
- obj = func(obj)
- if obj:
- break
- except Exception:
- pass
- else:
- obj = self.default
- return fmt(obj)
- return wrap
-
- def _parse_optional(self, format_spec):
- before, after, format_spec = format_spec.split("/", 2)
- before = before[1:]
- fmt = self._build_format_func(format_spec)
-
- def optional(obj):
- return before + fmt(obj) + after if obj else ""
- return optional
-
- def _parse_maxlen(self, format_spec):
- maxlen, replacement, format_spec = format_spec.split("/", 2)
- maxlen = text.parse_int(maxlen[1:])
- fmt = self._build_format_func(format_spec)
-
- def mlen(obj):
- obj = fmt(obj)
- return obj if len(obj) <= maxlen else replacement
- return mlen
-
- def _parse_join(self, format_spec):
- separator, _, format_spec = format_spec.partition("/")
- separator = separator[1:]
- fmt = self._build_format_func(format_spec)
-
- def join(obj):
- return fmt(separator.join(obj))
- return join
-
- def _parse_replace(self, format_spec):
- old, new, format_spec = format_spec.split("/", 2)
- old = old[1:]
- fmt = self._build_format_func(format_spec)
-
- def replace(obj):
- return fmt(obj.replace(old, new))
- return replace
-
- @staticmethod
- def _default_format(format_spec):
- def wrap(obj):
- return format(obj, format_spec)
- return wrap
-
-
-class PathFormat():
- EXTENSION_MAP = {
- "jpeg": "jpg",
- "jpe" : "jpg",
- "jfif": "jpg",
- "jif" : "jpg",
- "jfi" : "jpg",
- }
-
- def __init__(self, extractor):
- config = extractor.config
- kwdefault = config("keywords-default")
-
- filename_fmt = config("filename")
- try:
- if filename_fmt is None:
- filename_fmt = extractor.filename_fmt
- elif isinstance(filename_fmt, dict):
- self.filename_conditions = [
- (compile_expression(expr),
- Formatter(fmt, kwdefault).format_map)
- for expr, fmt in filename_fmt.items() if expr
- ]
- self.build_filename = self.build_filename_conditional
- filename_fmt = filename_fmt.get("", extractor.filename_fmt)
-
- self.filename_formatter = Formatter(
- filename_fmt, kwdefault).format_map
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- directory_fmt = config("directory")
- try:
- if directory_fmt is None:
- directory_fmt = extractor.directory_fmt
- elif isinstance(directory_fmt, dict):
- self.directory_conditions = [
- (compile_expression(expr), [
- Formatter(fmt, kwdefault).format_map
- for fmt in fmts
- ])
- for expr, fmts in directory_fmt.items() if expr
- ]
- self.build_directory = self.build_directory_conditional
- directory_fmt = directory_fmt.get("", extractor.directory_fmt)
-
- self.directory_formatters = [
- Formatter(dirfmt, kwdefault).format_map
- for dirfmt in directory_fmt
- ]
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- self.kwdict = {}
- self.directory = self.realdirectory = \
- self.filename = self.extension = self.prefix = \
- self.path = self.realpath = self.temppath = ""
- self.delete = self._create_directory = False
-
- extension_map = config("extension-map")
- if extension_map is None:
- extension_map = self.EXTENSION_MAP
- self.extension_map = extension_map.get
-
- restrict = config("path-restrict", "auto")
- replace = config("path-replace", "_")
- if restrict == "auto":
- restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
- elif restrict == "unix":
- restrict = "/"
- elif restrict == "windows":
- restrict = "\\\\|/<>:\"?*"
- elif restrict == "ascii":
- restrict = "^0-9A-Za-z_."
- self.clean_segment = self._build_cleanfunc(restrict, replace)
-
- remove = config("path-remove", "\x00-\x1f\x7f")
- self.clean_path = self._build_cleanfunc(remove, "")
-
- strip = config("path-strip", "auto")
- if strip == "auto":
- strip = ". " if WINDOWS else ""
- elif strip == "unix":
- strip = ""
- elif strip == "windows":
- strip = ". "
- self.strip = strip
-
- basedir = extractor._parentdir
- if not basedir:
- basedir = config("base-directory")
- sep = os.sep
- if basedir is None:
- basedir = "." + sep + "gallery-dl" + sep
- elif basedir:
- basedir = expand_path(basedir)
- altsep = os.altsep
- if altsep and altsep in basedir:
- basedir = basedir.replace(altsep, sep)
- if basedir[-1] != sep:
- basedir += sep
- basedir = self.clean_path(basedir)
- self.basedirectory = basedir
-
- @staticmethod
- def _build_cleanfunc(chars, repl):
- if not chars:
- return identity
- elif isinstance(chars, dict):
- def func(x, table=str.maketrans(chars)):
- return x.translate(table)
- elif len(chars) == 1:
- def func(x, c=chars, r=repl):
- return x.replace(c, r)
- else:
- return functools.partial(
- re.compile("[" + chars + "]").sub, repl)
- return func
-
- def open(self, mode="wb"):
- """Open file and return a corresponding file object"""
- return open(self.temppath, mode)
-
- def exists(self):
- """Return True if the file exists on disk"""
- if self.extension and os.path.exists(self.realpath):
- return self.check_file()
- return False
-
- @staticmethod
- def check_file():
- return True
-
- def _enum_file(self):
- num = 1
- try:
- while True:
- self.prefix = str(num) + "."
- self.set_extension(self.extension, False)
- os.stat(self.realpath) # raises OSError if file doesn't exist
- num += 1
- except OSError:
- pass
- return False
-
- def set_directory(self, kwdict):
- """Build directory path and create it if necessary"""
- self.kwdict = kwdict
- sep = os.sep
-
- segments = self.build_directory(kwdict)
- if segments:
- self.directory = directory = self.basedirectory + self.clean_path(
- sep.join(segments) + sep)
- else:
- self.directory = directory = self.basedirectory
-
- if WINDOWS:
- # Enable longer-than-260-character paths on Windows
- directory = "\\\\?\\" + os.path.abspath(directory)
-
- # abspath() in Python 3.7+ removes trailing path separators (#402)
- if directory[-1] != sep:
- directory += sep
-
- self.realdirectory = directory
- self._create_directory = True
-
- def set_filename(self, kwdict):
- """Set general filename data"""
- self.kwdict = kwdict
- self.temppath = self.prefix = ""
-
- ext = kwdict["extension"]
- kwdict["extension"] = self.extension = self.extension_map(ext, ext)
-
- if self.extension:
- self.build_path()
- else:
- self.filename = ""
-
- def set_extension(self, extension, real=True):
- """Set filename extension"""
- extension = self.extension_map(extension, extension)
- if real:
- self.extension = extension
- self.kwdict["extension"] = self.prefix + extension
- self.build_path()
-
- def fix_extension(self, _=None):
- """Fix filenames without a given filename extension"""
- if not self.extension:
- self.set_extension("", False)
- if self.path[-1] == ".":
- self.path = self.path[:-1]
- self.temppath = self.realpath = self.realpath[:-1]
- return True
-
- def build_filename(self, kwdict):
- """Apply 'kwdict' to filename format string"""
- try:
- return self.clean_path(self.clean_segment(
- self.filename_formatter(kwdict)))
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- def build_filename_conditional(self, kwdict):
- try:
- for condition, formatter in self.filename_conditions:
- if condition(kwdict):
- break
- else:
- formatter = self.filename_formatter
- return self.clean_path(self.clean_segment(formatter(kwdict)))
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- def build_directory(self, kwdict):
- """Apply 'kwdict' to directory format strings"""
- segments = []
- append = segments.append
- strip = self.strip
-
- try:
- for formatter in self.directory_formatters:
- segment = formatter(kwdict).strip()
- if strip:
- # remove trailing dots and spaces (#647)
- segment = segment.rstrip(strip)
- if segment:
- append(self.clean_segment(segment))
- return segments
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- def build_directory_conditional(self, kwdict):
- segments = []
- append = segments.append
- strip = self.strip
-
- try:
- for condition, formatters in self.directory_conditions:
- if condition(kwdict):
- break
- else:
- formatters = self.directory_formatters
- for formatter in formatters:
- segment = formatter(kwdict).strip()
- if strip:
- segment = segment.rstrip(strip)
- if segment:
- append(self.clean_segment(segment))
- return segments
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- def build_path(self):
- """Combine directory and filename to full paths"""
- if self._create_directory:
- os.makedirs(self.realdirectory, exist_ok=True)
- self._create_directory = False
- self.filename = filename = self.build_filename(self.kwdict)
- self.path = self.directory + filename
- self.realpath = self.realdirectory + filename
- if not self.temppath:
- self.temppath = self.realpath
-
- def part_enable(self, part_directory=None):
- """Enable .part file usage"""
- if self.extension:
- self.temppath += ".part"
- else:
- self.set_extension("part", False)
- if part_directory:
- self.temppath = os.path.join(
- part_directory,
- os.path.basename(self.temppath),
- )
-
- def part_size(self):
- """Return size of .part file"""
- try:
- return os.stat(self.temppath).st_size
- except OSError:
- pass
- return 0
-
- def finalize(self):
- """Move tempfile to its target location"""
- if self.delete:
- self.delete = False
- os.unlink(self.temppath)
- return
-
- if self.temppath != self.realpath:
- # Move temp file to its actual location
- try:
- os.replace(self.temppath, self.realpath)
- except OSError:
- shutil.copyfile(self.temppath, self.realpath)
- os.unlink(self.temppath)
-
- mtime = self.kwdict.get("_mtime")
- if mtime:
- # Set file modification time
- try:
- if isinstance(mtime, str):
- mtime = mktime_tz(parsedate_tz(mtime))
- os.utime(self.realpath, (time.time(), mtime))
- except Exception:
- pass
-
-
class DownloadArchive():
def __init__(self, path, extractor):
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 7e6458f..acc3b8d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.18.4"
+__version__ = "1.19.0"
diff --git a/setup.py b/setup.py
index ab708d8..1a5c315 100644
--- a/setup.py
+++ b/setup.py
@@ -40,13 +40,51 @@ FILES = [
]
]
+DESCRIPTION = ("Command-line program to download image galleries and "
+ "collections from several image hosting sites")
+LONG_DESCRIPTION = read("README.rst")
+
+
+if "py2exe" in sys.argv:
+ try:
+ import py2exe
+ except ImportError:
+ sys.exit("Error importing 'py2exe'")
+
+ # py2exe dislikes version specifiers with a trailing '-dev'
+ VERSION = VERSION.partition("-")[0]
+
+ params = {
+ "console": [{
+ "script" : "./gallery_dl/__main__.py",
+ "dest_base" : "gallery-dl",
+ "version" : VERSION,
+ "description" : DESCRIPTION,
+ "comments" : LONG_DESCRIPTION,
+ "product_name" : "gallery-dl",
+ "product_version": VERSION,
+ }],
+ "options": {"py2exe": {
+ "bundle_files": 0,
+ "compressed" : 1,
+ "optimize" : 1,
+ "dist_dir" : ".",
+ "packages" : ["gallery_dl"],
+ "includes" : ["youtube_dl"],
+ "dll_excludes": ["w9xpopen.exe"],
+ }},
+ "zipfile": None,
+ }
+
+else:
+ params = {}
+
setup(
name="gallery_dl",
version=VERSION,
- description=("Command-line program to download image galleries and "
- "collections from several image hosting sites"),
- long_description=read("README.rst"),
+ description=DESCRIPTION,
+ long_description=LONG_DESCRIPTION,
url="https://github.com/mikf/gallery-dl",
download_url="https://github.com/mikf/gallery-dl/releases/latest",
author="Mike Fährmann",
@@ -96,4 +134,5 @@ setup(
"Topic :: Utilities",
],
test_suite="test",
+ **params,
)
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 42b5c72..9350ce4 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -22,7 +22,7 @@ import http.server
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import downloader, extractor, output, config, util # noqa E402
+from gallery_dl import downloader, extractor, output, config, path # noqa E402
class MockDownloaderModule(Mock):
@@ -33,7 +33,7 @@ class FakeJob():
def __init__(self):
self.extractor = extractor.find("test:")
- self.pathfmt = util.PathFormat(self.extractor)
+ self.pathfmt = path.PathFormat(self.extractor)
self.out = output.NullOutput()
self.get_logger = logging.getLogger
diff --git a/test/test_formatter.py b/test/test_formatter.py
new file mode 100644
index 0000000..70201f3
--- /dev/null
+++ b/test/test_formatter.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import sys
+import unittest
+import datetime
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import formatter # noqa E402
+
+
+class TestFormatter(unittest.TestCase):
+
+ kwdict = {
+ "a": "hElLo wOrLd",
+ "b": "äöü",
+ "d": {"a": "foo", "b": 0, "c": None},
+ "l": ["a", "b", "c"],
+ "n": None,
+ "s": " \n\r\tSPACE ",
+ "u": "&#x27;&lt; / &gt;&#x27;",
+ "t": 1262304000,
+ "dt": datetime.datetime(2010, 1, 1),
+ "name": "Name",
+ "title1": "Title",
+ "title2": "",
+ "title3": None,
+ "title4": 0,
+ }
+
+ def test_conversions(self):
+ self._run_test("{a!l}", "hello world")
+ self._run_test("{a!u}", "HELLO WORLD")
+ self._run_test("{a!c}", "Hello world")
+ self._run_test("{a!C}", "Hello World")
+ self._run_test("{s!t}", "SPACE")
+ self._run_test("{a!U}", self.kwdict["a"])
+ self._run_test("{u!U}", "'< / >'")
+ self._run_test("{a!s}", self.kwdict["a"])
+ self._run_test("{a!r}", "'" + self.kwdict["a"] + "'")
+ self._run_test("{a!a}", "'" + self.kwdict["a"] + "'")
+ self._run_test("{b!a}", "'\\xe4\\xf6\\xfc'")
+ self._run_test("{a!S}", self.kwdict["a"])
+ self._run_test("{l!S}", "a, b, c")
+ self._run_test("{n!S}", "")
+ self._run_test("{t!d}", datetime.datetime(2010, 1, 1))
+ self._run_test("{t!d:%Y-%m-%d}", "2010-01-01")
+ self._run_test("{dt!T}", "1262304000")
+ self._run_test("{l!j}", '["a", "b", "c"]')
+
+ with self.assertRaises(KeyError):
+ self._run_test("{a!q}", "hello world")
+
+ def test_optional(self):
+ self._run_test("{name}{title1}", "NameTitle")
+ self._run_test("{name}{title1:?//}", "NameTitle")
+ self._run_test("{name}{title1:? **/''/}", "Name **Title''")
+
+ self._run_test("{name}{title2}", "Name")
+ self._run_test("{name}{title2:?//}", "Name")
+ self._run_test("{name}{title2:? **/''/}", "Name")
+
+ self._run_test("{name}{title3}", "NameNone")
+ self._run_test("{name}{title3:?//}", "Name")
+ self._run_test("{name}{title3:? **/''/}", "Name")
+
+ self._run_test("{name}{title4}", "Name0")
+ self._run_test("{name}{title4:?//}", "Name")
+ self._run_test("{name}{title4:? **/''/}", "Name")
+
+ def test_missing(self):
+ replacement = "None"
+
+ self._run_test("{missing}", replacement)
+ self._run_test("{missing.attr}", replacement)
+ self._run_test("{missing[key]}", replacement)
+ self._run_test("{missing:?a//}", "")
+
+ self._run_test("{name[missing]}", replacement)
+ self._run_test("{name[missing].attr}", replacement)
+ self._run_test("{name[missing][key]}", replacement)
+ self._run_test("{name[missing]:?a//}", "")
+
+ def test_missing_custom_default(self):
+ replacement = default = "foobar"
+ self._run_test("{missing}" , replacement, default)
+ self._run_test("{missing.attr}", replacement, default)
+ self._run_test("{missing[key]}", replacement, default)
+ self._run_test("{missing:?a//}", "a" + default, default)
+
+ def test_alternative(self):
+ self._run_test("{a|z}" , "hElLo wOrLd")
+ self._run_test("{z|a}" , "hElLo wOrLd")
+ self._run_test("{z|y|a}" , "hElLo wOrLd")
+ self._run_test("{z|y|x|a}", "hElLo wOrLd")
+ self._run_test("{z|n|a|y}", "hElLo wOrLd")
+
+ self._run_test("{z|a!C}" , "Hello World")
+ self._run_test("{z|a:Rh/C/}" , "CElLo wOrLd")
+ self._run_test("{z|a!C:RH/C/}", "Cello World")
+ self._run_test("{z|y|x:?</>/}", "")
+
+ self._run_test("{d[c]|d[b]|d[a]}", "foo")
+ self._run_test("{d[a]|d[b]|d[c]}", "foo")
+ self._run_test("{d[z]|d[y]|d[x]}", "None")
+
+ def test_indexing(self):
+ self._run_test("{l[0]}" , "a")
+ self._run_test("{a[6]}" , "w")
+
+ def test_slicing(self):
+ v = self.kwdict["a"]
+ self._run_test("{a[1:10]}" , v[1:10])
+ self._run_test("{a[-10:-1]}", v[-10:-1])
+ self._run_test("{a[5:]}" , v[5:])
+ self._run_test("{a[50:]}", v[50:])
+ self._run_test("{a[:5]}" , v[:5])
+ self._run_test("{a[:50]}", v[:50])
+ self._run_test("{a[:]}" , v)
+ self._run_test("{a[1:10:2]}" , v[1:10:2])
+ self._run_test("{a[-10:-1:2]}", v[-10:-1:2])
+ self._run_test("{a[5::2]}" , v[5::2])
+ self._run_test("{a[50::2]}", v[50::2])
+ self._run_test("{a[:5:2]}" , v[:5:2])
+ self._run_test("{a[:50:2]}", v[:50:2])
+ self._run_test("{a[::]}" , v)
+
+ def test_maxlen(self):
+ v = self.kwdict["a"]
+ self._run_test("{a:L5/foo/}" , "foo")
+ self._run_test("{a:L50/foo/}", v)
+ self._run_test("{a:L50/foo/>50}", " " * 39 + v)
+ self._run_test("{a:L50/foo/>51}", "foo")
+ self._run_test("{a:Lab/foo/}", "foo")
+
+ def test_join(self):
+ self._run_test("{l:J}" , "abc")
+ self._run_test("{l:J,}" , "a,b,c")
+ self._run_test("{l:J,/}" , "a,b,c")
+ self._run_test("{l:J,/>20}" , " a,b,c")
+ self._run_test("{l:J - }" , "a - b - c")
+ self._run_test("{l:J - /}" , "a - b - c")
+ self._run_test("{l:J - />20}", " a - b - c")
+
+ self._run_test("{a:J/}" , self.kwdict["a"])
+ self._run_test("{a:J, /}" , ", ".join(self.kwdict["a"]))
+
+ def test_replace(self):
+ self._run_test("{a:Rh/C/}" , "CElLo wOrLd")
+ self._run_test("{a!l:Rh/C/}", "Cello world")
+ self._run_test("{a!u:Rh/C/}", "HELLO WORLD")
+
+ self._run_test("{a!l:Rl/_/}", "he__o wor_d")
+ self._run_test("{a!l:Rl//}" , "heo word")
+ self._run_test("{name:Rame/othing/}", "Nothing")
+
+ def test_chain_special(self):
+ # multiple replacements
+ self._run_test("{a:Rh/C/RE/e/RL/l/}", "Cello wOrld")
+ self._run_test("{d[b]!s:R1/Q/R2/A/R0/Y/}", "Y")
+
+ # join-and-replace
+ self._run_test("{l:J-/Rb/E/}", "a-E-c")
+
+ # optional-and-maxlen
+ self._run_test("{d[a]:?</>/L1/too long/}", "<too long>")
+ self._run_test("{d[c]:?</>/L5/too long/}", "")
+
+ def _run_test(self, format_string, result, default=None):
+ fmt = formatter.parse(format_string, default)
+ output = fmt.format_map(self.kwdict)
+ self.assertEqual(output, result, format_string)
+
+
+if __name__ == '__main__':
+ unittest.main()
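The new test_formatter.py exercises the formatter module that 1.19.0 splits out of gallery_dl.util. The tests above build a reusable formatter object with formatter.parse() and apply it to a metadata dict with format_map(). A minimal usage sketch; the kwdict keys here are illustrative, only parse() and format_map() are taken from the tests above:

    import datetime
    from gallery_dl import formatter

    kwdict = {
        "artist": "hElLo wOrLd",
        "tags": ["a", "b", "c"],
        "date": datetime.datetime(2010, 1, 1),
    }

    # parse once, then apply to any number of metadata dicts
    fmt = formatter.parse("{artist!C}", None)
    print(fmt.format_map(kwdict))  # Hello World

    # special specifiers: J joins list elements, L enforces a maximum length
    print(formatter.parse("{tags:J - /}", None).format_map(kwdict))
    # a - b - c
    print(formatter.parse("{artist:L5/too long/}", None).format_map(kwdict))
    # too long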
diff --git a/test/test_output.py b/test/test_output.py
new file mode 100644
index 0000000..84433f0
--- /dev/null
+++ b/test/test_output.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import output # noqa E402
+
+
+class TestShorten(unittest.TestCase):
+
+ def test_shorten_noop(self, f=output.shorten_string):
+ self.assertEqual(f("" , 10), "")
+ self.assertEqual(f("foobar", 10), "foobar")
+
+ def test_shorten(self, f=output.shorten_string):
+ s = "01234567890123456789" # string of length 20
+ self.assertEqual(f(s, 30), s)
+ self.assertEqual(f(s, 25), s)
+ self.assertEqual(f(s, 20), s)
+ self.assertEqual(f(s, 19), "012345678…123456789")
+ self.assertEqual(f(s, 18), "01234567…123456789")
+ self.assertEqual(f(s, 17), "01234567…23456789")
+ self.assertEqual(f(s, 16), "0123456…23456789")
+ self.assertEqual(f(s, 15), "0123456…3456789")
+ self.assertEqual(f(s, 14), "012345…3456789")
+ self.assertEqual(f(s, 13), "012345…456789")
+ self.assertEqual(f(s, 12), "01234…456789")
+ self.assertEqual(f(s, 11), "01234…56789")
+ self.assertEqual(f(s, 10), "0123…56789")
+ self.assertEqual(f(s, 9) , "0123…6789")
+ self.assertEqual(f(s, 3) , "0…9")
+ self.assertEqual(f(s, 2) , "…9")
+
+ def test_shorten_separator(self, f=output.shorten_string):
+ s = "01234567890123456789" # string of length 20
+ self.assertEqual(f(s, 20, "|---|"), s)
+ self.assertEqual(f(s, 19, "|---|"), "0123456|---|3456789")
+ self.assertEqual(f(s, 15, "|---|"), "01234|---|56789")
+ self.assertEqual(f(s, 10, "|---|"), "01|---|789")
+
+ self.assertEqual(f(s, 19, "..."), "01234567...23456789")
+ self.assertEqual(f(s, 19, "..") , "01234567..123456789")
+ self.assertEqual(f(s, 19, ".") , "012345678.123456789")
+ self.assertEqual(f(s, 19, "") , "0123456780123456789")
+
+
+class TestShortenEAW(unittest.TestCase):
+
+ def test_shorten_eaw_noop(self, f=output.shorten_string_eaw):
+ self.assertEqual(f("" , 10), "")
+ self.assertEqual(f("foobar", 10), "foobar")
+
+ def test_shorten_eaw(self, f=output.shorten_string_eaw):
+ s = "01234567890123456789" # 20 ascii characters
+ self.assertEqual(f(s, 30), s)
+ self.assertEqual(f(s, 25), s)
+ self.assertEqual(f(s, 20), s)
+ self.assertEqual(f(s, 19), "012345678…123456789")
+ self.assertEqual(f(s, 18), "01234567…123456789")
+ self.assertEqual(f(s, 17), "01234567…23456789")
+ self.assertEqual(f(s, 16), "0123456…23456789")
+ self.assertEqual(f(s, 15), "0123456…3456789")
+ self.assertEqual(f(s, 14), "012345…3456789")
+ self.assertEqual(f(s, 13), "012345…456789")
+ self.assertEqual(f(s, 12), "01234…456789")
+ self.assertEqual(f(s, 11), "01234…56789")
+ self.assertEqual(f(s, 10), "0123…56789")
+ self.assertEqual(f(s, 9) , "0123…6789")
+ self.assertEqual(f(s, 3) , "0…9")
+ self.assertEqual(f(s, 2) , "…9")
+
+ def test_shorten_eaw_wide(self, f=output.shorten_string_eaw):
+ s = "幻想郷幻想郷幻想郷幻想郷" # 12 wide characters
+ self.assertEqual(f(s, 30), s)
+ self.assertEqual(f(s, 25), s)
+ self.assertEqual(f(s, 20), "幻想郷幻…想郷幻想郷")
+ self.assertEqual(f(s, 19), "幻想郷幻…想郷幻想郷")
+ self.assertEqual(f(s, 18), "幻想郷幻…郷幻想郷")
+ self.assertEqual(f(s, 17), "幻想郷幻…郷幻想郷")
+ self.assertEqual(f(s, 16), "幻想郷…郷幻想郷")
+ self.assertEqual(f(s, 15), "幻想郷…郷幻想郷")
+ self.assertEqual(f(s, 14), "幻想郷…幻想郷")
+ self.assertEqual(f(s, 13), "幻想郷…幻想郷")
+ self.assertEqual(f(s, 12), "幻想…幻想郷")
+ self.assertEqual(f(s, 11), "幻想…幻想郷")
+ self.assertEqual(f(s, 10), "幻想…想郷")
+ self.assertEqual(f(s, 9) , "幻想…想郷")
+ self.assertEqual(f(s, 3) , "…郷")
+
+ def test_shorten_eaw_mix(self, f=output.shorten_string_eaw):
+ s = "幻-想-郷##幻-想-郷##幻-想-郷" # mixed characters
+ self.assertEqual(f(s, 28), s)
+ self.assertEqual(f(s, 25), "幻-想-郷##幻…郷##幻-想-郷")
+
+ self.assertEqual(f(s, 20), "幻-想-郷#…##幻-想-郷")
+ self.assertEqual(f(s, 19), "幻-想-郷#…#幻-想-郷")
+ self.assertEqual(f(s, 18), "幻-想-郷…#幻-想-郷")
+ self.assertEqual(f(s, 17), "幻-想-郷…幻-想-郷")
+ self.assertEqual(f(s, 16), "幻-想-…#幻-想-郷")
+ self.assertEqual(f(s, 15), "幻-想-…幻-想-郷")
+ self.assertEqual(f(s, 14), "幻-想-…-想-郷")
+ self.assertEqual(f(s, 13), "幻-想-…-想-郷")
+ self.assertEqual(f(s, 12), "幻-想…-想-郷")
+ self.assertEqual(f(s, 11), "幻-想…想-郷")
+ self.assertEqual(f(s, 10), "幻-…-想-郷")
+ self.assertEqual(f(s, 9) , "幻-…想-郷")
+ self.assertEqual(f(s, 3) , "…郷")
+
+ def test_shorten_eaw_separator(self, f=output.shorten_string_eaw):
+ s = "01234567890123456789" # 20 ascii characters
+ self.assertEqual(f(s, 20, "|---|"), s)
+ self.assertEqual(f(s, 19, "|---|"), "0123456|---|3456789")
+ self.assertEqual(f(s, 15, "|---|"), "01234|---|56789")
+ self.assertEqual(f(s, 10, "|---|"), "01|---|789")
+
+ self.assertEqual(f(s, 19, "..."), "01234567...23456789")
+ self.assertEqual(f(s, 19, "..") , "01234567..123456789")
+ self.assertEqual(f(s, 19, ".") , "012345678.123456789")
+ self.assertEqual(f(s, 19, "") , "0123456780123456789")
+
+ def test_shorten_eaw_separator_wide(self, f=output.shorten_string_eaw):
+ s = "幻想郷幻想郷幻想郷幻想郷" # 12 wide characters
+ self.assertEqual(f(s, 24, "|---|"), s)
+ self.assertEqual(f(s, 19, "|---|"), "幻想郷|---|郷幻想郷")
+ self.assertEqual(f(s, 15, "|---|"), "幻想|---|幻想郷")
+ self.assertEqual(f(s, 10, "|---|"), "幻|---|郷")
+
+ self.assertEqual(f(s, 19, "..."), "幻想郷幻...郷幻想郷")
+ self.assertEqual(f(s, 19, "..") , "幻想郷幻..郷幻想郷")
+ self.assertEqual(f(s, 19, ".") , "幻想郷幻.想郷幻想郷")
+ self.assertEqual(f(s, 19, "") , "幻想郷幻想郷幻想郷")
+
+    def test_shorten_eaw_separator_mix(self, f=output.shorten_string_eaw):
+ s = "幻-想-郷##幻-想-郷##幻-想-郷" # mixed characters
+ self.assertEqual(f(s, 30, "|---|"), s)
+ self.assertEqual(f(s, 19, "|---|"), "幻-想-|---|幻-想-郷")
+ self.assertEqual(f(s, 15, "|---|"), "幻-想|---|想-郷")
+ self.assertEqual(f(s, 10, "|---|"), "幻|---|-郷")
+
+ self.assertEqual(f(s, 19, "..."), "幻-想-郷...幻-想-郷")
+ self.assertEqual(f(s, 19, "..") , "幻-想-郷..#幻-想-郷")
+ self.assertEqual(f(s, 19, ".") , "幻-想-郷#.#幻-想-郷")
+ self.assertEqual(f(s, 19, "") , "幻-想-郷###幻-想-郷")
+
+
+if __name__ == '__main__':
+ unittest.main()
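Both helpers covered by test_output.py shorten a string to a given display width by cutting out the middle and inserting a separator ("…" by default). shorten_string() counts one cell per character, while shorten_string_eaw() consults East Asian Width data so that wide characters occupy two cells. A short sketch using values from the tests above:

    from gallery_dl import output

    # one character == one cell
    output.shorten_string("01234567890123456789", 10)
    # -> '0123…56789'

    # wide characters count as two cells: 12 kanji == 24 cells,
    # so a 20-cell limit keeps 9 of them plus the separator
    output.shorten_string_eaw("幻想郷幻想郷幻想郷幻想郷", 20)
    # -> '幻想郷幻…想郷幻想郷'

    # an alternative separator can be passed as the third argument
    output.shorten_string("01234567890123456789", 19, "...")
    # -> '01234567...23456789'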
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 00c17b2..84d2747 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -19,7 +19,7 @@ import collections
from datetime import datetime, timezone as tz
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import extractor, output, util # noqa E402
+from gallery_dl import extractor, output, path # noqa E402
from gallery_dl import postprocessor, config # noqa E402
from gallery_dl.postprocessor.common import PostProcessor # noqa E402
@@ -32,7 +32,7 @@ class FakeJob():
def __init__(self, extr=extractor.find("test:")):
self.extractor = extr
- self.pathfmt = util.PathFormat(extr)
+ self.pathfmt = path.PathFormat(extr)
self.out = output.NullOutput()
self.get_logger = logging.getLogger
self.hooks = collections.defaultdict(list)
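The test_postprocessor.py hunk tracks the move of PathFormat from gallery_dl.util into the new gallery_dl.path module. External code constructing path formats the same way only needs the updated import:

    # before 1.19.0
    from gallery_dl import util
    pathfmt = util.PathFormat(extr)

    # 1.19.0 and later
    from gallery_dl import path
    pathfmt = path.PathFormat(extr)  # extr: an extractor instance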
diff --git a/test/test_results.py b/test/test_results.py
index 8a20e6b..ba1e0b1 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -17,7 +17,8 @@ import hashlib
import datetime
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import extractor, util, job, config, exception # noqa E402
+from gallery_dl import \
+ extractor, util, job, config, exception, formatter # noqa E402
# temporary issues, etc.
@@ -91,6 +92,8 @@ class TestExtractorResults(unittest.TestCase):
for url, kwdict in zip(tjob.url_list, tjob.kwdict_list):
if "_extractor" in kwdict:
extr = kwdict["_extractor"].from_url(url)
+ if extr is None and not result.get("extractor", True):
+ continue
self.assertIsInstance(extr, kwdict["_extractor"])
self.assertEqual(extr.url, url)
else:
@@ -260,14 +263,14 @@ class TestPathfmt():
return 0
-class TestFormatter(util.Formatter):
+class TestFormatter(formatter.StringFormatter):
@staticmethod
def _noop(_):
return ""
def _apply_simple(self, key, fmt):
- if key == "extension" or "._parse_optional." in repr(fmt):
+ if key == "extension" or "_parse_optional." in repr(fmt):
return self._noop
def wrap(obj):
@@ -275,7 +278,7 @@ class TestFormatter(util.Formatter):
return wrap
def _apply(self, key, funcs, fmt):
- if key == "extension" or "._parse_optional." in repr(fmt):
+ if key == "extension" or "_parse_optional." in repr(fmt):
return self._noop
def wrap(obj):
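Along the same lines, test_results.py now subclasses formatter.StringFormatter instead of the removed util.Formatter, overriding _apply()/_apply_simple() to stub out selected format functions. A hedged sketch of that subclassing pattern; the key being stubbed is illustrative:

    from gallery_dl import formatter

    class StubFormatter(formatter.StringFormatter):
        # return a no-op format function for 'extension',
        # defer everything else to the regular implementation
        def _apply_simple(self, key, fmt):
            if key == "extension":
                return lambda kwdict: ""
            return formatter.StringFormatter._apply_simple(self, key, fmt)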
diff --git a/test/test_util.py b/test/test_util.py
index 1aa66d1..0fbbbce 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -260,169 +260,6 @@ class TestCookiesTxt(unittest.TestCase):
)
-class TestFormatter(unittest.TestCase):
-
- kwdict = {
- "a": "hElLo wOrLd",
- "b": "äöü",
- "d": {"a": "foo", "b": 0, "c": None},
- "l": ["a", "b", "c"],
- "n": None,
- "s": " \n\r\tSPACE ",
- "u": "%27%3C%20/%20%3E%27",
- "t": 1262304000,
- "dt": datetime.datetime(2010, 1, 1),
- "name": "Name",
- "title1": "Title",
- "title2": "",
- "title3": None,
- "title4": 0,
- }
-
- def test_conversions(self):
- self._run_test("{a!l}", "hello world")
- self._run_test("{a!u}", "HELLO WORLD")
- self._run_test("{a!c}", "Hello world")
- self._run_test("{a!C}", "Hello World")
- self._run_test("{s!t}", "SPACE")
- self._run_test("{a!U}", self.kwdict["a"])
- self._run_test("{u!U}", "'< / >'")
- self._run_test("{a!s}", self.kwdict["a"])
- self._run_test("{a!r}", "'" + self.kwdict["a"] + "'")
- self._run_test("{a!a}", "'" + self.kwdict["a"] + "'")
- self._run_test("{b!a}", "'\\xe4\\xf6\\xfc'")
- self._run_test("{a!S}", self.kwdict["a"])
- self._run_test("{l!S}", "a, b, c")
- self._run_test("{n!S}", "")
- self._run_test("{t!d}", datetime.datetime(2010, 1, 1))
- self._run_test("{t!d:%Y-%m-%d}", "2010-01-01")
- self._run_test("{dt!T}", "1262304000")
- self._run_test("{l!j}", '["a", "b", "c"]')
-
- with self.assertRaises(KeyError):
- self._run_test("{a!q}", "hello world")
-
- def test_optional(self):
- self._run_test("{name}{title1}", "NameTitle")
- self._run_test("{name}{title1:?//}", "NameTitle")
- self._run_test("{name}{title1:? **/''/}", "Name **Title''")
-
- self._run_test("{name}{title2}", "Name")
- self._run_test("{name}{title2:?//}", "Name")
- self._run_test("{name}{title2:? **/''/}", "Name")
-
- self._run_test("{name}{title3}", "NameNone")
- self._run_test("{name}{title3:?//}", "Name")
- self._run_test("{name}{title3:? **/''/}", "Name")
-
- self._run_test("{name}{title4}", "Name0")
- self._run_test("{name}{title4:?//}", "Name")
- self._run_test("{name}{title4:? **/''/}", "Name")
-
- def test_missing(self):
- replacement = "None"
-
- self._run_test("{missing}", replacement)
- self._run_test("{missing.attr}", replacement)
- self._run_test("{missing[key]}", replacement)
- self._run_test("{missing:?a//}", "")
-
- self._run_test("{name[missing]}", replacement)
- self._run_test("{name[missing].attr}", replacement)
- self._run_test("{name[missing][key]}", replacement)
- self._run_test("{name[missing]:?a//}", "")
-
- def test_missing_custom_default(self):
- replacement = default = "foobar"
- self._run_test("{missing}" , replacement, default)
- self._run_test("{missing.attr}", replacement, default)
- self._run_test("{missing[key]}", replacement, default)
- self._run_test("{missing:?a//}", "a" + default, default)
-
- def test_alternative(self):
- self._run_test("{a|z}" , "hElLo wOrLd")
- self._run_test("{z|a}" , "hElLo wOrLd")
- self._run_test("{z|y|a}" , "hElLo wOrLd")
- self._run_test("{z|y|x|a}", "hElLo wOrLd")
- self._run_test("{z|n|a|y}", "hElLo wOrLd")
-
- self._run_test("{z|a!C}" , "Hello World")
- self._run_test("{z|a:Rh/C/}" , "CElLo wOrLd")
- self._run_test("{z|a!C:RH/C/}", "Cello World")
- self._run_test("{z|y|x:?</>/}", "")
-
- self._run_test("{d[c]|d[b]|d[a]}", "foo")
- self._run_test("{d[a]|d[b]|d[c]}", "foo")
- self._run_test("{d[z]|d[y]|d[x]}", "None")
-
- def test_indexing(self):
- self._run_test("{l[0]}" , "a")
- self._run_test("{a[6]}" , "w")
-
- def test_slicing(self):
- v = self.kwdict["a"]
- self._run_test("{a[1:10]}" , v[1:10])
- self._run_test("{a[-10:-1]}", v[-10:-1])
- self._run_test("{a[5:]}" , v[5:])
- self._run_test("{a[50:]}", v[50:])
- self._run_test("{a[:5]}" , v[:5])
- self._run_test("{a[:50]}", v[:50])
- self._run_test("{a[:]}" , v)
- self._run_test("{a[1:10:2]}" , v[1:10:2])
- self._run_test("{a[-10:-1:2]}", v[-10:-1:2])
- self._run_test("{a[5::2]}" , v[5::2])
- self._run_test("{a[50::2]}", v[50::2])
- self._run_test("{a[:5:2]}" , v[:5:2])
- self._run_test("{a[:50:2]}", v[:50:2])
- self._run_test("{a[::]}" , v)
-
- def test_maxlen(self):
- v = self.kwdict["a"]
- self._run_test("{a:L5/foo/}" , "foo")
- self._run_test("{a:L50/foo/}", v)
- self._run_test("{a:L50/foo/>50}", " " * 39 + v)
- self._run_test("{a:L50/foo/>51}", "foo")
- self._run_test("{a:Lab/foo/}", "foo")
-
- def test_join(self):
- self._run_test("{l:J}" , "abc")
- self._run_test("{l:J,}" , "a,b,c")
- self._run_test("{l:J,/}" , "a,b,c")
- self._run_test("{l:J,/>20}" , " a,b,c")
- self._run_test("{l:J - }" , "a - b - c")
- self._run_test("{l:J - /}" , "a - b - c")
- self._run_test("{l:J - />20}", " a - b - c")
-
- self._run_test("{a:J/}" , self.kwdict["a"])
- self._run_test("{a:J, /}" , ", ".join(self.kwdict["a"]))
-
- def test_replace(self):
- self._run_test("{a:Rh/C/}" , "CElLo wOrLd")
- self._run_test("{a!l:Rh/C/}", "Cello world")
- self._run_test("{a!u:Rh/C/}", "HELLO WORLD")
-
- self._run_test("{a!l:Rl/_/}", "he__o wor_d")
- self._run_test("{a!l:Rl//}" , "heo word")
- self._run_test("{name:Rame/othing/}", "Nothing")
-
- def test_chain_special(self):
- # multiple replacements
- self._run_test("{a:Rh/C/RE/e/RL/l/}", "Cello wOrld")
- self._run_test("{d[b]!s:R1/Q/R2/A/R0/Y/}", "Y")
-
- # join-and-replace
- self._run_test("{l:J-/Rb/E/}", "a-E-c")
-
- # optional-and-maxlen
- self._run_test("{d[a]:?</>/L1/too long/}", "<too long>")
- self._run_test("{d[c]:?</>/L5/too long/}", "")
-
- def _run_test(self, format_string, result, default=None):
- formatter = util.Formatter(format_string, default)
- output = formatter.format_map(self.kwdict)
- self.assertEqual(output, result, format_string)
-
-
class TestOther(unittest.TestCase):
def test_bencode(self):
@@ -534,20 +371,17 @@ class TestOther(unittest.TestCase):
self.assertRegex(token, r"^[0-9a-f]+$")
def test_format_value(self):
- self.assertEqual(util.format_value(0) , "0B")
- self.assertEqual(util.format_value(1) , "1B")
- self.assertEqual(util.format_value(12) , "12B")
- self.assertEqual(util.format_value(123) , "123B")
- self.assertEqual(util.format_value(1234) , "1.23kB")
- self.assertEqual(util.format_value(12345) , "12.34kB")
- self.assertEqual(util.format_value(123456) , "123.45kB")
- self.assertEqual(util.format_value(1234567) , "1.23MB")
- self.assertEqual(util.format_value(12345678) , "12.34MB")
- self.assertEqual(util.format_value(123456789) , "123.45MB")
- self.assertEqual(util.format_value(1234567890), "1.23GB")
-
- self.assertEqual(util.format_value(123 , "B/s"), "123B/s")
- self.assertEqual(util.format_value(123456, "B/s"), "123.45kB/s")
+ self.assertEqual(util.format_value(0) , "0")
+ self.assertEqual(util.format_value(1) , "1")
+ self.assertEqual(util.format_value(12) , "12")
+ self.assertEqual(util.format_value(123) , "123")
+ self.assertEqual(util.format_value(1234) , "1.23k")
+ self.assertEqual(util.format_value(12345) , "12.34k")
+ self.assertEqual(util.format_value(123456) , "123.45k")
+ self.assertEqual(util.format_value(1234567) , "1.23M")
+ self.assertEqual(util.format_value(12345678) , "12.34M")
+ self.assertEqual(util.format_value(123456789) , "123.45M")
+ self.assertEqual(util.format_value(1234567890), "1.23G")
def test_combine_dict(self):
self.assertEqual(