python - Python SimpleHTTPServer でディレクトリ全体をダウンロードする

Question

SimpleHTTPServer を使用してネットワーク上でファイルを簡単に共有できる方法が本当に気に入っていますが、「ディレクトリ全体をダウンロードする」などのオプションがあればいいのにと思います。これを実装する簡単な（ワンライナー）方法はありますか？

ありがとう

score 6 · Accepted Answer

私はあなたのためにその変更を行いました、それを行うためのより良い方法があるかどうかはわかりませんが：

ファイル（例：ThreadedHTTPServer.py）を保存し、次のようにアクセスします。

$ python /path/to/ThreadedHTTPServer.py PORT

BPasteRawバージョン

変更はスレッド化された方法でも機能するため、ダウンロードとナビゲーションを同時に行うことで問題が発生することはありません。コードは整理されていませんが、次のようになります。

from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from SocketServer import ThreadingMixIn
import threading
import SimpleHTTPServer
import sys, os, zipfile

# Port to listen on, taken from the first command-line argument.  This runs
# at import time, so the module raises IndexError when no argument is given
# and ValueError when the argument is not an integer.
PORT = int(sys.argv[1])

def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    A request path ending in ``?download`` is answered with a zip archive
    of the directory tree that the path maps to; archive entry names are
    relative to that directory.  The archive is built in an anonymous
    temporary file, so concurrent requests do not share state and nothing
    is left behind in the working directory.  A ``?download`` request
    for something that is not a directory gets a 404.

    Any other path is handled as before: a directory without a trailing
    slash is redirected, a directory is served through its index file or
    a generated listing, and anything else is served as a plain file.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.

    """
    # Imported locally so this function does not depend on a new
    # top-level import being added to the file.
    import tempfile

    # NOTE: relies on translate_path() discarding the "?download" query
    # part, as the original code did.
    path = self.translate_path(self.path)
    f = None

    if self.path.endswith('?download'):
        self.path = self.path.replace("?download", "")
        if not os.path.isdir(path):
            self.send_error(404, "File not found")
            return None
        ctype = "application/zip"
        f = tempfile.TemporaryFile()
        try:
            archive = zipfile.ZipFile(f, 'w')
            try:
                for root, dirs, files in os.walk(path):
                    for name in files:
                        full = os.path.join(root, name)
                        # Store entries relative to the requested
                        # directory rather than by filesystem path.
                        archive.write(full, os.path.relpath(full, path))
            finally:
                archive.close()
            f.flush()
            f.seek(0)
        except Exception:
            f.close()
            raise
    else:
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            # Always read in binary mode. Opening files in text mode may
            # cause newline translations, making the actual size of the
            # content transmitted *less* than the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

    try:
        self.send_response(200)
        self.send_header("Content-type", ctype)
        fs = os.fstat(f.fileno())
        self.send_header("Content-Length", str(fs.st_size))
        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
        self.end_headers()
    except Exception:
        # Do not leak the open file when the headers cannot be sent.
        f.close()
        raise
    return f

def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    The listing starts with a link to ``<request path>?download``, which
    send_head() answers with a zip archive of the directory tree.

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().

    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    import cgi
    import urllib

    try:
        entries = os.listdir(path)
    except os.error:
        self.send_error(404, "No permission to list directory")
        return None
    entries.sort(key=lambda a: a.lower())
    f = StringIO()
    displaypath = cgi.escape(urllib.unquote(self.path))
    f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
    f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
    f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
    # The request path is client-controlled: escape it (including double
    # quotes) before placing it inside the href attribute.
    f.write('<a href="%s">%s</a>\n'
            % (cgi.escape(self.path + "?download", True),
               'Download Directory Tree as Zip'))
    f.write("<hr>\n<ul>\n")
    for name in entries:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or @ for symbolic links
        if os.path.isdir(fullname):
            displayname = name + "/"
            linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "@"
            # Note: a link to a directory displays with @ and links with /
        f.write('<li><a href="%s">%s</a>\n'
                % (urllib.quote(linkname), cgi.escape(displayname)))
    f.write("</ul>\n<hr>\n</body>\n</html>\n")
    length = f.tell()
    f.seek(0)
    self.send_response(200)
    encoding = sys.getfilesystemencoding()
    self.send_header("Content-type", "text/html; charset=%s" % encoding)
    self.send_header("Content-Length", str(length))
    self.end_headers()
    return f

class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Request handler that adds the "download directory as zip" feature.

    Subclassing keeps the overrides local to this handler instead of
    patching the standard library's SimpleHTTPRequestHandler for every
    other user in the process.
    """

    # Bind the module-level functions defined above as methods.
    send_head = send_head
    list_directory = list_directory

class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn.

    Adds no behavior of its own; it only combines the two base classes.
    """

    pass

if __name__ == '__main__':
    # Listen on all interfaces on the port given on the command line.
    server = ThreadedHTTPServer(('0.0.0.0', PORT), Handler)
    # Parenthesized single-argument form prints the same text under both
    # the print statement and the print function.
    print('Starting server, use <Ctrl-C> to stop')
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the advertised way to stop; exit without a traceback.
        pass
    finally:
        # Release the listening socket.
        server.server_close()

score 5 · Accepted Answer

ソースを参照してください。現在、ディレクトリを指す URL でサーバーにアクセスすると、その index.html ファイルが提供され、それがない場合は list_directory メソッドが呼び出されます。おそらく、代わりにディレクトリの内容を（再帰的に、だと思いますが）含む zip ファイルを作成し、それを提供したいのですよね？ 明らかに、1 行の変更でそれを行う方法はありません。現在の 68 ～ 80 行（send_head メソッド内）と、list_directory メソッド全体である 98 ～ 137 行を置き換えることになるためです。これだけですでに少なくとも 50 行を超える変更になります ;-)。

1 行ではなく数十行の変更でも問題がなく、上で説明したセマンティクスで良いのであれば、もちろん、必要な zip ファイルを ZipFile クラスを使って cStringIO.StringIO オブジェクト上に構築し、対象ディレクトリを os.walk でたどって中身を追加していくことができます（すべてのサブディレクトリも再帰的に取得したいと仮定しています）。しかし、それは間違いなくワンライナーにはなりません ;-)。

score 4 · Accepted Answer

それを行うワンライナーはありません。また、「ディレクトリ全体をダウンロードする」とは、tar としてですか、それとも zip としてですか？

とにかく、これらの手順に従うことができます

SimpleHTTPRequestHandler からクラスを派生させるか、そのコードをコピーするだけの場合があります
「フォルダ全体をダウンロード」へのリンクを返すように list_directory メソッドを変更します
リンクの場合、ディレクトリ全体を圧縮して返すようにcopyfileメソッドを変更します
zip をキャッシュして、毎回フォルダーを圧縮しないようにすることができます。代わりに、ファイルが変更されているかどうかを確認します。

楽しいエクササイズになります:)

score 1 · Accepted Answer

@mononoke のソリューションが好きです。しかし、それにはいくつかの問題があります。それらは次のとおりです。

テキストモードでファイルを書き込む
特に非ASCIIパスの場合、href と text が異なる場合がある
大きなファイルをブロック単位でダウンロードしない

これらの問題を解決しようとしました：

import os
from pathlib import Path
from urllib.parse import urlparse, urljoin
import requests
from bs4 import BeautifulSoup
import math

def get_links(content):
    """Yield ``(href, text)`` for every anchor in *content* that has an href.

    Anchors without an ``href`` attribute are skipped, so callers never
    receive ``None`` as the link.  The parser is named explicitly so the
    result does not depend on which optional parsers are installed.
    """
    soup = BeautifulSoup(content, "html.parser")
    for a in soup.find_all('a', href=True):
        yield a['href'], a.get_text()

def download(url, path=None, overwrite=False):
    """Download *url* to *path*, recursing into directory listings.

    A URL ending in ``/`` is treated as a directory listing: the local
    directory is created and every link in the listing whose href does
    not start with ``.`` is downloaded below it, using the link text as
    the local name.  Any other URL is streamed to *path* in 1 MiB chunks
    while a ``#`` progress bar is printed.

    Args:
        url: Address of a file, or of a directory listing when it ends
            with ``/``.
        path: Local destination.  Defaults to the URL's path component
            without its leading slash.
        overwrite: When false, a file that already exists locally is
            reported and skipped.  The flag also applies to every file
            reached through recursion.

    Raises:
        Exception: if the server answers with a status other than 200.
    """
    if path is None:
        path = urlparse(url).path.lstrip('/')

    if url.endswith('/'):
        r = requests.get(url)
        if r.status_code != 200:
            raise Exception('status code is {} for {}'.format(r.status_code, url))
        Path(path.rstrip('/')).mkdir(parents=True, exist_ok=True)
        for link, name in get_links(r.text):
            # skip hidden files such as .DS_Store (and anchors lacking href)
            if link and not link.startswith('.'):
                download(urljoin(url, link), os.path.join(path, name), overwrite)
        return

    if os.path.isfile(path):
        print("#existing", path)
        if not overwrite:
            return

    chunk_size = 1024 * 1024
    r = requests.get(url, stream=True)
    try:
        # Refuse to save an error page as if it were the requested file.
        if r.status_code != 200:
            raise Exception('status code is {} for {}'.format(r.status_code, url))
        # Content-Length may be absent; the progress bar is then skipped.
        content_size = int(r.headers.get('content-length', 0))
        total = math.ceil(content_size / chunk_size)
        print("#", path)
        with open(path, 'wb') as f:
            c = 0
            st = 100
            for chunk in r.iter_content(chunk_size=chunk_size):
                c += 1
                if chunk:
                    f.write(chunk)
                if total:
                    # Number of new bar cells earned by this chunk.
                    ap = int(c * st / total) - int((c - 1) * st / total)
                    if ap > 0:
                        print("#" * ap, end="")
            # Blank out the progress bar; total is 0 for empty or
            # unknown-length bodies, in which case nothing was drawn.
            drawn = int(c * st / total) if total else 0
            print("\r  ", " " * drawn, "\r", end="")
    finally:
        # Release the streamed connection.
        r.close()
            
if __name__ == '__main__':
    # A trailing slash marks the URL as a directory listing to recurse into.
    url = 'http://ed470d37.ngrok.io/a/bc/'
    download(url, path="/data/bc")

python - Python SimpleHTTPServer でディレクトリ全体をダウンロードする

5 に答える 5

Related

Reference