« 白絹(シルク)やぶれかぶれ | トップページ | utf-8 テキストファイルの <, >, & を html 用にエスケープ ver.1.0.1 »

2005年2月18日

ココログの過去ログ・アーカイブ作成 ver.1.0.1

# Cocolog archive generator ver.1.0.1
# Requires Python
# For CSS, Cocolog's own style element (or its styles.css) works reasonably well
import xmlrpclib, urlparse, calendar, time, os, os.path
WEBLOG = "http://tuchinoko.moe-nifty.com/oboegaki/" # set this to the top URL of your own Cocolog blog
ACCOUNT = "hoge" # Cocolog account name
PASSWORD = "????????" # password
# Setting the next two flags to False makes the run faster
TRACKBACK = True # set to False to skip fetching trackbacks
CATEGORIES = True # set to False to skip fetching categories

MAXTITLES = 10000 # maximum number of posts to fetch
WEEK = (u"月曜日", u"火曜日", u"水曜日", u"木曜日",
        u"金曜日", u"土曜日", u"日曜日") # weekday names, indexed by tm_wday (Monday first)
# Opening part of every generated page: DOCTYPE, <html>, <head>, the charset
# meta tag and the Atom/RSS links. <head> is left open; each page generator
# appends its own <title>/<link> elements and the closing </head>.
# Adjacent string literals are concatenated before "%" is applied, so each
# "% WEBLOG" fills the single %s of the literal group it terminates.
HTMLHEAD = "".join([
#    '<?xml version="1.0" encoding="utf-8"?>\n' # renders wrongly in IE (quirks mode?)
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="ja" lang="ja">\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n'
    '<link rel="alternate" type="application/atom+xml" title="Atom"'
    ' href="%satom.xml" />\n' % WEBLOG,
    '<link rel="alternate" type="application/rss+xml" title="RSS"'
    ' href="%sindex.rdf" />\n' % WEBLOG])
class Post(object):
    """One blog entry fetched over XML-RPC that can render its own archive page."""
    def __init__(self, post, blogname, userdict, server):
        """Populate the entry from one post record.

        post     -- mapping read with the keys "title", "dateCreated", "link",
                    "description", "mt_text_more", "mt_convert_breaks",
                    "userid" and "postid".
        blogname -- blog title shown in the generated page.
        userdict -- maps post["userid"] to the author name to display.
        server   -- XML-RPC proxy; its mt.getTrackbackPings and
                    mt.getPostCategories methods are called when the
                    TRACKBACK / CATEGORIES flags are set.
        """
        self.blogname = blogname
        self.title = unicode(post["title"])
        # Strip a trailing "Z", append "UTC", parse with a fixed format, then
        # convert that UTC timestamp to local time.
        self.time = time.localtime(calendar.timegm(time.strptime( # hard-coded format (^^;
            str(post["dateCreated"]).rstrip("Z")+"UTC", "%Y-%m-%dT%H:%M:%S%Z")))
        self.link = post["link"]
        # The permalink path must split into exactly five parts
        # ("", weblog name, year, month, file name); anything else makes the
        # tuple unpacking below raise ValueError.
        url = urlparse.urlparse(self.link)[2]
        (dummy, self.weblogname, self.dir_year,
         self.dir_month, self.filename) = url.split("/")
        # Path of this entry's page relative to the archive's weblog directory.
        self.url = "%s/%s/%s" % (
            self.dir_year,self.dir_month, self.filename)
        self.description = unicode(post["description"])
        self.text_more = unicode(post["mt_text_more"])
        if post["mt_convert_breaks"] != "0": # quick-and-dirty line-break conversion
            self.description = self.description.replace("\n", "<br />\n")
            self.text_more = self.text_more.replace("\n", "<br />\n")
        self.username = userdict[post["userid"]]
        self.postid = post["postid"]
        self.trackback = []
        if TRACKBACK:
            self.trackback = server.mt.getTrackbackPings(self.postid)
        self.categories = []
        if CATEGORIES:
            self.categories = server.mt.getPostCategories(
                self.postid, ACCOUNT, PASSWORD)
        # Progress indicator: one dot per constructed post.
        print ".",
    def replace_text(self, replace_text):
        """Apply each (old, new) pair to both the body and the extended text."""
        for old, new in replace_text:
            self.description = self.description.replace(old, new)
            self.text_more = self.text_more.replace(old, new)
    def get_html(self, prev=None, next=None):
        """Return the complete archive page for this entry as one string.

        prev, next -- neighbouring Post objects (or None); when given, a
                      <link rel> element and a navigation anchor pointing at
                      "../../<their url>" are emitted.
        """
        if prev:
            prev_url = "../../%s" % prev.url
        if next:
            next_url = "../../%s" % next.url
        # Titles placed in <title> or in attribute values go through tagcut;
        # titles used as anchor text further down are inserted as-is.
        data = [
            HTMLHEAD,
            u'<title>%s: %s (アーカイブ)</title>\n'
            % (self.blogname, tagcut(self.title)),
            '<link rel="stylesheet" type="text/css" href="../../styles.css" />\n'
            '<link rel="stylesheet" type="text/css" href="../../post.css" />\n'
            '<link rel="stylesheet" type="text/css" href="styles.css" />\n'
            '<link rel="start" href="../../index.html" title="Home" />\n']
        if prev:
            data += ['<link rel="prev" href="%s" title="%s" />\n'
                     % (prev_url, tagcut(prev.title))]
        if next:
            data += ['<link rel="next" href="%s" title="%s" />\n'
                     % (next_url, tagcut(next.title))]
        data += [
            '</head>\n<body>\n<div id="container">\n<div id="banner">\n'
            u'<h1><a href="%s">%s</a></h1>\n<h2>(アーカイブ)</h2>\n</div>'
            % (WEBLOG, self.blogname),
            '<div class="content">\n<div class="entry-nav">\n<p align="right">']
        if prev:
            data += ['<a href="%s">&laquo; %s</a> | ' % (prev_url, prev.title)]
        data += [u'<a href="../../index.html">目次</a>']
        if next:
            data += [' | <a href="%s">%s &raquo;</a>' % (next_url, next.title)]
        data += [
            '</p>\n</div>\n'
            '<h3><a href="%s">%s</a></h3>\n' % (self.link, self.title),
            '<div class="entry-body">\n%s</div>\n' % self.description]
        # The extended text gets its own section only when it is not blank.
        if self.text_more.strip():
            data += ['<hr />\n<div class="entry-more">\n%s</div>\n'
                     % self.text_more]
        t = self.time
        y, m = t.tm_year, t.tm_mon
        # Author link: "about.html" on the scheme and host taken from WEBLOG.
        s = urlparse.urlsplit(WEBLOG)
        about = urlparse.urlunsplit((s[0], s[1], "about.html", "", ""))
        data += [
            '<p class="posted"  align="right">\n'
            u'<a href="../../index.html#year%04d">%d年</a>' % (y, y),
            u' <a href="../../%04d/%02d.html">%d月</a> %d日 %s %d時 %d分'
            % (y, m, m, t.tm_mday, WEEK[t.tm_wday], t.tm_hour, t.tm_min),
            ' <a href="%s">%s</a>' % (about, self.username)]
        if self.categories:
            data += [' in ',
                     ", ".join(['<a href="../../category/%s.html">%s</a>'
                                % (x["categoryId"], unicode(x["categoryName"]))
                                for x in self.categories])]
        data += ['</p>\n']
        if self.trackback:
            data += [
                u'<h2 id="trackback">トラックバック</h2>\n'
                '<div class="trackback-content"><ul>\n%s</ul>\n</div>\n'
                % "".join(['<li><a href="%s">%s</a></li>\n'
                           % (x["pingURL"], unicode(x["pingTitle"]))
                           for x in self.trackback])]
        data += ['</div>\n</div>\n</body>\n</html>\n']
        return "".join(data)
    def make_htmlfile(self, prev=None, next=None, path=""):
        """Render the page and write it to path/weblogname/year/month/filename."""
        writehtml(os.path.join(path, self.weblogname, self.dir_year,
                               self.dir_month, self.filename),
                  self.get_html(prev, next))
def tagcut(html):
    """Return *html* with markup removed and double quotes escaped.

    Everything from a "<" up to and including the next ">" is dropped (an
    unterminated tag swallows the rest of the input), every '"' in the
    remaining text becomes "&quot;", and all other characters are kept.
    """
    kept = []
    inside_tag = False
    for ch in html:
        if inside_tag:
            # Keep skipping until the character that closes the tag.
            inside_tag = ch != ">"
            continue
        if ch == "<":
            inside_tag = True
            continue
        if ch == '"':
            kept.append("&quot;")
            continue
        kept.append(ch)
    return "".join(kept)
def writehtml(filename, html):
    """Write *html* to *filename* encoded as UTF-8.

    filename -- destination path; missing parent directories are created.
    html     -- page text; it is encoded with html.encode("utf-8").

    An existing file at *filename* is overwritten.
    """
    # Encode first so that an encoding error cannot leave a truncated file.
    payload = html.encode("utf-8")
    dirname = os.path.dirname(filename)
    # A bare file name has an empty dirname and os.makedirs("") raises, so
    # only create directories when there is a directory part.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    # The payload is already bytes: write it in binary mode (no newline
    # translation) and close the handle explicitly instead of leaving that
    # to garbage collection.
    out = open(filename, "wb")
    try:
        out.write(payload)
    finally:
        out.close()
def get_posts():
    """Fetch the configured blog's recent posts and wrap each one in a Post.

    Looks up the blog whose "url" equals WEBLOG among the blogs returned for
    ACCOUNT, prints the author's nickname and the blog name, then requests
    up to MAXTITLES recent posts.

    Returns a list of Post objects, in the order the server returned them.
    Raises LookupError when none of the account's blogs has the URL WEBLOG.
    """
    # NOTE(review): the endpoint is plain http, so ACCOUNT and PASSWORD are
    # sent unencrypted; switch to https if the service offers it.
    server = xmlrpclib.ServerProxy("http://app.cocolog-nifty.com/t/api")
    for blog in server.blogger.getUsersBlogs("", ACCOUNT, PASSWORD):
        if blog["url"] == WEBLOG:
            blogid, blogname = blog["blogid"], unicode(blog["blogName"])
            break
    else:
        # A real exception object: raising a bare string is a TypeError on
        # Python 2.6 and later.
        raise LookupError("%s not found." % WEBLOG)
    user = server.blogger.getUserInfo("", ACCOUNT, PASSWORD)
    nickname = unicode(user["nickname"])
    # Single pre-formatted argument: same output as the comma-separated
    # print statement.
    print("%s 's blog: %s" % (nickname, blogname))
    # Post only reads this mapping, so one shared dict serves every post.
    userdict = {user["userid"]: nickname}
    return [Post(x, blogname, userdict, server) for x in
            server.metaWeblog.getRecentPosts(blogid, ACCOUNT, PASSWORD,
                                             MAXTITLES)]
def make_month_html(posts, prev=None, next=None):
    """Write the index page that lists one month's posts.

    posts -- non-empty list of Post objects of the same month; the first one
             supplies the blog name, output directory, year and month.
    prev, next -- post lists of the neighbouring months (or None); when
             given, <link rel> elements and navigation anchors to their
             month pages are emitted.

    The page is written to <weblogname>/<YYYY>/<MM>.html through writehtml.
    """
    def month_ref(month_posts):
        # (href, label) pair for the page of the month month_posts belong to.
        when = month_posts[0].time
        return ("../%04d/%02d.html" % (when.tm_year, when.tm_mon),
                u"%d年 %d月" % (when.tm_year, when.tm_mon))

    head = posts[0]
    blogname, weblogdir = head.blogname, head.weblogname
    year, month = head.time.tm_year, head.time.tm_mon
    prev_date = next_date = None
    if prev:
        prev_date = month_ref(prev)
    if next:
        next_date = month_ref(next)

    page = [HTMLHEAD]
    page.append(u'<title>%d年 %d月: %s (アーカイブ)</title>\n'
                % (year, month, blogname))
    page.append(
        '<link rel="stylesheet" type="text/css" href="../styles.css" />\n'
        '<link rel="stylesheet" type="text/css" href="../month.css" />\n')
    if prev_date is not None:
        page.append('<link rel="prev" href="%s" title="%s" />\n' % prev_date)
    if next_date is not None:
        page.append('<link rel="next" href="%s" title="%s" />\n' % next_date)
    page.append(
        '<link rel="start" href="../index.html" title="Home" />\n'
        '</head>\n<body>\n<div id="container">\n<div id="banner">\n')
    page.append(u'<h1><a href="../index.html#year%04d">%d年</a> %d月</h1>\n'
                % (year, year, month))
    page.append(u'<h2><a href="%s">%s</a> (アーカイブ)</h2>'
                % (WEBLOG, blogname))
    page.append('</div>\n<div class="content">\n<div class="entry-nav">\n'
                '<p align="right">')

    # Navigation line: previous month | table of contents | next month.
    if prev_date is not None:
        page.append(u'<a href="%s">&laquo; %s</a> | ' % prev_date)
    page.append(u'<a href="../index.html">目次</a>')
    if next_date is not None:
        page.append(u' | <a href="%s">%s &raquo;</a>' % next_date)

    page.append('</p>\n</div>\n<ol>\n')
    for entry in posts:
        stamp = entry.time
        page.append(
            u'<li><a href="../%s"><small>%02d日 %02d:%02d</small> %s</a></li>\n'
            % (entry.url, stamp.tm_mday, stamp.tm_hour, stamp.tm_min,
               entry.title))
    page.append('</ol>\n</div>\n</div>\n</body>\n</html>\n')

    target = os.path.join(weblogdir, "%04d" % year, "%02d.html" % month)
    writehtml(target, "".join(page))
def make_category_html(posts):
    """Write one page per category and return a summary of the categories.

    posts -- non-empty list of Post objects; each post's .categories entries
             are read with the keys "categoryId" and "categoryName".

    Every category found gets a page at <weblogname>/category/<id>.html that
    lists its posts grouped by year and month (via make_yearlist).

    Returns a list of (category_id, category_name, post_count) tuples sorted
    by descending post count, then by ascending name.
    """
    blogname = posts[0].blogname
    weblogdir = posts[0].weblogname
    # category id -> (display name, posts filed under it, in input order)
    categories = {}
    for p in posts:
        for c in p.categories:
            entry = categories.setdefault(
                c["categoryId"], (unicode(c["categoryName"]), []))
            entry[1].append(p)
    # Distinct loop names: the original reused "posts" and "id" here, which
    # clobbered the parameter and shadowed the builtin.
    for cat_id, (name, cat_posts) in categories.items():
        data = [
            HTMLHEAD,
            u'<title>%s: %s (アーカイブ)</title>\n' % (name, blogname),
            '<link rel="stylesheet" type="text/css" href="../styles.css" />\n'
            '<link rel="stylesheet" type="text/css" href="styles.css" />\n'
            '<link rel="start" href="../index.html" title="Home" />\n'
            '</head>\n<body>\n<div id="container">\n<div id="banner">\n'
            u'<h1>%s</h1>\n<h2><a href="%s">%s</a> (アーカイブ)</h2>\n'
            % (name, WEBLOG, blogname),
            '</div>\n<div class="content">\n<div class="entry-nav">\n'
            u'<p align="right"><a href="../index.html">目次</a></p>\n</div>\n']
        for y, monthlist in make_yearlist(cat_posts):
            data += [u'<h2><a href="../index.html#year%04d">%d年</a></h2>\n'
                     % (y, y)]
            for m, p_list in monthlist:
                data += [u'<h3><a href="../%04d/%02d.html">%d月</a></h3>\n'
                         % (y, m, m), '<ol>\n']
                for p in p_list:
                    t = p.time
                    data += [
                        u'<li><a href="../%s"><small>%02d日 %02d:%02d</small>'
                        % (p.url, t.tm_mday, t.tm_hour, t.tm_min),
                        ' %s</a>' % p.title]
                    # Posts filed under several categories also link to the
                    # other ones.
                    if len(p.categories) > 1:
                        data += ['<small> in %s</small>' % ", ".join(
                            ['<a href="../category/%s.html">%s</a>'
                             % (x["categoryId"], unicode(x["categoryName"]))
                             for x in p.categories
                             if cat_id != x["categoryId"]])]
                    data += ['</li>\n']
                data += ['</ol>\n']
        data += ['</div>\n</div>\n</body>\n</html>\n']
        writehtml(os.path.join(weblogdir, "category", cat_id+".html"),
                  "".join(data))
    summary = [(cat_id, name, len(cat_posts))
               for (cat_id, (name, cat_posts)) in categories.items()]
    # Key-based sort, same order as the former cmp function: most posts
    # first, ties broken by name.
    summary.sort(key=lambda item: (-item[2], item[1]))
    return summary
def make_yearlist(posts):
    """Group posts by year and month, newest first.

    posts -- iterable of objects exposing .time.tm_year and .time.tm_mon.

    Returns [(year, [(month, [post, ...]), ...]), ...] with years and months
    in descending order; posts within a month keep their input order.
    An empty input yields an empty list.
    """
    # year -> month -> posts of that month, in input order
    yeardict = {}
    for p in posts:
        months = yeardict.setdefault(p.time.tm_year, {})
        months.setdefault(p.time.tm_mon, []).append(p)
    yearlist = []
    # sorted() accepts any iterable of keys, so this does not rely on
    # dict.keys() returning a list that can be sorted in place.
    for y in sorted(yeardict, reverse=True):
        months = yeardict[y]
        yearlist.append(
            (y, [(m, months[m]) for m in sorted(months, reverse=True)]))
    return yearlist
def make_menu(posts):
    """Write the top-level index page and every month page.

    posts -- non-empty list of Post objects; the first one supplies the blog
             name and the output directory.

    When CATEGORIES is set, make_category_html is called first (it writes the
    category pages) and its summary becomes the category list of the index.
    The index is written to <weblogname>/index.html; afterwards
    make_month_html is called once per month, newest month first.
    """
    first = posts[0]
    blogname, weblogdir = first.blogname, first.weblogname
    page = [HTMLHEAD]
    page.append(u'<title>目次: %s (アーカイブ)</title>\n' % blogname)
    page.append(
        '<link rel="stylesheet" type="text/css" href="styles.css" />\n'
        '<link rel="stylesheet" type="text/css" href="menu.css" />\n'
        '</head>\n<body>\n<div id="container">\n'
        u'<div id="banner">\n<h1>目次</h1>\n')
    page.append(u'<h2><a href="%s">%s</a> (アーカイブ)</h2>\n'
                % (WEBLOG, blogname))
    page.append(u'</div>\n<div class="content">\n')

    if CATEGORIES:
        category_links = []
        for summary in make_category_html(posts):
            # summary is an (id, name, count) tuple
            category_links.append(
                '<a href="./category/%s.html">%s(%d)</a>' % summary)
        page.append(u'<dl>\n<dt>カテゴリー:</dt>\n<dd>%s</dd>\n</dl>\n'
                    % ", ".join(category_links))

    # Post lists of every month, newest month first.
    months = []
    for year, monthlist in make_yearlist(posts):
        month_links = []
        for month, month_posts in monthlist:
            months.append(month_posts)
            month_links.append(u'<a href="./%04d/%02d.html">%d月(%d)</a>'
                               % (year, month, month, len(month_posts)))
        page.append(u'<h3 id="year%04d">%d年</h3>\n' % (year, year))
        page.append('<p>')
        page.append(", ".join(month_links))
        page.append('</p>\n')
    page.append('</div>\n</div>\n</body>\n</html>\n')
    writehtml(os.path.join(weblogdir, "index.html"), "".join(page))

    # For each month, "prev" is the following list entry (the older month)
    # and "next" the preceding one (the newer month); None at either end.
    older = months[1:] + [None]
    newer = [None] + months[:-1]
    for current, prev_month, next_month in zip(months, older, newer):
        make_month_html(current, prev_month, next_month)
# Script body: fetch the posts, write one page per post, then the index,
# month and category pages, and report how long the run took.
# time.time() gives wall-clock time on every platform; time.clock() is CPU
# time on Unix (so a network-bound run reported almost nothing) and no longer
# exists on Python 3.8+.
t = time.time()
posts = get_posts()
print("ok\n%d posts" % len(posts))
# Point links between archived posts at the local copies: an href holding a
# post's permalink becomes the relative path of its archive page.
replace_url = [('href="%s"' % p.link, 'href="../../%s"' % p.url) for p in posts]
for p in posts:
    p.replace_text(replace_url)
# Pad with None at both ends so every post has two neighbours; the following
# list entry is passed as "prev" and the preceding one as "next".
p = [None] + posts + [None]
for i in range(1, len(p)-1):
    p[i].make_htmlfile(p[i+1], p[i-1])
# make_menu reads posts[0], so skip it when the blog returned no posts.
if posts:
    make_menu(posts)
print("%.2f sec" % (time.time()-t))
# Feel free to reuse this code.

« 白絹(シルク)やぶれかぶれ | トップページ | utf-8 テキストファイルの <, >, & を html 用にエスケープ ver.1.0.1 »

「ウェブログ・ココログ関連」カテゴリの記事

「Python」カテゴリの記事

トラックバック


この記事へのトラックバック一覧です: ココログの過去ログ・アーカイブ作成 ver.1.0.1:

« 白絹(シルク)やぶれかぶれ | トップページ | utf-8 テキストファイルの <, >, & を html 用にエスケープ ver.1.0.1 »

他のアカウント

ブログ妖精

  • ココロ

Affiliate

無料ブログはココログ