ココログの過去ログ・アーカイブ作成 ver.1.0.1
# -*- coding: utf-8 -*-
"""Cocolog archive generator ver.1.0.1.

Downloads the posts of one Cocolog weblog over XML-RPC and writes a static
HTML archive: one page per post, one page per month, one page per category
and a top-level index (table of contents).

Requires Python (2.6+ or 3.x).  For CSS, Cocolog's own ``style`` element
(or ``styles.css``) works reasonably well.
"""

import calendar
import io
import os
import os.path
import sys
import time

try:  # Python 2 module names
    import xmlrpclib
    import urlparse
except ImportError:  # Python 3 equivalents, bound to the same names
    import xmlrpc.client as xmlrpclib
    import urllib.parse as urlparse

try:
    text_type = unicode  # Python 2
except NameError:
    text_type = str  # Python 3

WEBLOG = "http://tuchinoko.moe-nifty.com/oboegaki/"  # set this to your own Cocolog top URL
ACCOUNT = "hoge"  # Cocolog account
PASSWORD = "????????"  # password

# Setting the following two to False makes the run faster.
TRACKBACK = True  # False to skip fetching trackbacks
CATEGORIES = True  # False to skip fetching categories

MAXTITLES = 10000  # maximum number of posts to fetch

WEEK = (u"月曜日", u"火曜日", u"水曜日", u"木曜日",
        u"金曜日", u"土曜日", u"日曜日")  # weekday names, indexed by tm_wday

# Common head of every generated page.  The XML declaration
# ('<?xml version="1.0" encoding="utf-8"?>') is deliberately left out:
# according to the original author it rendered oddly in IE (quirks mode?).
HTMLHEAD = "".join([
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="ja" lang="ja">\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n'
    '<link rel="alternate" type="application/atom+xml" title="Atom"'
    ' href="%satom.xml" />\n' % WEBLOG,
    '<link rel="alternate" type="application/rss+xml" title="RSS"'
    ' href="%sindex.rdf" />\n' % WEBLOG])


class Post(object):
    """One weblog entry plus the logic to render it as an archive page."""

    def __init__(self, post, blogname, userdict, server):
        """Build a Post from one metaWeblog post struct.

        post     -- mapping with the keys "title", "dateCreated", "link",
                    "description", "mt_text_more", "mt_convert_breaks",
                    "userid" and "postid".
        blogname -- display name of the weblog.
        userdict -- mapping of user id to display name.
        server   -- XML-RPC proxy used to fetch trackbacks and categories.
        """
        self.blogname = blogname
        self.title = text_type(post["title"])
        # Hard-coded assumption: the timestamp looks like
        # "YYYY-MM-DDTHH:MM:SSZ" and is in UTC; it is converted to local time.
        self.time = time.localtime(calendar.timegm(time.strptime(
            str(post["dateCreated"]).rstrip("Z") + "UTC",
            "%Y-%m-%dT%H:%M:%S%Z")))
        self.link = post["link"]
        url = urlparse.urlparse(self.link)[2]
        # The permalink path must be exactly /<weblog>/<year>/<month>/<file>.
        (dummy, self.weblogname, self.dir_year,
         self.dir_month, self.filename) = url.split("/")
        self.url = "%s/%s/%s" % (
            self.dir_year, self.dir_month, self.filename)
        self.description = text_type(post["description"])
        self.text_more = text_type(post["mt_text_more"])
        if post["mt_convert_breaks"] != "0":
            # Quick hack: emulate "convert line breaks" with plain <br />.
            self.description = self.description.replace("\n", "<br />\n")
            self.text_more = self.text_more.replace("\n", "<br />\n")
        self.username = userdict[post["userid"]]
        self.postid = post["postid"]
        self.trackback = []
        if TRACKBACK:
            self.trackback = server.mt.getTrackbackPings(self.postid)
        self.categories = []
        if CATEGORIES:
            self.categories = server.mt.getPostCategories(
                self.postid, ACCOUNT, PASSWORD)
        # Progress indicator: one dot per fetched post.
        sys.stdout.write(". ")
        sys.stdout.flush()

    def replace_text(self, replace_text):
        """Apply each (old, new) pair to the body and the extended body."""
        for old, new in replace_text:
            self.description = self.description.replace(old, new)
            self.text_more = self.text_more.replace(old, new)

    def get_html(self, prev=None, next=None):
        """Return the complete HTML page for this post.

        prev / next -- neighbouring Post objects (or None) used for the
        navigation links.
        """
        if prev:
            prev_url = "../../%s" % prev.url
        if next:
            next_url = "../../%s" % next.url
        data = [
            HTMLHEAD,
            u'<title>%s: %s (アーカイブ)</title>\n'
            % (self.blogname, tagcut(self.title)),
            '<link rel="stylesheet" type="text/css" href="../../styles.css" />\n'
            '<link rel="stylesheet" type="text/css" href="../../post.css" />\n'
            '<link rel="stylesheet" type="text/css" href="styles.css" />\n'
            '<link rel="start" href="../../index.html" title="Home" />\n']
        if prev:
            data += ['<link rel="prev" href="%s" title="%s" />\n'
                     % (prev_url, tagcut(prev.title))]
        if next:
            data += ['<link rel="next" href="%s" title="%s" />\n'
                     % (next_url, tagcut(next.title))]
        data += [
            '</head>\n<body>\n<div id="container">\n<div id="banner">\n'
            u'<h1><a href="%s">%s</a></h1>\n<h2>(アーカイブ)</h2>\n</div>'
            % (WEBLOG, self.blogname),
            '<div class="content">\n<div class="entry-nav">\n<p align="right">']
        # The guillemets are written as entities so these literals stay
        # pure ASCII and can be formatted with unicode titles on Python 2.
        if prev:
            data += ['<a href="%s">&laquo; %s</a> | ' % (prev_url, prev.title)]
        data += [u'<a href="../../index.html">目次</a>']
        if next:
            data += [' | <a href="%s">%s &raquo;</a>' % (next_url, next.title)]
        data += [
            '</p>\n</div>\n'
            '<h3><a href="%s">%s</a></h3>\n' % (self.link, self.title),
            '<div class="entry-body">\n%s</div>\n' % self.description]
        if self.text_more.strip():
            data += ['<hr />\n<div class="entry-more">\n%s</div>\n'
                     % self.text_more]
        t = self.time
        y, m = t.tm_year, t.tm_mon
        s = urlparse.urlsplit(WEBLOG)
        # The author link points at about.html on the weblog's host.
        about = urlparse.urlunsplit((s[0], s[1], "about.html", "", ""))
        data += [
            '<p class="posted" align="right">\n'
            u'<a href="../../index.html#year%04d">%d年</a>' % (y, y),
            u' <a href="../../%04d/%02d.html">%d月</a> %d日 %s %d時 %d分'
            % (y, m, m, t.tm_mday, WEEK[t.tm_wday], t.tm_hour, t.tm_min),
            ' <a href="%s">%s</a>' % (about, self.username)]
        if self.categories:
            data += [' in ', ", ".join(
                ['<a href="../../category/%s.html">%s</a>'
                 % (x["categoryId"], text_type(x["categoryName"]))
                 for x in self.categories])]
        data += ['</p>\n']
        if self.trackback:
            data += [
                u'<h2 id="trackback">トラックバック</h2>\n'
                '<div class="trackback-content"><ul>\n%s</ul>\n</div>\n'
                % "".join(['<li><a href="%s">%s</a></li>\n'
                           % (x["pingURL"], text_type(x["pingTitle"]))
                           for x in self.trackback])]
        data += ['</div>\n</div>\n</body>\n</html>\n']
        return "".join(data)

    def make_htmlfile(self, prev=None, next=None, path=""):
        """Write this post's page to <path>/<weblog>/<year>/<month>/<file>."""
        writehtml(os.path.join(path, self.weblogname, self.dir_year,
                               self.dir_month, self.filename),
                  self.get_html(prev, next))


def tagcut(html):
    """Return html with everything between "<" and ">" removed.

    Double quotes outside tags become &quot; so the result can be placed
    inside a double-quoted attribute value.
    """
    data = []
    in_tag = False
    for ch in html:
        if in_tag:
            if ch == ">":
                in_tag = False
        elif ch == "<":
            in_tag = True
        elif ch == '"':
            data.append("&quot;")
        else:
            data.append(ch)
    return "".join(data)


def writehtml(filename, html):
    """Write html to filename as UTF-8, creating parent directories."""
    dirname = os.path.dirname(filename)
    # dirname is "" for a bare file name; os.makedirs("") would fail.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    # The context manager closes the file even if the write fails.
    with io.open(filename, "w", encoding="utf-8") as f:
        f.write(text_type(html))


def get_posts():
    """Fetch up to MAXTITLES posts of WEBLOG and return them as Post objects.

    Raises LookupError when ACCOUNT has no blog whose URL equals WEBLOG.
    """
    server = xmlrpclib.ServerProxy("http://app.cocolog-nifty.com/t/api")
    for blog in server.blogger.getUsersBlogs("", ACCOUNT, PASSWORD):
        if blog["url"] == WEBLOG:
            blog_id, blogname = blog["blogid"], text_type(blog["blogName"])
            break
    else:
        # String exceptions are not valid Python; raise a real exception.
        raise LookupError("%s not found." % WEBLOG)
    user = server.blogger.getUserInfo("", ACCOUNT, PASSWORD)
    nickname = text_type(user["nickname"])
    print("%s 's blog: %s" % (nickname, blogname))
    userdict = {user["userid"]: nickname}
    return [Post(x, blogname, userdict, server)
            for x in server.metaWeblog.getRecentPosts(
                blog_id, ACCOUNT, PASSWORD, MAXTITLES)]


def make_month_html(posts, prev=None, next=None):
    """Write the monthly index page for posts (all from one month).

    prev / next -- post lists of the neighbouring months (or None), used
    for the navigation links.
    """
    first = posts[0]
    blogname = first.blogname
    weblogdir = first.weblogname
    year, month = first.time.tm_year, first.time.tm_mon
    data = [
        HTMLHEAD,
        u'<title>%d年 %d月: %s (アーカイブ)</title>\n' % (year, month, blogname),
        '<link rel="stylesheet" type="text/css" href="../styles.css" />\n'
        '<link rel="stylesheet" type="text/css" href="../month.css" />\n']
    if prev:
        y, m = prev[0].time.tm_year, prev[0].time.tm_mon
        prev_date = ("../%04d/%02d.html" % (y, m), u"%d年 %d月" % (y, m))
        data += ['<link rel="prev" href="%s" title="%s" />\n' % prev_date]
    if next:
        y, m = next[0].time.tm_year, next[0].time.tm_mon
        next_date = ("../%04d/%02d.html" % (y, m), u"%d年 %d月" % (y, m))
        data += ['<link rel="next" href="%s" title="%s" />\n' % next_date]
    data += [
        '<link rel="start" href="../index.html" title="Home" />\n'
        '</head>\n<body>\n<div id="container">\n<div id="banner">\n'
        u'<h1><a href="../index.html#year%04d">%d年</a> %d月</h1>\n'
        % (year, year, month),
        u'<h2><a href="%s">%s</a> (アーカイブ)</h2>' % (WEBLOG, blogname),
        '</div>\n<div class="content">\n<div class="entry-nav">\n'
        '<p align="right">']
    if prev:
        data += [u'<a href="%s">&laquo; %s</a> | ' % prev_date]
    data += [u'<a href="../index.html">目次</a>']
    if next:
        data += [u' | <a href="%s">%s &raquo;</a>' % next_date]
    data += [
        '</p>\n</div>\n<ol>\n',
        "".join([
            u'<li><a href="../%s"><small>%02d日 %02d:%02d</small> %s</a></li>\n'
            % (p.url, p.time.tm_mday, p.time.tm_hour, p.time.tm_min, p.title)
            for p in posts]),
        '</ol>\n</div>\n</div>\n</body>\n</html>\n']
    writehtml(os.path.join(weblogdir, "%04d" % year, "%02d.html" % month),
              "".join(data))


def make_category_html(posts):
    """Write one page per category and return the category summary.

    Returns a list of (category id, category name, post count) tuples,
    sorted by post count (largest first) and then by name.
    """
    blogname = posts[0].blogname
    weblogdir = posts[0].weblogname
    categories = {}  # category id -> (category name, [posts])
    for p in posts:
        for c in p.categories:
            cat_id = c["categoryId"]
            if cat_id in categories:
                categories[cat_id][1].append(p)
            else:
                categories[cat_id] = (text_type(c["categoryName"]), [p])
    for cat_id, (name, cat_posts) in categories.items():
        data = [
            HTMLHEAD,
            u'<title>%s: %s (アーカイブ)</title>\n' % (name, blogname),
            '<link rel="stylesheet" type="text/css" href="../styles.css" />\n'
            '<link rel="stylesheet" type="text/css" href="styles.css" />\n'
            '<link rel="start" href="../index.html" title="Home" />\n'
            '</head>\n<body>\n<div id="container">\n<div id="banner">\n'
            u'<h1>%s</h1>\n<h2><a href="%s">%s</a> (アーカイブ)</h2>\n'
            % (name, WEBLOG, blogname),
            '</div>\n<div class="content">\n<div class="entry-nav">\n'
            u'<p align="right"><a href="../index.html">目次</a></p>\n</div>\n']
        for y, monthlist in make_yearlist(cat_posts):
            data += [u'<h2><a href="../index.html#year%04d">%d年</a></h2>\n'
                     % (y, y)]
            for m, p_list in monthlist:
                data += [u'<h3><a href="../%04d/%02d.html">%d月</a></h3>\n'
                         % (y, m, m),
                         '<ol>\n']
                for p in p_list:
                    t = p.time
                    data += [
                        u'<li><a href="../%s"><small>%02d日 %02d:%02d</small>'
                        % (p.url, t.tm_mday, t.tm_hour, t.tm_min),
                        ' %s</a>' % p.title]
                    if len(p.categories) > 1:
                        # List the post's other categories, not this one.
                        data += ['<small> in %s</small>' % ", ".join(
                            ['<a href="../category/%s.html">%s</a>'
                             % (x["categoryId"], text_type(x["categoryName"]))
                             for x in p.categories
                             if cat_id != x["categoryId"]])]
                    data += ['</li>\n']
                data += ['</ol>\n']
        data += ['</div>\n</div>\n</body>\n</html>\n']
        writehtml(os.path.join(weblogdir, "category", "%s.html" % cat_id),
                  "".join(data))
    summary = [(cat_id, name, len(cat_posts))
               for (cat_id, (name, cat_posts)) in categories.items()]
    # Most posts first; ties broken by category name.
    summary.sort(key=lambda item: (-item[2], item[1]))
    return summary


def make_yearlist(posts):
    """Group posts by year and month, newest first.

    Returns [(year, [(month, [post, ...]), ...]), ...] with years and
    months in descending order; posts keep their input order.
    """
    yeardict = {}
    for p in posts:
        months = yeardict.setdefault(p.time.tm_year, {})
        months.setdefault(p.time.tm_mon, []).append(p)
    return [(y, [(m, yeardict[y][m])
                 for m in sorted(yeardict[y], reverse=True)])
            for y in sorted(yeardict, reverse=True)]


def make_menu(posts):
    """Write index.html plus every monthly (and category) page."""
    blogname = posts[0].blogname
    weblogdir = posts[0].weblogname
    data = [
        HTMLHEAD,
        u'<title>目次: %s (アーカイブ)</title>\n' % blogname,
        '<link rel="stylesheet" type="text/css" href="styles.css" />\n'
        '<link rel="stylesheet" type="text/css" href="menu.css" />\n'
        '</head>\n<body>\n<div id="container">\n'
        u'<div id="banner">\n<h1>目次</h1>\n'
        u'<h2><a href="%s">%s</a> (アーカイブ)</h2>\n' % (WEBLOG, blogname),
        u'</div>\n<div class="content">\n']
    if CATEGORIES:
        data += [u'<dl>\n<dt>カテゴリー:</dt>\n<dd>%s</dd>\n</dl>\n'
                 % ", ".join(['<a href="./category/%s.html">%s(%d)</a>' % x
                              for x in make_category_html(posts)])]
    # Month groups, newest first, padded with None at both ends so each
    # month can look at its neighbours.
    all_list = [None]
    for y, monthlist in make_yearlist(posts):
        for (m, month_posts) in monthlist:
            all_list.append(month_posts)
        data += [
            u'<h3 id="year%04d">%d年</h3>\n' % (y, y),
            '<p>',
            ", ".join([u'<a href="./%04d/%02d.html">%d月(%d)</a>'
                       % (y, m, m, len(month_posts))
                       for (m, month_posts) in monthlist]),
            '</p>\n']
    data += ['</div>\n</div>\n</body>\n</html>\n']
    writehtml(os.path.join(weblogdir, "index.html"), "".join(data))
    all_list.append(None)
    for i in range(1, len(all_list) - 1):
        # The list is newest first, so "prev" is the following element.
        make_month_html(all_list[i], all_list[i + 1], all_list[i - 1])


def main():
    """Fetch all posts and write the complete archive."""
    # time.clock() no longer exists in current Python; wall-clock time is
    # what the final report is meant to show.
    started = time.time()
    posts = get_posts()
    print("ok\n%d posts" % len(posts))
    if not posts:
        return  # nothing to archive; make_menu() needs at least one post
    # Rewrite links between posts so they point into the archive.
    replace_url = [('href="%s"' % p.link, 'href="../../%s"' % p.url)
                   for p in posts]
    for p in posts:
        p.replace_text(replace_url)
    padded = [None] + posts + [None]
    for i in range(1, len(padded) - 1):
        padded[i].make_htmlfile(padded[i + 1], padded[i - 1])
    make_menu(posts)
    print("%.2f sec" % (time.time() - started))


if __name__ == "__main__":
    main()

# Feel free to reuse this however you like.
«
「ウェブログ・ココログ関連」カテゴリの記事
- ココログオリジナルテーマにココロちゃんが無い!(2010.05.28)
- はてなしてます(2008.11.02)
- ココロのパンの缶詰(2008.08.10)
- こういうときに限って驚くほど初音ミク オリジナルが生きてくるものなのだ。(2008.01.21)
- ブログ妖精と VOCALOID とオプション(2008.01.06)
「Python」カテゴリの記事
- from __future__ import hatsune(2008.09.15)
- Pygame1.8.1出たよ!(2008.08.02)
- それは kokoro.py と言うプログラム(2008.04.27)
- smf2txt.py ‐ SMF をテキストに(2008.04.09)
- 2007年下半期ライトノベルサイト杯結果と、同じのに投票した方々(2008.01.28)
«