# Python で安直にローマ字変換 (quick-and-dirty romaji conversion in Python)
import unicodedata
import string
# Module metadata: author, date, license and version strings.
__author__ = "kadotanimitsuru"
__date__ = "2005-05-02"
__license__ = "public domain"
__version__ = "1.0.2"
# ASCII letters and digits; toroman() passes these characters through
# untouched.  Starting from a ``u""`` literal yields a text string on both
# Python 2 and Python 3 without relying on the Python 2-only ``unicode``.
ALNUM = u"" + string.ascii_letters + string.digits

# The interpreter's text type: ``unicode`` on Python 2, ``str`` on Python 3.
_TEXT = type(u"")

# Unicode general categories that toroman() treats as letters.
_LETTER_CATEGORIES = ("Lu", "Ll", "Lt", "Lo")

# CJK digits, ordered so that the index of a character is its value.
_CJK_DIGITS = u"〇一二三四五六七八九"

# English words substituted for the CJK power-of-ten numerals.
# (Original author's note: spelling these out may be overdoing it.)
_CJK_UNITS = {
    u"十": u" ten ",
    u"百": u" hundred ",
    u"千": u" thousand ",
    u"万": u" ten thousand ",
    u"億": u" hundred million ",
    u"兆": u" trillion ",
}


def _replace_numeral(c, chinese_numeral):
    """Return the decimal text for the numeric character *c*, else *c*."""
    num = unicodedata.numeric(c, None)
    if num is not None:
        if num == int(num):
            return _TEXT(int(num))
        # Fractions such as U+00BD would be truncated to "0" by int();
        # leave them alone so the later NFKC pass expands them instead.
        return c
    if not chinese_numeral:
        return c
    value = _CJK_DIGITS.find(c)
    if value >= 0:
        return _TEXT(value)
    return _CJK_UNITS.get(c, c)


def _romanize_char(c, romanonly):
    """Return the replacement text for one NFKC-normalized character."""
    if c in ALNUM:
        return c
    if c.isspace():
        return u" "
    # Words of the Unicode character name, e.g. ["HIRAGANA", "LETTER", "TU"].
    # The list is empty for characters that have no name.
    words = unicodedata.name(c, "").split()
    if words and words[0] == "CJK":
        # Ideographs have no reading in their name: keep them, or blank
        # them out when only Roman output is wanted.
        return u" " if romanonly else c
    if words and unicodedata.category(c) in _LETTER_CATEGORIES:
        reading = words[-1]
        if len(words) >= 2 and words[-2] == "SMALL":
            # "... LETTER SMALL YA" -> "ya"
            return reading.lower()
        # "... LETTER TU" -> "Tu"
        return reading.capitalize()
    return u" " if romanonly else c


def toroman(text, romanonly=False, chinese_numeral=False):
    """Convert *text* to a rough Roman-alphabet transcription.

    The conversion runs in two passes:

    1. Every character for which ``unicodedata.numeric`` reports an integral
       value is replaced by that value in decimal digits.  When
       *chinese_numeral* is true, characters without a numeric value that
       are CJK digits become decimal digits and the CJK units (ten, hundred,
       thousand, ...) become English words padded with spaces.
    2. The result is NFKC-normalized.  ASCII letters and digits are kept,
       whitespace becomes a single space, and any other letter is replaced
       by the last word of its Unicode name: lower-cased when the name marks
       it as ``SMALL``, capitalized otherwise.  CJK ideographs and remaining
       non-letters are kept, or replaced by a space when *romanonly* is true.

    Because only the last word of the character name is used, this is a
    simplistic transliteration and not a linguistically correct one.

    NOTE(review): on interpreters whose Unicode database already gives CJK
    numerals a numeric value, pass 1 converts them whether or not
    *chinese_numeral* is set -- confirm against the target Python version.

    Args:
        text: the value to convert; it is coerced to the text type first.
        romanonly: replace everything that cannot be romanized by a space.
        chinese_numeral: also convert CJK digits and units (see pass 1).

    Returns:
        The converted text string.
    """
    source = _TEXT(text)
    with_digits = u"".join(
        _replace_numeral(c, chinese_numeral) for c in source)
    normalized = unicodedata.normalize("NFKC", with_digits)
    return u"".join(_romanize_char(c, romanonly) for c in normalized)
def tofilename(text):
    """Build a lower-case, underscore-separated base file name from *text*.

    *text* is romanized with ``toroman`` in Roman-only mode, so everything
    that cannot be romanized turns into whitespace; the remaining words are
    then joined with underscores and lower-cased.
    """
    romanized = toroman(text, True)
    words = romanized.strip().split()
    joined = "_".join(words)
    return joined.lower()
if __name__ == "__main__":
    # Interactive demo: read lines until an empty line (or end of input) and
    # print the romanized text and the derived file name for each of them.
    import sys

    # ``raw_input`` exists only on Python 2; Python 3 renamed it ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        try:
            a = read_line("text:").strip()
        except EOFError:
            # End of input ends the session instead of raising a traceback.
            break
        if isinstance(a, bytes):
            # Python 2 consoles deliver bytes; decode them with the console
            # encoding rather than the implicit ASCII codec, which fails on
            # any non-ASCII input.
            a = a.decode(getattr(sys.stdin, "encoding", None) or "utf-8")
        if not a:
            break
        # A single formatted argument prints the same line whether ``print``
        # is a statement (Python 2) or a function (Python 3).
        print("toroman : %s" % toroman(a, chinese_numeral=True))
        print("tofilename: %s" % tofilename(a))
# 好きに流用してください。
# Example session:
#
#   text:“Hello,World!”
#   toroman : “Hello,World!”
#   tofilename: hello_world
#   text:つちのこ、のこのこ。
#   toroman : TuTiNoKo、NoKoNoKo。
#   tofilename: tutinoko_nokonoko
#   text:な!なんだってぇ~!?
#   toroman : Na!NaNDatuTee~!?
#   tofilename: na_nandatutee
#   text:かな漢字をローマ字に変換する。
#   toroman : KaNa漢字WoRoーMa字Ni変換SuRu。
#   tofilename: kana_woro_ma_ni_suru
#   text:우리나라半万年の歴史
#   toroman : URiNaRa半 ten thousand 年No歴史
#   tofilename: urinara_no
#   text:ПРОЛЕТАРИИ ВСЕХ СТРАН, СОЕДИНЯЙТЕСЬ!
#   toroman : PeErOElIeTeAErII VeEsIeHa EsTeErAEn, EsOIeDeIEnYaITeIeEsSign!
#   tofilename: peeroelieteaerii_veesieha_esteeraen_esoiedeienyaiteieessign
#   text:七五三
#   toroman : 753
#   tofilename:
#   text:ギャフン
#   toroman : GiyaHuN
#   tofilename: giyahun
