# -*- coding: utf-8 -*-
"""
Created on Thu Aug 3 21:13:51 2023

@author: Arnd Helmut Hafner
"""
# This script reads the transcriptions (shakumon) and the format-classification
# items from two separate text files and rebuilds them into one HTML file in
# which the classification items appear in the heading lines. The material
# for the header and the footer is read separately from head.txt and foot.txt.
import re
import sys  # for sys.exit after error

# First line of a record, tab separated:
#   classification code (n-n-n-n-n-n), position, slip-face count, slip number.
# Raw strings keep the regex escapes (\d, \-, \+) out of Python's own
# string-escape processing, which warns about them in newer versions.
record_header_re = re.compile(
    r'^(\d+\-\d+\-\d+\-\d+\-\d+\-\d+)\t(\d+)\t(\d+)\t([\d\-\+abcd=J⑦⑨⑩⑫⑭⑮⑯⑰K()]+)$')
# Any line that does not start with a digit is transcription text.
shakumon_line_re = re.compile(r'^[^\d]')

# read shakumon-file
yoshiki = []  # classification code of each record (format "n-n-n-n-n-n")
haichi = []  # position / order of the record inside its classification
kanmen = []  # counts the slip faces carrying the same document
kanbango = []  # slip number of each record
shakumon = []  # store text; always one entry per record
recordcount = -1  # index of the record currently being read
records = {}  # A dictionary of lists of records with the same yoshikibango; yoshikibango is used as key, the record number as value
recordkeys = []  # Gathering the keys of the record-dictionary. It actually comprises a list of yoshikibango.
linecount = 0
preshakumon = True  # signals preexisting line of shakumon for the current record

# The with-statement closes the file even when sys.exit() aborts the run.
with open('shakumon.txt', 'r', encoding='utf-8-sig') as datafile:
    for line in datafile:  # reads data line by line, but storing data record by record
        linecount += 1
        m = record_header_re.search(line)
        if m:  # detecting the first line of a record and gathering yoshiki etc
            recordcount += 1
            yoshiki_key = m.group(1)
            yoshiki.append(yoshiki_key)
            haichi.append(m.group(2))
            kanmen.append(m.group(3))
            kanbango.append(m.group(4))
            # Reserve the text slot right away so that shakumon stays index
            # aligned with the other lists even if no text line follows.
            shakumon.append('')
            preshakumon = False
            if yoshiki_key in records:  # key exists: add the record number to its list
                records[yoshiki_key].append(recordcount)
            else:  # create a new key and start its list with this record number
                recordkeys.append(yoshiki_key)
                records[yoshiki_key] = [recordcount]
        elif recordcount >= 0 and shakumon_line_re.search(line):  # detecting shakumon
            shakumon[recordcount] += line
            preshakumon = True
        else:  # testing mismatches (includes text that precedes the first record)
            sys.exit('mismatch in line'+str(linecount)+':'+line)

# read the format-classification items
yoshikifile = open('yoshikikomoku.txt', 'r', encoding='utf-8-sig')
yoshikibango = []  # accumulates the format-classification codes
yoshikikomoku = [] #様式分類項目を蓄積する linecount = 0 for line in yoshikifile: #reads data line by line linecount += 1 m = re.search('^(\d+\-\d+\-\d+\-\d+\-\d+\-\d+)\t([^\t\n]+)$',line) if m: yoshikibango.append(m.group(1)) yoshikikomoku.append(m.group(2)) else:#testing mismatches sys.exit('mismatch in line'+str(linecount)+':'+line) yoshikifile.close #reading and writing the header outfile = open('liya-shakumon(python).html', 'w', encoding='utf-8-sig') headfile = open('head.txt', 'r', encoding='utf-8-sig') for line in headfile: outfile.write(line) #write navigation list out ='' #出力用の加工データを記憶する。 head = '\n\n' isoshin = 0 #現在の位相を記憶する isokyu = 0 #一つ前の位相を記憶する indento = ['\t','\t\t','\t\t\t','\t\t\t\t','\t\t\t\t\t','\t\t\t\t\t\t']#位相に合わせたインデントを作るために、タブ文字のリストを記憶する。 listcontainermae = '\n' listkomokumae = '
  • ' listkomokuato = '
  • \n' for i in range(len(yoshikibango)): isokyu = isoshin #remember the previous item's level for comparison isoshin = 6-len(re.findall('\-0',yoshikibango[i]))#compute the new item's level from the number of "-0" occurrences if isoshin == isokyu:#if the level is unchanged, simply append a new list item to out out += indento[isoshin-1] + listkomokumae + yoshikibango[i] +listkomokunaka + yoshikikomoku[i] + listkomokuato elif isoshin > isokyu:#each time the level rises, remove the trailing "" from the last list item and then close with a new list tag "". out += indento[isokyu-2] + listcontainerato isokyu -= 1 out += indento[isoshin-1] + listkomokumae + yoshikibango[i] +listkomokunaka + yoshikikomoku[i] + listkomokuato out += indento[isokyu-2] + listcontainerato#close the last list out += foot outfile.write(out) #write text data outfile.write('\n/n

     

    \n') h = 0 #holds the level of the heading line refmae = '

        釈読情報    読下文と注釈

    ' space ='

     

    \n\n' for i in range(len(yoshikibango)): h = 7-len(re.findall('\-0',yoshikibango[i])) out = ''+yoshikikomoku[i]+'\n' outfile.write(out) if yoshikibango[i] in recordkeys: liste = records[yoshikibango[i]] liste.sort(reverse=True) text = '' for l in liste: honbun = re.sub('\n','

    \n

    ',shakumon[l]) honbun = re.sub('

    \n

    $','\t'+kanbango[l]+'

    \n',honbun) honbun = '

    '+honbun if (kanmen[l] == '0') and (l+1 not in liste): honbun = honbun+refmae+kanbango[l]+refnaka+kanbango[l]+refusiro+'\n'+space elif l+1 not in liste: bango = '' n = int(kanmen[l]) while n > 0: bango = bango + kanbango[l-n] + '=' n -=1 bango = bango + kanbango[l] honbun = honbun+refmae+bango+refnaka+bango+refusiro+'\n'+space elif int(kanmen[l]) > int(kanmen[l+1]): bango = '' n = int(kanmen[l]) while n > 0: bango = bango + kanbango[l-n] + '=' n -=1 bango = bango + kanbango[l] honbun = honbun+refmae+bango+refnaka+bango+refusiro+'\n'+space elif int(kanmen[l]) < int(kanmen[l+1]): honbun = honbun elif kanmen[l] == '0' and kanmen[l+1] == '0': honbun = honbun+refmae+kanbango[l]+refnaka+kanbango[l]+refusiro+'\n'+space else: print('mismatch for '+kanbango[l]) text = honbun + text outfile.write(text) #write footer footfile = open('foot.txt', 'r', encoding='utf-8-sig') for line in footfile: outfile.write(line) outfile.close()
# NOTE(review): the section above writes the navigation list, then one heading
# per classification item followed by the text of its records, then foot.txt.
# NOTE(review): the string literals above are broken across physical lines and
# appear to have lost their markup; presumably they held HTML tags - restore
# them from the original file before running.
# NOTE(review): listkomokunaka, listcontainerato, foot, refnaka and refusiro
# are read above but no assignment to them is visible in this text.
# NOTE(review): the "elif isoshin > isokyu" branch is followed by
# "isokyu -= 1" with no visible loop or further branch; part of the level
# handling seems to be missing - TODO confirm against the original.
# NOTE(review): footfile is opened above but never closed.