#!/usr/bin/python from __future__ import print_function, unicode_literals __license__ = """ This file is part of GNU FreeFont. GNU FreeFont is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. GNU FreeFont is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU FreeFont. If not, see . """ __author__ = "Emmanuel Vallois" __email__ = "vallois@polytech.unice.fr" __copyright__ = "Copyright 2011 Emmanuel Vallois" __date__ = "$Date$" __version__ = "$Revision$" __doc__ = """ Writes in the file named by the first argument an HTML page comprising a table for testing joining cursive script characters. Runs under normal Python, version 2.7 or above. Typical usage: unicode_joining.py "Unicode joining test page.html" """ import sys from codecs import open from string import Template from collections import OrderedDict from itertools import chain _module_missing_msg = """Please run generate_arabic_shaping.py to generate arabic_shaping.py""" try: from arabic_shaping import arabic_shapings, joining_type except: print( _module_missing_msg, file=sys.stderr) sys.exit( 1 ) if len(sys.argv) > 1: outfile = sys.argv[1] else: outfile = 'Unicode joining test page.html' sys.stdout = open(outfile, 'w', 'utf-8') class OrderedDefaultDict(OrderedDict): def __missing__(self, key): self[key] = rv = [] return rv def move_to_end(self, key): tmp = self[key] del self[key] self[key] = tmp arabic_ranges = tuple(chain(range(0x600, 0x6FF +1), range(0x750, 0x77F +1), range(0x8A0, 0x8FF))) unicode61_new_ranges = [0x604, 0x8A0] unicode61_new_ranges.extend(range(0x8A2, 0x8AC + 1)) unicode61_new_ranges.extend(range(0x8E4, 0x8FE + 1)) unicode62_new_ranges = [0x605, 0x8A1] unicode62_new_ranges.extend(range(0x8AD, 0x8B1 + 1)) unicode62_new_ranges.append(0x8FF) shapings = filter(lambda s: s.joining_type in 'RD' and (s.joining_group != 'No_Joining_Group' or s.code_point not in arabic_ranges), arabic_shapings.values()) jg_shapings_arabic = OrderedDefaultDict() jg_shapings_other_scripts = OrderedDefaultDict() for s in shapings: if s.code_point in arabic_ranges: jg_shapings_arabic[s.joining_group].append(s) else: jg_shapings_other_scripts[s.joining_group].append(s) if s.code_point == 0x62B: jg_shapings_arabic.move_to_end('TEH MARBUTA') jg_shapings_arabic['TEH MARBUTA GOAL'] elif s.code_point == 0x642: jg_shapings_arabic.move_to_end('GAF') jg_shapings_arabic['SWASH KAF'] elif s.code_point == 0x646: jg_shapings_arabic['NYA'] elif s.code_point == 0x647: jg_shapings_arabic['KNOTTED HEH'] jg_shapings_arabic['HEH GOAL'] elif s.code_point == 0x64A: jg_shapings_arabic.move_to_end('FARSI YEH') elif s.code_point in chain(range(0x627, 0x63A + 1), range(0x641, 0x64A + 1)): jg_shapings_arabic.move_to_end(s.joining_group) #for jg, ls in jg_shapings_arabic.items(): # for s in ls: # print(jg, ls, file=sys.stderr) table_head = ''' {} ''' table_internal_title = '''''' def print_table(): contextual_form_formats = { 'isolat':'{}', 'final>':'‍{}', 'medial':'‍{}‍', 'initia':'{}‍' } contextual_forms = 'isolat', 'final>', 'medial', 'initia' def print_shaping(shaping, rowspan): # print('print_shaping', shaping, file=sys.stderr) cp = shaping.code_point char = unichr(cp) print(''.format(' class="nextVersion"' if cp in unicode61_new_ranges else ' class="furtherFuture"' if cp in unicode62_new_ranges else '')) if rowspan: print(''.format(rowspan, shaping.joining_group)) print(''.format(cp)) print(''.format(shaping.short_name)) i = 0 for form in contextual_forms: print(''.format(contextual_form_formats[form].format(char))) i += 1 if { 'R':'final>', 'D':'' }[joining_type(cp)] == form: break if i < 4: print(''.format(4 - i)) print(''.format('\u0640' * (4 - i) + char * (i - 1) + ' ' + char)) print('') print(table_head.format(caption)) print(table_internal_title.format('Arabic')) for shaping_list in jg_shapings_arabic.values(): rowspan = len(shaping_list) for shaping in shaping_list: print_shaping(shaping, rowspan) rowspan = None print(table_internal_title.format('Syriac, Nko and Mandaic')) for shaping_list in jg_shapings_other_scripts.values(): rowspan = len(shaping_list) for shaping in shaping_list: print_shaping(shaping, rowspan) rowspan = None print('

Joining Group	Code Point	Short Name	Contextual Forms
{}
Joining Group	Code Point	Short Name	Isolated	Final	Medial	Initial	Joined
{}	{:04X}	{}	{}		{}

') html_heading = Template(''' $title

$title

Choose the font to test:

''') caption=''' New characters in Unicode 6.1, which will be published in February 2012. These can be relied upon and will not change or be removed. See the Unicode chart for the new block Arabic Extended-A, and for more about these characters, see N3734 for U+0604, the complete proposal for most characters, N3791 for U+08F0-U+08F3.
Future new characters in Unicode 6.2. These can will probably be standardized this way, but could in principle still change or be removed. See N3990, in 4.2 Orthography for U+0605, N4072 proposal about U+08AD-U+08B1, and N3989 proposal about U+08FF.''' def print_arabic_test_page(): print(html_heading.substitute(title='Test of Joining Characters From Unicode Cursive Scripts')) print_table() print('') print('') print_arabic_test_page()