summaryrefslogtreecommitdiffstats
path: root/src/silfont/scripts/psfcsv2comp.py
blob: 5ce2f76fe18359ec5a50b414ffaac77ebbda3646 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
__doc__ = '''generate composite definitions from csv file'''
__url__ = 'https://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2018 SIL International  (https://www.sil.org)'
__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

import re
from silfont.core import execute
import re

argspec = [
    ('output',{'help': 'Output file containing composite definitions'}, {'type': 'outfile'}),
    ('-i','--input',{'help': 'Glyph info csv file'}, {'type': 'incsv', 'def': 'glyph_data.csv'}),
    ('-f','--fontcode',{'help': 'letter to filter for glyph_data'},{}),
    ('--gname', {'help': 'Column header for glyph name', 'default': 'glyph_name'}, {}),
    ('--base', {'help': 'Column header for name of base', 'default': 'base'}, {}),
    ('--usv', {'help': 'Column header for USV'}, {}),
    ('--anchors', {'help': 'Column header(s) for APs to compose', 'default': 'above,below'}, {}),
    ('-r','--report',{'help': 'Set reporting level for log', 'type':str, 'choices':['X','S','E','P','W','I','V']},{}),
    ('-l','--log',{'help': 'Set log file name'}, {'type': 'outfile', 'def': 'csv2comp.log'}),
    ]

def doit(args):
    logger = args.logger
    if args.report: logger.loglevel = args.report
    # infont = args.ifont
    incsv = args.input
    output = args.output

    def csvWarning(msg, exception = None):
        m = "glyph_data warning: %s at line %d" % (msg, incsv.line_num)
        if exception is not None:
            m += '; ' + exception.message
        logger.log(m, 'W')

    if args.fontcode is not None:
        whichfont = args.fontcode.strip().lower()
        if len(whichfont) != 1:
            logger.log('-f parameter must be a single letter', 'S')
    else:
        whichfont = None

    # Which headers represent APs to use:
    apList = args.anchors.split(',')
    if len(apList) == 0:
        logger.log('--anchors option value "%s" is invalid' % args.anchors, 'S')

    # Get headings from csvfile:
    fl = incsv.firstline
    if fl is None: logger.log("Empty input file", "S")
    # required columns:
    try:
        nameCol = fl.index(args.gname)
        baseCol = fl.index(args.base)
        apCols = [fl.index(ap) for ap in apList]
        if args.usv is not None:
            usvCol =  fl.index(args.usv)
        else:
            usvCol = None
    except ValueError as e:
        logger.log('Missing csv input field: ' + e.message, 'S')
    except Exception as e:
        logger.log('Error reading csv input field: ' + e.message, 'S')

    # Now make strip AP names; pair up with columns so easy to iterate:
    apInfo = list(zip(apCols, [x.strip() for x in apList]))

    # If -f specified, make sure we have the fonts column
    if whichfont is not None:
        if 'fonts' not in fl: logger.log('-f requires "fonts" column in glyph_data', 'S')
        fontsCol = fl.index('fonts')

    # RE that matches names of glyphs we don't care about
    namesToSkipRE = re.compile('^(?:[._].*|null|cr|nonmarkingreturn|tab|glyph_name)$',re.IGNORECASE)

    # keep track of glyph names we've seen to detect duplicates
    namesSeen = set()

    # OK, process all records in glyph_data
    for line in incsv:
        base = line[baseCol].strip()
        if len(base) == 0:
            # No composites specified
            continue

        gname = line[nameCol].strip()
        # things to ignore:
        if namesToSkipRE.match(gname): continue
        if whichfont is not None and line[fontsCol] != '*' and line[fontsCol].lower().find(whichfont) < 0:
            continue

        if len(gname) == 0:
            csvWarning('empty glyph name in glyph_data; ignored')
            continue
        if gname.startswith('#'): continue
        if gname in namesSeen:
            csvWarning('glyph name %s previously seen in glyph_data; ignored' % gname)
            continue
        namesSeen.add(gname)

        # Ok, start building the composite
        composite = '%s = %s' %(gname, base)

        # The first component must *not* reference the base; all others *must*:
        seenfirst = False
        for apCol, apName in apInfo:
            component = line[apCol].strip()
            if len(component):
                if not seenfirst:
                    composite += ' + %s@%s' % (component, apName)
                    seenfirst = True
                else:
                    composite += ' + %s@%s:%s' % (component, base, apName)

        # Add USV if present
        if usvCol is not None:
            usv = line[usvCol].strip()
            if len(usv):
                composite += ' | %s' % usv

        # Output this one
        output.write(composite + '\n')

    output.close()

def cmd() : execute("",doit,argspec)
if __name__ == "__main__": cmd()