#! /usr/bin/env python
#
# Author: Aleix Conchillo Flaque <aconchillo@ieec.cat>
# Start date: Fri Jul 28, 2005 12:05
#
# This script parses the acronyms in the specified input TeX file (-i
# argument) and tries to find a description in all TeX files found in
# the acronyms directory (-d argument). Then, it will create two
# files: one with acronyms and their descriptions and one with
# conflicts, that is "Duplicated", "Undefined" or "Excluded" acronyms.
#
# The acronym format definition is:
#
#   \nomenclature{ACRONYM}{Definition}
#
# It is possible to parse TeX files recursively passing the "-r"
# argument. This will parse all the files included with "\input".
#
# A user defined acronyms file can be specified via the "-u"
# argument. Note that a user defined acronym takes precedence over a
# global acronym (the ones parsed from the acronyms directory).
#
# If an acronym is to be excluded, it is necessary to add it in the
# user acronyms file as an empty acronym:
#
#   \nomenclature{ACRONYM}{}
#

import getopt
import glob
import os
import re
import sys

def exit_usage():
    """Print the command-line usage summary and exit with status 1.

    The text is written with ``sys.stdout.write`` rather than the
    ``print`` statement so the function produces the same output on
    Python 2 and also parses on Python 3.
    """
    usage = "\nUsage: %s [-r] -d acronyms_dir -u user_acronyms_file " \
        "-i main_file -o acronyms_file -e conflicts_file \n" \
        % sys.argv[0]
    # The print statement appended a newline; keep it.
    sys.stdout.write(usage + "\n")
    sys.exit(1)

def file_exists(f):
    """Return a true value if the path *f* can be opened for reading.

    The check is done by actually opening the file, so directories and
    unreadable files count as "not existing". The handle is closed
    again immediately so that probing does not leak file descriptors.
    """
    try:
        handle = open(f)
    except IOError:
        return False
    handle.close()
    return True

def add_acronyms(acronyms, new_acronyms):
    """Merge *new_acronyms* into *acronyms* in place.

    Both dictionaries map an acronym to its definition(s). A value in
    *new_acronyms* may be a single definition or a list of definitions;
    in *acronyms* every value is kept as a flat list of definitions.

    Definitions for an acronym that is already present are appended to
    its existing list (so duplicates show up as a list longer than
    one). Lists are copied, so later changes to *acronyms* never modify
    *new_acronyms*.
    """
    for key, value in new_acronyms.items():
        if isinstance(value, list):
            definitions = list(value)
        else:
            definitions = [value]
        if key in acronyms:
            # extend, not append: appending a list would nest it.
            acronyms[key].extend(definitions)
        else:
            acronyms[key] = definitions

def tex_input_files(filename):
    """Return the file names included by *filename* via ``\\input{...}``.

    Only lines that start with ``\\input{`` are recognised. The name is
    everything up to the first closing brace, so trailing text on the
    same line (for example a comment containing braces) is not made
    part of the file name. Names are returned in order of appearance,
    exactly as written in the TeX source.
    """
    pattern = re.compile(r"^\\input\{(.*?)\}")
    filenames = []
    # The with-block guarantees the file is closed, even on errors.
    with open(filename, "r") as tex_file:
        for line in tex_file:
            found = pattern.match(line)
            if found:
                filenames.append(found.group(1))
    return filenames

def parse_acronyms_file(filename):
    """Parse the ``\\nomenclature{ACRONYM}{Definition}`` lines of a file.

    Only lines starting with ``\\nomenclature{`` are considered. Returns
    a dictionary mapping each acronym to the list of its definitions in
    order of appearance; an acronym defined more than once therefore
    has a list with several entries, and an empty definition is stored
    as the empty string.

    The acronym is matched non-greedily so that a definition which
    itself contains ``}{`` (e.g. ``\\frac{a}{b}``) is not split in the
    wrong place. The definition runs up to the last closing brace on
    the line, which keeps nested braces intact.
    """
    pattern = re.compile(r"^\\nomenclature\{(.*?)\}\{(.*)\}")
    acronyms = {}
    # The with-block guarantees the file is closed, even on errors.
    with open(filename, "r") as tex_file:
        for line in tex_file:
            found = pattern.match(line)
            if found:
                acronym, definition = found.group(1), found.group(2)
                acronyms.setdefault(acronym, []).append(definition)
    return acronyms

def parse_acronyms_dir(dir):
    """Collect the acronyms defined in every ``*.tex`` file of *dir*.

    Each file is parsed with parse_acronyms_file() and the results are
    merged with add_acronyms(). Returns the merged dictionary, keyed by
    acronym.
    """
    acronyms = {}
    # glob returns its matches in arbitrary order; sorting makes the
    # order of duplicated definitions reproducible between runs.
    for tex_file in sorted(glob.glob(os.path.join(dir, "*.tex"))):
        add_acronyms(acronyms, parse_acronyms_file(tex_file))
    return acronyms

def parse_line(regex, line, matches):
    """Append every match of the compiled *regex* in *line* to *matches*.

    *matches* is modified in place; nothing is returned. A line without
    matches leaves the list untouched.
    """
    # Extending with an empty result is a no-op, so no guard is needed.
    matches.extend(regex.findall(line))

def append_acronyms_file(filename, skip_filename, recursive, acronyms,
                         _visited=None):
    """Collect the acronyms used in a TeX file into *acronyms*.

    An acronym is a run of two or more upper-case letters/digits,
    optionally followed by lower-case letters/digits (as in "PDFs"),
    that is not embedded in a longer word. Each acronym found is stored
    in the *acronyms* dictionary as ``acronyms[acronym] = acronym``.

    filename      -- TeX file to scan; ".tex" is appended when the name
                     as given is not an existing file.
    skip_filename -- file that must not be scanned (the generated
                     acronyms file); compared with and without ".tex".
    recursive     -- when true, files included with \\input are scanned
                     too.
    acronyms      -- dictionary updated in place.
    _visited      -- internal: set of files already scanned, used to
                     stop on cyclic or repeated \\input chains.
    """
    if (filename == skip_filename) or (filename + ".tex" == skip_filename):
        return

    # \input{...} usually omits the extension.
    if not os.path.isfile(filename):
        filename = filename + ".tex"

    # Scanning a file twice cannot add anything new, and a file that
    # (indirectly) includes itself would otherwise recurse forever.
    if _visited is None:
        _visited = set()
    if filename in _visited:
        return
    _visited.add(filename)

    # Look-around assertions are used instead of consuming the
    # surrounding non-word characters: with a consuming "\W" on both
    # sides, findall() skipped every second acronym in "ABC DEF GHI"
    # and missed an acronym at the very end of the file.
    pattern = re.compile(r"(?<!\w)([A-Z0-9]{2,})[a-z0-9]*(?!\w)")

    sys.stdout.write("- Parsing %s\n" % filename)
    with open(filename, "r") as tex_file:
        for line in tex_file:
            for acronym in pattern.findall(line):
                if acronym not in acronyms:
                    acronyms[acronym] = acronym

    if recursive:
        for f in tex_input_files(filename):
            append_acronyms_file(f, skip_filename, recursive, acronyms,
                                 _visited)

def intersect_acronyms(global_acronyms, user_acronyms, tex_acronyms):
    """Look up a definition for every acronym used in the TeX sources.

    global_acronyms and user_acronyms map an acronym to its list of
    definitions; tex_acronyms holds the acronyms found in the document
    as keys. A user entry always takes precedence over a global one.

    Returns a tuple of three dictionaries:

    acronyms_int -- acronym -> its single definition
    acronyms_dup -- acronym -> list of definitions, for acronyms defined
                    more than once in the source that applies
    acronyms_exc -- acronym -> "", for acronyms the user excluded by
                    giving them an empty definition

    Acronyms without any definition appear in none of the three. A
    notice is printed whenever a user definition overrides a global
    one. Acronyms are processed in sorted order so the notices come out
    in a stable order.
    """
    acronyms_int = {}
    acronyms_dup = {}
    acronyms_exc = {}
    for acronym in sorted(tex_acronyms):
        if acronym in user_acronyms:
            definitions = user_acronyms[acronym]
            if len(definitions) > 1:
                acronyms_dup[acronym] = definitions
            elif definitions[0] == "":
                acronyms_exc[acronym] = ""
            else:
                acronyms_int[acronym] = definitions[0]
                if acronym in global_acronyms:
                    sys.stdout.write(
                        "- User acronym defined: %s = %s <-- %s\n"
                        % (acronym,
                           definitions[0],
                           global_acronyms[acronym][0]))
        elif acronym in global_acronyms:
            definitions = global_acronyms[acronym]
            if len(definitions) > 1:
                acronyms_dup[acronym] = definitions
            else:
                acronyms_int[acronym] = definitions[0]
    return acronyms_int, acronyms_dup, acronyms_exc

### Start application

try:
    arguments, arguments_left = getopt.getopt(sys.argv[1:], "rd:u:i:o:e:")
except getopt.GetoptError as err:
    # Unknown option or missing option argument: report it and show
    # the usage text instead of dying with a traceback.
    sys.stderr.write("%s\n" % err)
    exit_usage()

recursive = False
acronyms_dir = None
acronyms_user_filename = None
acronyms_tex_filename = None
acronyms_err_filename = None
main_filename = None
for opt, value in arguments:
    if opt == "-r":
        recursive = True
    elif opt == "-d":
        acronyms_dir = value
    elif opt == "-u":
        acronyms_user_filename = value
    elif opt == "-i":
        main_filename = value
    elif opt == "-o":
        acronyms_tex_filename = value
    elif opt == "-e":
        acronyms_err_filename = value

# Check whether mandatory options are defined (except recursive and
# user acronyms file).
if not acronyms_dir or not main_filename or not acronyms_tex_filename \
        or not acronyms_err_filename:
    exit_usage()

# Parse the whole list of acronyms (from all files in directory).
global_acronyms = parse_acronyms_dir(acronyms_dir)

# Parse the user defined acronyms file.
user_acronyms = {}
if acronyms_user_filename:
    user_acronyms = parse_acronyms_file(acronyms_user_filename)

# Blank separator line. Written explicitly because a bare "print" is a
# Python 2 statement that silently does nothing on Python 3.
sys.stdout.write("\n")

# Parse input TeX file and build the list of acronyms in file (not
# definitions yet). The generated acronyms file is skipped so that its
# own contents are not scanned for acronyms.
tex_acronyms = {}
append_acronyms_file(main_filename, acronyms_tex_filename, recursive, tex_acronyms)

sys.stdout.write("\n")

# Build the list of definitions from the global acronyms and user
# defined ones.
acronyms_int, acronyms_dup, acronyms_exc = intersect_acronyms(global_acronyms,
                                                              user_acronyms,
                                                              tex_acronyms)
sys.stdout.write("\n")

## Save the acronyms list

# One \nomenclature entry per resolved acronym, in sorted order. The
# with-block closes the file even if a write fails.
with open(acronyms_tex_filename, "w") as output:
    for acronym in sorted(acronyms_int):
        output.write("\\nomenclature{%s}{%s}\n" % (acronym,
                                                   acronyms_int[acronym]))

## Save the acronyms conflicts

# Create TBD acronyms list: acronyms used in the TeX sources that have
# no entry at all, neither global nor user defined.
acronyms_nul = [acronym for acronym in sorted(tex_acronyms)
                if acronym not in global_acronyms
                and acronym not in user_acronyms]

# The with-block closes the file even if a write fails.
with open(acronyms_err_filename, "w") as output:
    # Excluded
    for acronym in sorted(acronyms_exc):
        output.write("Excluded   : \\nomenclature{%s}{}\n" % acronym)

    # TBD acronyms (already sorted above)
    for acronym in acronyms_nul:
        output.write("Undefined  : \\nomenclature{%s}{}\n" % acronym)

    # Duplicate acronyms; the second field is the list of all the
    # conflicting definitions.
    for acronym in sorted(acronyms_dup):
        output.write("Duplicated : \\nomenclature{%s}{%s}\n"
                     % (acronym, acronyms_dup[acronym]))
