#!/usr/bin/env python3
"""Collect the copyright notices found under a source tree."""

import argparse
import os
import re
import sys
from enum import Enum
from pathlib import Path
from typing import Sequence, Tuple

# Specific files to be ignored, given relative to the target directory.
IGNORE_FILE_NAME = [
    # Exclude myself
    "generate_notice.py",

    # License files
    "LICENSE",
    "LICENSE.TXT",
    "LICENSE_APACHE2.TXT",
    "LICENSE_BSD_3_CLAUSE.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_MIT.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "MODULE_LICENSE_BSD_LIKE",
    "NOTICE",
    "builds/unix/LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "builds/unix/LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "docs/FTL.TXT",
    "docs/GPLv2.TXT",
    "src/gzip/LICENSE_ZLIB.TXT",

    # The patch file contains copyright line as a diff. Use it if Copyright
    # is not in a unified diff line.
    "src/gzip/patches/freetype-zlib.diff",
]

# Files that are expected to contain no "Copyright" string at all, given
# relative to the target directory.
NO_COPYRIGHT_FILES = [
    ".clang-format",
    ".gitignore",
    ".gitlab-ci.yml",
    ".mailmap",
    "Android.bp",
    "METADATA",
    "OWNERS",
    "README.android",
    "TEST_MAPPING",
    "builds/atari/ATARI.H",
    "builds/atari/FNames.SIC",
    "builds/atari/FREETYPE.PRJ",
    "builds/atari/README.TXT",
    "builds/atari/deflinejoiner.awk",
    "builds/atari/gen-purec-patch.sh",
    "builds/mac/FreeType.m68k_cfm.make.txt",
    "builds/mac/FreeType.m68k_far.make.txt",
    "builds/mac/FreeType.ppc_carbon.make.txt",
    "builds/mac/FreeType.ppc_classic.make.txt",
    "builds/mac/README",
    "builds/mac/ascii2mpw.py",
    "builds/mac/freetype-Info.plist",
    "builds/mac/ftlib.prj.xml",
    "builds/unix/.gitignore",
    "builds/unix/freetype2.in",
    "builds/vms/apinames_vms.bash",
    "builds/wince/vc2005-ce/freetype.sln",
    "builds/wince/vc2005-ce/freetype.vcproj",
    "builds/wince/vc2005-ce/index.html",
    "builds/wince/vc2008-ce/freetype.sln",
    "builds/wince/vc2008-ce/freetype.vcproj",
    "builds/wince/vc2008-ce/index.html",
    "builds/windows/.gitignore",
    "builds/windows/vc2010/freetype.sln",
    "builds/windows/vc2010/freetype.user.props",
    "builds/windows/vc2010/freetype.vcxproj",
    "builds/windows/vc2010/freetype.vcxproj.filters",
    "builds/windows/vc2010/index.html",
    "builds/windows/visualc/freetype.dsp",
    "builds/windows/visualc/freetype.dsw",
    "builds/windows/visualc/freetype.sln",
    "builds/windows/visualc/freetype.vcproj",
    "builds/windows/visualc/index.html",
    "builds/windows/visualce/freetype.dsp",
    "builds/windows/visualce/freetype.dsw",
    "builds/windows/visualce/freetype.vcproj",
    "builds/windows/visualce/index.html",
    "devel-teeui/OWNERS",
    "devel-teeui/README.md",
    "devel-teeui/ftmodule.h",
    "devel-teeui/rules.json",
    "devel-teeui/rules.mk",
    "docs/.gitignore",
    "docs/CMAKE",
    "docs/INSTALL.MAC",
    "docs/MAKEPP",
    "docs/PROBLEMS",
    "docs/README",
    "docs/freetype-config.1",
    "docs/markdown/images/favico.ico",
    "docs/markdown/javascripts/extra.js",
    "docs/markdown/stylesheets/extra.css",
    "include/freetype/config/ftmodule.h",
    "include/freetype/ftchapters.h",
    "libft2.map.txt",
    "objs/.gitignore",
    "objs/README",
    "src/gzip/README.freetype",
    "src/gzip/crc32.h",
    "src/gzip/inffixed.h",
    "src/tools/apinames.c",
    "src/tools/chktrcmp.py",
    "src/tools/cordic.py",
    "src/tools/ftrandom/Makefile",
    "src/tools/ftrandom/README",
    "src/tools/make_distribution_archives.py",
    "src/tools/no-copyright",
    "src/tools/test_afm.c",
    "src/tools/test_bbox.c",
    "src/tools/test_trig.c",
    "src/tools/update-copyright",
    "subprojects/harfbuzz.wrap",
    "subprojects/libpng.wrap",
    "subprojects/zlib.wrap",
    "tests/README.md",
    "tests/issue-1063/main.c",
    "tests/meson.build",
    "tests/scripts/download-test-fonts.py",
]


class CommentType(Enum):
    """Comment convention that wraps a copyright notice."""

    C_STYLE_BLOCK = 1  # /* ... */
    C_STYLE_BLOCK_AS_LINE = 2  # /* ... */ but uses multiple lines of block comments.
    C_STYLE_LINE = 3  # // ...
    SCRIPT_STYLE_HASH = 4  # # ...
    SCRIPT_STYLE_DOLLER = 5  # $! ...
    DOC_STYLE = 6  # no comment escape
    UNKNOWN = 10000


# Helper function of showing error message and immediate exit.
def fatal(msg: str):
    """Write *msg* plus a newline to stderr and exit with status 1."""
    sys.stderr.write(msg)
    sys.stderr.write("\n")
    sys.exit(1)


def warn(msg: str):
    """Write *msg* plus a newline to stderr without exiting."""
    sys.stderr.write(msg)
    sys.stderr.write("\n")


def cleanup_and_join(out_lines: Sequence[str]):
    """Normalize extracted notice lines and join them with newlines.

    Trailing blank lines are dropped, then the indentation shared by all
    lines is removed one column at a time.  The caller's sequence is left
    untouched.

    Args:
        out_lines: the extracted notice, one entry per line.

    Returns:
        The cleaned notice as a single newline-joined string.

    Exits (via fatal) when nothing but blank lines was passed in.
    """
    # Work on a copy: the argument is only promised to be a Sequence.
    kept = list(out_lines)

    # Guard on emptiness first; indexing [-1] on an empty list would raise.
    while kept and not kept[-1].strip():
        kept.pop()

    if not kept:
        fatal("Failed to get copyright info")

    # If all lines start with a space (or are empty), strip one column and
    # retry.  The last line is non-blank here, so this terminates.
    while all(not text or text[0] == ' ' for text in kept):
        kept = [text[1:] for text in kept]

    return "\n".join(kept)


def get_comment_type(copyright_line: str, path: str) -> "CommentType":
    """Pick the comment convention used around *copyright_line*.

    The path-based rules are checked before the line-prefix rules; a line
    matching none of them is treated as part of a C block comment.
    """
    # vms_make.com contains multiple copyright header as a string constants.
    if path.endswith("/vms_make.com"):
        return CommentType.SCRIPT_STYLE_DOLLER

    if "docs/" in path or "README" in path:
        return CommentType.DOC_STYLE

    if copyright_line.startswith("#"):
        return CommentType.SCRIPT_STYLE_HASH
    if copyright_line.startswith("//"):
        return CommentType.C_STYLE_LINE
    if copyright_line.startswith("$!"):
        return CommentType.SCRIPT_STYLE_DOLLER

    opens_and_closes = "/*" in copyright_line and "*/" in copyright_line
    if not opens_and_closes:
        return CommentType.C_STYLE_BLOCK

    # ftrandom.c uses single line block comment for the first Copyright line,
    # and following license notice is wrapped with single block comment.
    # This file can be handled by C_STYLE_BLOCK parser.
    if path.endswith("src/tools/ftrandom/ftrandom.c"):
        return CommentType.C_STYLE_BLOCK
    return CommentType.C_STYLE_BLOCK_AS_LINE


# Extract copyright notice and returns next index.
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract the notice starting at ``lines[i]`` with the matching parser.

    Args:
        lines: all lines of the file.
        i: index of the line that was recognized as a copyright line.
        path: path of the file, used to pick the comment convention.

    Returns:
        ``(notice, next_index)``.  Some parsers return ``None`` as the notice
        when the line turns out not to start a notice.

    Exits (via fatal) when the comment type has no parser.
    """
    parsers = {
        CommentType.C_STYLE_BLOCK: extract_from_c_style_block_at,
        CommentType.C_STYLE_BLOCK_AS_LINE: extract_from_c_style_block_as_line_at,
        CommentType.C_STYLE_LINE: extract_from_c_style_lines_at,
        CommentType.SCRIPT_STYLE_HASH: extract_from_script_hash_at,
        CommentType.SCRIPT_STYLE_DOLLER: extract_from_script_doller_at,
        CommentType.DOC_STYLE: extract_from_doc_style_at,
    }
    parser = parsers.get(get_comment_type(lines[i], path))
    if parser is None:
        fatal("Uknown comment style: %s" % lines[i])
    return parser(lines, i, path)


def clean_c_comment_line(line: str) -> str:
    """Strip C block-comment decoration from one line.

    Removes, in this order, a leading "/* " (or a bare "/*"), a trailing
    " */" or "*/", a leading " * " (or a bare " *"), and trailing spaces.
    """
    text = line
    # Strip beginning "/*" chars
    if text.startswith("/* "):
        text = text[3:]
    if text == "/*":
        text = ""
    # Strip ending "*/" chars
    if text.endswith(" */"):
        text = text[:-3]
    if text.endswith("*/"):
        text = text[:-2]
    # Strip starting " *" chars
    if text.startswith(" * "):
        text = text[3:]
    if text == " *":
        # Must clear the already-cleaned text; slicing the raw line would
        # bring back markers removed above (e.g. " * */" -> " */").
        text = ""
    # Strip trailing spaces
    return text.rstrip()


def extract_from_doc_style_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice from plain text that has no comment markers.

    Returns ``(None, i + 1)`` unless the line starts with "Copyright".
    The notice runs until two consecutive empty lines found more than four
    lines after the start, or until the end of the file.
    """
    if not lines[i].startswith("Copyright"):
        return (None, i + 1)

    def is_copyright_end(lines: Sequence[str], start: int, i: int) -> bool:
        # treat double spacing as end of license header
        return (i - start > 4
                and lines[i] == ""
                and i + 1 < len(lines)  # the notice may reach end of file
                and lines[i + 1] == "")

    start = i
    while i < len(lines) and not is_copyright_end(lines, start, i):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    return (cleanup_and_join(lines[start:end]), i + 1)


def extract_from_c_style_lines_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive "//" line comments.

    The notice ends at the first line that does not start with "//".
    """
    start = i
    while i < len(lines) and lines[i].startswith("//"):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("// "):
            out_lines.append(line[3:])
        elif line == "//":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_script_hash_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as "#" comments.

    Returns ``(None, i + 1)`` when the stripped line does not start with "#".
    The notice ends at the first line without a "#", or at two consecutive
    bare "#" lines.
    """
    if not lines[i].strip().startswith("#"):
        return (None, i + 1)

    def is_copyright_end(lines: Sequence[str], i: int) -> bool:
        if "#" not in lines[i]:
            return True
        # treat double spacing as end of license header
        return (lines[i] == "#"
                and i + 1 < len(lines)  # the notice may reach end of file
                and lines[i + 1] == "#")

    start = i
    while i < len(lines) and not is_copyright_end(lines, i):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("# "):
            out_lines.append(line[2:])
        elif line == "#":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_script_doller_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as "$!" comments.

    Returns ``(None, i + 1)`` when the stripped line does not start with
    "$!".  The notice ends at the first line without "$!", or at two
    consecutive bare "$!" lines; that terminating line is included.
    """
    if not lines[i].strip().startswith("$!"):
        return (None, i + 1)

    def is_copyright_end(lines: Sequence[str], i: int) -> bool:
        if "$!" not in lines[i]:
            return True
        # treat double spacing as end of license header
        return (lines[i] == "$!"
                and i + 1 < len(lines)  # the notice may reach end of file
                and lines[i + 1] == "$!")

    start = i
    while i < len(lines) and not is_copyright_end(lines, i):
        i += 1
    # NOTE(review): unlike the "#" parser this keeps the terminating line
    # (end = i + 1), which also makes the start == end check below
    # unreachable.  Kept as-is because changing it alters the generated
    # notices -- confirm whether this is intended.
    end = i + 1

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("$! "):
            out_lines.append(line[3:])
        elif line == "$!":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_c_style_block_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice that lives inside one "/* ... */" block comment.

    Scanning starts on the line after ``lines[i]`` and stops at the first
    terminator: a line containing "*/", one of two known closing phrases,
    or a doubled " *" / empty line.  The terminator line is included.
    """
    def is_copyright_end(lines: Sequence[str], i: int) -> bool:
        current = lines[i]
        if "*/" in current:
            return True
        if "understand and accept it fully." in current:
            return True
        if "see copyright notice in zlib.h" in current:
            return True
        # Doubled separator lines; the notice may reach end of file, so the
        # look-ahead has to be bounds-checked.
        if current in (" *", ""):
            return i + 1 < len(lines) and lines[i + 1] == current
        return False

    start = i
    i += 1  # include at least one line
    while i < len(lines) and not is_copyright_end(lines, i):
        i += 1
    end = i + 1

    out_lines = [clean_c_comment_line(line) for line in lines[start:end]]

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_c_style_block_as_line_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as a run of one-line "/* ... */" comments.

    The run ends at the first line after ``lines[i]`` without "*/"; that
    line is included.  Lines consisting only of "/***...*/" are skipped.
    """
    start = i
    i += 1  # include at least one line
    # A "/***/" ruler line also contains "*/", so this one test covers it.
    while i < len(lines) and "*/" in lines[i]:
        i += 1
    # NOTE(review): this keeps the first line that is not a comment
    # (end = i + 1), as the original did -- confirm that is intended.
    end = i + 1

    out_lines = [
        clean_c_comment_line(line)
        for line in lines[start:end]
        if not re.match(r'/\*+/', line.strip())
    ]

    return (cleanup_and_join(out_lines), i + 1)


# Returns true if the line shows
# the start of copyright notice.
def is_copyright_line(line: str, path: str) -> bool:
    """Tell whether *line* of the file at *path* starts a copyright notice.

    A line qualifies when it contains "Copyright", is not one of the known
    quoted occurrences, and passes the stricter per-file rules below.
    """
    if "Copyright" not in line:
        return False

    # For avoiding unexpected mismatches, exclude quoted Copyright string.
    if "`Copyright'" in line:  # For src/psaux/psobjs.c
        return False
    if "\"Copyright\"" in line:  # For src/cff/cfftoken.h
        return False

    if path.endswith(("src/tools/update-copyright-year",
                      "src/tools/glnames.py")):
        # The comment contains string of Copyright. Use only immediate
        # Copyright string followed by "# ".
        return line.startswith("# Copyright ")

    if path.endswith("src/gzip/inftrees.c"):
        # The unused string constant contains word of Copyright. Use only
        # immediate Copyright string followed by " * ".
        return line.startswith(" * Copyright ")

    if path.endswith("src/base/ftver.rc"):
        # Copyright string matches with LegalCopyright key in the RC file.
        return "LegalCopyright" not in line

    return True


# Extract the copyright notice and put it into copyrights arg.
def do_file(path: str, copyrights: dict, no_copyright_files: set):
    """Collect every notice found in the file at *path*.

    Args:
        path: file to read; decoded as UTF-8, falling back to ISO-8859-1.
        copyrights: maps notice text to the list of paths containing it;
            updated in place.
        no_copyright_files: paths expected to contain no "Copyright"; a
            matching path is removed from the set.

    Exits (via fatal) when the file has no "Copyright" string and is not in
    *no_copyright_files*, or when it has one but no notice was extracted.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        content = raw.decode("iso-8859-1")

    if "Copyright" not in content:
        if path in no_copyright_files:
            no_copyright_files.remove(path)
        else:
            fatal("%s does not contain Copyright line" % path)
        return

    lines = content.splitlines()
    license_found = False
    i = 0
    while i < len(lines):
        if not is_copyright_line(lines[i], path):
            i += 1
            continue
        # The extractor reports where scanning has to resume.
        notice, i = extract_copyright_at(lines, i, path)
        if notice:
            copyrights.setdefault(notice, []).append(path)
            license_found = True

    if not license_found:
        fatal("License header could not found: %s" % path)


def do_check(path, format):
    """Walk *path*, collect all notices and print them in *format*.

    Args:
        path: target directory; a trailing slash is added when missing.
        format: a Format member selecting the printer.  (The name shadows
            the builtin but is kept because it is part of the signature.)

    Exits (via fatal) when an entry of IGNORE_FILE_NAME or
    NO_COPYRIGHT_FILES was not consumed during the walk.
    """
    if not path.endswith('/'):  # make sure the path ends with slash
        path = path + '/'

    file_to_ignore = {os.path.join(path, x) for x in IGNORE_FILE_NAME}
    no_copyright_files = {os.path.join(path, x) for x in NO_COPYRIGHT_FILES}
    copyrights = {}

    for directory, sub_directories, filenames in os.walk(path):
        # skip .git directory (pruning in place stops os.walk descending)
        if ".git" in sub_directories:
            sub_directories.remove(".git")

        for fname in filenames:
            fpath = os.path.join(directory, fname)
            if fpath in file_to_ignore:
                file_to_ignore.remove(fpath)
                continue
            do_file(fpath, copyrights, no_copyright_files)

    # Leftovers are sorted so the error text is stable between runs.
    if len(file_to_ignore) != 0:
        fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
              + "\n".join(sorted(file_to_ignore)))

    if len(no_copyright_files) != 0:
        fatal("Following files are listed in NO_COPYRIGHT_FILES but doesn't exists.\n"
              + "\n".join(sorted(no_copyright_files)))

    if format == Format.notice:
        print_notice(copyrights, False)
    elif format == Format.notice_with_filename:
        print_notice(copyrights, True)
    elif format == Format.html:
        print_html(copyrights)


def print_html(copyrights):
    """Print the notices as an HTML table, one row per distinct notice.

    Each row holds the sorted list of files and the notice text.  File
    names and notice text are HTML-escaped; line breaks in the notice
    become ``<br>``.

    NOTE(review): the tag literals in the reviewed copy of this function
    were empty (the markup appears to have been stripped), so the exact
    layout below is a reconstruction -- compare with the upstream script.
    """
    import html  # local import keeps this function self-contained

    print('<html>')
    print("""<head>
<style>
table { font-family: monospace; border-collapse: collapse; }
td { border: 1px solid black; padding: 10px; vertical-align: top; }
</style>
</head>""")
    print('<body>')
    print('<table>')

    # print the copyright in sorted order for stable output.
    for notice in sorted(copyrights):
        print('<tr>')
        print('<td>')
        print('<ul>')
        for file in sorted(copyrights[notice]):
            print('<li>%s</li>' % html.escape(file))
        print('</ul>')
        print('</td>')
        print('<td>')
        # Escape first, then insert the <br> tags, so they stay intact.
        print('<p>%s</p>' % html.escape(notice).replace('\n', '<br>'))
        print('</td>')
        print('</tr>')

    print('</table>')
    print('</body>')
    print('</html>')


def print_notice(copyrights, print_file):
    """Print the notices as plain text separated by a 67-dash rule.

    When *print_file* is true, the sorted files containing a notice are
    printed above it.
    """
    # print the copyright in sorted order for stable output.
    for notice in sorted(copyrights):
        if print_file:
            print("\n".join(sorted(copyrights[notice])))
            print()

        print(notice)
        print()
        print("-" * 67)
        print()


class Format(Enum):
    """Output formats accepted by the --format option."""

    notice = 'notice'
    notice_with_filename = 'notice_with_filename'
    html = 'html'

    def __str__(self):
        # argparse renders the choices with str(); show the bare value.
        return self.value


def main():
    """Parse the command line and run the notice collection."""
    parser = argparse.ArgumentParser(description="Collect notice headers.")
    parser.add_argument("--format", dest="format", type=Format,
                        choices=list(Format), default=Format.notice,
                        help="output format of the collected notices")
    parser.add_argument("--target", dest="target", action='store',
                        required=True,
                        help="target directory to collect notice headers")
    res = parser.parse_args()
    do_check(res.target, res.format)


if __name__ == "__main__":
    main()