Scarab  v2.11.1
Project 8 C++ Utility Library
mkdoc.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 #
3 # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
4 #
5 # Extract documentation from C++ header files to use it in Python bindings
6 #
7 
8 import os
9 import sys
10 import platform
11 import re
12 import textwrap
13 
14 from clang import cindex
15 from clang.cindex import CursorKind
16 from collections import OrderedDict
17 from glob import glob
18 from threading import Thread, Semaphore
19 from multiprocessing import cpu_count
20 
# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds whose spelling is NOT appended to the name prefix when
# extract() recurses through them.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]
47 
# Maps each C++ operator token to the word used in generated identifiers.
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift',
    '>>': 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul',
    '/': 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign',
    '()': 'call',
}

# Re-order so the longest tokens come first; the sort is stable, so tokens of
# equal length keep their order from the literal above.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(),
           key=lambda pair: len(pair[0]),
           reverse=True))
60 
# Number of extraction jobs allowed to run at once: one per reported CPU.
job_count = cpu_count()
# Counting semaphore that starts with one permit per allowed job.
job_semaphore = Semaphore(value=job_count)
63 
64 
class NoFilenamesError(ValueError):
    """Raised by read_args() when the argument list names no input files."""
67 
68 
def d(s):
    """Return *s* as text: ``str`` is passed through, anything else is
    decoded as UTF-8 via its ``decode`` method."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
71 
72 
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_...`` identifier.

    Steps, in order: ``type-parameter-0-N`` becomes ``TN``; every
    ``operator<token>`` listed in CPP_OPERATORS becomes ``operator_<word>``;
    anything between the first ``<`` and the last ``>`` is dropped; every
    remaining non-alphanumeric character becomes ``_``; runs of ``_`` are
    collapsed and a single trailing ``_`` is removed.
    """
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for token, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator' + token, 'operator_' + word)
    cleaned = re.sub('<.*>', '', cleaned)
    cleaned = ''.join(c if c.isalnum() else '_' for c in cleaned)
    cleaned = re.sub('_+', '_', cleaned)
    cleaned = re.sub('_$', '', cleaned)
    return '__doc_' + cleaned
81 
82 
def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters and the common left margin."""
    lines = []
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()
        if line.startswith('/*'):
            line = line[2:].lstrip('*')
            # A one-line block comment ("/** text */") opens and closes on
            # the same line, so the terminator must be removed here as well.
            if line.endswith('*/'):
                line = line[:-2].rstrip('*')
        elif line.endswith('*/'):
            line = line[:-2].rstrip('*')
        elif line.startswith('///'):
            line = line[3:]
        if line.startswith('*'):
            line = line[1:]
        lines.append(line)

    # Dedent by the smallest indentation found on any non-empty line.
    indents = [len(line) - len(line.lstrip()) for line in lines if line]
    if indents:
        margin = min(indents)
        lines = [line[margin:] for line in lines]
    return ''.join(line + '\n' for line in lines)


def _translate_markup(s):
    """Rewrite Doxygen commands and simple HTML/TeX tags as plain markup."""
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # A leading '$' marks a section heading for the re-flow step.
    headings = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for tag, heading in headings:
        # The word boundary stops "\throw" from consuming the front of
        # "\throws" (and likewise for the other singular/plural pairs).
        s = re.sub(r'\\%s\b\s*' % tag, r'\n\n$%s:\n\n' % heading, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Wrap paragraphs to 70 columns, keeping ``` code segments verbatim."""
    wrapper = textwrap.TextWrapper(
        width=70,
        expand_tabs=True,
        replace_whitespace=True,
        drop_whitespace=True,
        initial_indent='',
        subsequent_indent='',
    )

    pieces = []
    in_code_segment = False
    for chunk in re.split(r'(```)', s):
        if chunk == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(chunk.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for paragraph in re.split(r'(?: *\n *){2,}', chunk):
                text = re.sub(r'\s+', ' ', paragraph).strip()
                if text.startswith('$'):
                    # Headings are never indented, even when they directly
                    # follow another heading; the text after them is.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    pieces.append(wrapper.fill(text)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(text)
                    if wrapped:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    The comment delimiters (``/* ... */``, ``///``, leading ``*``) and the
    common indentation are removed, Doxygen commands and a few HTML/TeX tags
    are rewritten, and the prose is re-wrapped to 70 columns. Text between
    ``` fences is kept verbatim.

    Parameters:
        comment: the raw comment text as a ``str``; may be empty.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    return _reflow(_translate_markup(_strip_comment_syntax(comment)))
191 
192 
def extract(filename, node, prefix, output):
    """Walk the cursor *node* and collect docstrings declared in *filename*.

    Nodes located in a different file are skipped (the function returns 0
    for them). Kinds in RECURSE_LIST are descended into, extending the name
    prefix with the node's spelling unless the kind is in PREFIX_BLACKLIST.
    Kinds in PRINT_LIST that have a non-empty spelling append a
    ``(name, filename, comment)`` tuple to *output*.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind
    if kind in RECURSE_LIST:
        child_prefix = prefix
        if kind not in PREFIX_BLACKLIST:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        own_prefix = prefix + '_' if prefix else prefix
        if node.spelling:
            name = sanitize_name(own_prefix + d(node.spelling))
            output.append((name, filename, comment))
214 
215 
class ExtractionThread(Thread):
    """Thread that parses one file with libclang and extracts its docstrings.

    The constructor takes a permit from ``job_semaphore`` (blocking until one
    is free); ``run`` gives it back when it finishes, whether or not parsing
    succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = \
            filename, parameters, output
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
233 
234 
def read_args(args):
    """Split *args* into compiler parameters and input file names.

    Defaults are added first: ``-x c++`` unless ``-x`` is among the
    arguments, and ``-std=c++11`` unless some argument starts with ``-std=``.
    On Darwin the Xcode libclang directory and an SDK sysroot are used when
    they exist; on Linux the highest-versioned ``/usr/lib*/clang/*/include``
    directory is added with ``-isystem``. Arguments starting with ``-`` are
    then appended to the parameters, all others are treated as file names.

    Returns:
        A ``(parameters, filenames)`` tuple of lists.

    Raises:
        NoFilenamesError: if no file name was given.
    """
    parameters = []
    if "-x" not in args:
        parameters += ['-x', 'c++']
    if not any(it.startswith("-std=") for it in args):
        parameters += ['-std=c++11']

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first sub-directory reported for the SDKs folder.
            first_sdk = next(os.walk(sdk_dir))[1][0]
            parameters += ['-isysroot', os.path.join(sdk_dir, first_sdk)]
    elif system == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(folder):
            numbers = re.findall(r'(?<!lib)(?<!\d)\d+', folder)
            return [int(number) for number in numbers]

        candidates = []
        for libdir in ['lib64', 'lib', 'lib32']:
            for path in glob('/usr/%s/clang/*/include' % libdir):
                if os.path.isdir(path):
                    candidates.append(path)
        clang_include_dir = max(
            candidates, default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters += ['-isystem', clang_include_dir]

    filenames = []
    for item in args:
        target = parameters if item.startswith('-') else filenames
        target.append(item)

    if not filenames:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
281 
282 
def extract_all(args):
    """Extract the documentation of every file named in *args*.

    One ExtractionThread is started per file; its constructor blocks while
    all ``job_semaphore`` permits are taken, which limits how many run at
    once.

    Returns:
        The list the threads appended their results to.

    Raises:
        NoFilenamesError: propagated from read_args() when *args* names no
            files.
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the workers rather than draining the semaphore: draining left it
    # with no permits, so a second call in the same process would block
    # forever when constructing its first ExtractionThread.
    for thr in threads:
        thr.join()

    return output
295 
296 
def write_header(comments, out_file=sys.stdout):
    """Write the generated C++ docstring header to *out_file*.

    *comments* is an iterable of ``(name, filename, comment)`` entries. They
    are emitted sorted by name, then filename, one ``static const char *``
    raw-string definition each. When a name repeats, the second and later
    occurrences get a ``_2``, ``_3``, ... suffix.
    """
    prologue = '''/*
 This file contains docstrings for the Python bindings.
 Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
'''
    epilogue = '''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
'''

    print(prologue, file=out_file)

    previous = None
    repeat = 1
    ordered = sorted(comments, key=lambda entry: (entry[0], entry[1]))
    for base, _, text in ordered:
        if base == previous:
            # Same identifier again: disambiguate with a running counter.
            repeat += 1
            symbol = base + "_%i" % repeat
        else:
            previous, repeat, symbol = base, 1, base
        separator = '\n' if '\n' in text else ' '
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (symbol, separator, text), file=out_file)

    print(epilogue, file=out_file)
341 
342 
def mkdoc(args):
    """Run the extraction for *args* and write the resulting header.

    The first argument starting with ``-o`` selects the output file, either
    attached (``-ofile``) or as the following argument (``-o file``); it is
    removed before the remaining arguments are passed to extract_all().
    Without ``-o`` the header is written to standard output.

    Raises:
        NoFilenamesError: propagated from extract_all() when no input files
            are given.
        SystemExit: with code -1 when ``-o`` has no argument.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            # Drop the flag itself; the loop ends right after, so changing
            # the list while enumerating it is safe here.
            del args[idx]
            try:
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument")
                # sys.exit, not the interactive helper exit() that only
                # exists when the site module has been loaded.
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error, don't leave a partially-written
            # output file.
            try:
                os.unlink(out_path)
            except OSError:
                # Best effort: the file may not exist or may not be removable.
                pass
            raise
    else:
        write_header(comments)
372 
373 
if __name__ == '__main__':
    # Script entry point: print a usage line and exit with -1 when no input
    # files were given on the command line.
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit rather than exit(): the latter is an interactive helper
        # that only exists when the site module has been loaded.
        sys.exit(-1)
def __init__(self, filename, parameters, output)
Definition: mkdoc.py:217
size_t len(handle h)
Definition: pytypes.h:1361
def process_comment(comment)
Definition: mkdoc.py:83
def extract_all(args)
Definition: mkdoc.py:283
def extract(filename, node, prefix, output)
Definition: mkdoc.py:193
return isinstance(obj, type)
Definition: mkdoc.py:1
def d(s)
Definition: mkdoc.py:69
def mkdoc(args)
Definition: mkdoc.py:343
def write_header(comments, out_file=sys.stdout)
Definition: mkdoc.py:297
def read_args(args)
Definition: mkdoc.py:235
def sanitize_name(name)
Definition: mkdoc.py:73
void print(Args &&...args)
Definition: pybind11.h:1849
auto range
Definition: cast.h:455
std::string join(const T &v, std::string delim=",")
Simple function to join a string.
Definition: CLI11.hpp:277