aboutsummaryrefslogtreecommitdiff
path: root/BUILD
blob: 2e2a25496b43e1c302a360a4cafa3605669b7821 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
# -*- mode: python; -*-

# Description:
#   Build file for Tesseract: libraries, binaries, and exported data.

licenses(["notice"])  # Apache 2.0

#-----------------------------------------------------------------------------
# LIBRARIES

# Sources under viewer/ (scrollview, svutil, svmnode), built against Leptonica.
cc_library(
    name = "scrollview_lib",
    srcs = [
        "viewer/scrollview.cpp",
        "viewer/svutil.cpp",
        "viewer/svmnode.cpp",
    ],
    copts = [
        "-DHAVE_LIBLEPT",
        "-DSCROLLVIEW_PATH=/home/build/static/projects/ocr",
    ],
    includes = ["third_party/leptonica/includes"],
    deps = ["//third_party/leptonica"],
)

# Sources under ccutil/; this library has no dependencies of its own.
cc_library(
    name = "tesseract_cc_util",
    srcs = [
        "ccutil/basedir.cpp",
        "ccutil/bits16.cpp",
        "ccutil/boxread.cpp",
        "ccutil/clst.cpp",
        "ccutil/debugwin.cpp",
        "ccutil/elst.cpp",
        "ccutil/elst2.cpp",
        "ccutil/errcode.cpp",
        "ccutil/tessopt.cpp",
        "ccutil/globaloc.cpp",
        "ccutil/hashfn.cpp",
        "ccutil/mainblk.cpp",
        "ccutil/memblk.cpp",
        "ccutil/memry.cpp",
        "ccutil/ocrshell.cpp",
        "ccutil/serialis.cpp",
        "ccutil/strngs.cpp",
        "ccutil/tprintf.cpp",
        "ccutil/unichar.cpp",
        "ccutil/unicharset.cpp",
        "ccutil/unicharmap.cpp",
        "ccutil/varable.cpp",
        "ccutil/ccutil.cpp",
    ],
    includes = ["ccutil"],
    deps = [],
)

# Sources under cutil/; depends on tesseract_cc_util.
cc_library(
    name = "tesseract_c_util",
    srcs = [
        "cutil/tessarray.cpp",
        "cutil/bitvec.cpp",
        "cutil/danerror.cpp",
        "cutil/debug.cpp",
        "cutil/efio.cpp",
        "cutil/emalloc.cpp",
        "cutil/freelist.cpp",
        "cutil/globals.cpp",
        "cutil/listio.cpp",
        "cutil/oldheap.cpp",
        "cutil/oldlist.cpp",
        "cutil/structures.cpp",
        "cutil/tordvars.cpp",
        "cutil/cutil.cpp",
        "cutil/variables.cpp",
        "cutil/cutil_class.cpp",
    ],
    includes = [
        "cutil",
        "ccutil",
    ],
    deps = ["tesseract_cc_util"],
)

# Sources under dict/; depends on the util and ccstruct libraries plus the
# external libidn, icu, utf8, and fst packages.
cc_library(
    name = "tesseract_dict",
    srcs = [
        "dict/choices.cpp",
        "dict/context.cpp",
        "dict/conversion.cpp",
        "dict/dawg.cpp",
        "dict/dict.cpp",
        "dict/hyphen.cpp",
        "dict/permdawg.cpp",
        "dict/permnum.cpp",
        "dict/permngram.cpp",
        "dict/permute.cpp",
        "dict/states.cpp",
        "dict/stopper.cpp",
        "dict/reduce.cpp",
        "dict/makedawg.cpp",
        "dict/lookdawg.cpp",
        "dict/trie.cpp",
        "dict/wordfst.cpp",
        "dict/patternfst.cpp",
        "dict/fstmodel.cpp",
    ],
    includes = [
        "cutil",
        "ccutil",
        "ccstruct",
        "dict",
    ],
    deps = [
        "tesseract_c_util",
        "tesseract_cc_struct",
        "tesseract_cc_util",
        "//third_party/libidn:idn",
        "//third_party/icu:icu",
        "//i18n/utf8",
        "//nlp/fst/lib",
    ],
)

# Sources under aspirin/; carries its own, more restrictive license setting.
cc_library(
    name = "tesseract_aspirin",
    srcs = [
        "aspirin/bpsim.cpp",
        "aspirin/bpsupport.cpp",
        "aspirin/nmatch.cpp",
        "aspirin/vanilla.cpp",
    ],
    includes = [
        "aspirin",
        "ccutil",
        "cutil",
    ],
    # license with restrictions on commercial use, see LICENSE file
    licenses = ["by_exception_only"],
    deps = [
        "tesseract_c_util",
        "tesseract_cc_util",
    ],
)

# Sources under image/, compiled with HAVE_LIBLEPT against Leptonica and the
# scrollview library.
cc_library(
    name = "tesseract_image",
    srcs = [
        "image/image.cpp",
        "image/imgbmp.cpp",
        "image/imgio.cpp",
        "image/imgs.cpp",
        "image/imgtiff.cpp",
        "image/bitstrm.cpp",
        "image/svshowim.cpp",
    ],
    copts = ["-DHAVE_LIBLEPT"],
    includes = [
        "ccutil",
        "image",
        "viewer",
        "third_party/leptonica/includes",
    ],
    deps = [
        "tesseract_cc_util",
        "scrollview_lib",
        "//third_party/leptonica",
    ],
)

# Sources under ccstruct/; depends on the image and util libraries.
cc_library(
    name = "tesseract_cc_struct",
    srcs = [
        "ccstruct/blobbox.cpp",
        "ccstruct/blobs.cpp",
        "ccstruct/blread.cpp",
        "ccstruct/callcpp.cpp",
        "ccstruct/ccstruct.cpp",
        "ccstruct/coutln.cpp",
        "ccstruct/genblob.cpp",
        "ccstruct/labls.cpp",
        "ccstruct/linlsq.cpp",
        "ccstruct/lmedsq.cpp",
        "ccstruct/mod128.cpp",
        "ccstruct/normalis.cpp",
        "ccstruct/ocrblock.cpp",
        "ccstruct/ocrrow.cpp",
        "ccstruct/pageblk.cpp",
        "ccstruct/pageres.cpp",
        "ccstruct/pdblock.cpp",
        "ccstruct/points.cpp",
        "ccstruct/polyaprx.cpp",
        "ccstruct/polyblk.cpp",
        "ccstruct/polyblob.cpp",
        "ccstruct/polyvert.cpp",
        "ccstruct/poutline.cpp",
        "ccstruct/quadlsq.cpp",
        "ccstruct/quadratc.cpp",
        "ccstruct/quspline.cpp",
        "ccstruct/ratngs.cpp",
        "ccstruct/rect.cpp",
        "ccstruct/rejctmap.cpp",
        "ccstruct/rwpoly.cpp",
        "ccstruct/statistc.cpp",
        "ccstruct/stepblob.cpp",
        "ccstruct/txtregn.cpp",
        "ccstruct/vecfuncs.cpp",
        "ccstruct/werd.cpp",
    ],
    includes = [
        "ccstruct",
        "ccutil",
        "cutil",
        "image",
        "viewer",
    ],
    deps = [
        "tesseract_image",
        "tesseract_c_util",
        "tesseract_cc_util",
    ],
)

# Sources under classify/; depends on the ccstruct, dict, and util libraries.
cc_library(
    name = "tesseract_classify",
    srcs = [
        "classify/adaptive.cpp",
        "classify/adaptmatch.cpp",
        "classify/baseline.cpp",
        "classify/blobclass.cpp",
        "classify/chartoname.cpp",
        "classify/classify.cpp",
        "classify/cluster.cpp",
        "classify/clusttool.cpp",
        "classify/cutoffs.cpp",
        "classify/extract.cpp",
        "classify/featdefs.cpp",
        "classify/flexfx.cpp",
        "classify/float2int.cpp",
        "classify/fpoint.cpp",
        "classify/fxdefs.cpp",
        "classify/hideedge.cpp",
        "classify/intfx.cpp",
        "classify/intmatcher.cpp",
        "classify/intproto.cpp",
        "classify/kdtree.cpp",
        "classify/mf.cpp",
        "classify/mfdefs.cpp",
        "classify/mfoutline.cpp",
        "classify/mfx.cpp",
        "classify/normfeat.cpp",
        "classify/normmatch.cpp",
        "classify/ocrfeatures.cpp",
        "classify/outfeat.cpp",
        "classify/picofeat.cpp",
        "classify/protos.cpp",
        "classify/sigmenu.cpp",
        "classify/speckle.cpp",
        "classify/xform2d.cpp",
    ],
    includes = [
        "cutil",
        "classify",
        "ccutil",
        "ccstruct",
        "dict",
    ],
    deps = [
        "tesseract_cc_struct",
        "tesseract_dict",
        "tesseract_c_util",
        "tesseract_cc_util",
    ],
)

# Sources under textord/; depends on the ccstruct, image, ccutil, and pageseg
# libraries.
cc_library(
    name = "tesseract_textord",
    srcs = [
        "textord/blkocc.cpp",
        "textord/drawedg.cpp",
        "textord/drawtord.cpp",
        "textord/edgblob.cpp",
        "textord/edgloop.cpp",
        "textord/fpchop.cpp",
        "textord/gap_map.cpp",
        "textord/makerow.cpp",
        "textord/oldbasel.cpp",
        "textord/pagesegmain.cpp",
        "textord/pithsync.cpp",
        "textord/pitsync1.cpp",
        "textord/scanedg.cpp",
        "textord/sortflts.cpp",
        "textord/topitch.cpp",
        "textord/tordmain.cpp",
        "textord/tospace.cpp",
        "textord/tovars.cpp",
        "textord/underlin.cpp",
        "textord/wordseg.cpp",
    ],
    includes = [
        "ccstruct",
        "wordrec",
        "ccutil",
        "dict",
        "classify",
        "image",
        "viewer",
        "pageseg",
    ],
    deps = [
        "tesseract_cc_struct",
        "tesseract_image",
        "tesseract_cc_util",
        "tesseract_pageseg",
    ],
)

# Sources under wordrec/; depends on the ccstruct, classify, image, dict, and
# util libraries.
cc_library(
    name = "tesseract_wordrec",
    srcs = [
        "wordrec/associate.cpp",
        "wordrec/badwords.cpp",
        "wordrec/bestfirst.cpp",
        "wordrec/chop.cpp",
        "wordrec/chopper.cpp",
        "wordrec/closed.cpp",
        "wordrec/djmenus.cpp",
        "wordrec/drawfx.cpp",
        "wordrec/findseam.cpp",
        "wordrec/gradechop.cpp",
        "wordrec/heuristic.cpp",
        "wordrec/makechop.cpp",
        "wordrec/matchtab.cpp",
        "wordrec/matrix.cpp",
        "wordrec/metrics.cpp",
        "wordrec/mfvars.cpp",
        "wordrec/msmenus.cpp",
        "wordrec/olutil.cpp",
        "wordrec/outlines.cpp",
        "wordrec/pieces.cpp",
        "wordrec/plotedges.cpp",
        "wordrec/plotseg.cpp",
        "wordrec/render.cpp",
        "wordrec/seam.cpp",
        "wordrec/split.cpp",
        "wordrec/tally.cpp",
        "wordrec/tessinit.cpp",
        "wordrec/tface.cpp",
        "wordrec/wordclass.cpp",
        "wordrec/wordrec.cpp",
    ],
    # The stray "cstruct" entry was dropped: no rule in this file has sources
    # under such a directory, and "ccstruct" is already listed.
    includes = [
        "ccstruct",
        "ccutil",
        "cutil",
        "classify",
        "image",
        "dict",
        "viewer",
    ],
    deps = [
        "tesseract_cc_struct",
        "tesseract_classify",
        "tesseract_image",
        "tesseract_dict",
        "tesseract_c_util",
        "tesseract_cc_util",
    ],
)

# Sources under pageseg/, compiled with HAVE_LIBLEPT against Leptonica.
cc_library(
    name = "tesseract_pageseg",
    srcs = [
        "pageseg/pageseg.cpp",
        "pageseg/leptonica_pageseg.cpp",
        "pageseg/leptonica_pageseg_interface.cpp",
    ],
    copts = ["-DHAVE_LIBLEPT"],
    includes = [
        "ccstruct",
        "ccutil",
        "ccmain",
        "image",
        "textord",
        "viewer",
        "third_party/leptonica/includes",
    ],
    deps = [
        "tesseract_image",
        "//third_party/leptonica",
    ],
)

# Sources under ccmain/ (everything except tesseractmain.cpp, which the
# "tesseract" binary compiles itself); pulls in the other Tesseract libraries
# plus the external tiff and Leptonica packages.
cc_library(
    name = "tesseract_main",
    srcs = [
        "ccmain/tessedit.cpp",
        "ccmain/adaptions.cpp",
        "ccmain/applybox.cpp",
        "ccmain/baseapi.cpp",
        "ccmain/blobcmp.cpp",
        "ccmain/callnet.cpp",
        "ccmain/charcut.cpp",
        "ccmain/charsample.cpp",
        "ccmain/control.cpp",
        "ccmain/docqual.cpp",
        "ccmain/expandblob.cpp",
        "ccmain/fixspace.cpp",
        "ccmain/fixxht.cpp",
        "ccmain/imgscale.cpp",
        "ccmain/matmatch.cpp",
        "ccmain/osdetect.cpp",
        "ccmain/output.cpp",
        "ccmain/otsuthr.cpp",
        "ccmain/pagewalk.cpp",
        "ccmain/paircmp.cpp",
        "ccmain/pgedit.cpp",
        "ccmain/reject.cpp",
        "ccmain/scaleimg.cpp",
        "ccmain/tessbox.cpp",
        "ccmain/tesseractclass.cpp",
        "ccmain/tessvars.cpp",
        "ccmain/thresholder.cpp",
        "ccmain/tfacepp.cpp",
        "ccmain/tstruct.cpp",
        "ccmain/varabled.cpp",
        "ccmain/werdit.cpp",
    ],
    # Set through `defines` rather than `copts` so the macros are also applied
    # to targets that depend on this library.
    defines = [
        "HAVE_LIBTIFF",
        "HAVE_LIBLEPT",
    ],
    includes = [
        "ccutil",
        "ccstruct",
        "image",
        "viewer",
        "dict",
        "classify",
        "wordrec",
        "cutil",
        "textord",
        "aspirin",
        "ccmain",
        "third_party/tiff",
        "third_party/leptonica/includes",
    ],
    deps = [
        "tesseract_wordrec",
        "tesseract_textord",
        "tesseract_cc_struct",
        "tesseract_classify",
        "tesseract_image",
        "tesseract_aspirin",
        "tesseract_dict",
        "tesseract_c_util",
        "tesseract_cc_util",
        "//third_party/tiff",
        "//third_party/leptonica",
        "scrollview_lib",
    ],
)

# Sources under training/ that are shared by the cntraining and mftraining
# binaries.
cc_library(
    name = "tesseract_training",
    srcs = [
        "training/name2char.cpp",
        "training/commontraining.cpp",
        "training/mergenf.cpp",
    ],
    includes = [
        "cutil",
        "ccutil",
        "dict",
        "training",
        "classify",
    ],
    deps = [
        "tesseract_textord",
        "tesseract_c_util",
        "tesseract_cc_util",
    ],
)

#-----------------------------------------------------------------------------

# BINARIES

# Binary built from ccmain/tesseractmain.cpp on top of tesseract_main.
cc_binary(
    name = "tesseract",
    srcs = ["ccmain/tesseractmain.cpp"],
    copts = ["-DHAVE_LIBTIFF"],
    includes = [
        "ccutil",
        "ccstruct",
        "image",
        "viewer",
        "dict",
        "classify",
        "wordrec",
        "cutil",
        "textord",
        "aspirin",
        "ccmain",
    ],
    deps = [
        "tesseract_main",
        "//third_party/tiff",
    ],
)

# Binary built from training/cntraining.cpp.
cc_binary(
    name = "cntraining",
    srcs = ["training/cntraining.cpp"],
    includes = [
        "ccutil",
        "cutil",
        "viewer",
        "dict",
        "classify",
    ],
    deps = [
        "tesseract_training",
        "tesseract_classify",
    ],
)

# Binary built from training/mftraining.cpp.
cc_binary(
    name = "mftraining",
    srcs = ["training/mftraining.cpp"],
    includes = [
        "ccutil",
        "cutil",
        "viewer",
        "dict",
        "classify",
    ],
    deps = [
        "tesseract_training",
        "tesseract_classify",
    ],
)

# Binary built from viewer/svpaint.cpp on top of scrollview_lib.
cc_binary(
    name = "svpaint",
    srcs = ["viewer/svpaint.cpp"],
    deps = ["scrollview_lib"],
)

# Binary built from training/unicharset_extractor.cpp; needs only
# tesseract_cc_util.
cc_binary(
    name = "unicharset_extractor",
    srcs = ["training/unicharset_extractor.cpp"],
    includes = ["ccutil"],
    deps = ["tesseract_cc_util"],
)

# Binary built from training/wordlist2dawg.cpp.
cc_binary(
    name = "wordlist2dawg",
    srcs = ["training/wordlist2dawg.cpp"],
    includes = [
        "ccutil",
        "cutil",
        "dict",
        "classify",
    ],
    deps = [
        "tesseract_dict",
        "tesseract_classify",
        "tesseract_cc_util",
        "tesseract_c_util",
    ],
)

#-----------------------------------------------------------------------------
# Data

# Export the "tessdata" configuration data so that other packages can
# reference it (needed for Blaze compliance).
exports_files(["tessdata"])

#-----------------------------------------------------------------------------
# UNITTESTS