-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrepetition-error.el
1344 lines (1289 loc) · 46.3 KB
/
repetition-error.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; repetition-error.el --- Interactive tools to find repetition errors in the buffer.
;; Copyright (C) 2015 Albert Heinle
;; Author: Albert Heinle <[email protected]>
;; Keywords: matching, convenience, files
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; General Description ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The main function in this package is "find-reperr". It
;; takes a starting point and an end point, and if the section is long
;; enough (i.e. enough words given), it will highlight repetitions,
;; and ask the user either to continue and ignore that repetition, or
;; to quit the search. In the latter case, the current cursor remains
;; at the position, and the user can correct the error, if he or she
;; wants.
;; There are the following functions that can be interactively
;; called.
;; find-reperr-whole-buffer:
;; Runs the function "find-reperr" from the beginning
;; until the end of the whole buffer
;; find-reperr-from-point:
;; Runs the function "find-reperr" from the current cursor
;; position until the end of the document. This can also be used as
;; a way to resume a previously stopped repetition error search
;; (after the error has been corrected -- or not)
;;
;; Just try to call one of these functions, and the usage is quite
;; straight forward.
;;
;; What do our functions recognize as words:
;; - Anything fulfilling the following regular expression:
;; [a-zA-Z]{4,}, i.e. any trailing whitespaces or other symbols
;; will be ignored.
;; How does the function move forward?
;; - We always check a block of size 100 (default value, which can
;; be changed by setting the variable
;; repetition-error-word-block-size). If nothing is found in this
;; block, the starting point moves forward by one word and tries
;; again. This is repeated until either the last 100 words are reached
;; and nothing was found, or until a repetition error has been
;; revealed. If the user decides to ignore it and moves forward, the
;; repeated word in that block will be saved with its position. At the
;; moment the cursor moves forward, all ignored repeated words
;; are saved together with the bounds of the block they were
;; encountered in. This is done because we don't want to repeatedly
;; warn the user about repeated words in the same area; these
;; words will be ignored for all areas that intersect with the already
;; considered and ignored one.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; TODOs: ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; - Experiment with different block sizes and repetition
;; occurrences.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; CODE:
(defvar repetition-error-word-block-size 100
"Number of words per block examined for repetitions (positive integer).
The repetition-error routines look at this many consecutive words at
a time when they search for repeated words.  The initial value is 100."
);repetition-error-word-block-size
(defvar repetition-error-min-occurrence 2
"Occurrences of a word in one block that count as an error (positive integer).
A word has to appear at least this many times inside one word block
to be considered a repetition error.  The initial value is 2."
);repetition-error-min-occurrence
(defvar min-not-ignore-letters 4
"Minimum length of a word that takes part in the search (positive integer).
Shorter words are ignored when looking for repetitions.  With the
standard value of 4, words like and, it, ... are ignored, while words
like then, than, they are not."
);min-not-ignore-letters
(defvar rep-temp-word-block nil
"Word block remembered between steps of the search, or nil.
When non-nil, this is a triple of the form (S I J): S is a string,
I and J are non-negative integers with I < J giving the position
where S starts in the buffer and the position where it ends.
Remark: J is not necessarily I + length(S), because an ignore list
may be in effect."
)
(defun transform-complete-ci-string (s)
  "Return S with each letter replaced by a bracket expression for both cases.
Every character of S that matches the class [[:alpha:]] becomes a
bracket expression holding its lower-case form followed by its
upper-case form, so \"ab\" is turned into \"[aA][bB]\".  All other
characters are copied unchanged."
  (let ((both-cases
         ;; "k" or "K" -> "[kK]"
         (lambda (letter)
           (format "[%s%s]" (downcase letter) (upcase letter)))))
    ;; FIXEDCASE is t so that the replacement text is inserted exactly
    ;; as built above, without any case conversion.
    (replace-regexp-in-string "[[:alpha:]]" both-cases s t)))
(byte-compile 'transform-complete-ci-string)
(ert-deftest transform-complete-ci-string-test ()
  "Check `transform-complete-ci-string' against a table of inputs.
The table covers, in this order:
1. the empty string
2. one lower-case letter
3. one upper-case letter
4. several letters, all lower-case
5. several letters, all upper-case
6. several letters in mixed case"
  (dolist (sample '(("" . "")
                    ("k" . "[kK]")
                    ("K" . "[kK]")
                    ("should" . "[sS][hH][oO][uU][lL][dD]")
                    ("SHOULD" . "[sS][hH][oO][uU][lL][dD]")
                    ("sHoulD" . "[sS][hH][oO][uU][lL][dD]")))
    ;; car = input string, cdr = expected regular expression
    (should (equal (transform-complete-ci-string (car sample))
                   (cdr sample)))))
(defun find-reperr-whole-buffer ()
  "Search the whole buffer for repetition errors.
Runs `find-reperr' from `point-min' to `point-max', using
`repetition-error-word-block-size' as the block size and
`repetition-error-min-occurrence' as the repetition threshold.
Repeated words found by `find-reperr' are highlighted, and the user
is asked at a prompt whether to continue the search or to quit.
SIDE EFFECTS:
- Takes user input
- Highlights text"
  (interactive)
  (let ((scan-from (point-min))
        (scan-to (point-max)))
    (find-reperr scan-from
                 scan-to
                 repetition-error-word-block-size
                 repetition-error-min-occurrence)))
(defun find-reperr-latex-whole-buffer ()
  "Search the whole buffer for repetition errors, skipping LaTeX markup.
Runs `find-reperr' from `point-min' to `point-max', using
`repetition-error-word-block-size' as the block size,
`repetition-error-min-occurrence' as the repetition threshold, and
the result of `create-ignore-list-for-latex-buffer' as the list of
regions to ignore.
Repeated words found by `find-reperr' are highlighted, and the user
is asked at a prompt whether to continue the search or to quit.
SIDE EFFECTS:
- Takes user input
- Highlights text"
  (interactive)
  ;; The argument list is built left to right, so the buffer bounds
  ;; are read before the ignore list is computed.
  (apply #'find-reperr
         (list (point-min)
               (point-max)
               repetition-error-word-block-size
               repetition-error-min-occurrence
               (create-ignore-list-for-latex-buffer))))
(defun find-reperr-from-point ()
  "Search for repetition errors from point to the end of the buffer.
Runs `find-reperr' from the current cursor position to `point-max',
using `repetition-error-word-block-size' as the block size and
`repetition-error-min-occurrence' as the repetition threshold.
Repeated words found by `find-reperr' are highlighted, and the user
is asked at a prompt whether to continue the search or to quit.
SIDE EFFECTS:
- Takes user input
- Highlights text"
  (interactive)
  (let ((scan-from (point))
        (scan-to (point-max)))
    (find-reperr scan-from
                 scan-to
                 repetition-error-word-block-size
                 repetition-error-min-occurrence)))
(defun find-reperr-latex-from-point ()
  "Search from point for repetition errors, skipping LaTeX markup.
Runs `find-reperr' from the current cursor position to `point-max',
using `repetition-error-word-block-size' as the block size,
`repetition-error-min-occurrence' as the repetition threshold, and
the result of `create-ignore-list-for-latex-buffer' as the list of
regions to ignore.
Repeated words found by `find-reperr' are highlighted, and the user
is asked at a prompt whether to continue the search or to quit.
SIDE EFFECTS:
- Takes user input
- Highlights text"
  (interactive)
  ;; The argument list is built left to right, so point and the buffer
  ;; end are read before the ignore list is computed.
  (apply #'find-reperr
         (list (point)
               (point-max)
               repetition-error-word-block-size
               repetition-error-min-occurrence
               (create-ignore-list-for-latex-buffer))))
(defun find-reperr--advance-one-word (end ignlist)
  "Advance point by one word for `find-reperr'; return nil when done.
Point is moved past the next run of whitespace found before END.  If
IGNLIST is non-nil and point then lies inside one of its intervals,
as reported by `is-point-in-ignore-list', point hops to the position
one past the end of that interval, and this is repeated while point
keeps landing inside an interval.

The return value is non-nil while the search may go on.  It is nil
when no whitespace was left before END, when point moved beyond END,
or when an interval containing point reaches END."
  (let ((keep-going t)
        (interval nil))
    ;; A failed search leaves point where it was.  Without this check
    ;; the caller would look at the very same block forever, e.g. in a
    ;; buffer that does not end in whitespace.
    (unless (re-search-forward "[[:space:]\n]+" end t)
      (setq keep-going nil))
    (recenter 0)
    (when (> (point) end)
      (setq keep-going nil))
    ;; With an ignore list we can move even further.
    (when ignlist
      (setq interval (is-point-in-ignore-list (point) ignlist))
      (while (and keep-going
                  interval
                  (<= (nth 1 interval) end))
        ;; NOTE(review): the +1 is taken over from the original code;
        ;; confirm it against the interval convention used by
        ;; `is-point-in-ignore-list'.
        (goto-char (+ 1 (nth 1 interval)))
        (recenter 0)
        (setq interval (is-point-in-ignore-list (point) ignlist))
        (when (or (> (point) end)
                  (and interval (>= (nth 1 interval) end)))
          (setq keep-going nil))))
    keep-going))

(defun find-reperr (begin end &optional nWords minRep ignlist)
  "Interactively step through repetition errors between BEGIN and END.
BEGIN and END are buffer positions.  Nothing is searched unless END
is greater than BEGIN.

The text is examined in blocks of NWORDS words.  A word that occurs
at least MINREP times in a block, as determined by `exceeders', is
reported, unless `filter-known-words' recognises it as already
reported for an overlapping block.  NWORDS defaults to
`repetition-error-word-block-size' and MINREP defaults to
`repetition-error-min-occurrence'.

For every reported word the word is highlighted and the user is
prompted: pressing c continues the search, any other key ends it.
After each block the start of the search moves forward by one word.
The search ends when a block reaches END or when point cannot be
advanced any further.

IGNLIST, if non-nil, is a list of intervals, each a list of two
positions, in which no repetition errors shall be searched (e.g.
LaTeX commands).

SIDE EFFECTS:
- Takes user input
- Highlights text
- Moves point and recenters the window
- Resets `rep-temp-word-block'
ASSUMPTIONS:
- The elements in IGNLIST are sorted by their first entry."
  (unless nWords
    (setq nWords repetition-error-word-block-size))
  (unless minRep
    (setq minRep repetition-error-min-occurrence))
  ;; Invalid bounds: skip the search, only the final message is shown.
  (when (> end begin)
    (goto-char begin)
    (setq rep-temp-word-block nil)
    (recenter 0)
    (let ((flag t)                ; nil once the search has to stop
          (curWordBlock nil)      ; (text start end) of the current block
          (exc nil)               ; repeated words not reported before
          (tempExc nil)           ; rest of EXC still to be shown
          (tempKnownsList nil)    ; words already reported, with bounds
          (usrcmd nil)            ; last key typed at the prompt
          (temp-pos 0)            ; point before jumping to a match
          (word-re nil))          ; case-insensitive regexp for a word
      (while flag
        (if (not ignlist)
            (setq curWordBlock (get-next-n-words-from-point nWords (point)))
          ;; Intervals that end before point cannot matter any more.
          (while (and ignlist
                      (< (nth 1 (nth 0 ignlist)) (point)))
            (setq ignlist (cdr ignlist)))
          (setq curWordBlock
                (get-next-n-words-with-ignore-list nWords (point) ignlist)))
        (if (>= (nth 2 curWordBlock) end)
            ;; The block reaches the end of the region.
            (setq flag nil)
          (setq exc (filter-known-words tempKnownsList
                                        (exceeders (nth 0 curWordBlock)
                                                   minRep)
                                        (nth 1 curWordBlock)
                                        (nth 2 curWordBlock)))
          (setq tempKnownsList
                (update-knowns-list tempKnownsList exc
                                    (nth 1 curWordBlock)
                                    (nth 2 curWordBlock)))
          (setq tempExc exc)
          (while tempExc
            (setq temp-pos (point))
            (setq word-re (transform-complete-ci-string (car (car tempExc))))
            (re-search-forward word-re end t)
            (recenter 0)
            (highlight-regexp word-re)
            ;; Take the highlighting away again even if `read-char'
            ;; is left through a quit or an error.
            (unwind-protect
                (setq usrcmd
                      (read-char (format "Repeated word: \"%s\". (c) Continue search for repetition errors or (any key) quit?" (car (car tempExc)))))
              (unhighlight-regexp word-re))
            (goto-char temp-pos)
            (recenter 0)
            (setq tempExc (cdr tempExc))
            (unless (equal usrcmd ?c)
              (setq flag nil)
              (setq tempExc nil)))
          ;; Move on by one word, whether or not something was reported.
          (unless (find-reperr--advance-one-word end ignlist)
            (setq flag nil))))))
  (message "Finished finding repetition errors"))
(byte-compile 'find-reperr)
(defun update-knowns-list (knownList newExceeders leftBound
                                     rightBound)
  "Return KNOWNLIST without outdated entries, extended by NEWEXCEEDERS.
KNOWNLIST is a list whose entries have the form (S I J), a string
followed by two integers.  NEWEXCEEDERS is a list of entries whose
first element is a string.  LEFTBOUND and RIGHTBOUND are integers.

Entries of KNOWNLIST with J < LEFTBOUND are dropped.  The remaining
entries are followed, in order, by one entry (S LEFTBOUND RIGHTBOUND)
for every entry of NEWEXCEEDERS, S being its string.
ASSUMPTIONS:
- NEWEXCEEDERS has no intersection with KNOWNLIST, given the left and
  the right bound."
  (let ((still-relevant
         (cl-remove-if (lambda (entry) (< (nth 2 entry) leftBound))
                       knownList))
        (fresh-entries
         (mapcar (lambda (exceeder)
                   (list (nth 0 exceeder) leftBound rightBound))
                 newExceeders)))
    ;; `append' copies STILL-RELEVANT, so KNOWNLIST is never shared
    ;; with the returned list's spine.
    (append still-relevant fresh-entries nil)))
(byte-compile 'update-knowns-list)
;; Tests
(ert-deftest test-update-knowns-list ()
  "Check `update-knowns-list' against a table of scenarios.
Each scenario lists the four arguments followed by the expected
result.  The scenarios are:
1. both lists are empty
2. the known list is empty
3. no new exceeders, and the known list is returned as it is
4. no new exceeders, and entries of the known list are filtered
5. both lists are non-empty and are cleanly concatenated
6. both lists are non-empty, and entries of the known list are
   filtered"
  (dolist (scenario
           '(;; 1.
             (nil nil 0 1
                  nil)
             ;; 2.
             (nil (("hello" 0) ("kitty" 6)) 0 100
                  (("hello" 0 100) ("kitty" 0 100)))
             ;; 3.
             ((("hello" 0 50) ("kitty" 6 50)) nil 0 100
              (("hello" 0 50) ("kitty" 6 50)))
             ;; 4.
             ((("hello" 0 50) ("kitty" 6 60)) nil 51 100
              (("kitty" 6 60)))
             ;; 5.
             ((("hello" 0 50) ("kitty" 0 50))
              (("Dear Daniel" 70) ("Badtz-Maru" 85)) 0 100
              (("hello" 0 50) ("kitty" 0 50)
               ("Dear Daniel" 0 100) ("Badtz-Maru" 0 100)))
             ;; 6.
             ((("hello" 0 50) ("kitty" 0 60))
              (("Dear Daniel" 70) ("Badtz-Maru" 85)) 51 100
              (("kitty" 0 60)
               ("Dear Daniel" 51 100) ("Badtz-Maru" 51 100)))))
    (should (equal (update-knowns-list (nth 0 scenario)
                                       (nth 1 scenario)
                                       (nth 2 scenario)
                                       (nth 3 scenario))
                   (nth 4 scenario)))))
(defun filter-known-words (knownList newExceeders leftBound rightBound)
  "Return NEWEXCEEDERS without the words already known for this area.
KNOWNLIST is a list whose entries have the form (S I J), a string
followed by two integers.  NEWEXCEEDERS is a list of entries whose
first element is a string.  LEFTBOUND and RIGHTBOUND are integers.

An entry of NEWEXCEEDERS is left out of the result if some entry
\(S I J) of KNOWNLIST carries the same string and satisfies both
I < RIGHTBOUND and J > LEFTBOUND, i.e. its interval overlaps the
interval from LEFTBOUND to RIGHTBOUND.
ASSUMPTIONS:
- In every entry (S I J) of KNOWNLIST, we always have I < J
- LEFTBOUND < RIGHTBOUND"
  (cl-remove-if
   (lambda (candidate)
     ;; Non-nil as soon as one known entry matches CANDIDATE's word
     ;; and overlaps the current bounds.
     (cl-some (lambda (seen)
                (and (equal (nth 0 seen) (nth 0 candidate))
                     (< (nth 1 seen) rightBound)
                     (> (nth 2 seen) leftBound)))
              knownList))
   newExceeders))
;(byte-compile 'filter-known-words)
;; Tests
(ert-deftest test-filter-known-words ()
  "Check `filter-known-words' against a table of scenarios.
Each scenario lists the four arguments followed by the expected
result.  The scenarios are:
1. Both lists are empty.
2. The known list is empty, the new exceeders are not.
3. The known list is non-empty, the new exceeders are empty.
4. Both lists are non-empty, nothing is filtered because the words
   differ.
5. Both lists are non-empty, nothing is filtered because the interval
   of the matching known entry does not fit.
6. Both lists are non-empty, and one new exceeder is filtered."
  (dolist (scenario
           '(;; 1.
             (nil nil 0 100
                  nil)
             ;; 2.
             (nil (("abc" 4) ("def" 2)) 19 30
                  (("abc" 4) ("def" 2)))
             ;; 3.
             ((("abc" 4 20) ("def" 2 35)) nil 19 30
              nil)
             ;; 4.
             ((("abc" 4 20) ("def" 2 35)) (("cde" 10) ("efg" 15)) 0 100
              (("cde" 10) ("efg" 15)))
             ;; 5.
             ((("abc" 1 18) ("cde" 5 25)) (("abc" 4) ("def" 2)) 19 30
              (("abc" 4) ("def" 2)))
             ;; 6.
             ((("abc" 1 20) ("cde" 5 25)) (("abc" 4) ("def" 2)) 19 30
              (("def" 2)))))
    (should (equal (filter-known-words (nth 0 scenario)
                                       (nth 1 scenario)
                                       (nth 2 scenario)
                                       (nth 3 scenario))
                   (nth 4 scenario)))))
(defun get-next-n-words-from-point (n p)
  "Return a word block as a list of text, start position and end position.
With `rep-temp-word-block' unset, go to position P and skip forward
over at most N runs of whitespace, stopping early when none is left.
The block is the buffer text between P and the position reached.

With `rep-temp-word-block' set, N and P are ignored.  The start and
the end of the remembered block are each moved past the next run of
whitespace, and the buffer text in between becomes the new block.

ASSUMPTIONS:
- P is at the beginning of a word
SIDE EFFECTS:
- The new block is stored in `rep-temp-word-block'
- Point is left at the start of the returned block"
  (goto-char p)
  (if rep-temp-word-block
      ;; Slide the remembered block forward by one word.
      (let ((old-stop (nth 2 rep-temp-word-block))
            (new-start nil)
            (new-stop nil))
        (goto-char (nth 1 rep-temp-word-block))
        (re-search-forward "[[:space:]\n]+" (point-max) t)
        (setq new-start (point))
        (goto-char old-stop)
        (re-search-forward "[[:space:]\n]+" (point-max) t)
        (setq new-stop (point))
        (goto-char new-start)
        (setq rep-temp-word-block
              (list (buffer-substring-no-properties new-start new-stop)
                    new-start
                    new-stop)))
    ;; Build a fresh block.  A successful search always moves point,
    ;; because the regexp matches at least one character; a failed one
    ;; means there is no whitespace left and ends the loop.
    (let ((remaining n)
          (stop nil))
      (while (and (> remaining 0)
                  (re-search-forward "[[:space:]\n]+" (point-max) t))
        (setq remaining (1- remaining)))
      (setq stop (point))
      (goto-char p)
      (setq rep-temp-word-block
            (list (buffer-substring-no-properties p stop) p stop)))))
(byte-compile 'get-next-n-words-from-point)
(ert-deftest get-next-n-words-from-point-test ()
"Exercise `get-next-n-words-from-point' on the files in ./test_files.
Cases 1 to 7 start with `rep-temp-word-block' reset to nil:
1. An empty buffer.
2. Three-word file, asking for exactly the number of words available.
3. Three-word file, asking for one word more than available.
4. Fifty-word file, asking for exactly the number of words available.
5. Fifty-word file, asking for one word more than available.
6. Fifty-word file, asking for far more words than available.
7. Fifty-word file, asking for only the first three words.
Case 8 presets `rep-temp-word-block':
8. Boundary case: moving one word forward in a block where there is
only one more word."
;; 1. Empty buffer: the block is the empty string at position 1.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/empty_test_buffer.txt"))
(should (equal (get-next-n-words-from-point 100 1) (list "" 1 1)))
(kill-buffer "empty_test_buffer.txt")
;; 2. Exactly three words requested from the three-word file.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/test_buffer_3_words.txt"))
(should (equal (get-next-n-words-from-point 3 1) (list "Lorem ipsum \
dolor.\n" 1 20)))
(kill-buffer "test_buffer_3_words.txt")
;; 3. Four words requested, only three available: same block as in 2.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/test_buffer_3_words.txt"))
(should (equal (get-next-n-words-from-point 4 1) (list "Lorem ipsum \
dolor.\n" 1 20)))
(kill-buffer "test_buffer_3_words.txt")
;; 4. Exactly fifty words requested from the fifty-word file.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/test_buffer_50_words.txt"))
(should (equal (get-next-n-words-from-point 50 1) (list "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua. At vero eos et accusam et justo duo dolores et ea
rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem
ipsum dolor sit amet.
" 1 297)))
(kill-buffer "test_buffer_50_words.txt")
;; 5. Fifty-one words requested, fifty available: same block as in 4.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/test_buffer_50_words.txt"))
(should (equal (get-next-n-words-from-point 51 1) (list "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua. At vero eos et accusam et justo duo dolores et ea
rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem
ipsum dolor sit amet.
" 1 297)))
(kill-buffer "test_buffer_50_words.txt")
;; 6. One hundred words requested, fifty available: same block as in 4.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/test_buffer_50_words.txt"))
(should (equal (get-next-n-words-from-point 100 1) (list "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua. At vero eos et accusam et justo duo dolores et ea
rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem
ipsum dolor sit amet.
" 1 297)))
(kill-buffer "test_buffer_50_words.txt")
;; 7. Only the first three words of the fifty-word file.
(setq rep-temp-word-block nil)
(set-buffer (find-file "./test_files/test_buffer_50_words.txt"))
(should (equal (get-next-n-words-from-point 3 1) (list "Lorem ipsum \
dolor " 1 19)))
(kill-buffer "test_buffer_50_words.txt")
;; 8. A preset block of the first two words is moved on by one word;
;; the arguments 2 and 1 are ignored in this mode.
(setq rep-temp-word-block (list "Lorem ipsum " 1 13))
(set-buffer (find-file "./test_files/test_buffer_3_words.txt"))
(should (equal (get-next-n-words-from-point 2 1) (list "ipsum \
dolor.\n" 7 20)))
(kill-buffer "test_buffer_3_words.txt")
);get-next-n-words-from-point-test
(defun create-ignore-list-by-regexp (inpRE)
  "Return the positions of the matches of INPRE in the current buffer.
The buffer is searched from `point-min' onwards.  Every match adds
one two-element list to the result, holding the position where the
match begins and the position where it ends.  The entries are in
buffer order.

The search stops at the first search that does not move point, that
is when INPRE is not found any more or when it matches the empty
string.  Point is preserved."
  (save-excursion
    (goto-char (point-min))
    (let ((last-stop (point))
          (spans nil))
      (while (and (re-search-forward inpRE (point-max) t)
                  ;; An empty match leaves point in place; give up
                  ;; rather than loop on it.
                  (/= (point) last-stop))
        (push (list (match-beginning 0) (point)) spans)
        (setq last-stop (point)))
      (nreverse spans))))
(byte-compile 'create-ignore-list-by-regexp)
;; Tests
(ert-deftest create-ignore-list-by-regexp-test ()
  "Check `create-ignore-list-by-regexp' on the files in ./test_files.
Each scenario names a file, a regular expression and the expected
result.  The scenarios are:
1. empty file, empty regexp
2. empty file, non-empty regexp
3. non-empty file, empty regexp
4. the regexp does not occur in a non-empty file
5. the regexp occurs exactly once in a non-empty file
6. the regexp occurs more than once in a non-empty file"
  (dolist (scenario
           '(;; 1.
             ("./test_files/empty_test_buffer.txt" "" nil)
             ;; 2.
             ("./test_files/empty_test_buffer.txt" "[a-zA-Z]+" nil)
             ;; 3.
             ("./test_files/test_buffer_50_words.txt" "" nil)
             ;; 4.
             ("./test_files/test_buffer_50_words.txt" "[0-9]+!$" nil)
             ;; 5.
             ("./test_files/test_buffer_3_words.txt" "Lorem" ((1 6)))
             ;; 6.
             ("./test_files/test_buffer_50_words.txt" "Lorem"
              ((1 6) (269 274)))))
    (set-buffer (find-file (nth 0 scenario)))
    (should (equal (create-ignore-list-by-regexp (nth 1 scenario))
                   (nth 2 scenario)))
    ;; The buffer is killed by name, which is the file name without
    ;; its directory.
    (kill-buffer (file-name-nondirectory (nth 0 scenario)))))
(defun create-ignore-list-for-latex-buffer ()
  "Return the parts of a LaTeX buffer the repetition search should skip.
The result is a list of two-element lists, each holding the start and
the end position of one ignored stretch of the current buffer.  The
following is collected, in this order:
- comments, from a percent sign to the end of the line
- eqnarray and align environments, starred or not, from their begin
  command to their end command
- inline math between backslash-parenthesis delimiters
- display math between backslash-bracket delimiters
- line breaks with a length argument, e.g. a double backslash
  followed by 12pt in brackets
- math between single or double dollar signs
- begin and end commands in general, with their braced argument
- any other command: a backslash followed by letters, digits or
  brackets
Environment, begin/end and command matches are left out when they
start inside a stretch collected earlier; a dollar sign is not taken
as a delimiter when it lies inside one.  A dollar sign without a
closing partner is skipped.
GENERAL ASSUMPTIONS:
- The returned ignore-list is sorted by the first element of each
  contained list."
  (let ((curpos (point-min))
        (result ()))
    ;; comments
    (setq result (append result (create-ignore-list-by-regexp
                                 "%.*?$")))
    ;; begin/end eqnarray, then begin/end align
    (dolist (env-re
             '("[\\]begin{eqnarray[\*]?}\\(.\\|\n\\)+?[\\]end{eqnarray[\*]?}"
               "[\\]begin{align[\*]?}\\(.\\|\n\\)+?[\\]end{align[\*]?}"))
      (setq result
            (append result
                    (cl-remove-if
                     (lambda (x)
                       (is-point-in-ignore-list (nth 0 x) result))
                     (create-ignore-list-by-regexp env-re)))))
    ;; math notations a la \( ... \)
    (setq result (append result (create-ignore-list-by-regexp
                                 "[\\][(]\\(.\\|\n\\)+?[\\][)]")))
    ;; math notations a la \[ ... \]
    ;; The regexp also consumes the character in front of the opening
    ;; backslash, so that the case \\[12pt] is left to the next
    ;; expression; the start is moved on by one to make up for it.
    ;; NOTE(review): when the match begins at a line start instead,
    ;; no character is consumed and the +1 skips the backslash itself.
    (setq result
          (append result
                  (mapcar
                   (lambda (m) (list (+ 1 (nth 0 m)) (nth 1 m)))
                   (create-ignore-list-by-regexp
                    "\\([^\\]\\|^\\)[\\]\\[\\(.\\|\n\\)+?[\\]\\]"))))
    ;; line breaks with a length, e.g. \\[12pt]
    (setq result (append result (create-ignore-list-by-regexp
                                 "[\\][\\]\\[[0-9]+[[:alpha:]]*?\\]")))
    ;; math mode initialized by $ or $$
    (while (< curpos (point-max))
      (if (and (eq (char-after curpos) ?$)
               (not (is-point-in-ignore-list curpos result)))
          (let* ((doubleDollar (eq (char-after (+ curpos 1)) ?$))
                 (width (if doubleDollar 2 1))
                 (closer (+ curpos width)))
            ;; Look for the next dollar sign, but never beyond the
            ;; end of the buffer, where `char-after' returns nil.
            (while (and (< closer (point-max))
                        (not (eq (char-after closer) ?$)))
              (setq closer (+ closer 1)))
            (if (< closer (point-max))
                (progn
                  (setq result
                        (cons (list curpos (+ closer width)) result))
                  (setq curpos (+ closer width)))
              ;; No closing dollar sign: this one opens nothing.
              (setq curpos (+ curpos 1))))
        (setq curpos (+ curpos 1))))
    ;; begin{...} in general, end{...} in general, other commands
    (dolist (cmd-re
             '("[\\]begin{.+?}"
               "[\\]end{.+?}"
               "[\\]\\([[:alnum:]]\\|\\[\\|\\]\\|\\)+"))
      (setq result
            (append result
                    (cl-remove-if
                     (lambda (x)
                       (is-point-in-ignore-list (nth 0 x) result))
                     (create-ignore-list-by-regexp cmd-re)))))
    (sort result (lambda (x y) (<= (nth 0 x) (nth 0 y))))))
(byte-compile 'create-ignore-list-for-latex-buffer)
;; Tests
(ert-deftest create-ignore-list-for-latex-buffer-test ()
  "Here, we test the function create-ignore-list-for-latex-buffer.
The test cases are the following:
1. empty buffer.
2. Buffer with no LaTeX in it.
3. Valid LaTeX-Buffer, containing all the ignored LaTeX constructs.
Every fixture buffer is killed again, even if its assertion fails."
  (dolist (fixture
           '(;;1.
             ("./test_files/empty_test_buffer.txt" nil)
             ;;2.
             ("./test_files/test_buffer_50_words.txt" nil)
             ;;3.
             ("./test_files/latex_test_file.tex"
              ((1 66) (67 134) (135 197) (198 261) (262 328)
               (329 391) (392 458) (459 521) (522 580) (581 647)
               (648 659) (661 724) (726 787) (809 872) (912 926)
               (1008 1018) (1103 1144) (1178 1199) (1199 1207)
               (1238 1250) (1252 1267) (1268 1273) (1283 1288)
               (1298 1303) (1313 1318) (1328 1333) (1343 1348)
               (1358 1371) (1373 1381) (1408 1418)))))
    (let
        (;let definitions
         ;; Keep the buffer object: killing by name could hit another
         ;; buffer if the name had to be uniquified.
         (fixtureBuffer (find-file (nth 0 fixture)))
         );let definitions
      (unwind-protect
          (progn
            (set-buffer fixtureBuffer)
            (should (equal (create-ignore-list-for-latex-buffer)
                           (nth 1 fixture)))
            );progn
        ;; Runs on success and on a failed `should' alike.
        (kill-buffer fixtureBuffer)
        );unwind-protect
      );let
    );dolist
  );create-ignore-list-for-latex-buffer-test
(defun is-point-in-ignore-list (p ign)
  "Integer->listof (Integer Integer)->(Integer Integer)
Given an integer P, and a list of integer tuples IGN.
Return the first tuple (i j) in IGN for which i<=P<=j holds (both
bounds inclusive), and nil if there is no such tuple.
"
  ;; `cl-find-if' stops at the first matching interval and does not
  ;; allocate a filtered copy of IGN; this function is called once
  ;; per inspected buffer position, so that matters.
  (cl-find-if
   (lambda (interval)
     (and (<= (nth 0 interval) p)
          (<= p (nth 1 interval))))
   ign)
  );is-point-in-ignore-list
(byte-compile 'is-point-in-ignore-list)
;; Tests:
(ert-deftest is-point-in-ignore-list-test ()
  "Exercise the helper function is-point-in-ignore-list.
Covered cases:
1. The ignore list is empty.
2. The ignore list is not empty, but the point lies in none of its
   intervals.
3. The point lies strictly inside an interval.
4. The point sits on the left boundary of an interval.
5. The point sits on the right boundary of an interval."
  (let
      (;let definitions
       ;; Shared fixture for cases 2 to 5; it is only read.
       (intervals '((5 6) (7 15) (20 75)))
       );let definitions
    ;;1.
    (should-not (is-point-in-ignore-list 3 nil))
    ;;2.
    (should-not (is-point-in-ignore-list 3 intervals))
    ;;3.
    (should (equal '(7 15) (is-point-in-ignore-list 10 intervals)))
    ;;4.
    (should (equal '(20 75) (is-point-in-ignore-list 20 intervals)))
    ;;5.
    (should (equal '(5 6) (is-point-in-ignore-list 6 intervals)))
    );let
  );is-point-in-ignore-list-test
(defun get-next-n-words-with-ignore-list (n p ign)
"Integer->Integer->listof (list int int)->(list string Int Int)
Given an integer n, an integer p, and a list of integer tuples ign.
The parameter p represents a
position in the buffer, n represents a number of words we want to
extract. This function returns a tuple containing
- a string containing the next n words from point p in the buffer, if
available. If for a tuple (i j) in ignore, a word appears at
position somewhere between i and j, it will be ignored.
If there are no n words, then there will be the empty string here
- the point p
- and the position where the string ended
ASSUMPTIONS:
- The point p is at the beginning of a word
- The beginning of a regexp is found after p, not before.
SIDE EFFECTS:
- The cursor will in the end actually be moved to position p