summaryrefslogtreecommitdiff
path: root/scripts/check_mergability.sh
blob: 13da15da30d0704b1835701c42c907a3163de8aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
#!/bin/sh

# How to run:
# * Install the gh cli and run `gh login`: https://github.com/cli/cli/
# * install black isort usort pyupgrade and whatever other tools you want to
#   play with in your active virtualenv
# * also requires "sponge" from moreutils
# * move to a new folder for the script to work in: `mkdir pr_mergability && cd pr_mergability`
# * ../scripts/check_mergability.sh
#
# It'll clone the qutebrowser repo, fetch refs for all the open PRs, check out
# a branch, run auto formatters, try to merge each PR, and report back via CSV
# how badly each merge failed (via "number of conflicting lines").
#
# For details of which auto formatters are run see the `tools` variable down
# near the bottom of the script.
#
# If you've checked out a branch and run auto-formatters or whatever on it
# manually and just want the script to try to merge all PRs you can call it
# with the branch name and it'll do so. Remember to go back up to the work dir
# before calling the script.
#
# If it's been a few days and PRs have been opened or merged delete `prs.json`
# from the working dir to have them re-fetched on next run.
# If PRs have had updates pushed you'll have to update the refs yourself or
# nuke the whole clone in the work dir and let the script re-fetch them all.

# Preflight checks. Each one prints a diagnostic on stderr and exits with
# status 1 so that nothing below runs in a half-working environment.

# requires the github binary, authorized, to list open PRs.
command -v gh >/dev/null || {
  echo "Error: Install the github CLI, gh, make sure it is in PATH and authenticated." >&2
  exit 1
}
# git, jq and python3 are called unconditionally further down; without them
# the loops would quietly produce empty reports.
for cmd in git jq python3; do
  command -v "$cmd" >/dev/null || {
    echo "Error: Requires '$cmd' to be in PATH." >&2
    exit 1
  }
done
# requires some formatting tools available. They are all installable via pip.
all_formatters="black isort usort pyupgrade"
for cmd in $all_formatters; do
  command -v "$cmd" >/dev/null || {
    echo "Error: Requires all these tools to be in PATH (install them with pip): $all_formatters" >&2
    exit 1
  }
done

# The script makes its own clone in ./qutebrowser; refuse to run from inside
# an existing checkout (recognised by qutebrowser/app.py being present).
if [ -e qutebrowser/app.py ]; then
  echo "don't run this from your qutebrowser checkout. Run it from a tmp dir, it'll checkout out a new copy to work on" >&2
  exit 1
fi

# Terminal device used for interactive answers. maybepause is called from
# inside `... | while read` loops where stdin is a pipe, so it reads from
# this path explicitly instead of from stdin.
TTY="$(tty)"
# "yes" while we are single-stepping through failures, "no" otherwise.
DO_PAUSE="no"

# maybepause MSG [FORCE]
#   Print MSG and, when pause mode is active, wait for the user.
#   MSG   - text describing what just happened
#   FORCE - any non-empty value switches pause mode on before prompting
#   With pause mode off (and no FORCE) MSG is simply echoed.
#   Answers: C* -> leave pause mode and continue, Q* -> exit the script with
#   status 0, anything else -> return and stay in pause mode (step).
maybepause () {
  msg="$1"
  force="$2"
  if [ -n "$force" ] ;then
    DO_PAUSE="yes"
  elif [ "$DO_PAUSE" != "yes" ] ;then
    echo "$msg"
    return
  fi

  echo "$msg, investigate in another terminal, continue? [Step|Continue|Quit]"
  # Start from a clean slate: if the read fails (e.g. no usable tty) we must
  # not act on the answer given to a previous prompt.
  response=""
  read -r response < "$TTY"
  case "$response" in
    [Cc]*) DO_PAUSE="no";;
    [Qq]*) exit 0;;
    *) return;;
  esac
}

# Create the private working clone on first run and configure it for this
# script. Abort if the clone fails: otherwise the `git config --local` calls
# would be applied to whatever repository happens to enclose the work dir.
if [ ! -d qutebrowser ]; then
  git clone git@github.com:qutebrowser/qutebrowser.git || {
    echo "Error: could not clone qutebrowser" >&2
    exit 1
  }
  # `git -C` keeps us in the work dir, so there is no cd that could fail.
  # NOTE(review): plain "merge" conflict style presumably keeps the conflict
  # markers in the two-section form the line counting in generate_report
  # greps for -- confirm.
  git -C qutebrowser config --local merge.conflictstyle merge
  git -C qutebrowser config --local rerere.enabled false
fi

[ -e prs.json ] || {
  # (re-)fetch list of open PRs. Pull refs for any new ones.
  # Resets master and qt6-v2 in case they have changed. Does not handle
  # fetching new changes for updated PRs.
  echo "fetching open PRs"
  # The redirection creates prs.json even when gh fails; remove it again so
  # the next run does not mistake an empty file for a fetched list.
  gh -R qutebrowser/qutebrowser pr list -s open --json number,title,mergeable,updatedAt -L 100 > prs.json || {
    rm -f prs.json
    echo "Error: could not list open PRs with gh" >&2
    exit 1
  }
  # Never run the git commands below outside of the clone.
  cd qutebrowser || {
    rm -f ../prs.json prs.json
    echo "Error: could not enter the qutebrowser clone" >&2
    exit 1
  }
  git fetch
  git checkout master && git pull
  git checkout qt6-v2 && git pull
  # this is slow for a fresh clone, idk how to fetch all pull/*/head refs at once
  # Only the PR number is needed here; refs that already exist locally
  # (git describe succeeds) are not fetched again.
  jq -r '.[].number' < ../prs.json | while read -r number; do
    git describe "pr/$number" >/dev/null 2>&1 || git fetch origin "refs/pull/$number/head:pr/$number"
  done
  cd -
}

# Print how many of the fetched PRs GitHub itself considers mergeable.
python3 <<"EOF"
import json
from collections import Counter

# rich only makes the output prettier; it is not in the documented
# requirements, so fall back to the builtin print when it is missing.
try:
    from rich import print
except ImportError:
    pass

with open("prs.json") as f:
    prs = json.load(f)

print(Counter(p['mergeable'] for p in prs))
# Counter({'MERGEABLE': 29, 'CONFLICTING': 45})
EOF

summary () {
  # Print one line per report-*.csv with the number of PRs that merged and
  # the number that did not.
  # Call this last: it changes directory (cd -) back to the previous
  # directory, which is where the report files are written.
  cd - >/dev/null
  python3 <<"EOF"
import csv
import glob
from collections import Counter

for report in sorted(glob.glob("report-*.csv")):
    with open(report) as handle:
        states = Counter(entry["state"] for entry in csv.DictReader(handle))
    print(f"{report} succeeded={states['succeeded']} failed={states['failed']}")
EOF
}

# prompt_or_summary QUESTION
#   Ask a yes/no question on stdout and read the answer from stdin.
#   An answer starting with n/N prints the summary and exits the script with
#   status 0; anything else (including just Enter) continues.
prompt_or_summary () {
    # QUESTION is data, not a format string: it may contain '%' or
    # backslashes (e.g. from branch or file names).
    printf '%s [Yn]: ' "$1"
    read -r ans
    case "$ans" in
      [nN]*)
        summary
        exit 0
        ;;
      *) true;;
    esac
}

# format tool "aliases", where needed
# Each wrapper has the same name as the tool it wraps so callers can run
# `$cmd qutebrowser tests` for any of them; `env` (or xargs) runs the real
# executable instead of recursing into the shell function.
usort () { env usort format "$@"; }
isort () { env isort -q "$@"; }
black () { env black -q "$@"; }
# pyupgrade is handed every tracked file whose name ends in ".py"; the
# arguments passed by the caller are ignored. The pattern is anchored so
# files like .pylintrc or *.pyc, which merely contain ".py", are skipped.
pyupgrade () { git ls-files | grep '\.py$' | xargs pyupgrade --py37-plus; }

generate_report () {
  # Check out a branch, try to merge each of the open PRs into it and write
  # the results to a CSV file (one row per PR).
  #
  # Arguments:
  #   $1 - branch to merge into (default: master)
  #   $2 - non-empty means quiet: no per-PR progress output and no prompt
  #        before overwriting an existing report
  #   $3 - rewrite strategy: empty, "merge" or "rebase". When set, each PR is
  #        first re-written by merge_with_formatting / rebase_with_formatting
  #        and the re-written branch is merged instead of pr/N.
  #   $4 - space separated formatter commands, passed to the rewrite function
  #   $5 - when set, only the PR with this number is processed
  # Reads ../prs.json (plus ../report-master.csv when a strategy is set) and
  # writes ../report-$base.csv or ../report-$base-$strategy.csv.
  base="${1:-master}"
  quiet="$2"
  rewrite_strategy="$3"
  cmds="$4"
  pr="$5"
  report_file=../report-$base.csv
  # prefix is a global; clear whatever an earlier call left behind.
  prefix=""

  # Reject bad strategies up front. Inside the loop below we are in a
  # pipeline subshell where `exit` would only end the loop.
  case "$rewrite_strategy" in
    ''|merge|rebase) ;;
    *)
      echo "Unknown rewrite strategy '$rewrite_strategy'" >&2
      exit 1
      ;;
  esac

  # prefix for working branch when we are going to re-write stuff so we don't
  # mess up the pr/* branches and have to re-fetch them.
  if [ -n "$rewrite_strategy" ]; then
    prefix="tmp-rewrite-"
    report_file=../report-$base-$rewrite_strategy.csv
  fi

  git checkout -q "$base"

  if [ -e "$report_file" ] && [ -z "$quiet" ]; then
    prompt_or_summary "$report_file exists, overwrite?"
  fi

  echo "number,updated,title,state,clean,conflicting" > "$report_file"

  # report NUMBER UPDATED TITLE STATE CLEAN CONFLICTING: append one CSV row.
  report () {
    # Double any quote characters so titles containing '"' stay one field.
    csv_title=$(printf '%s' "$3" | sed 's/"/""/g')
    printf '%s\n' "$1,$2,\"$csv_title\",$4,$5,$6" >> "$report_file"
  }

  # applies_cleanly_to_master NUMBER: succeed unless ../report-master.csv has
  # a "failed" row for exactly this PR. The number is matched up to the first
  # comma (so 12 does not match 123) and the state is matched as the third
  # field from the end (so the title cannot fake it).
  applies_cleanly_to_master () {
    if grep "^$1," ../report-master.csv | grep -q ',failed,[^,]*,[^,]*$'; then
      return 1
    fi
    return 0
  }

  head_sha=$(git rev-parse HEAD)
  jq -r '.[] | "\(.number) \(.updatedAt) \(.title)"' < ../prs.json | while read -r number updated title; do
    [ -n "$pr" ] && [ "$pr" != "$number" ] && continue
    [ -n "$quiet" ] || echo "trying ${prefix}pr/$number $updated $title"
    # Undo whatever the previous iteration merged.
    git reset -q --hard "$head_sha"

    case "$rewrite_strategy" in
      merge)
        applies_cleanly_to_master "$number" || {
          echo "pr/$number failed to merge in ../report-master.csv, skipping"
          continue
        }
        merge_with_formatting "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || {
          report "$number" "$updated" "$title" failed 999 999
          continue
        }
        ;;
      rebase)
        # Only attempt branches that actually merge cleanly with master.
        # Theoretically it wouldn't hurt to do all of them but a) running
        # black via the filter driver is slow b) rebase_with_formatting needs
        # some work to handle more errors in that case (the "git commit -qam
        # 'fix lint" bit at least needs to look for conflict markers)
        # I'm hardcoding master because of a lack of imagination.
        applies_cleanly_to_master "$number" || {
          echo "pr/$number failed to merge in ../report-master.csv, skipping"
          continue
        }
        rebase_with_formatting "$number" "$base" "$cmds" "$prefix" "$rewrite_strategy" || {
          report "$number" "$updated" "$title" failed 999 999
          continue
        }
        ;;
      *)
        # no rewrite strategy: merge pr/N as fetched
        true
        ;;
    esac

    # Only stderr is kept (stdout goes to /dev/null); lines mentioning
    # "preimage" are filtered out of it.
    git merge -q --no-ff --no-edit "${prefix}pr/$number" 2>&1 1>/dev/null | grep -v preimage
    if [ -e .git/MERGE_HEAD ] ;then
      # merge failed, clean lines staged and conflicting lines in working
      # tree
      # sum+0 prints 0 rather than an empty field when nothing was staged
      merged_lines=$(git diff --cached --numstat | awk -F'\t' '{sum+=$1;} END{print sum+0;}')
      conflicting_lines=$(git diff | sed -n -e '/<<<<<<< HEAD/,/=======$/p' -e '/=======$/,/>>>>>>> pr/p' | wc -l)
      # NOTE(review): the fixed 4 matches a single conflict hunk; with
      # several hunks the markers of the others are counted too.
      conflicting_lines=$(($conflicting_lines-4)) # account for markers included in both sed expressions
      [ -n "$quiet" ] || echo "#$number failed merging merged_lines=$merged_lines conflicting_lines=$conflicting_lines"
      maybepause "merge of ${prefix}pr/$number into $base failed"
      git merge --abort
      report "$number" "$updated" "$title" failed "$merged_lines" "$conflicting_lines"
    else
      [ -n "$quiet" ] || echo "#$number merged fine"
      #git show HEAD --oneline --stat
      report "$number" "$updated" "$title" succeeded 0 0
    fi
  done
}

add_smudge_filter () {
  # add_smudge_filter CMDS
  #   CMDS - space separated list of formatter commands
  # Setup the filters. A "smudge" filter named "rewrite" is configured to run
  # the generated filter-tools/filter-cache script, and *.py files are
  # attached to it via .git/info/attributes. Make sure to clean it up later
  # (remove_smudge_filter).
  # Running the formatters as filters is slower than running them directly
  # because they seem to be run on the files serially. TODO: can we
  # parallelize them?
  # Maybe just adding a wrapper around the formatters that caches the output
  # would be simpler. At least then you just have to sit through them once.
  cmds="$1"
  git config --local filter.rewrite.smudge "filter-cache"
  printf '*.py filter=rewrite\n' > .git/info/attributes

  mkdir -p filter-tools
  # Unquoted heredoc: $cmds is baked into the script now, everything written
  # as \$ is evaluated when the filter runs.
  cat > filter-tools/filter-cache <<EOF
#!/bin/sh
# Script to add as filter for git while rebasing.
# Runs the configured tools in sequence, caches the result of each tool in
# case you find yourself running through this process lots while working on
# it.

cmds="$cmds"
inputf="\$(mktemp --suffix=rebase)"
cat > "\$inputf"

# TODO: de-dup these with the parent script?
# Can use aliases here?
# Call with the file directly instead of using stdin?
usort () { env usort format -; }
black () { env black -q -; }
isort () { env isort -q -; }
pyupgrade () { env pyupgrade --exit-zero-even-if-changed --py37-plus -; }

run_with_cache () {
  inputf="\$1"
  cmd="\$2"
  input_hash="\$(sha1sum "\$inputf" | cut -d' ' -f1)"

  mkdir -p "/tmp/filter-caches/\$cmds/\$cmd" 2>/dev/null
  outputf="/tmp/filter-caches/\$cmds/\$cmd/\$input_hash"

  if [ -e "\$outputf" ] ;then
    lines="\$(wc -l "\$outputf" | cut -d' ' -f1)"
    # Never trust an empty cache entry, recompute it instead.
    [ \$lines -eq 0 ] && rm "\$outputf"
  fi

  if ! [ -e "\$outputf" ] ;then
    \$cmd < "\$inputf" > "\$outputf"
    [ \$? -eq 0 ] || {
      echo "\$cmd failed" >&2
      # The redirection created the cache entry even though the tool failed.
      # Drop it so a truncated or empty result is never served from cache.
      rm -f "\$outputf"
      cat "\$inputf"
      return
    }
    lines="\$(wc -l "\$outputf" | cut -d' ' -f1)"
    [ \$lines -eq 0 ] && {
      echo "tool '\$cmd' produced 0 line output file from '\$inputf'" >&2
    }
  fi

  cat "\$outputf"
}

echo "\$cmds" | tr ' ' '\n' | while read -r cmd; do
  run_with_cache "\$inputf" "\$cmd" | sponge "\$inputf"
done

cat "\$inputf"
rm "\$inputf"
EOF
  chmod +x filter-tools/filter-cache
  # git looks the filter command up in PATH. Add our directory once only;
  # this function is called again for every PR.
  case ":$PATH:" in
    *":$PWD/filter-tools:"*) ;;
    *) export PATH="$PWD/filter-tools:$PATH" ;;
  esac
}

remove_smudge_filter () {
  # Switch the formatter filter off again by deleting the attributes file.
  # The git config entry and the generated script can stay: they only take
  # effect while the attribute is set.
  rm -- .git/info/attributes
}

merge_with_formatting () {
  # merge_with_formatting NUMBER BASE CMDS [PREFIX] STRATEGY
  # Build a formatted copy of the PR's merge-base with origin/master, then
  # merge that into a copy of the PR branch (${PREFIX}pr/NUMBER) while the
  # smudge filter is active. On success the result is committed and BASE is
  # checked out again; on failure the merge is aborted and the merge's exit
  # code is returned.
  number="$1"
  base="$2"
  cmds="$3"
  prefix="${4:-tmp-rewrite-}"
  strategy="$5"

  # Use a temp base branch for now but adding "dropme" commits probably isn't the right
  # strategy for the end goal of letting PR authors adapt to autoformatter
  # changes. At that point we'll already have a re-formatted master branch.
  # Unless we can do the merge then rebase-keep-merges-but-drop-dropme or
  # something.
  # TODO: swap out this block to be based off of real master or qt-v2 or $base
  git checkout -b tmp-master-rewrite-pr/$number $(git merge-base origin/master pr/$number)

  # Two passes over every formatter. The commits are tagged "dropme!" so they
  # can be recognised later; the second pass gets a " 2" suffix.
  for pass_suffix in "" " 2"; do
    echo "$cmds" | tr ' ' '\n' | while read cmd; do
      $cmd qutebrowser tests
      git commit -am "dropme! $cmd$pass_suffix"
    done
  done

  git checkout -b ${prefix}pr/$number pr/$number

  add_smudge_filter "$cmds"

  git merge -X renormalize tmp-master-rewrite-pr/$number
  exit_code="$?"
  remove_smudge_filter
  case "$exit_code" in
    0)
      git commit -qam "fix lint"
      ;;
    *)
      maybepause "merge of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed"
      git merge --abort
      ;;
  esac
  # The temporary base is not needed any more, whatever the outcome.
  git branch -D tmp-master-rewrite-pr/$number

  if [ $exit_code -ne 0 ] ;then
    return $exit_code
  fi

  git checkout -q $base
}

rebase_with_formatting () {
  # rebase_with_formatting NUMBER BASE CMDS [PREFIX] STRATEGY
  #   NUMBER   - PR number; the PR is expected as local branch pr/NUMBER
  #   BASE     - branch to end up on (and to rebase onto for "rebase")
  #   CMDS     - space separated formatter commands
  #   PREFIX   - prefix of the re-written branch (default: tmp-rewrite-)
  #   STRATEGY - "rebase" additionally transplants the result onto BASE
  # Returns non-zero when one of the rebases fails (it is aborted first).
  number="$1"
  base="$2"
  cmds="$3"
  prefix="${4:-tmp-rewrite-}"
  strategy="$5"

  # We need to apply formatting to PRs and base them on a reformatted base
  # branch.
  # I haven't looked into doing that via a merge but here is an attempt
  # doing a rebase.
  # Rebasing directly on to a formatted branch will fail very easily when it
  # runs into a formatting change. So I'm using git's "filter" attribute to
  # apply the same formatter to the trees corresponding to the
  # commits being rebased. Hopefully if we apply the same formatter to the
  # base branch and to the individual commits from the PRs we can minimize
  # conflicts.
  # An alternative to using the filter attribute might be to use something
  # like the "darker" tool to re-write the commits. I suspect that won't
  # help with conflicts in the context around changes though.

  # Checkout the parent commit of the branch then apply formatting tools to
  # it. This will provide a target for rebasing which doesn't have any
  # additional drift from changes to master. After that then we can rebase
  # the re-written PR branch to the more current, autoformatted, master.
  # TODO: It might be possible to skip the intermediate base branch.
  git checkout -b tmp-master-rewrite-pr/$number $(git merge-base origin/master pr/$number)
  echo "$cmds" | tr ' ' '\n' | while read cmd; do
    $cmd qutebrowser tests
    git commit -am "dropme! $cmd" # mark commits for dropping when we rebase onto the more recent master
  done
  # Occasionally we get situations where black and pyupgrade build on each
  # other to enable further changes. So the order you run them matters. But we
  # have situations where each one enables the other, in both orders. So we
  # run them all yet again to pick up any lingering changes from the first
  # run.
  # If we don't do this the leftover changes can be picked up by the smudge
  # filter during the first rebase below and added to "fix lint" commits. Then
  # since they don't have "dropme!" in the messages they stick in the branch
  # and end up conflicting with the base branch.
  echo "$cmds" | tr ' ' '\n' | while read cmd; do
    $cmd qutebrowser tests
    git commit -am "dropme! $cmd 2"
  done

  git checkout -b ${prefix}pr/$number pr/$number

  add_smudge_filter "$cmds"

  # Description of extra options:
  # --exec 'git commit -qam "fix lint"': The git smudge filter leaves changes
  #     in the working tree. So we need to include these in a commit if we
  #     want to keep them.
  # --exec '... || true': git commit fails if there is nothing to commit, in
  #     this case meaning the filter didn't need to make any changes to the
  #     commit we just applied. So short circuiting to `true` just makes it so
  #     the result code is always 0 and the rebase continues.
  # -X theirs: in the case of conflicts, disregard the changes in the working
  #     tree and apply those from the incoming commit. In this case when you
  #     have one commit later in a PR that builds on an earlier one, and we
  #     re-formatted the earlier one, the later one will fail to apply. Since
  #     we know these commits already build on each other and that any
  #     conflicts are due to formatting changes, which'll be applied again
  #     later we can safely disregard the changes.
  #     So this ends up with the right result but is problematic for two
  #     reasons:
  #     a) it adds noise to the PRs because formatting changes are applied,
  #        reverted, then applied again.
  #     b) if there are any conflicts with the base branches the base branch
  #        changes will be reverted. In this script we are already checking
  #        that PRs apply cleanly to their existing base before rebasing them
  #        on an auto-formatted version of it. So we shouldn't run into that.
  #        But if this is used in more scenarios it will likely cause some
  #        frustration.
  git rebase -q -X theirs -X renormalize --exec 'git commit -qam "fix lint" || true' tmp-master-rewrite-pr/$number
  exit_code="$?"
  remove_smudge_filter
  [ $exit_code -eq 0 ] || {
    maybepause "rebase -X renormalize of ${prefix}pr/$number onto tmp-master-rewrite-pr/$number failed"
    git rebase --abort
  }
  git branch -D tmp-master-rewrite-pr/$number

  [ $exit_code -eq 0 ] || return $exit_code

  if [ "$strategy" = "rebase" ] ;then
    # now transplant onto the actual upstream branch -- might have to drop this
    # if it causes problems.
    # The sed call deletes every todo line mentioning "dropme", i.e. the
    # formatter commits made on the temporary base above.
    # GIT_SEQUENCE_EDITOR is the variable git consults for the rebase todo
    # list. Plain EDITOR is only a last resort and loses to sequence.editor,
    # GIT_EDITOR, core.editor and VISUAL, in which case the dropme commits
    # would be kept or an interactive editor would open.
    GIT_SEQUENCE_EDITOR='sed -i /dropme/d' git rebase -qi "$base" || {
      maybepause "rebase of ${prefix}pr/$number onto $base failed"
      git rebase --abort
      return 1
    }
  fi

  git checkout -q $base
}

# Everything below runs git commands against the current directory,
# including `git reset --hard origin/master` in clean_branches, so refuse to
# go on if we are not inside the working clone.
cd qutebrowser || {
  echo "Error: could not enter the qutebrowser clone" >&2
  exit 1
}

# run as `$0 some-branch` to report on merging all open PRs to a branch you
# made yourself. Otherwise run without args to try with a bunch of builtin
# configurations.
#
# Options (must come before the branch name):
#   -s|--rewrite-strategy merge|rebase   re-write PRs with the formatters
#                                        before merging them
#   -p|--pull-request NUMBER             only look at this one PR

# valid_rewrite_strategy NAME: succeed if NAME is a known rewrite strategy.
valid_rewrite_strategy () {
  case "$1" in
    merge|rebase) return 0;;
    *) return 1;;
  esac
}

strategy=""
pull_request=""
while [ -n "$1" ] ;do
  case "$1" in
    -s|--rewrite-strategy)
      shift
      [ -n "$1" ] || {
        echo "What strategy?"
        exit 1
      }
      # Fail here rather than after branches have been reset and formatted.
      valid_rewrite_strategy "$1" || {
        echo "Unknown rewrite strategy '$1' (expected merge or rebase)" >&2
        exit 1
      }
      strategy="$1"
      ;;
    -p|--pull-request)
      shift
      [ -n "$1" ] || {
        echo "Which PR?"
        exit 1
      }
      pull_request="$1"
      ;;
    -*)
      echo "Unknown argument '$1'"
      exit 1
      ;;
    *)
      # first non-option argument: leave it in $1 for the code below
      break
      ;;
  esac
  shift
done

if [ -z "$1" ] ;then
  # No branch given: build the formatted branches ourselves and report on
  # each of them.

  clean_branches () {
    # Reset master to origin/master and delete local tmp-* branches. Nothing
    # but tmp- branches is deleted, as a safety net for accidentally running
    # this in a real qutebrowser checkout.
    git checkout master
    git reset --hard origin/master
    git branch -l | grep tmp- | grep -v detached | while read stale_branch; do
      git branch -qD $stale_branch
    done
  }

  # pre-defined auto-formatter configurations. Branches will be created as
  # needed.
  # format: branch tool1 tool2 ...
  tools_all="master true
  tmp-black black
  tmp-black_isort black isort
  tmp-black_usort black usort
  tmp-black_pyupgrade black pyupgrade
  tmp-black_isort_pyupgrade black isort pyupgrade
  tmp-black_usort_pyupgrade black usort pyupgrade
  qt6-v2 true"
  tools="tmp-black_isort_pyupgrade black isort pyupgrade"

  config_count="$(echo "$tools" | wc -l | cut -d' ' -f1)"
  if [ "$config_count" -gt 1 ] ;then
    # TODO: turn this "run it with all tool configurations and see which one
    # is the worst" thing into a CLI option. This script is a cross between
    # "gather stats" and "refine merge strategies" now and is in need of a bit
    # of a refactor.
    prompt_or_summary "Generate report for all tool configurations?"
  fi
  clean_branches

  echo "$tools" | while read branch cmds; do
    echo "$branch"
    git checkout -q "$branch" 2>/dev/null || git checkout -q -b "$branch" origin/master
    # Every formatter is run and committed twice. black and pyupgrade can
    # each enable further changes by the other, whichever order they run in,
    # so the second round picks up what the first one left behind.
    for run_label in "" " second run"; do
      echo "$cmds" | tr ' ' '\n' | while read cmd; do
        $cmd qutebrowser tests
        git commit -am "$cmd$run_label"
      done
    done
    generate_report "$branch" y "$strategy" "$cmds" "$pull_request"
  done
else
  # A branch was named on the command line: just report on that one.
  generate_report "$1"
fi

summary

# todo:
# * see if we can run formatters on PR branches before/while merging
# * do most stuff based off of qt6-v2 instead of master, not like most PRs
#   will be merged to pre-3.0 master anyway
# * for strategies where we skip PRs that failed in master include them in the
#   report too, for reference, with a marker to that effect and a total
#   diffstat so we can see how big they are
# * *try the more simplistic "Run the formatter on all PR branches then merge"
#   instead of trying to do it via a rebase*
# * try rebasing them to an autoformatted qt6-v2 branch
# notes:
# after merging qt6-v2 would merging old PRs to old master then somehow merging
#   the PR merge commit up to the new master be easier than rebasing the PR?
# there is a filter attribute you can use to re-write files before committing.
#   For this use case probably the same as rebase -i --exec then merge?
#   >See "Merging branches with differing checkin/checkout attributes" in gitattributes(5)
# if we go with the strategy of rebasing PRs on formatted commits, how do we
#   deal with stopping isort making import loops on every damn PR? Still need
#   to try rebasing directly on the latest formatted master instead of doing
#   the intermediate one.