#6 build(deps): bump tqdm from 4.66.1 to 4.66.3

Open
GitHub User wants to merge 1 commits into ncusi:main from ncusi:dependabot/pip/tqdm-4.66.3
  
    
        
          
1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

          
            import copy
import re
from difflib import SequenceMatcher, get_close_matches
from operator import itemgetter
from unidiff import PatchSet, PatchedFile, Hunk
from joblib import Parallel, delayed
def to_str(self):
    content = "".join(str(line) for line in self)
    return content
def hunk_to_str(self):
    """Extract string to be used for comparison with Hunk from unidiff.PatchSet
    This `self` hunk can synthetic Hunk created as container for unidiff.patch.Line's
    by the get_hunk_images() function.
    :param Hunk self: modified block of a file, may be synthetic
    :return: all lines of changes in hunk, with their prefix removed,
        concatenated into a single string
    :rtype: str
    """
    return "".join(line.value for line in self)
# monkey-patching Hunk
Hunk.to_str = hunk_to_str
def get_close_matches2(word, pos, n, cutoff = 0.5):
    match_size = 0
    match_word = ""
    for p in pos:
        m = SequenceMatcher(a=word, b=p).find_longest_match()
        if m.size > match_size:
            match_size = m.size
            match_word = p
    if match_size == 0 or (match_size / len(word)) < cutoff:
        return None
    return match_word
def get_close_matches3(word, pos, n, cutoff):
    match_size = 0
    match_word = ""
    for p in pos:
        m = SequenceMatcher(a=word, b=p)
        if m.quick_ratio() > match_size:
            r = m.ratio()
            if r > match_size:
                match_size = r
                match_word = p
            if r >= cutoff and len(match_word.strip()) > 0:
                return match_word
    if match_size == 0 or match_size < cutoff or len(match_word.strip()) == 0:
        return None
    return match_word
class CompareBase:
    def __init__(self, image, lines=False):
        self.pos = {"r": 0, "p": None}
        self.lines = lines
        if lines:
            self.pos["l"] = None
        self.rnewline = re.compile(r"\n")
        self.image = image
        self.simage = image.to_str()
        # sequence matcher and maximal match
        self.seq_match = None
        self.chats = []
    def __repr__(self):
        return str(self.pos)
class CompareLines(CompareBase):
    def __init__(self, image, lines=False, threshold=0.5):
        super().__init__(image, lines)
        self.threshold = threshold
    def compare(self, b, pno, lno=None):
        a = self.simage
        self.chats.append(b)
    # alternate version
    def final(self):
        chatl = []
        for chat in self.chats:
            #chatl.extend([s.strip() for s in chat.splitlines() if len(s.strip())>0])
            chatl.extend(chat.splitlines())
        ret = []
        for line in self.image:
            if len(str(line).strip())==0:
                continue
            m = get_close_matches3(str(line), chatl, 1, self.threshold)
            if m:
                ret.append(line.diff_line_no)
        return ret
class CompareLinesFragmentThreshold(CompareLines):
    def __init__(self, image, lines=False):
        super().__init__(image, lines)
    def compare(self, b, pno, lno=None):
        a = self.simage
        s = SequenceMatcher(None, a, b)
        # Threshold version
        if s.real_quick_ratio() >= 0.1 and s.quick_ratio() >= 0.1 and s.ratio() >= 0.1:
            self.seq_match = s
            self.chats.append(b)
class CompareTopFragments(CompareBase):
    # Work in progress
    def __init__(self, image, lines=False):
        super().__init__(image, lines)
    def compare(self, b, pno, lno=None):
        a = self.simage
        s = SequenceMatcher(None, a, b)
        # Max version
        if s.real_quick_ratio() >= self.pos["r"] and s.quick_ratio() >= self.pos["r"]:
            r = s.ratio()
            if r > self.pos["r"]:
                self.pos = {"r": r, "p": pno}
                self.seq_match = s
                self.chat = b
                if self.lines:
                    self.pos["l"] = lno
class CompareFragments(CompareBase):
    def __init__(self, image, lines=False):
        """Construct a CompareFragments
        :param Hunk image: pre-image or post-image hunk of commit diff;
            unidiff.Hunk is monkey-patched to have `to_str` attribute (method).
        :param bool lines: whether to remember `lno` in compare()
        """
        # noinspection PyTypeChecker
        super().__init__(image, lines)
        self.chat = ""
    def compare(self, b, pno, lno=None):
        """Compare pre-image or post-image hunk against ChatGPT text
        :param str b: text of element of ChatGPT conversation
            (prompt, or answer, or code block)
        :param int pno: index into 'Conversations' list of 'ChatgptSharing'
        :param int or None lno: index into 'ListOfCode' list of conversation
        :rtype: None
        """
        a = self.simage
        s = SequenceMatcher(None, a, b)
        # Max version
        if s.real_quick_ratio() >= self.pos["r"] and s.quick_ratio() >= self.pos["r"]:
            r = s.ratio()
            if r > self.pos["r"]:
                self.pos = {"r": r, "p": pno}
                self.seq_match = s
                self.chat = b
                if self.lines:
                    self.pos["l"] = lno
    # Max version
    def final(self, cutoff=0.6,
              ret_chat_line_no=False, ret_score=False):
        """Final result of sequence of compare()'s
        :param float cutoff: a float in the range [0, 1], default 0.5.
            Lines from ChatGPT that don’t score at least that similar to patch line
            are ignored.
        :param bool ret_chat_line_no: whether to add chat_line_no to output
        :param bool ret_score: whether to add similarity score to output
        :return: list of diff line numbers of those lines in the `self.image` Hunk
            that have at least 1 matching line with at least `cutoff` similarity
            in one of compared chat fragments (prompt, or answer, or code block),
            or list of tuples that include diff line number as first element
        :rtype: list[int] or list[tuple[int, int]] or list[tuple[int, float]] or list[tuple[int, int, float]]
        """
        if not self.seq_match:
            return []
        chat_lines = self.chat.splitlines()
        ret = []
        for line in self.image:
            line_s = getattr(line, 'value', str(line)).rstrip('\n')
            # skip empty lines; str(line) for adding empty line is '+\n', so it does not match ''
            if not line_s:
                continue
            m = get_close_matches(line_s, chat_lines, n=1, cutoff=cutoff)
            if m:
                res = line.diff_line_no
                if ret_chat_line_no:
                    chat_line_no = [line_no
                                    for line_no, line in enumerate(chat_lines)
                                    if line == m[0]][0]
                    res = (res, chat_line_no)
                if ret_score:
                    # following source of get_close_matches() in difflib library
                    # https://github.com/python/cpython/blob/main/Lib/difflib.py
                    # s.set_seq2(word), s.set_seq1(possibilities[i])
                    s = SequenceMatcher(a=m[0], b=line_s)  # a ≡ s.set_seq1, b ≡ s.set_seq2
                    r = s.ratio()
                    if isinstance(res, tuple):
                        res = (*res, r)
                    else:
                        res = (res, r)
                ret.append(res)
        return ret
def get_hunk_images(hunk):
    """Split chunk into pre-image and post-image Hunk
    Second Hunk in the returned tuple includes only added files;
    all the other lines are returned in first Hunk in the tuple.
    Note that those returned synthesized chunks may lack correct
    header information - they are used only as containers for patch.Line.
    :param Hunk hunk: original part of diff, includes added, removed, and context lines
    :return: "preimage" and "postimage" hunks
    :rtype: (Hunk, Hunk)
    """
    postimage = Hunk()
    preimage = Hunk()
    for line in hunk:
        lc = copy.copy(line)
        if line.is_added:
            postimage.append(lc)
        else:
            preimage.append(lc)
    return preimage, postimage
def get_max_coverage(image, conv, Compare = CompareFragments,
                     ret_chat_line_no=False, ret_score=False):
    """
    Returns dict with the following structure:
        {
            "P": <comparison of `hunk` with "Prompt">,
            "A": <comparison of `hunk` with "Answer">,
            "L": <comparison of `hunk` with "ListOfCode">,
        }
    :param Hunk image: modified block of file, changed by diff;
        might be synthesized hunk returned by :func:`get_hunk_images`
    :param dict conv: "Conversation" part of `ChatgptSharing` structure,
        see https://github.com/NAIST-SE/DevGPT/blob/main/README.md#conversations
    :param type[CompareBase] Compare: compare class to use
    :param bool ret_chat_line_no: whether to add chat_line_no to output
    :param bool ret_score: whether to add similarity score to output
    :return:
    :rtype: dict[str, list[int]]
    """
    # iterate over conversation
    m_answer = Compare(image)
    m_prompt = Compare(image)
    m_loc = Compare(image, lines=True)
    for pno, prompt in enumerate(conv):
        a, b = prompt["Prompt"], prompt["Answer"]
        m_prompt.compare(a, pno)
        m_answer.compare(b, pno)
        for lno, loc in enumerate(prompt["ListOfCode"]):
            m_loc.compare(loc["Content"], pno, lno)
    return {
        # among 'Prompt'
        "P": m_prompt.final(ret_chat_line_no=ret_chat_line_no, ret_score=ret_score),
        "p": m_prompt.pos,
        # among 'Answer'
        "A": m_answer.final(ret_chat_line_no=ret_chat_line_no, ret_score=ret_score),
        "a": m_answer.pos,
        # among 'ListOfCode'
        "L": m_loc.final(ret_chat_line_no=ret_chat_line_no, ret_score=ret_score),
        "l": m_loc.pos,
    }
def diff_to_conversation_file(file, conv, debug=False, compare=CompareFragments):
    """
    :param PatchedFile file: file updated by `diff`, it is a list of Hunk's
    :param dict conv: ChatGPT link mention as `ChatgptSharing` structure,
        see https://github.com/NAIST-SE/DevGPT/blob/main/README.md#chatgptsharing
    :param bool debug: return also data about individual files
    :param type[CompareBase] compare: compare class to use
    :return:
    rtype dict[str, dict[str, int | set] | dict]
    """
    ret = {
        "ALL": {
            "coverage": 0,
            "all": 0,
            "lines": set(),
            "preimage": set(),
            "preimage_all": 0,
            "preimage_coverage": 0,
        }
    }
    fn = file.path
    if debug:
        ret["FILE"] = (file.source_file, file.target_file)
        ret["PATH"] = fn
    for i, hunk in enumerate(file):
        preimage, postimage = get_hunk_images(hunk)
        pre = get_max_coverage(preimage, conv["Conversations"], Compare=compare,
                               ret_chat_line_no=debug, ret_score=debug)
        post = get_max_coverage(postimage, conv["Conversations"], Compare=compare,
                                ret_chat_line_no=debug, ret_score=debug)
        # Only 'Answer' and 'ListOfCode' for post
        ret_lines = []
        ret_lines.extend(map(itemgetter(0), post["A"]) if debug else post["A"])
        ret_lines.extend(map(itemgetter(0), post["L"]) if debug else post["L"])
        ret_lines = set(ret_lines)
        # TODO: check how many remove lines from 'P'
        # that are exactly the same as in 'A' + 'L'.
        # this has to be done on source lines and may be expensive
        # ret_lines = set(ret_lines).union(set(post['P']))
        ret["ALL"]["coverage"] += len(ret_lines)
        #ret["ALL"]["all"] += len([l for l in postimage if len(str(l).strip())>0])
        ret["ALL"]["all"] += len(postimage)
        ret["ALL"]["lines"] = ret["ALL"]["lines"].union(ret_lines)
        # Only 'Prompt' for pre
        pre_set = set(map(itemgetter(0), pre["P"]) if debug else pre["P"])
        ret["ALL"]["preimage"] = ret["ALL"]["preimage"].union(pre_set)
        ret["ALL"]["preimage_coverage"] += len(pre_set)
        ret["ALL"]["preimage_all"] += len(preimage)
        if debug:
            if "HUNKS" not in ret:
                ret["HUNKS"] = {}
            ret["HUNKS"][i] = {
                "pre": pre,
                "post": post,
                "lines": list(ret_lines),
            }
    return ret
def diff_to_conversation(diff, conv, debug=False, compare = CompareFragments):
    """
    :param PatchSet diff: result of running GitRepo.unidiff(), it is a list of PatchedFile's
    :param dict conv: ChatGPT link mention as `ChatgptSharing` structure,
        see https://github.com/NAIST-SE/DevGPT/blob/main/README.md#chatgptsharing
    :param bool debug: passed down to :func:`diff_to_conversation_file`
    :param type[CompareBase] compare: compare class to use
    :return:
    :rtype: dict[str, dict[str, int | list]]
    """
    ret = {}
    ret["ALL"] = {"coverage": 0, "all": 0, "lines": [], 'preimage':[], 'preimage_all':0, 'preimage_coverage':0}
    if debug:
        ret["ALL"]["debug"] = True
    if "Conversations" not in conv:
        return ret
    ret_list =[]
    #for file in diff:
    #    ret_list.append(diff_to_conversation_file(file, diff, conv, debug, compare))
    ret_list = Parallel(n_jobs=-1)(delayed(diff_to_conversation_file)(file, conv, debug, compare) for file in diff)
    for r in ret_list:
        ret["ALL"]["coverage"] += r['ALL']["coverage"]
        ret["ALL"]["all"] += r['ALL']["all"]
        ret["ALL"]["preimage_coverage"] += r['ALL']["preimage_coverage"]
        ret["ALL"]["preimage_all"] += r['ALL']["preimage_all"]
        ret["ALL"]["lines"].extend(r['ALL']["lines"])
        ret["ALL"]["preimage"].extend(r['ALL']["preimage"])
        if debug:
            # r might be {} if there were errors
            if 'PATH' in r:
                filename = r['PATH']
                if 'FILES' not in ret:
                    ret['FILES'] = {}
                ret['FILES'][filename] = {
                    key: value
                    for key, value in r.items()
                    if key in ['FILE', 'HUNKS']
                }
    return ret

          
        
      

  
Tip!
Press p or to see the previous file or, n or to see the next file
ncusi / MSR_Challenge_2024 connected to https://github.com/ncusi/MSR_Challenge_2024.git

#6 build(deps): bump tqdm from 4.66.1 to 4.66.3

ncusi
/
MSR_Challenge_2024
connected to https://github.com/ncusi/MSR_Challenge_2024.git