aboutsummaryrefslogblamecommitdiff
path: root/dev/create-release/generate-changelist.py
blob: 13b744ec1b37e252af83f80179b3ab3ca3abb5da (plain) (tree)















































































































































                                                                                                                 
#!/usr/bin/python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Creates a new CHANGES.txt (written to $SPARK_HOME/CHANGES.txt.new) from git
# history.
#
# Usage:
#   First set NEW_RELEASE_VERSION and PREV_RELEASE_GIT_TAG in this file.
#   Make sure the SPARK_HOME environment variable is set.
#   $  python generate-changelist.py


import os
import sys
import subprocess
import time
import traceback
 
# Root of the Spark checkout. The script changes into this directory before
# running any git command; a missing variable fails fast with a KeyError.
SPARK_HOME = os.environ["SPARK_HOME"]
# Version written into the "Release ..." header of the generated change list.
NEW_RELEASE_VERSION = "1.0.0"
# Tag of the previous release: commits in tag..HEAD are listed, and the
# CHANGES.txt stored at this tag is appended after them.
PREV_RELEASE_GIT_TAG = "v0.9.1"

# Change list file names, resolved relative to SPARK_HOME once the script has
# changed into it.
CHANGELIST = "CHANGES.txt"
OLD_CHANGELIST = "%s.old" % (CHANGELIST)
NEW_CHANGELIST = "%s.new" % (CHANGELIST)
TMP_CHANGELIST = "%s.tmp" % (CHANGELIST)

# date before first PR in TLP Spark repo
SPARK_REPO_CHANGE_DATE1 = time.strptime("2014-02-26", "%Y-%m-%d")
# date after last PR in incubator Spark repo
SPARK_REPO_CHANGE_DATE2 = time.strptime("2014-03-01", "%Y-%m-%d")
# Threshold PR number that differentiates PRs to TLP
# and incubator repos
SPARK_REPO_PR_NUM_THRESH = 200

# The log is opened in the directory the script is started from, but the
# script later changes into SPARK_HOME. Keep the absolute path so the file can
# still be found (and removed) afterwards. The timestamp uses only documented
# strftime directives: day, month, year, 24-hour time.
LOG_FILE_NAME = os.path.abspath("changes_%s" % time.strftime("%d_%m_%Y_%H_%M_%S"))
LOG_FILE = open(LOG_FILE_NAME, 'w')
 
def run_cmd(cmd):
  """Run `cmd` through the shell and return its standard output.

  The command line and its output are recorded in LOG_FILE, and the command's
  stderr is redirected there too.

  If anything fails (non-zero exit status, OS error, Ctrl-C), the traceback is
  printed to the console and written to the log, cleanup() asks whether to
  delete the temporary files, and the script exits with status 1.
  """
  try:
    print >> LOG_FILE, "Running command: %s" % cmd
    output = subprocess.check_output(cmd, shell=True, stderr=LOG_FILE)
    print >> LOG_FILE, "Output: %s" % output
    return output
  except (Exception, KeyboardInterrupt):
    # Explicit exception list instead of a bare except, so SystemExit is not
    # swallowed; KeyboardInterrupt stays so that Ctrl-C still offers cleanup.
    traceback.print_exc()
    # Also keep the failure in the log, which survives if the user declines
    # the cleanup prompt.
    traceback.print_exc(file=LOG_FILE)
    cleanup()
    sys.exit(1)
 
def append_to_changelist(string):
  """Append `string` plus a trailing newline to the temporary change list.

  The file named by TMP_CHANGELIST is opened in append mode on every call, so
  it is created on first use and each call adds exactly one line.
  """
  with open(TMP_CHANGELIST, "a") as changelist:
    changelist.write("%s\n" % (string,))
 
def cleanup(ask = True):
  if ask == True:
    print "OK to delete temporary and log files? (y/N): " 
    response = raw_input()
  if ask == False or (ask == True and response == "y"):
    if os.path.isfile(TMP_CHANGELIST):
      os.remove(TMP_CHANGELIST) 
    if os.path.isfile(OLD_CHANGELIST):
      os.remove(OLD_CHANGELIST)
    LOG_FILE.close()
    os.remove(LOG_FILE_NAME)
 
print "Generating new %s for Spark release %s" % (CHANGELIST, NEW_RELEASE_VERSION)
os.chdir(SPARK_HOME)
if os.path.isfile(TMP_CHANGELIST):
  os.remove(TMP_CHANGELIST) 
if os.path.isfile(OLD_CHANGELIST):
  os.remove(OLD_CHANGELIST)
 
append_to_changelist("Spark Change Log")
append_to_changelist("----------------")
append_to_changelist("")
append_to_changelist("Release %s" % NEW_RELEASE_VERSION)
append_to_changelist("")
 
print "Getting commits between tag %s and HEAD" % PREV_RELEASE_GIT_TAG
hashes = run_cmd("git log %s..HEAD --pretty='%%h'" % PREV_RELEASE_GIT_TAG).split()
 
print "Getting details of %s commits" % len(hashes)
for h in hashes:
  date = run_cmd("git log %s -1 --pretty='%%ad' --date=iso | head -1" % h).strip()
  subject = run_cmd("git log %s -1 --pretty='%%s' | head -1" % h).strip()
  body = run_cmd("git log %s -1 --pretty='%%b'" % h)
  committer = run_cmd("git log %s -1 --pretty='%%cn <%%ce>' | head -1" % h).strip()
  body_lines = body.split("\n")
 
  if "Merge pull" in subject:
    ## Parse old format commit message
    append_to_changelist("  %s %s" % (h, date))
    append_to_changelist("  %s" % subject)
    append_to_changelist("  [%s]" % body_lines[0])
    append_to_changelist("")
     
  elif "maven-release" not in subject:
    ## Parse new format commit message
    # Get authors from commit message, committer otherwise
    authors = [committer]
    if "Author:" in body:
      authors = [line.split(":")[1].strip() for line in body_lines if "Author:" in line]
    
    # Generate GitHub PR URL for easy access if possible
    github_url = ""
    if "Closes #" in body:
      pr_num = [line.split()[1].lstrip("#") for line in body_lines if "Closes #" in line][0]
      github_url = "github.com/apache/spark/pull/%s" % pr_num 
      day = time.strptime(date.split()[0], "%Y-%m-%d")
      if day < SPARK_REPO_CHANGE_DATE1 or (day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH): 
        github_url = "github.com/apache/incubator-spark/pull/%s" % pr_num
    
    append_to_changelist("  %s" % subject)
    append_to_changelist("  %s" % ', '.join(authors))
    # for author in authors:
      # append_to_changelist("  %s" % author)
    append_to_changelist("  %s" % date)
    if len(github_url) > 0:
      append_to_changelist("  Commit: %s, %s" % (h, github_url))
    else:
      append_to_changelist("  Commit: %s" % h)
    append_to_changelist("")
 
# Append old change list
print "Appending changelist from tag %s" % PREV_RELEASE_GIT_TAG 
run_cmd("git show %s:%s | tail -n +3 >> %s" % (PREV_RELEASE_GIT_TAG, CHANGELIST, TMP_CHANGELIST))
run_cmd("cp %s %s" % (TMP_CHANGELIST, NEW_CHANGELIST))
print "New change list generated as %s" % NEW_CHANGELIST
cleanup(False)