最新消息:20210917 已从crifan.com换到crifan.org

【已解决】用Python计算Excel中团队历史业绩和其他统计指标

Python crifan 572浏览 0评论
折腾:
【未解决】用Python实现解析excel计算业绩统计并打包成可执行文件
期间,有了新需求,又新增了个 历史团队业绩。
多了个额外的excel:
input/业绩数据表格_20210615.xlsx
且之前的历史数据:
input/业绩数据表格_historyAll.xlsx
都可能带重复数据的。
重复的判断逻辑是:销售易id + 金额 + 时间
所以去写代码,解析excel,带去重的逻辑。
调试后是:
def loadSaleRecord(excelFullPath):
  """
  Load sale record dict list from excel
  """
  global gLoadedSaleRecordDictList


  # saleRecordDictList = {}
  saleRecordDictList = []
  # timeDict = {}


  saleDf = pd.read_excel(io=excelFullPath, engine="openpyxl")
  print("saleDf=%s" % saleDf)


  saleRowCount, saleColumnCount = saleDf.shape
  print("saleRowCount=%s, saleColumnCount=%s" % (saleRowCount, saleColumnCount))


  saleTitleList = saleDf.columns
  print("saleTitleList=%s" % saleTitleList)


  # # sort by time -> for calc first and last time
  # print("before head=%s" % saleDf.head())
  # saleDf = saleDf.sort_values(by='时间')
  # print("after head=%s" % saleDf.head())


  # firstTime = saleDf.iloc[0]["时间"]
  # lastTime = saleDf.iloc[saleRowCount - 1]["时间"]
  # # print("firstTime=%s, lastTime=%s" % (firstTime, lastTime))


  # Update: change to get valid item's earliest and latest time
  # just init
  # someTime = saleDf.iloc[0]["时间"]
  # firstTime = someTime
  # lastTime = someTime
  # update: in case not add any one, so no valid time
  firstTime = None
  lastTime = None


  curIdx = 0
  # for curIdx, curRow in saleDf.itertuples():
  for rowIdx, rowValue in saleDf.iterrows():
    print("%s [%4d] %s %s" % ("-"*10, curIdx, rowIdx, "-"*10))
    # print(rowIdx)
    # print(rowValue)
    # curRowDict = saleDf.iloc[curIdx]
    # saleId = curRowDict["销售员编号"]
    # personalSaleMoney = curRowDict["业绩金额"]


    saleId = rowValue["销售员编号"]
    personalSaleMoney = rowValue["业绩金额"]
    saleTime = rowValue["时间"]
    # print("saleId=%s, personalSaleMoney=%s, saleTime=%s" % (saleId, personalSaleMoney, saleTime))


    curSaleDict = {
      "saleId": saleId,
      "personalSaleMoney": personalSaleMoney,


      # for debug
      # "personalSaleMoney": int(personalSaleMoney),


      "saleTime": saleTime,
    }


    # if saleId in saleRecordDictList.keys():
    #   saleRecordDictList[saleId] = saleRecordDictList[saleId] + personalSaleMoney
    # else:
    #   saleRecordDictList[saleId] = personalSaleMoney
    # if curSaleDict in saleRecordDictList.values():
    # if curSaleDict in gLoadedSaleRecordDictList.values():
    if curSaleDict in gLoadedSaleRecordDictList:
      print("!!! Duplicated sale record: %s" % curSaleDict)
    else:
      print("Added: %s" % curSaleDict)
      # saleRecordDictList[saleId] = curSaleDict
      # gLoadedSaleRecordDictList[saleId] = curSaleDict
      saleRecordDictList.append(curSaleDict)
      gLoadedSaleRecordDictList.append(curSaleDict)


      # update time
      if firstTime:
        if saleTime < firstTime:
          firstTime = saleTime
      else:
        firstTime = saleTime


      if lastTime:
        if saleTime > lastTime:
          lastTime = saleTime
      else:
        lastTime = saleTime


    curIdx += 1


  # print("len(gLoadedSaleRecordDictList.keys())=%s" % len(gLoadedSaleRecordDictList.keys()))
  # print("len(saleRecordDictList.keys())=%s" % len(saleRecordDictList.keys()))
  # len(saleRecordDictList.keys())=463
  # print("saleRecordDictList=%s" % saleRecordDictList)
  print("len(gLoadedSaleRecordDictList)=%s" % len(gLoadedSaleRecordDictList))
  print("len(saleRecordDictList)=%s" % len(saleRecordDictList))
  # len(gLoadedSaleRecordDictList)=1215
  # len(saleRecordDictList)=1215


  timeDict = {
    "fisrt": firstTime,
    "last": lastTime,
  }


  return saleRecordDictList, timeDict

historySaleRecordDictList, historySaleTimeDict = loadSaleRecord(SaleHistoryFullPath)
currentSaleRecordDictList, currentSaleTimeDict = loadSaleRecord(SaleCurrentFullPath)
print("len(historySaleRecordDictList)=%s, historySaleTimeDict=%s" % (len(historySaleRecordDictList), historySaleTimeDict))
print("len(currentSaleRecordDictList)=%s, currentSaleTimeDict=%s" % (len(currentSaleRecordDictList), currentSaleTimeDict))
输出:
history:
saleRowCount=1220, saleColumnCount=8
->
len(historySaleRecordDictList)=1215, historySaleTimeDict={'fisrt': Timestamp('2021-02-20 14:00:00'), 'last': Timestamp('2021-06-09 20:29:11')}


current:
saleRowCount=1319, saleColumnCount=8
->
len(currentSaleRecordDictList)=99, currentSaleTimeDict={'fisrt': Timestamp('2021-06-09 21:09:50'), 'last': Timestamp('2021-06-15 17:02:32')}
-》
其中可见:
  • input/业绩数据表格_historyAll.xlsx
    • 一共:1220个
    • 重复了=去重了:5个
    • 实际载入:1215个
  • 当前业绩的 input/业绩数据表格_20210615.xlsx
    • 一共:1319个
      • 内部其实包含了 history的所有数据
    • 重复了=去重了:1220个
    • 实际载入:99个
继续后续处理逻辑:计算
团队累计业绩
团队当期业绩
调试期间,重新发现,想起来了之前就知道的:
单个excel,单个销售员,是有可能有多个销售业绩的
调试时也再次证实了这点:
另一一次,调试后,更新了个人业绩总数:
更新完毕所有的 个人总业绩:
最后用代码:

def calcTeamSaleMoney(curNode):
  """Calculate team sale money"""
  totalTeamSaleMoney = 0
  for eachChildNode in curNode.children:
    totalTeamSaleMoney += eachChildNode.personalSaleMoney


    # if not eachChildNode.is_leaf:
    if eachChildNode.children:
      totalTeamSaleMoney += calcTeamSaleMoney(eachChildNode)


  return totalTeamSaleMoney


def loadSaleRecord(excelFullPath):
  """
  Load sale record dict list from excel
  """
  global gLoadedSaleRecordDictList


  # saleRecordDictList = {}
  saleRecordDictList = []
  # timeDict = {}


  saleDf = pd.read_excel(io=excelFullPath, engine="openpyxl")
  print("saleDf=%s" % saleDf)


  saleRowCount, saleColumnCount = saleDf.shape
  print("saleRowCount=%s, saleColumnCount=%s" % (saleRowCount, saleColumnCount))


  saleTitleList = saleDf.columns
  print("saleTitleList=%s" % saleTitleList)


  # # sort by time -> for calc first and last time
  # print("before head=%s" % saleDf.head())
  # saleDf = saleDf.sort_values(by='时间')
  # print("after head=%s" % saleDf.head())


  # firstTime = saleDf.iloc[0]["时间"]
  # lastTime = saleDf.iloc[saleRowCount - 1]["时间"]
  # # print("firstTime=%s, lastTime=%s" % (firstTime, lastTime))


  # Update: change to get valid item's earliest and latest time
  # just init
  # someTime = saleDf.iloc[0]["时间"]
  # firstTime = someTime
  # lastTime = someTime
  # update: in case not add any one, so no valid time
  firstTime = None
  lastTime = None


  curIdx = 0
  # for curIdx, curRow in saleDf.itertuples():
  for rowIdx, rowValue in saleDf.iterrows():
    print("%s [%4d] %s %s" % ("-"*10, curIdx, rowIdx, "-"*10))
    # print(rowIdx)
    # print(rowValue)
    # curRowDict = saleDf.iloc[curIdx]
    # saleId = curRowDict["销售员编号"]
    # personalSaleMoney = curRowDict["业绩金额"]


    saleId = rowValue["销售员编号"]
    personalSaleMoney = rowValue["业绩金额"]
    saleTime = rowValue["时间"]
    # print("saleId=%s, personalSaleMoney=%s, saleTime=%s" % (saleId, personalSaleMoney, saleTime))


    curSaleDict = {
      "saleId": saleId,
      "personalSaleMoney": personalSaleMoney,


      # for debug
      # "personalSaleMoney": int(personalSaleMoney),


      "saleTime": saleTime,
    }


    # if saleId in saleRecordDictList.keys():
    #   saleRecordDictList[saleId] = saleRecordDictList[saleId] + personalSaleMoney
    # else:
    #   saleRecordDictList[saleId] = personalSaleMoney
    # if curSaleDict in saleRecordDictList.values():
    # if curSaleDict in gLoadedSaleRecordDictList.values():
    if curSaleDict in gLoadedSaleRecordDictList:
      print("!!! Duplicated sale record: %s" % curSaleDict)
    else:
      print("Added: %s" % curSaleDict)
      # saleRecordDictList[saleId] = curSaleDict
      # gLoadedSaleRecordDictList[saleId] = curSaleDict
      saleRecordDictList.append(curSaleDict)
      gLoadedSaleRecordDictList.append(curSaleDict)


      # update time
      if firstTime:
        if saleTime < firstTime:
          firstTime = saleTime
      else:
        firstTime = saleTime


      if lastTime:
        if saleTime > lastTime:
          lastTime = saleTime
      else:
        lastTime = saleTime


    curIdx += 1


  # print("len(gLoadedSaleRecordDictList.keys())=%s" % len(gLoadedSaleRecordDictList.keys()))
  # print("len(saleRecordDictList.keys())=%s" % len(saleRecordDictList.keys()))
  # len(saleRecordDictList.keys())=463
  # print("saleRecordDictList=%s" % saleRecordDictList)
  print("len(gLoadedSaleRecordDictList)=%s" % len(gLoadedSaleRecordDictList))
  print("len(saleRecordDictList)=%s" % len(saleRecordDictList))
  # len(gLoadedSaleRecordDictList)=1215
  # len(saleRecordDictList)=1215


  timeDict = {
    "firstTime": firstTime,
    "lastTime": lastTime,
  }


  return saleRecordDictList, timeDict


def generatePersonalSaleMoney(saleRecordDictList):
  """
  Generate personal sale money dict from sale record dict list
  """
  saleMoneyDict = {}
  for eachSaleRecordDict in saleRecordDictList:
    saleId = eachSaleRecordDict["saleId"]
    personalSaleMoney = eachSaleRecordDict["personalSaleMoney"]
    if saleId in saleMoneyDict.keys():
      # saleMoneyDict[saleId] += personalSaleMoney
      existedMoney = saleMoneyDict[saleId]
      saleMoneyDict[saleId] = existedMoney + personalSaleMoney
    else:
      saleMoneyDict[saleId] = personalSaleMoney


  return saleMoneyDict


def buildTopNodeList(saleMoneyDict):
  """
  Build top tree node list
  """
  topNodeList = []


  for curParentId, curChildIdList in recommandRelationDict.items():
    print("curParentId=%s, curChildIdList=%s" % (curParentId, curChildIdList))
    # curParentId=a12341234123, curChildIdList=['a12341234121']
    existedParentNode = findNode(curParentId, topNodeList)
    curRootNode = None
    if existedParentNode:
      curParentNode = existedParentNode
      curRootNode = curParentNode
    else:
      # curParentNode = Node(curParentId)
      curParentNode = createNode(curParentId, saleMoneyDict=saleMoneyDict)


      curRootNode = curParentNode.root
      if not curRootNode:
        curRootNode = curParentNode


      # gRootNodeList.append(curRootNode)
      topNodeList.append(curRootNode)


    print("curParentNode=%s" % curParentNode)
    # curParentNode=Node('/a12341234123')
    for eachChildId in curChildIdList:
      existedChildNode = findNode(eachChildId, topNodeList)
      if existedChildNode:
        if not existedChildNode.parent:
          existedChildNode.parent = curParentNode
        curChildNode = existedChildNode
      else:
        # curChildNode = Node(eachChildId, parent=curParentNode)
        curChildNode = createNode(eachChildId, nodeParent=curParentNode, saleMoneyDict=saleMoneyDict)
      print("curChildNode=%s" % curChildNode)
      # Node('/a12341234123/a12341234121', personalSaleMoney=17226.72636907)
      # curChildNode=Node('/aa1234512333/aa1234512334')
      topNodeList = updateRootNodeList(curChildNode, topNodeList)


    # # for debug
    # for pre, fill, node in RenderTree(curRootNode):
    #   # print("pre=%s,fill=%s,node=%s" % (pre, fill, node))
    #   print("%s%s" % (pre, node.name))


    # print("gRootNodeList=%s" % gRootNodeList)


  # print("curRootNode=%s" % curRootNode)


  return topNodeList


def addFakeRootNode(topNodeList):
  """
  Add fake root node for top node list
  """
  rootNode = None


  # fakeRootNode = Node(FakeRootNodeId)
  fakeRootNode = createNode(FakeRootNodeId)


  # for eachTopNode in gRootNodeList:
  for eachTopNode in topNodeList:
    if eachTopNode.parent:
      print("Unexpected: eachTopNode=%s" % eachTopNode)
    else:
      eachTopNode.parent = fakeRootNode


  # gRootNode = fakeRootNode
  rootNode = fakeRootNode


  # return gRootNode
  return rootNode


def buildSaleTree(saleMoneyDict):
  """
  Build sale node tree
  """
  topNodeList = buildTopNodeList(saleMoneyDict)
  rootNode = addFakeRootNode(topNodeList)
  return rootNode, topNodeList


def updateTreeTeamSaleMoney(rootNode):
  """
  Update tree node's team sale money
  """
  for eachNode in rootNode.descendants:
    eachNode.teamSaleMoney = calcTeamSaleMoney(eachNode)
    print("Updated team sale money: eachNode=%s" % eachNode)


################################################################################
# Main
################################################################################


#----------------------------------------
### read and parse relation excel
#----------------------------------------


relationDf = pd.read_excel(io=RelationFullPath, engine="openpyxl")
print("relationDf=%s" % relationDf)


relationRowCount, relationColumnCount = relationDf.shape
print("relationRowCount=%s, relationColumnCount=%s" % (relationRowCount, relationColumnCount))


relationTitleList = relationDf.columns
print("relationTitleList=%s" % relationTitleList)


for rowIdx, rowValue in relationDf.iterrows():
    print("%s %s %s" % ("-"*10, rowIdx, "-"*10))
    # print(rowIdx)
    # print(rowValue)


    parentId = rowValue["销售员编号(引荐人)"]
    childId = rowValue["推荐的下级账号"]
    print("parentId=%s, childId=%s" % (parentId, childId))
    if pd.isna(childId):
      print("! Unexpected non child for parent %s" % parentId)
      continue


    if parentId in recommandRelationDict.keys():
      curChildList = recommandRelationDict[parentId]
      if childId in curChildList:
        print("Omit duplcated child %s for parent %s" % (childId, parentId))
      else:
        curChildList.append(childId)
        recommandRelationDict[parentId] = curChildList
    else:
      recommandRelationDict[parentId] = [childId]


print("len(recommandRelationDict.keys())=%s" % len(recommandRelationDict.keys())) # len(recommandRelationDict.keys())=158
# print("recommandRelationDict=%s" % recommandRelationDict)


#----------------------------------------
### read and parse sale money excel
#----------------------------------------


historySaleRecordDictList, historySaleTimeDict = loadSaleRecord(SaleHistoryFullPath)
print("len(historySaleRecordDictList)=%s, historySaleTimeDict=%s" % (len(historySaleRecordDictList), historySaleTimeDict))
currentSaleRecordDictList, currentSaleTimeDict = loadSaleRecord(SaleCurrentFullPath)
print("len(currentSaleRecordDictList)=%s, currentSaleTimeDict=%s" % (len(currentSaleRecordDictList), currentSaleTimeDict))


allSaleRecordList = []
allSaleRecordList.extend(historySaleRecordDictList)
allSaleRecordList.extend(currentSaleRecordDictList)
allPersonalSaleMoneyDict = generatePersonalSaleMoney(allSaleRecordList)
print("len(allPersonalSaleMoneyDict.keys())=%s" % len(allPersonalSaleMoneyDict.keys()))
# len(allPersonalSaleMoneyDict.keys())=472


currentyPersonalSaleMoneyDict = generatePersonalSaleMoney(currentSaleRecordDictList)
print("len(currentyPersonalSaleMoneyDict.keys())=%s" % len(currentyPersonalSaleMoneyDict.keys()))
# len(currentyPersonalSaleMoneyDict.keys())=53


#----------------------------------------
### Calculate team sale money
#----------------------------------------


currentRootNode, currentTopNodeList = buildSaleTree(currentyPersonalSaleMoneyDict)
allRootNode, allTopNodeList = buildSaleTree(allPersonalSaleMoneyDict)


# dbgTree(currentRootNode, currentTopNodeList, subFolder="current")
# dbgTree(allRootNode, allTopNodeList, subFolder="all")
dbgPrintTreeToJson(currentRootNode, subFolder="current")
dbgPrintTreeToJson(allRootNode, subFolder="all")


updateTreeTeamSaleMoney(currentRootNode)
updateTreeTeamSaleMoney(allRootNode)


# for debug
updatedTeamSaleMoneyFilename = "RelationAllNode_updatedTeamSaleMoney.json"
dbgPrintTreeToJson(currentRootNode, outputFilename=updatedTeamSaleMoneyFilename, subFolder="current")
dbgPrintTreeToJson(allRootNode, outputFilename=updatedTeamSaleMoneyFilename, subFolder="all")


#----------------------------------------
### Output
#----------------------------------------


allTeamSaleMoneyDict = {}
for eachNode in allRootNode.descendants:
  allTeamSaleMoneyDict[eachNode.name] = eachNode.teamSaleMoney


currentTeamSaleMoneyDict = {}
for eachNode in currentRootNode.descendants:
  currentTeamSaleMoneyDict[eachNode.name] = eachNode.teamSaleMoney


allPersonList = list(allTeamSaleMoneyDict.keys())
allTeamMoneyList = list(allTeamSaleMoneyDict.values())


currentTeamMoneyList = []
for eachPersonId in allPersonList:
  currentTeamMoney = 0
  if eachPersonId in currentTeamSaleMoneyDict.keys():
    currentTeamMoney = currentTeamSaleMoneyDict[eachPersonId]
  currentTeamMoneyList.append(currentTeamMoney)


summaryData = {
  "销售员账户": allPersonList,
  "团队累计总业绩": allTeamMoneyList,
  "团队当前业绩": currentTeamMoneyList,
}


historyFisrtTime = historySaleTimeDict["firstTime"]
historyLastTime = historySaleTimeDict["lastTime"]
currentFirstTime = currentSaleTimeDict["firstTime"]
currentLastTime = currentSaleTimeDict["lastTime"]


if historyFisrtTime < currentFirstTime:
  startTime = historyFisrtTime
else:
  startTime = currentFirstTime


if historyLastTime > currentLastTime:
  endTime = historyLastTime
else:
  endTime = currentLastTime


print("startTime=%s, endTime=%s" % (startTime, endTime))
# startTime=2021-02-20 14:00:00, endTime=2021-06-15 17:02:32


dateFortmat = "%Y年%-m月%-d日%-H时%-M分"
startDatetimeStr = datetimeToStr(startTime, dateFortmat)
endDatetimeStr = datetimeToStr(endTime, dateFortmat)
outputSheetName = "%s-%s" % (startDatetimeStr, endDatetimeStr)
print("outputSheetName=%s" % outputSheetName)


outputDf = pd.DataFrame(data=summaryData)
outputDf.to_excel(OutputFullPath, sheet_name=outputSheetName)
即可输出:
output/统计结果_20210622_202842.xlsx

转载请注明:在路上 » 【已解决】用Python计算Excel中团队历史业绩和其他统计指标

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址
88 queries in 0.169 seconds, using 20.27MB memory