折腾:
【未解决】用Python实现解析excel计算业绩统计并打包成可执行文件
期间,有了新需求,又新增了个 历史团队业绩。
多了个额外的excel:
input/业绩数据表格_20210615.xlsx

且之前的历史数据:
input/业绩数据表格_historyAll.xlsx
都可能带重复数据的。
重复的判断逻辑是:销售易id + 金额 + 时间
所以去写代码,解析excel,带去重的逻辑。
调试后是:
def loadSaleRecord(excelFullPath):
"""
Load sale record dict list from excel
"""
global gLoadedSaleRecordDictList
# saleRecordDictList = {}
saleRecordDictList = []
# timeDict = {}
saleDf = pd.read_excel(io=excelFullPath, engine="openpyxl")
print("saleDf=%s" % saleDf)
saleRowCount, saleColumnCount = saleDf.shape
print("saleRowCount=%s, saleColumnCount=%s" % (saleRowCount, saleColumnCount))
saleTitleList = saleDf.columns
print("saleTitleList=%s" % saleTitleList)
# # sort by time -> for calc first and last time
# print("before head=%s" % saleDf.head())
# saleDf = saleDf.sort_values(by='时间')
# print("after head=%s" % saleDf.head())
# firstTime = saleDf.iloc[0]["时间"]
# lastTime = saleDf.iloc[saleRowCount - 1]["时间"]
# # print("firstTime=%s, lastTime=%s" % (firstTime, lastTime))
# Update: change to get valid item's earliest and latest time
# just init
# someTime = saleDf.iloc[0]["时间"]
# firstTime = someTime
# lastTime = someTime
# update: in case not add any one, so no valid time
firstTime = None
lastTime = None
curIdx = 0
# for curIdx, curRow in saleDf.itertuples():
for rowIdx, rowValue in saleDf.iterrows():
print("%s [%4d] %s %s" % ("-"*10, curIdx, rowIdx, "-"*10))
# print(rowIdx)
# print(rowValue)
# curRowDict = saleDf.iloc[curIdx]
# saleId = curRowDict["销售员编号"]
# personalSaleMoney = curRowDict["业绩金额"]
saleId = rowValue["销售员编号"]
personalSaleMoney = rowValue["业绩金额"]
saleTime = rowValue["时间"]
# print("saleId=%s, personalSaleMoney=%s, saleTime=%s" % (saleId, personalSaleMoney, saleTime))
curSaleDict = {
"saleId": saleId,
"personalSaleMoney": personalSaleMoney,
# for debug
# "personalSaleMoney": int(personalSaleMoney),
"saleTime": saleTime,
}
# if saleId in saleRecordDictList.keys():
# saleRecordDictList[saleId] = saleRecordDictList[saleId] + personalSaleMoney
# else:
# saleRecordDictList[saleId] = personalSaleMoney
# if curSaleDict in saleRecordDictList.values():
# if curSaleDict in gLoadedSaleRecordDictList.values():
if curSaleDict in gLoadedSaleRecordDictList:
print("!!! Duplicated sale record: %s" % curSaleDict)
else:
print("Added: %s" % curSaleDict)
# saleRecordDictList[saleId] = curSaleDict
# gLoadedSaleRecordDictList[saleId] = curSaleDict
saleRecordDictList.append(curSaleDict)
gLoadedSaleRecordDictList.append(curSaleDict)
# update time
if firstTime:
if saleTime < firstTime:
firstTime = saleTime
else:
firstTime = saleTime
if lastTime:
if saleTime > lastTime:
lastTime = saleTime
else:
lastTime = saleTime
curIdx += 1
# print("len(gLoadedSaleRecordDictList.keys())=%s" % len(gLoadedSaleRecordDictList.keys()))
# print("len(saleRecordDictList.keys())=%s" % len(saleRecordDictList.keys()))
# len(saleRecordDictList.keys())=463
# print("saleRecordDictList=%s" % saleRecordDictList)
print("len(gLoadedSaleRecordDictList)=%s" % len(gLoadedSaleRecordDictList))
print("len(saleRecordDictList)=%s" % len(saleRecordDictList))
# len(gLoadedSaleRecordDictList)=1215
# len(saleRecordDictList)=1215
timeDict = {
"fisrt": firstTime,
"last": lastTime,
}
return saleRecordDictList, timeDict
historySaleRecordDictList, historySaleTimeDict = loadSaleRecord(SaleHistoryFullPath)
currentSaleRecordDictList, currentSaleTimeDict = loadSaleRecord(SaleCurrentFullPath)
print("len(historySaleRecordDictList)=%s, historySaleTimeDict=%s" % (len(historySaleRecordDictList), historySaleTimeDict))
print("len(currentSaleRecordDictList)=%s, currentSaleTimeDict=%s" % (len(currentSaleRecordDictList), currentSaleTimeDict))输出:
history:
saleRowCount=1220, saleColumnCount=8
->
len(historySaleRecordDictList)=1215, historySaleTimeDict={'fisrt': Timestamp('2021-02-20 14:00:00'), 'last': Timestamp('2021-06-09 20:29:11')}
current:
saleRowCount=1319, saleColumnCount=8
->
len(currentSaleRecordDictList)=99, currentSaleTimeDict={'fisrt': Timestamp('2021-06-09 21:09:50'), 'last': Timestamp('2021-06-15 17:02:32')}-》
其中可见:
- input/业绩数据表格_historyAll.xlsx
- 一共:1220个
- 重复了=去重了:5个
- 实际载入:1215个
- 当前业绩的 input/业绩数据表格_20210615.xlsx
- 一共:1319个
- 内部其实包含了 history的所有数据
- 重复了=去重了:1220个
- 实际载入:99个
继续后续处理逻辑:计算
团队累计业绩
团队当期业绩
调试期间,重新发现,想起来了之前就知道的:
单个excel,单个销售员,是有可能有多个销售业绩的
调试时也再次证实了这点:

另一一次,调试后,更新了个人业绩总数:

更新完毕所有的 个人总业绩:

最后用代码:
def calcTeamSaleMoney(curNode):
"""Calculate team sale money"""
totalTeamSaleMoney = 0
for eachChildNode in curNode.children:
totalTeamSaleMoney += eachChildNode.personalSaleMoney
# if not eachChildNode.is_leaf:
if eachChildNode.children:
totalTeamSaleMoney += calcTeamSaleMoney(eachChildNode)
return totalTeamSaleMoney
def loadSaleRecord(excelFullPath):
"""
Load sale record dict list from excel
"""
global gLoadedSaleRecordDictList
# saleRecordDictList = {}
saleRecordDictList = []
# timeDict = {}
saleDf = pd.read_excel(io=excelFullPath, engine="openpyxl")
print("saleDf=%s" % saleDf)
saleRowCount, saleColumnCount = saleDf.shape
print("saleRowCount=%s, saleColumnCount=%s" % (saleRowCount, saleColumnCount))
saleTitleList = saleDf.columns
print("saleTitleList=%s" % saleTitleList)
# # sort by time -> for calc first and last time
# print("before head=%s" % saleDf.head())
# saleDf = saleDf.sort_values(by='时间')
# print("after head=%s" % saleDf.head())
# firstTime = saleDf.iloc[0]["时间"]
# lastTime = saleDf.iloc[saleRowCount - 1]["时间"]
# # print("firstTime=%s, lastTime=%s" % (firstTime, lastTime))
# Update: change to get valid item's earliest and latest time
# just init
# someTime = saleDf.iloc[0]["时间"]
# firstTime = someTime
# lastTime = someTime
# update: in case not add any one, so no valid time
firstTime = None
lastTime = None
curIdx = 0
# for curIdx, curRow in saleDf.itertuples():
for rowIdx, rowValue in saleDf.iterrows():
print("%s [%4d] %s %s" % ("-"*10, curIdx, rowIdx, "-"*10))
# print(rowIdx)
# print(rowValue)
# curRowDict = saleDf.iloc[curIdx]
# saleId = curRowDict["销售员编号"]
# personalSaleMoney = curRowDict["业绩金额"]
saleId = rowValue["销售员编号"]
personalSaleMoney = rowValue["业绩金额"]
saleTime = rowValue["时间"]
# print("saleId=%s, personalSaleMoney=%s, saleTime=%s" % (saleId, personalSaleMoney, saleTime))
curSaleDict = {
"saleId": saleId,
"personalSaleMoney": personalSaleMoney,
# for debug
# "personalSaleMoney": int(personalSaleMoney),
"saleTime": saleTime,
}
# if saleId in saleRecordDictList.keys():
# saleRecordDictList[saleId] = saleRecordDictList[saleId] + personalSaleMoney
# else:
# saleRecordDictList[saleId] = personalSaleMoney
# if curSaleDict in saleRecordDictList.values():
# if curSaleDict in gLoadedSaleRecordDictList.values():
if curSaleDict in gLoadedSaleRecordDictList:
print("!!! Duplicated sale record: %s" % curSaleDict)
else:
print("Added: %s" % curSaleDict)
# saleRecordDictList[saleId] = curSaleDict
# gLoadedSaleRecordDictList[saleId] = curSaleDict
saleRecordDictList.append(curSaleDict)
gLoadedSaleRecordDictList.append(curSaleDict)
# update time
if firstTime:
if saleTime < firstTime:
firstTime = saleTime
else:
firstTime = saleTime
if lastTime:
if saleTime > lastTime:
lastTime = saleTime
else:
lastTime = saleTime
curIdx += 1
# print("len(gLoadedSaleRecordDictList.keys())=%s" % len(gLoadedSaleRecordDictList.keys()))
# print("len(saleRecordDictList.keys())=%s" % len(saleRecordDictList.keys()))
# len(saleRecordDictList.keys())=463
# print("saleRecordDictList=%s" % saleRecordDictList)
print("len(gLoadedSaleRecordDictList)=%s" % len(gLoadedSaleRecordDictList))
print("len(saleRecordDictList)=%s" % len(saleRecordDictList))
# len(gLoadedSaleRecordDictList)=1215
# len(saleRecordDictList)=1215
timeDict = {
"firstTime": firstTime,
"lastTime": lastTime,
}
return saleRecordDictList, timeDict
def generatePersonalSaleMoney(saleRecordDictList):
"""
Generate personal sale money dict from sale record dict list
"""
saleMoneyDict = {}
for eachSaleRecordDict in saleRecordDictList:
saleId = eachSaleRecordDict["saleId"]
personalSaleMoney = eachSaleRecordDict["personalSaleMoney"]
if saleId in saleMoneyDict.keys():
# saleMoneyDict[saleId] += personalSaleMoney
existedMoney = saleMoneyDict[saleId]
saleMoneyDict[saleId] = existedMoney + personalSaleMoney
else:
saleMoneyDict[saleId] = personalSaleMoney
return saleMoneyDict
def buildTopNodeList(saleMoneyDict):
"""
Build top tree node list
"""
topNodeList = []
for curParentId, curChildIdList in recommandRelationDict.items():
print("curParentId=%s, curChildIdList=%s" % (curParentId, curChildIdList))
# curParentId=a12341234123, curChildIdList=['a12341234121']
existedParentNode = findNode(curParentId, topNodeList)
curRootNode = None
if existedParentNode:
curParentNode = existedParentNode
curRootNode = curParentNode
else:
# curParentNode = Node(curParentId)
curParentNode = createNode(curParentId, saleMoneyDict=saleMoneyDict)
curRootNode = curParentNode.root
if not curRootNode:
curRootNode = curParentNode
# gRootNodeList.append(curRootNode)
topNodeList.append(curRootNode)
print("curParentNode=%s" % curParentNode)
# curParentNode=Node('/a12341234123')
for eachChildId in curChildIdList:
existedChildNode = findNode(eachChildId, topNodeList)
if existedChildNode:
if not existedChildNode.parent:
existedChildNode.parent = curParentNode
curChildNode = existedChildNode
else:
# curChildNode = Node(eachChildId, parent=curParentNode)
curChildNode = createNode(eachChildId, nodeParent=curParentNode, saleMoneyDict=saleMoneyDict)
print("curChildNode=%s" % curChildNode)
# Node('/a12341234123/a12341234121', personalSaleMoney=17226.72636907)
# curChildNode=Node('/aa1234512333/aa1234512334')
topNodeList = updateRootNodeList(curChildNode, topNodeList)
# # for debug
# for pre, fill, node in RenderTree(curRootNode):
# # print("pre=%s,fill=%s,node=%s" % (pre, fill, node))
# print("%s%s" % (pre, node.name))
# print("gRootNodeList=%s" % gRootNodeList)
# print("curRootNode=%s" % curRootNode)
return topNodeList
def addFakeRootNode(topNodeList):
"""
Add fake root node for top node list
"""
rootNode = None
# fakeRootNode = Node(FakeRootNodeId)
fakeRootNode = createNode(FakeRootNodeId)
# for eachTopNode in gRootNodeList:
for eachTopNode in topNodeList:
if eachTopNode.parent:
print("Unexpected: eachTopNode=%s" % eachTopNode)
else:
eachTopNode.parent = fakeRootNode
# gRootNode = fakeRootNode
rootNode = fakeRootNode
# return gRootNode
return rootNode
def buildSaleTree(saleMoneyDict):
"""
Build sale node tree
"""
topNodeList = buildTopNodeList(saleMoneyDict)
rootNode = addFakeRootNode(topNodeList)
return rootNode, topNodeList
def updateTreeTeamSaleMoney(rootNode):
"""
Update tree node's team sale money
"""
for eachNode in rootNode.descendants:
eachNode.teamSaleMoney = calcTeamSaleMoney(eachNode)
print("Updated team sale money: eachNode=%s" % eachNode)
################################################################################
# Main
################################################################################
#----------------------------------------
### read and parse relation excel
#----------------------------------------
relationDf = pd.read_excel(io=RelationFullPath, engine="openpyxl")
print("relationDf=%s" % relationDf)
relationRowCount, relationColumnCount = relationDf.shape
print("relationRowCount=%s, relationColumnCount=%s" % (relationRowCount, relationColumnCount))
relationTitleList = relationDf.columns
print("relationTitleList=%s" % relationTitleList)
for rowIdx, rowValue in relationDf.iterrows():
print("%s %s %s" % ("-"*10, rowIdx, "-"*10))
# print(rowIdx)
# print(rowValue)
parentId = rowValue["销售员编号(引荐人)"]
childId = rowValue["推荐的下级账号"]
print("parentId=%s, childId=%s" % (parentId, childId))
if pd.isna(childId):
print("! Unexpected non child for parent %s" % parentId)
continue
if parentId in recommandRelationDict.keys():
curChildList = recommandRelationDict[parentId]
if childId in curChildList:
print("Omit duplcated child %s for parent %s" % (childId, parentId))
else:
curChildList.append(childId)
recommandRelationDict[parentId] = curChildList
else:
recommandRelationDict[parentId] = [childId]
print("len(recommandRelationDict.keys())=%s" % len(recommandRelationDict.keys())) # len(recommandRelationDict.keys())=158
# print("recommandRelationDict=%s" % recommandRelationDict)
#----------------------------------------
### read and parse sale money excel
#----------------------------------------
historySaleRecordDictList, historySaleTimeDict = loadSaleRecord(SaleHistoryFullPath)
print("len(historySaleRecordDictList)=%s, historySaleTimeDict=%s" % (len(historySaleRecordDictList), historySaleTimeDict))
currentSaleRecordDictList, currentSaleTimeDict = loadSaleRecord(SaleCurrentFullPath)
print("len(currentSaleRecordDictList)=%s, currentSaleTimeDict=%s" % (len(currentSaleRecordDictList), currentSaleTimeDict))
allSaleRecordList = []
allSaleRecordList.extend(historySaleRecordDictList)
allSaleRecordList.extend(currentSaleRecordDictList)
allPersonalSaleMoneyDict = generatePersonalSaleMoney(allSaleRecordList)
print("len(allPersonalSaleMoneyDict.keys())=%s" % len(allPersonalSaleMoneyDict.keys()))
# len(allPersonalSaleMoneyDict.keys())=472
currentyPersonalSaleMoneyDict = generatePersonalSaleMoney(currentSaleRecordDictList)
print("len(currentyPersonalSaleMoneyDict.keys())=%s" % len(currentyPersonalSaleMoneyDict.keys()))
# len(currentyPersonalSaleMoneyDict.keys())=53
#----------------------------------------
### Calculate team sale money
#----------------------------------------
currentRootNode, currentTopNodeList = buildSaleTree(currentyPersonalSaleMoneyDict)
allRootNode, allTopNodeList = buildSaleTree(allPersonalSaleMoneyDict)
# dbgTree(currentRootNode, currentTopNodeList, subFolder="current")
# dbgTree(allRootNode, allTopNodeList, subFolder="all")
dbgPrintTreeToJson(currentRootNode, subFolder="current")
dbgPrintTreeToJson(allRootNode, subFolder="all")
updateTreeTeamSaleMoney(currentRootNode)
updateTreeTeamSaleMoney(allRootNode)
# for debug
updatedTeamSaleMoneyFilename = "RelationAllNode_updatedTeamSaleMoney.json"
dbgPrintTreeToJson(currentRootNode, outputFilename=updatedTeamSaleMoneyFilename, subFolder="current")
dbgPrintTreeToJson(allRootNode, outputFilename=updatedTeamSaleMoneyFilename, subFolder="all")
#----------------------------------------
### Output
#----------------------------------------
allTeamSaleMoneyDict = {}
for eachNode in allRootNode.descendants:
allTeamSaleMoneyDict[eachNode.name] = eachNode.teamSaleMoney
currentTeamSaleMoneyDict = {}
for eachNode in currentRootNode.descendants:
currentTeamSaleMoneyDict[eachNode.name] = eachNode.teamSaleMoney
allPersonList = list(allTeamSaleMoneyDict.keys())
allTeamMoneyList = list(allTeamSaleMoneyDict.values())
currentTeamMoneyList = []
for eachPersonId in allPersonList:
currentTeamMoney = 0
if eachPersonId in currentTeamSaleMoneyDict.keys():
currentTeamMoney = currentTeamSaleMoneyDict[eachPersonId]
currentTeamMoneyList.append(currentTeamMoney)
summaryData = {
"销售员账户": allPersonList,
"团队累计总业绩": allTeamMoneyList,
"团队当前业绩": currentTeamMoneyList,
}
historyFisrtTime = historySaleTimeDict["firstTime"]
historyLastTime = historySaleTimeDict["lastTime"]
currentFirstTime = currentSaleTimeDict["firstTime"]
currentLastTime = currentSaleTimeDict["lastTime"]
if historyFisrtTime < currentFirstTime:
startTime = historyFisrtTime
else:
startTime = currentFirstTime
if historyLastTime > currentLastTime:
endTime = historyLastTime
else:
endTime = currentLastTime
print("startTime=%s, endTime=%s" % (startTime, endTime))
# startTime=2021-02-20 14:00:00, endTime=2021-06-15 17:02:32
dateFortmat = "%Y年%-m月%-d日%-H时%-M分"
startDatetimeStr = datetimeToStr(startTime, dateFortmat)
endDatetimeStr = datetimeToStr(endTime, dateFortmat)
outputSheetName = "%s-%s" % (startDatetimeStr, endDatetimeStr)
print("outputSheetName=%s" % outputSheetName)
outputDf = pd.DataFrame(data=summaryData)
outputDf.to_excel(OutputFullPath, sheet_name=outputSheetName)即可输出:
output/统计结果_20210622_202842.xlsx


转载请注明:在路上 » 【已解决】用Python计算Excel中团队历史业绩和其他统计指标