折腾:
此前已经在本地完成了:把excel中提取出来的音频文件名加上前缀,得到全部完整路径,并保存到GridFS中,一共171个文件,数据共700多MB。
现在需要的是:
如何Mac本地写代码,连接远程的MongoDB数据库,把文件数据存储到远端GridFS中。
连接远程mongodb
使用MongoDB命令连接远程服务器的MongoDB数据库 – CSDN博客
使用MongoDB命令连接远程服务器的MongoDB数据库 – MongoDB频道_MongoDB基础_MongoDB教程_nosql – 360sdn.com_专业优秀的程序员网上知识家园
好像给出的例子都是 mongo这个命令行工具的,而不是api的
不过也可以先用mongo命令试试远程连接
再去试试 API去连接
MongoDB命令连接远程服务器的MongoDB数据库 – 奔赴de博客
然后接着要去搞清楚:
【已解决】阿里云ECS服务器中已有的MongoDB的用户名密码和端口
然后接着去试试Python的代码,用pymongo中的api操作,远程连接mongodb:
参考:
Tutorial — PyMongo 3.6.1 documentation
mongo_client – Tools for connecting to MongoDB — PyMongo 3.6.1 documentation
之前连接本地的是:
<code>mongoClient = MongoClient() </code>
【已解决】远程的mongoDB中GridFS报错:AttributeError GridFS object has no attribute totalSize
然后继续去试试,用PyCharm的Mongodb插件去打开远程服务器中mongo数据库试试:
【已解决】用PyCharm的MongoDB插件连接远程MongoDB数据库
【总结】
如此,即可正常地通过如下代码:
<code># -*- coding: utf-8 -*-
import pymongo
from pymongo import MongoClient
import gridfs
# from pymongo.objectid import ObjectId
# from pymongo import objectid
from bson.objectid import ObjectId
from gridfs import GridFS
# import pprint
import os
import logging
import sys
sys.path.append("libs/crifan")
import crifanLib
import re
import mime
from openpyxl import Workbook, load_workbook
################################################################################
# Global Config/Setting
################################################################################
# Title of the worksheet that holds the storybook rows.
StorybookSheetTitle = u"绘本"

# Local root folder of the storybook data; the Excel file lives directly in
# it, and audio paths read from the sheet are joined onto it.
EnglishStorybookRootPath = u"xxx/数据/FromMaggie"
AudioFilePathPrefix = EnglishStorybookRootPath

ExcelFilename = u"英语绘本资源2018.3.28_forDebug.xlsx"
ExcelFullFilename = os.path.join(EnglishStorybookRootPath, ExcelFilename)

# Row number at which the main loop starts reading content rows.
realContentRowStartNum = 3

# Column numbers of the sheet, in left-to-right order.
# The names below are bound to 1..19 in exactly the order listed, so a new
# column must be inserted at its real position and the range end adjusted.
(
    StorybookSerieNumColNum,            # 1
    KeywordStorybookSerieColNum,        # 2
    KeywordStorybookNameColNum,         # 3
    KeywordStorybookNameKeywordColNum,  # 4
    KeywordMainActorColNum,             # 5
    KeywordTopicColNum,                 # 6
    KeywordContentKeywordColNum,        # 7
    FitAgeRangeColNum,                  # 8
    IsFictionColNum,                    # 9
    HasStorybookFileColNum,             # 10
    StorybookFilePathColNum,            # 11
    HasAudioFileColNum,                 # 12
    AudioFilePathColNum,                # 13
    AuthorColNum,                       # 14
    ContentSimpleIntroColNum,           # 15
    PublisherColNum,                    # 16
    ForeignCountryColNum,               # 17
    AwardsColNum,                       # 18
    LexileIndexColNum,                  # 19
) = range(1, 20)
################################################################################
# Global Value
################################################################################
# Run summary, filled in by the main part and logged at the end.
gSummaryDict = {
    # value returned by crifanLib.calcTimeEnd for the whole upload
    "totalCostTime": 0,
    "savedFile": {
        # number of audio files stored into GridFS
        "totalCount": 0,
        # one {"fileId": ..., "fileName": ...} dict per stored file
        "idNameList": [],
    },
}
################################################################################
# Local Function
################################################################################
def initLogging():
    """
    Initialise logging through crifanLib, writing to "<basename>.log".

    The basename is whatever crifanLib.getInputFileBasenameNoSuffix() returns
    (presumably the running script's name without extension -- confirm in
    crifanLib).

    :return: log file name
    """
    filenameNoSufx = crifanLib.getInputFileBasenameNoSuffix()
    logFilename = filenameNoSufx + ".log"
    crifanLib.loggingInit(logFilename)
    return logFilename
def strToList(inputStr, seperatorChar=","):
    """
    Convert a separator-delimited string into the list of its parts.

    example:
        u'Family members,Sick'
        ->
        [u'Family members', u'Sick']

    :param inputStr: the string to split; may be None or empty
    :param seperatorChar: the seperator char
    :return: list of the parts, or None when inputStr is None/empty
    """
    # Empty cells arrive as None (or ""); keep reporting them as None rather
    # than as an empty list so stored metadata distinguishes "no value".
    if not inputStr:
        return None
    return inputStr.split(seperatorChar)
def testGridfsDeleteFile(fsCollection):
    """
    Delete every file returned by fsCollection.find(), logging each deletion.

    WARNING: this is destructive -- it removes all files yielded by find(),
    not a single test file.

    :param fsCollection: object providing find() and delete(file_id); the
        main part passes a gridfs.GridFS instance
    :return: None
    """
    deletedCount = 0
    for deletedCount, eachFile in enumerate(fsCollection.find(), 1):
        # each found file exposes its ObjectId as `_id`
        fileObjectIdToDelete = eachFile._id
        logging.info("fileObjectIdToDelete=%s", fileObjectIdToDelete)
        fsCollection.delete(fileObjectIdToDelete)
        logging.info("delete [%d] ok for file object id=%s", deletedCount, fileObjectIdToDelete)

    logging.info("Total deleted [%d] files", deletedCount)
################################################################################
# Main Part
################################################################################
def _readCellValue(worksheet, rowNum, colNum, valueName):
    """
    Read one worksheet cell and log it as "col[<colNum>] <valueName>=<value>".

    :param worksheet: openpyxl worksheet to read from
    :param rowNum: 1-based row number
    :param colNum: 1-based column number
    :param valueName: label used in the log line (must not contain '%')
    :return: the cell's value (None for an empty cell)
    """
    cellValue = worksheet.cell(row=rowNum, column=colNum).value
    logging.info("col[%d] " + valueName + "=%s", colNum, cellValue)
    return cellValue


initLogging()

# parse excel file
wb = load_workbook(ExcelFullFilename)
logging.info("wb=%s", wb)
ws = wb[StorybookSheetTitle]
logging.info("ws=%s", ws)

# init mongodb: connect to the remote server
# (for a local server use: mongoClient = MongoClient())
# NOTE: host/username/password below are placeholders; do not commit real
# credentials into source.
mongoClient = MongoClient(
    host="x.x.x.x",
    port=27017,
    username="username",
    password="P@wd"
)
logging.info("mongoClient=%s", mongoClient)

gridfsDb = mongoClient.gridfs
logging.info("gridfsDb=%s", gridfsDb)
fsCollection = GridFS(gridfsDb)
logging.info("fsCollection=%s", fsCollection)

# WARNING: wipes every file already stored in this GridFS before uploading,
# so that re-running the script does not create duplicates.
testGridfsDeleteFile(fsCollection)

crifanLib.calcTimeStart("saveAllAudioFile")

# process each row in excel
for curRowNum in range(realContentRowStartNum, ws.max_row + 1):
    logging.info("-"*30 + " row[%d] " + "-"*30, curRowNum)

    hasAudioFileColNumCellValue = _readCellValue(
        ws, curRowNum, HasAudioFileColNum, "hasAudioFileColNumCellValue")
    audioFilePathColNumCellValue = _readCellValue(
        ws, curRowNum, AudioFilePathColNum, "audioFilePathColNumCellValue")

    # only rows marked as having audio AND carrying a non-empty path are uploaded
    if (hasAudioFileColNumCellValue != u"有") or (not audioFilePathColNumCellValue):
        logging.warning("not found valid audio file for row=%d", curRowNum)
        continue

    logging.info("will save audio file %s", audioFilePathColNumCellValue)

    # extract all column value
    storybookSerieNumCellValue = _readCellValue(
        ws, curRowNum, StorybookSerieNumColNum, "storybookSerieNumCellValue")
    keywordStorybookSerieCellValue = _readCellValue(
        ws, curRowNum, KeywordStorybookSerieColNum, "keywordStorybookSerieCellValue")
    keywordStorybookNameColNumCellValue = _readCellValue(
        ws, curRowNum, KeywordStorybookNameColNum, "keywordStorybookNameColNumCellValue")
    keywordStorybookNameKeywordCellValue = _readCellValue(
        ws, curRowNum, KeywordStorybookNameKeywordColNum, "keywordStorybookNameKeywordCellValue")
    keywordMainActorColNumCellValue = _readCellValue(
        ws, curRowNum, KeywordMainActorColNum, "keywordMainActorColNumCellValue")
    keywordTopicColNumCellValue = _readCellValue(
        ws, curRowNum, KeywordTopicColNum, "keywordTopicColNumCellValue")
    keywordContentKeywordColNumCellValue = _readCellValue(
        ws, curRowNum, KeywordContentKeywordColNum, "keywordContentKeywordColNumCellValue")
    fitAgeRangeColNumCellValue = _readCellValue(
        ws, curRowNum, FitAgeRangeColNum, "fitAgeRangeColNumCellValue")
    isFictionColNumCellValue = _readCellValue(
        ws, curRowNum, IsFictionColNum, "isFictionColNumCellValue")
    hasStorybookFileColNumCellValue = _readCellValue(
        ws, curRowNum, HasStorybookFileColNum, "hasStorybookFileColNumCellValue")
    storybookFilePathColNumCellValue = _readCellValue(
        ws, curRowNum, StorybookFilePathColNum, "storybookFilePathColNumCellValue")
    authorColNumCellValue = _readCellValue(
        ws, curRowNum, AuthorColNum, "authorColNumCellValue")
    contentSimpleIntroColNumCellValue = _readCellValue(
        ws, curRowNum, ContentSimpleIntroColNum, "contentSimpleIntroColNumCellValue")
    publisherColNumCellValue = _readCellValue(
        ws, curRowNum, PublisherColNum, "publisherColNumCellValue")
    foreignCountryColNumCellValue = _readCellValue(
        ws, curRowNum, ForeignCountryColNum, "foreignCountryColNumCellValue")
    awardsColNumCellValue = _readCellValue(
        ws, curRowNum, AwardsColNum, "awardsColNumCellValue")
    lexileIndexColNumCellValue = _readCellValue(
        ws, curRowNum, LexileIndexColNum, "lexileIndexColNumCellValue")

    # the sheet stores Windows-style paths; normalise the separators, e.g.
    # u'英语资源\Madeline\音频\Madeline.mp3' -> u'英语资源/Madeline/音频/Madeline.mp3'
    curAudioFilenameFiltered = re.sub(r"\\", "/", audioFilePathColNumCellValue)
    curAudioFullFilename = os.path.join(AudioFilePathPrefix, curAudioFilenameFiltered)
    if not os.path.isfile(curAudioFullFilename):
        logging.error("Can not find file: %s", curAudioFullFilename)
        continue

    curFilename = crifanLib.getBasename(curAudioFullFilename)  # e.g. u'Madeline.mp3'
    logging.info("curFilename=%s", curFilename)

    # extract MIME type from the file name, e.g. 'audio/mpeg' for .mp3;
    # fall back to a generic binary type when the extension is unknown
    # instead of crashing on an empty match list
    matchedMimeTypes = mime.Types.of(curFilename)
    if matchedMimeTypes:
        fileMimeType = matchedMimeTypes[0].content_type
    else:
        fileMimeType = "application/octet-stream"
    logging.info("fileMimeType=%s", fileMimeType)

    metadataDict = {
        "type": "storybook",
        "storybookSeriesNumber": storybookSerieNumCellValue,
        "keywordStorybookSeries": keywordStorybookSerieCellValue,
        "keywordStorybookName": keywordStorybookNameColNumCellValue,
        "keywordStorybookNameKeywordList": strToList(keywordStorybookNameKeywordCellValue),
        "keywordMainActorList": strToList(keywordMainActorColNumCellValue),
        "keywordTopicList": strToList(keywordTopicColNumCellValue),
        "keywordContentKeywordList": strToList(keywordContentKeywordColNumCellValue),
        "fitAgeRange": fitAgeRangeColNumCellValue,
        "isFiction": isFictionColNumCellValue,
        "hasStorybookFile": hasStorybookFileColNumCellValue,
        "storybookFilePath": storybookFilePathColNumCellValue,
        "hasAudioFile": hasAudioFileColNumCellValue,
        "audioFilePath": audioFilePathColNumCellValue,
        "authorList": strToList(authorColNumCellValue),
        "contentSimpleIntro": contentSimpleIntroColNumCellValue,
        "publisher": publisherColNumCellValue,
        "foreignCountry": foreignCountryColNumCellValue,
        "awards": awardsColNumCellValue,
        "lexileIndex": lexileIndexColNumCellValue
    }
    logging.info("metadataDict=%s", metadataDict)

    # audio is binary data: it must be opened in "rb" mode, otherwise the
    # content is decoded as text (corrupting it or failing outright)
    with open(curAudioFullFilename, "rb") as audioFp:
        audioFileObjectId = fsCollection.put(
            audioFp,
            filename=curFilename,
            content_type=fileMimeType,
            metadata=metadataDict)
    logging.info("audioFileObjectId=%s", audioFileObjectId)

    # record what was stored for the final summary
    audioFileIdStr = str(audioFileObjectId)
    gSummaryDict["savedFile"]["totalCount"] += 1
    idNameDict = {
        "fileId": audioFileIdStr,
        "fileName": curFilename
    }
    gSummaryDict["savedFile"]["idNameList"].append(idNameDict)

gSummaryDict["totalCostTime"] = crifanLib.calcTimeEnd("saveAllAudioFile")
logging.info("="*30 + " Summary Info " + "="*30)
logging.info("gSummaryDict=%s", gSummaryDict)
logging.info("%s", crifanLib.jsonToPrettyStr(gSummaryDict))
</code>把本地的音频文件:

上传到在线的MongoDB中了:

