python反斜杠问题,用python生成datax的json配置文件,输入这项配置信息 "fieldDelimiter": "\u0001",
生成的json文件总是会变成"fieldDelimiter": "\\u0001", 多一个反斜杠,求解答
"fieldDelimiter": "\u0001"
"fieldDelimiter": "\u0001"
字典转json的时候给个参数,ensure_ascii=False
例如将其保存为json,就是你代码的第88行
json.dump(job,f,ensure_ascii=False)
我这没有问题啊!
打开文件时加个encoding="utf-8"试试
import json
# Minimal reproduction: dump a dict whose value is the control character
# U+0001 to a UTF-8 encoded JSON file.
job = {"fieldDelimiter": "\u0001"}
with open("data.json", "w", encoding="utf-8") as f:
    # The body of the ``with`` statement must be indented to be valid Python.
    json.dump(job, f, ensure_ascii=False, indent=2)
# Alternative suggestion: round-trip the value through encode()/decode().
# NOTE(review): on Python 3 this presumably yields a string equal to the
# original literal, so it should not change what json.dump writes - confirm.
job = {"fieldDelimiter": "\u0001".encode().decode()}
# coding=utf-8
import json
import getopt
import os
import sys
import MySQLdb
# MySQL connection settings; adjust these to match your environment.
mysql_host = ""
mysql_port = ""
mysql_user = ""
mysql_passwd = ""
# HDFS NameNode settings; adjust these to match your environment.
hdfs_nn_host = ""
hdfs_nn_port = ""
# Target directory for the generated job files; adjust as needed.
output_path = "/opt/module/datax/job/export/data_json"
def get_connection():
    """Open a new MySQL connection from the module-level settings.

    Returns:
        The connection object returned by ``MySQLdb.connect``.
    """
    connect_kwargs = {
        "host": mysql_host,
        "port": int(mysql_port),  # the port setting is kept as a string
        "user": mysql_user,
        "passwd": mysql_passwd,
    }
    return MySQLdb.connect(**connect_kwargs)
def get_mysql_meta(database, table):
    """Fetch the column names and data types of a MySQL table.

    Args:
        database: Schema name matched against ``TABLE_SCHEMA``.
        table: Table name matched against ``TABLE_NAME``.

    Returns:
        The rows returned by ``cursor.fetchall()``; each row is
        ``(COLUMN_NAME, DATA_TYPE)``, ordered by ``ORDINAL_POSITION``.
    """
    sql = (
        "SELECT COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS "
        "WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s "
        "ORDER BY ORDINAL_POSITION"
    )
    connection = get_connection()
    # try/finally so the cursor and the connection are released even when
    # the query raises.
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(sql, [database, table])
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()
def get_mysql_columns(database, table):
    """Return the column names of a MySQL table as a list.

    Args:
        database: Schema name of the table.
        table: Table name.

    Returns:
        A list holding the first field (the column name) of every row
        returned by ``get_mysql_meta``.
    """
    # Build a real list: on Python 3 ``map`` returns a lazy iterator, which
    # ``json.dump`` rejects as not JSON serializable.
    return [row[0] for row in get_mysql_meta(database, table)]
def generate_json(target_database, target_table, target_business, target_filetype):
    """Write a DataX job file that loads an HDFS path into a MySQL table.

    The job pairs an ``hdfsreader`` with a ``mysqlwriter`` whose column list
    is read from the target table's metadata. The file is created inside
    ``output_path`` and named
    ``<business>_hive_mysql_<database>_<table>.json``.

    Args:
        target_database: MySQL database that receives the data.
        target_table: MySQL table that receives the data.
        target_business: Prefix used in the generated file name.
        target_filetype: Value written to the reader's ``fileType`` field.
    """
    reader = {
        "name": "hdfsreader",
        "parameter": {
            "path": "${exportdir}",
            "defaultFS": "hdfs://" + hdfs_nn_host + ":" + hdfs_nn_port,
            "column": ["*"],
            "fileType": target_filetype,
            "encoding": "UTF-8",
            # The u prefix makes this the single control character U+0001 on
            # both Python 2 and Python 3, which json.dump writes as \u0001.
            # A plain literal on Python 2, or a raw literal, is six
            # characters starting with a backslash and would be written as
            # \\u0001 instead.
            "fieldDelimiter": u"\u0001",
            "nullFormat": "\\N"
        }
    }
    writer = {
        "name": "mysqlwriter",
        "parameter": {
            "writeMode": "replace",
            "username": mysql_user,
            "password": mysql_passwd,
            # list() guarantees a JSON-serializable sequence even if the
            # helper hands back a lazy iterator.
            "column": list(get_mysql_columns(target_database, target_table)),
            "connection": [
                {
                    "jdbcUrl":
                        "jdbc:mysql://" + mysql_host + ":" + mysql_port + "/" + target_database + "?useUnicode=true&characterEncoding=utf-8",
                    "table": [target_table]
                }
            ]
        }
    }
    job = {
        "job": {
            "setting": {
                "speed": {
                    "channel": 3
                },
                "errorLimit": {
                    "record": 0,
                    "percentage": 0.02
                }
            },
            "content": [{
                "reader": reader,
                "writer": writer
            }]
        }
    }

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    file_name = "_".join([target_business, "hive", "mysql", target_database, target_table]) + ".json"
    with open(os.path.join(output_path, file_name), "w") as f:
        json.dump(job, f)
def main(args):
    """Parse the command-line options and generate the DataX job file.

    Args:
        args: Argument list without the program name. Recognized options:
            ``-d``/``--targetdb``, ``-t``/``--targettbl``,
            ``-n``/``--targetbun`` and ``-f``/``--targetft``; each takes a
            value. Options that are not given default to an empty string.

    Raises:
        getopt.GetoptError: If an option is unknown or lacks its value.
    """
    # Maps every accepted option spelling to the setting it fills in.
    option_targets = {
        '-d': 'database', '--targetdb': 'database',
        '-t': 'table', '--targettbl': 'table',
        '-n': 'business', '--targetbun': 'business',
        '-f': 'filetype', '--targetft': 'filetype',
    }
    settings = {'database': "", 'table': "", 'business': "", 'filetype': ""}

    # The short-option spec lists bare letters, each followed by ':' because
    # it takes a value; putting '-' in the spec would register '-' itself as
    # an extra flag.
    options, _ = getopt.getopt(args, 'd:t:n:f:', ['targetdb=', 'targettbl=', 'targetbun=', 'targetft='])
    for opt_name, opt_value in options:
        settings[option_targets[opt_name]] = opt_value

    generate_json(settings['database'], settings['table'], settings['business'], settings['filetype'])


if __name__ == '__main__':
    main(sys.argv[1:])
再用replace("\\\\","\\")处理一下,让它变成一个\
这是一个实例,供你参考:
jsonContent = json.dumps(myDict, default=convert)
with open('data.json', 'w', encoding='utf-8') as f:
json.dump(jsonContent, f, ensure_ascii=False, indent=4)
return jsonContent
我这样做是为了将字典转换为json并将其保存在一个文件中。如果我尝试用Python打印json,我会得到如下未格式化的dict:
myDict = {'first': {'phone': 1900, 'desktop': 1577, 'tablet': 148, 'bot': 9, 'other': 1}},
这还是可以的。但是当我打开文件时,我看到了这样的东西:
"{\"first\": {\"phone\": 1900, \"desktop\": 1577, \"tablet\": 148, \"bot\": 9, \"other\": 1}}"
如何删除所有反斜杠并在Python和保存的文件中正确格式化它?
解决思路:
import json
# Dump the dict itself exactly once; serializing an already serialized JSON
# string a second time is what produces the escaped quotes in the file.
myDict = {"first": {"phone": 1900,"other": 1}, "second": {"adwords": 1419, "no_om_source": 1223}}
with open('data.json', 'w', encoding='utf-8') as f:
    # The body of the ``with`` statement must be indented to be valid Python.
    json.dump(myDict, f, ensure_ascii=False, indent=4)
看起来,这事不简单啊!
其实会不会没前面回答的那么复杂?
你的情况是在输出json后,打开显示是多了“\”
那么有可能在两个方面出现情况:
1、输出时转码的时候出现问题,这个跟系统有关系;(对于这个,你可以换台电脑试试。)
2、输出没问题,打开的时候多了一个“\”,这个跟打开json文件的软件有关系;(对于这个,你可以换个软件打开试试。)
python生成datax同步数据需要的json格式文件
https://blog.csdn.net/qq_25131333/article/details/122178922
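加一个r试试(注意:r"\u0001" 是反斜杠加 u0001 共 6 个字符的字面量,json.dump 会把其中的反斜杠转义成 \\,文件里仍然会多一个反斜杠;要在文件中得到 "\u0001",应让该值是单个控制字符 U+0001,例如写成 u"\u0001"):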
# coding=utf-8
import json
import getopt
import os
import sys
import MySQLdb
# MySQL connection settings; adjust these to match your environment.
mysql_host = ""
mysql_port = ""
mysql_user = ""
mysql_passwd = ""
# HDFS NameNode settings; adjust these to match your environment.
hdfs_nn_host = ""
hdfs_nn_port = ""
# Target directory for the generated job files; adjust as needed.
output_path = "/opt/module/datax/job/export/data_json"
def get_connection():
    """Open a new MySQL connection from the module-level settings.

    Returns:
        The connection object returned by ``MySQLdb.connect``.
    """
    connect_kwargs = {
        "host": mysql_host,
        "port": int(mysql_port),  # the port setting is kept as a string
        "user": mysql_user,
        "passwd": mysql_passwd,
    }
    return MySQLdb.connect(**connect_kwargs)
def get_mysql_meta(database, table):
    """Fetch the column names and data types of a MySQL table.

    Args:
        database: Schema name matched against ``TABLE_SCHEMA``.
        table: Table name matched against ``TABLE_NAME``.

    Returns:
        The rows returned by ``cursor.fetchall()``; each row is
        ``(COLUMN_NAME, DATA_TYPE)``, ordered by ``ORDINAL_POSITION``.
    """
    sql = (
        "SELECT COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS "
        "WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s "
        "ORDER BY ORDINAL_POSITION"
    )
    connection = get_connection()
    # try/finally so the cursor and the connection are released even when
    # the query raises.
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(sql, [database, table])
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()
def get_mysql_columns(database, table):
    """Return the column names of a MySQL table as a list.

    Args:
        database: Schema name of the table.
        table: Table name.

    Returns:
        A list holding the first field (the column name) of every row
        returned by ``get_mysql_meta``.
    """
    # Build a real list: on Python 3 ``map`` returns a lazy iterator, which
    # ``json.dump`` rejects as not JSON serializable.
    return [row[0] for row in get_mysql_meta(database, table)]
def generate_json(target_database, target_table, target_business, target_filetype):
    """Write a DataX job file that loads an HDFS path into a MySQL table.

    The job pairs an ``hdfsreader`` with a ``mysqlwriter`` whose column list
    is read from the target table's metadata. The file is created inside
    ``output_path`` and named
    ``<business>_hive_mysql_<database>_<table>.json``.

    Args:
        target_database: MySQL database that receives the data.
        target_table: MySQL table that receives the data.
        target_business: Prefix used in the generated file name.
        target_filetype: Value written to the reader's ``fileType`` field.
    """
    reader = {
        "name": "hdfsreader",
        "parameter": {
            "path": "${exportdir}",
            "defaultFS": "hdfs://" + hdfs_nn_host + ":" + hdfs_nn_port,
            "column": ["*"],
            "fileType": target_filetype,
            "encoding": "UTF-8",
            # The u prefix makes this the single control character U+0001 on
            # both Python 2 and Python 3, which json.dump writes as \u0001.
            # A raw literal is six characters starting with a backslash, and
            # json.dump escapes that backslash, writing \\u0001 instead.
            "fieldDelimiter": u"\u0001",
            "nullFormat": "\\N"
        }
    }
    writer = {
        "name": "mysqlwriter",
        "parameter": {
            "writeMode": "replace",
            "username": mysql_user,
            "password": mysql_passwd,
            # list() guarantees a JSON-serializable sequence even if the
            # helper hands back a lazy iterator.
            "column": list(get_mysql_columns(target_database, target_table)),
            "connection": [
                {
                    "jdbcUrl":
                        "jdbc:mysql://" + mysql_host + ":" + mysql_port + "/" + target_database + "?useUnicode=true&characterEncoding=utf-8",
                    "table": [target_table]
                }
            ]
        }
    }
    job = {
        "job": {
            "setting": {
                "speed": {
                    "channel": 3
                },
                "errorLimit": {
                    "record": 0,
                    "percentage": 0.02
                }
            },
            "content": [{
                "reader": reader,
                "writer": writer
            }]
        }
    }

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    file_name = "_".join([target_business, "hive", "mysql", target_database, target_table]) + ".json"
    with open(os.path.join(output_path, file_name), "w") as f:
        json.dump(job, f)
def main(args):
    """Parse the command-line options and generate the DataX job file.

    Args:
        args: Argument list without the program name. Recognized options:
            ``-d``/``--targetdb``, ``-t``/``--targettbl``,
            ``-n``/``--targetbun`` and ``-f``/``--targetft``; each takes a
            value. Options that are not given default to an empty string.

    Raises:
        getopt.GetoptError: If an option is unknown or lacks its value.
    """
    # Maps every accepted option spelling to the setting it fills in.
    option_targets = {
        '-d': 'database', '--targetdb': 'database',
        '-t': 'table', '--targettbl': 'table',
        '-n': 'business', '--targetbun': 'business',
        '-f': 'filetype', '--targetft': 'filetype',
    }
    settings = {'database': "", 'table': "", 'business': "", 'filetype': ""}

    # The short-option spec lists bare letters, each followed by ':' because
    # it takes a value; putting '-' in the spec would register '-' itself as
    # an extra flag.
    options, _ = getopt.getopt(args, 'd:t:n:f:', ['targetdb=', 'targettbl=', 'targetbun=', 'targetft='])
    for opt_name, opt_value in options:
        settings[option_targets[opt_name]] = opt_value

    generate_json(settings['database'], settings['table'], settings['business'], settings['filetype'])


if __name__ == '__main__':
    main(sys.argv[1:])