ADD: added new version of protobuf

This commit is contained in:
Henry Winkel
2022-12-20 10:09:28 +01:00
parent 4a79559129
commit 1e2b3dda7b
1513 changed files with 123720 additions and 83381 deletions

View File

@@ -0,0 +1,189 @@
#!/usr/bin/env python2.7
from __future__ import print_function
import argparse
import json
import uuid
import httplib2
from apiclient import discovery
from apiclient.errors import HttpError
from oauth2client.client import GoogleCredentials
# 30 days in milliseconds
_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
NUM_RETRIES = 3
def create_big_query():
"""Authenticates with cloud platform and gets a BiqQuery service object
"""
creds = GoogleCredentials.get_application_default()
return discovery.build(
'bigquery', 'v2', credentials=creds, cache_discovery=False)
def create_dataset(biq_query, project_id, dataset_id):
is_success = True
body = {
'datasetReference': {
'projectId': project_id,
'datasetId': dataset_id
}
}
try:
dataset_req = biq_query.datasets().insert(
projectId=project_id, body=body)
dataset_req.execute(num_retries=NUM_RETRIES)
except HttpError as http_error:
if http_error.resp.status == 409:
print('Warning: The dataset %s already exists' % dataset_id)
else:
# Note: For more debugging info, print "http_error.content"
print('Error in creating dataset: %s. Err: %s' % (dataset_id,
http_error))
is_success = False
return is_success
def create_table(big_query, project_id, dataset_id, table_id, table_schema,
description):
fields = [{
'name': field_name,
'type': field_type,
'description': field_description
} for (field_name, field_type, field_description) in table_schema]
return create_table2(big_query, project_id, dataset_id, table_id, fields,
description)
def create_partitioned_table(big_query,
project_id,
dataset_id,
table_id,
table_schema,
description,
partition_type='DAY',
expiration_ms=_EXPIRATION_MS):
"""Creates a partitioned table. By default, a date-paritioned table is created with
each partition lasting 30 days after it was last modified.
"""
fields = [{
'name': field_name,
'type': field_type,
'description': field_description
} for (field_name, field_type, field_description) in table_schema]
return create_table2(big_query, project_id, dataset_id, table_id, fields,
description, partition_type, expiration_ms)
def create_table2(big_query,
project_id,
dataset_id,
table_id,
fields_schema,
description,
partition_type=None,
expiration_ms=None):
is_success = True
body = {
'description': description,
'schema': {
'fields': fields_schema
},
'tableReference': {
'datasetId': dataset_id,
'projectId': project_id,
'tableId': table_id
}
}
if partition_type and expiration_ms:
body["timePartitioning"] = {
"type": partition_type,
"expirationMs": expiration_ms
}
try:
table_req = big_query.tables().insert(
projectId=project_id, datasetId=dataset_id, body=body)
res = table_req.execute(num_retries=NUM_RETRIES)
print('Successfully created %s "%s"' % (res['kind'], res['id']))
except HttpError as http_error:
if http_error.resp.status == 409:
print('Warning: Table %s already exists' % table_id)
else:
print('Error in creating table: %s. Err: %s' % (table_id,
http_error))
is_success = False
return is_success
def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
is_success = True
body = {
'schema': {
'fields': fields_schema
},
'tableReference': {
'datasetId': dataset_id,
'projectId': project_id,
'tableId': table_id
}
}
try:
table_req = big_query.tables().patch(
projectId=project_id,
datasetId=dataset_id,
tableId=table_id,
body=body)
res = table_req.execute(num_retries=NUM_RETRIES)
print('Successfully patched %s "%s"' % (res['kind'], res['id']))
except HttpError as http_error:
print('Error in creating table: %s. Err: %s' % (table_id, http_error))
is_success = False
return is_success
def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
is_success = True
body = {'rows': rows_list}
try:
insert_req = big_query.tabledata().insertAll(
projectId=project_id,
datasetId=dataset_id,
tableId=table_id,
body=body)
res = insert_req.execute(num_retries=NUM_RETRIES)
if res.get('insertErrors', None):
print('Error inserting rows! Response: %s' % res)
is_success = False
except HttpError as http_error:
print('Error inserting rows to the table %s' % table_id)
is_success = False
return is_success
def sync_query_job(big_query, project_id, query, timeout=5000):
query_data = {'query': query, 'timeoutMs': timeout}
query_job = None
try:
query_job = big_query.jobs().query(
projectId=project_id,
body=query_data).execute(num_retries=NUM_RETRIES)
except HttpError as http_error:
print('Query execute job failed with error: %s' % http_error)
print(http_error.content)
return query_job
# List of (column name, column type, description) tuples
def make_row(unique_row_id, row_values_dict):
"""row_values_dict is a dictionary of column name and column value.
"""
return {'insertId': unique_row_id, 'json': row_values_dict}

View File

@@ -0,0 +1,64 @@
#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
using google::protobuf::FieldDescriptor;
using google::protobuf::Message;
using google::protobuf::Reflection;
namespace google {
namespace protobuf {
namespace util {
class DataStripper {
public:
void StripMessage(Message *message) {
std::vector<const FieldDescriptor*> set_fields;
const Reflection* reflection = message->GetReflection();
reflection->ListFields(*message, &set_fields);
for (size_t i = 0; i < set_fields.size(); i++) {
const FieldDescriptor* field = set_fields[i];
if (ShouldBeClear(field)) {
reflection->ClearField(message, field);
continue;
}
if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) {
for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
StripMessage(reflection->MutableRepeatedMessage(message, field, j));
}
} else {
StripMessage(reflection->MutableMessage(message, field));
}
}
}
reflection->MutableUnknownFields(message)->Clear();
}
private:
virtual bool ShouldBeClear(const FieldDescriptor *field) = 0;
};
class GogoDataStripper : public DataStripper {
private:
virtual bool ShouldBeClear(const FieldDescriptor *field) {
return field->type() == FieldDescriptor::TYPE_GROUP;
}
};
class Proto3DataStripper : public DataStripper {
private:
virtual bool ShouldBeClear(const FieldDescriptor *field) {
return field->type() == FieldDescriptor::TYPE_GROUP ||
field->is_extension();
}
};
} // namespace util
} // namespace protobuf
} // namespace google
#endif // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_

View File

@@ -0,0 +1,74 @@
#include "benchmarks.pb.h"
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
#include "data_proto2_to_proto3_util.h"
#include <fstream>
using google::protobuf::util::GogoDataStripper;
std::string ReadFile(const std::string& name) {
std::ifstream file(name.c_str());
GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
<< name
<< "', please make sure you are running this command from the benchmarks"
<< " directory.\n";
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}
int main(int argc, char *argv[]) {
if (argc % 2 == 0 || argc == 1) {
std::cerr << "Usage: [input_files] [output_file_names] where " <<
"input_files are one to one mapping to output_file_names." <<
std::endl;
return 1;
}
for (int i = argc / 2; i > 0; i--) {
const std::string &input_file = argv[i];
const std::string &output_file = argv[i + argc / 2];
std::cerr << "Generating " << input_file
<< " to " << output_file << std::endl;
benchmarks::BenchmarkDataset dataset;
Message* message;
std::string dataset_payload = ReadFile(input_file);
GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
<< "Can' t parse data file " << input_file;
if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
message = new benchmarks::proto3::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
message = new benchmarks::proto2::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
message = new benchmarks::proto2::GoogleMessage2;
} else if (dataset.message_name() ==
"benchmarks.google_message3.GoogleMessage3") {
message = new benchmarks::google_message3::GoogleMessage3;
} else if (dataset.message_name() ==
"benchmarks.google_message4.GoogleMessage4") {
message = new benchmarks::google_message4::GoogleMessage4;
} else {
std::cerr << "Unknown message type: " << dataset.message_name();
exit(1);
}
for (int i = 0; i < dataset.payload_size(); i++) {
message->ParseFromString(dataset.payload(i));
GogoDataStripper stripper;
stripper.StripMessage(message);
dataset.set_payload(i, message->SerializeAsString());
}
std::ofstream ofs(output_file);
ofs << dataset.SerializeAsString();
ofs.close();
}
return 0;
}

View File

@@ -0,0 +1,74 @@
#include "benchmarks.pb.h"
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
#include "data_proto2_to_proto3_util.h"
#include <fstream>
using google::protobuf::util::Proto3DataStripper;
std::string ReadFile(const std::string& name) {
std::ifstream file(name.c_str());
GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
<< name
<< "', please make sure you are running this command from the benchmarks"
<< " directory.\n";
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}
int main(int argc, char *argv[]) {
if (argc % 2 == 0 || argc == 1) {
std::cerr << "Usage: [input_files] [output_file_names] where " <<
"input_files are one to one mapping to output_file_names." <<
std::endl;
return 1;
}
for (int i = argc / 2; i > 0; i--) {
const std::string &input_file = argv[i];
const std::string &output_file = argv[i + argc / 2];
std::cerr << "Generating " << input_file
<< " to " << output_file << std::endl;
benchmarks::BenchmarkDataset dataset;
Message* message;
std::string dataset_payload = ReadFile(input_file);
GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
<< "Can' t parse data file " << input_file;
if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
message = new benchmarks::proto3::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
message = new benchmarks::proto2::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
message = new benchmarks::proto2::GoogleMessage2;
} else if (dataset.message_name() ==
"benchmarks.google_message3.GoogleMessage3") {
message = new benchmarks::google_message3::GoogleMessage3;
} else if (dataset.message_name() ==
"benchmarks.google_message4.GoogleMessage4") {
message = new benchmarks::google_message4::GoogleMessage4;
} else {
std::cerr << "Unknown message type: " << dataset.message_name();
exit(1);
}
for (int i = 0; i < dataset.payload_size(); i++) {
message->ParseFromString(dataset.payload(i));
Proto3DataStripper stripper;
stripper.StripMessage(message);
dataset.set_payload(i, message->SerializeAsString());
}
std::ofstream ofs(output_file);
ofs << dataset.SerializeAsString();
ofs.close();
}
return 0;
}

View File

@@ -0,0 +1,103 @@
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "schema_proto2_to_proto3_util.h"
#include "google/protobuf/compiler/plugin.h"
using google::protobuf::FileDescriptorProto;
using google::protobuf::FileDescriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::io::Printer;
using google::protobuf::util::SchemaGroupStripper;
using google::protobuf::util::EnumScrubber;
namespace google {
namespace protobuf {
namespace compiler {
namespace {
string StripProto(string filename) {
if (filename.substr(filename.size() - 11) == ".protodevel") {
// .protodevel
return filename.substr(0, filename.size() - 11);
} else {
// .proto
return filename.substr(0, filename.size() - 6);
}
}
DescriptorPool new_pool_;
} // namespace
class GoGoProtoGenerator : public CodeGenerator {
public:
virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files,
const string& parameter,
GeneratorContext* context,
string* error) const {
for (int i = 0; i < files.size(); i++) {
for (auto file : files) {
bool can_generate =
(new_pool_.FindFileByName(file->name()) == nullptr);
for (int j = 0; j < file->dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->public_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->public_dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->weak_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->weak_dependency(j)->name()) != nullptr);
}
if (can_generate) {
Generate(file, parameter, context, error);
break;
}
}
}
return true;
}
virtual bool Generate(const FileDescriptor* file,
const string& parameter,
GeneratorContext* context,
string* error) const {
FileDescriptorProto new_file;
file->CopyTo(&new_file);
SchemaGroupStripper::StripFile(file, &new_file);
EnumScrubber enum_scrubber;
enum_scrubber.ScrubFile(&new_file);
string filename = file->name();
string basename = StripProto(filename);
std::vector<std::pair<string,string>> option_pairs;
ParseGeneratorParameter(parameter, &option_pairs);
std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
context->Open(basename + ".proto"));
string content = new_pool_.BuildFile(new_file)->DebugString();
Printer printer(output.get(), '$');
printer.WriteRaw(content.c_str(), content.size());
return true;
}
};
} // namespace compiler
} // namespace protobuf
} // namespace google
int main(int argc, char* argv[]) {
google::protobuf::compiler::GoGoProtoGenerator generator;
return google::protobuf::compiler::PluginMain(argc, argv, &generator);
}

View File

@@ -0,0 +1,115 @@
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "schema_proto2_to_proto3_util.h"
#include "google/protobuf/compiler/plugin.h"
using google::protobuf::FileDescriptorProto;
using google::protobuf::FileDescriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::io::Printer;
using google::protobuf::util::SchemaGroupStripper;
using google::protobuf::util::EnumScrubber;
using google::protobuf::util::ExtensionStripper;
using google::protobuf::util::FieldScrubber;
namespace google {
namespace protobuf {
namespace compiler {
namespace {
string StripProto(string filename) {
return filename.substr(0, filename.rfind(".proto"));
}
DescriptorPool* GetPool() {
static DescriptorPool *pool = new DescriptorPool();
return pool;
}
} // namespace
class Proto2ToProto3Generator final : public CodeGenerator {
public:
bool GenerateAll(const std::vector<const FileDescriptor*>& files,
const string& parameter,
GeneratorContext* context,
string* error) const {
for (int i = 0; i < files.size(); i++) {
for (auto file : files) {
if (CanGenerate(file)) {
Generate(file, parameter, context, error);
break;
}
}
}
return true;
}
bool Generate(const FileDescriptor* file,
const string& parameter,
GeneratorContext* context,
string* error) const {
FileDescriptorProto new_file;
file->CopyTo(&new_file);
SchemaGroupStripper::StripFile(file, &new_file);
EnumScrubber enum_scrubber;
enum_scrubber.ScrubFile(&new_file);
ExtensionStripper::StripFile(&new_file);
FieldScrubber::ScrubFile(&new_file);
new_file.set_syntax("proto3");
string filename = file->name();
string basename = StripProto(filename);
std::vector<std::pair<string,string>> option_pairs;
ParseGeneratorParameter(parameter, &option_pairs);
std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
context->Open(basename + ".proto"));
string content = GetPool()->BuildFile(new_file)->DebugString();
Printer printer(output.get(), '$');
printer.WriteRaw(content.c_str(), content.size());
return true;
}
private:
bool CanGenerate(const FileDescriptor* file) const {
if (GetPool()->FindFileByName(file->name()) != nullptr) {
return false;
}
for (int j = 0; j < file->dependency_count(); j++) {
if (GetPool()->FindFileByName(file->dependency(j)->name()) == nullptr) {
return false;
}
}
for (int j = 0; j < file->public_dependency_count(); j++) {
if (GetPool()->FindFileByName(
file->public_dependency(j)->name()) == nullptr) {
return false;
}
}
for (int j = 0; j < file->weak_dependency_count(); j++) {
if (GetPool()->FindFileByName(
file->weak_dependency(j)->name()) == nullptr) {
return false;
}
}
return true;
}
};
} // namespace compiler
} // namespace protobuf
} // namespace google
int main(int argc, char* argv[]) {
google::protobuf::compiler::Proto2ToProto3Generator generator;
return google::protobuf::compiler::PluginMain(argc, argv, &generator);
}

View File

@@ -0,0 +1,352 @@
# This import depends on the automake rule protoc_middleman, please make sure
# protoc_middleman has been built before run this file.
import argparse
import json
import re
import os.path
# BEGIN OPENSOURCE
import sys
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
# END OPENSOURCE
import tmp.benchmarks_pb2 as benchmarks_pb2
__file_size_map = {}
def __get_data_size(filename):
if filename[0] != '/':
filename = os.path.dirname(os.path.abspath(__file__)) + "/../" + filename
if filename in __file_size_map:
return __file_size_map[filename]
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(
open(filename, "rb").read())
size = 0
count = 0
for payload in benchmark_dataset.payload:
size += len(payload)
count += 1
__file_size_map[filename] = (size, 1.0 * size / count)
return size, 1.0 * size / count
def __extract_file_name(file_name):
name_list = re.split(r"[/\.]", file_name)
short_file_name = ""
for name in name_list:
if name[:14] == "google_message":
short_file_name = name
return short_file_name
__results = []
# CPP results example:
# [
# "benchmarks": [
# {
# "bytes_per_second": int,
# "cpu_time_ns": double,
# "iterations": int,
# "name: string,
# "real_time_ns: double,
# ...
# },
# ...
# ],
# ...
# ]
def __parse_cpp_result(filename):
if filename == "":
return
if filename[0] != '/':
filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
with open(filename, encoding="utf-8") as f:
results = json.loads(f.read())
for benchmark in results["benchmarks"]:
data_filename = "".join(
re.split("(_parse_|_serialize)", benchmark["name"])[0])
behavior = benchmark["name"][len(data_filename) + 1:]
if data_filename[:2] == "BM":
data_filename = data_filename[3:]
__results.append({
"language": "cpp",
"dataFilename": data_filename,
"behavior": behavior,
"throughput": benchmark["bytes_per_second"] / 2.0 ** 20
})
# Synthetic benchmark results example:
# [
# "benchmarks": [
# {
# "cpu_time_ns": double,
# "iterations": int,
# "name: string,
# "real_time_ns: double,
# ...
# },
# ...
# ],
# ...
# ]
def __parse_synthetic_result(filename):
if filename == "":
return
if filename[0] != "/":
filename = os.path.dirname(os.path.abspath(__file__)) + "/" + filename
with open(filename, encoding="utf-8") as f:
results = json.loads(f.read())
for benchmark in results["benchmarks"]:
__results.append({
"language": "cpp",
"dataFilename": "",
"behavior": "synthetic",
"throughput": 10.0**9 / benchmark["cpu_time_ns"]
})
# Python results example:
# [
# [
# {
# "filename": string,
# "benchmarks": {
# behavior: results,
# ...
# },
# },
# ...
# ], #pure-python
# ...
# ]
def __parse_python_result(filename):
if filename == "":
return
if filename[0] != '/':
filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
with open(filename, encoding="utf-8") as f:
results_list = json.loads(f.read())
for results in results_list:
for result in results:
_, avg_size = __get_data_size(result["filename"])
for behavior in result["benchmarks"]:
__results.append({
"language": "python",
"dataFilename": __extract_file_name(result["filename"]),
"behavior": behavior,
"throughput": result["benchmarks"][behavior]
})
# Java results example:
# [
# {
# "id": string,
# "instrumentSpec": {...},
# "measurements": [
# {
# "weight": float,
# "value": {
# "magnitude": float,
# "unit": string
# },
# ...
# },
# ...
# ],
# "run": {...},
# "scenario": {
# "benchmarkSpec": {
# "methodName": string,
# "parameters": {
# defined parameters in the benchmark: parameters value
# },
# ...
# },
# ...
# }
#
# },
# ...
# ]
def __parse_java_result(filename):
if filename == "":
return
if filename[0] != '/':
filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
with open(filename, encoding="utf-8") as f:
results = json.loads(f.read())
for result in results:
total_weight = 0
total_value = 0
for measurement in result["measurements"]:
total_weight += measurement["weight"]
total_value += measurement["value"]["magnitude"]
avg_time = total_value * 1.0 / total_weight
total_size, _ = __get_data_size(
result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
__results.append({
"language": "java",
"throughput": total_size / avg_time * 1e9 / 2 ** 20,
"behavior": result["scenario"]["benchmarkSpec"]["methodName"],
"dataFilename": __extract_file_name(
result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
})
# Go benchmark results:
#
# goos: linux
# goarch: amd64
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op
# PASS
# ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
def __parse_go_result(filename):
if filename == "":
return
if filename[0] != '/':
filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
with open(filename, encoding="utf-8") as f:
for line in f:
result_list = re.split(r"[\ \t]+", line)
if result_list[0][:9] != "Benchmark":
continue
first_slash_index = result_list[0].find('/')
last_slash_index = result_list[0].rfind('/')
full_filename = result_list[0][first_slash_index+1:last_slash_index]
total_bytes, _ = __get_data_size(full_filename)
behavior_with_suffix = result_list[0][last_slash_index+1:]
last_dash = behavior_with_suffix.rfind("-")
if last_dash == -1:
behavior = behavior_with_suffix
else:
behavior = behavior_with_suffix[:last_dash]
__results.append({
"dataFilename": __extract_file_name(full_filename),
"throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
"behavior": behavior,
"language": "go"
})
# Self built json results example:
#
# [
# {
# "filename": string,
# "benchmarks": {
# behavior: results,
# ...
# },
# },
# ...
# ]
def __parse_custom_result(filename, language):
if filename == "":
return
if filename[0] != '/':
filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
with open(filename, encoding="utf-8") as f:
results = json.loads(f.read())
for result in results:
_, avg_size = __get_data_size(result["filename"])
for behavior in result["benchmarks"]:
__results.append({
"language": language,
"dataFilename": __extract_file_name(result["filename"]),
"behavior": behavior,
"throughput": result["benchmarks"][behavior]
})
def __parse_js_result(filename, language):
return __parse_custom_result(filename, language)
def __parse_php_result(filename, language):
return __parse_custom_result(filename, language)
def get_result_from_file(cpp_file="",
java_file="",
python_file="",
go_file="",
synthetic_file="",
node_file="",
php_c_file="",
php_file=""):
results = {}
if cpp_file != "":
__parse_cpp_result(cpp_file)
if java_file != "":
__parse_java_result(java_file)
if python_file != "":
__parse_python_result(python_file)
if go_file != "":
__parse_go_result(go_file)
if synthetic_file != "":
__parse_synthetic_result(synthetic_file)
if node_file != "":
__parse_js_result(node_file, "node")
if php_file != "":
__parse_php_result(php_file, "php")
if php_c_file != "":
__parse_php_result(php_c_file, "php")
return __results
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-cpp",
"--cpp_input_file",
help="The CPP benchmark result file's name",
default="")
parser.add_argument(
"-java",
"--java_input_file",
help="The Java benchmark result file's name",
default="")
parser.add_argument(
"-python",
"--python_input_file",
help="The Python benchmark result file's name",
default="")
parser.add_argument(
"-go",
"--go_input_file",
help="The golang benchmark result file's name",
default="")
parser.add_argument(
"-node",
"--node_input_file",
help="The node.js benchmark result file's name",
default="")
parser.add_argument(
"-php",
"--php_input_file",
help="The pure php benchmark result file's name",
default="")
parser.add_argument(
"-php_c",
"--php_c_input_file",
help="The php with c ext benchmark result file's name",
default="")
args = parser.parse_args()
results = get_result_from_file(
cpp_file=args.cpp_input_file,
java_file=args.java_input_file,
python_file=args.python_input_file,
go_file=args.go_input_file,
node_file=args.node_input_file,
php_file=args.php_input_file,
php_c_file=args.php_c_input_file,
)
print(json.dumps(results, indent=2))

View File

@@ -0,0 +1,107 @@
from __future__ import print_function
from __future__ import absolute_import
import argparse
import os
import re
import copy
import uuid
import calendar
import time
import datetime
from util import big_query_utils
from util import result_parser
_PROJECT_ID = 'grpc-testing'
_DATASET = 'protobuf_benchmark_result'
_TABLE = 'opensource_result_v2'
_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
datetime.datetime.now().month,
datetime.datetime.now().day)
_INITIAL_TIME = calendar.timegm(time.gmtime())
def get_metadata():
build_number = os.getenv('BUILD_NUMBER')
build_url = os.getenv('BUILD_URL')
job_name = os.getenv('JOB_NAME')
git_commit = os.getenv('GIT_COMMIT')
# actual commit is the actual head of PR that is getting tested
git_actual_commit = os.getenv('ghprbActualCommit')
utc_timestamp = str(calendar.timegm(time.gmtime()))
metadata = {'created': utc_timestamp}
if build_number:
metadata['buildNumber'] = build_number
if build_url:
metadata['buildUrl'] = build_url
if job_name:
metadata['jobName'] = job_name
if git_commit:
metadata['gitCommit'] = git_commit
if git_actual_commit:
metadata['gitActualCommit'] = git_actual_commit
return metadata
def upload_result(result_list, metadata):
for result in result_list:
new_result = {}
new_result["metric"] = "throughput"
new_result["value"] = result["throughput"]
new_result["unit"] = "MB/s"
new_result["test"] = "protobuf_benchmark"
new_result["product_name"] = "protobuf"
labels_string = ""
for key in result:
labels_string += ",|%s:%s|" % (key, result[key])
new_result["labels"] = labels_string[1:]
new_result["timestamp"] = _INITIAL_TIME
print(labels_string)
bq = big_query_utils.create_big_query()
row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
_TABLE + "$" + _NOW,
[row]):
print('Error when uploading result', new_result)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-cpp", "--cpp_input_file",
help="The CPP benchmark result file's name",
default="")
parser.add_argument("-java", "--java_input_file",
help="The Java benchmark result file's name",
default="")
parser.add_argument("-python", "--python_input_file",
help="The Python benchmark result file's name",
default="")
parser.add_argument("-go", "--go_input_file",
help="The golang benchmark result file's name",
default="")
parser.add_argument("-node", "--node_input_file",
help="The node.js benchmark result file's name",
default="")
parser.add_argument("-php", "--php_input_file",
help="The pure php benchmark result file's name",
default="")
parser.add_argument("-php_c", "--php_c_input_file",
help="The php with c ext benchmark result file's name",
default="")
args = parser.parse_args()
metadata = get_metadata()
print("uploading results...")
upload_result(result_parser.get_result_from_file(
cpp_file=args.cpp_input_file,
java_file=args.java_input_file,
python_file=args.python_input_file,
go_file=args.go_input_file,
node_file=args.node_input_file,
php_file=args.php_input_file,
php_c_file=args.php_c_input_file,
), metadata)

View File

@@ -0,0 +1,194 @@
#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include <sstream>
#include <algorithm>
using google::protobuf::Descriptor;
using google::protobuf::DescriptorProto;
using google::protobuf::FileDescriptorProto;
using google::protobuf::FieldDescriptorProto;
using google::protobuf::Message;
using google::protobuf::EnumValueDescriptorProto;
namespace google {
namespace protobuf {
namespace util {
class SchemaGroupStripper {
public:
static void StripFile(const FileDescriptor* old_file,
FileDescriptorProto *file) {
for (int i = file->mutable_message_type()->size() - 1; i >= 0; i--) {
if (IsMessageSet(old_file->message_type(i))) {
file->mutable_message_type()->DeleteSubrange(i, 1);
continue;
}
StripMessage(old_file->message_type(i), file->mutable_message_type(i));
}
for (int i = file->mutable_extension()->size() - 1; i >= 0; i--) {
auto field = old_file->extension(i);
if (field->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(field->message_type()) ||
IsMessageSet(field->containing_type())) {
file->mutable_extension()->DeleteSubrange(i, 1);
}
}
}
private:
static bool IsMessageSet(const Descriptor *descriptor) {
if (descriptor != nullptr
&& descriptor->options().message_set_wire_format()) {
return true;
}
return false;
}
static void StripMessage(const Descriptor *old_message,
DescriptorProto *new_message) {
for (int i = new_message->mutable_field()->size() - 1; i >= 0; i--) {
if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(old_message->field(i)->message_type())) {
new_message->mutable_field()->DeleteSubrange(i, 1);
}
}
for (int i = new_message->mutable_extension()->size() - 1; i >= 0; i--) {
auto field_type_name = new_message->mutable_extension(i)->type_name();
if (old_message->extension(i)->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(old_message->extension(i)->containing_type()) ||
IsMessageSet(old_message->extension(i)->message_type())) {
new_message->mutable_extension()->DeleteSubrange(i, 1);
}
}
for (int i = 0; i < new_message->mutable_nested_type()->size(); i++) {
StripMessage(old_message->nested_type(i),
new_message->mutable_nested_type(i));
}
}
};
class EnumScrubber {
public:
EnumScrubber()
: total_added_(0) {
}
void ScrubFile(FileDescriptorProto *file) {
for (int i = 0; i < file->enum_type_size(); i++) {
ScrubEnum(file->mutable_enum_type(i));
}
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
ScrubMessage(file->mutable_message_type(i));
}
}
private:
void ScrubEnum(EnumDescriptorProto *enum_type) {
if (enum_type->value(0).number() != 0) {
bool has_zero = false;
for (int j = 0; j < enum_type->value().size(); j++) {
if (enum_type->value(j).number() == 0) {
EnumValueDescriptorProto temp_enum_value;
temp_enum_value.CopyFrom(enum_type->value(j));
enum_type->mutable_value(j)->CopyFrom(enum_type->value(0));
enum_type->mutable_value(0)->CopyFrom(temp_enum_value);
has_zero = true;
break;
}
}
if (!has_zero) {
enum_type->mutable_value()->Add();
for (int i = enum_type->mutable_value()->size() - 1; i > 0; i--) {
enum_type->mutable_value(i)->CopyFrom(
*enum_type->mutable_value(i - 1));
}
enum_type->mutable_value(0)->set_number(0);
enum_type->mutable_value(0)->set_name("ADDED_ZERO_VALUE_" +
std::to_string(total_added_++));
}
}
}
void ScrubMessage(DescriptorProto *message_type) {
for (int i = 0; i < message_type->mutable_enum_type()->size(); i++) {
ScrubEnum(message_type->mutable_enum_type(i));
}
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
ScrubMessage(message_type->mutable_nested_type(i));
}
}
int total_added_;
};
class ExtensionStripper {
public:
static void StripFile(FileDescriptorProto *file) {
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
StripMessage(file->mutable_message_type(i));
}
file->mutable_extension()->Clear();
}
private:
static void StripMessage(DescriptorProto *message_type) {
message_type->mutable_extension()->Clear();
message_type->clear_extension_range();
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
StripMessage(message_type->mutable_nested_type(i));
}
}
};
class FieldScrubber {
public:
static void ScrubFile(FileDescriptorProto *file) {
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
ScrubMessage(file->mutable_message_type(i));
}
for (int i = 0; i < file->mutable_extension()->size(); i++) {
file->mutable_extension(i)->clear_default_value();
if (ShouldClearLabel(file->mutable_extension(i))) {
file->mutable_extension(i)->clear_label();
}
}
}
private:
static bool ShouldClearLabel(const FieldDescriptorProto *field) {
return field->label() == FieldDescriptorProto::LABEL_REQUIRED;
}
static void ScrubMessage(DescriptorProto *message_type) {
message_type->mutable_extension()->Clear();
for (int i = 0; i < message_type->mutable_extension()->size(); i++) {
message_type->mutable_extension(i)->clear_default_value();
if (ShouldClearLabel(message_type->mutable_extension(i))) {
message_type->mutable_extension(i)->clear_label();
}
}
for (int i = 0; i < message_type->mutable_field()->size(); i++) {
message_type->mutable_field(i)->clear_default_value();
if (ShouldClearLabel(message_type->mutable_field(i))) {
message_type->mutable_field(i)->clear_label();
}
}
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
ScrubMessage(message_type->mutable_nested_type(i));
}
}
};
} // namespace util
} // namespace protobuf
} // namespace google
#endif // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_