Provides an example of processing a CSV file containing evidence for device detection. The supplied file contains 20,000 examples of evidence representing HTTP headers. For example:
We create a device detection pipeline to read the data and output the time taken and information about the detection.
Requesting properties from a single component reduces detection time compared with requesting properties from multiple components. If you don't specify any properties to detect, then all properties are detected.
This example requires a local data file. The free 'Lite' data file can be acquired by
pulling the git submodules under this repository (run `git submodule update --recursive`)
or from the device-detection-data
GitHub repository.
The Lite data file is only used for illustration, and has limited accuracy and capabilities.
Find out about the more capable data files that are available on our
pricing page.
49 import multiprocessing
as mp
66 flowdata = pipeline.create_flowdata()
69 flowdata.evidence.add(
"header.user-agent", user_agent)
79 if flowdata.device.ismobile.has_value():
80 return flowdata.device.ismobile.value()
86 pipeline = DeviceDetectionOnPremisePipelineBuilder(
87 data_file_path=data_file,
89 performance_profile=
'MaxPerformance',
90 add_javascript_builder=
False,
91 restricted_properties=[
"ismobile"],
93 auto_update=
False).
build()
100 for user_agent
in user_agent_list:
105 results[
"unknown"] += 1
107 results[
"mobile"] += 1
109 results[
"notmobile"] += 1
111 output.put(results, list_number)
114 def run(data_file, skip = False):
122 for x
in range(threads):
123 processes.append(mp.Process(target=process_user_agent_list,
124 args=(data_file, split_lists[x], x, output, skip)))
138 results = [output.get()
for p
in processes]
143 return {
"time": total,
"result": results}
145 if __name__ ==
"__main__":
146 ap = argparse.ArgumentParser(description=
'Run detection benchmark.')
147 ap.add_argument(
'-d',
'--data_file', default=
'', help=
'Path to data file')
148 ap.add_argument(
'-u',
'--user_agents_file', default=
'src/fiftyone_devicedetection_onpremise/cxx/device-detection-data/20000 User Agents.csv', help=
'Path to user agents evidence file')
149 ap.add_argument(
'-j',
'--json_output', default=
'', help=
'Output results in JSON format')
150 args = ap.parse_args()
151 if args.data_file ==
"":
152 args.data_file = ExampleUtils.find_file(
"51Degrees-LiteV4.1.hash")
155 with open(args.user_agents_file, newline=
'')
as file:
156 reader = csv.reader(file)
157 user_agents = list(reader)
159 number_of_user_agents = len(user_agents)
161 print(
"Processing " + str(number_of_user_agents) +
" user agents")
165 threads = mp.cpu_count()
167 print(
"Using " + str(threads) +
" threads")
169 chunk_size = int(number_of_user_agents / threads)
172 split_lists = [user_agents[x:x+chunk_size]
173 for x
in range(0, len(user_agents), chunk_size)]
175 calibration =
run(args.data_file, skip=
True)
177 real =
run(args.data_file, skip=
False)
179 real_time = real[
"time"]
181 print(
"Total time (seconds): " + str(real_time) +
" seconds")
182 print(
"Time per user agent (ms): " + str((real_time / number_of_user_agents) * 1000))
184 if args.json_output !=
"":
186 "DetectionsPerSecond": 1.0 / (real_time / number_of_user_agents),
187 "MsPerDetection": real_time * 1000 / number_of_user_agents
189 with open(args.json_output,
"w")
as file:
190 print(json.dumps(results), file = file)
198 for result
in real[
"result"]:
199 final_result[
"unknown"] += result[
"unknown"]
200 final_result[
"mobile"] += result[
"mobile"]
201 final_result[
"notmobile"] += result[
"notmobile"]
203 print(
"Results", final_result)