cprover
Loading...
Searching...
No Matches
taint_analysis.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Taint Analysis
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
12#include "taint_analysis.h"
13
14#include <iostream>
15#include <fstream>
16
17#include <util/invariant.h>
18#include <util/json.h>
19#include <util/message.h>
20#include <util/pointer_expr.h>
21#include <util/prefix.h>
22#include <util/simplify_expr.h>
23#include <util/std_code.h>
25
27
29
30#include "taint_parser.h"
31
33{
34public:
35 explicit taint_analysist(message_handlert &message_handler)
36 : log(message_handler)
37 {
38 }
39
40 bool operator()(
41 const std::string &taint_file_name,
42 const symbol_tablet &,
44 bool show_full,
45 const optionalt<std::string> &json_file_name);
46
47protected:
51
52 void instrument(const namespacet &, goto_functionst &);
54};
55
57 const namespacet &ns,
58 goto_functionst &goto_functions)
59{
60 for(auto &function : goto_functions.function_map)
61 instrument(ns, function.second);
62}
63
65 const namespacet &ns,
67{
68 for(goto_programt::instructionst::iterator
69 it=goto_function.body.instructions.begin();
70 it!=goto_function.body.instructions.end();
71 it++)
72 {
73 const goto_programt::instructiont &instruction=*it;
74
75 goto_programt insert_before, insert_after;
76
77 if(instruction.is_function_call())
78 {
79 const exprt &function = instruction.call_function();
80
81 if(function.id() == ID_symbol)
82 {
83 const irep_idt &identifier = to_symbol_expr(function).get_identifier();
84
85 std::set<irep_idt> identifiers;
86
87 identifiers.insert(identifier);
88
89 irep_idt class_id = function.get(ID_C_class);
90 if(class_id.empty())
91 {
92 }
93 else
94 {
95 std::string suffix = std::string(
96 id2string(identifier), class_id.size(), std::string::npos);
97
100 for(const auto &p : parents)
101 identifiers.insert(id2string(p) + suffix);
102 }
103
104 for(const auto &rule : taint.rules)
105 {
106 bool match = false;
107 for(const auto &i : identifiers)
108 {
109 if(
110 i == rule.function_identifier ||
112 id2string(i),
113 "java::" + id2string(rule.function_identifier) + ":"))
114 {
115 match = true;
116 break;
117 }
118 }
119
120 if(match)
121 {
122 log.debug() << "MATCH " << rule.id << " on " << identifier
123 << messaget::eom;
124
125 exprt where = nil_exprt();
126
127 const code_typet &code_type = to_code_type(function.type());
128
129 bool have_this = !code_type.parameters().empty() &&
130 code_type.parameters().front().get_bool(ID_C_this);
131
132 switch(rule.where)
133 {
135 {
137 ns.lookup(id2string(identifier) + "#return_value");
138 where = return_value_symbol.symbol_expr();
139 break;
140 }
141
143 {
144 unsigned nr =
145 have_this ? rule.parameter_number : rule.parameter_number - 1;
146 if(instruction.call_arguments().size() > nr)
147 where = instruction.call_arguments()[nr];
148 break;
149 }
150
152 if(have_this)
153 {
155 !instruction.call_arguments().empty(),
156 "`this` implies at least one argument in function call");
157 where = instruction.call_arguments()[0];
158 }
159 break;
160 }
161
162 switch(rule.kind)
163 {
165 {
166 codet code_set_may{ID_set_may};
167 code_set_may.operands().resize(2);
168 code_set_may.op0() = where;
169 code_set_may.op1() =
171 insert_after.add(goto_programt::make_other(
172 code_set_may, instruction.source_location()));
173 break;
174 }
175
177 {
179 where,
180 ID_get_may,
184 not_exprt(get_may), instruction.source_location()));
185 t->source_location_nonconst().set_property_class(
186 "taint rule " + id2string(rule.id));
187 t->source_location_nonconst().set_comment(rule.message);
188 break;
189 }
190
192 {
193 codet code_clear_may{ID_clear_may};
194 code_clear_may.operands().resize(2);
195 code_clear_may.op0() = where;
196 code_clear_may.op1() =
198 insert_after.add(goto_programt::make_other(
199 code_clear_may, instruction.source_location()));
200 break;
201 }
202 }
203 }
204 }
205 }
206 }
207
208 if(!insert_before.empty())
209 {
210 goto_function.body.insert_before_swap(it, insert_before);
211 // advance until we get back to the call
212 while(!it->is_function_call()) ++it;
213 }
214
215 if(!insert_after.empty())
216 {
217 goto_function.body.destructive_insert(
218 std::next(it), insert_after);
219 }
220 }
221}
222
224 const std::string &taint_file_name,
225 const symbol_tablet &symbol_table,
226 goto_functionst &goto_functions,
227 bool show_full,
228 const optionalt<std::string> &json_file_name)
229{
230 try
231 {
232 json_arrayt json_result;
233 bool use_json = json_file_name.has_value();
234
235 log.status() << "Reading taint file '" << taint_file_name << "'"
236 << messaget::eom;
237
238 if(taint_parser(taint_file_name, taint, log.get_message_handler()))
239 {
240 log.error() << "Failed to read taint definition file" << messaget::eom;
241 return true;
242 }
243
244 log.status() << "Got " << taint.rules.size() << " taint definitions"
245 << messaget::eom;
246
248 taint.output(mstream);
249 mstream << messaget::eom;
250 });
251
252 log.status() << "Instrumenting taint" << messaget::eom;
253
254 class_hierarchy(symbol_table);
255
256 const namespacet ns(symbol_table);
257 instrument(ns, goto_functions);
258 goto_functions.update();
259
260 bool have_entry_point=
261 goto_functions.function_map.find(goto_functionst::entry_point())!=
262 goto_functions.function_map.end();
263
264 // do we have an entry point?
265 if(have_entry_point)
266 {
267 log.status() << "Working from entry point" << messaget::eom;
268 }
269 else
270 {
271 log.status() << "No entry point found; "
272 << "we will consider the heads of all functions as reachable"
273 << messaget::eom;
274
275 goto_programt end, gotos, calls;
276
278
279 for(const auto &gf_entry : goto_functions.function_map)
280 {
281 if(
282 gf_entry.second.body_available() &&
283 gf_entry.first != goto_functionst::entry_point())
284 {
285 const symbolt &symbol = ns.lookup(gf_entry.first);
286 const code_function_callt call(symbol.symbol_expr());
289 calls.add(
293 }
294 }
295
298
299 goto_programt &body=entry.body;
300
301 body.destructive_append(gotos);
302 body.destructive_append(calls);
303 body.destructive_append(end);
304
305 goto_functions.update();
306 }
307
308 log.status() << "Data-flow analysis" << messaget::eom;
309
310 custom_bitvector_analysist custom_bitvector_analysis;
311 custom_bitvector_analysis(goto_functions, ns);
312
313 if(show_full)
314 {
315 custom_bitvector_analysis.output(ns, goto_functions, std::cout);
316 return false;
317 }
318
319 for(const auto &gf_entry : goto_functions.function_map)
320 {
321 if(!gf_entry.second.body.has_assertion())
322 continue;
323
324 const symbolt &symbol = ns.lookup(gf_entry.first);
325
326 if(gf_entry.first == "__actual_thread_spawn")
327 continue;
328
329 bool first=true;
330
331 forall_goto_program_instructions(i_it, gf_entry.second.body)
332 {
333 if(!i_it->is_assert())
334 continue;
335
337 i_it->get_condition()))
338 {
339 continue;
340 }
341
342 if(custom_bitvector_analysis[i_it].has_values.is_false())
343 continue;
344
345 exprt result =
346 custom_bitvector_analysis.eval(i_it->get_condition(), i_it);
347 if(simplify_expr(std::move(result), ns).is_true())
348 continue;
349
350 if(first)
351 {
352 first=false;
353 if(!use_json)
354 std::cout << "\n"
355 << "******** Function " << symbol.display_name() << '\n';
356 }
357
358 if(use_json)
359 {
361 {"bugClass",
362 json_stringt(i_it->source_location().get_property_class())},
363 {"file", json_stringt(i_it->source_location().get_file())},
364 {"line",
365 json_numbert(id2string(i_it->source_location().get_line()))}};
366 json_result.push_back(std::move(json));
367 }
368 else
369 {
370 std::cout << i_it->source_location();
371 if(!i_it->source_location().get_comment().empty())
372 std::cout << ": " << i_it->source_location().get_comment();
373
374 if(!i_it->source_location().get_property_class().empty())
375 std::cout << " (" << i_it->source_location().get_property_class()
376 << ")";
377
378 std::cout << '\n';
379 }
380 }
381 }
382
383 if(use_json)
384 {
385 std::ofstream json_out(json_file_name.value());
386
387 if(!json_out)
388 {
389 log.error() << "Failed to open json output '" << json_file_name.value()
390 << "'" << messaget::eom;
391 return true;
392 }
393
394 log.status() << "Analysis result is written to '"
395 << json_file_name.value() << "'" << messaget::eom;
396
397 json_out << json_result << '\n';
398 }
399
400 return false;
401 }
402 catch(const char *error_msg)
403 {
404 log.error() << error_msg << messaget::eom;
405 return true;
406 }
407 catch(const std::string &error_msg)
408 {
409 log.error() << error_msg << messaget::eom;
410 return true;
411 }
412 catch(...)
413 {
414 log.error() << "Caught unexpected error in taint_analysist::operator()"
415 << messaget::eom;
416 return true;
417 }
418}
419
421 goto_modelt &goto_model,
422 const std::string &taint_file_name,
423 message_handlert &message_handler,
424 bool show_full,
425 const optionalt<std::string> &json_file_name)
426{
427 taint_analysist taint_analysis(message_handler);
428 return taint_analysis(
429 taint_file_name,
430 goto_model.symbol_table,
431 goto_model.goto_functions,
432 show_full,
433 json_file_name);
434}
Class Hierarchy.
Operator to return the address of an object.
virtual void output(const namespacet &ns, const irep_idt &function_id, const goto_programt &goto_program, std::ostream &out) const
Output the abstract states for a single function.
Definition ai.cpp:40
A base class for expressions that are predicates, i.e., Boolean-typed, and that take exactly two argu...
Definition std_expr.h:643
The Boolean type.
Definition std_types.h:36
Non-graph-based representation of the class hierarchy.
idst get_parents_trans(const irep_idt &id) const
std::vector< irep_idt > idst
codet representation of a function call statement.
Base type of functions.
Definition std_types.h:539
const parameterst & parameters() const
Definition std_types.h:655
Data structure for representing an arbitrary statement in a program.
exprt eval(const exprt &src, locationt loc)
static bool has_get_must_or_may(const exprt &)
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:37
bool empty() const
Definition dstring.h:88
size_t size() const
Definition dstring.h:104
Base class for all expressions.
Definition expr.h:54
exprt & op1()
Definition expr.h:102
typet & type()
Return the type of the expression.
Definition expr.h:82
operandst & operands()
Definition expr.h:92
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
symbol_tablet symbol_table
Symbol table.
Definition goto_model.h:30
goto_functionst goto_functions
GOTO functions.
Definition goto_model.h:33
This class represents an instruction in the GOTO intermediate representation.
const exprt::operandst & call_arguments() const
Get the arguments of a FUNCTION_CALL.
const exprt & call_function() const
Get the function that is called for FUNCTION_CALL.
const source_locationt & source_location() const
A generic container class for the GOTO intermediate representation of one function.
instructionst instructions
The list of instructions in the goto program.
static instructiont make_end_function(const source_locationt &l=source_locationt::nil())
instructionst::iterator targett
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
static instructiont make_function_call(const code_function_callt &_code, const source_locationt &l=source_locationt::nil())
Create a function call instruction.
targett add(instructiont &&instruction)
Adds a given instruction at the end.
static instructiont make_goto(targett _target, const source_locationt &l=source_locationt::nil())
bool empty() const
Is the program empty?
static instructiont make_assertion(const exprt &g, const source_locationt &l=source_locationt::nil())
static instructiont make_other(const codet &_code, const source_locationt &l=source_locationt::nil())
const irep_idt & get(const irep_idt &name) const
Definition irep.cpp:45
const irep_idt & id() const
Definition irep.h:396
jsont & push_back(const jsont &json)
Definition json.h:212
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:155
mstreamt & error() const
Definition message.h:399
mstreamt & debug() const
Definition message.h:429
message_handlert & get_message_handler()
Definition message.h:184
void conditional_output(mstreamt &mstream, const std::function< void(mstreamt &)> &output_generator) const
Generate output to message_stream using output_generator if the configured verbosity is at least as h...
Definition message.cpp:139
static eomt eom
Definition message.h:297
mstreamt & status() const
Definition message.h:414
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
The NIL expression.
Definition std_expr.h:2874
Boolean negation.
Definition std_expr.h:2181
A side_effect_exprt that returns a non-deterministically chosen value.
Definition std_code.h:1520
const irep_idt & get_identifier() const
Definition std_expr.h:109
The symbol table.
Symbol table entry.
Definition symbol.h:28
source_locationt location
Source code location of definition of symbol.
Definition symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition symbol.cpp:121
const irep_idt & display_name() const
Return language specific display name if present.
Definition symbol.h:55
void instrument(const namespacet &, goto_functionst &)
taint_parse_treet taint
taint_analysist(message_handlert &message_handler)
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const optionalt< std::string > &json_file_name)
class_hierarchyt class_hierarchy
The Boolean constant true.
Definition std_expr.h:2856
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
Field-insensitive, location-sensitive bitvector analysis.
#define forall_goto_program_instructions(it, program)
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
bool is_true(const literalt &l)
Definition literal.h:198
nonstd::optional< T > optionalt
Definition optional.h:35
API to expression classes for Pointers.
static void json(json_objectT &result, const irep_idt &property_id, const property_infot &property_info)
symbol_exprt return_value_symbol(const irep_idt &identifier, const namespacet &ns)
produces the symbol that is used to store the return value of the function with the given identifier
exprt simplify_expr(exprt src, const namespacet &ns)
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:510
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:189
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition std_types.h:744
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const optionalt< std::string > &json_file_name)
Taint Analysis.
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
Taint Parser.